diff options
Diffstat (limited to 'arch/powerpc/platforms')
40 files changed, 1587 insertions, 522 deletions
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index f442120e0033..0c1e6903597e 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -274,7 +274,7 @@ config CORENET_GENERIC For 32bit kernel, the following boards are supported: P2041 RDB, P3041 DS, P4080 DS, kmcoge4, and OCA4080 For 64bit kernel, the following boards are supported: - T4240 QDS and B4 QDS + T208x QDS/RDB, T4240 QDS/RDB and B4 QDS The following boards are supported for both 32bit and 64bit kernel: P5020 DS, P5040 DS and T104xQDS diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 5db1e117fdde..d22dd85e50bf 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -119,7 +119,11 @@ static const char * const boards[] __initconst = { "fsl,P4080DS", "fsl,P5020DS", "fsl,P5040DS", + "fsl,T2080QDS", + "fsl,T2080RDB", + "fsl,T2081QDS", "fsl,T4240QDS", + "fsl,T4240RDB", "fsl,B4860QDS", "fsl,B4420QDS", "fsl,B4220QDS", @@ -129,28 +133,14 @@ static const char * const boards[] __initconst = { NULL }; -static const char * const hv_boards[] __initconst = { - "fsl,P2041RDB-hv", - "fsl,P3041DS-hv", - "fsl,OCA4080-hv", - "fsl,P4080DS-hv", - "fsl,P5020DS-hv", - "fsl,P5040DS-hv", - "fsl,T4240QDS-hv", - "fsl,B4860QDS-hv", - "fsl,B4420QDS-hv", - "fsl,B4220QDS-hv", - "fsl,T1040QDS-hv", - "fsl,T1042QDS-hv", - NULL -}; - /* * Called very early, device-tree isn't unflattened */ static int __init corenet_generic_probe(void) { unsigned long root = of_get_flat_dt_root(); + char hv_compat[24]; + int i; #ifdef CONFIG_SMP extern struct smp_ops_t smp_85xx_ops; #endif @@ -159,21 +149,26 @@ static int __init corenet_generic_probe(void) return 1; /* Check if we're running under the Freescale hypervisor */ - if (of_flat_dt_match(root, hv_boards)) { - ppc_md.init_IRQ = ehv_pic_init; - ppc_md.get_irq = ehv_pic_get_irq; - ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; - ppc_md.halt = fsl_hv_halt; + for (i = 0; boards[i]; i++) { + snprintf(hv_compat, sizeof(hv_compat), "%s-hv", boards[i]); + if (of_flat_dt_is_compatible(root, hv_compat)) { + ppc_md.init_IRQ = ehv_pic_init; + + ppc_md.get_irq = ehv_pic_get_irq; + ppc_md.restart = fsl_hv_restart; + ppc_md.power_off = fsl_hv_halt; + ppc_md.halt = fsl_hv_halt; #ifdef CONFIG_SMP - /* - * Disable the timebase sync operations because we can't write - * to the timebase registers under the hypervisor. - */ - smp_85xx_ops.give_timebase = NULL; - smp_85xx_ops.take_timebase = NULL; + /* + * Disable the timebase sync operations because we + * can't write to the timebase registers under the + * hypervisor. + */ + smp_85xx_ops.give_timebase = NULL; + smp_85xx_ops.take_timebase = NULL; #endif - return 1; + return 1; + } } return 0; diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index ba093f553678..d7c1e69f3070 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -28,6 +28,7 @@ #include <asm/dbell.h> #include <asm/fsl_guts.h> #include <asm/code-patching.h> +#include <asm/cputhreads.h> #include <sysdev/fsl_soc.h> #include <sysdev/mpic.h> @@ -168,6 +169,24 @@ static inline u32 read_spin_table_addr_l(void *spin_table) return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l); } +#ifdef CONFIG_PPC64 +static void wake_hw_thread(void *info) +{ + void fsl_secondary_thread_init(void); + unsigned long imsr1, inia1; + int nr = *(const int *)info; + + imsr1 = MSR_KERNEL; + inia1 = *(unsigned long *)fsl_secondary_thread_init; + + mttmr(TMRN_IMSR1, imsr1); + mttmr(TMRN_INIA1, inia1); + mtspr(SPRN_TENS, TEN_THREAD(1)); + + smp_generic_kick_cpu(nr); +} +#endif + static int smp_85xx_kick_cpu(int nr) { unsigned long flags; @@ -183,6 +202,31 @@ static int smp_85xx_kick_cpu(int nr) pr_debug("smp_85xx_kick_cpu: kick CPU #%d\n", nr); +#ifdef CONFIG_PPC64 + /* Threads don't use the spin table */ + if (cpu_thread_in_core(nr) != 0) { + int primary = cpu_first_thread_sibling(nr); + + if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT))) + return -ENOENT; + + if (cpu_thread_in_core(nr) != 1) { + pr_err("%s: cpu %d: invalid hw thread %d\n", + __func__, nr, cpu_thread_in_core(nr)); + return -ENOENT; + } + + if (!cpu_online(primary)) { + pr_err("%s: cpu %d: primary %d not online\n", + __func__, nr, primary); + return -ENOENT; + } + + smp_call_function_single(primary, wake_hw_thread, &nr, 0); + return 0; + } +#endif + np = of_get_cpu_node(nr, NULL); cpu_rel_addr = of_get_property(np, "cpu-release-addr", NULL); diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 587a2828b06c..d3037747031d 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -18,7 +18,6 @@ #include <linux/fsl_devices.h> #include <asm/io.h> -#include <asm/mpc8xx.h> #include <asm/8xx_immap.h> #include <asm/prom.h> #include <asm/fs_pd.h> @@ -28,8 +27,6 @@ #include "mpc8xx.h" -struct mpc8xx_pcmcia_ops m8xx_pcmcia_ops; - extern int cpm_pic_init(void); extern int cpm_get_irq(void); diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c index c1262581b63c..5921dcb498fd 100644 --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -35,7 +35,6 @@ #include <asm/page.h> #include <asm/processor.h> #include <asm/time.h> -#include <asm/mpc8xx.h> #include <asm/8xx_immap.h> #include <asm/cpm1.h> #include <asm/fs_pd.h> @@ -46,61 +45,6 @@ static u32 __iomem *bcsr, *bcsr5; -#ifdef CONFIG_PCMCIA_M8XX -static void pcmcia_hw_setup(int slot, int enable) -{ - if (enable) - clrbits32(&bcsr[1], BCSR1_PCCEN); - else - setbits32(&bcsr[1], BCSR1_PCCEN); -} - -static int pcmcia_set_voltage(int slot, int vcc, int vpp) -{ - u32 reg = 0; - - switch (vcc) { - case 0: - break; - case 33: - reg |= BCSR1_PCCVCC0; - break; - case 50: - reg |= BCSR1_PCCVCC1; - break; - default: - return 1; - } - - switch (vpp) { - case 0: - break; - case 33: - case 50: - if (vcc == vpp) - reg |= BCSR1_PCCVPP1; - else - return 1; - break; - case 120: - if ((vcc == 33) || (vcc == 50)) - reg |= BCSR1_PCCVPP0; - else - return 1; - default: - return 1; - } - - /* first, turn off all power */ - clrbits32(&bcsr[1], 0x00610000); - - /* enable new powersettings */ - setbits32(&bcsr[1], reg); - - return 0; -} -#endif - struct cpm_pin { int port, pin, flags; }; @@ -245,12 +189,6 @@ static void __init mpc885ads_setup_arch(void) of_detach_node(np); of_node_put(np); } - -#ifdef CONFIG_PCMCIA_M8XX - /* Set up board specific hook-ups.*/ - m8xx_pcmcia_ops.hw_ctrl = pcmcia_hw_setup; - m8xx_pcmcia_ops.voltage_set = pcmcia_set_voltage; -#endif } static int __init mpc885ads_probe(void) diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index 251aba8759e4..dda607807def 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -37,7 +37,6 @@ #include <asm/page.h> #include <asm/processor.h> #include <asm/time.h> -#include <asm/mpc8xx.h> #include <asm/8xx_immap.h> #include <asm/cpm1.h> #include <asm/fs_pd.h> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index a41bd023647a..e8bc40869cbd 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -61,7 +61,7 @@ choice help There are two families of 64 bit PowerPC chips supported. The most common ones are the desktop and server CPUs - (POWER3, RS64, POWER4, POWER5, POWER5+, POWER6, ...) + (POWER4, POWER5, 970, POWER5+, POWER6, POWER7, POWER8 ...) The other are the "embedded" processors compliant with the "Book 3E" variant of the architecture @@ -140,14 +140,6 @@ config 6xx depends on PPC32 && PPC_BOOK3S select PPC_HAVE_PMU_SUPPORT -config POWER3 - depends on PPC64 && PPC_BOOK3S - def_bool y - -config POWER4 - depends on PPC64 && PPC_BOOK3S - def_bool y - config TUNE_CELL bool "Optimize for Cell Broadband Engine" depends on PPC64 && PPC_BOOK3S @@ -244,7 +236,7 @@ config PHYS_64BIT config ALTIVEC bool "AltiVec Support" - depends on 6xx || POWER4 || (PPC_E500MC && PPC64) + depends on 6xx || PPC_BOOK3S_64 || (PPC_E500MC && PPC64) ---help--- This option enables kernel support for the Altivec extensions to the PowerPC processor. The kernel currently supports saving and restoring @@ -260,7 +252,7 @@ config ALTIVEC config VSX bool "VSX Support" - depends on POWER4 && ALTIVEC && PPC_FPU + depends on PPC_BOOK3S_64 && ALTIVEC && PPC_FPU ---help--- This option enables kernel support for the Vector Scaler extensions @@ -276,7 +268,7 @@ config VSX config PPC_ICSWX bool "Support for PowerPC icswx coprocessor instruction" - depends on POWER4 + depends on PPC_BOOK3S_64 default n ---help--- @@ -294,7 +286,7 @@ config PPC_ICSWX config PPC_ICSWX_PID bool "icswx requires direct PID management" - depends on PPC_ICSWX && POWER4 + depends on PPC_ICSWX default y ---help--- The PID register in server is used explicitly for ICSWX. In diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig index 1afd10f67858..607124bae2e7 100644 --- a/arch/powerpc/platforms/powermac/Kconfig +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -10,7 +10,7 @@ config PPC_PMAC config PPC_PMAC64 bool - depends on PPC_PMAC && POWER4 + depends on PPC_PMAC && PPC64 select MPIC select U3_DART select MPIC_U3_HT_IRQS diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 63d82bbc05e9..1413e72bc2e1 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -158,7 +158,7 @@ static inline int simple_feature_tweak(struct device_node *node, int type, return 0; } -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 static long ohare_htw_scc_enable(struct device_node *node, long param, long value) @@ -1318,7 +1318,7 @@ intrepid_aack_delay_enable(struct device_node *node, long param, long value) } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ static long core99_read_gpio(struct device_node *node, long param, long value) @@ -1338,7 +1338,7 @@ core99_write_gpio(struct device_node *node, long param, long value) return 0; } -#ifdef CONFIG_POWER4 +#ifdef CONFIG_PPC64 static long g5_gmac_enable(struct device_node *node, long param, long value) { struct macio_chip *macio = &macio_chips[0]; @@ -1550,9 +1550,9 @@ void g5_phy_disable_cpu1(void) if (uninorth_maj == 3) UN_OUT(U3_API_PHY_CONFIG_1, 0); } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 #ifdef CONFIG_PM @@ -1864,7 +1864,7 @@ core99_sleep_state(struct device_node *node, long param, long value) return 0; } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ static long generic_dev_can_wake(struct device_node *node, long param, long value) @@ -1906,7 +1906,7 @@ static struct feature_table_entry any_features[] = { { 0, NULL } }; -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 /* OHare based motherboards. Currently, we only use these on the * 2400,3400 and 3500 series powerbooks. Some older desktops seem @@ -2056,7 +2056,7 @@ static struct feature_table_entry intrepid_features[] = { { 0, NULL } }; -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ /* G5 features */ @@ -2074,10 +2074,10 @@ static struct feature_table_entry g5_features[] = { { 0, NULL } }; -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ static struct pmac_mb_def pmac_mb_defs[] = { -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 /* * Desktops */ @@ -2342,7 +2342,7 @@ static struct pmac_mb_def pmac_mb_defs[] = { PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, }, -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ { "PowerMac7,2", "PowerMac G5", PMAC_TYPE_POWERMAC_G5, g5_features, 0, @@ -2373,7 +2373,7 @@ static struct pmac_mb_def pmac_mb_defs[] = { 0, }, #endif /* CONFIG_PPC64 */ -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ }; /* @@ -2441,7 +2441,7 @@ static int __init probe_motherboard(void) /* Fallback to selection depending on mac-io chip type */ switch(macio->type) { -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 case macio_grand_central: pmac_mb.model_id = PMAC_TYPE_PSURGE; pmac_mb.model_name = "Unknown PowerSurge"; @@ -2475,7 +2475,7 @@ static int __init probe_motherboard(void) pmac_mb.model_name = "Unknown Intrepid-based"; pmac_mb.features = intrepid_features; break; -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ case macio_keylargo2: pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2; pmac_mb.model_name = "Unknown K2-based"; @@ -2486,13 +2486,13 @@ static int __init probe_motherboard(void) pmac_mb.model_name = "Unknown Shasta-based"; pmac_mb.features = g5_features; break; -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ default: ret = -ENODEV; goto done; } found: -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 /* Fixup Hooper vs. Comet */ if (pmac_mb.model_id == PMAC_TYPE_HOOPER) { u32 __iomem * mach_id_ptr = ioremap(0xf3000034, 4); @@ -2546,9 +2546,9 @@ found: */ powersave_lowspeed = 1; -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ powersave_nap = 1; -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ /* Check for "mobile" machine */ if (model && (strncmp(model, "PowerBook", 9) == 0 @@ -2786,7 +2786,7 @@ set_initial_features(void) MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); } -#ifdef CONFIG_POWER4 +#ifdef CONFIG_PPC64 if (macio_chips[0].type == macio_keylargo2 || macio_chips[0].type == macio_shasta) { #ifndef CONFIG_SMP @@ -2826,7 +2826,7 @@ set_initial_features(void) np = of_find_node_by_name(np, "firewire"); } } -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ if (macio_chips[0].type == macio_keylargo || macio_chips[0].type == macio_pangea || @@ -2895,7 +2895,7 @@ set_initial_features(void) MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ /* On all machines, switch modem & serial ports off */ for_each_node_by_name(np, "ch-a") diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 4ad227d04c1a..f241accc053d 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,10 +1,11 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o -obj-y += opal-msglog.o +obj-y += opal-msglog.o opal-hmi.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o +obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 8ad0c5b891f4..c945bed4dc9e 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -187,10 +187,10 @@ static int ioda_eeh_post_init(struct pci_controller *hose) */ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) { - s64 ret; - u32 pe_no; struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; + int enable, ret = 0; + s64 rc; /* Check on PE number */ if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { @@ -201,184 +201,214 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) return -EINVAL; } - pe_no = pe->addr; switch (option) { case EEH_OPT_DISABLE: - ret = -EEXIST; - break; + return -EPERM; case EEH_OPT_ENABLE: - ret = 0; - break; + return 0; case EEH_OPT_THAW_MMIO: - ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, - OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO); - if (ret) { - pr_warning("%s: Failed to enable MMIO for " - "PHB#%x-PE#%x, err=%lld\n", - __func__, hose->global_number, pe_no, ret); - return -EIO; - } - + enable = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO; break; case EEH_OPT_THAW_DMA: - ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, - OPAL_EEH_ACTION_CLEAR_FREEZE_DMA); - if (ret) { - pr_warning("%s: Failed to enable DMA for " - "PHB#%x-PE#%x, err=%lld\n", - __func__, hose->global_number, pe_no, ret); - return -EIO; - } - + enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; break; default: - pr_warning("%s: Invalid option %d\n", __func__, option); + pr_warn("%s: Invalid option %d\n", + __func__, option); return -EINVAL; } + /* If PHB supports compound PE, to handle it */ + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, pe->addr, enable); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe->addr, + enable); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n", + __func__, rc, option, phb->hose->global_number, + pe->addr); + ret = -EIO; + } + } + return ret; } -static void ioda_eeh_phb_diag(struct pci_controller *hose) +static void ioda_eeh_phb_diag(struct eeh_pe *pe) { - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pe->phb->private_data; long rc; - rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, + rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, PNV_PCI_DIAG_BUF_SIZE); - if (rc != OPAL_SUCCESS) { - pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", - __func__, hose->global_number, rc); - return; - } - - pnv_pci_dump_phb_diag_data(hose, phb->diag.blob); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failed to get diag-data for PHB#%x (%ld)\n", + __func__, pe->phb->global_number, rc); } -/** - * ioda_eeh_get_state - Retrieve the state of PE - * @pe: EEH PE - * - * The PE's state should be retrieved from the PEEV, PEST - * IODA tables. Since the OPAL has exported the function - * to do it, it'd better to use that. - */ -static int ioda_eeh_get_state(struct eeh_pe *pe) +static int ioda_eeh_get_phb_state(struct eeh_pe *pe) { - s64 ret = 0; + struct pnv_phb *phb = pe->phb->private_data; u8 fstate; __be16 pcierr; - u32 pe_no; - int result; - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; + s64 rc; + int result = 0; + + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x state\n", + __func__, rc, phb->hose->global_number); + return EEH_STATE_NOT_SUPPORT; + } /* - * Sanity check on PE address. The PHB PE address should - * be zero. + * Check PHB state. If the PHB is frozen for the + * first time, to dump the PHB diag-data. */ - if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { - pr_err("%s: PE address %x out of range [0, %x] " - "on PHB#%x\n", - __func__, pe->addr, phb->ioda.total_pe, - hose->global_number); - return EEH_STATE_NOT_SUPPORT; + if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + } else if (!(pe->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + ioda_eeh_phb_diag(pe); } + return result; +} + +static int ioda_eeh_get_pe_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + u8 fstate; + __be16 pcierr; + s64 rc; + int result; + /* - * If we're in middle of PE reset, return normal - * state to keep EEH core going. For PHB reset, we - * still expect to have fenced PHB cleared with - * PHB reset. + * We don't clobber hardware frozen state until PE + * reset is completed. In order to keep EEH core + * moving forward, we have to return operational + * state during PE reset. */ - if (!(pe->type & EEH_PE_PHB) && - (pe->state & EEH_PE_RESET)) { - result = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE | + if (pe->state & EEH_PE_RESET) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | EEH_STATE_MMIO_ENABLED | EEH_STATE_DMA_ENABLED); return result; } - /* Retrieve PE status through OPAL */ - pe_no = pe->addr; - ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, - &fstate, &pcierr, NULL); - if (ret) { - pr_err("%s: Failed to get EEH status on " - "PHB#%x-PE#%x\n, err=%lld\n", - __func__, hose->global_number, pe_no, ret); - return EEH_STATE_NOT_SUPPORT; - } - - /* Check PHB status */ - if (pe->type & EEH_PE_PHB) { - result = 0; - result &= ~EEH_STATE_RESET_ACTIVE; - - if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { - result |= EEH_STATE_MMIO_ACTIVE; - result |= EEH_STATE_DMA_ACTIVE; - result |= EEH_STATE_MMIO_ENABLED; - result |= EEH_STATE_DMA_ENABLED; - } else if (!(pe->state & EEH_PE_ISOLATED)) { - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(hose); + /* + * Fetch PE state from hardware. If the PHB + * supports compound PE, let it handle that. + */ + if (phb->get_pe_state) { + fstate = phb->get_pe_state(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n", + __func__, rc, phb->hose->global_number, pe->addr); + return EEH_STATE_NOT_SUPPORT; } - - return result; } - /* Parse result out */ - result = 0; + /* Figure out state */ switch (fstate) { case OPAL_EEH_STOPPED_NOT_FROZEN: - result &= ~EEH_STATE_RESET_ACTIVE; - result |= EEH_STATE_MMIO_ACTIVE; - result |= EEH_STATE_DMA_ACTIVE; - result |= EEH_STATE_MMIO_ENABLED; - result |= EEH_STATE_DMA_ENABLED; + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); break; case OPAL_EEH_STOPPED_MMIO_FREEZE: - result &= ~EEH_STATE_RESET_ACTIVE; - result |= EEH_STATE_DMA_ACTIVE; - result |= EEH_STATE_DMA_ENABLED; + result = (EEH_STATE_DMA_ACTIVE | + EEH_STATE_DMA_ENABLED); break; case OPAL_EEH_STOPPED_DMA_FREEZE: - result &= ~EEH_STATE_RESET_ACTIVE; - result |= EEH_STATE_MMIO_ACTIVE; - result |= EEH_STATE_MMIO_ENABLED; + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_MMIO_ENABLED); break; case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: - result &= ~EEH_STATE_RESET_ACTIVE; + result = 0; break; case OPAL_EEH_STOPPED_RESET: - result |= EEH_STATE_RESET_ACTIVE; + result = EEH_STATE_RESET_ACTIVE; break; case OPAL_EEH_STOPPED_TEMP_UNAVAIL: - result |= EEH_STATE_UNAVAILABLE; + result = EEH_STATE_UNAVAILABLE; break; case OPAL_EEH_STOPPED_PERM_UNAVAIL: - result |= EEH_STATE_NOT_SUPPORT; + result = EEH_STATE_NOT_SUPPORT; break; default: - pr_warning("%s: Unexpected EEH status 0x%x " - "on PHB#%x-PE#%x\n", - __func__, fstate, hose->global_number, pe_no); + result = EEH_STATE_NOT_SUPPORT; + pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n", + __func__, phb->hose->global_number, + pe->addr, fstate); } - /* Dump PHB diag-data for frozen PE */ - if (result != EEH_STATE_NOT_SUPPORT && - (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) != - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) && + /* + * If PHB supports compound PE, to freeze all + * slave PEs for consistency. + * + * If the PE is switching to frozen state for the + * first time, to dump the PHB diag-data. + */ + if (!(result & EEH_STATE_NOT_SUPPORT) && + !(result & EEH_STATE_UNAVAILABLE) && + !(result & EEH_STATE_MMIO_ACTIVE) && + !(result & EEH_STATE_DMA_ACTIVE) && !(pe->state & EEH_PE_ISOLATED)) { + if (phb->freeze_pe) + phb->freeze_pe(phb, pe->addr); + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(hose); + ioda_eeh_phb_diag(pe); } return result; } +/** + * ioda_eeh_get_state - Retrieve the state of PE + * @pe: EEH PE + * + * The PE's state should be retrieved from the PEEV, PEST + * IODA tables. Since the OPAL has exported the function + * to do it, it'd better to use that. + */ +static int ioda_eeh_get_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + + /* Sanity check on PE number. PHB PE should have 0 */ + if (pe->addr < 0 || + pe->addr >= phb->ioda.total_pe) { + pr_warn("%s: PHB#%x-PE#%x out of range [0, %x]\n", + __func__, phb->hose->global_number, + pe->addr, phb->ioda.total_pe); + return EEH_STATE_NOT_SUPPORT; + } + + if (pe->type & EEH_PE_PHB) + return ioda_eeh_get_phb_state(pe); + + return ioda_eeh_get_pe_state(pe); +} + static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) { s64 rc = OPAL_HARDWARE; @@ -589,6 +619,24 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) } /** + * ioda_eeh_get_log - Retrieve error log + * @pe: frozen PE + * @severity: permanent or temporary error + * @drv_log: device driver log + * @len: length of device driver log + * + * Retrieve error log, which contains log from device driver + * and firmware. + */ +int ioda_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) +{ + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + + return 0; +} + +/** * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE * @pe: EEH PE * @@ -605,18 +653,24 @@ static int ioda_eeh_configure_bridge(struct eeh_pe *pe) static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) { /* GEM */ - pr_info(" GEM XFIR: %016llx\n", data->gemXfir); - pr_info(" GEM RFIR: %016llx\n", data->gemRfir); - pr_info(" GEM RIRQFIR: %016llx\n", data->gemRirqfir); - pr_info(" GEM Mask: %016llx\n", data->gemMask); - pr_info(" GEM RWOF: %016llx\n", data->gemRwof); + if (data->gemXfir || data->gemRfir || + data->gemRirqfir || data->gemMask || data->gemRwof) + pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->gemXfir), + be64_to_cpu(data->gemRfir), + be64_to_cpu(data->gemRirqfir), + be64_to_cpu(data->gemMask), + be64_to_cpu(data->gemRwof)); /* LEM */ - pr_info(" LEM FIR: %016llx\n", data->lemFir); - pr_info(" LEM Error Mask: %016llx\n", data->lemErrMask); - pr_info(" LEM Action 0: %016llx\n", data->lemAction0); - pr_info(" LEM Action 1: %016llx\n", data->lemAction1); - pr_info(" LEM WOF: %016llx\n", data->lemWof); + if (data->lemFir || data->lemErrMask || + data->lemAction0 || data->lemAction1 || data->lemWof) + pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrMask), + be64_to_cpu(data->lemAction0), + be64_to_cpu(data->lemAction1), + be64_to_cpu(data->lemWof)); } static void ioda_eeh_hub_diag(struct pci_controller *hose) @@ -627,8 +681,8 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose) rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); if (rc != OPAL_SUCCESS) { - pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n", - __func__, phb->hub_id, rc); + pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n", + __func__, phb->hub_id, rc); return; } @@ -636,24 +690,31 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose) case OPAL_P7IOC_DIAG_TYPE_RGC: pr_info("P7IOC diag-data for RGC\n\n"); ioda_eeh_hub_diag_common(data); - pr_info(" RGC Status: %016llx\n", data->rgc.rgcStatus); - pr_info(" RGC LDCP: %016llx\n", data->rgc.rgcLdcp); + if (data->rgc.rgcStatus || data->rgc.rgcLdcp) + pr_info(" RGC: %016llx %016llx\n", + be64_to_cpu(data->rgc.rgcStatus), + be64_to_cpu(data->rgc.rgcLdcp)); break; case OPAL_P7IOC_DIAG_TYPE_BI: pr_info("P7IOC diag-data for BI %s\n\n", data->bi.biDownbound ? "Downbound" : "Upbound"); ioda_eeh_hub_diag_common(data); - pr_info(" BI LDCP 0: %016llx\n", data->bi.biLdcp0); - pr_info(" BI LDCP 1: %016llx\n", data->bi.biLdcp1); - pr_info(" BI LDCP 2: %016llx\n", data->bi.biLdcp2); - pr_info(" BI Fence Status: %016llx\n", data->bi.biFenceStatus); + if (data->bi.biLdcp0 || data->bi.biLdcp1 || + data->bi.biLdcp2 || data->bi.biFenceStatus) + pr_info(" BI: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->bi.biLdcp0), + be64_to_cpu(data->bi.biLdcp1), + be64_to_cpu(data->bi.biLdcp2), + be64_to_cpu(data->bi.biFenceStatus)); break; case OPAL_P7IOC_DIAG_TYPE_CI: - pr_info("P7IOC diag-data for CI Port %d\\nn", + pr_info("P7IOC diag-data for CI Port %d\n\n", data->ci.ciPort); ioda_eeh_hub_diag_common(data); - pr_info(" CI Port Status: %016llx\n", data->ci.ciPortStatus); - pr_info(" CI Port LDCP: %016llx\n", data->ci.ciPortLdcp); + if (data->ci.ciPortStatus || data->ci.ciPortLdcp) + pr_info(" CI: %016llx %016llx\n", + be64_to_cpu(data->ci.ciPortStatus), + be64_to_cpu(data->ci.ciPortLdcp)); break; case OPAL_P7IOC_DIAG_TYPE_MISC: pr_info("P7IOC diag-data for MISC\n\n"); @@ -664,30 +725,51 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose) ioda_eeh_hub_diag_common(data); break; default: - pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n", - __func__, phb->hub_id, data->type); + pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n", + __func__, phb->hub_id, data->type); } } static int ioda_eeh_get_pe(struct pci_controller *hose, u16 pe_no, struct eeh_pe **pe) { - struct eeh_pe *phb_pe, *dev_pe; - struct eeh_dev dev; + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pnv_pe; + struct eeh_pe *dev_pe; + struct eeh_dev edev; - /* Find the PHB PE */ - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe) - return -EEXIST; + /* + * If PHB supports compound PE, to fetch + * the master PE because slave PE is invisible + * to EEH core. + */ + if (phb->get_pe_state) { + pnv_pe = &phb->ioda.pe_array[pe_no]; + if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { + pnv_pe = pnv_pe->master; + WARN_ON(!pnv_pe || + !(pnv_pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pnv_pe->pe_number; + } + } /* Find the PE according to PE# */ - memset(&dev, 0, sizeof(struct eeh_dev)); - dev.phb = hose; - dev.pe_config_addr = pe_no; - dev_pe = eeh_pe_get(&dev); - if (!dev_pe) return -EEXIST; + memset(&edev, 0, sizeof(struct eeh_dev)); + edev.phb = hose; + edev.pe_config_addr = pe_no; + dev_pe = eeh_pe_get(&edev); + if (!dev_pe) + return -EEXIST; + /* + * At this point, we're sure the compound PE should + * be put into frozen state. + */ *pe = dev_pe; + if (phb->freeze_pe && + !(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, pe_no); + return 0; } @@ -792,7 +874,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) "detected, location: %s\n", hose->global_number, eeh_pe_loc_get(phb_pe)); - ioda_eeh_phb_diag(hose); + ioda_eeh_phb_diag(phb_pe); + pnv_pci_dump_phb_diag_data(hose, phb_pe->data); ret = EEH_NEXT_ERR_NONE; } @@ -812,7 +895,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); ret = EEH_NEXT_ERR_NONE; - } else if ((*pe)->state & EEH_PE_ISOLATED) { + } else if ((*pe)->state & EEH_PE_ISOLATED || + eeh_pe_passed(*pe)) { ret = EEH_NEXT_ERR_NONE; } else { pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", @@ -839,7 +923,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) ret == EEH_NEXT_ERR_FENCED_PHB) && !((*pe)->state & EEH_PE_ISOLATED)) { eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(hose); + ioda_eeh_phb_diag(*pe); } /* @@ -885,6 +969,7 @@ struct pnv_eeh_ops ioda_eeh_ops = { .set_option = ioda_eeh_set_option, .get_state = ioda_eeh_get_state, .reset = ioda_eeh_reset, + .get_log = ioda_eeh_get_log, .configure_bridge = ioda_eeh_configure_bridge, .next_error = ioda_eeh_next_error }; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 56a206f32f77..fd7a16f855ed 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -45,14 +45,31 @@ */ static int powernv_eeh_init(void) { + struct pci_controller *hose; + struct pnv_phb *phb; + /* We require OPALv3 */ if (!firmware_has_feature(FW_FEATURE_OPALv3)) { - pr_warning("%s: OPALv3 is required !\n", __func__); + pr_warn("%s: OPALv3 is required !\n", + __func__); return -EINVAL; } - /* Set EEH probe mode */ - eeh_probe_mode_set(EEH_PROBE_MODE_DEV); + /* Set probe mode */ + eeh_add_flag(EEH_PROBE_MODE_DEV); + + /* + * P7IOC blocks PCI config access to frozen PE, but PHB3 + * doesn't do that. So we have to selectively enable I/O + * prior to collecting error log. + */ + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->model == PNV_PHB_MODEL_P7IOC) + eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); + break; + } return 0; } @@ -107,6 +124,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) struct pnv_phb *phb = hose->private_data; struct device_node *dn = pci_device_to_OF_node(dev); struct eeh_dev *edev = of_node_to_eeh_dev(dn); + int ret; /* * When probing the root bridge, which doesn't have any @@ -143,13 +161,27 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); /* Create PE */ - eeh_add_to_parent_pe(edev); + ret = eeh_add_to_parent_pe(edev); + if (ret) { + pr_warn("%s: Can't add PCI dev %s to parent PE (%d)\n", + __func__, pci_name(dev), ret); + return ret; + } + + /* + * Cache the PE primary bus, which can't be fetched when + * full hotplug is in progress. In that case, all child + * PCI devices of the PE are expected to be removed prior + * to PE reset. + */ + if (!edev->pe->bus) + edev->pe->bus = dev->bus; /* * Enable EEH explicitly so that we will do EEH check * while accessing I/O stuff */ - eeh_set_enable(true); + eeh_add_flag(EEH_ENABLED); /* Save memory bars */ eeh_save_bars(edev); @@ -273,8 +305,8 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) max_wait -= mwait; if (max_wait <= 0) { - pr_warning("%s: Timeout getting PE#%x's state (%d)\n", - __func__, pe->addr, max_wait); + pr_warn("%s: Timeout getting PE#%x's state (%d)\n", + __func__, pe->addr, max_wait); return EEH_STATE_NOT_SUPPORT; } @@ -294,7 +326,7 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) * Retrieve the temporary or permanent error from the PE. */ static int powernv_eeh_get_log(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len) + char *drv_log, unsigned long len) { struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; @@ -398,9 +430,7 @@ static int __init eeh_powernv_init(void) { int ret = -EINVAL; - if (!machine_is(powernv)) - return ret; - + eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE); ret = eeh_ops_register(&powernv_eeh_ops); if (!ret) pr_info("EEH: PowerNV platform initialized\n"); @@ -409,5 +439,4 @@ static int __init eeh_powernv_init(void) return ret; } - -early_initcall(eeh_powernv_init); +machine_early_initcall(powernv, eeh_powernv_init); diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index 32e2adfa5320..e462ab947d16 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -20,6 +20,7 @@ #include <linux/wait.h> #include <linux/gfp.h> #include <linux/of.h> +#include <asm/machdep.h> #include <asm/opal.h> #define N_ASYNC_COMPLETIONS 64 @@ -201,4 +202,4 @@ out_opal_node: out: return err; } -subsys_initcall(opal_async_comp_init); +machine_subsys_initcall(powernv, opal_async_comp_init); diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c new file mode 100644 index 000000000000..97ac8dc33667 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -0,0 +1,188 @@ +/* + * OPAL hypervisor Maintenance interrupt handling support in PowreNV. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; If not, see <http://www.gnu.org/licenses/>. + * + * Copyright 2014 IBM Corporation + * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/mm.h> +#include <linux/slab.h> + +#include <asm/opal.h> +#include <asm/cputable.h> + +static int opal_hmi_handler_nb_init; +struct OpalHmiEvtNode { + struct list_head list; + struct OpalHMIEvent hmi_evt; +}; +static LIST_HEAD(opal_hmi_evt_list); +static DEFINE_SPINLOCK(opal_hmi_evt_lock); + +static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) +{ + const char *level, *sevstr, *error_info; + static const char *hmi_error_types[] = { + "Malfunction Alert", + "Processor Recovery done", + "Processor recovery occurred again", + "Processor recovery occurred for masked error", + "Timer facility experienced an error", + "TFMR SPR is corrupted", + "UPS (Uniterrupted Power System) Overflow indication", + "An XSCOM operation failure", + "An XSCOM operation completed", + "SCOM has set a reserved FIR bit to cause recovery", + "Debug trigger has set a reserved FIR bit to cause recovery", + "A hypervisor resource error occurred" + }; + + /* Print things out */ + if (hmi_evt->version != OpalHMIEvt_V1) { + pr_err("HMI Interrupt, Unknown event version %d !\n", + hmi_evt->version); + return; + } + switch (hmi_evt->severity) { + case OpalHMI_SEV_NO_ERROR: + level = KERN_INFO; + sevstr = "Harmless"; + break; + case OpalHMI_SEV_WARNING: + level = KERN_WARNING; + sevstr = ""; + break; + case OpalHMI_SEV_ERROR_SYNC: + level = KERN_ERR; + sevstr = "Severe"; + break; + case OpalHMI_SEV_FATAL: + default: + level = KERN_ERR; + sevstr = "Fatal"; + break; + } + + printk("%s%s Hypervisor Maintenance interrupt [%s]\n", + level, sevstr, + hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? + hmi_error_types[hmi_evt->type] + : "Unknown"; + printk("%s Error detail: %s\n", level, error_info); + printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer)); + if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || + (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) + printk("%s TFMR: %016llx\n", level, + be64_to_cpu(hmi_evt->tfmr)); +} + +static void hmi_event_handler(struct work_struct *work) +{ + unsigned long flags; + struct OpalHMIEvent *hmi_evt; + struct OpalHmiEvtNode *msg_node; + uint8_t disposition; + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + while (!list_empty(&opal_hmi_evt_list)) { + msg_node = list_entry(opal_hmi_evt_list.next, + struct OpalHmiEvtNode, list); + list_del(&msg_node->list); + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt; + print_hmi_event_info(hmi_evt); + disposition = hmi_evt->disposition; + kfree(msg_node); + + /* + * Check if HMI event has been recovered or not. If not + * then we can't continue, invoke panic. + */ + if (disposition != OpalHMI_DISPOSITION_RECOVERED) + panic("Unrecoverable HMI exception"); + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + } + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); +} + +static DECLARE_WORK(hmi_event_work, hmi_event_handler); +/* + * opal_handle_hmi_event - notifier handler that queues up HMI events + * to be preocessed later. + */ +static int opal_handle_hmi_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + unsigned long flags; + struct OpalHMIEvent *hmi_evt; + struct opal_msg *hmi_msg = msg; + struct OpalHmiEvtNode *msg_node; + + /* Sanity Checks */ + if (msg_type != OPAL_MSG_HMI_EVT) + return 0; + + /* HMI event info starts from param[0] */ + hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0]; + + /* Delay the logging of HMI events to workqueue. */ + msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); + if (!msg_node) { + pr_err("HMI: out of memory, Opal message event not handled\n"); + return -ENOMEM; + } + memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent)); + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + list_add(&msg_node->list, &opal_hmi_evt_list); + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + schedule_work(&hmi_event_work); + return 0; +} + +static struct notifier_block opal_hmi_handler_nb = { + .notifier_call = opal_handle_hmi_event, + .next = NULL, + .priority = 0, +}; + +static int __init opal_hmi_handler_init(void) +{ + int ret; + + if (!opal_hmi_handler_nb_init) { + ret = opal_message_notifier_register( + OPAL_MSG_HMI_EVT, &opal_hmi_handler_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + opal_hmi_handler_nb_init = 1; + } + return 0; +} +subsys_initcall(opal_hmi_handler_init); diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index f04b4d8aca5a..ad4b31df779a 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -324,7 +324,7 @@ static int opal_lpc_init_debugfs(void) rc |= opal_lpc_debugfs_create_type(root, "fw", OPAL_LPC_FW); return rc; } -device_initcall(opal_lpc_init_debugfs); +machine_device_initcall(powernv, opal_lpc_init_debugfs); #endif /* CONFIG_DEBUG_FS */ void opal_lpc_init(void) diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c index b17a34b695ef..43db2136dbff 100644 --- a/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -27,6 +27,7 @@ #include <linux/mm.h> #include <linux/slab.h> +#include <asm/machdep.h> #include <asm/opal.h> #include <asm/cputable.h> @@ -143,4 +144,4 @@ static int __init opal_mem_err_init(void) } return 0; } -subsys_initcall(opal_mem_err_init); +machine_subsys_initcall(powernv, opal_mem_err_init); diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c new file mode 100644 index 000000000000..d8a000a9988b --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -0,0 +1,84 @@ +#include <linux/percpu.h> +#include <linux/jump_label.h> +#include <asm/trace.h> + +#ifdef CONFIG_JUMP_LABEL +struct static_key opal_tracepoint_key = STATIC_KEY_INIT; + +void opal_tracepoint_regfunc(void) +{ + static_key_slow_inc(&opal_tracepoint_key); +} + +void opal_tracepoint_unregfunc(void) +{ + static_key_slow_dec(&opal_tracepoint_key); +} +#else +/* + * We optimise OPAL calls by placing opal_tracepoint_refcount + * directly in the TOC so we can check if the opal tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long opal_tracepoint_refcount; + +void opal_tracepoint_regfunc(void) +{ + opal_tracepoint_refcount++; +} + +void opal_tracepoint_unregfunc(void) +{ + opal_tracepoint_refcount--; +} +#endif + +/* + * Since the tracing code might execute OPAL calls we need to guard against + * recursion. + */ +static DEFINE_PER_CPU(unsigned int, opal_trace_depth); + +void __trace_opal_entry(unsigned long opcode, unsigned long *args) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = &__get_cpu_var(opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + preempt_disable(); + trace_opal_entry(opcode, args); + (*depth)--; + +out: + local_irq_restore(flags); +} + +void __trace_opal_exit(long opcode, unsigned long retval) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = &__get_cpu_var(opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + trace_opal_exit(opcode, retval); + preempt_enable(); + (*depth)--; + +out: + local_irq_restore(flags); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 4abbff22a61f..a328be44880f 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -13,30 +13,69 @@ #include <asm/hvcall.h> #include <asm/asm-offsets.h> #include <asm/opal.h> +#include <asm/jump_label.h> + + .section ".text" + +#ifdef CONFIG_TRACEPOINTS +#ifdef CONFIG_JUMP_LABEL +#define OPAL_BRANCH(LABEL) \ + ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) +#else + + .section ".toc","aw" + + .globl opal_tracepoint_refcount +opal_tracepoint_refcount: + .llong 0 + + .section ".text" + +/* + * We branch around this in early init by using an unconditional cpu + * feature. + */ +#define OPAL_BRANCH(LABEL) \ +BEGIN_FTR_SECTION; \ + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,opal_tracepoint_refcount@toc(r2); \ + std r12,32(r1); \ + cmpdi r12,0; \ + bne- LABEL; \ +1: + +#endif + +#else +#define OPAL_BRANCH(LABEL) +#endif /* TODO: * * - Trace irqs in/off (needs saving/restoring all args, argh...) * - Get r11 feed up by Dave so I can have better register usage */ + #define OPAL_CALL(name, token) \ _GLOBAL(name); \ mflr r0; \ - mfcr r12; \ std r0,16(r1); \ + li r0,token; \ + OPAL_BRANCH(opal_tracepoint_entry) \ + mfcr r12; \ stw r12,8(r1); \ std r1,PACAR1(r13); \ - li r0,0; \ + li r11,0; \ mfmsr r12; \ - ori r0,r0,MSR_EE; \ + ori r11,r11,MSR_EE; \ std r12,PACASAVEDMSR(r13); \ - andc r12,r12,r0; \ + andc r12,r12,r11; \ mtmsrd r12,1; \ - LOAD_REG_ADDR(r0,opal_return); \ - mtlr r0; \ - li r0,MSR_DR|MSR_IR|MSR_LE;\ - andc r12,r12,r0; \ - li r0,token; \ + LOAD_REG_ADDR(r11,opal_return); \ + mtlr r11; \ + li r11,MSR_DR|MSR_IR|MSR_LE;\ + andc r12,r12,r11; \ mtspr SPRN_HSRR1,r12; \ LOAD_REG_ADDR(r11,opal); \ ld r12,8(r11); \ @@ -61,6 +100,64 @@ opal_return: mtcr r4; rfid +#ifdef CONFIG_TRACEPOINTS +opal_tracepoint_entry: + stdu r1,-STACKFRAMESIZE(r1) + std r0,STK_REG(R23)(r1) + std r3,STK_REG(R24)(r1) + std r4,STK_REG(R25)(r1) + std r5,STK_REG(R26)(r1) + std r6,STK_REG(R27)(r1) + std r7,STK_REG(R28)(r1) + std r8,STK_REG(R29)(r1) + std r9,STK_REG(R30)(r1) + std r10,STK_REG(R31)(r1) + mr r3,r0 + addi r4,r1,STK_REG(R24) + bl __trace_opal_entry + ld r0,STK_REG(R23)(r1) + ld r3,STK_REG(R24)(r1) + ld r4,STK_REG(R25)(r1) + ld r5,STK_REG(R26)(r1) + ld r6,STK_REG(R27)(r1) + ld r7,STK_REG(R28)(r1) + ld r8,STK_REG(R29)(r1) + ld r9,STK_REG(R30)(r1) + ld r10,STK_REG(R31)(r1) + LOAD_REG_ADDR(r11,opal_tracepoint_return) + mfcr r12 + std r11,16(r1) + stw r12,8(r1) + std r1,PACAR1(r13) + li r11,0 + mfmsr r12 + ori r11,r11,MSR_EE + std r12,PACASAVEDMSR(r13) + andc r12,r12,r11 + mtmsrd r12,1 + LOAD_REG_ADDR(r11,opal_return) + mtlr r11 + li r11,MSR_DR|MSR_IR|MSR_LE + andc r12,r12,r11 + mtspr SPRN_HSRR1,r12 + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +opal_tracepoint_return: + std r3,STK_REG(R31)(r1) + mr r4,r3 + ld r0,STK_REG(R23)(r1) + bl __trace_opal_exit + ld r3,STK_REG(R31)(r1) + addi r1,r1,STACKFRAMESIZE + ld r0,16(r1) + mtlr r0 + blr +#endif + OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); @@ -86,6 +183,7 @@ OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR); +OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET); OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC); OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE); OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW); @@ -146,3 +244,4 @@ OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT); OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); +OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index 4cd2ea6c0dbe..7634d1c62299 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -130,4 +130,4 @@ static int opal_xscom_init(void) scom_init(&opal_scom_controller); return 0; } -arch_initcall(opal_xscom_init); +machine_arch_initcall(powernv, opal_xscom_init); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 199975613fe9..f0a01a46a57d 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -22,6 +22,8 @@ #include <linux/kobject.h> #include <linux/delay.h> #include <linux/memblock.h> + +#include <asm/machdep.h> #include <asm/opal.h> #include <asm/firmware.h> #include <asm/mce.h> @@ -192,16 +194,12 @@ static int __init opal_register_exception_handlers(void) * fwnmi area at 0x7000 to provide the glue space to OPAL */ glue = 0x7000; - opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER, - 0, glue); - glue += 128; opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); #endif return 0; } - -early_initcall(opal_register_exception_handlers); +machine_early_initcall(powernv, opal_register_exception_handlers); int opal_notifier_register(struct notifier_block *nb) { @@ -368,7 +366,7 @@ static int __init opal_message_init(void) } return 0; } -early_initcall(opal_message_init); +machine_early_initcall(powernv, opal_message_init); int opal_get_chars(uint32_t vtermno, char *buf, int count) { @@ -513,6 +511,46 @@ int opal_machine_check(struct pt_regs *regs) return 0; } +/* Early hmi handler called in real mode. */ +int opal_hmi_exception_early(struct pt_regs *regs) +{ + s64 rc; + + /* + * call opal hmi handler. Pass paca address as token. + * The return value OPAL_SUCCESS is an indication that there is + * an HMI event generated waiting to pull by Linux. + */ + rc = opal_handle_hmi(); + if (rc == OPAL_SUCCESS) { + local_paca->hmi_event_available = 1; + return 1; + } + return 0; +} + +/* HMI exception handler called in virtual mode during check_irq_replay. */ +int opal_handle_hmi_exception(struct pt_regs *regs) +{ + s64 rc; + __be64 evt = 0; + + /* + * Check if HMI event is available. + * if Yes, then call opal_poll_events to pull opal messages and + * process them. + */ + if (!local_paca->hmi_event_available) + return 0; + + local_paca->hmi_event_available = 0; + rc = opal_poll_events(&evt); + if (rc == OPAL_SUCCESS && evt) + opal_do_notifier(be64_to_cpu(evt)); + + return 1; +} + static uint64_t find_recovery_address(uint64_t nip) { int i; @@ -630,7 +668,7 @@ static int __init opal_init(void) return 0; } -subsys_initcall(opal_init); +machine_subsys_initcall(powernv, opal_init); void opal_shutdown(void) { diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index de19edeaa7a7..b136108ddc99 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -36,6 +36,7 @@ #include <asm/tce.h> #include <asm/xics.h> #include <asm/debug.h> +#include <asm/firmware.h> #include "powernv.h" #include "pci.h" @@ -82,6 +83,12 @@ static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) : : "r" (val), "r" (paddr) : "memory"); } +static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) +{ + return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) == + (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); +} + static int pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -106,6 +113,380 @@ static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) clear_bit(pe, phb->ioda.pe_alloc); } +/* The default M64 BAR is shared by all PEs */ +static int pnv_ioda2_init_m64(struct pnv_phb *phb) +{ + const char *desc; + struct resource *r; + s64 rc; + + /* Configure the default M64 BAR */ + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + phb->ioda.m64_base, + 0, /* unused */ + phb->ioda.m64_size); + if (rc != OPAL_SUCCESS) { + desc = "configuring"; + goto fail; + } + + /* Enable the default M64 BAR */ + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + OPAL_ENABLE_M64_SPLIT); + if (rc != OPAL_SUCCESS) { + desc = "enabling"; + goto fail; + } + + /* Mark the M64 BAR assigned */ + set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc); + + /* + * Strip off the segment used by the reserved PE, which is + * expected to be 0 or last one of PE capabicity. + */ + r = &phb->hose->mem_resources[1]; + if (phb->ioda.reserved_pe == 0) + r->start += phb->ioda.m64_segsize; + else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1)) + r->end -= phb->ioda.m64_segsize; + else + pr_warn(" Cannot strip M64 segment for reserved PE#%d\n", + phb->ioda.reserved_pe); + + return 0; + +fail: + pr_warn(" Failure %lld %s M64 BAR#%d\n", + rc, desc, phb->ioda.m64_bar_idx); + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + OPAL_DISABLE_M64); + return -EIO; +} + +static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb) +{ + resource_size_t sgsz = phb->ioda.m64_segsize; + struct pci_dev *pdev; + struct resource *r; + int base, step, i; + + /* + * Root bus always has full M64 range and root port has + * M64 range used in reality. So we're checking root port + * instead of root bus. + */ + list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) { + for (i = PCI_BRIDGE_RESOURCES; + i <= PCI_BRIDGE_RESOURCE_END; i++) { + r = &pdev->resource[i]; + if (!r->parent || + !pnv_pci_is_mem_pref_64(r->flags)) + continue; + + base = (r->start - phb->ioda.m64_base) / sgsz; + for (step = 0; step < resource_size(r) / sgsz; step++) + set_bit(base + step, phb->ioda.pe_alloc); + } + } +} + +static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb, + struct pci_bus *bus, int all) +{ + resource_size_t segsz = phb->ioda.m64_segsize; + struct pci_dev *pdev; + struct resource *r; + struct pnv_ioda_pe *master_pe, *pe; + unsigned long size, *pe_alloc; + bool found; + int start, i, j; + + /* Root bus shouldn't use M64 */ + if (pci_is_root_bus(bus)) + return IODA_INVALID_PE; + + /* We support only one M64 window on each bus */ + found = false; + pci_bus_for_each_resource(bus, r, i) { + if (r && r->parent && + pnv_pci_is_mem_pref_64(r->flags)) { + found = true; + break; + } + } + + /* No M64 window found ? */ + if (!found) + return IODA_INVALID_PE; + + /* Allocate bitmap */ + size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); + pe_alloc = kzalloc(size, GFP_KERNEL); + if (!pe_alloc) { + pr_warn("%s: Out of memory !\n", + __func__); + return IODA_INVALID_PE; + } + + /* + * Figure out reserved PE numbers by the PE + * the its child PEs. + */ + start = (r->start - phb->ioda.m64_base) / segsz; + for (i = 0; i < resource_size(r) / segsz; i++) + set_bit(start + i, pe_alloc); + + if (all) + goto done; + + /* + * If the PE doesn't cover all subordinate buses, + * we need subtract from reserved PEs for children. + */ + list_for_each_entry(pdev, &bus->devices, bus_list) { + if (!pdev->subordinate) + continue; + + pci_bus_for_each_resource(pdev->subordinate, r, i) { + if (!r || !r->parent || + !pnv_pci_is_mem_pref_64(r->flags)) + continue; + + start = (r->start - phb->ioda.m64_base) / segsz; + for (j = 0; j < resource_size(r) / segsz ; j++) + clear_bit(start + j, pe_alloc); + } + } + + /* + * the current bus might not own M64 window and that's all + * contributed by its child buses. For the case, we needn't + * pick M64 dependent PE#. + */ + if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) { + kfree(pe_alloc); + return IODA_INVALID_PE; + } + + /* + * Figure out the master PE and put all slave PEs to master + * PE's list to form compound PE. + */ +done: + master_pe = NULL; + i = -1; + while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < + phb->ioda.total_pe) { + pe = &phb->ioda.pe_array[i]; + pe->phb = phb; + pe->pe_number = i; + + if (!master_pe) { + pe->flags |= PNV_IODA_PE_MASTER; + INIT_LIST_HEAD(&pe->slaves); + master_pe = pe; + } else { + pe->flags |= PNV_IODA_PE_SLAVE; + pe->master = master_pe; + list_add_tail(&pe->list, &master_pe->slaves); + } + } + + kfree(pe_alloc); + return master_pe->pe_number; +} + +static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) +{ + struct pci_controller *hose = phb->hose; + struct device_node *dn = hose->dn; + struct resource *res; + const u32 *r; + u64 pci_addr; + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) { + pr_info(" Firmware too old to support M64 window\n"); + return; + } + + r = of_get_property(dn, "ibm,opal-m64-window", NULL); + if (!r) { + pr_info(" No <ibm,opal-m64-window> on %s\n", + dn->full_name); + return; + } + + /* FIXME: Support M64 for P7IOC */ + if (phb->type != PNV_PHB_IODA2) { + pr_info(" Not support M64 window\n"); + return; + } + + res = &hose->mem_resources[1]; + res->start = of_translate_address(dn, r + 2); + res->end = res->start + of_read_number(r + 4, 2) - 1; + res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + pci_addr = of_read_number(r, 2); + hose->mem_offset[1] = res->start - pci_addr; + + phb->ioda.m64_size = resource_size(res); + phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe; + phb->ioda.m64_base = pci_addr; + + /* Use last M64 BAR to cover M64 window */ + phb->ioda.m64_bar_idx = 15; + phb->init_m64 = pnv_ioda2_init_m64; + phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe; + phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; +} + +static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) +{ + struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no]; + struct pnv_ioda_pe *slave; + s64 rc; + + /* Fetch master PE */ + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Freeze master PE */ + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe_no, + OPAL_EEH_ACTION_SET_FREEZE_ALL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, pe_no); + return; + } + + /* Freeze slave PEs */ + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return; + + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + slave->pe_number, + OPAL_EEH_ACTION_SET_FREEZE_ALL); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + slave->pe_number); + } +} + +int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) +{ + struct pnv_ioda_pe *pe, *slave; + s64 rc; + + /* Find master PE */ + pe = &phb->ioda.pe_array[pe_no]; + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Clear frozen state for master PE */ + rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", + __func__, rc, opt, phb->hose->global_number, pe_no); + return -EIO; + } + + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return 0; + + /* Clear frozen state for slave PEs */ + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + slave->pe_number, + opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", + __func__, rc, opt, phb->hose->global_number, + slave->pe_number); + return -EIO; + } + } + + return 0; +} + +static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) +{ + struct pnv_ioda_pe *slave, *pe; + u8 fstate, state; + __be16 pcierr; + s64 rc; + + /* Sanity check on PE number */ + if (pe_no < 0 || pe_no >= phb->ioda.total_pe) + return OPAL_EEH_STOPPED_PERM_UNAVAIL; + + /* + * Fetch the master PE and the PE instance might be + * not initialized yet. + */ + pe = &phb->ioda.pe_array[pe_no]; + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Check the master PE */ + rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, + &state, &pcierr, NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting " + "PHB#%x-PE#%x state\n", + __func__, rc, + phb->hose->global_number, pe_no); + return OPAL_EEH_STOPPED_TEMP_UNAVAIL; + } + + /* Check the slave PE */ + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return state; + + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + slave->pe_number, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting " + "PHB#%x-PE#%x state\n", + __func__, rc, + phb->hose->global_number, slave->pe_number); + return OPAL_EEH_STOPPED_TEMP_UNAVAIL; + } + + /* + * Override the result based on the ascending + * priority. + */ + if (fstate > state) + state = fstate; + } + + return state; +} + /* Currently those 2 are only used when MSIs are enabled, this will change * but in the meantime, we need to protect them to avoid warnings */ @@ -363,9 +744,16 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; struct pnv_ioda_pe *pe; - int pe_num; + int pe_num = IODA_INVALID_PE; + + /* Check if PE is determined by M64 */ + if (phb->pick_m64_pe) + pe_num = phb->pick_m64_pe(phb, bus, all); + + /* The PE number isn't pinned by M64 */ + if (pe_num == IODA_INVALID_PE) + pe_num = pnv_ioda_alloc_pe(phb); - pe_num = pnv_ioda_alloc_pe(phb); if (pe_num == IODA_INVALID_PE) { pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", __func__, pci_domain_nr(bus), bus->number); @@ -373,7 +761,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) } pe = &phb->ioda.pe_array[pe_num]; - pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); + pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); pe->pbus = bus; pe->pdev = NULL; pe->tce32_seg = -1; @@ -441,8 +829,15 @@ static void pnv_ioda_setup_PEs(struct pci_bus *bus) static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose, *tmp; + struct pnv_phb *phb; list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + + /* M64 layout might affect PE allocation */ + if (phb->alloc_m64_pe) + phb->alloc_m64_pe(phb); + pnv_ioda_setup_PEs(hose->bus); } } @@ -491,17 +886,26 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, set_dma_ops(&pdev->dev, &dma_iommu_ops); set_iommu_table_base(&pdev->dev, &pe->tce32_table); } + *pdev->dev.dma_mask = dma_mask; return 0; } -static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, + struct pci_bus *bus, + bool add_to_iommu_group) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table); + if (add_to_iommu_group) + set_iommu_table_base_and_group(&dev->dev, + &pe->tce32_table); + else + set_iommu_table_base(&dev->dev, &pe->tce32_table); + if (dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate); + pnv_ioda_setup_bus_dma(pe, dev->subordinate, + add_to_iommu_group); } } @@ -513,15 +917,16 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, (__be64 __iomem *)pe->tce_inval_reg_phys : (__be64 __iomem *)tbl->it_index; unsigned long start, end, inc; + const unsigned shift = tbl->it_page_shift; start = __pa(startp); end = __pa(endp); /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ if (tbl->it_busno) { - start <<= 12; - end <<= 12; - inc = 128 << 12; + start <<= shift; + end <<= shift; + inc = 128ull << shift; start |= tbl->it_busno; end |= tbl->it_busno; } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { @@ -559,18 +964,19 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : (__be64 __iomem *)tbl->it_index; + const unsigned shift = tbl->it_page_shift; /* We'll invalidate DMA address in PE scope */ - start = 0x2ul << 60; + start = 0x2ull << 60; start |= (pe->pe_number & 0xFF); end = start; /* Figure out the start, end and step */ inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); - start |= (inc << 12); + start |= (inc << shift); inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); - end |= (inc << 12); - inc = (0x1ul << 12); + end |= (inc << shift); + inc = (0x1ull << shift); mb(); while (start <= end) { @@ -654,7 +1060,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, /* Setup linux iommu table */ tbl = &pe->tce32_table; pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, - base << 28); + base << 28, IOMMU_PAGE_SHIFT_4K); /* OPAL variant of P7IOC SW invalidated TCEs */ swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); @@ -677,7 +1083,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (pe->pdev) set_iommu_table_base_and_group(&pe->pdev->dev, tbl); else - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); return; fail: @@ -713,11 +1119,15 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) 0); /* - * We might want to reset the DMA ops of all devices on - * this PE. However in theory, that shouldn't be necessary - * as this is used for VFIO/KVM pass-through and the device - * hasn't yet been returned to its kernel driver + * EEH needs the mapping between IOMMU table and group + * of those VFIO/KVM pass-through devices. We can postpone + * resetting DMA ops until the DMA mask is configured in + * host side. */ + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); } if (rc) pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); @@ -784,7 +1194,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, /* Setup linux iommu table */ tbl = &pe->tce32_table; - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0); + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, + IOMMU_PAGE_SHIFT_4K); /* OPAL variant of PHB3 invalidated TCEs */ swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); @@ -805,7 +1216,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (pe->pdev) set_iommu_table_base_and_group(&pe->pdev->dev, tbl); else - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); /* Also create a bypass window */ pnv_pci_ioda2_setup_bypass_pe(phb, pe); @@ -1055,9 +1466,6 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, index++; } } else if (res->flags & IORESOURCE_MEM) { - /* WARNING: Assumes M32 is mem region 0 in PHB. We need to - * harden that algorithm when we start supporting M64 - */ region.start = res->start - hose->mem_offset[0] - phb->ioda.m32_pci_base; @@ -1141,9 +1549,8 @@ static void pnv_pci_ioda_fixup(void) pnv_pci_ioda_create_dbgfs(); #ifdef CONFIG_EEH - eeh_probe_mode_set(EEH_PROBE_MODE_DEV); - eeh_addr_cache_build(); eeh_init(); + eeh_addr_cache_build(); #endif } @@ -1178,7 +1585,10 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, bridge = bridge->bus->self; } - /* We need support prefetchable memory window later */ + /* We fail back to M32 if M64 isn't supported */ + if (phb->ioda.m64_segsize && + pnv_pci_is_mem_pref_64(type)) + return phb->ioda.m64_segsize; if (type & IORESOURCE_MEM) return phb->ioda.m32_segsize; @@ -1299,6 +1709,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); if (prop32) phb->ioda.reserved_pe = be32_to_cpup(prop32); + + /* Parse 64-bit MMIO range */ + pnv_ioda_parse_m64_window(phb); + phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); /* FW Has already off top 64k of M32 space (MSI space) */ phb->ioda.m32_size += 0x10000; @@ -1334,14 +1748,6 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Calculate how many 32-bit TCE segments we have */ phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; - /* Clear unusable m64 */ - hose->mem_resources[1].flags = 0; - hose->mem_resources[1].start = 0; - hose->mem_resources[1].end = 0; - hose->mem_resources[2].flags = 0; - hose->mem_resources[2].start = 0; - hose->mem_resources[2].end = 0; - #if 0 /* We should really do that ... */ rc = opal_pci_set_phb_mem_window(opal->phb_id, window_type, @@ -1351,14 +1757,21 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, segment_size); #endif - pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]" - " IO: 0x%x [segment=0x%x]\n", - phb->ioda.total_pe, - phb->ioda.reserved_pe, - phb->ioda.m32_size, phb->ioda.m32_segsize, - phb->ioda.io_size, phb->ioda.io_segsize); + pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n", + phb->ioda.total_pe, phb->ioda.reserved_pe, + phb->ioda.m32_size, phb->ioda.m32_segsize); + if (phb->ioda.m64_size) + pr_info(" M64: 0x%lx [segment=0x%lx]\n", + phb->ioda.m64_size, phb->ioda.m64_segsize); + if (phb->ioda.io_size) + pr_info(" IO: 0x%x [segment=0x%x]\n", + phb->ioda.io_size, phb->ioda.io_segsize); + phb->hose->ops = &pnv_pci_ops; + phb->get_pe_state = pnv_ioda_get_pe_state; + phb->freeze_pe = pnv_ioda_freeze_pe; + phb->unfreeze_pe = pnv_ioda_unfreeze_pe; #ifdef CONFIG_EEH phb->eeh_ops = &ioda_eeh_ops; #endif @@ -1404,6 +1817,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET); } + + /* Configure M64 window */ + if (phb->init_m64 && phb->init_m64(phb)) + hose->mem_resources[1].flags = 0; } void __init pnv_pci_init_ioda2_phb(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index e3807d69393e..94ce3481490b 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -172,7 +172,8 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup; pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table, - tce_mem, tce_size, 0); + tce_mem, tce_size, 0, + IOMMU_PAGE_SHIFT_4K); } void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index f91a4e5d872e..b854b57ed5e1 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -132,61 +132,78 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, data = (struct OpalIoP7IOCPhbErrorData *)common; pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n", - hose->global_number, common->version); + hose->global_number, be32_to_cpu(common->version)); if (data->brdgCtl) pr_info("brdgCtl: %08x\n", - data->brdgCtl); + be32_to_cpu(data->brdgCtl)); if (data->portStatusReg || data->rootCmplxStatus || data->busAgentStatus) pr_info("UtlSts: %08x %08x %08x\n", - data->portStatusReg, data->rootCmplxStatus, - data->busAgentStatus); + be32_to_cpu(data->portStatusReg), + be32_to_cpu(data->rootCmplxStatus), + be32_to_cpu(data->busAgentStatus)); if (data->deviceStatus || data->slotStatus || data->linkStatus || data->devCmdStatus || data->devSecStatus) pr_info("RootSts: %08x %08x %08x %08x %08x\n", - data->deviceStatus, data->slotStatus, - data->linkStatus, data->devCmdStatus, - data->devSecStatus); + be32_to_cpu(data->deviceStatus), + be32_to_cpu(data->slotStatus), + be32_to_cpu(data->linkStatus), + be32_to_cpu(data->devCmdStatus), + be32_to_cpu(data->devSecStatus)); if (data->rootErrorStatus || data->uncorrErrorStatus || data->corrErrorStatus) pr_info("RootErrSts: %08x %08x %08x\n", - data->rootErrorStatus, data->uncorrErrorStatus, - data->corrErrorStatus); + be32_to_cpu(data->rootErrorStatus), + be32_to_cpu(data->uncorrErrorStatus), + be32_to_cpu(data->corrErrorStatus)); if (data->tlpHdr1 || data->tlpHdr2 || data->tlpHdr3 || data->tlpHdr4) pr_info("RootErrLog: %08x %08x %08x %08x\n", - data->tlpHdr1, data->tlpHdr2, - data->tlpHdr3, data->tlpHdr4); + be32_to_cpu(data->tlpHdr1), + be32_to_cpu(data->tlpHdr2), + be32_to_cpu(data->tlpHdr3), + be32_to_cpu(data->tlpHdr4)); if (data->sourceId || data->errorClass || data->correlator) pr_info("RootErrLog1: %08x %016llx %016llx\n", - data->sourceId, data->errorClass, - data->correlator); + be32_to_cpu(data->sourceId), + be64_to_cpu(data->errorClass), + be64_to_cpu(data->correlator)); if (data->p7iocPlssr || data->p7iocCsr) pr_info("PhbSts: %016llx %016llx\n", - data->p7iocPlssr, data->p7iocCsr); + be64_to_cpu(data->p7iocPlssr), + be64_to_cpu(data->p7iocCsr)); if (data->lemFir) pr_info("Lem: %016llx %016llx %016llx\n", - data->lemFir, data->lemErrorMask, - data->lemWOF); + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrorMask), + be64_to_cpu(data->lemWOF)); if (data->phbErrorStatus) pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", - data->phbErrorStatus, data->phbFirstErrorStatus, - data->phbErrorLog0, data->phbErrorLog1); + be64_to_cpu(data->phbErrorStatus), + be64_to_cpu(data->phbFirstErrorStatus), + be64_to_cpu(data->phbErrorLog0), + be64_to_cpu(data->phbErrorLog1)); if (data->mmioErrorStatus) pr_info("OutErr: %016llx %016llx %016llx %016llx\n", - data->mmioErrorStatus, data->mmioFirstErrorStatus, - data->mmioErrorLog0, data->mmioErrorLog1); + be64_to_cpu(data->mmioErrorStatus), + be64_to_cpu(data->mmioFirstErrorStatus), + be64_to_cpu(data->mmioErrorLog0), + be64_to_cpu(data->mmioErrorLog1)); if (data->dma0ErrorStatus) pr_info("InAErr: %016llx %016llx %016llx %016llx\n", - data->dma0ErrorStatus, data->dma0FirstErrorStatus, - data->dma0ErrorLog0, data->dma0ErrorLog1); + be64_to_cpu(data->dma0ErrorStatus), + be64_to_cpu(data->dma0FirstErrorStatus), + be64_to_cpu(data->dma0ErrorLog0), + be64_to_cpu(data->dma0ErrorLog1)); if (data->dma1ErrorStatus) pr_info("InBErr: %016llx %016llx %016llx %016llx\n", - data->dma1ErrorStatus, data->dma1FirstErrorStatus, - data->dma1ErrorLog0, data->dma1ErrorLog1); + be64_to_cpu(data->dma1ErrorStatus), + be64_to_cpu(data->dma1FirstErrorStatus), + be64_to_cpu(data->dma1ErrorLog0), + be64_to_cpu(data->dma1ErrorLog1)); for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { if ((data->pestA[i] >> 63) == 0 && @@ -194,7 +211,8 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, continue; pr_info("PE[%3d] A/B: %016llx %016llx\n", - i, data->pestA[i], data->pestB[i]); + i, be64_to_cpu(data->pestA[i]), + be64_to_cpu(data->pestB[i])); } } @@ -319,43 +337,52 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) { unsigned long flags, rc; - int has_diag; + int has_diag, ret = 0; spin_lock_irqsave(&phb->lock, flags); + /* Fetch PHB diag-data */ rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); has_diag = (rc == OPAL_SUCCESS); - rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + /* If PHB supports compound PE, to handle it */ + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, + pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - if (rc) { - pr_warning("PCI %d: Failed to clear EEH freeze state" - " for PE#%d, err %ld\n", - phb->hose->global_number, pe_no, rc); - - /* For now, let's only display the diag buffer when we fail to clear - * the EEH status. We'll do more sensible things later when we have - * proper EEH support. We need to make sure we don't pollute ourselves - * with the normal errors generated when probing empty slots - */ - if (has_diag) - pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob); - else - pr_warning("PCI %d: No diag data available\n", - phb->hose->global_number); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (rc) { + pr_warn("%s: Failure %ld clearing frozen " + "PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + pe_no); + ret = -EIO; + } } + /* + * For now, let's only display the diag buffer when we fail to clear + * the EEH status. We'll do more sensible things later when we have + * proper EEH support. We need to make sure we don't pollute ourselves + * with the normal errors generated when probing empty slots + */ + if (has_diag && ret) + pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob); + spin_unlock_irqrestore(&phb->lock, flags); } static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct device_node *dn) { - s64 rc; u8 fstate; __be16 pcierr; - u32 pe_no; + int pe_no; + s64 rc; /* * Get the PE#. During the PCI probe stage, we might not @@ -370,20 +397,42 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, pe_no = phb->ioda.reserved_pe; } - /* Read freeze status */ - rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr, - NULL); - if (rc) { - pr_warning("%s: Can't read EEH status (PE#%d) for " - "%s, err %lld\n", - __func__, pe_no, dn->full_name, rc); - return; + /* + * Fetch frozen state. If the PHB support compound PE, + * we need handle that case. + */ + if (phb->get_pe_state) { + fstate = phb->get_pe_state(phb, pe_no); + } else { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe_no, + &fstate, + &pcierr, + NULL); + if (rc) { + pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n", + __func__, rc, phb->hose->global_number, pe_no); + return; + } } + cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn), pe_no, fstate); - if (fstate != 0) + + /* Clear the frozen state if applicable */ + if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE || + fstate == OPAL_EEH_STOPPED_DMA_FREEZE || + fstate == OPAL_EEH_STOPPED_MMIO_DMA_FREEZE) { + /* + * If PHB supports compound PE, freeze it for + * consistency. + */ + if (phb->freeze_pe) + phb->freeze_pe(phb, pe_no); + pnv_pci_handle_eeh_config(phb, pe_no); + } } int pnv_pci_cfg_read(struct device_node *dn, @@ -564,10 +613,11 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, proto_tce |= TCE_PCI_WRITE; tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; - rpn = __pa(uaddr) >> TCE_SHIFT; + rpn = __pa(uaddr) >> tbl->it_page_shift; while (npages--) - *(tcep++) = cpu_to_be64(proto_tce | (rpn++ << TCE_RPN_SHIFT)); + *(tcep++) = cpu_to_be64(proto_tce | + (rpn++ << tbl->it_page_shift)); /* Some implementations won't cache invalid TCEs and thus may not * need that flush. We'll probably turn it_type into a bit mask @@ -627,11 +677,11 @@ static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages) void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, - u64 dma_offset) + u64 dma_offset, unsigned page_shift) { tbl->it_blocksize = 16; tbl->it_base = (unsigned long)tce_mem; - tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->it_page_shift = page_shift; tbl->it_offset = dma_offset >> tbl->it_page_shift; tbl->it_index = 0; tbl->it_size = tce_size >> 3; @@ -656,7 +706,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose) if (WARN_ON(!tbl)) return NULL; pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), - be32_to_cpup(sizep), 0); + be32_to_cpup(sizep), 0, IOMMU_PAGE_SHIFT_4K); iommu_init_table(tbl, hose->node); iommu_register_group(tbl, pci_domain_nr(hose->bus), 0); @@ -842,5 +892,4 @@ static int __init tce_iommu_bus_notifier_init(void) bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); return 0; } - -subsys_initcall_sync(tce_iommu_bus_notifier_init); +machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 676232c34328..48494d4b6058 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -21,6 +21,8 @@ enum pnv_phb_model { #define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */ #define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */ #define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */ +#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */ +#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ /* Data associated with a PE, including IOMMU tracking etc.. */ struct pnv_phb; @@ -64,6 +66,10 @@ struct pnv_ioda_pe { */ int mve_number; + /* PEs in compound case */ + struct pnv_ioda_pe *master; + struct list_head slaves; + /* Link in list of PE#s */ struct list_head dma_link; struct list_head list; @@ -119,6 +125,12 @@ struct pnv_phb { void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); void (*shutdown)(struct pnv_phb *phb); + int (*init_m64)(struct pnv_phb *phb); + void (*alloc_m64_pe)(struct pnv_phb *phb); + int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all); + int (*get_pe_state)(struct pnv_phb *phb, int pe_no); + void (*freeze_pe)(struct pnv_phb *phb, int pe_no); + int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); union { struct { @@ -129,9 +141,20 @@ struct pnv_phb { /* Global bridge info */ unsigned int total_pe; unsigned int reserved_pe; + + /* 32-bit MMIO window */ unsigned int m32_size; unsigned int m32_segsize; unsigned int m32_pci_base; + + /* 64-bit MMIO window */ + unsigned int m64_bar_idx; + unsigned long m64_size; + unsigned long m64_segsize; + unsigned long m64_base; + unsigned long m64_bar_alloc; + + /* IO ports */ unsigned int io_size; unsigned int io_segsize; unsigned int io_pci_base; @@ -198,7 +221,7 @@ int pnv_pci_cfg_write(struct device_node *dn, int where, int size, u32 val); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, - u64 dma_offset); + u64 dma_offset, unsigned page_shift); extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 1cb160dc1609..80db43944afe 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -123,4 +123,4 @@ static __init int rng_init(void) return 0; } -subsys_initcall(rng_init); +machine_subsys_initcall(powernv, rng_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index d9b88fa7c5a3..5a0e2dc6de5f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -264,6 +264,8 @@ static void __init pnv_setup_machdep_opal(void) ppc_md.halt = pnv_halt; ppc_md.machine_check_exception = opal_machine_check; ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; + ppc_md.hmi_exception_early = opal_hmi_exception_early; + ppc_md.handle_hmi_exception = opal_handle_hmi_exception; } #ifdef CONFIG_PPC_POWERNV_RTAS diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 7d61498e45c0..1062f71f5a85 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -29,6 +29,7 @@ #include <asm/lppaca.h> #include <asm/debug.h> #include <asm/plpar_wrappers.h> +#include <asm/machdep.h> struct dtl { struct dtl_entry *buf; @@ -391,4 +392,4 @@ err_remove_dir: err: return rc; } -arch_initcall(dtl_init); +machine_arch_initcall(pseries, dtl_init); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 0bec0c02c5e7..b08053819d99 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -89,26 +89,26 @@ static int pseries_eeh_init(void) * of domain/bus/slot/function for EEH RTAS operations. */ if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,set-eeh-option> invalid\n", + pr_warn("%s: RTAS service <ibm,set-eeh-option> invalid\n", __func__); return -EINVAL; } else if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,set-slot-reset> invalid\n", + pr_warn("%s: RTAS service <ibm,set-slot-reset> invalid\n", __func__); return -EINVAL; } else if (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,read-slot-reset-state2> and " + pr_warn("%s: RTAS service <ibm,read-slot-reset-state2> and " "<ibm,read-slot-reset-state> invalid\n", __func__); return -EINVAL; } else if (ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,slot-error-detail> invalid\n", + pr_warn("%s: RTAS service <ibm,slot-error-detail> invalid\n", __func__); return -EINVAL; } else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE && ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,configure-pe> and " + pr_warn("%s: RTAS service <ibm,configure-pe> and " "<ibm,configure-bridge> invalid\n", __func__); return -EINVAL; @@ -118,17 +118,17 @@ static int pseries_eeh_init(void) spin_lock_init(&slot_errbuf_lock); eeh_error_buf_size = rtas_token("rtas-error-log-max"); if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: unknown EEH error log size\n", + pr_warn("%s: unknown EEH error log size\n", __func__); eeh_error_buf_size = 1024; } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - pr_warning("%s: EEH error log size %d exceeds the maximal %d\n", + pr_warn("%s: EEH error log size %d exceeds the maximal %d\n", __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); eeh_error_buf_size = RTAS_ERROR_LOG_MAX; } /* Set EEH probe mode */ - eeh_probe_mode_set(EEH_PROBE_MODE_DEVTREE); + eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); return 0; } @@ -270,7 +270,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) /* Retrieve the device address */ regs = of_get_property(dn, "reg", NULL); if (!regs) { - pr_warning("%s: OF node property %s::reg not found\n", + pr_warn("%s: OF node property %s::reg not found\n", __func__, dn->full_name); return NULL; } @@ -297,7 +297,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) enable = 1; if (enable) { - eeh_set_enable(true); + eeh_add_flag(EEH_ENABLED); eeh_add_to_parent_pe(edev); pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n", @@ -398,7 +398,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) pe->config_addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n", + pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n", __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -411,7 +411,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) pe->config_addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n", + pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n", __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -584,17 +584,17 @@ static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait) return ret; if (max_wait <= 0) { - pr_warning("%s: Timeout when getting PE's state (%d)\n", + pr_warn("%s: Timeout when getting PE's state (%d)\n", __func__, max_wait); return EEH_STATE_NOT_SUPPORT; } if (mwait <= 0) { - pr_warning("%s: Firmware returned bad wait value %d\n", + pr_warn("%s: Firmware returned bad wait value %d\n", __func__, mwait); mwait = EEH_STATE_MIN_WAIT_TIME; } else if (mwait > EEH_STATE_MAX_WAIT_TIME) { - pr_warning("%s: Firmware returned too long wait value %d\n", + pr_warn("%s: Firmware returned too long wait value %d\n", __func__, mwait); mwait = EEH_STATE_MAX_WAIT_TIME; } @@ -675,7 +675,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) } if (ret) - pr_warning("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", + pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", __func__, pe->phb->global_number, pe->addr, ret); return ret; @@ -743,10 +743,7 @@ static struct eeh_ops pseries_eeh_ops = { */ static int __init eeh_pseries_init(void) { - int ret = -EINVAL; - - if (!machine_is(pseries)) - return ret; + int ret; ret = eeh_ops_register(&pseries_eeh_ops); if (!ret) @@ -757,5 +754,4 @@ static int __init eeh_pseries_init(void) return ret; } - -early_initcall(eeh_pseries_init); +machine_early_initcall(pseries, eeh_pseries_init); diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 99ecf0a5a929..3fda3f17b84e 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -12,9 +12,13 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> +#include <asm/jump_label.h> + + .section ".text" #ifdef CONFIG_TRACEPOINTS +#ifndef CONFIG_JUMP_LABEL .section ".toc","aw" .globl hcall_tracepoint_refcount @@ -22,21 +26,13 @@ hcall_tracepoint_refcount: .llong 0 .section ".text" +#endif /* * precall must preserve all registers. use unused STK_PARAM() - * areas to save snapshots and opcode. We branch around this - * in early init (eg when populating the MMU hashtable) by using an - * unconditional cpu feature. + * areas to save snapshots and opcode. */ #define HCALL_INST_PRECALL(FIRST_REG) \ -BEGIN_FTR_SECTION; \ - b 1f; \ -END_FTR_SECTION(0, 1); \ - ld r12,hcall_tracepoint_refcount@toc(r2); \ - std r12,32(r1); \ - cmpdi r12,0; \ - beq+ 1f; \ mflr r0; \ std r3,STK_PARAM(R3)(r1); \ std r4,STK_PARAM(R4)(r1); \ @@ -50,45 +46,29 @@ END_FTR_SECTION(0, 1); \ addi r4,r1,STK_PARAM(FIRST_REG); \ stdu r1,-STACK_FRAME_OVERHEAD(r1); \ bl __trace_hcall_entry; \ - addi r1,r1,STACK_FRAME_OVERHEAD; \ - ld r0,16(r1); \ - ld r3,STK_PARAM(R3)(r1); \ - ld r4,STK_PARAM(R4)(r1); \ - ld r5,STK_PARAM(R5)(r1); \ - ld r6,STK_PARAM(R6)(r1); \ - ld r7,STK_PARAM(R7)(r1); \ - ld r8,STK_PARAM(R8)(r1); \ - ld r9,STK_PARAM(R9)(r1); \ - ld r10,STK_PARAM(R10)(r1); \ - mtlr r0; \ -1: + ld r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + ld r4,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1); \ + ld r5,STACK_FRAME_OVERHEAD+STK_PARAM(R5)(r1); \ + ld r6,STACK_FRAME_OVERHEAD+STK_PARAM(R6)(r1); \ + ld r7,STACK_FRAME_OVERHEAD+STK_PARAM(R7)(r1); \ + ld r8,STACK_FRAME_OVERHEAD+STK_PARAM(R8)(r1); \ + ld r9,STACK_FRAME_OVERHEAD+STK_PARAM(R9)(r1); \ + ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R10)(r1) /* * postcall is performed immediately before function return which - * allows liberal use of volatile registers. We branch around this - * in early init (eg when populating the MMU hashtable) by using an - * unconditional cpu feature. + * allows liberal use of volatile registers. */ #define __HCALL_INST_POSTCALL \ -BEGIN_FTR_SECTION; \ - b 1f; \ -END_FTR_SECTION(0, 1); \ - ld r12,32(r1); \ - cmpdi r12,0; \ - beq+ 1f; \ - mflr r0; \ - ld r6,STK_PARAM(R3)(r1); \ - std r3,STK_PARAM(R3)(r1); \ + ld r0,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + std r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ mr r4,r3; \ - mr r3,r6; \ - std r0,16(r1); \ - stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + mr r3,r0; \ bl __trace_hcall_exit; \ + ld r0,STACK_FRAME_OVERHEAD+16(r1); \ addi r1,r1,STACK_FRAME_OVERHEAD; \ - ld r0,16(r1); \ ld r3,STK_PARAM(R3)(r1); \ - mtlr r0; \ -1: + mtlr r0 #define HCALL_INST_POSTCALL_NORETS \ li r5,0; \ @@ -98,37 +78,62 @@ END_FTR_SECTION(0, 1); \ mr r5,BUFREG; \ __HCALL_INST_POSTCALL +#ifdef CONFIG_JUMP_LABEL +#define HCALL_BRANCH(LABEL) \ + ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key) +#else + +/* + * We branch around this in early init (eg when populating the MMU + * hashtable) by using an unconditional cpu feature. + */ +#define HCALL_BRANCH(LABEL) \ +BEGIN_FTR_SECTION; \ + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + std r12,32(r1); \ + cmpdi r12,0; \ + bne- LABEL; \ +1: +#endif + #else #define HCALL_INST_PRECALL(FIRST_ARG) #define HCALL_INST_POSTCALL_NORETS #define HCALL_INST_POSTCALL(BUFREG) +#define HCALL_BRANCH(LABEL) #endif - .text - _GLOBAL_TOC(plpar_hcall_norets) HMT_MEDIUM mfcr r0 stw r0,8(r1) - - HCALL_INST_PRECALL(R4) - + HCALL_BRANCH(plpar_hcall_norets_trace) HVSC /* invoke the hypervisor */ - HCALL_INST_POSTCALL_NORETS - lwz r0,8(r1) mtcrf 0xff,r0 blr /* return r3 = status */ +#ifdef CONFIG_TRACEPOINTS +plpar_hcall_norets_trace: + HCALL_INST_PRECALL(R4) + HVSC + HCALL_INST_POSTCALL_NORETS + lwz r0,8(r1) + mtcrf 0xff,r0 + blr +#endif + _GLOBAL_TOC(plpar_hcall) HMT_MEDIUM mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL(R5) + HCALL_BRANCH(plpar_hcall_trace) std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ @@ -147,12 +152,40 @@ _GLOBAL_TOC(plpar_hcall) std r6, 16(r12) std r7, 24(r12) + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall_trace: + HCALL_INST_PRECALL(R5) + + std r4,STK_PARAM(R4)(r1) + mr r0,r4 + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + HVSC + + ld r12,STK_PARAM(R4)(r1) + std r4,0(r12) + std r5,8(r12) + std r6,16(r12) + std r7,24(r12) + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 - blr /* return r3 = status */ + blr +#endif /* * plpar_hcall_raw can be called in real mode. kexec/kdump need some @@ -194,7 +227,7 @@ _GLOBAL_TOC(plpar_hcall9) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL(R5) + HCALL_BRANCH(plpar_hcall9_trace) std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ @@ -222,12 +255,49 @@ _GLOBAL_TOC(plpar_hcall9) std r11,56(r12) std r0, 64(r12) + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall9_trace: + HCALL_INST_PRECALL(R5) + + std r4,STK_PARAM(R4)(r1) + mr r0,r4 + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R11)(r1) + ld r11,STACK_FRAME_OVERHEAD+STK_PARAM(R12)(r1) + ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R13)(r1) + + HVSC + + mr r0,r12 + ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1) + std r4,0(r12) + std r5,8(r12) + std r6,16(r12) + std r7,24(r12) + std r8,32(r12) + std r9,40(r12) + std r10,48(r12) + std r11,56(r12) + std r0,64(r12) + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 - blr /* return r3 = status */ + blr +#endif /* See plpar_hcall_raw to see why this is needed */ _GLOBAL(plpar_hcall9_raw) diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index cf4e7736e4f1..4575f0c9e521 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -27,6 +27,7 @@ #include <asm/firmware.h> #include <asm/cputable.h> #include <asm/trace.h> +#include <asm/machdep.h> DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); @@ -162,4 +163,4 @@ static int __init hcall_inst_init(void) return 0; } -__initcall(hcall_inst_init); +machine_device_initcall(pseries, hcall_inst_init); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index b02af9ef3ff6..fbfcef514aa7 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -26,6 +26,7 @@ #include <linux/dma-mapping.h> #include <linux/console.h> #include <linux/export.h> +#include <linux/static_key.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/page.h> @@ -649,6 +650,19 @@ EXPORT_SYMBOL(arch_free_page); #endif #ifdef CONFIG_TRACEPOINTS +#ifdef CONFIG_JUMP_LABEL +struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; + +void hcall_tracepoint_regfunc(void) +{ + static_key_slow_inc(&hcall_tracepoint_key); +} + +void hcall_tracepoint_unregfunc(void) +{ + static_key_slow_dec(&hcall_tracepoint_key); +} +#else /* * We optimise our hcall path by placing hcall_tracepoint_refcount * directly in the TOC so we can check if the hcall tracepoints are @@ -658,13 +672,6 @@ EXPORT_SYMBOL(arch_free_page); /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ extern long hcall_tracepoint_refcount; -/* - * Since the tracing code might execute hcalls we need to guard against - * recursion. One example of this are spinlocks calling H_YIELD on - * shared processor partitions. - */ -static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); - void hcall_tracepoint_regfunc(void) { hcall_tracepoint_refcount++; @@ -674,6 +681,15 @@ void hcall_tracepoint_unregfunc(void) { hcall_tracepoint_refcount--; } +#endif + +/* + * Since the tracing code might execute hcalls we need to guard against + * recursion. One example of this are spinlocks calling H_YIELD on + * shared processor partitions. + */ +static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); + void __trace_hcall_entry(unsigned long opcode, unsigned long *args) { diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index bde7ebad3949..d146fef038b8 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -18,6 +18,7 @@ #include <linux/delay.h> #include <linux/slab.h> +#include <asm/machdep.h> #include <asm/rtas.h> #include "pseries.h" @@ -362,4 +363,4 @@ static int __init mobility_sysfs_init(void) return rc; } -device_initcall(mobility_sysfs_init); +machine_device_initcall(pseries, mobility_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 0c882e83c4ce..18ff4626d74e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -16,6 +16,7 @@ #include <asm/rtas.h> #include <asm/hw_irq.h> #include <asm/ppc-pci.h> +#include <asm/machdep.h> static int query_token, change_token; @@ -532,5 +533,4 @@ static int rtas_msi_init(void) return 0; } -arch_initcall(rtas_msi_init); - +machine_arch_initcall(pseries, rtas_msi_init); diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 203cbf0dc101..89e23811199c 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -118,10 +118,10 @@ int remove_phb_dynamic(struct pci_controller *phb) } } - /* Unregister the bridge device from sysfs and remove the PCI bus */ - device_unregister(b->bridge); + /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); + device_unregister(b->bridge); /* Now release the IO resource */ if (res->flags & IORESOURCE_IO) diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c index 6d6266236446..c26eadde434c 100644 --- a/arch/powerpc/platforms/pseries/power.c +++ b/arch/powerpc/platforms/pseries/power.c @@ -25,6 +25,7 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/init.h> +#include <asm/machdep.h> unsigned long rtas_poweron_auto; /* default and normal state is 0 */ @@ -71,11 +72,11 @@ static int __init pm_init(void) return -ENOMEM; return sysfs_create_group(power_kobj, &attr_group); } -core_initcall(pm_init); +machine_core_initcall(pseries, pm_init); #else static int __init apo_pm_init(void) { return (sysfs_create_file(power_kobj, &auto_poweron_attr.attr)); } -__initcall(apo_pm_init); +machine_device_initcall(pseries, apo_pm_init); #endif diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9c5778e6ed4b..dff05b9eb946 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -71,7 +71,7 @@ static int __init init_ras_IRQ(void) return 0; } -subsys_initcall(init_ras_IRQ); +machine_subsys_initcall(pseries, init_ras_IRQ); #define EPOW_SHUTDOWN_NORMAL 1 #define EPOW_SHUTDOWN_ON_UPS 2 diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 1c0a60d98867..0f319521e002 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -446,13 +446,10 @@ static int proc_ppc64_create_ofdt(void) { struct proc_dir_entry *ent; - if (!machine_is(pseries)) - return 0; - ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops); if (ent) proc_set_size(ent, 0); return 0; } -__initcall(proc_ppc64_create_ofdt); +machine_device_initcall(pseries, proc_ppc64_create_ofdt); diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index 72a102758d4e..e09608770909 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -42,4 +42,4 @@ static __init int rng_init(void) return 0; } -subsys_initcall(rng_init); +machine_subsys_initcall(pseries, rng_init); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index f2f40e64658f..cfe8a6389a51 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -351,7 +351,7 @@ static int alloc_dispatch_log_kmem_cache(void) return alloc_dispatch_logs(); } -early_initcall(alloc_dispatch_log_kmem_cache); +machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache); static void pseries_lpar_idle(void) { diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index b87b97849d4c..e76aefae2aa2 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -265,7 +265,7 @@ static int __init pseries_suspend_init(void) { int rc; - if (!machine_is(pseries) || !firmware_has_feature(FW_FEATURE_LPAR)) + if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; suspend_data.token = rtas_token("ibm,suspend-me"); @@ -280,5 +280,4 @@ static int __init pseries_suspend_init(void) suspend_set_ops(&pseries_suspend_ops); return 0; } - -__initcall(pseries_suspend_init); +machine_device_initcall(pseries, pseries_suspend_init); |