From e9ef08bdc189e98610bc4b9a6e6f19bc3793b2c8 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 8 Dec 2006 16:06:01 -0800 Subject: [IA64] Itanium MC Error Injection Tool: Kernel configuration This patch has kenrel configuration changes for the MC Error Injection Tool. Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 10 ++++++++++ arch/ia64/defconfig | 1 + 2 files changed, 11 insertions(+) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index fcacfe291b9b..f1e7cc1389b0 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -429,6 +429,16 @@ config IA64_PALINFO To use this option, you have to ensure that the "/proc file system support" (CONFIG_PROC_FS) is enabled, too. +config IA64_MC_ERR_INJECT + tristate "MC error injection support" + help + Selets whether support for MC error injection. By enabling the + support, kernel provide sysfs interface for user application to + call MC error injection PAL procedure to inject various errors. + This is a useful tool for MCA testing. + + If you're unsure, do not select this option. + config SGI_SN def_bool y if (IA64_SGI_SN2 || IA64_GENERIC) diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig index 9001b3fbaa32..327e1f511f8c 100644 --- a/arch/ia64/defconfig +++ b/arch/ia64/defconfig @@ -144,6 +144,7 @@ CONFIG_COMPAT=y CONFIG_IA64_MCA_RECOVERY=y CONFIG_PERFMON=y CONFIG_IA64_PALINFO=y +# CONFIG_MC_ERR_INJECT is not set CONFIG_SGI_SN=y # -- cgit v1.2.3 From bf6285278418f1dc6f07296bbb286da0bfe26d5d Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 8 Dec 2006 16:14:22 -0800 Subject: [IA64] Itanium MC Error Injection Tool: Doc and sample application This patch contains a documention and sample application. Since the sample application has ~1000 lines of code, it might not be suitable in a kernel documention in kenrel tree. If you think this is not good place to hold the sample application, please let me know and I'm open to other choices e.g. sourceforge etc. Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck --- Documentation/ia64/err_inject.txt | 1068 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1068 insertions(+) create mode 100644 Documentation/ia64/err_inject.txt diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt new file mode 100644 index 000000000000..26487c172cfc --- /dev/null +++ b/Documentation/ia64/err_inject.txt @@ -0,0 +1,1068 @@ + +IPF Machine Check (MC) error inject tool +======================================== + +IPF Machine Check (MC) error inject tool is used to inject MC +errors from Linux. The tool is a test bed for IPF MC work flow including +hardware correctable error handling, OS recoverable error handling, MC +event logging, etc. + +The tool includes two parts: a kernel driver and a user application +sample. The driver provides interface to PAL to inject error +and query error injection capabilities. The driver code is in +arch/ia64/kernel/err_inject.c. The application sample (shown below) +provides a combination of various errors and calls the driver's interface +(sysfs interface) to inject errors or query error injection capabilities. + +The tool can be used to test Intel IPF machine MC handling capabilities. +It's especially useful for people who can not access hardware MC injection +tool to inject error. It's also very useful to integrate with other +software test suits to do stressful testing on IPF. + +Below is a sample application as part of the whole tool. The sample +can be used as a working test tool. Or it can be expanded to include +more features. It also can be a integrated into a libary or other user +application to have more thorough test. + +The sample application takes err.conf as error configuation input. Gcc +compiles the code. After you install err_inject driver, you can run +this sample application to inject errors. + +Errata: Itanium 2 Processors Specification Update lists some errata against +the pal_mc_error_inject PAL procedure. The following err.conf has been tested +on latest Montecito PAL. + +err.conf: + +#This is configuration file for err_inject_tool. +#The format of the each line is: +#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer +#where +# cpu: logical cpu number the error will be inject in. +# loop: times the error will be injected. +# interval: In second. every so often one error is injected. +# err_type_info, err_struct_info: PAL parameters. +# +#Note: All values are hex w/o or w/ 0x prefix. + + +#On cpu2, inject only total 0x10 errors, interval 5 seconds +#corrected, data cache, hier-2, physical addr(assigned by tool code). +#working on Montecito latest PAL. +2, 10, 5, 4101, 95 + +#On cpu4, inject and consume total 0x10 errors, interval 5 seconds +#corrected, data cache, hier-2, physical addr(assigned by tool code). +#working on Montecito latest PAL. +4, 10, 5, 4109, 95 + +#On cpu15, inject and consume total 0x10 errors, interval 5 seconds +#recoverable, DTR0, hier-2. +#working on Montecito latest PAL. +0xf, 0x10, 5, 4249, 15 + +The sample application source code: + +err_injection_tool.c: + +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Copyright (C) 2006 Intel Co + * Fenghua Yu + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_FN_SIZE 256 +#define MAX_BUF_SIZE 256 +#define DATA_BUF_SIZE 256 +#define NR_CPUS 512 +#define MAX_TASK_NUM 2048 +#define MIN_INTERVAL 5 // seconds +#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte. +#define PARA_FIELD_NUM 5 +#define MASK_SIZE (NR_CPUS/64) +#define PATH_FORMAT "/sys/devices/system/node/node0/cpu%d/err_inject/" + +int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask); + +int verbose; +#define vbprintf if (verbose) printf + +int log_info(int cpu, const char *fmt, ...) +{ + FILE *log; + char fn[MAX_FN_SIZE]; + char buf[MAX_BUF_SIZE]; + va_list args; + + sprintf(fn, "%d.log", cpu); + log=fopen(fn, "a+"); + if (log==NULL) { + perror("Error open:"); + return -1; + } + + va_start(args, fmt); + vprintf(fmt, args); + memset(buf, 0, MAX_BUF_SIZE); + vsprintf(buf, fmt, args); + va_end(args); + + fwrite(buf, sizeof(buf), 1, log); + fclose(log); + + return 0; +} + +typedef unsigned long u64; +typedef unsigned int u32; + +typedef union err_type_info_u { + struct { + u64 mode : 3, /* 0-2 */ + err_inj : 3, /* 3-5 */ + err_sev : 2, /* 6-7 */ + err_struct : 5, /* 8-12 */ + struct_hier : 3, /* 13-15 */ + reserved : 48; /* 16-63 */ + } err_type_info_u; + u64 err_type_info; +} err_type_info_t; + +typedef union err_struct_info_u { + struct { + u64 siv : 1, /* 0 */ + c_t : 2, /* 1-2 */ + cl_p : 3, /* 3-5 */ + cl_id : 3, /* 6-8 */ + cl_dp : 1, /* 9 */ + reserved1 : 22, /* 10-31 */ + tiv : 1, /* 32 */ + trigger : 4, /* 33-36 */ + trigger_pl : 3, /* 37-39 */ + reserved2 : 24; /* 40-63 */ + } err_struct_info_cache; + struct { + u64 siv : 1, /* 0 */ + tt : 2, /* 1-2 */ + tc_tr : 2, /* 3-4 */ + tr_slot : 8, /* 5-12 */ + reserved1 : 19, /* 13-31 */ + tiv : 1, /* 32 */ + trigger : 4, /* 33-36 */ + trigger_pl : 3, /* 37-39 */ + reserved2 : 24; /* 40-63 */ + } err_struct_info_tlb; + struct { + u64 siv : 1, /* 0 */ + regfile_id : 4, /* 1-4 */ + reg_num : 7, /* 5-11 */ + reserved1 : 20, /* 12-31 */ + tiv : 1, /* 32 */ + trigger : 4, /* 33-36 */ + trigger_pl : 3, /* 37-39 */ + reserved2 : 24; /* 40-63 */ + } err_struct_info_register; + struct { + u64 reserved; + } err_struct_info_bus_processor_interconnect; + u64 err_struct_info; +} err_struct_info_t; + +typedef union err_data_buffer_u { + struct { + u64 trigger_addr; /* 0-63 */ + u64 inj_addr; /* 64-127 */ + u64 way : 5, /* 128-132 */ + index : 20, /* 133-152 */ + : 39; /* 153-191 */ + } err_data_buffer_cache; + struct { + u64 trigger_addr; /* 0-63 */ + u64 inj_addr; /* 64-127 */ + u64 way : 5, /* 128-132 */ + index : 20, /* 133-152 */ + reserved : 39; /* 153-191 */ + } err_data_buffer_tlb; + struct { + u64 trigger_addr; /* 0-63 */ + } err_data_buffer_register; + struct { + u64 reserved; /* 0-63 */ + } err_data_buffer_bus_processor_interconnect; + u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; +} err_data_buffer_t; + +typedef union capabilities_u { + struct { + u64 i : 1, + d : 1, + rv : 1, + tag : 1, + data : 1, + mesi : 1, + dp : 1, + reserved1 : 3, + pa : 1, + va : 1, + wi : 1, + reserved2 : 20, + trigger : 1, + trigger_pl : 1, + reserved3 : 30; + } capabilities_cache; + struct { + u64 d : 1, + i : 1, + rv : 1, + tc : 1, + tr : 1, + reserved1 : 27, + trigger : 1, + trigger_pl : 1, + reserved2 : 30; + } capabilities_tlb; + struct { + u64 gr_b0 : 1, + gr_b1 : 1, + fr : 1, + br : 1, + pr : 1, + ar : 1, + cr : 1, + rr : 1, + pkr : 1, + dbr : 1, + ibr : 1, + pmc : 1, + pmd : 1, + reserved1 : 3, + regnum : 1, + reserved2 : 15, + trigger : 1, + trigger_pl : 1, + reserved3 : 30; + } capabilities_register; + struct { + u64 reserved; + } capabilities_bus_processor_interconnect; +} capabilities_t; + +typedef struct resources_s { + u64 ibr0 : 1, + ibr2 : 1, + ibr4 : 1, + ibr6 : 1, + dbr0 : 1, + dbr2 : 1, + dbr4 : 1, + dbr6 : 1, + reserved : 48; +} resources_t; + + +long get_page_size(void) +{ + long page_size=sysconf(_SC_PAGESIZE); + return page_size; +} + +#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size()) +#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS) +#define SHM_VA 0x2000000100000000 + +int shmid; +void *shmaddr; + +int create_shm(void) +{ + key_t key; + char fn[MAX_FN_SIZE]; + + /* cpu0 is always existing */ + sprintf(fn, PATH_FORMAT, 0); + if ((key = ftok(fn, 's')) == -1) { + perror("ftok"); + return -1; + } + + shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT); + if (shmid == -1) { + if (errno==EEXIST) { + shmid = shmget(key, SHM_SIZE, 0); + if (shmid == -1) { + perror("shmget"); + return -1; + } + } + else { + perror("shmget"); + return -1; + } + } + vbprintf("shmid=%d", shmid); + + /* connect to the segment: */ + shmaddr = shmat(shmid, (void *)SHM_VA, 0); + if (shmaddr == (void*)-1) { + perror("shmat"); + return -1; + } + + memset(shmaddr, 0, SHM_SIZE); + mlock(shmaddr, SHM_SIZE); + + return 0; +} + +int free_shm() +{ + munlock(shmaddr, SHM_SIZE); + shmdt(shmaddr); + semctl(shmid, 0, IPC_RMID); + + return 0; +} + +#ifdef _SEM_SEMUN_UNDEFINED +union semun +{ + int val; + struct semid_ds *buf; + unsigned short int *array; + struct seminfo *__buf; +}; +#endif + +u32 mode=1; /* 1: physical mode; 2: virtual mode. */ +int one_lock=1; +key_t key[NR_CPUS]; +int semid[NR_CPUS]; + +int create_sem(int cpu) +{ + union semun arg; + char fn[MAX_FN_SIZE]; + int sid; + + sprintf(fn, PATH_FORMAT, cpu); + sprintf(fn, "%s/%s", fn, "err_type_info"); + if ((key[cpu] = ftok(fn, 'e')) == -1) { + perror("ftok"); + return -1; + } + + if (semid[cpu]!=0) + return 0; + + /* clear old semaphore */ + if ((sid = semget(key[cpu], 1, 0)) != -1) + semctl(sid, 0, IPC_RMID); + + /* get one semaphore */ + if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) { + perror("semget"); + printf("Please remove semaphore with key=0x%lx, then run the tool.\n", + (u64)key[cpu]); + return -1; + } + + vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu, + (u64)key[cpu]); + /* initialize the semaphore to 1: */ + arg.val = 1; + if (semctl(semid[cpu], 0, SETVAL, arg) == -1) { + perror("semctl"); + return -1; + } + + return 0; +} + +static int lock(int cpu) +{ + struct sembuf lock; + + lock.sem_num = cpu; + lock.sem_op = 1; + semop(semid[cpu], &lock, 1); + + return 0; +} + +static int unlock(int cpu) +{ + struct sembuf unlock; + + unlock.sem_num = cpu; + unlock.sem_op = -1; + semop(semid[cpu], &unlock, 1); + + return 0; +} + +void free_sem(int cpu) +{ + semctl(semid[cpu], 0, IPC_RMID); +} + +int wr_multi(char *fn, unsigned long *data, int size) +{ + int fd; + char buf[MAX_BUF_SIZE]; + int ret; + + if (size==1) + sprintf(buf, "%lx", *data); + else if (size==3) + sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]); + else { + fprintf(stderr,"write to file with wrong size!\n"); + return -1; + } + + fd=open(fn, O_RDWR); + if (!fd) { + perror("Error:"); + return -1; + } + ret=write(fd, buf, sizeof(buf)); + close(fd); + return ret; +} + +int wr(char *fn, unsigned long data) +{ + return wr_multi(fn, &data, 1); +} + +int rd(char *fn, unsigned long *data) +{ + int fd; + char buf[MAX_BUF_SIZE]; + + fd=open(fn, O_RDONLY); + if (fd<0) { + perror("Error:"); + return -1; + } + read(fd, buf, MAX_BUF_SIZE); + *data=strtoul(buf, NULL, 16); + close(fd); + return 0; +} + +int rd_status(char *path, int *status) +{ + char fn[MAX_FN_SIZE]; + sprintf(fn, "%s/status", path); + if (rd(fn, (u64*)status)<0) { + perror("status reading error.\n"); + return -1; + } + + return 0; +} + +int rd_capabilities(char *path, u64 *capabilities) +{ + char fn[MAX_FN_SIZE]; + sprintf(fn, "%s/capabilities", path); + if (rd(fn, capabilities)<0) { + perror("capabilities reading error.\n"); + return -1; + } + + return 0; +} + +int rd_all(char *path) +{ + unsigned long err_type_info, err_struct_info, err_data_buffer; + int status; + unsigned long capabilities, resources; + char fn[MAX_FN_SIZE]; + + sprintf(fn, "%s/err_type_info", path); + if (rd(fn, &err_type_info)<0) { + perror("err_type_info reading error.\n"); + return -1; + } + printf("err_type_info=%lx\n", err_type_info); + + sprintf(fn, "%s/err_struct_info", path); + if (rd(fn, &err_struct_info)<0) { + perror("err_struct_info reading error.\n"); + return -1; + } + printf("err_struct_info=%lx\n", err_struct_info); + + sprintf(fn, "%s/err_data_buffer", path); + if (rd(fn, &err_data_buffer)<0) { + perror("err_data_buffer reading error.\n"); + return -1; + } + printf("err_data_buffer=%lx\n", err_data_buffer); + + sprintf(fn, "%s/status", path); + if (rd("status", (u64*)&status)<0) { + perror("status reading error.\n"); + return -1; + } + printf("status=%d\n", status); + + sprintf(fn, "%s/capabilities", path); + if (rd(fn,&capabilities)<0) { + perror("capabilities reading error.\n"); + return -1; + } + printf("capabilities=%lx\n", capabilities); + + sprintf(fn, "%s/resources", path); + if (rd(fn, &resources)<0) { + perror("resources reading error.\n"); + return -1; + } + printf("resources=%lx\n", resources); + + return 0; +} + +int query_capabilities(char *path, err_type_info_t err_type_info, + u64 *capabilities) +{ + char fn[MAX_FN_SIZE]; + err_struct_info_t err_struct_info; + err_data_buffer_t err_data_buffer; + + err_struct_info.err_struct_info=0; + memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8); + + sprintf(fn, "%s/err_type_info", path); + wr(fn, err_type_info.err_type_info); + sprintf(fn, "%s/err_struct_info", path); + wr(fn, 0x0); + sprintf(fn, "%s/err_data_buffer", path); + wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); + + // Fire pal_mc_error_inject procedure. + sprintf(fn, "%s/call_start", path); + wr(fn, mode); + + if (rd_capabilities(path, capabilities)<0) + return -1; + + return 0; +} + +int query_all_capabilities() +{ + int status; + err_type_info_t err_type_info; + int err_sev, err_struct, struct_hier; + int cap=0; + u64 capabilities; + char path[MAX_FN_SIZE]; + + err_type_info.err_type_info=0; // Initial + err_type_info.err_type_info_u.mode=0; // Query mode; + err_type_info.err_type_info_u.err_inj=0; + + printf("All capabilities implemented in pal_mc_error_inject:\n"); + sprintf(path, PATH_FORMAT ,0); + for (err_sev=0;err_sev<3;err_sev++) + for (err_struct=0;err_struct<5;err_struct++) + for (struct_hier=0;struct_hier<5;struct_hier++) + { + status=-1; + capabilities=0; + err_type_info.err_type_info_u.err_sev=err_sev; + err_type_info.err_type_info_u.err_struct=err_struct; + err_type_info.err_type_info_u.struct_hier=struct_hier; + + if (query_capabilities(path, err_type_info, &capabilities)<0) + continue; + + if (rd_status(path, &status)<0) + continue; + + if (status==0) { + cap=1; + printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ", + err_sev, err_struct, struct_hier); + printf("capabilities 0x%lx\n", capabilities); + } + } + if (!cap) { + printf("No capabilities supported.\n"); + return 0; + } + + return 0; +} + +int err_inject(int cpu, char *path, err_type_info_t err_type_info, + err_struct_info_t err_struct_info, + err_data_buffer_t err_data_buffer) +{ + int status; + char fn[MAX_FN_SIZE]; + + log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ", + err_type_info.err_type_info, + err_struct_info.err_struct_info); + log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n", + err_data_buffer.err_data_buffer[0], + err_data_buffer.err_data_buffer[1], + err_data_buffer.err_data_buffer[2]); + sprintf(fn, "%s/err_type_info", path); + wr(fn, err_type_info.err_type_info); + sprintf(fn, "%s/err_struct_info", path); + wr(fn, err_struct_info.err_struct_info); + sprintf(fn, "%s/err_data_buffer", path); + wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); + + // Fire pal_mc_error_inject procedure. + sprintf(fn, "%s/call_start", path); + wr(fn,mode); + + if (rd_status(path, &status)<0) { + vbprintf("fail: read status\n"); + return -100; + } + + if (status!=0) { + log_info(cpu, "fail: status=%d\n", status); + return status; + } + + return status; +} + +static int construct_data_buf(char *path, err_type_info_t err_type_info, + err_struct_info_t err_struct_info, + err_data_buffer_t *err_data_buffer, + void *va1) +{ + char fn[MAX_FN_SIZE]; + u64 virt_addr=0, phys_addr=0; + + vbprintf("va1=%lx\n", (u64)va1); + memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8); + + switch (err_type_info.err_type_info_u.err_struct) { + case 1: // Cache + switch (err_struct_info.err_struct_info_cache.cl_id) { + case 1: //Virtual addr + err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1; + break; + case 2: //Phys addr + sprintf(fn, "%s/virtual_to_phys", path); + virt_addr=(u64)va1; + if (wr(fn,virt_addr)<0) + return -1; + rd(fn, &phys_addr); + err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr; + break; + default: + printf("Not supported cl_id\n"); + break; + } + break; + case 2: // TLB + break; + case 3: // Register file + break; + case 4: // Bus/system interconnect + default: + printf("Not supported err_struct\n"); + break; + } + + return 0; +} + +typedef struct { + u64 cpu; + u64 loop; + u64 interval; + u64 err_type_info; + u64 err_struct_info; + u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; +} parameters_t; + +parameters_t line_para; +int para; + +static int empty_data_buffer(u64 *err_data_buffer) +{ + int empty=1; + int i; + + for (i=0;iMIN_INTERVAL + ?interval:MIN_INTERVAL; + parameters[num].err_type_info=err_type_info_conf; + parameters[num].err_struct_info=err_struct_info_conf; + memcpy(parameters[num++].err_data_buffer, + err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ; + + if (num>=MAX_TASK_NUM) + break; + } + } + else { + parameters[0].cpu=line_para.cpu; + parameters[0].loop=line_para.loop; + parameters[0].interval= line_para.interval>MIN_INTERVAL + ?line_para.interval:MIN_INTERVAL; + parameters[0].err_type_info=line_para.err_type_info; + parameters[0].err_struct_info=line_para.err_struct_info; + memcpy(parameters[0].err_data_buffer, + line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ; + + num=1; + } + + /* Create semaphore: If one_lock, one semaphore for all processors. + Otherwise, one sempaphore for each processor. */ + if (one_lock) { + if (create_sem(0)) { + printf("Can not create semaphore...exit\n"); + free_sem(0); + return -1; + } + } + else { + for (i=0;i Date: Fri, 8 Dec 2006 16:15:16 -0800 Subject: [IA64] Itanium MC Error Injection Tool: Driver sysfs interface This kernel driver patch provides sysfs interface for user application to call pal_mc_error_inject() procedure. Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/kernel/err_inject.c | 293 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 293 insertions(+) create mode 100644 arch/ia64/kernel/err_inject.c diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c new file mode 100644 index 000000000000..d3e9f33e8bdd --- /dev/null +++ b/arch/ia64/kernel/err_inject.c @@ -0,0 +1,293 @@ +/* + * err_inject.c - + * 1.) Inject errors to a processor. + * 2.) Query error injection capabilities. + * This driver along with user space code can be acting as an error + * injection tool. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Written by: Fenghua Yu , Intel Corporation + * Copyright (C) 2006, Intel Corp. All rights reserved. + * + */ +#include +#include +#include +#include +#include + +#define ERR_INJ_DEBUG + +#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte; + +#define define_one_ro(name) \ +static SYSDEV_ATTR(name, 0444, show_##name, NULL) + +#define define_one_rw(name) \ +static SYSDEV_ATTR(name, 0644, show_##name, store_##name) + +static u64 call_start[NR_CPUS]; +static u64 phys_addr[NR_CPUS]; +static u64 err_type_info[NR_CPUS]; +static u64 err_struct_info[NR_CPUS]; +static struct { + u64 data1; + u64 data2; + u64 data3; +} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS]; +static s64 status[NR_CPUS]; +static u64 capabilities[NR_CPUS]; +static u64 resources[NR_CPUS]; + +#define show(name) \ +static ssize_t \ +show_##name(struct sys_device *dev, char *buf) \ +{ \ + u32 cpu=dev->id; \ + return sprintf(buf, "%lx\n", name[cpu]); \ +} + +#define store(name) \ +static ssize_t \ +store_##name(struct sys_device *dev, const char *buf, size_t size) \ +{ \ + unsigned int cpu=dev->id; \ + name[cpu] = simple_strtoull(buf, NULL, 16); \ + return size; \ +} + +show(call_start) + +/* It's user's responsibility to call the PAL procedure on a specific + * processor. The cpu number in driver is only used for storing data. + */ +static ssize_t +store_call_start(struct sys_device *dev, const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + unsigned long call_start = simple_strtoull(buf, NULL, 16); + +#ifdef ERR_INJ_DEBUG + printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu); + printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]); + printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]); + printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3); +#endif + switch (call_start) { + case 0: /* Do nothing. */ + break; + case 1: /* Call pal_mc_error_inject in physical mode. */ + status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu], + err_struct_info[cpu], + ia64_tpa(&err_data_buffer[cpu]), + &capabilities[cpu], + &resources[cpu]); + break; + case 2: /* Call pal_mc_error_inject in virtual mode. */ + status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu], + err_struct_info[cpu], + ia64_tpa(&err_data_buffer[cpu]), + &capabilities[cpu], + &resources[cpu]); + break; + default: + status[cpu] = -EINVAL; + break; + } + +#ifdef ERR_INJ_DEBUG + printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]); + printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]); + printk(KERN_DEBUG "resources=%lx\n", resources[cpu]); +#endif + return size; +} + +show(err_type_info) +store(err_type_info) + +static ssize_t +show_virtual_to_phys(struct sys_device *dev, char *buf) +{ + unsigned int cpu=dev->id; + return sprintf(buf, "%lx\n", phys_addr[cpu]); +} + +static ssize_t +store_virtual_to_phys(struct sys_device *dev, const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + u64 virt_addr=simple_strtoull(buf, NULL, 16); + int ret; + + ret = get_user_pages(current, current->mm, virt_addr, + 1, VM_READ, 0, NULL, NULL); + if (ret<=0) { +#ifdef ERR_INJ_DEBUG + printk("Virtual address %lx is not existing.\n",virt_addr); +#endif + return -EINVAL; + } + + phys_addr[cpu] = ia64_tpa(virt_addr); + return size; +} + +show(err_struct_info) +store(err_struct_info) + +static ssize_t +show_err_data_buffer(struct sys_device *dev, char *buf) +{ + unsigned int cpu=dev->id; + + return sprintf(buf, "%lx, %lx, %lx\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3); +} + +static ssize_t +store_err_data_buffer(struct sys_device *dev, const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + int ret; + +#ifdef ERR_INJ_DEBUG + printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3, + cpu); +#endif + ret=sscanf(buf, "%lx, %lx, %lx", + &err_data_buffer[cpu].data1, + &err_data_buffer[cpu].data2, + &err_data_buffer[cpu].data3); + if (ret!=ERR_DATA_BUFFER_SIZE) + return -EINVAL; + + return size; +} + +show(status) +show(capabilities) +show(resources) + +define_one_rw(call_start); +define_one_rw(err_type_info); +define_one_rw(err_struct_info); +define_one_rw(err_data_buffer); +define_one_rw(virtual_to_phys); +define_one_ro(status); +define_one_ro(capabilities); +define_one_ro(resources); + +static struct attribute *default_attrs[] = { + &attr_call_start.attr, + &attr_virtual_to_phys.attr, + &attr_err_type_info.attr, + &attr_err_struct_info.attr, + &attr_err_data_buffer.attr, + &attr_status.attr, + &attr_capabilities.attr, + &attr_resources.attr, + NULL +}; + +static struct attribute_group err_inject_attr_group = { + .attrs = default_attrs, + .name = "err_inject" +}; +/* Add/Remove err_inject interface for CPU device */ +static int __cpuinit err_inject_add_dev(struct sys_device * sys_dev) +{ + return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group); +} + +static int __cpuinit err_inject_remove_dev(struct sys_device * sys_dev) +{ + sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); + return 0; +} +static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + err_inject_add_dev(sys_dev); + break; + case CPU_DEAD: + err_inject_remove_dev(sys_dev); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata err_inject_cpu_notifier = +{ + .notifier_call = err_inject_cpu_callback, +}; + +static int __init +err_inject_init(void) +{ + int i; + +#ifdef ERR_INJ_DEBUG + printk(KERN_INFO "Enter error injection driver.\n"); +#endif + for_each_online_cpu(i) { + err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE, + (void *)(long)i); + } + + register_hotcpu_notifier(&err_inject_cpu_notifier); + + return 0; +} + +static void __exit +err_inject_exit(void) +{ + int i; + struct sys_device *sys_dev; + +#ifdef ERR_INJ_DEBUG + printk(KERN_INFO "Exit error injection driver.\n"); +#endif + for_each_online_cpu(i) { + sys_dev = get_cpu_sysdev(i); + sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); + } + unregister_hotcpu_notifier(&err_inject_cpu_notifier); +} + +module_init(err_inject_init); +module_exit(err_inject_exit); + +MODULE_AUTHOR("Fenghua Yu "); +MODULE_DESCRIPTION("MC error injection kenrel sysfs interface"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 539d517ad10bbaac2c04e0ee22916a360c5bcc0d Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 8 Dec 2006 16:16:24 -0800 Subject: [IA64] Itanium MC Error Injection Tool: Makefile changes This patch has Makefile changes. Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/kernel/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 098ee605bf5e..33e5a598672d 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI_MSI) += msi_ia64.o mca_recovery-y += mca_drv.o mca_drv_asm.o +obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) -- cgit v1.2.3 From 1138b7e2d40711b024768034beb64885994271e4 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 8 Dec 2006 16:17:31 -0800 Subject: [IA64] Itanium MC Error Injection Tool: pal_mc_error_inject() interface This patch implements pal_mc_error_inject() interface in kernel. Both physical mode and virtual mode are supported. Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck --- include/asm-ia64/pal.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h index bc768153f3c9..56e9210733d6 100644 --- a/include/asm-ia64/pal.h +++ b/include/asm-ia64/pal.h @@ -89,6 +89,8 @@ #define PAL_GET_PSTATE_TYPE_AVGNORESET 2 #define PAL_GET_PSTATE_TYPE_INSTANT 3 +#define PAL_MC_ERROR_INJECT 276 /* Injects processor error or returns injection capabilities */ + #ifndef __ASSEMBLY__ #include @@ -1234,6 +1236,37 @@ ia64_pal_mc_error_info (u64 info_index, u64 type_index, u64 *size, u64 *error_in return iprv.status; } +/* Injects the requested processor error or returns info on + * supported injection capabilities for current processor implementation + */ +static inline s64 +ia64_pal_mc_error_inject_phys (u64 err_type_info, u64 err_struct_info, + u64 err_data_buffer, u64 *capabilities, u64 *resources) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info, + err_struct_info, err_data_buffer); + if (capabilities) + *capabilities= iprv.v0; + if (resources) + *resources= iprv.v1; + return iprv.status; +} + +static inline s64 +ia64_pal_mc_error_inject_virt (u64 err_type_info, u64 err_struct_info, + u64 err_data_buffer, u64 *capabilities, u64 *resources) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info, + err_struct_info, err_data_buffer); + if (capabilities) + *capabilities= iprv.v0; + if (resources) + *resources= iprv.v1; + return iprv.status; +} + /* Inform PALE_CHECK whether a machine check is expected so that PALE_CHECK willnot * attempt to correct any expected machine checks. */ -- cgit v1.2.3 From e1b43bd556a611584a65f529e5077c1b54ace4f7 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 5 Feb 2007 15:47:43 -0800 Subject: [IA64] Fix example error injection program Progam accessed using /sys/devices/system/node/node0/cpu%d/err_inject/ This path only exists for CONFIG_NUMA=y systems. Better to use /sys/devices/system/cpu/cpu%d/err_inject/ which is available on all systems. Signed-off-by: Tony Luck --- Documentation/ia64/err_inject.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt index 26487c172cfc..6449a7090dbb 100644 --- a/Documentation/ia64/err_inject.txt +++ b/Documentation/ia64/err_inject.txt @@ -111,7 +111,7 @@ err_injection_tool.c: #define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte. #define PARA_FIELD_NUM 5 #define MASK_SIZE (NR_CPUS/64) -#define PATH_FORMAT "/sys/devices/system/node/node0/cpu%d/err_inject/" +#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/" int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask); -- cgit v1.2.3