diff options
78 files changed, 945 insertions, 453 deletions
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index 24c5aade8998..cbee3a27f768 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -196,7 +196,7 @@ void print_delayacct(struct taskstats *t) "IO %15s%15s\n" " %15llu%15llu\n" "MEM %15s%15s\n" - " %15llu%15llu\n" + " %15llu%15llu\n", "count", "real total", "virtual total", "delay total", t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total, t->cpu_delay_total, diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt new file mode 100644 index 000000000000..8242f52d0f22 --- /dev/null +++ b/Documentation/vm/numa_memory_policy.txt @@ -0,0 +1,332 @@ + +What is Linux Memory Policy? + +In the Linux kernel, "memory policy" determines from which node the kernel will +allocate memory in a NUMA system or in an emulated NUMA system. Linux has +supported platforms with Non-Uniform Memory Access architectures since 2.4.?. +The current memory policy support was added to Linux 2.6 around May 2004. This +document attempts to describe the concepts and APIs of the 2.6 memory policy +support. + +Memory policies should not be confused with cpusets (Documentation/cpusets.txt) +which is an administrative mechanism for restricting the nodes from which +memory may be allocated by a set of processes. Memory policies are a +programming interface that a NUMA-aware application can take advantage of. When +both cpusets and policies are applied to a task, the restrictions of the cpuset +takes priority. See "MEMORY POLICIES AND CPUSETS" below for more details. + +MEMORY POLICY CONCEPTS + +Scope of Memory Policies + +The Linux kernel supports _scopes_ of memory policy, described here from +most general to most specific: + + System Default Policy: this policy is "hard coded" into the kernel. It + is the policy that governs all page allocations that aren't controlled + by one of the more specific policy scopes discussed below. When the + system is "up and running", the system default policy will use "local + allocation" described below. However, during boot up, the system + default policy will be set to interleave allocations across all nodes + with "sufficient" memory, so as not to overload the initial boot node + with boot-time allocations. + + Task/Process Policy: this is an optional, per-task policy. When defined + for a specific task, this policy controls all page allocations made by or + on behalf of the task that aren't controlled by a more specific scope. + If a task does not define a task policy, then all page allocations that + would have been controlled by the task policy "fall back" to the System + Default Policy. + + The task policy applies to the entire address space of a task. Thus, + it is inheritable, and indeed is inherited, across both fork() + [clone() w/o the CLONE_VM flag] and exec*(). This allows a parent task + to establish the task policy for a child task exec()'d from an + executable image that has no awareness of memory policy. See the + MEMORY POLICY APIS section, below, for an overview of the system call + that a task may use to set/change it's task/process policy. + + In a multi-threaded task, task policies apply only to the thread + [Linux kernel task] that installs the policy and any threads + subsequently created by that thread. Any sibling threads existing + at the time a new task policy is installed retain their current + policy. + + A task policy applies only to pages allocated after the policy is + installed. Any pages already faulted in by the task when the task + changes its task policy remain where they were allocated based on + the policy at the time they were allocated. + + VMA Policy: A "VMA" or "Virtual Memory Area" refers to a range of a task's + virtual adddress space. A task may define a specific policy for a range + of its virtual address space. See the MEMORY POLICIES APIS section, + below, for an overview of the mbind() system call used to set a VMA + policy. + + A VMA policy will govern the allocation of pages that back this region of + the address space. Any regions of the task's address space that don't + have an explicit VMA policy will fall back to the task policy, which may + itself fall back to the System Default Policy. + + VMA policies have a few complicating details: + + VMA policy applies ONLY to anonymous pages. These include pages + allocated for anonymous segments, such as the task stack and heap, and + any regions of the address space mmap()ed with the MAP_ANONYMOUS flag. + If a VMA policy is applied to a file mapping, it will be ignored if + the mapping used the MAP_SHARED flag. If the file mapping used the + MAP_PRIVATE flag, the VMA policy will only be applied when an + anonymous page is allocated on an attempt to write to the mapping-- + i.e., at Copy-On-Write. + + VMA policies are shared between all tasks that share a virtual address + space--a.k.a. threads--independent of when the policy is installed; and + they are inherited across fork(). However, because VMA policies refer + to a specific region of a task's address space, and because the address + space is discarded and recreated on exec*(), VMA policies are NOT + inheritable across exec(). Thus, only NUMA-aware applications may + use VMA policies. + + A task may install a new VMA policy on a sub-range of a previously + mmap()ed region. When this happens, Linux splits the existing virtual + memory area into 2 or 3 VMAs, each with it's own policy. + + By default, VMA policy applies only to pages allocated after the policy + is installed. Any pages already faulted into the VMA range remain + where they were allocated based on the policy at the time they were + allocated. However, since 2.6.16, Linux supports page migration via + the mbind() system call, so that page contents can be moved to match + a newly installed policy. + + Shared Policy: Conceptually, shared policies apply to "memory objects" + mapped shared into one or more tasks' distinct address spaces. An + application installs a shared policies the same way as VMA policies--using + the mbind() system call specifying a range of virtual addresses that map + the shared object. However, unlike VMA policies, which can be considered + to be an attribute of a range of a task's address space, shared policies + apply directly to the shared object. Thus, all tasks that attach to the + object share the policy, and all pages allocated for the shared object, + by any task, will obey the shared policy. + + As of 2.6.22, only shared memory segments, created by shmget() or + mmap(MAP_ANONYMOUS|MAP_SHARED), support shared policy. When shared + policy support was added to Linux, the associated data structures were + added to hugetlbfs shmem segments. At the time, hugetlbfs did not + support allocation at fault time--a.k.a lazy allocation--so hugetlbfs + shmem segments were never "hooked up" to the shared policy support. + Although hugetlbfs segments now support lazy allocation, their support + for shared policy has not been completed. + + As mentioned above [re: VMA policies], allocations of page cache + pages for regular files mmap()ed with MAP_SHARED ignore any VMA + policy installed on the virtual address range backed by the shared + file mapping. Rather, shared page cache pages, including pages backing + private mappings that have not yet been written by the task, follow + task policy, if any, else System Default Policy. + + The shared policy infrastructure supports different policies on subset + ranges of the shared object. However, Linux still splits the VMA of + the task that installs the policy for each range of distinct policy. + Thus, different tasks that attach to a shared memory segment can have + different VMA configurations mapping that one shared object. This + can be seen by examining the /proc/<pid>/numa_maps of tasks sharing + a shared memory region, when one task has installed shared policy on + one or more ranges of the region. + +Components of Memory Policies + + A Linux memory policy is a tuple consisting of a "mode" and an optional set + of nodes. The mode determine the behavior of the policy, while the + optional set of nodes can be viewed as the arguments to the behavior. + + Internally, memory policies are implemented by a reference counted + structure, struct mempolicy. Details of this structure will be discussed + in context, below, as required to explain the behavior. + + Note: in some functions AND in the struct mempolicy itself, the mode + is called "policy". However, to avoid confusion with the policy tuple, + this document will continue to use the term "mode". + + Linux memory policy supports the following 4 behavioral modes: + + Default Mode--MPOL_DEFAULT: The behavior specified by this mode is + context or scope dependent. + + As mentioned in the Policy Scope section above, during normal + system operation, the System Default Policy is hard coded to + contain the Default mode. + + In this context, default mode means "local" allocation--that is + attempt to allocate the page from the node associated with the cpu + where the fault occurs. If the "local" node has no memory, or the + node's memory can be exhausted [no free pages available], local + allocation will "fallback to"--attempt to allocate pages from-- + "nearby" nodes, in order of increasing "distance". + + Implementation detail -- subject to change: "Fallback" uses + a per node list of sibling nodes--called zonelists--built at + boot time, or when nodes or memory are added or removed from + the system [memory hotplug]. These per node zonelist are + constructed with nodes in order of increasing distance based + on information provided by the platform firmware. + + When a task/process policy or a shared policy contains the Default + mode, this also means "local allocation", as described above. + + In the context of a VMA, Default mode means "fall back to task + policy"--which may or may not specify Default mode. Thus, Default + mode can not be counted on to mean local allocation when used + on a non-shared region of the address space. However, see + MPOL_PREFERRED below. + + The Default mode does not use the optional set of nodes. + + MPOL_BIND: This mode specifies that memory must come from the + set of nodes specified by the policy. + + The memory policy APIs do not specify an order in which the nodes + will be searched. However, unlike "local allocation", the Bind + policy does not consider the distance between the nodes. Rather, + allocations will fallback to the nodes specified by the policy in + order of numeric node id. Like everything in Linux, this is subject + to change. + + MPOL_PREFERRED: This mode specifies that the allocation should be + attempted from the single node specified in the policy. If that + allocation fails, the kernel will search other nodes, exactly as + it would for a local allocation that started at the preferred node + in increasing distance from the preferred node. "Local" allocation + policy can be viewed as a Preferred policy that starts at the node + containing the cpu where the allocation takes place. + + Internally, the Preferred policy uses a single node--the + preferred_node member of struct mempolicy. A "distinguished + value of this preferred_node, currently '-1', is interpreted + as "the node containing the cpu where the allocation takes + place"--local allocation. This is the way to specify + local allocation for a specific range of addresses--i.e. for + VMA policies. + + MPOL_INTERLEAVED: This mode specifies that page allocations be + interleaved, on a page granularity, across the nodes specified in + the policy. This mode also behaves slightly differently, based on + the context where it is used: + + For allocation of anonymous pages and shared memory pages, + Interleave mode indexes the set of nodes specified by the policy + using the page offset of the faulting address into the segment + [VMA] containing the address modulo the number of nodes specified + by the policy. It then attempts to allocate a page, starting at + the selected node, as if the node had been specified by a Preferred + policy or had been selected by a local allocation. That is, + allocation will follow the per node zonelist. + + For allocation of page cache pages, Interleave mode indexes the set + of nodes specified by the policy using a node counter maintained + per task. This counter wraps around to the lowest specified node + after it reaches the highest specified node. This will tend to + spread the pages out over the nodes specified by the policy based + on the order in which they are allocated, rather than based on any + page offset into an address range or file. During system boot up, + the temporary interleaved system default policy works in this + mode. + +MEMORY POLICY APIs + +Linux supports 3 system calls for controlling memory policy. These APIS +always affect only the calling task, the calling task's address space, or +some shared object mapped into the calling task's address space. + + Note: the headers that define these APIs and the parameter data types + for user space applications reside in a package that is not part of + the Linux kernel. The kernel system call interfaces, with the 'sys_' + prefix, are defined in <linux/syscalls.h>; the mode and flag + definitions are defined in <linux/mempolicy.h>. + +Set [Task] Memory Policy: + + long set_mempolicy(int mode, const unsigned long *nmask, + unsigned long maxnode); + + Set's the calling task's "task/process memory policy" to mode + specified by the 'mode' argument and the set of nodes defined + by 'nmask'. 'nmask' points to a bit mask of node ids containing + at least 'maxnode' ids. + + See the set_mempolicy(2) man page for more details + + +Get [Task] Memory Policy or Related Information + + long get_mempolicy(int *mode, + const unsigned long *nmask, unsigned long maxnode, + void *addr, int flags); + + Queries the "task/process memory policy" of the calling task, or + the policy or location of a specified virtual address, depending + on the 'flags' argument. + + See the get_mempolicy(2) man page for more details + + +Install VMA/Shared Policy for a Range of Task's Address Space + + long mbind(void *start, unsigned long len, int mode, + const unsigned long *nmask, unsigned long maxnode, + unsigned flags); + + mbind() installs the policy specified by (mode, nmask, maxnodes) as + a VMA policy for the range of the calling task's address space + specified by the 'start' and 'len' arguments. Additional actions + may be requested via the 'flags' argument. + + See the mbind(2) man page for more details. + +MEMORY POLICY COMMAND LINE INTERFACE + +Although not strictly part of the Linux implementation of memory policy, +a command line tool, numactl(8), exists that allows one to: + ++ set the task policy for a specified program via set_mempolicy(2), fork(2) and + exec(2) + ++ set the shared policy for a shared memory segment via mbind(2) + +The numactl(8) tool is packages with the run-time version of the library +containing the memory policy system call wrappers. Some distributions +package the headers and compile-time libraries in a separate development +package. + + +MEMORY POLICIES AND CPUSETS + +Memory policies work within cpusets as described above. For memory policies +that require a node or set of nodes, the nodes are restricted to the set of +nodes whose memories are allowed by the cpuset constraints. If the +intersection of the set of nodes specified for the policy and the set of nodes +allowed by the cpuset is the empty set, the policy is considered invalid and +cannot be installed. + +The interaction of memory policies and cpusets can be problematic for a +couple of reasons: + +1) the memory policy APIs take physical node id's as arguments. However, the + memory policy APIs do not provide a way to determine what nodes are valid + in the context where the application is running. An application MAY consult + the cpuset file system [directly or via an out of tree, and not generally + available, libcpuset API] to obtain this information, but then the + application must be aware that it is running in a cpuset and use what are + intended primarily as administrative APIs. + + However, as long as the policy specifies at least one node that is valid + in the controlling cpuset, the policy can be used. + +2) when tasks in two cpusets share access to a memory region, such as shared + memory segments created by shmget() of mmap() with the MAP_ANONYMOUS and + MAP_SHARED flags, and any of the tasks install shared policy on the region, + only nodes whose memories are allowed in both cpusets may be used in the + policies. Again, obtaining this information requires "stepping outside" + the memory policy APIs, as well as knowing in what cpusets other task might + be attaching to the shared region, to use the cpuset information. + Furthermore, if the cpusets' allowed memory sets are disjoint, "local" + allocation is the only valid policy. diff --git a/MAINTAINERS b/MAINTAINERS index 371fe67a4eef..abe5fa7f9c33 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3452,7 +3452,7 @@ S: Maintained TPM DEVICE DRIVER P: Kylene Hall -M: kjhall@us.ibm.com +M: tpmdd-devel@lists.sourceforge.net W: http://tpmdd.sourceforge.net P: Marcel Selhorst M: tpm@selhorst.net diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index e061b63a0038..dfbe7ab9ffe2 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -37,6 +37,7 @@ #include <linux/a.out.h> #include <linux/interrupt.h> #include <linux/reboot.h> +#include <linux/fs.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index de7688cfd573..ddc62727dc9f 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -18,6 +18,7 @@ #include <linux/mman.h> #include <linux/file.h> #include <linux/utsname.h> +#include <linux/fs.h> #include <asm/setup.h> #include <asm/uaccess.h> diff --git a/arch/i386/xen/xen-head.S b/arch/i386/xen/xen-head.S index bc71f3bc4014..f8d6937db2ec 100644 --- a/arch/i386/xen/xen-head.S +++ b/arch/i386/xen/xen-head.S @@ -7,20 +7,20 @@ #include <asm/boot.h> #include <xen/interface/elfnote.h> - .section .init.text +.pushsection .init.text ENTRY(startup_xen) movl %esi,xen_start_info cld movl $(init_thread_union+THREAD_SIZE),%esp jmp xen_start_kernel +.popsection -.pushsection ".bss.page_aligned" +.pushsection .bss.page_aligned .align PAGE_SIZE_asm ENTRY(hypercall_page) .skip 0x1000 .popsection - .section .text ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c index 7e6d5fb75390..ed3a4caec620 100644 --- a/arch/m68k/kernel/setup.c +++ b/arch/m68k/kernel/setup.c @@ -62,7 +62,6 @@ EXPORT_SYMBOL(m68k_num_memory); int m68k_realnum_memory; EXPORT_SYMBOL(m68k_realnum_memory); unsigned long m68k_memoffset; -EXPORT_SYMBOL(m68k_memoffset); struct mem_info m68k_memory[NUM_MEMINFO]; EXPORT_SYMBOL(m68k_memory); @@ -200,7 +199,6 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record) (m68k_num_memory - 1)); m68k_num_memory = 1; } - m68k_memoffset = m68k_memory[0].addr-PAGE_OFFSET; #endif } diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index c42245775a4d..59fe285865ec 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -19,6 +19,8 @@ SECTIONS *(.gnu.warning) } :text = 0x4e75 + _etext = .; /* End of text section */ + . = ALIGN(16); /* Exception table */ __start___ex_table = .; __ex_table : { *(__ex_table) } @@ -26,8 +28,6 @@ SECTIONS RODATA - _etext = .; /* End of text section */ - .data : { /* Data */ DATA_DATA CONSTRUCTORS diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 7d571a2b44dd..30d34f285024 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -210,11 +210,7 @@ void __init paging_init(void) int i; #ifdef DEBUG - { - extern unsigned long availmem; - printk ("start of paging_init (%p, %lx)\n", - kernel_pg_dir, availmem); - } + printk ("start of paging_init (%p, %lx)\n", kernel_pg_dir, availmem); #endif /* Fix the cache mode in the page descriptors for the 680[46]0. */ diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 846f97534685..47502d5ec19f 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -28,6 +28,7 @@ #include <linux/a.out.h> #include <linux/interrupt.h> #include <linux/reboot.h> +#include <linux/fs.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index 48e6b33e8b44..15d62c5279a9 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -18,6 +18,7 @@ #include <linux/mman.h> #include <linux/file.h> #include <linux/utsname.h> +#include <linux/fs.h> #include <asm/setup.h> #include <asm/uaccess.h> diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index aff661fe2ee1..0eabe73c964d 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -612,6 +612,8 @@ static int ubd_open_dev(struct ubd *ubd_dev) ubd_dev->fd = fd; if(ubd_dev->cow.file != NULL){ + blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long)); + err = -ENOMEM; ubd_dev->cow.bitmap = (void *) vmalloc(ubd_dev->cow.bitmap_len); if(ubd_dev->cow.bitmap == NULL){ @@ -712,8 +714,6 @@ static int ubd_add(int n, char **error_out) ubd_dev->queue->queuedata = ubd_dev; blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG); - if(ubd_dev->cow.file != NULL) - blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long)); err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c index 430673be1df7..7225124d96c2 100644 --- a/drivers/ata/pata_it821x.c +++ b/drivers/ata/pata_it821x.c @@ -587,7 +587,7 @@ static int it821x_port_start(struct ata_port *ap) itdev->want[1][1] = ATA_ANY; itdev->last_device = -1; - if (pdev->revision == 0x11) { + if (pdev->revision == 0x10) { itdev->timing10 = 1; /* Need to disable ATAPI DMA for this case */ if (!itdev->smart) diff --git a/drivers/auxdisplay/cfag12864b.c b/drivers/auxdisplay/cfag12864b.c index cb44cb4f6a47..80bb06105387 100644 --- a/drivers/auxdisplay/cfag12864b.c +++ b/drivers/auxdisplay/cfag12864b.c @@ -355,7 +355,7 @@ static int __init cfag12864b_init(void) cfag12864b_cache = kmalloc(sizeof(unsigned char) * CFAG12864B_SIZE, GFP_KERNEL); - if (cfag12864b_buffer == NULL) { + if (cfag12864b_cache == NULL) { printk(KERN_ERR CFAG12864B_NAME ": ERROR: " "can't alloc cache buffer (%i bytes)\n", CFAG12864B_SIZE); diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ef32e977d307..4245b7f80a49 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -68,6 +68,7 @@ config AMIGA_Z2RAM config BLK_DEV_XD tristate "XT hard disk support" depends on ISA && ISA_DMA_API + select CHECK_SIGNATURE help Very old 8 bit hard disk controllers used in the IBM XT computer will be supported if you say Y here. diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 96d2f9ee42d6..9b07f7851061 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2292,7 +2292,7 @@ static int __devinit ipmi_of_probe(struct of_device *dev, info->irq = irq_of_parse_and_map(dev->node, 0); info->dev = &dev->dev; - dev_dbg(&dev->dev, "addr 0x%lx regsize %ld spacing %ld irq %x\n", + dev_dbg(&dev->dev, "addr 0x%lx regsize %d spacing %d irq %x\n", info->io.addr_data, info->io.regsize, info->io.regspacing, info->irq); diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index 2ce0af1bd588..d95f316afb5a 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -1022,10 +1022,6 @@ static const unsigned short x86_keycodes[256] = 308,310,313,314,315,317,318,319,320,357,322,323,324,325,276,330, 332,340,365,342,343,344,345,346,356,270,341,368,369,370,371,372 }; -#ifdef CONFIG_MAC_EMUMOUSEBTN -extern int mac_hid_mouse_emulate_buttons(int, int, int); -#endif /* CONFIG_MAC_EMUMOUSEBTN */ - #ifdef CONFIG_SPARC static int sparc_l1_a_state = 0; extern void sun_do_break(void); diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index bbb7f1292665..2f97d2f8f916 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -1565,6 +1565,9 @@ static int hdlcdev_open(struct net_device *dev) int rc; unsigned long flags; + if (!try_module_get(THIS_MODULE)) + return -EBUSY; + DBGINFO(("%s hdlcdev_open\n", dev->name)); /* generic HDLC layer open processing */ @@ -1634,6 +1637,7 @@ static int hdlcdev_close(struct net_device *dev) info->netcount=0; spin_unlock_irqrestore(&info->netlock, flags); + module_put(THIS_MODULE); return 0; } diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 9bb542913b86..39564b76d4a3 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -7,7 +7,7 @@ * Reiner Sailer <sailer@watson.ibm.com> * Kylene Hall <kjhall@us.ibm.com> * - * Maintained by: <tpmdd_devel@lists.sourceforge.net> + * Maintained by: <tpmdd-devel@lists.sourceforge.net> * * Device driver for TCG/TCPA TPM (trusted platform module). * Specifications at www.trustedcomputinggroup.org diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index b2e2b002a1bb..d15ccddc92eb 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -7,7 +7,7 @@ * Reiner Sailer <sailer@watson.ibm.com> * Kylene Hall <kjhall@us.ibm.com> * - * Maintained by: <tpmdd_devel@lists.sourceforge.net> + * Maintained by: <tpmdd-devel@lists.sourceforge.net> * * Device driver for TCG/TCPA TPM (trusted platform module). * Specifications at www.trustedcomputinggroup.org diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c index 1ab0896070be..d0e7926eb486 100644 --- a/drivers/char/tpm/tpm_atmel.c +++ b/drivers/char/tpm/tpm_atmel.c @@ -7,7 +7,7 @@ * Reiner Sailer <sailer@watson.ibm.com> * Kylene Hall <kjhall@us.ibm.com> * - * Maintained by: <tpmdd_devel@lists.sourceforge.net> + * Maintained by: <tpmdd-devel@lists.sourceforge.net> * * Device driver for TCG/TCPA TPM (trusted platform module). * Specifications at www.trustedcomputinggroup.org diff --git a/drivers/char/tpm/tpm_atmel.h b/drivers/char/tpm/tpm_atmel.h index 9363bcf0a402..6c831f9466b7 100644 --- a/drivers/char/tpm/tpm_atmel.h +++ b/drivers/char/tpm/tpm_atmel.h @@ -4,7 +4,7 @@ * Authors: * Kylene Hall <kjhall@us.ibm.com> * - * Maintained by: <tpmdd_devel@lists.sourceforge.net> + * Maintained by: <tpmdd-devel@lists.sourceforge.net> * * Device driver for TCG/TCPA TPM (trusted platform module). * Specifications at www.trustedcomputinggroup.org diff --git a/drivers/char/tpm/tpm_bios.c b/drivers/char/tpm/tpm_bios.c index 8677fc6a545e..60a2d2630e36 100644 --- a/drivers/char/tpm/tpm_bios.c +++ b/drivers/char/tpm/tpm_bios.c @@ -7,6 +7,8 @@ * Reiner Sailer <sailer@watson.ibm.com> * Kylene Hall <kjhall@us.ibm.com> * + * Maintained by: <tpmdd-devel@lists.sourceforge.net> + * * Access to the eventlog extended by the TCG BIOS of PC platform * * This program is free software; you can redistribute it and/or diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index 608f73071bef..6313326bc41f 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -7,7 +7,7 @@ * Reiner Sailer <sailer@watson.ibm.com> * Kylene Hall <kjhall@us.ibm.com> * - * Maintained by: <tpmdd_devel@lists.sourceforge.net> + * Maintained by: <tpmdd-devel@lists.sourceforge.net> * * Device driver for TCG/TCPA TPM (trusted platform module). * Specifications at www.trustedcomputinggroup.org diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 483f3f60013c..23fa18a6654c 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -5,6 +5,8 @@ * Leendert van Doorn <leendert@watson.ibm.com> * Kylene Hall <kjhall@us.ibm.com> * + * Maintained by: <tpmdd-devel@lists.sourceforge.net> + * * Device driver for TCG/TCPA TPM (trusted platform module). * Specifications at www.trustedcomputinggroup.org * diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 9b26574f1466..d602b8fa7d46 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -68,6 +68,7 @@ config INPUT_WISTRON_BTNS select INPUT_POLLDEV select NEW_LEDS select LEDS_CLASS + select CHECK_SIGNATURE help Say Y here for support of Winstron laptop button interface, used on laptops of various brands, including Acer and Fujitsu-Siemens. If diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig index fd6925f41647..41e2250613a1 100644 --- a/drivers/lguest/Kconfig +++ b/drivers/lguest/Kconfig @@ -1,6 +1,6 @@ config LGUEST tristate "Linux hypervisor example code" - depends on X86 && PARAVIRT && EXPERIMENTAL && !X86_PAE + depends on X86 && PARAVIRT && EXPERIMENTAL && !X86_PAE && FUTEX select LGUEST_GUEST select HVC_DRIVER ---help--- diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c index 76c1e8e4a487..33dee3a773ed 100644 --- a/drivers/macintosh/mac_hid.c +++ b/drivers/macintosh/mac_hid.c @@ -13,6 +13,7 @@ #include <linux/sysctl.h> #include <linux/input.h> #include <linux/module.h> +#include <linux/kbd_kern.h> static struct input_dev *emumousebtn; diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c index dfdf11c1eec4..e2f84da09e7c 100644 --- a/drivers/macintosh/via-pmu68k.c +++ b/drivers/macintosh/via-pmu68k.c @@ -818,243 +818,3 @@ pmu_present(void) { return (pmu_kind != PMU_UNKNOWN); } - -#if 0 /* needs some work for 68K */ - -/* - * This struct is used to store config register values for - * PCI devices which may get powered off when we sleep. - */ -static struct pci_save { - u16 command; - u16 cache_lat; - u16 intr; -} *pbook_pci_saves; -static int n_pbook_pci_saves; - -static inline void -pbook_pci_save(void) -{ - int npci; - struct pci_dev *pd = NULL; - struct pci_save *ps; - - npci = 0; - while ((pd = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pd)) != NULL) - ++npci; - n_pbook_pci_saves = npci; - if (npci == 0) - return; - ps = kmalloc(npci * sizeof(*ps), GFP_KERNEL); - pbook_pci_saves = ps; - if (ps == NULL) - return; - - pd = NULL; - while ((pd = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pd)) != NULL) { - pci_read_config_word(pd, PCI_COMMAND, &ps->command); - pci_read_config_word(pd, PCI_CACHE_LINE_SIZE, &ps->cache_lat); - pci_read_config_word(pd, PCI_INTERRUPT_LINE, &ps->intr); - ++ps; - --npci; - } -} - -static inline void -pbook_pci_restore(void) -{ - u16 cmd; - struct pci_save *ps = pbook_pci_saves; - struct pci_dev *pd = NULL; - int j; - - while ((pd = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pd)) != NULL) { - if (ps->command == 0) - continue; - pci_read_config_word(pd, PCI_COMMAND, &cmd); - if ((ps->command & ~cmd) == 0) - continue; - switch (pd->hdr_type) { - case PCI_HEADER_TYPE_NORMAL: - for (j = 0; j < 6; ++j) - pci_write_config_dword(pd, - PCI_BASE_ADDRESS_0 + j*4, - pd->resource[j].start); - pci_write_config_dword(pd, PCI_ROM_ADDRESS, - pd->resource[PCI_ROM_RESOURCE].start); - pci_write_config_word(pd, PCI_CACHE_LINE_SIZE, - ps->cache_lat); - pci_write_config_word(pd, PCI_INTERRUPT_LINE, - ps->intr); - pci_write_config_word(pd, PCI_COMMAND, ps->command); - break; - /* other header types not restored at present */ - } - } -} - -/* - * Put the powerbook to sleep. - */ -#define IRQ_ENABLE ((unsigned int *)0xf3000024) -#define MEM_CTRL ((unsigned int *)0xf8000070) - -int powerbook_sleep(void) -{ - int ret, i, x; - static int save_backlight; - static unsigned int save_irqen; - unsigned long msr; - unsigned int hid0; - unsigned long p, wait; - struct adb_request sleep_req; - - /* Notify device drivers */ - ret = blocking_notifier_call_chain(&sleep_notifier_list, - PBOOK_SLEEP, NULL); - if (ret & NOTIFY_STOP_MASK) - return -EBUSY; - - /* Sync the disks. */ - /* XXX It would be nice to have some way to ensure that - * nobody is dirtying any new buffers while we wait. */ - sys_sync(); - - /* Turn off the display backlight */ - save_backlight = backlight_enabled; - if (save_backlight) - pmu_enable_backlight(0); - - /* Give the disks a little time to actually finish writing */ - for (wait = jiffies + (HZ/4); time_before(jiffies, wait); ) - mb(); - - /* Disable all interrupts except pmu */ - save_irqen = in_le32(IRQ_ENABLE); - for (i = 0; i < 32; ++i) - if (i != vias->intrs[0].line && (save_irqen & (1 << i))) - disable_irq(i); - asm volatile("mtdec %0" : : "r" (0x7fffffff)); - - /* Save the state of PCI config space for some slots */ - pbook_pci_save(); - - /* Set the memory controller to keep the memory refreshed - while we're asleep */ - for (i = 0x403f; i >= 0x4000; --i) { - out_be32(MEM_CTRL, i); - do { - x = (in_be32(MEM_CTRL) >> 16) & 0x3ff; - } while (x == 0); - if (x >= 0x100) - break; - } - - /* Ask the PMU to put us to sleep */ - pmu_request(&sleep_req, NULL, 5, PMU_SLEEP, 'M', 'A', 'T', 'T'); - while (!sleep_req.complete) - mb(); - /* displacement-flush the L2 cache - necessary? */ - for (p = KERNELBASE; p < KERNELBASE + 0x100000; p += 0x1000) - i = *(volatile int *)p; - asleep = 1; - - /* Put the CPU into sleep mode */ - asm volatile("mfspr %0,1008" : "=r" (hid0) :); - hid0 = (hid0 & ~(HID0_NAP | HID0_DOZE)) | HID0_SLEEP; - asm volatile("mtspr 1008,%0" : : "r" (hid0)); - local_save_flags(msr); - msr |= MSR_POW | MSR_EE; - local_irq_restore(msr); - udelay(10); - - /* OK, we're awake again, start restoring things */ - out_be32(MEM_CTRL, 0x3f); - pbook_pci_restore(); - - /* wait for the PMU interrupt sequence to complete */ - while (asleep) - mb(); - - /* reenable interrupts */ - for (i = 0; i < 32; ++i) - if (i != vias->intrs[0].line && (save_irqen & (1 << i))) - enable_irq(i); - - /* Notify drivers */ - blocking_notifier_call_chain(&sleep_notifier_list, PBOOK_WAKE, NULL); - - /* reenable ADB autopoll */ - pmu_adb_autopoll(adb_dev_map); - - /* Turn on the screen backlight, if it was on before */ - if (save_backlight) - pmu_enable_backlight(1); - - /* Wait for the hard disk to spin up */ - - return 0; -} - -/* - * Support for /dev/pmu device - */ -static int pmu_open(struct inode *inode, struct file *file) -{ - return 0; -} - -static ssize_t pmu_read(struct file *file, char *buf, - size_t count, loff_t *ppos) -{ - return 0; -} - -static ssize_t pmu_write(struct file *file, const char *buf, - size_t count, loff_t *ppos) -{ - return 0; -} - -static int pmu_ioctl(struct inode * inode, struct file *filp, - u_int cmd, u_long arg) -{ - int error; - __u32 value; - - switch (cmd) { - case PMU_IOC_SLEEP: - return -ENOSYS; - case PMU_IOC_GET_BACKLIGHT: - return put_user(backlight_level, (__u32 *)arg); - case PMU_IOC_SET_BACKLIGHT: - error = get_user(value, (__u32 *)arg); - if (!error) - pmu_set_brightness(value); - return error; - case PMU_IOC_GET_MODEL: - return put_user(pmu_kind, (__u32 *)arg); - } - return -EINVAL; -} - -static const struct file_operations pmu_device_fops = { - .read = pmu_read, - .write = pmu_write, - .ioctl = pmu_ioctl, - .open = pmu_open, -}; - -static struct miscdevice pmu_device = { - PMU_MINOR, "pmu", &pmu_device_fops -}; - -void pmu_device_init(void) -{ - if (!via) - return; - if (misc_register(&pmu_device) < 0) - printk(KERN_ERR "via-pmu68k: cannot register misc device.\n"); -} -#endif /* CONFIG_PMAC_PBOOK */ - diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 650991bddd8e..f33a729960ca 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1972,7 +1972,8 @@ static int run(mddev_t *mddev) !test_bit(In_sync, &disk->rdev->flags)) { disk->head_position = 0; mddev->degraded++; - conf->fullsync = 1; + if (disk->rdev) + conf->fullsync = 1; } } if (mddev->degraded == conf->raid_disks) { @@ -2153,11 +2154,25 @@ static int raid1_reshape(mddev_t *mddev) oldpool = conf->r1bio_pool; conf->r1bio_pool = newpool; - for (d=d2=0; d < conf->raid_disks; d++) - if (conf->mirrors[d].rdev) { - conf->mirrors[d].rdev->raid_disk = d2; - newmirrors[d2++].rdev = conf->mirrors[d].rdev; + for (d = d2 = 0; d < conf->raid_disks; d++) { + mdk_rdev_t *rdev = conf->mirrors[d].rdev; + if (rdev && rdev->raid_disk != d2) { + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); + rdev->raid_disk = d2; + sprintf(nm, "rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); + if (sysfs_create_link(&mddev->kobj, + &rdev->kobj, nm)) + printk(KERN_WARNING + "md/raid1: cannot register " + "%s for %s\n", + nm, mdname(mddev)); } + if (rdev) + newmirrors[d2++].rdev = rdev; + } kfree(conf->mirrors); conf->mirrors = newmirrors; kfree(conf->poolinfo); diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index 80c4a8463065..1cb33cac1237 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -892,7 +892,7 @@ static int m41t80_remove(struct i2c_client *client) static struct i2c_driver m41t80_driver = { .driver = { - .name = "m41t80", + .name = "rtc-m41t80", }, .probe = m41t80_probe, .remove = m41t80_remove, diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c index d94170728075..3e183cfee10f 100644 --- a/drivers/rtc/rtc-max6902.c +++ b/drivers/rtc/rtc-max6902.c @@ -13,7 +13,7 @@ * * 24-May-2006: Raphael Assenat <raph@8d.com> * - Major rework - * Converted to rtc_device and uses the SPI layer. + * Converted to rtc_device and uses the SPI layer. * * ??-???-2005: Someone at Compulab * - Initial driver creation. @@ -259,11 +259,11 @@ static int __devexit max6902_remove(struct spi_device *spi) static struct spi_driver max6902_driver = { .driver = { - .name = "max6902", + .name = "rtc-max6902", .bus = &spi_bus_type, .owner = THIS_MODULE, }, - .probe = max6902_probe, + .probe = max6902_probe, .remove = __devexit_p(max6902_remove), }; diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index d2b3898b750a..6f2c71ef47ee 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -367,6 +367,7 @@ config SCSI_3W_9XXX config SCSI_7000FASST tristate "7000FASST SCSI support" depends on ISA && SCSI && ISA_DMA_API + select CHECK_SIGNATURE help This driver supports the Western Digital 7000 SCSI host adapter family. Some information is in the source: @@ -388,6 +389,7 @@ config SCSI_AHA152X tristate "Adaptec AHA152X/2825 support" depends on ISA && SCSI && !64BIT select SCSI_SPI_ATTRS + select CHECK_SIGNATURE ---help--- This is a driver for the AHA-1510, AHA-1520, AHA-1522, and AHA-2825 SCSI host adapters. It also works for the AVA-1505, but the IRQ etc. @@ -583,6 +585,7 @@ config SCSI_DTC3280 tristate "DTC3180/3280 SCSI support" depends on ISA && SCSI select SCSI_SPI_ATTRS + select CHECK_SIGNATURE help This is support for DTC 3180/3280 SCSI Host Adapters. Please read the SCSI-HOWTO, available from @@ -657,6 +660,7 @@ config SCSI_EATA_PIO config SCSI_FUTURE_DOMAIN tristate "Future Domain 16xx SCSI/AHA-2920A support" depends on (ISA || PCI) && SCSI + select CHECK_SIGNATURE ---help--- This is support for Future Domain's 16-bit SCSI host adapters (TMC-1660/1680, TMC-1650/1670, TMC-3260, TMC-1610M/MER/MEX) and @@ -1324,6 +1328,7 @@ config SCSI_LPFC config SCSI_SEAGATE tristate "Seagate ST-02 and Future Domain TMC-8xx SCSI support" depends on X86 && ISA && SCSI + select CHECK_SIGNATURE ---help--- These are 8-bit SCSI controllers; the ST-01 is also supported by this driver. It is explained in section 3.9 of the SCSI-HOWTO, @@ -1397,6 +1402,7 @@ config SCSI_T128 tristate "Trantor T128/T128F/T228 SCSI support" depends on ISA && SCSI select SCSI_SPI_ATTRS + select CHECK_SIGNATURE ---help--- This is support for a SCSI host adapter. It is explained in section 3.11 of the SCSI-HOWTO, available from @@ -1561,7 +1567,7 @@ config A3000_SCSI built-in SCSI controller, say Y. Otherwise, say N. To compile this driver as a module, choose M here: the - module will be called wd33c93. + module will be called a3000. config A2091_SCSI tristate "A2091/A590 WD33C93A support" @@ -1571,7 +1577,7 @@ config A2091_SCSI say N. To compile this driver as a module, choose M here: the - module will be called wd33c93. + module will be called a2091. config GVP11_SCSI tristate "GVP Series II WD33C93A support" diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index 301313002f6b..f94109cbb46e 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -129,7 +129,16 @@ struct uart_8250_port { unsigned char mcr; unsigned char mcr_mask; /* mask of user bits */ unsigned char mcr_force; /* mask of forced bits */ - unsigned char lsr_break_flag; + + /* + * Some bits in registers are cleared on a read, so they must + * be saved whenever the register is read but the bits will not + * be immediately processed. + */ +#define LSR_SAVE_FLAGS UART_LSR_BRK_ERROR_BITS + unsigned char lsr_saved_flags; +#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA + unsigned char msr_saved_flags; /* * We provide a per-port pm hook. @@ -1238,6 +1247,7 @@ static void serial8250_start_tx(struct uart_port *port) if (up->bugs & UART_BUG_TXEN) { unsigned char lsr, iir; lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; iir = serial_in(up, UART_IIR) & 0x0f; if ((up->port.type == PORT_RM9000) ? (lsr & UART_LSR_THRE && @@ -1290,18 +1300,10 @@ receive_chars(struct uart_8250_port *up, unsigned int *status) flag = TTY_NORMAL; up->port.icount.rx++; -#ifdef CONFIG_SERIAL_8250_CONSOLE - /* - * Recover the break flag from console xmit - */ - if (up->port.line == up->port.cons->index) { - lsr |= up->lsr_break_flag; - up->lsr_break_flag = 0; - } -#endif + lsr |= up->lsr_saved_flags; + up->lsr_saved_flags = 0; - if (unlikely(lsr & (UART_LSR_BI | UART_LSR_PE | - UART_LSR_FE | UART_LSR_OE))) { + if (unlikely(lsr & UART_LSR_BRK_ERROR_BITS)) { /* * For statistics only */ @@ -1392,6 +1394,8 @@ static unsigned int check_modem_status(struct uart_8250_port *up) { unsigned int status = serial_in(up, UART_MSR); + status |= up->msr_saved_flags; + up->msr_saved_flags = 0; if (status & UART_MSR_ANY_DELTA && up->ier & UART_IER_MSI && up->port.info != NULL) { if (status & UART_MSR_TERI) @@ -1591,7 +1595,8 @@ static void serial8250_timeout(unsigned long data) static void serial8250_backup_timeout(unsigned long data) { struct uart_8250_port *up = (struct uart_8250_port *)data; - unsigned int iir, ier = 0; + unsigned int iir, ier = 0, lsr; + unsigned long flags; /* * Must disable interrupts or else we risk racing with the interrupt @@ -1610,9 +1615,13 @@ static void serial8250_backup_timeout(unsigned long data) * the "Diva" UART used on the management processor on many HP * ia64 and parisc boxes. */ + spin_lock_irqsave(&up->port.lock, flags); + lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + spin_unlock_irqrestore(&up->port.lock, flags); if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) && (!uart_circ_empty(&up->port.info->xmit) || up->port.x_char) && - (serial_in(up, UART_LSR) & UART_LSR_THRE)) { + (lsr & UART_LSR_THRE)) { iir &= ~(UART_IIR_ID | UART_IIR_NO_INT); iir |= UART_IIR_THRI; } @@ -1631,13 +1640,14 @@ static unsigned int serial8250_tx_empty(struct uart_port *port) { struct uart_8250_port *up = (struct uart_8250_port *)port; unsigned long flags; - unsigned int ret; + unsigned int lsr; spin_lock_irqsave(&up->port.lock, flags); - ret = serial_in(up, UART_LSR) & UART_LSR_TEMT ? TIOCSER_TEMT : 0; + lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; spin_unlock_irqrestore(&up->port.lock, flags); - return ret; + return lsr & UART_LSR_TEMT ? TIOCSER_TEMT : 0; } static unsigned int serial8250_get_mctrl(struct uart_port *port) @@ -1708,8 +1718,7 @@ static inline void wait_for_xmitr(struct uart_8250_port *up, int bits) do { status = serial_in(up, UART_LSR); - if (status & UART_LSR_BI) - up->lsr_break_flag = UART_LSR_BI; + up->lsr_saved_flags |= status & LSR_SAVE_FLAGS; if (--tmout == 0) break; @@ -1718,8 +1727,12 @@ static inline void wait_for_xmitr(struct uart_8250_port *up, int bits) /* Wait up to 1s for flow control if necessary */ if (up->port.flags & UPF_CONS_FLOW) { - tmout = 1000000; - while (!(serial_in(up, UART_MSR) & UART_MSR_CTS) && --tmout) { + unsigned int tmout; + for (tmout = 1000000; tmout; tmout--) { + unsigned int msr = serial_in(up, UART_MSR); + up->msr_saved_flags |= msr & MSR_SAVE_FLAGS; + if (msr & UART_MSR_CTS) + break; udelay(1); touch_nmi_watchdog(); } @@ -1889,6 +1902,18 @@ static int serial8250_startup(struct uart_port *port) spin_unlock_irqrestore(&up->port.lock, flags); /* + * Clear the interrupt registers again for luck, and clear the + * saved flags to avoid getting false values from polling + * routines or the previous session. + */ + serial_inp(up, UART_LSR); + serial_inp(up, UART_RX); + serial_inp(up, UART_IIR); + serial_inp(up, UART_MSR); + up->lsr_saved_flags = 0; + up->msr_saved_flags = 0; + + /* * Finally, enable interrupts. Note: Modem status interrupts * are set via set_termios(), which will be occurring imminently * anyway, so we don't enable them here. @@ -1906,14 +1931,6 @@ static int serial8250_startup(struct uart_port *port) (void) inb_p(icp); } - /* - * And clear the interrupt registers again for luck. - */ - (void) serial_inp(up, UART_LSR); - (void) serial_inp(up, UART_RX); - (void) serial_inp(up, UART_IIR); - (void) serial_inp(up, UART_MSR); - return 0; } @@ -2484,6 +2501,16 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count) wait_for_xmitr(up, BOTH_EMPTY); serial_out(up, UART_IER, ier); + /* + * The receive handling will happen properly because the + * receive ready bit will still be set; it is not cleared + * on read. However, modem control will not, we must + * call it if we have saved something in the saved flags + * while processing with interrupts off. + */ + if (up->msr_saved_flags) + check_modem_status(up); + if (locked) spin_unlock(&up->port.lock); local_irq_restore(flags); diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 5e485876f54c..bd66339f7a3f 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -580,6 +580,138 @@ static int pci_netmos_init(struct pci_dev *dev) return num_serial; } +/* + * ITE support by Niels de Vos <niels.devos@wincor-nixdorf.com> + * + * These chips are available with optionally one parallel port and up to + * two serial ports. Unfortunately they all have the same product id. + * + * Basic configuration is done over a region of 32 I/O ports. The base + * ioport is called INTA or INTC, depending on docs/other drivers. + * + * The region of the 32 I/O ports is configured in POSIO0R... + */ + +/* registers */ +#define ITE_887x_MISCR 0x9c +#define ITE_887x_INTCBAR 0x78 +#define ITE_887x_UARTBAR 0x7c +#define ITE_887x_PS0BAR 0x10 +#define ITE_887x_POSIO0 0x60 + +/* I/O space size */ +#define ITE_887x_IOSIZE 32 +/* I/O space size (bits 26-24; 8 bytes = 011b) */ +#define ITE_887x_POSIO_IOSIZE_8 (3 << 24) +/* I/O space size (bits 26-24; 32 bytes = 101b) */ +#define ITE_887x_POSIO_IOSIZE_32 (5 << 24) +/* Decoding speed (1 = slow, 2 = medium, 3 = fast) */ +#define ITE_887x_POSIO_SPEED (3 << 29) +/* enable IO_Space bit */ +#define ITE_887x_POSIO_ENABLE (1 << 31) + +static int __devinit pci_ite887x_init(struct pci_dev *dev) +{ + /* inta_addr are the configuration addresses of the ITE */ + static const short inta_addr[] = { 0x2a0, 0x2c0, 0x220, 0x240, 0x1e0, + 0x200, 0x280, 0 }; + int ret, i, type; + struct resource *iobase = NULL; + u32 miscr, uartbar, ioport; + + /* search for the base-ioport */ + i = 0; + while (inta_addr[i] && iobase == NULL) { + iobase = request_region(inta_addr[i], ITE_887x_IOSIZE, + "ite887x"); + if (iobase != NULL) { + /* write POSIO0R - speed | size | ioport */ + pci_write_config_dword(dev, ITE_887x_POSIO0, + ITE_887x_POSIO_ENABLE | ITE_887x_POSIO_SPEED | + ITE_887x_POSIO_IOSIZE_32 | inta_addr[i]); + /* write INTCBAR - ioport */ + pci_write_config_dword(dev, ITE_887x_INTCBAR, inta_addr[i]); + ret = inb(inta_addr[i]); + if (ret != 0xff) { + /* ioport connected */ + break; + } + release_region(iobase->start, ITE_887x_IOSIZE); + iobase = NULL; + } + i++; + } + + if (!inta_addr[i]) { + printk(KERN_ERR "ite887x: could not find iobase\n"); + return -ENODEV; + } + + /* start of undocumented type checking (see parport_pc.c) */ + type = inb(iobase->start + 0x18) & 0x0f; + + switch (type) { + case 0x2: /* ITE8871 (1P) */ + case 0xa: /* ITE8875 (1P) */ + ret = 0; + break; + case 0xe: /* ITE8872 (2S1P) */ + ret = 2; + break; + case 0x6: /* ITE8873 (1S) */ + ret = 1; + break; + case 0x8: /* ITE8874 (2S) */ + ret = 2; + break; + default: + moan_device("Unknown ITE887x", dev); + ret = -ENODEV; + } + + /* configure all serial ports */ + for (i = 0; i < ret; i++) { + /* read the I/O port from the device */ + pci_read_config_dword(dev, ITE_887x_PS0BAR + (0x4 * (i + 1)), + &ioport); + ioport &= 0x0000FF00; /* the actual base address */ + pci_write_config_dword(dev, ITE_887x_POSIO0 + (0x4 * (i + 1)), + ITE_887x_POSIO_ENABLE | ITE_887x_POSIO_SPEED | + ITE_887x_POSIO_IOSIZE_8 | ioport); + + /* write the ioport to the UARTBAR */ + pci_read_config_dword(dev, ITE_887x_UARTBAR, &uartbar); + uartbar &= ~(0xffff << (16 * i)); /* clear half the reg */ + uartbar |= (ioport << (16 * i)); /* set the ioport */ + pci_write_config_dword(dev, ITE_887x_UARTBAR, uartbar); + + /* get current config */ + pci_read_config_dword(dev, ITE_887x_MISCR, &miscr); + /* disable interrupts (UARTx_Routing[3:0]) */ + miscr &= ~(0xf << (12 - 4 * i)); + /* activate the UART (UARTx_En) */ + miscr |= 1 << (23 - i); + /* write new config with activated UART */ + pci_write_config_dword(dev, ITE_887x_MISCR, miscr); + } + + if (ret <= 0) { + /* the device has no UARTs if we get here */ + release_region(iobase->start, ITE_887x_IOSIZE); + } + + return ret; +} + +static void __devexit pci_ite887x_exit(struct pci_dev *dev) +{ + u32 ioport; + /* the ioport is bit 0-15 in POSIO0R */ + pci_read_config_dword(dev, ITE_887x_POSIO0, &ioport); + ioport &= 0xffff; + release_region(ioport, ITE_887x_IOSIZE); +} + static int pci_default_setup(struct serial_private *priv, struct pciserial_board *board, struct uart_port *port, int idx) @@ -653,6 +785,18 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .setup = pci_default_setup, }, /* + * ITE + */ + { + .vendor = PCI_VENDOR_ID_ITE, + .device = PCI_DEVICE_ID_ITE_8872, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .init = pci_ite887x_init, + .setup = pci_default_setup, + .exit = __devexit_p(pci_ite887x_exit), + }, + /* * Panacom */ { @@ -933,6 +1077,7 @@ enum pci_board_num_t { pbn_b1_2_1250000, + pbn_b1_bt_1_115200, pbn_b1_bt_2_921600, pbn_b1_1_1382400, @@ -983,6 +1128,7 @@ enum pci_board_num_t { pbn_exar_XR17C152, pbn_exar_XR17C154, pbn_exar_XR17C158, + pbn_pasemi_1682M, }; /* @@ -1211,6 +1357,13 @@ static struct pciserial_board pci_boards[] __devinitdata = { .uart_offset = 8, }, + [pbn_b1_bt_1_115200] = { + .flags = FL_BASE1|FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [pbn_b1_bt_2_921600] = { .flags = FL_BASE1|FL_BASE_BARS, .num_ports = 2, @@ -1498,6 +1651,18 @@ static struct pciserial_board pci_boards[] __devinitdata = { .base_baud = 921600, .uart_offset = 0x200, }, + /* + * PA Semi PWRficient PA6T-1682M on-chip UART + */ + [pbn_pasemi_1682M] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 8333333, + }, +}; + +static const struct pci_device_id softmodem_blacklist[] = { + { PCI_VDEVICE ( AL, 0x5457 ), }, /* ALi Corporation M5457 AC'97 Modem */ }; /* @@ -1508,6 +1673,7 @@ static struct pciserial_board pci_boards[] __devinitdata = { static int __devinit serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board) { + const struct pci_device_id *blacklist; int num_iomem, num_port, first_port = -1, i; /* @@ -1522,6 +1688,18 @@ serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board) (dev->class & 0xff) > 6) return -ENODEV; + /* + * Do not access blacklisted devices that are known not to + * feature serial ports. + */ + for (blacklist = softmodem_blacklist; + blacklist < softmodem_blacklist + ARRAY_SIZE(softmodem_blacklist); + blacklist++) { + if (dev->vendor == blacklist->vendor && + dev->device == blacklist->device) + return -ENODEV; + } + num_iomem = num_port = 0; for (i = 0; i < PCI_NUM_BAR_RESOURCES; i++) { if (pci_resource_flags(dev, i) & IORESOURCE_IO) { @@ -2364,6 +2542,13 @@ static struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_TOPIC, PCI_DEVICE_ID_TOPIC_TP560, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_1_115200 }, + /* + * ITE + */ + { PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8872, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b1_bt_1_115200 }, /* * IntaShield IS-200 @@ -2382,6 +2567,13 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_SUBVENDOR_ID_PERLE, PCI_SUBDEVICE_ID_PCI_RAS8, 0, 0, pbn_b2_8_921600 }, /* + * PA Semi PA6T-1682M on-chip UART + */ + { PCI_VENDOR_ID_PASEMI, 0xa004, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_pasemi_1682M }, + + /* * These entries match devices with class COMMUNICATION_SERIAL, * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL */ diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c index 030a6063541d..a055f58f342f 100644 --- a/drivers/serial/serial_core.c +++ b/drivers/serial/serial_core.c @@ -1146,11 +1146,14 @@ static void uart_set_termios(struct tty_struct *tty, struct ktermios *old_termio /* * These are the bits that are used to setup various - * flags in the low level driver. + * flags in the low level driver. We can ignore the Bfoo + * bits in c_cflag; c_[io]speed will always be set + * appropriately by set_termios() in tty_ioctl.c */ #define RELEVANT_IFLAG(iflag) ((iflag) & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) - if ((cflag ^ old_termios->c_cflag) == 0 && + tty->termios->c_ospeed == old_termios->c_ospeed && + tty->termios->c_ispeed == old_termios->c_ispeed && RELEVANT_IFLAG(tty->termios->c_iflag ^ old_termios->c_iflag) == 0) return; diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c index b8f91e018b21..0930e2a85514 100644 --- a/drivers/serial/serial_txx9.c +++ b/drivers/serial/serial_txx9.c @@ -37,7 +37,7 @@ #include <asm/io.h> -static char *serial_version = "1.09"; +static char *serial_version = "1.10"; static char *serial_name = "TX39/49 Serial driver"; #define PASS_LIMIT 256 @@ -436,8 +436,10 @@ static unsigned int serial_txx9_get_mctrl(struct uart_port *port) struct uart_txx9_port *up = (struct uart_txx9_port *)port; unsigned int ret; - ret = ((sio_in(up, TXX9_SIFLCR) & TXX9_SIFLCR_RTSSC) ? 0 : TIOCM_RTS) - | ((sio_in(up, TXX9_SICISR) & TXX9_SICISR_CTSS) ? 0 : TIOCM_CTS); + /* no modem control lines */ + ret = TIOCM_CAR | TIOCM_DSR; + ret |= (sio_in(up, TXX9_SIFLCR) & TXX9_SIFLCR_RTSSC) ? 0 : TIOCM_RTS; + ret |= (sio_in(up, TXX9_SICISR) & TXX9_SICISR_CTSS) ? 0 : TIOCM_CTS; return ret; } @@ -557,6 +559,12 @@ serial_txx9_set_termios(struct uart_port *port, struct ktermios *termios, unsigned long flags; unsigned int baud, quot; + /* + * We don't support modem control lines. + */ + termios->c_cflag &= ~(HUPCL | CMSPAR); + termios->c_cflag |= CLOCAL; + cval = sio_in(up, TXX9_SILCR); /* byte size and parity */ cval &= ~TXX9_SILCR_UMODE_MASK; diff --git a/drivers/video/au1100fb.c b/drivers/video/au1100fb.c index 80a81eccad36..832e4613673a 100644 --- a/drivers/video/au1100fb.c +++ b/drivers/video/au1100fb.c @@ -115,6 +115,52 @@ static int nocursor = 0; module_param(nocursor, int, 0644); MODULE_PARM_DESC(nocursor, "cursor enable/disable"); +/* fb_blank + * Blank the screen. Depending on the mode, the screen will be + * activated with the backlight color, or desactivated + */ +static int au1100fb_fb_blank(int blank_mode, struct fb_info *fbi) +{ + struct au1100fb_device *fbdev = to_au1100fb_device(fbi); + + print_dbg("fb_blank %d %p", blank_mode, fbi); + + switch (blank_mode) { + + case VESA_NO_BLANKING: + /* Turn on panel */ + fbdev->regs->lcd_control |= LCD_CONTROL_GO; +#ifdef CONFIG_MIPS_PB1100 + if (drv_info.panel_idx == 1) { + au_writew(au_readw(PB1100_G_CONTROL) + | (PB1100_G_CONTROL_BL | PB1100_G_CONTROL_VDD), + PB1100_G_CONTROL); + } +#endif + au_sync(); + break; + + case VESA_VSYNC_SUSPEND: + case VESA_HSYNC_SUSPEND: + case VESA_POWERDOWN: + /* Turn off panel */ + fbdev->regs->lcd_control &= ~LCD_CONTROL_GO; +#ifdef CONFIG_MIPS_PB1100 + if (drv_info.panel_idx == 1) { + au_writew(au_readw(PB1100_G_CONTROL) + & ~(PB1100_G_CONTROL_BL | PB1100_G_CONTROL_VDD), + PB1100_G_CONTROL); + } +#endif + au_sync(); + break; + default: + break; + + } + return 0; +} + /* * Set hardware with var settings. This will enable the controller with a specific * mode, normally validated with the fb_check_var method @@ -272,52 +318,6 @@ int au1100fb_fb_setcolreg(unsigned regno, unsigned red, unsigned green, unsigned return 0; } -/* fb_blank - * Blank the screen. Depending on the mode, the screen will be - * activated with the backlight color, or desactivated - */ -int au1100fb_fb_blank(int blank_mode, struct fb_info *fbi) -{ - struct au1100fb_device *fbdev = to_au1100fb_device(fbi); - - print_dbg("fb_blank %d %p", blank_mode, fbi); - - switch (blank_mode) { - - case VESA_NO_BLANKING: - /* Turn on panel */ - fbdev->regs->lcd_control |= LCD_CONTROL_GO; -#ifdef CONFIG_MIPS_PB1100 - if (drv_info.panel_idx == 1) { - au_writew(au_readw(PB1100_G_CONTROL) - | (PB1100_G_CONTROL_BL | PB1100_G_CONTROL_VDD), - PB1100_G_CONTROL); - } -#endif - au_sync(); - break; - - case VESA_VSYNC_SUSPEND: - case VESA_HSYNC_SUSPEND: - case VESA_POWERDOWN: - /* Turn off panel */ - fbdev->regs->lcd_control &= ~LCD_CONTROL_GO; -#ifdef CONFIG_MIPS_PB1100 - if (drv_info.panel_idx == 1) { - au_writew(au_readw(PB1100_G_CONTROL) - & ~(PB1100_G_CONTROL_BL | PB1100_G_CONTROL_VDD), - PB1100_G_CONTROL); - } -#endif - au_sync(); - break; - default: - break; - - } - return 0; -} - /* fb_pan_display * Pan display in x and/or y as specified */ diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c index 7fa1afeae8dc..dda0586ab3f3 100644 --- a/drivers/video/console/newport_con.c +++ b/drivers/video/console/newport_con.c @@ -738,9 +738,8 @@ const struct consw newport_con = { #ifdef MODULE static int __init newport_console_init(void) { - if (!sgi_gfxaddr) - return NULL; + return 0; if (!npregs) npregs = (struct newport_regs *)/* ioremap cannot fail */ diff --git a/drivers/video/imsttfb.c b/drivers/video/imsttfb.c index 5715b8ad0ddc..94f4511023d8 100644 --- a/drivers/video/imsttfb.c +++ b/drivers/video/imsttfb.c @@ -1391,7 +1391,7 @@ init_imstt(struct fb_info *info) } } -#if USE_NV_MODES && defined(CONFIG_PPC) +#if USE_NV_MODES && defined(CONFIG_PPC32) { int vmode = init_vmode, cmode = init_cmode; diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 2fbd8dd16df5..6840dfebe4d4 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -170,22 +170,24 @@ void __w1_remove_master_device(struct w1_master *dev) void w1_remove_master_device(struct w1_bus_master *bm) { - struct w1_master *dev = NULL; + struct w1_master *dev, *found = NULL; list_for_each_entry(dev, &w1_masters, w1_master_entry) { if (!dev->initialized) continue; - if (dev->bus_master->data == bm->data) + if (dev->bus_master->data == bm->data) { + found = dev; break; + } } - if (!dev) { + if (!found) { printk(KERN_ERR "Device doesn't exist.\n"); return; } - __w1_remove_master_device(dev); + __w1_remove_master_device(found); } EXPORT_SYMBOL(w1_add_master_device); diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c index 9130f1c12c26..808b4f8675c5 100644 --- a/drivers/zorro/zorro-sysfs.c +++ b/drivers/zorro/zorro-sysfs.c @@ -78,7 +78,7 @@ static ssize_t zorro_read_config(struct kobject *kobj, static struct bin_attribute zorro_config_attr = { .attr = { .name = "config", - .mode = S_IRUGO | S_IWUSR, + .mode = S_IRUGO, }, .size = sizeof(struct ConfigDev), .read = zorro_read_config, diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 2d4c8a3e604e..45ff3d63b758 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -587,19 +587,20 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name); if (!unhashed) { /* - * Mark the dentry incomplete, but add it. This is needed so - * that the VFS layer knows about the dentry, and we can count - * on catching any lookups through the revalidate. - * - * Let all the hard work be done by the revalidate function that - * needs to be able to do this anyway.. - * - * We need to do this before we release the directory semaphore. + * Mark the dentry incomplete but don't hash it. We do this + * to serialize our inode creation operations (symlink and + * mkdir) which prevents deadlock during the callback to + * the daemon. Subsequent user space lookups for the same + * dentry are placed on the wait queue while the daemon + * itself is allowed passage unresticted so the create + * operation itself can then hash the dentry. Finally, + * we check for the hashed dentry and return the newly + * hashed dentry. */ dentry->d_op = &autofs4_root_dentry_operations; dentry->d_fsdata = NULL; - d_add(dentry, NULL); + d_instantiate(dentry, NULL); } else { struct autofs_info *ino = autofs4_dentry_ino(unhashed); DPRINTK("rehash %p with %p", dentry, unhashed); @@ -607,15 +608,17 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * If we are racing with expire the request might not * be quite complete but the directory has been removed * so it must have been successful, so just wait for it. + * We need to ensure the AUTOFS_INF_EXPIRING flag is clear + * before continuing as revalidate may fail when calling + * try_to_fill_dentry (returning EAGAIN) if we don't. */ - if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { + while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { DPRINTK("wait for incomplete expire %p name=%.*s", unhashed, unhashed->d_name.len, unhashed->d_name.name); autofs4_wait(sbi, unhashed, NFY_NONE); DPRINTK("request completed"); } - d_rehash(unhashed); dentry = unhashed; } @@ -658,7 +661,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s * for all system calls, but it should be OK for the operations * we permit from an autofs. */ - if (dentry->d_inode && d_unhashed(dentry)) { + if (!oz_mode && d_unhashed(dentry)) { /* * A user space application can (and has done in the past) * remove and re-create this directory during the callback. @@ -716,7 +719,7 @@ static int autofs4_dir_symlink(struct inode *dir, strcpy(cp, symname); inode = autofs4_get_inode(dir->i_sb, ino); - d_instantiate(dentry, inode); + d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) dentry->d_op = &autofs4_root_dentry_operations; @@ -844,7 +847,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) return -ENOSPC; inode = autofs4_get_inode(dir->i_sb, ino); - d_instantiate(dentry, inode); + d_add(dentry, inode); if (dir == dir->i_sb->s_root->d_inode) dentry->d_op = &autofs4_root_dentry_operations; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 131954b3fb98..5d40ad13ab5c 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -357,6 +357,10 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n"); goto out; } + if (special_file(lower_inode->i_mode)) { + ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n"); + goto out; + } if (!nd) { ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave" "as we *think* we are about to unlink\n"); diff --git a/fs/exec.c b/fs/exec.c index ce62f7b65f17..c21a8cc06277 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -780,18 +780,11 @@ static int de_thread(struct task_struct *tsk) int count; /* - * Tell all the sighand listeners that this sighand has - * been detached. The signalfd_detach() function grabs the - * sighand lock, if signal listeners are present on the sighand. - */ - signalfd_detach(tsk); - - /* * If we don't share sighandlers, then we aren't sharing anything * and we can just re-use it all. */ if (atomic_read(&oldsighand->count) <= 1) { - BUG_ON(atomic_read(&sig->count) != 1); + signalfd_detach(tsk); exit_itimers(sig); return 0; } @@ -930,12 +923,11 @@ static int de_thread(struct task_struct *tsk) sig->flags = 0; no_thread_group: + signalfd_detach(tsk); exit_itimers(sig); if (leader) release_task(leader); - BUG_ON(atomic_read(&sig->count) != 1); - if (atomic_read(&oldsighand->count) == 1) { /* * Now that we nuked the rest of the thread group, diff --git a/fs/signalfd.c b/fs/signalfd.c index 7b941abbcde0..a8e293d30034 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -56,12 +56,18 @@ static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk) sighand = lock_task_sighand(lk->tsk, &lk->flags); rcu_read_unlock(); - if (sighand && !ctx->tsk) { + if (!sighand) + return 0; + + if (!ctx->tsk) { unlock_task_sighand(lk->tsk, &lk->flags); - sighand = NULL; + return 0; } - return sighand != NULL; + if (lk->tsk->tgid == current->tgid) + lk->tsk = current; + + return 1; } static void signalfd_unlock(struct signalfd_lockctx *lk) @@ -331,7 +337,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas init_waitqueue_head(&ctx->wqh); ctx->sigmask = sigmask; - ctx->tsk = current; + ctx->tsk = current->group_leader; sighand = current->sighand; /* diff --git a/include/asm-m68k/ioctls.h b/include/asm-m68k/ioctls.h index 0c48929ab444..b8d2f4be7fd7 100644 --- a/include/asm-m68k/ioctls.h +++ b/include/asm-m68k/ioctls.h @@ -46,6 +46,10 @@ #define TIOCSBRK 0x5427 /* BSD compatibility */ #define TIOCCBRK 0x5428 /* BSD compatibility */ #define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TCGETS2 _IOR('T',0x2A, struct termios2) +#define TCSETS2 _IOW('T',0x2B, struct termios2) +#define TCSETSW2 _IOW('T',0x2C, struct termios2) +#define TCSETSF2 _IOW('T',0x2D, struct termios2) #define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ #define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h index 9e6d0d6debdb..1431ea0b59e0 100644 --- a/include/asm-m68k/page.h +++ b/include/asm-m68k/page.h @@ -4,17 +4,15 @@ #ifdef __KERNEL__ +#include <linux/const.h> + /* PAGE_SHIFT determines the page size */ #ifndef CONFIG_SUN3 #define PAGE_SHIFT (12) #else #define PAGE_SHIFT (13) #endif -#ifdef __ASSEMBLY__ -#define PAGE_SIZE (1 << PAGE_SHIFT) -#else -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#endif +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #include <asm/setup.h> @@ -27,6 +25,8 @@ #ifndef __ASSEMBLY__ +#include <linux/compiler.h> + #include <asm/module.h> #define get_user_page(vaddr) __get_free_page(GFP_KERNEL) diff --git a/include/asm-m68k/processor.h b/include/asm-m68k/processor.h index 8455f778b601..4453ec379c5d 100644 --- a/include/asm-m68k/processor.h +++ b/include/asm-m68k/processor.h @@ -38,12 +38,8 @@ static inline void wrusp(unsigned long usp) #ifndef CONFIG_SUN3 #define TASK_SIZE (0xF0000000UL) #else -#ifdef __ASSEMBLY__ -#define TASK_SIZE (0x0E000000) -#else #define TASK_SIZE (0x0E000000UL) #endif -#endif /* This decides where the kernel will search for a free chunk of vm * space during mmap's. diff --git a/include/asm-m68k/termbits.h b/include/asm-m68k/termbits.h index 0e520f328f53..8c14170996bb 100644 --- a/include/asm-m68k/termbits.h +++ b/include/asm-m68k/termbits.h @@ -141,6 +141,7 @@ struct ktermios { #define HUPCL 0002000 #define CLOCAL 0004000 #define CBAUDEX 0010000 +#define BOTHER 0010000 #define B57600 0010001 #define B115200 0010002 #define B230400 0010003 @@ -156,10 +157,12 @@ struct ktermios { #define B3000000 0010015 #define B3500000 0010016 #define B4000000 0010017 -#define CIBAUD 002003600000 /* input baud rate (not used) */ +#define CIBAUD 002003600000 /* input baud rate */ #define CMSPAR 010000000000 /* mark or space (stick) parity */ #define CRTSCTS 020000000000 /* flow control */ +#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */ + /* c_lflag bits */ #define ISIG 0000001 #define ICANON 0000002 diff --git a/include/asm-m68k/termios.h b/include/asm-m68k/termios.h index 00edabd76168..0823032e4045 100644 --- a/include/asm-m68k/termios.h +++ b/include/asm-m68k/termios.h @@ -82,8 +82,10 @@ struct termio { copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ }) -#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios)) -#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios)) +#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2)) +#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2)) +#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) +#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) #endif /* __KERNEL__ */ diff --git a/include/asm-m68knommu/pgtable.h b/include/asm-m68knommu/pgtable.h index e1e6a1d2333a..46251016e821 100644 --- a/include/asm-m68knommu/pgtable.h +++ b/include/asm-m68knommu/pgtable.h @@ -65,4 +65,6 @@ extern unsigned int kobjsize(const void *objp); #define VMALLOC_START 0 #define VMALLOC_END 0xffffffff +#include <asm-generic/pgtable.h> + #endif /* _M68KNOMMU_PGTABLE_H */ diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index 506ad20c18f8..8bdb16bfe5fb 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -161,4 +161,7 @@ static inline void con_schedule_flip(struct tty_struct *t) schedule_delayed_work(&t->buf.work, 0); } +/* mac_hid.c */ +extern int mac_hid_mouse_emulate_buttons(int, unsigned int, int); + #endif diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index e147cf50529f..5bdd656e88cf 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -166,7 +166,7 @@ extern enum zone_type policy_zone; static inline void check_highest_zone(enum zone_type k) { - if (k > policy_zone) + if (k > policy_zone && k != ZONE_MOVABLE) policy_zone = k; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 655094dc9440..1692dd6cb915 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1042,7 +1042,7 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma, } /* mmap.c */ -extern int __vm_enough_memory(long pages, int cap_sys_admin); +extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); extern void vma_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); extern struct vm_area_struct *vma_merge(struct mm_struct *, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3ea68cd3b61f..4e5627379b09 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -410,6 +410,24 @@ struct zonelist { #endif }; +#ifdef CONFIG_NUMA +/* + * Only custom zonelists like MPOL_BIND need to be filtered as part of + * policies. As described in the comment for struct zonelist_cache, these + * zonelists will not have a zlcache so zlcache_ptr will not be set. Use + * that to determine if the zonelists needs to be filtered or not. + */ +static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) +{ + return !zonelist->zlcache_ptr; +} +#else +static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) +{ + return 0; +} +#endif /* CONFIG_NUMA */ + #ifdef CONFIG_ARCH_POPULATES_NODE_MAP struct node_active_region { unsigned long start_pfn; diff --git a/include/linux/security.h b/include/linux/security.h index c11dc8aa0351..1a15526e9f67 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -54,7 +54,7 @@ extern int cap_inode_removexattr(struct dentry *dentry, char *name); extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); extern void cap_task_reparent_to_init (struct task_struct *p); extern int cap_syslog (int type); -extern int cap_vm_enough_memory (long pages); +extern int cap_vm_enough_memory (struct mm_struct *mm, long pages); struct msghdr; struct sk_buff; @@ -1125,6 +1125,7 @@ struct request_sock; * Return 0 if permission is granted. * @vm_enough_memory: * Check permissions for allocating a new virtual mapping. + * @mm contains the mm struct it is being added to. * @pages contains the number of pages. * Return 0 if permission is granted. * @@ -1169,7 +1170,7 @@ struct security_operations { int (*quota_on) (struct dentry * dentry); int (*syslog) (int type); int (*settime) (struct timespec *ts, struct timezone *tz); - int (*vm_enough_memory) (long pages); + int (*vm_enough_memory) (struct mm_struct *mm, long pages); int (*bprm_alloc_security) (struct linux_binprm * bprm); void (*bprm_free_security) (struct linux_binprm * bprm); @@ -1469,10 +1470,14 @@ static inline int security_settime(struct timespec *ts, struct timezone *tz) return security_ops->settime(ts, tz); } - static inline int security_vm_enough_memory(long pages) { - return security_ops->vm_enough_memory(pages); + return security_ops->vm_enough_memory(current->mm, pages); +} + +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) +{ + return security_ops->vm_enough_memory(mm, pages); } static inline int security_bprm_alloc (struct linux_binprm *bprm) @@ -2219,7 +2224,12 @@ static inline int security_settime(struct timespec *ts, struct timezone *tz) static inline int security_vm_enough_memory(long pages) { - return cap_vm_enough_memory(pages); + return cap_vm_enough_memory(current->mm, pages); +} + +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) +{ + return cap_vm_enough_memory(mm, pages); } static inline int security_bprm_alloc (struct linux_binprm *bprm) diff --git a/include/linux/selection.h b/include/linux/selection.h index ed3408b400f1..f9457861937c 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -10,6 +10,8 @@ #include <linux/tiocl.h> #include <linux/vt_buffer.h> +struct tty_struct; + extern struct vc_data *sel_cons; extern void clear_selection(void); diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 1c5ed7d92b0f..96c0d93fc2ca 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -118,6 +118,7 @@ #define UART_LSR_PE 0x04 /* Parity error indicator */ #define UART_LSR_OE 0x02 /* Overrun error indicator */ #define UART_LSR_DR 0x01 /* Receiver data ready */ +#define UART_LSR_BRK_ERROR_BITS 0x1E /* BI, FE, PE, OE bits */ #define UART_MSR 6 /* In: Modem Status Register */ #define UART_MSR_DCD 0x80 /* Data Carrier Detect */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 3401293359e8..04f3ffb8d9d4 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2023,7 +2023,7 @@ int __audit_signal_info(int sig, struct task_struct *t) axp->d.next = ctx->aux_pids; ctx->aux_pids = (void *)axp; } - BUG_ON(axp->pid_count > AUDIT_AUX_PIDS); + BUG_ON(axp->pid_count >= AUDIT_AUX_PIDS); axp->target_pid[axp->pid_count] = t->tgid; selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]); diff --git a/kernel/futex.c b/kernel/futex.c index 3415e9ad1391..e8935b195e88 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1670,6 +1670,7 @@ pi_faulted: attempt); if (ret) goto out; + uval = 0; goto retry_unlocked; } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 203a518b6f14..853aefbd184b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -462,7 +462,9 @@ void free_irq(unsigned int irq, void *dev_id) * We do this after actually deregistering it, to make sure that * a 'real' IRQ doesn't run in parallel with our fake */ + local_irq_save(flags); handler(irq, dev_id); + local_irq_restore(flags); } #endif } diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 55b3761edaa9..7a15afb73ed0 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -547,9 +547,9 @@ sys_timer_create(const clockid_t which_clock, new_timer->it_process = process; list_add(&new_timer->list, &process->signal->posix_timers); - spin_unlock_irqrestore(&process->sighand->siglock, flags); if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) get_task_struct(process); + spin_unlock_irqrestore(&process->sighand->siglock, flags); } else { spin_unlock_irqrestore(&process->sighand->siglock, flags); process = NULL; @@ -605,13 +605,14 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id); if (timr) { spin_lock(&timr->it_lock); - spin_unlock(&idr_lock); if ((timr->it_id != timer_id) || !(timr->it_process) || timr->it_process->tgid != current->tgid) { - unlock_timer(timr, *flags); + spin_unlock(&timr->it_lock); + spin_unlock_irqrestore(&idr_lock, *flags); timr = NULL; - } + } else + spin_unlock(&idr_lock); } else spin_unlock_irqrestore(&idr_lock, *flags); diff --git a/kernel/signal.c b/kernel/signal.c index b27c01a66448..ad63109e413c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -378,7 +378,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ - if (tsk == current) + if (likely(tsk == current)) signr = __dequeue_signal(&tsk->pending, mask, info); if (!signr) { signr = __dequeue_signal(&tsk->signal->shared_pending, @@ -425,7 +425,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; } - if ( signr && + if (signr && likely(tsk == current) && ((info->si_code & __SI_MASK) == __SI_TIMER) && info->si_sys_private){ /* diff --git a/lib/Kconfig b/lib/Kconfig index e5c2c514174a..ba3d104994d9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -138,4 +138,7 @@ config HAS_DMA depends on !NO_DMA default y +config CHECK_SIGNATURE + bool + endmenu diff --git a/lib/Makefile b/lib/Makefile index d9e5f1cd0bfb..6b0ba8cf4e5f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -21,7 +21,8 @@ CFLAGS_kobject_uevent.o += -DDEBUG endif obj-$(CONFIG_GENERIC_IOMAP) += iomap.o -obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o check_signature.o +obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o +obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d7ca59d66c59..de4cf458d6e1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -643,7 +643,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, spin_unlock(&mm->page_table_lock); ret = hugetlb_fault(mm, vma, vaddr, 0); spin_lock(&mm->page_table_lock); - if (!(ret & VM_FAULT_MAJOR)) + if (!(ret & VM_FAULT_ERROR)) continue; remainder = 0; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 71b84b45154a..172abffeb2e3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -149,7 +149,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes) lower zones etc. Avoid empty zones because the memory allocator doesn't like them. If you implement node hot removal you have to fix that. */ - k = policy_zone; + k = MAX_NR_ZONES - 1; while (1) { for_each_node_mask(nd, *nodes) { struct zone *z = &NODE_DATA(nd)->node_zones[k]; diff --git a/mm/mmap.c b/mm/mmap.c index b6537211b9cc..0d40e66c841b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -93,7 +93,7 @@ atomic_t vm_committed_space = ATOMIC_INIT(0); * Note this is a helper function intended to be used by LSMs which * wish to use this logic. */ -int __vm_enough_memory(long pages, int cap_sys_admin) +int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { unsigned long free, allowed; @@ -166,7 +166,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) /* Don't let a single process grow too big: leave 3% of the size of this process for other processes */ - allowed -= current->mm->total_vm / 32; + allowed -= mm->total_vm / 32; /* * cast `allowed' as a signed long because vm_committed_space @@ -2077,7 +2077,7 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) if (__vma && __vma->vm_start < vma->vm_end) return -ENOMEM; if ((vma->vm_flags & VM_ACCOUNT) && - security_vm_enough_memory(vma_pages(vma))) + security_vm_enough_memory_mm(mm, vma_pages(vma))) return -ENOMEM; vma_link(mm, vma, prev, rb_link, rb_parent); return 0; diff --git a/mm/nommu.c b/mm/nommu.c index 9eef6a398555..8ed0cb43118a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1270,7 +1270,7 @@ EXPORT_SYMBOL(get_unmapped_area); * Note this is a helper function intended to be used by LSMs which * wish to use this logic. */ -int __vm_enough_memory(long pages, int cap_sys_admin) +int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { unsigned long free, allowed; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3da85b81dabb..6427653023aa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1157,6 +1157,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ int zlc_active = 0; /* set if using zonelist_cache */ int did_zlc_setup = 0; /* just call zlc_setup() one time */ + enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */ zonelist_scan: /* @@ -1166,6 +1167,18 @@ zonelist_scan: z = zonelist->zones; do { + /* + * In NUMA, this could be a policy zonelist which contains + * zones that may not be allowed by the current gfp_mask. + * Check the zone is allowed by the current flags + */ + if (unlikely(alloc_should_filter_zonelist(zonelist))) { + if (highest_zoneidx == -1) + highest_zoneidx = gfp_zone(gfp_mask); + if (zone_idx(*z) > highest_zoneidx) + continue; + } + if (NUMA_BUILD && zlc_active && !zlc_zone_worth_trying(zonelist, z, allowednodes)) continue; diff --git a/mm/slab.c b/mm/slab.c index a684778b2b41..6f6abef83a1a 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -883,6 +883,7 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, */ static int use_alien_caches __read_mostly = 1; +static int numa_platform __read_mostly = 1; static int __init noaliencache_setup(char *s) { use_alien_caches = 0; @@ -1399,8 +1400,10 @@ void __init kmem_cache_init(void) int order; int node; - if (num_possible_nodes() == 1) + if (num_possible_nodes() == 1) { use_alien_caches = 0; + numa_platform = 0; + } for (i = 0; i < NUM_INIT_LISTS; i++) { kmem_list3_init(&initkmem_list3[i]); @@ -3558,7 +3561,14 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) check_irq_off(); objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); - if (cache_free_alien(cachep, objp)) + /* + * Skip calling cache_free_alien() when the platform is not numa. + * This will avoid cache misses that happen while accessing slabp (which + * is per page memory reference) to get nodeid. Instead use a global + * variable to skip the call, which is mostly likely to be present in + * the cache. + */ + if (numa_platform && cache_free_alien(cachep, objp)) return; if (likely(ac->avail < ac->limit)) { diff --git a/mm/slub.c b/mm/slub.c index 69d02e3e439e..04151da399c6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1877,9 +1877,16 @@ static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflag BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node)); - page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node); + page = new_slab(kmalloc_caches, gfpflags, node); BUG_ON(!page); + if (page_to_nid(page) != node) { + printk(KERN_ERR "SLUB: Unable to allocate memory from " + "node %d\n", node); + printk(KERN_ERR "SLUB: Allocating a useless per node structure " + "in order to be able to continue\n"); + } + n = page->freelist; BUG_ON(!n); page->freelist = get_freepointer(kmalloc_caches, n); @@ -3112,7 +3119,7 @@ static int list_locations(struct kmem_cache *s, char *buf, unsigned long flags; struct page *page; - if (!atomic_read(&n->nr_slabs)) + if (!atomic_long_read(&n->nr_slabs)) continue; spin_lock_irqsave(&n->list_lock, flags); @@ -3247,7 +3254,7 @@ static unsigned long slab_objects(struct kmem_cache *s, } if (flags & SO_FULL) { - int full_slabs = atomic_read(&n->nr_slabs) + int full_slabs = atomic_long_read(&n->nr_slabs) - per_cpu[node] - n->nr_partial; @@ -3283,7 +3290,7 @@ static int any_slab_objects(struct kmem_cache *s) for_each_node(node) { struct kmem_cache_node *n = get_node(s, node); - if (n->nr_partial || atomic_read(&n->nr_slabs)) + if (n->nr_partial || atomic_long_read(&n->nr_slabs)) return 1; } return 0; diff --git a/mm/sparse.c b/mm/sparse.c index 3047bf06c1f3..239f5a720d38 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -41,6 +41,15 @@ int page_to_nid(struct page *page) return section_to_node_table[page_to_section(page)]; } EXPORT_SYMBOL(page_to_nid); + +static void set_section_nid(unsigned long section_nr, int nid) +{ + section_to_node_table[section_nr] = nid; +} +#else /* !NODE_NOT_IN_PAGE_FLAGS */ +static inline void set_section_nid(unsigned long section_nr, int nid) +{ +} #endif #ifdef CONFIG_SPARSEMEM_EXTREME @@ -68,10 +77,6 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid) struct mem_section *section; int ret = 0; -#ifdef NODE_NOT_IN_PAGE_FLAGS - section_to_node_table[section_nr] = nid; -#endif - if (mem_section[root]) return -EEXIST; @@ -148,6 +153,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) struct mem_section *ms; sparse_index_init(section, nid); + set_section_nid(section, nid); ms = __nr_to_section(section); if (!ms->section_mem_map) diff --git a/mm/vmscan.c b/mm/vmscan.c index d419e10e3daa..a6e65d024995 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -271,6 +271,12 @@ static void handle_write_error(struct address_space *mapping, unlock_page(page); } +/* Request for sync pageout. */ +enum pageout_io { + PAGEOUT_IO_ASYNC, + PAGEOUT_IO_SYNC, +}; + /* possible outcome of pageout() */ typedef enum { /* failed to write page out, page is locked */ @@ -287,7 +293,8 @@ typedef enum { * pageout is called by shrink_page_list() for each dirty page. * Calls ->writepage(). */ -static pageout_t pageout(struct page *page, struct address_space *mapping) +static pageout_t pageout(struct page *page, struct address_space *mapping, + enum pageout_io sync_writeback) { /* * If the page is dirty, only perform writeback if that write @@ -346,6 +353,15 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) ClearPageReclaim(page); return PAGE_ACTIVATE; } + + /* + * Wait on writeback if requested to. This happens when + * direct reclaiming a large contiguous area and the + * first attempt to free a range of pages fails. + */ + if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC) + wait_on_page_writeback(page); + if (!PageWriteback(page)) { /* synchronous write or broken a_ops? */ ClearPageReclaim(page); @@ -423,7 +439,8 @@ cannot_free: * shrink_page_list() returns the number of reclaimed pages */ static unsigned long shrink_page_list(struct list_head *page_list, - struct scan_control *sc) + struct scan_control *sc, + enum pageout_io sync_writeback) { LIST_HEAD(ret_pages); struct pagevec freed_pvec; @@ -458,8 +475,23 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (page_mapped(page) || PageSwapCache(page)) sc->nr_scanned++; - if (PageWriteback(page)) - goto keep_locked; + may_enter_fs = (sc->gfp_mask & __GFP_FS) || + (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); + + if (PageWriteback(page)) { + /* + * Synchronous reclaim is performed in two passes, + * first an asynchronous pass over the list to + * start parallel writeback, and a second synchronous + * pass to wait for the IO to complete. Wait here + * for any page for which writeback has already + * started. + */ + if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs) + wait_on_page_writeback(page); + else + goto keep_locked; + } referenced = page_referenced(page, 1); /* In active use or really unfreeable? Activate it. */ @@ -478,8 +510,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, #endif /* CONFIG_SWAP */ mapping = page_mapping(page); - may_enter_fs = (sc->gfp_mask & __GFP_FS) || - (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); /* * The page is mapped into the page tables of one or more @@ -505,7 +535,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto keep_locked; /* Page is dirty, try to write it out here */ - switch(pageout(page, mapping)) { + switch (pageout(page, mapping, sync_writeback)) { case PAGE_KEEP: goto keep_locked; case PAGE_ACTIVATE: @@ -777,6 +807,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, (sc->order > PAGE_ALLOC_COSTLY_ORDER)? ISOLATE_BOTH : ISOLATE_INACTIVE); nr_active = clear_active_flags(&page_list); + __count_vm_events(PGDEACTIVATE, nr_active); __mod_zone_page_state(zone, NR_ACTIVE, -nr_active); __mod_zone_page_state(zone, NR_INACTIVE, @@ -785,7 +816,29 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, spin_unlock_irq(&zone->lru_lock); nr_scanned += nr_scan; - nr_freed = shrink_page_list(&page_list, sc); + nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC); + + /* + * If we are direct reclaiming for contiguous pages and we do + * not reclaim everything in the list, try again and wait + * for IO to complete. This will stall high-order allocations + * but that should be acceptable to the caller + */ + if (nr_freed < nr_taken && !current_is_kswapd() && + sc->order > PAGE_ALLOC_COSTLY_ORDER) { + congestion_wait(WRITE, HZ/10); + + /* + * The attempt at page out may have made some + * of the pages active, mark them inactive again. + */ + nr_active = clear_active_flags(&page_list); + count_vm_events(PGDEACTIVATE, nr_active); + + nr_freed += shrink_page_list(&page_list, sc, + PAGEOUT_IO_SYNC); + } + nr_reclaimed += nr_freed; local_irq_disable(); if (current_is_kswapd()) { diff --git a/security/commoncap.c b/security/commoncap.c index 338606eb7238..7520361663e8 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -315,13 +315,13 @@ int cap_syslog (int type) return 0; } -int cap_vm_enough_memory(long pages) +int cap_vm_enough_memory(struct mm_struct *mm, long pages) { int cap_sys_admin = 0; if (cap_capable(current, CAP_SYS_ADMIN) == 0) cap_sys_admin = 1; - return __vm_enough_memory(pages, cap_sys_admin); + return __vm_enough_memory(mm, pages, cap_sys_admin); } EXPORT_SYMBOL(cap_capable); diff --git a/security/dummy.c b/security/dummy.c index 19d813d5e083..853ec2292798 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -108,13 +108,13 @@ static int dummy_settime(struct timespec *ts, struct timezone *tz) return 0; } -static int dummy_vm_enough_memory(long pages) +static int dummy_vm_enough_memory(struct mm_struct *mm, long pages) { int cap_sys_admin = 0; if (dummy_capable(current, CAP_SYS_ADMIN) == 0) cap_sys_admin = 1; - return __vm_enough_memory(pages, cap_sys_admin); + return __vm_enough_memory(mm, pages, cap_sys_admin); } static int dummy_bprm_alloc_security (struct linux_binprm *bprm) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6237933f7d82..d8bc4172819c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1584,7 +1584,7 @@ static int selinux_syslog(int type) * Do not audit the selinux permission check, as this is applied to all * processes that allocate mappings. */ -static int selinux_vm_enough_memory(long pages) +static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) { int rc, cap_sys_admin = 0; struct task_security_struct *tsec = current->security; @@ -1600,7 +1600,7 @@ static int selinux_vm_enough_memory(long pages) if (rc == 0) cap_sys_admin = 1; - return __vm_enough_memory(pages, cap_sys_admin); + return __vm_enough_memory(mm, pages, cap_sys_admin); } /* binprm security operations */ |