summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/assoc_array.txt574
-rw-r--r--Documentation/devicetree/bindings/dma/atmel-dma.txt2
-rw-r--r--Documentation/devicetree/bindings/i2c/trivial-devices.txt3
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/dma.txt138
-rw-r--r--Documentation/dmatest.txt72
-rw-r--r--Documentation/filesystems/btrfs.txt34
-rw-r--r--Documentation/kernel-parameters.txt11
-rw-r--r--Documentation/power/runtime_pm.txt14
-rw-r--r--Documentation/security/00-INDEX2
-rw-r--r--Documentation/security/IMA-templates.txt87
-rw-r--r--Documentation/security/keys.txt20
-rw-r--r--Documentation/vm/split_page_table_lock6
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/alpha/Kconfig76
-rw-r--r--arch/alpha/include/asm/machvec.h22
-rw-r--r--arch/alpha/include/asm/pal.h71
-rw-r--r--arch/alpha/include/asm/rtc.h11
-rw-r--r--arch/alpha/include/asm/string.h24
-rw-r--r--arch/alpha/include/uapi/asm/pal.h1
-rw-r--r--arch/alpha/kernel/Makefile1
-rw-r--r--arch/alpha/kernel/alpha_ksyms.c1
-rw-r--r--arch/alpha/kernel/irq_alpha.c16
-rw-r--r--arch/alpha/kernel/machvec_impl.h5
-rw-r--r--arch/alpha/kernel/perf_event.c15
-rw-r--r--arch/alpha/kernel/process.c17
-rw-r--r--arch/alpha/kernel/proto.h6
-rw-r--r--arch/alpha/kernel/rtc.c323
-rw-r--r--arch/alpha/kernel/setup.c23
-rw-r--r--arch/alpha/kernel/smp.c33
-rw-r--r--arch/alpha/kernel/sys_jensen.c2
-rw-r--r--arch/alpha/kernel/sys_marvel.c55
-rw-r--r--arch/alpha/kernel/time.c405
-rw-r--r--arch/alpha/kernel/traps.c15
-rw-r--r--arch/alpha/lib/csum_partial_copy.c10
-rw-r--r--arch/alpha/lib/ev6-memset.S12
-rw-r--r--arch/alpha/lib/memset.S11
-rw-r--r--arch/arm/Kconfig11
-rw-r--r--arch/arm/common/edma.c4
-rw-r--r--arch/arm/include/asm/hardware/iop3xx-adma.h30
-rw-r--r--arch/arm/include/asm/hardware/iop_adma.h4
-rw-r--r--arch/arm/include/asm/memory.h9
-rw-r--r--arch/arm/kernel/head.S7
-rw-r--r--arch/arm/kernel/traps.c2
-rw-r--r--arch/arm/kvm/mmu.c34
-rw-r--r--arch/arm/lib/bitops.h2
-rw-r--r--arch/arm/mach-iop13xx/include/mach/adma.h26
-rw-r--r--arch/arm/mm/mmu.c4
-rw-r--r--arch/arm/mm/nommu.c1
-rw-r--r--arch/arm/mm/proc-v7.S17
-rw-r--r--arch/avr32/boot/u-boot/head.S35
-rw-r--r--arch/avr32/include/asm/kprobes.h14
-rw-r--r--arch/avr32/include/uapi/asm/Kbuild24
-rw-r--r--arch/avr32/include/uapi/asm/auxvec.h6
-rw-r--r--arch/avr32/include/uapi/asm/bitsperlong.h1
-rw-r--r--arch/avr32/include/uapi/asm/byteorder.h6
-rw-r--r--arch/avr32/include/uapi/asm/cachectl.h6
-rw-r--r--arch/avr32/include/uapi/asm/errno.h6
-rw-r--r--arch/avr32/include/uapi/asm/fcntl.h6
-rw-r--r--arch/avr32/include/uapi/asm/ioctl.h6
-rw-r--r--arch/avr32/include/uapi/asm/ioctls.h6
-rw-r--r--arch/avr32/include/uapi/asm/ipcbuf.h1
-rw-r--r--arch/avr32/include/uapi/asm/kvm_para.h1
-rw-r--r--arch/avr32/include/uapi/asm/mman.h1
-rw-r--r--arch/avr32/include/uapi/asm/msgbuf.h6
-rw-r--r--arch/avr32/include/uapi/asm/poll.h1
-rw-r--r--arch/avr32/include/uapi/asm/posix_types.h6
-rw-r--r--arch/avr32/include/uapi/asm/resource.h6
-rw-r--r--arch/avr32/include/uapi/asm/sembuf.h6
-rw-r--r--arch/avr32/include/uapi/asm/setup.h1
-rw-r--r--arch/avr32/include/uapi/asm/shmbuf.h6
-rw-r--r--arch/avr32/include/uapi/asm/sigcontext.h6
-rw-r--r--arch/avr32/include/uapi/asm/siginfo.h6
-rw-r--r--arch/avr32/include/uapi/asm/signal.h1
-rw-r--r--arch/avr32/include/uapi/asm/socket.h6
-rw-r--r--arch/avr32/include/uapi/asm/sockios.h6
-rw-r--r--arch/avr32/include/uapi/asm/stat.h6
-rw-r--r--arch/avr32/include/uapi/asm/statfs.h6
-rw-r--r--arch/avr32/include/uapi/asm/swab.h6
-rw-r--r--arch/avr32/include/uapi/asm/termbits.h6
-rw-r--r--arch/avr32/include/uapi/asm/termios.h1
-rw-r--r--arch/avr32/include/uapi/asm/types.h5
-rw-r--r--arch/avr32/include/uapi/asm/unistd.h1
-rw-r--r--arch/avr32/kernel/entry-avr32b.S3
-rw-r--r--arch/avr32/kernel/head.S20
-rw-r--r--arch/ia64/hp/common/sba_iommu.c2
-rw-r--r--arch/ia64/include/asm/pci.h2
-rw-r--r--arch/ia64/kernel/perfmon.c8
-rw-r--r--arch/ia64/pci/pci.c6
-rw-r--r--arch/ia64/sn/kernel/io_acpi_init.c4
-rw-r--r--arch/parisc/include/asm/socket.h11
-rw-r--r--arch/parisc/include/asm/uaccess.h46
-rw-r--r--arch/parisc/include/uapi/asm/socket.h11
-rw-r--r--arch/parisc/lib/memcpy.c6
-rw-r--r--arch/parisc/mm/fault.c22
-rw-r--r--arch/powerpc/Makefile1
-rw-r--r--arch/powerpc/boot/dts/fsl/b4si-post.dtsi4
-rw-r--r--arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi82
-rw-r--r--arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi82
-rw-r--r--arch/powerpc/boot/dts/fsl/t4240si-post.dtsi4
-rw-r--r--arch/powerpc/configs/pseries_le_defconfig352
-rw-r--r--arch/powerpc/include/asm/elf.h4
-rw-r--r--arch/powerpc/include/asm/hvcall.h2
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h26
-rw-r--r--arch/powerpc/include/asm/smp.h2
-rw-r--r--arch/powerpc/include/asm/thread_info.h9
-rw-r--r--arch/powerpc/kernel/eeh.c9
-rw-r--r--arch/powerpc/kernel/eeh_event.c9
-rw-r--r--arch/powerpc/kernel/process.c71
-rw-r--r--arch/powerpc/kernel/prom.c20
-rw-r--r--arch/powerpc/kernel/signal_32.c10
-rw-r--r--arch/powerpc/kernel/signal_64.c25
-rw-r--r--arch/powerpc/kernel/smp.c16
-rw-r--r--arch/powerpc/kernel/time.c4
-rw-r--r--arch/powerpc/kernel/vdso64/sigtramp.S16
-rw-r--r--arch/powerpc/kernel/vio.c2
-rw-r--r--arch/powerpc/mm/gup.c5
-rw-r--r--arch/powerpc/mm/slice.c2
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype11
-rw-r--r--arch/powerpc/platforms/powernv/rng.c1
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c21
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c17
-rw-r--r--arch/powerpc/platforms/pseries/rng.c1
-rw-r--r--arch/powerpc/platforms/pseries/setup.c42
-rw-r--r--arch/powerpc/platforms/wsp/chroma.c1
-rw-r--r--arch/powerpc/platforms/wsp/h8.c1
-rw-r--r--arch/powerpc/platforms/wsp/ics.c2
-rw-r--r--arch/powerpc/platforms/wsp/opb_pic.c2
-rw-r--r--arch/powerpc/platforms/wsp/psr2.c1
-rw-r--r--arch/powerpc/platforms/wsp/scom_wsp.c1
-rw-r--r--arch/powerpc/platforms/wsp/wsp.c1
-rw-r--r--arch/x86/include/asm/pci.h2
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h2
-rw-r--r--arch/x86/kvm/mmu_audit.c2
-rw-r--r--arch/x86/mm/pgtable.c6
-rw-r--r--arch/x86/pci/acpi.c4
-rw-r--r--block/blk-mq.c14
-rw-r--r--block/partitions/efi.c5
-rw-r--r--crypto/Kconfig3
-rw-r--r--crypto/Makefile1
-rw-r--r--crypto/asymmetric_keys/Kconfig4
-rw-r--r--crypto/asymmetric_keys/asymmetric_type.c1
-rw-r--r--crypto/asymmetric_keys/public_key.c66
-rw-r--r--crypto/asymmetric_keys/public_key.h6
-rw-r--r--crypto/asymmetric_keys/rsa.c14
-rw-r--r--crypto/asymmetric_keys/x509_cert_parser.c35
-rw-r--r--crypto/asymmetric_keys/x509_parser.h18
-rw-r--r--crypto/asymmetric_keys/x509_public_key.c232
-rw-r--r--crypto/async_tx/async_memcpy.c37
-rw-r--r--crypto/async_tx/async_pq.c174
-rw-r--r--crypto/async_tx/async_raid6_recov.c61
-rw-r--r--crypto/async_tx/async_tx.c4
-rw-r--r--crypto/async_tx/async_xor.c123
-rw-r--r--crypto/async_tx/raid6test.c10
-rw-r--r--crypto/hash_info.c56
-rw-r--r--drivers/acpi/Kconfig11
-rw-r--r--drivers/acpi/ac.c15
-rw-r--r--drivers/acpi/acpi_lpss.c9
-rw-r--r--drivers/acpi/acpi_platform.c2
-rw-r--r--drivers/acpi/blacklist.c35
-rw-r--r--drivers/acpi/device_pm.c14
-rw-r--r--drivers/acpi/ec.c3
-rw-r--r--drivers/acpi/glue.c53
-rw-r--r--drivers/acpi/pci_root.c1
-rw-r--r--drivers/acpi/scan.c14
-rw-r--r--drivers/acpi/video.c87
-rw-r--r--drivers/ata/libata-acpi.c4
-rw-r--r--drivers/ata/pata_arasan_cf.c3
-rw-r--r--drivers/base/platform.c4
-rw-r--r--drivers/base/power/main.c3
-rw-r--r--drivers/block/null_blk.c8
-rw-r--r--drivers/block/virtio_blk.c5
-rw-r--r--drivers/char/tpm/Kconfig37
-rw-r--r--drivers/char/tpm/Makefile11
-rw-r--r--drivers/char/tpm/tpm-interface.c (renamed from drivers/char/tpm/tpm.c)138
-rw-r--r--drivers/char/tpm/tpm.h3
-rw-r--r--drivers/char/tpm/tpm_atmel.c2
-rw-r--r--drivers/char/tpm/tpm_eventlog.c3
-rw-r--r--drivers/char/tpm/tpm_i2c_atmel.c284
-rw-r--r--drivers/char/tpm/tpm_i2c_infineon.c4
-rw-r--r--drivers/char/tpm/tpm_i2c_nuvoton.c710
-rw-r--r--drivers/char/tpm/tpm_i2c_stm_st33.c12
-rw-r--r--drivers/char/tpm/tpm_ibmvtpm.c6
-rw-r--r--drivers/char/tpm/tpm_ppi.c4
-rw-r--r--drivers/char/tpm/tpm_tis.c2
-rw-r--r--drivers/char/tpm/xen-tpmfront.c2
-rw-r--r--drivers/cpufreq/cpufreq_conservative.c3
-rw-r--r--drivers/cpufreq/cpufreq_governor.c4
-rw-r--r--drivers/cpufreq/omap-cpufreq.c1
-rw-r--r--drivers/dma/Kconfig9
-rw-r--r--drivers/dma/amba-pl08x.c39
-rw-r--r--drivers/dma/at_hdmac.c28
-rw-r--r--drivers/dma/coh901318.c4
-rw-r--r--drivers/dma/cppi41.c178
-rw-r--r--drivers/dma/dma-jz4740.c2
-rw-r--r--drivers/dma/dmaengine.c264
-rw-r--r--drivers/dma/dmatest.c917
-rw-r--r--drivers/dma/dw/core.c29
-rw-r--r--drivers/dma/edma.c369
-rw-r--r--drivers/dma/ep93xx_dma.c30
-rw-r--r--drivers/dma/fsldma.c26
-rw-r--r--drivers/dma/fsldma.h2
-rw-r--r--drivers/dma/imx-dma.c42
-rw-r--r--drivers/dma/imx-sdma.c10
-rw-r--r--drivers/dma/intel_mid_dma.c4
-rw-r--r--drivers/dma/ioat/dma.c53
-rw-r--r--drivers/dma/ioat/dma.h14
-rw-r--r--drivers/dma/ioat/dma_v2.c2
-rw-r--r--drivers/dma/ioat/dma_v2.h1
-rw-r--r--drivers/dma/ioat/dma_v3.c323
-rw-r--r--drivers/dma/ioat/pci.c20
-rw-r--r--drivers/dma/iop-adma.c113
-rw-r--r--drivers/dma/ipu/ipu_idmac.c6
-rw-r--r--drivers/dma/k3dma.c4
-rw-r--r--drivers/dma/mmp_pdma.c7
-rw-r--r--drivers/dma/mmp_tdma.c40
-rw-r--r--drivers/dma/mv_xor.c58
-rw-r--r--drivers/dma/mv_xor.h25
-rw-r--r--drivers/dma/mxs-dma.c178
-rw-r--r--drivers/dma/omap-dma.c2
-rw-r--r--drivers/dma/pl330.c32
-rw-r--r--drivers/dma/ppc4xx/adma.c272
-rw-r--r--drivers/dma/sa11x0-dma.c2
-rw-r--r--drivers/dma/sh/shdma-base.c2
-rw-r--r--drivers/dma/sh/shdmac.c4
-rw-r--r--drivers/dma/ste_dma40.c7
-rw-r--r--drivers/dma/tegra20-apb-dma.c6
-rw-r--r--drivers/dma/timb_dma.c37
-rw-r--r--drivers/dma/txx9dmac.c29
-rw-r--r--drivers/gpio/gpiolib.c1
-rw-r--r--drivers/gpu/drm/i915/intel_acpi.c2
-rw-r--r--drivers/gpu/drm/i915/intel_opregion.c2
-rw-r--r--drivers/gpu/drm/nouveau/core/subdev/mxm/base.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_acpi.c6
-rw-r--r--drivers/gpu/drm/radeon/radeon_acpi.c8
-rw-r--r--drivers/gpu/drm/radeon/radeon_atpx_handler.c7
-rw-r--r--drivers/gpu/drm/radeon/radeon_bios.c2
-rw-r--r--drivers/hid/i2c-hid/i2c-hid.c2
-rw-r--r--drivers/i2c/i2c-core.c25
-rw-r--r--drivers/ide/ide-acpi.c5
-rw-r--r--drivers/idle/intel_idle.c24
-rw-r--r--drivers/md/md.c133
-rw-r--r--drivers/md/raid1.c162
-rw-r--r--drivers/md/raid1.h15
-rw-r--r--drivers/md/raid10.c6
-rw-r--r--drivers/md/raid5.c420
-rw-r--r--drivers/md/raid5.h16
-rw-r--r--drivers/media/platform/m2m-deinterlace.c3
-rw-r--r--drivers/media/platform/timblogiw.c2
-rw-r--r--drivers/misc/carma/carma-fpga.c3
-rw-r--r--drivers/mmc/core/sdio_bus.c3
-rw-r--r--drivers/mtd/nand/atmel_nand.c3
-rw-r--r--drivers/mtd/nand/fsmc_nand.c2
-rw-r--r--drivers/net/ethernet/micrel/ks8842.c6
-rw-r--r--drivers/ntb/ntb_transport.c86
-rw-r--r--drivers/pci/hotplug/acpi_pcihp.c2
-rw-r--r--drivers/pci/hotplug/acpiphp.h1
-rw-r--r--drivers/pci/hotplug/pciehp_acpi.c4
-rw-r--r--drivers/pci/hotplug/sgi_hotplug.c8
-rw-r--r--drivers/pci/ioapic.c2
-rw-r--r--drivers/pci/pci-acpi.c6
-rw-r--r--drivers/pci/pci-label.c6
-rw-r--r--drivers/platform/x86/apple-gmux.c2
-rw-r--r--drivers/pnp/pnpacpi/core.c10
-rw-r--r--drivers/rtc/Kconfig10
-rw-r--r--drivers/rtc/rtc-at91rm9200.c9
-rw-r--r--drivers/spi/spi-dw-mid.c4
-rw-r--r--drivers/spi/spi.c19
-rw-r--r--drivers/tty/serial/sh-sci.c2
-rw-r--r--drivers/usb/core/hub.c2
-rw-r--r--drivers/usb/core/usb-acpi.c4
-rw-r--r--drivers/xen/pci.c6
-rw-r--r--fs/9p/vfs_dentry.c19
-rw-r--r--fs/aio.c134
-rw-r--r--fs/bio.c2
-rw-r--r--fs/btrfs/Kconfig15
-rw-r--r--fs/btrfs/async-thread.c1
-rw-r--r--fs/btrfs/check-integrity.c25
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/dev-replace.c2
-rw-r--r--fs/btrfs/disk-io.c21
-rw-r--r--fs/btrfs/extent_io.c11
-rw-r--r--fs/btrfs/inode.c6
-rw-r--r--fs/btrfs/ordered-data.c3
-rw-r--r--fs/btrfs/scrub.c6
-rw-r--r--fs/btrfs/transaction.c4
-rw-r--r--fs/btrfs/tree-log.c5
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/configfs/dir.c28
-rw-r--r--fs/coredump.c6
-rw-r--r--fs/dcache.c84
-rw-r--r--fs/efivarfs/super.c11
-rw-r--r--fs/exec.c5
-rw-r--r--fs/gfs2/glock.c3
-rw-r--r--fs/gfs2/inode.c5
-rw-r--r--fs/gfs2/lock_dlm.c8
-rw-r--r--fs/gfs2/quota.c23
-rw-r--r--fs/gfs2/rgrp.c4
-rw-r--r--fs/hostfs/hostfs_kern.c11
-rw-r--r--fs/libfs.c12
-rw-r--r--fs/namei.c1
-rw-r--r--fs/nfsd/nfs4xdr.c3
-rw-r--r--fs/nfsd/vfs.c173
-rw-r--r--fs/proc/base.c14
-rw-r--r--fs/proc/generic.c18
-rw-r--r--fs/proc/namespaces.c8
-rw-r--r--fs/squashfs/Kconfig72
-rw-r--r--fs/squashfs/Makefile5
-rw-r--r--fs/squashfs/block.c36
-rw-r--r--fs/squashfs/cache.c28
-rw-r--r--fs/squashfs/decompressor.c59
-rw-r--r--fs/squashfs/decompressor.h24
-rw-r--r--fs/squashfs/decompressor_multi.c198
-rw-r--r--fs/squashfs/decompressor_multi_percpu.c97
-rw-r--r--fs/squashfs/decompressor_single.c85
-rw-r--r--fs/squashfs/file.c142
-rw-r--r--fs/squashfs/file_cache.c38
-rw-r--r--fs/squashfs/file_direct.c173
-rw-r--r--fs/squashfs/lzo_wrapper.c47
-rw-r--r--fs/squashfs/page_actor.c100
-rw-r--r--fs/squashfs/page_actor.h81
-rw-r--r--fs/squashfs/squashfs.h20
-rw-r--r--fs/squashfs/squashfs_fs_sb.h4
-rw-r--r--fs/squashfs/super.c10
-rw-r--r--fs/squashfs/xz_wrapper.c105
-rw-r--r--fs/squashfs/zlib_wrapper.c64
-rw-r--r--fs/xfs/xfs_bmap.c38
-rw-r--r--fs/xfs/xfs_mount.c15
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--fs/xfs/xfs_trans_inode.c8
-rw-r--r--fs/xfs/xfs_trans_resv.c3
-rw-r--r--include/acpi/acpi_bus.h2
-rw-r--r--include/crypto/hash_info.h40
-rw-r--r--include/crypto/public_key.h25
-rw-r--r--include/keys/big_key-type.h25
-rw-r--r--include/keys/keyring-type.h17
-rw-r--r--include/keys/system_keyring.h23
-rw-r--r--include/linux/acpi.h23
-rw-r--r--include/linux/assoc_array.h92
-rw-r--r--include/linux/assoc_array_priv.h182
-rw-r--r--include/linux/audit.h15
-rw-r--r--include/linux/blkdev.h3
-rw-r--r--include/linux/device.h12
-rw-r--r--include/linux/dmaengine.h76
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/hugetlb.h10
-rw-r--r--include/linux/key-type.h6
-rw-r--r--include/linux/key.h52
-rw-r--r--include/linux/mm.h9
-rw-r--r--include/linux/mm_types.h30
-rw-r--r--include/linux/pci-acpi.h4
-rw-r--r--include/linux/platform_data/edma.h8
-rw-r--r--include/linux/security.h26
-rw-r--r--include/linux/seqlock.h29
-rw-r--r--include/linux/slab.h9
-rw-r--r--include/linux/slab_def.h4
-rw-r--r--include/linux/slub_def.h2
-rw-r--r--include/linux/user_namespace.h6
-rw-r--r--include/linux/wait.h25
-rw-r--r--include/trace/events/btrfs.h4
-rw-r--r--include/uapi/linux/audit.h26
-rw-r--r--include/uapi/linux/hash_info.h37
-rw-r--r--include/uapi/linux/keyctl.h1
-rw-r--r--include/uapi/linux/raid/md_p.h1
-rw-r--r--init/Kconfig27
-rw-r--r--init/main.c2
-rw-r--r--ipc/shm.c37
-rw-r--r--kernel/Makefile50
-rw-r--r--kernel/audit.c153
-rw-r--r--kernel/audit.h3
-rw-r--r--kernel/auditfilter.c3
-rw-r--r--kernel/auditsc.c133
-rw-r--r--kernel/cgroup.c7
-rw-r--r--kernel/modsign_certificate.S12
-rw-r--r--kernel/modsign_pubkey.c104
-rw-r--r--kernel/module-internal.h2
-rw-r--r--kernel/module_signing.c11
-rw-r--r--kernel/power/snapshot.c3
-rw-r--r--kernel/power/user.c1
-rw-r--r--kernel/system_certificates.S10
-rw-r--r--kernel/system_keyring.c105
-rw-r--r--kernel/user.c4
-rw-r--r--kernel/user_namespace.c6
-rw-r--r--lib/Kconfig14
-rw-r--r--lib/Makefile1
-rw-r--r--lib/assoc_array.c1746
-rw-r--r--lib/mpi/mpiutil.c3
-rw-r--r--mm/hugetlb.c51
-rw-r--r--mm/memory.c7
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/migrate.c48
-rw-r--r--mm/slab.c571
-rw-r--r--mm/slub.c45
-rw-r--r--mm/swap.c143
-rw-r--r--net/Kconfig4
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/sunrpc/rpc_pipe.c11
-rw-r--r--scripts/asn1_compiler.c2
-rwxr-xr-xscripts/checkpatch.pl1
-rw-r--r--security/Makefile1
-rw-r--r--security/apparmor/audit.c14
-rw-r--r--security/apparmor/capability.c15
-rw-r--r--security/apparmor/domain.c16
-rw-r--r--security/apparmor/include/audit.h1
-rw-r--r--security/apparmor/include/capability.h5
-rw-r--r--security/apparmor/include/ipc.h4
-rw-r--r--security/apparmor/ipc.c9
-rw-r--r--security/apparmor/lsm.c2
-rw-r--r--security/capability.c15
-rw-r--r--security/integrity/digsig.c37
-rw-r--r--security/integrity/digsig_asymmetric.c11
-rw-r--r--security/integrity/evm/evm_main.c4
-rw-r--r--security/integrity/evm/evm_posix_acl.c3
-rw-r--r--security/integrity/iint.c2
-rw-r--r--security/integrity/ima/Kconfig72
-rw-r--r--security/integrity/ima/Makefile2
-rw-r--r--security/integrity/ima/ima.h101
-rw-r--r--security/integrity/ima/ima_api.c136
-rw-r--r--security/integrity/ima/ima_appraise.c117
-rw-r--r--security/integrity/ima/ima_crypto.c134
-rw-r--r--security/integrity/ima/ima_fs.c67
-rw-r--r--security/integrity/ima/ima_init.c37
-rw-r--r--security/integrity/ima/ima_main.c63
-rw-r--r--security/integrity/ima/ima_policy.c1
-rw-r--r--security/integrity/ima/ima_queue.c10
-rw-r--r--security/integrity/ima/ima_template.c178
-rw-r--r--security/integrity/ima/ima_template_lib.c347
-rw-r--r--security/integrity/ima/ima_template_lib.h49
-rw-r--r--security/integrity/integrity.h47
-rw-r--r--security/keys/Kconfig29
-rw-r--r--security/keys/Makefile2
-rw-r--r--security/keys/big_key.c207
-rw-r--r--security/keys/compat.c3
-rw-r--r--security/keys/gc.c47
-rw-r--r--security/keys/internal.h74
-rw-r--r--security/keys/key.c102
-rw-r--r--security/keys/keyctl.c3
-rw-r--r--security/keys/keyring.c1536
-rw-r--r--security/keys/persistent.c167
-rw-r--r--security/keys/proc.c17
-rw-r--r--security/keys/process_keys.c141
-rw-r--r--security/keys/request_key.c60
-rw-r--r--security/keys/request_key_auth.c31
-rw-r--r--security/keys/sysctl.c11
-rw-r--r--security/keys/user_defined.c18
-rw-r--r--security/lsm_audit.c3
-rw-r--r--security/security.c13
-rw-r--r--security/selinux/hooks.c146
-rw-r--r--security/selinux/include/objsec.h4
-rw-r--r--security/selinux/include/security.h13
-rw-r--r--security/selinux/include/xfrm.h45
-rw-r--r--security/selinux/netlabel.c6
-rw-r--r--security/selinux/netnode.c2
-rw-r--r--security/selinux/nlmsgtab.c2
-rw-r--r--security/selinux/selinuxfs.c4
-rw-r--r--security/selinux/ss/ebitmap.c20
-rw-r--r--security/selinux/ss/ebitmap.h10
-rw-r--r--security/selinux/ss/mls.c22
-rw-r--r--security/selinux/ss/mls_types.h2
-rw-r--r--security/selinux/ss/policydb.c3
-rw-r--r--security/selinux/ss/services.c66
-rw-r--r--security/selinux/xfrm.c453
-rw-r--r--security/smack/smack.h12
-rw-r--r--security/smack/smack_access.c10
-rw-r--r--security/smack/smack_lsm.c11
-rw-r--r--security/smack/smackfs.c10
-rw-r--r--sound/soc/davinci/davinci-pcm.c2
-rw-r--r--tools/power/x86/turbostat/turbostat.c197
-rw-r--r--virt/kvm/kvm_main.c5
468 files changed, 14555 insertions, 6832 deletions
diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt
new file mode 100644
index 000000000000..f4faec0f66e4
--- /dev/null
+++ b/Documentation/assoc_array.txt
@@ -0,0 +1,574 @@
+ ========================================
+ GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION
+ ========================================
+
+Contents:
+
+ - Overview.
+
+ - The public API.
+ - Edit script.
+ - Operations table.
+ - Manipulation functions.
+ - Access functions.
+ - Index key form.
+
+ - Internal workings.
+ - Basic internal tree layout.
+ - Shortcuts.
+ - Splitting and collapsing nodes.
+ - Non-recursive iteration.
+ - Simultaneous alteration and iteration.
+
+
+========
+OVERVIEW
+========
+
+This associative array implementation is an object container with the following
+properties:
+
+ (1) Objects are opaque pointers. The implementation does not care where they
+ point (if anywhere) or what they point to (if anything).
+
+ [!] NOTE: Pointers to objects _must_ be zero in the least significant bit.
+
+ (2) Objects do not need to contain linkage blocks for use by the array. This
+ permits an object to be located in multiple arrays simultaneously.
+ Rather, the array is made up of metadata blocks that point to objects.
+
+ (3) Objects require index keys to locate them within the array.
+
+ (4) Index keys must be unique. Inserting an object with the same key as one
+ already in the array will replace the old object.
+
+ (5) Index keys can be of any length and can be of different lengths.
+
+ (6) Index keys should encode the length early on, before any variation due to
+ length is seen.
+
+ (7) Index keys can include a hash to scatter objects throughout the array.
+
+ (8) The array can iterated over. The objects will not necessarily come out in
+ key order.
+
+ (9) The array can be iterated over whilst it is being modified, provided the
+ RCU readlock is being held by the iterator. Note, however, under these
+ circumstances, some objects may be seen more than once. If this is a
+ problem, the iterator should lock against modification. Objects will not
+ be missed, however, unless deleted.
+
+(10) Objects in the array can be looked up by means of their index key.
+
+(11) Objects can be looked up whilst the array is being modified, provided the
+ RCU readlock is being held by the thread doing the look up.
+
+The implementation uses a tree of 16-pointer nodes internally that are indexed
+on each level by nibbles from the index key in the same manner as in a radix
+tree. To improve memory efficiency, shortcuts can be emplaced to skip over
+what would otherwise be a series of single-occupancy nodes. Further, nodes
+pack leaf object pointers into spare space in the node rather than making an
+extra branch until as such time an object needs to be added to a full node.
+
+
+==============
+THE PUBLIC API
+==============
+
+The public API can be found in <linux/assoc_array.h>. The associative array is
+rooted on the following structure:
+
+ struct assoc_array {
+ ...
+ };
+
+The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY.
+
+
+EDIT SCRIPT
+-----------
+
+The insertion and deletion functions produce an 'edit script' that can later be
+applied to effect the changes without risking ENOMEM. This retains the
+preallocated metadata blocks that will be installed in the internal tree and
+keeps track of the metadata blocks that will be removed from the tree when the
+script is applied.
+
+This is also used to keep track of dead blocks and dead objects after the
+script has been applied so that they can be freed later. The freeing is done
+after an RCU grace period has passed - thus allowing access functions to
+proceed under the RCU read lock.
+
+The script appears as outside of the API as a pointer of the type:
+
+ struct assoc_array_edit;
+
+There are two functions for dealing with the script:
+
+ (1) Apply an edit script.
+
+ void assoc_array_apply_edit(struct assoc_array_edit *edit);
+
+ This will perform the edit functions, interpolating various write barriers
+ to permit accesses under the RCU read lock to continue. The edit script
+ will then be passed to call_rcu() to free it and any dead stuff it points
+ to.
+
+ (2) Cancel an edit script.
+
+ void assoc_array_cancel_edit(struct assoc_array_edit *edit);
+
+ This frees the edit script and all preallocated memory immediately. If
+ this was for insertion, the new object is _not_ released by this function,
+ but must rather be released by the caller.
+
+These functions are guaranteed not to fail.
+
+
+OPERATIONS TABLE
+----------------
+
+Various functions take a table of operations:
+
+ struct assoc_array_ops {
+ ...
+ };
+
+This points to a number of methods, all of which need to be provided:
+
+ (1) Get a chunk of index key from caller data:
+
+ unsigned long (*get_key_chunk)(const void *index_key, int level);
+
+ This should return a chunk of caller-supplied index key starting at the
+ *bit* position given by the level argument. The level argument will be a
+ multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return
+ ASSOC_ARRAY_KEY_CHUNK_SIZE bits. No error is possible.
+
+
+ (2) Get a chunk of an object's index key.
+
+ unsigned long (*get_object_key_chunk)(const void *object, int level);
+
+ As the previous function, but gets its data from an object in the array
+ rather than from a caller-supplied index key.
+
+
+ (3) See if this is the object we're looking for.
+
+ bool (*compare_object)(const void *object, const void *index_key);
+
+ Compare the object against an index key and return true if it matches and
+ false if it doesn't.
+
+
+ (4) Diff the index keys of two objects.
+
+ int (*diff_objects)(const void *a, const void *b);
+
+ Return the bit position at which the index keys of two objects differ or
+ -1 if they are the same.
+
+
+ (5) Free an object.
+
+ void (*free_object)(void *object);
+
+ Free the specified object. Note that this may be called an RCU grace
+ period after assoc_array_apply_edit() was called, so synchronize_rcu() may
+ be necessary on module unloading.
+
+
+MANIPULATION FUNCTIONS
+----------------------
+
+There are a number of functions for manipulating an associative array:
+
+ (1) Initialise an associative array.
+
+ void assoc_array_init(struct assoc_array *array);
+
+ This initialises the base structure for an associative array. It can't
+ fail.
+
+
+ (2) Insert/replace an object in an associative array.
+
+ struct assoc_array_edit *
+ assoc_array_insert(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key,
+ void *object);
+
+ This inserts the given object into the array. Note that the least
+ significant bit of the pointer must be zero as it's used to type-mark
+ pointers internally.
+
+ If an object already exists for that key then it will be replaced with the
+ new object and the old one will be freed automatically.
+
+ The index_key argument should hold index key information and is
+ passed to the methods in the ops table when they are called.
+
+ This function makes no alteration to the array itself, but rather returns
+ an edit script that must be applied. -ENOMEM is returned in the case of
+ an out-of-memory error.
+
+ The caller should lock exclusively against other modifiers of the array.
+
+
+ (3) Delete an object from an associative array.
+
+ struct assoc_array_edit *
+ assoc_array_delete(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key);
+
+ This deletes an object that matches the specified data from the array.
+
+ The index_key argument should hold index key information and is
+ passed to the methods in the ops table when they are called.
+
+ This function makes no alteration to the array itself, but rather returns
+ an edit script that must be applied. -ENOMEM is returned in the case of
+ an out-of-memory error. NULL will be returned if the specified object is
+ not found within the array.
+
+ The caller should lock exclusively against other modifiers of the array.
+
+
+ (4) Delete all objects from an associative array.
+
+ struct assoc_array_edit *
+ assoc_array_clear(struct assoc_array *array,
+ const struct assoc_array_ops *ops);
+
+ This deletes all the objects from an associative array and leaves it
+ completely empty.
+
+ This function makes no alteration to the array itself, but rather returns
+ an edit script that must be applied. -ENOMEM is returned in the case of
+ an out-of-memory error.
+
+ The caller should lock exclusively against other modifiers of the array.
+
+
+ (5) Destroy an associative array, deleting all objects.
+
+ void assoc_array_destroy(struct assoc_array *array,
+ const struct assoc_array_ops *ops);
+
+ This destroys the contents of the associative array and leaves it
+ completely empty. It is not permitted for another thread to be traversing
+ the array under the RCU read lock at the same time as this function is
+ destroying it as no RCU deferral is performed on memory release -
+ something that would require memory to be allocated.
+
+ The caller should lock exclusively against other modifiers and accessors
+ of the array.
+
+
+ (6) Garbage collect an associative array.
+
+ int assoc_array_gc(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ bool (*iterator)(void *object, void *iterator_data),
+ void *iterator_data);
+
+ This iterates over the objects in an associative array and passes each one
+ to iterator(). If iterator() returns true, the object is kept. If it
+ returns false, the object will be freed. If the iterator() function
+ returns true, it must perform any appropriate refcount incrementing on the
+ object before returning.
+
+ The internal tree will be packed down if possible as part of the iteration
+ to reduce the number of nodes in it.
+
+ The iterator_data is passed directly to iterator() and is otherwise
+ ignored by the function.
+
+ The function will return 0 if successful and -ENOMEM if there wasn't
+ enough memory.
+
+ It is possible for other threads to iterate over or search the array under
+ the RCU read lock whilst this function is in progress. The caller should
+ lock exclusively against other modifiers of the array.
+
+
+ACCESS FUNCTIONS
+----------------
+
+There are two functions for accessing an associative array:
+
+ (1) Iterate over all the objects in an associative array.
+
+ int assoc_array_iterate(const struct assoc_array *array,
+ int (*iterator)(const void *object,
+ void *iterator_data),
+ void *iterator_data);
+
+ This passes each object in the array to the iterator callback function.
+ iterator_data is private data for that function.
+
+ This may be used on an array at the same time as the array is being
+ modified, provided the RCU read lock is held. Under such circumstances,
+ it is possible for the iteration function to see some objects twice. If
+ this is a problem, then modification should be locked against. The
+ iteration algorithm should not, however, miss any objects.
+
+ The function will return 0 if no objects were in the array or else it will
+ return the result of the last iterator function called. Iteration stops
+ immediately if any call to the iteration function results in a non-zero
+ return.
+
+
+ (2) Find an object in an associative array.
+
+ void *assoc_array_find(const struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key);
+
+ This walks through the array's internal tree directly to the object
+ specified by the index key..
+
+ This may be used on an array at the same time as the array is being
+ modified, provided the RCU read lock is held.
+
+ The function will return the object if found (and set *_type to the object
+ type) or will return NULL if the object was not found.
+
+
+INDEX KEY FORM
+--------------
+
+The index key can be of any form, but since the algorithms aren't told how long
+the key is, it is strongly recommended that the index key includes its length
+very early on before any variation due to the length would have an effect on
+comparisons.
+
+This will cause leaves with different length keys to scatter away from each
+other - and those with the same length keys to cluster together.
+
+It is also recommended that the index key begin with a hash of the rest of the
+key to maximise scattering throughout keyspace.
+
+The better the scattering, the wider and lower the internal tree will be.
+
+Poor scattering isn't too much of a problem as there are shortcuts and nodes
+can contain mixtures of leaves and metadata pointers.
+
+The index key is read in chunks of machine word. Each chunk is subdivided into
+one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and
+on a 64-bit CPU, 16 levels. Unless the scattering is really poor, it is
+unlikely that more than one word of any particular index key will have to be
+used.
+
+
+=================
+INTERNAL WORKINGS
+=================
+
+The associative array data structure has an internal tree. This tree is
+constructed of two types of metadata blocks: nodes and shortcuts.
+
+A node is an array of slots. Each slot can contain one of four things:
+
+ (*) A NULL pointer, indicating that the slot is empty.
+
+ (*) A pointer to an object (a leaf).
+
+ (*) A pointer to a node at the next level.
+
+ (*) A pointer to a shortcut.
+
+
+BASIC INTERNAL TREE LAYOUT
+--------------------------
+
+Ignoring shortcuts for the moment, the nodes form a multilevel tree. The index
+key space is strictly subdivided by the nodes in the tree and nodes occur on
+fixed levels. For example:
+
+ Level: 0 1 2 3
+ =============== =============== =============== ===============
+ NODE D
+ NODE B NODE C +------>+---+
+ +------>+---+ +------>+---+ | | 0 |
+ NODE A | | 0 | | | 0 | | +---+
+ +---+ | +---+ | +---+ | : :
+ | 0 | | : : | : : | +---+
+ +---+ | +---+ | +---+ | | f |
+ | 1 |---+ | 3 |---+ | 7 |---+ +---+
+ +---+ +---+ +---+
+ : : : : | 8 |---+
+ +---+ +---+ +---+ | NODE E
+ | e |---+ | f | : : +------>+---+
+ +---+ | +---+ +---+ | 0 |
+ | f | | | f | +---+
+ +---+ | +---+ : :
+ | NODE F +---+
+ +------>+---+ | f |
+ | 0 | NODE G +---+
+ +---+ +------>+---+
+ : : | | 0 |
+ +---+ | +---+
+ | 6 |---+ : :
+ +---+ +---+
+ : : | f |
+ +---+ +---+
+ | f |
+ +---+
+
+In the above example, there are 7 nodes (A-G), each with 16 slots (0-f).
+Assuming no other meta data nodes in the tree, the key space is divided thusly:
+
+ KEY PREFIX NODE
+ ========== ====
+ 137* D
+ 138* E
+ 13[0-69-f]* C
+ 1[0-24-f]* B
+ e6* G
+ e[0-57-f]* F
+ [02-df]* A
+
+So, for instance, keys with the following example index keys will be found in
+the appropriate nodes:
+
+ INDEX KEY PREFIX NODE
+ =============== ======= ====
+ 13694892892489 13 C
+ 13795289025897 137 D
+ 13889dde88793 138 E
+ 138bbb89003093 138 E
+ 1394879524789 12 C
+ 1458952489 1 B
+ 9431809de993ba - A
+ b4542910809cd - A
+ e5284310def98 e F
+ e68428974237 e6 G
+ e7fffcbd443 e F
+ f3842239082 - A
+
+To save memory, if a node can hold all the leaves in its portion of keyspace,
+then the node will have all those leaves in it and will not have any metadata
+pointers - even if some of those leaves would like to be in the same slot.
+
+A node can contain a heterogeneous mix of leaves and metadata pointers.
+Metadata pointers must be in the slots that match their subdivisions of key
+space. The leaves can be in any slot not occupied by a metadata pointer. It
+is guaranteed that none of the leaves in a node will match a slot occupied by a
+metadata pointer. If the metadata pointer is there, any leaf whose key matches
+the metadata key prefix must be in the subtree that the metadata pointer points
+to.
+
+In the above example list of index keys, node A will contain:
+
+ SLOT CONTENT INDEX KEY (PREFIX)
+ ==== =============== ==================
+ 1 PTR TO NODE B 1*
+ any LEAF 9431809de993ba
+ any LEAF b4542910809cd
+ e PTR TO NODE F e*
+ any LEAF f3842239082
+
+and node B:
+
+ 3 PTR TO NODE C 13*
+ any LEAF 1458952489
+
+
+SHORTCUTS
+---------
+
+Shortcuts are metadata records that jump over a piece of keyspace. A shortcut
+is a replacement for a series of single-occupancy nodes ascending through the
+levels. Shortcuts exist to save memory and to speed up traversal.
+
+It is possible for the root of the tree to be a shortcut - say, for example,
+the tree contains at least 17 nodes all with key prefix '1111'. The insertion
+algorithm will insert a shortcut to skip over the '1111' keyspace in a single
+bound and get to the fourth level where these actually become different.
+
+
+SPLITTING AND COLLAPSING NODES
+------------------------------
+
+Each node has a maximum capacity of 16 leaves and metadata pointers. If the
+insertion algorithm finds that it is trying to insert a 17th object into a
+node, that node will be split such that at least two leaves that have a common
+key segment at that level end up in a separate node rooted on that slot for
+that common key segment.
+
+If the leaves in a full node and the leaf that is being inserted are
+sufficiently similar, then a shortcut will be inserted into the tree.
+
+When the number of objects in the subtree rooted at a node falls to 16 or
+fewer, then the subtree will be collapsed down to a single node - and this will
+ripple towards the root if possible.
+
+
+NON-RECURSIVE ITERATION
+-----------------------
+
+Each node and shortcut contains a back pointer to its parent and the number of
+slot in that parent that points to it. None-recursive iteration uses these to
+proceed rootwards through the tree, going to the parent node, slot N + 1 to
+make sure progress is made without the need for a stack.
+
+The backpointers, however, make simultaneous alteration and iteration tricky.
+
+
+SIMULTANEOUS ALTERATION AND ITERATION
+-------------------------------------
+
+There are a number of cases to consider:
+
+ (1) Simple insert/replace. This involves simply replacing a NULL or old
+ matching leaf pointer with the pointer to the new leaf after a barrier.
+ The metadata blocks don't change otherwise. An old leaf won't be freed
+ until after the RCU grace period.
+
+ (2) Simple delete. This involves just clearing an old matching leaf. The
+ metadata blocks don't change otherwise. The old leaf won't be freed until
+ after the RCU grace period.
+
+ (3) Insertion replacing part of a subtree that we haven't yet entered. This
+ may involve replacement of part of that subtree - but that won't affect
+ the iteration as we won't have reached the pointer to it yet and the
+ ancestry blocks are not replaced (the layout of those does not change).
+
+ (4) Insertion replacing nodes that we're actively processing. This isn't a
+ problem as we've passed the anchoring pointer and won't switch onto the
+ new layout until we follow the back pointers - at which point we've
+ already examined the leaves in the replaced node (we iterate over all the
+ leaves in a node before following any of its metadata pointers).
+
+ We might, however, re-see some leaves that have been split out into a new
+ branch that's in a slot further along than we were at.
+
+ (5) Insertion replacing nodes that we're processing a dependent branch of.
+ This won't affect us until we follow the back pointers. Similar to (4).
+
+ (6) Deletion collapsing a branch under us. This doesn't affect us because the
+ back pointers will get us back to the parent of the new node before we
+ could see the new node. The entire collapsed subtree is thrown away
+ unchanged - and will still be rooted on the same slot, so we shouldn't
+ process it a second time as we'll go back to slot + 1.
+
+Note:
+
+ (*) Under some circumstances, we need to simultaneously change the parent
+ pointer and the parent slot pointer on a node (say, for example, we
+ inserted another node before it and moved it up a level). We cannot do
+ this without locking against a read - so we have to replace that node too.
+
+ However, when we're changing a shortcut into a node this isn't a problem
+ as shortcuts only have one slot and so the parent slot number isn't used
+ when traversing backwards over one. This means that it's okay to change
+ the slot number first - provided suitable barriers are used to make sure
+ the parent slot number is read after the back pointer.
+
+Obsolete blocks and leaves are freed up after an RCU grace period has passed,
+so as long as anyone doing walking or iteration holds the RCU read lock, the
+old superstructure should not go away on them.
diff --git a/Documentation/devicetree/bindings/dma/atmel-dma.txt b/Documentation/devicetree/bindings/dma/atmel-dma.txt
index e1f343c7a34b..f69bcf5a6343 100644
--- a/Documentation/devicetree/bindings/dma/atmel-dma.txt
+++ b/Documentation/devicetree/bindings/dma/atmel-dma.txt
@@ -28,7 +28,7 @@ The three cells in order are:
dependent:
- bit 7-0: peripheral identifier for the hardware handshaking interface. The
identifier can be different for tx and rx.
- - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 1 for ASAP.
+ - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 2 for ASAP.
Example:
diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
index ad6a73852f08..f1fb26eed0e9 100644
--- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt
+++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
@@ -15,6 +15,7 @@ adi,adt7461 +/-1C TDM Extended Temp Range I.C
adt7461 +/-1C TDM Extended Temp Range I.C
at,24c08 i2c serial eeprom (24cxx)
atmel,24c02 i2c serial eeprom (24cxx)
+atmel,at97sc3204t i2c trusted platform module (TPM)
catalyst,24c32 i2c serial eeprom
dallas,ds1307 64 x 8, Serial, I2C Real-Time Clock
dallas,ds1338 I2C RTC with 56-Byte NV RAM
@@ -44,6 +45,7 @@ mc,rv3029c2 Real Time Clock Module with I2C-Bus
national,lm75 I2C TEMP SENSOR
national,lm80 Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor
national,lm92 ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator with Two-Wire Interface
+nuvoton,npct501 i2c trusted platform module (TPM)
nxp,pca9556 Octal SMBus and I2C registered interface
nxp,pca9557 8-bit I2C-bus and SMBus I/O port with reset
nxp,pcf8563 Real-time clock/calendar
@@ -61,3 +63,4 @@ taos,tsl2550 Ambient Light Sensor with SMBUS/Two Wire Serial Interface
ti,tsc2003 I2C Touch-Screen Controller
ti,tmp102 Low Power Digital Temperature Sensor with SMBUS/Two Wire Serial Interface
ti,tmp275 Digital Temperature Sensor
+winbond,wpct301 i2c trusted platform module (TPM)
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
index 2a4b4bce6110..7fc1b010fa75 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
@@ -1,33 +1,30 @@
-* Freescale 83xx DMA Controller
+* Freescale DMA Controllers
-Freescale PowerPC 83xx have on chip general purpose DMA controllers.
+** Freescale Elo DMA Controller
+ This is a little-endian 4-channel DMA controller, used in Freescale mpc83xx
+ series chips such as mpc8315, mpc8349, mpc8379 etc.
Required properties:
-- compatible : compatible list, contains 2 entries, first is
- "fsl,CHIP-dma", where CHIP is the processor
- (mpc8349, mpc8360, etc.) and the second is
- "fsl,elo-dma"
-- reg : <registers mapping for DMA general status reg>
-- ranges : Should be defined as specified in 1) to describe the
- DMA controller channels.
+- compatible : must include "fsl,elo-dma"
+- reg : DMA General Status Register, i.e. DGSR which contains
+ status for all the 4 DMA channels
+- ranges : describes the mapping between the address space of the
+ DMA channels and the address space of the DMA controller
- cell-index : controller index. 0 for controller @ 0x8100
-- interrupts : <interrupt mapping for DMA IRQ>
+- interrupts : interrupt specifier for DMA IRQ
- interrupt-parent : optional, if needed for interrupt mapping
-
- DMA channel nodes:
- - compatible : compatible list, contains 2 entries, first is
- "fsl,CHIP-dma-channel", where CHIP is the processor
- (mpc8349, mpc8350, etc.) and the second is
- "fsl,elo-dma-channel". However, see note below.
- - reg : <registers mapping for channel>
- - cell-index : dma channel index starts at 0.
+ - compatible : must include "fsl,elo-dma-channel"
+ However, see note below.
+ - reg : DMA channel specific registers
+ - cell-index : DMA channel index starts at 0.
Optional properties:
- - interrupts : <interrupt mapping for DMA channel IRQ>
- (on 83xx this is expected to be identical to
- the interrupts property of the parent node)
+ - interrupts : interrupt specifier for DMA channel IRQ
+ (on 83xx this is expected to be identical to
+ the interrupts property of the parent node)
- interrupt-parent : optional, if needed for interrupt mapping
Example:
@@ -70,30 +67,27 @@ Example:
};
};
-* Freescale 85xx/86xx DMA Controller
-
-Freescale PowerPC 85xx/86xx have on chip general purpose DMA controllers.
+** Freescale EloPlus DMA Controller
+ This is a 4-channel DMA controller with extended addresses and chaining,
+ mainly used in Freescale mpc85xx/86xx, Pxxx and BSC series chips, such as
+ mpc8540, mpc8641 p4080, bsc9131 etc.
Required properties:
-- compatible : compatible list, contains 2 entries, first is
- "fsl,CHIP-dma", where CHIP is the processor
- (mpc8540, mpc8540, etc.) and the second is
- "fsl,eloplus-dma"
-- reg : <registers mapping for DMA general status reg>
+- compatible : must include "fsl,eloplus-dma"
+- reg : DMA General Status Register, i.e. DGSR which contains
+ status for all the 4 DMA channels
- cell-index : controller index. 0 for controller @ 0x21000,
1 for controller @ 0xc000
-- ranges : Should be defined as specified in 1) to describe the
- DMA controller channels.
+- ranges : describes the mapping between the address space of the
+ DMA channels and the address space of the DMA controller
- DMA channel nodes:
- - compatible : compatible list, contains 2 entries, first is
- "fsl,CHIP-dma-channel", where CHIP is the processor
- (mpc8540, mpc8560, etc.) and the second is
- "fsl,eloplus-dma-channel". However, see note below.
- - cell-index : dma channel index starts at 0.
- - reg : <registers mapping for channel>
- - interrupts : <interrupt mapping for DMA channel IRQ>
+ - compatible : must include "fsl,eloplus-dma-channel"
+ However, see note below.
+ - cell-index : DMA channel index starts at 0.
+ - reg : DMA channel specific registers
+ - interrupts : interrupt specifier for DMA channel IRQ
- interrupt-parent : optional, if needed for interrupt mapping
Example:
@@ -134,6 +128,76 @@ Example:
};
};
+** Freescale Elo3 DMA Controller
+ DMA controller which has same function as EloPlus except that Elo3 has 8
+ channels while EloPlus has only 4, it is used in Freescale Txxx and Bxxx
+ series chips, such as t1040, t4240, b4860.
+
+Required properties:
+
+- compatible : must include "fsl,elo3-dma"
+- reg : contains two entries for DMA General Status Registers,
+ i.e. DGSR0 which includes status for channel 1~4, and
+ DGSR1 for channel 5~8
+- ranges : describes the mapping between the address space of the
+ DMA channels and the address space of the DMA controller
+
+- DMA channel nodes:
+ - compatible : must include "fsl,eloplus-dma-channel"
+ - reg : DMA channel specific registers
+ - interrupts : interrupt specifier for DMA channel IRQ
+ - interrupt-parent : optional, if needed for interrupt mapping
+
+Example:
+dma@100300 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,elo3-dma";
+ reg = <0x100300 0x4>,
+ <0x100600 0x4>;
+ ranges = <0x0 0x100100 0x500>;
+ dma-channel@0 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x0 0x80>;
+ interrupts = <28 2 0 0>;
+ };
+ dma-channel@80 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x80 0x80>;
+ interrupts = <29 2 0 0>;
+ };
+ dma-channel@100 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x100 0x80>;
+ interrupts = <30 2 0 0>;
+ };
+ dma-channel@180 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x180 0x80>;
+ interrupts = <31 2 0 0>;
+ };
+ dma-channel@300 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x300 0x80>;
+ interrupts = <76 2 0 0>;
+ };
+ dma-channel@380 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x380 0x80>;
+ interrupts = <77 2 0 0>;
+ };
+ dma-channel@400 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x400 0x80>;
+ interrupts = <78 2 0 0>;
+ };
+ dma-channel@480 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x480 0x80>;
+ interrupts = <79 2 0 0>;
+ };
+};
+
Note on DMA channel compatible properties: The compatible property must say
"fsl,elo-dma-channel" or "fsl,eloplus-dma-channel" to be used by the Elo DMA
driver (fsldma). Any DMA channel used by fsldma cannot be used by another
diff --git a/Documentation/dmatest.txt b/Documentation/dmatest.txt
index a2b5663eae26..dd77a81bdb80 100644
--- a/Documentation/dmatest.txt
+++ b/Documentation/dmatest.txt
@@ -15,39 +15,48 @@ be built as module or inside kernel. Let's consider those cases.
Part 2 - When dmatest is built as a module...
-After mounting debugfs and loading the module, the /sys/kernel/debug/dmatest
-folder with nodes will be created. There are two important files located. First
-is the 'run' node that controls run and stop phases of the test, and the second
-one, 'results', is used to get the test case results.
-
-Note that in this case test will not run on load automatically.
-
Example of usage:
+ % modprobe dmatest channel=dma0chan0 timeout=2000 iterations=1 run=1
+
+...or:
+ % modprobe dmatest
% echo dma0chan0 > /sys/module/dmatest/parameters/channel
% echo 2000 > /sys/module/dmatest/parameters/timeout
% echo 1 > /sys/module/dmatest/parameters/iterations
- % echo 1 > /sys/kernel/debug/dmatest/run
+ % echo 1 > /sys/module/dmatest/parameters/run
+
+...or on the kernel command line:
+
+ dmatest.channel=dma0chan0 dmatest.timeout=2000 dmatest.iterations=1 dmatest.run=1
Hint: available channel list could be extracted by running the following
command:
% ls -1 /sys/class/dma/
-After a while you will start to get messages about current status or error like
-in the original code.
+Once started a message like "dmatest: Started 1 threads using dma0chan0" is
+emitted. After that only test failure messages are reported until the test
+stops.
Note that running a new test will not stop any in progress test.
-The following command should return actual state of the test.
- % cat /sys/kernel/debug/dmatest/run
-
-To wait for test done the user may perform a busy loop that checks the state.
-
- % while [ $(cat /sys/kernel/debug/dmatest/run) = "Y" ]
- > do
- > echo -n "."
- > sleep 1
- > done
- > echo
+The following command returns the state of the test.
+ % cat /sys/module/dmatest/parameters/run
+
+To wait for test completion userpace can poll 'run' until it is false, or use
+the wait parameter. Specifying 'wait=1' when loading the module causes module
+initialization to pause until a test run has completed, while reading
+/sys/module/dmatest/parameters/wait waits for any running test to complete
+before returning. For example, the following scripts wait for 42 tests
+to complete before exiting. Note that if 'iterations' is set to 'infinite' then
+waiting is disabled.
+
+Example:
+ % modprobe dmatest run=1 iterations=42 wait=1
+ % modprobe -r dmatest
+...or:
+ % modprobe dmatest run=1 iterations=42
+ % cat /sys/module/dmatest/parameters/wait
+ % modprobe -r dmatest
Part 3 - When built-in in the kernel...
@@ -62,21 +71,22 @@ case. You always could check them at run-time by running
Part 4 - Gathering the test results
-The module provides a storage for the test results in the memory. The gathered
-data could be used after test is done.
+Test results are printed to the kernel log buffer with the format:
-The special file 'results' in the debugfs represents gathered data of the in
-progress test. The messages collected are printed to the kernel log as well.
+"dmatest: result <channel>: <test id>: '<error msg>' with src_off=<val> dst_off=<val> len=<val> (<err code>)"
Example of output:
- % cat /sys/kernel/debug/dmatest/results
- dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
+ % dmesg | tail -n 1
+ dmatest: result dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
The message format is unified across the different types of errors. A number in
the parens represents additional information, e.g. error code, error counter,
-or status.
+or status. A test thread also emits a summary line at completion listing the
+number of tests executed, number that failed, and a result code.
-Comparison between buffers is stored to the dedicated structure.
+Example:
+ % dmesg | tail -n 1
+ dmatest: dma0chan0-copy0: summary 1 test, 0 failures 1000 iops 100000 KB/s (0)
-Note that the verify result is now accessible only via file 'results' in the
-debugfs.
+The details of a data miscompare error are also emitted, but do not follow the
+above format.
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt
index 9dae59407437..5dd282dda55c 100644
--- a/Documentation/filesystems/btrfs.txt
+++ b/Documentation/filesystems/btrfs.txt
@@ -70,6 +70,12 @@ Unless otherwise specified, all options default to off.
See comments at the top of fs/btrfs/check-integrity.c for more info.
+ commit=<seconds>
+ Set the interval of periodic commit, 30 seconds by default. Higher
+ values defer data being synced to permanent storage with obvious
+ consequences when the system crashes. The upper bound is not forced,
+ but a warning is printed if it's more than 300 seconds (5 minutes).
+
compress
compress=<type>
compress-force
@@ -154,7 +160,11 @@ Unless otherwise specified, all options default to off.
Currently this scans a list of several previous tree roots and tries to
use the first readable.
- skip_balance
+ rescan_uuid_tree
+ Force check and rebuild procedure of the UUID tree. This should not
+ normally be needed.
+
+ skip_balance
Skip automatic resume of interrupted balance operation after mount.
May be resumed with "btrfs balance resume."
@@ -234,24 +244,14 @@ available from the git repository at the following location:
These include the following tools:
-mkfs.btrfs: create a filesystem
-
-btrfsctl: control program to create snapshots and subvolumes:
+* mkfs.btrfs: create a filesystem
- mount /dev/sda2 /mnt
- btrfsctl -s new_subvol_name /mnt
- btrfsctl -s snapshot_of_default /mnt/default
- btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
- btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
- ls /mnt
- default snapshot_of_a_snapshot snapshot_of_new_subvol
- new_subvol_name snapshot_of_default
+* btrfs: a single tool to manage the filesystems, refer to the manpage for more details
- Snapshots and subvolumes cannot be deleted right now, but you can
- rm -rf all the files and directories inside them.
+* 'btrfsck' or 'btrfs check': do a consistency check of the filesystem
-btrfsck: do a limited check of the FS extent trees.
+Other tools for specific tasks:
-btrfs-debug-tree: print all of the FS metadata in text form. Example:
+* btrfs-convert: in-place conversion from ext2/3/4 filesystems
- btrfs-debug-tree /dev/sda2 >& big_output_file
+* btrfs-image: dump filesystem metadata for debugging
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9ca3e74a10e1..50680a59a2ff 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1190,15 +1190,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
owned by uid=0.
ima_hash= [IMA]
- Format: { "sha1" | "md5" }
+ Format: { md5 | sha1 | rmd160 | sha256 | sha384
+ | sha512 | ... }
default: "sha1"
+ The list of supported hash algorithms is defined
+ in crypto/hash_info.h.
+
ima_tcb [IMA]
Load a policy which meets the needs of the Trusted
Computing Base. This means IMA will measure all
programs exec'd, files mmap'd for exec, and all files
opened for read by uid=0.
+ ima_template= [IMA]
+ Select one of defined IMA measurements template formats.
+ Formats: { "ima" | "ima-ng" }
+ Default: "ima-ng"
+
init= [KNL]
Format: <full_path>
Run specified binary instead of /sbin/init as init
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 0f54333b0ff2..b6ce00b2be9a 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -547,13 +547,11 @@ helper functions described in Section 4. In that case, pm_runtime_resume()
should be used. Of course, for this purpose the device's runtime PM has to be
enabled earlier by calling pm_runtime_enable().
-If the device bus type's or driver's ->probe() callback runs
-pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts,
-they will fail returning -EAGAIN, because the device's usage counter is
-incremented by the driver core before executing ->probe(). Still, it may be
-desirable to suspend the device as soon as ->probe() has finished, so the driver
-core uses pm_runtime_put_sync() to invoke the subsystem-level idle callback for
-the device at that time.
+It may be desirable to suspend the device once ->probe() has finished.
+Therefore the driver core uses the asyncronous pm_request_idle() to submit a
+request to execute the subsystem-level idle callback for the device at that
+time. A driver that makes use of the runtime autosuspend feature, may want to
+update the last busy mark before returning from ->probe().
Moreover, the driver core prevents runtime PM callbacks from racing with the bus
notifier callback in __device_release_driver(), which is necessary, because the
@@ -656,7 +654,7 @@ out the following operations:
__pm_runtime_disable() with 'false' as the second argument for every device
right before executing the subsystem-level .suspend_late() callback for it.
- * During system resume it calls pm_runtime_enable() and pm_runtime_put_sync()
+ * During system resume it calls pm_runtime_enable() and pm_runtime_put()
for every device right after executing the subsystem-level .resume_early()
callback and right after executing the subsystem-level .resume() callback
for it, respectively.
diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX
index 414235c1fcfc..45c82fd3e9d3 100644
--- a/Documentation/security/00-INDEX
+++ b/Documentation/security/00-INDEX
@@ -22,3 +22,5 @@ keys.txt
- description of the kernel key retention service.
tomoyo.txt
- documentation on the TOMOYO Linux Security Module.
+IMA-templates.txt
+ - documentation on the template management mechanism for IMA.
diff --git a/Documentation/security/IMA-templates.txt b/Documentation/security/IMA-templates.txt
new file mode 100644
index 000000000000..a777e5f1df5b
--- /dev/null
+++ b/Documentation/security/IMA-templates.txt
@@ -0,0 +1,87 @@
+ IMA Template Management Mechanism
+
+
+==== INTRODUCTION ====
+
+The original 'ima' template is fixed length, containing the filedata hash
+and pathname. The filedata hash is limited to 20 bytes (md5/sha1).
+The pathname is a null terminated string, limited to 255 characters.
+To overcome these limitations and to add additional file metadata, it is
+necessary to extend the current version of IMA by defining additional
+templates. For example, information that could be possibly reported are
+the inode UID/GID or the LSM labels either of the inode and of the process
+that is accessing it.
+
+However, the main problem to introduce this feature is that, each time
+a new template is defined, the functions that generate and display
+the measurements list would include the code for handling a new format
+and, thus, would significantly grow over the time.
+
+The proposed solution solves this problem by separating the template
+management from the remaining IMA code. The core of this solution is the
+definition of two new data structures: a template descriptor, to determine
+which information should be included in the measurement list; a template
+field, to generate and display data of a given type.
+
+Managing templates with these structures is very simple. To support
+a new data type, developers define the field identifier and implement
+two functions, init() and show(), respectively to generate and display
+measurement entries. Defining a new template descriptor requires
+specifying the template format, a string of field identifiers separated
+by the '|' character. While in the current implementation it is possible
+to define new template descriptors only by adding their definition in the
+template specific code (ima_template.c), in a future version it will be
+possible to register a new template on a running kernel by supplying to IMA
+the desired format string. In this version, IMA initializes at boot time
+all defined template descriptors by translating the format into an array
+of template fields structures taken from the set of the supported ones.
+
+After the initialization step, IMA will call ima_alloc_init_template()
+(new function defined within the patches for the new template management
+mechanism) to generate a new measurement entry by using the template
+descriptor chosen through the kernel configuration or through the newly
+introduced 'ima_template=' kernel command line parameter. It is during this
+phase that the advantages of the new architecture are clearly shown:
+the latter function will not contain specific code to handle a given template
+but, instead, it simply calls the init() method of the template fields
+associated to the chosen template descriptor and store the result (pointer
+to allocated data and data length) in the measurement entry structure.
+
+The same mechanism is employed to display measurements entries.
+The functions ima[_ascii]_measurements_show() retrieve, for each entry,
+the template descriptor used to produce that entry and call the show()
+method for each item of the array of template fields structures.
+
+
+
+==== SUPPORTED TEMPLATE FIELDS AND DESCRIPTORS ====
+
+In the following, there is the list of supported template fields
+('<identifier>': description), that can be used to define new template
+descriptors by adding their identifier to the format string
+(support for more data types will be added later):
+
+ - 'd': the digest of the event (i.e. the digest of a measured file),
+ calculated with the SHA1 or MD5 hash algorithm;
+ - 'n': the name of the event (i.e. the file name), with size up to 255 bytes;
+ - 'd-ng': the digest of the event, calculated with an arbitrary hash
+ algorithm (field format: [<hash algo>:]digest, where the digest
+ prefix is shown only if the hash algorithm is not SHA1 or MD5);
+ - 'n-ng': the name of the event, without size limitations.
+
+
+Below, there is the list of defined template descriptors:
+ - "ima": its format is 'd|n';
+ - "ima-ng" (default): its format is 'd-ng|n-ng'.
+
+
+
+==== USE ====
+
+To specify the template descriptor to be used to generate measurement entries,
+currently the following methods are supported:
+
+ - select a template descriptor among those supported in the kernel
+ configuration ('ima-ng' is the default choice);
+ - specify a template descriptor name from the kernel command line through
+ the 'ima_template=' parameter.
diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
index 7b4145d00452..a4c33f1a7c6d 100644
--- a/Documentation/security/keys.txt
+++ b/Documentation/security/keys.txt
@@ -865,15 +865,14 @@ encountered:
calling processes has a searchable link to the key from one of its
keyrings. There are three functions for dealing with these:
- key_ref_t make_key_ref(const struct key *key,
- unsigned long possession);
+ key_ref_t make_key_ref(const struct key *key, bool possession);
struct key *key_ref_to_ptr(const key_ref_t key_ref);
- unsigned long is_key_possessed(const key_ref_t key_ref);
+ bool is_key_possessed(const key_ref_t key_ref);
The first function constructs a key reference from a key pointer and
- possession information (which must be 0 or 1 and not any other value).
+ possession information (which must be true or false).
The second function retrieves the key pointer from a reference and the
third retrieves the possession flag.
@@ -961,14 +960,17 @@ payload contents" for more information.
the argument will not be parsed.
-(*) Extra references can be made to a key by calling the following function:
+(*) Extra references can be made to a key by calling one of the following
+ functions:
+ struct key *__key_get(struct key *key);
struct key *key_get(struct key *key);
- These need to be disposed of by calling key_put() when they've been
- finished with. The key pointer passed in will be returned. If the pointer
- is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and
- no increment will take place.
+ Keys so references will need to be disposed of by calling key_put() when
+ they've been finished with. The key pointer passed in will be returned.
+
+ In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set
+ then the key will not be dereferenced and no increment will take place.
(*) A key's serial number can be obtained by calling:
diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock
index 7521d367f21d..6dea4fd5c961 100644
--- a/Documentation/vm/split_page_table_lock
+++ b/Documentation/vm/split_page_table_lock
@@ -63,9 +63,9 @@ levels.
PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table
allocation and pgtable_pmd_page_dtor() on freeing.
-Allocation usually happens in pmd_alloc_one(), freeing in pmd_free(), but
-make sure you cover all PMD table allocation / freeing paths: i.e X86_PAE
-preallocate few PMDs on pgd_alloc().
+Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and
+pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing
+paths: i.e X86_PAE preallocate few PMDs on pgd_alloc().
With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK.
diff --git a/MAINTAINERS b/MAINTAINERS
index 63f30484932b..8285ed4676b6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4065,6 +4065,7 @@ F: arch/x86/include/uapi/asm/hyperv.h
F: arch/x86/kernel/cpu/mshyperv.c
F: drivers/hid/hid-hyperv.c
F: drivers/hv/
+F: drivers/input/serio/hyperv-keyboard.c
F: drivers/net/hyperv/
F: drivers/scsi/storvsc_drv.c
F: drivers/video/hyperv_fb.c
@@ -7515,9 +7516,10 @@ SELINUX SECURITY MODULE
M: Stephen Smalley <sds@tycho.nsa.gov>
M: James Morris <james.l.morris@oracle.com>
M: Eric Paris <eparis@parisplace.org>
+M: Paul Moore <paul@paul-moore.com>
L: selinux@tycho.nsa.gov (subscribers-only, general discussion)
W: http://selinuxproject.org
-T: git git://git.infradead.org/users/eparis/selinux.git
+T: git git://git.infradead.org/users/pcmoore/selinux
S: Supported
F: include/linux/selinux*
F: security/selinux/
@@ -8664,6 +8666,7 @@ F: drivers/media/usb/tm6000/
TPM DEVICE DRIVER
M: Leonidas Da Silva Barbosa <leosilva@linux.vnet.ibm.com>
M: Ashley Lai <ashley@ashleylai.com>
+M: Peter Huewe <peterhuewe@gmx.de>
M: Rajiv Andrade <mail@srajiv.net>
W: http://tpmdd.sourceforge.net
M: Marcel Selhorst <tpmdd@selhorst.net>
@@ -9522,8 +9525,8 @@ F: drivers/xen/*swiotlb*
XFS FILESYSTEM
P: Silicon Graphics Inc
+M: Dave Chinner <dchinner@fromorbit.com>
M: Ben Myers <bpm@sgi.com>
-M: Alex Elder <elder@kernel.org>
M: xfs@oss.sgi.com
L: xfs@oss.sgi.com
W: http://oss.sgi.com/projects/xfs
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 135c674eaf9e..d39dc9b95a2c 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -16,8 +16,8 @@ config ALPHA
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+ select GENERIC_CLOCKEVENTS
select GENERIC_SMP_IDLE_THREAD
- select GENERIC_CMOS_UPDATE
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select HAVE_MOD_ARCH_SPECIFIC
@@ -488,6 +488,20 @@ config VGA_HOSE
which always have multiple hoses, and whose consoles support it.
+config ALPHA_QEMU
+ bool "Run under QEMU emulation"
+ depends on !ALPHA_GENERIC
+ ---help---
+ Assume the presence of special features supported by QEMU PALcode
+ that reduce the overhead of system emulation.
+
+ Generic kernels will auto-detect QEMU. But when building a
+ system-specific kernel, the assumption is that we want to
+ elimiate as many runtime tests as possible.
+
+ If unsure, say N.
+
+
config ALPHA_SRM
bool "Use SRM as bootloader" if ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_NAUTILUS || ALPHA_NONAME
depends on TTY
@@ -572,6 +586,30 @@ config NUMA
Access). This option is for configuring high-end multiprocessor
server machines. If in doubt, say N.
+config ALPHA_WTINT
+ bool "Use WTINT" if ALPHA_SRM || ALPHA_GENERIC
+ default y if ALPHA_QEMU
+ default n if ALPHA_EV5 || ALPHA_EV56 || (ALPHA_EV4 && !ALPHA_LCA)
+ default n if !ALPHA_SRM && !ALPHA_GENERIC
+ default y if SMP
+ ---help---
+ The Wait for Interrupt (WTINT) PALcall attempts to place the CPU
+ to sleep until the next interrupt. This may reduce the power
+ consumed, and the heat produced by the computer. However, it has
+ the side effect of making the cycle counter unreliable as a timing
+ device across the sleep.
+
+ For emulation under QEMU, definitely say Y here, as we have other
+ mechanisms for measuring time than the cycle counter.
+
+ For EV4 (but not LCA), EV5 and EV56 systems, or for systems running
+ MILO, sleep mode is not supported so you might as well say N here.
+
+ For SMP systems we cannot use the cycle counter for timing anyway,
+ so you might as well say Y here.
+
+ If unsure, say N.
+
config NODES_SHIFT
int
default "7"
@@ -613,9 +651,41 @@ config VERBOSE_MCHECK_ON
Take the default (1) unless you want more control or more info.
+choice
+ prompt "Timer interrupt frequency (HZ)?"
+ default HZ_128 if ALPHA_QEMU
+ default HZ_1200 if ALPHA_RAWHIDE
+ default HZ_1024
+ ---help---
+ The frequency at which timer interrupts occur. A high frequency
+ minimizes latency, whereas a low frequency minimizes overhead of
+ process accounting. The later effect is especially significant
+ when being run under QEMU.
+
+ Note that some Alpha hardware cannot change the interrupt frequency
+ of the timer. If unsure, say 1024 (or 1200 for Rawhide).
+
+ config HZ_32
+ bool "32 Hz"
+ config HZ_64
+ bool "64 Hz"
+ config HZ_128
+ bool "128 Hz"
+ config HZ_256
+ bool "256 Hz"
+ config HZ_1024
+ bool "1024 Hz"
+ config HZ_1200
+ bool "1200 Hz"
+endchoice
+
config HZ
- int
- default 1200 if ALPHA_RAWHIDE
+ int
+ default 32 if HZ_32
+ default 64 if HZ_64
+ default 128 if HZ_128
+ default 256 if HZ_256
+ default 1200 if HZ_1200
default 1024
source "drivers/pci/Kconfig"
diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h
index 72dbf2359270..75cb3641ed2f 100644
--- a/arch/alpha/include/asm/machvec.h
+++ b/arch/alpha/include/asm/machvec.h
@@ -33,6 +33,7 @@ struct alpha_machine_vector
int nr_irqs;
int rtc_port;
+ int rtc_boot_cpu_only;
unsigned int max_asn;
unsigned long max_isa_dma_address;
unsigned long irq_probe_mask;
@@ -95,9 +96,6 @@ struct alpha_machine_vector
struct _alpha_agp_info *(*agp_info)(void);
- unsigned int (*rtc_get_time)(struct rtc_time *);
- int (*rtc_set_time)(struct rtc_time *);
-
const char *vector_name;
/* NUMA information */
@@ -126,13 +124,19 @@ extern struct alpha_machine_vector alpha_mv;
#ifdef CONFIG_ALPHA_GENERIC
extern int alpha_using_srm;
+extern int alpha_using_qemu;
#else
-#ifdef CONFIG_ALPHA_SRM
-#define alpha_using_srm 1
-#else
-#define alpha_using_srm 0
-#endif
+# ifdef CONFIG_ALPHA_SRM
+# define alpha_using_srm 1
+# else
+# define alpha_using_srm 0
+# endif
+# ifdef CONFIG_ALPHA_QEMU
+# define alpha_using_qemu 1
+# else
+# define alpha_using_qemu 0
+# endif
#endif /* GENERIC */
-#endif
+#endif /* __KERNEL__ */
#endif /* __ALPHA_MACHVEC_H */
diff --git a/arch/alpha/include/asm/pal.h b/arch/alpha/include/asm/pal.h
index 6fcd2b5b08f0..5422a47646fc 100644
--- a/arch/alpha/include/asm/pal.h
+++ b/arch/alpha/include/asm/pal.h
@@ -89,6 +89,7 @@ __CALL_PAL_W1(wrmces, unsigned long);
__CALL_PAL_RW2(wrperfmon, unsigned long, unsigned long, unsigned long);
__CALL_PAL_W1(wrusp, unsigned long);
__CALL_PAL_W1(wrvptptr, unsigned long);
+__CALL_PAL_RW1(wtint, unsigned long, unsigned long);
/*
* TB routines..
@@ -111,5 +112,75 @@ __CALL_PAL_W1(wrvptptr, unsigned long);
#define tbiap() __tbi(-1, /* no second argument */)
#define tbia() __tbi(-2, /* no second argument */)
+/*
+ * QEMU Cserv routines..
+ */
+
+static inline unsigned long
+qemu_get_walltime(void)
+{
+ register unsigned long v0 __asm__("$0");
+ register unsigned long a0 __asm__("$16") = 3;
+
+ asm("call_pal %2 # cserve get_time"
+ : "=r"(v0), "+r"(a0)
+ : "i"(PAL_cserve)
+ : "$17", "$18", "$19", "$20", "$21");
+
+ return v0;
+}
+
+static inline unsigned long
+qemu_get_alarm(void)
+{
+ register unsigned long v0 __asm__("$0");
+ register unsigned long a0 __asm__("$16") = 4;
+
+ asm("call_pal %2 # cserve get_alarm"
+ : "=r"(v0), "+r"(a0)
+ : "i"(PAL_cserve)
+ : "$17", "$18", "$19", "$20", "$21");
+
+ return v0;
+}
+
+static inline void
+qemu_set_alarm_rel(unsigned long expire)
+{
+ register unsigned long a0 __asm__("$16") = 5;
+ register unsigned long a1 __asm__("$17") = expire;
+
+ asm volatile("call_pal %2 # cserve set_alarm_rel"
+ : "+r"(a0), "+r"(a1)
+ : "i"(PAL_cserve)
+ : "$0", "$18", "$19", "$20", "$21");
+}
+
+static inline void
+qemu_set_alarm_abs(unsigned long expire)
+{
+ register unsigned long a0 __asm__("$16") = 6;
+ register unsigned long a1 __asm__("$17") = expire;
+
+ asm volatile("call_pal %2 # cserve set_alarm_abs"
+ : "+r"(a0), "+r"(a1)
+ : "i"(PAL_cserve)
+ : "$0", "$18", "$19", "$20", "$21");
+}
+
+static inline unsigned long
+qemu_get_vmtime(void)
+{
+ register unsigned long v0 __asm__("$0");
+ register unsigned long a0 __asm__("$16") = 7;
+
+ asm("call_pal %2 # cserve get_time"
+ : "=r"(v0), "+r"(a0)
+ : "i"(PAL_cserve)
+ : "$17", "$18", "$19", "$20", "$21");
+
+ return v0;
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* __ALPHA_PAL_H */
diff --git a/arch/alpha/include/asm/rtc.h b/arch/alpha/include/asm/rtc.h
index d70408d36677..f71c3b0ed360 100644
--- a/arch/alpha/include/asm/rtc.h
+++ b/arch/alpha/include/asm/rtc.h
@@ -1,12 +1 @@
-#ifndef _ALPHA_RTC_H
-#define _ALPHA_RTC_H
-
-#if defined(CONFIG_ALPHA_MARVEL) && defined(CONFIG_SMP) \
- || defined(CONFIG_ALPHA_GENERIC)
-# define get_rtc_time alpha_mv.rtc_get_time
-# define set_rtc_time alpha_mv.rtc_set_time
-#endif
-
#include <asm-generic/rtc.h>
-
-#endif
diff --git a/arch/alpha/include/asm/string.h b/arch/alpha/include/asm/string.h
index b02b8a282940..c2911f591704 100644
--- a/arch/alpha/include/asm/string.h
+++ b/arch/alpha/include/asm/string.h
@@ -22,15 +22,27 @@ extern void * __memcpy(void *, const void *, size_t);
#define __HAVE_ARCH_MEMSET
extern void * __constant_c_memset(void *, unsigned long, size_t);
+extern void * ___memset(void *, int, size_t);
extern void * __memset(void *, int, size_t);
extern void * memset(void *, int, size_t);
-#define memset(s, c, n) \
-(__builtin_constant_p(c) \
- ? (__builtin_constant_p(n) && (c) == 0 \
- ? __builtin_memset((s),0,(n)) \
- : __constant_c_memset((s),0x0101010101010101UL*(unsigned char)(c),(n))) \
- : __memset((s),(c),(n)))
+/* For gcc 3.x, we cannot have the inline function named "memset" because
+ the __builtin_memset will attempt to resolve to the inline as well,
+ leading to a "sorry" about unimplemented recursive inlining. */
+extern inline void *__memset(void *s, int c, size_t n)
+{
+ if (__builtin_constant_p(c)) {
+ if (__builtin_constant_p(n)) {
+ return __builtin_memset(s, c, n);
+ } else {
+ unsigned long c8 = (c & 0xff) * 0x0101010101010101UL;
+ return __constant_c_memset(s, c8, n);
+ }
+ }
+ return ___memset(s, c, n);
+}
+
+#define memset __memset
#define __HAVE_ARCH_STRCPY
extern char * strcpy(char *,const char *);
diff --git a/arch/alpha/include/uapi/asm/pal.h b/arch/alpha/include/uapi/asm/pal.h
index 3c0ce08e5f59..dfc8140b9088 100644
--- a/arch/alpha/include/uapi/asm/pal.h
+++ b/arch/alpha/include/uapi/asm/pal.h
@@ -46,6 +46,7 @@
#define PAL_rdusp 58
#define PAL_whami 60
#define PAL_retsys 61
+#define PAL_wtint 62
#define PAL_rti 63
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index 84ec46b38f7d..0d54650e78fc 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_PCI) += pci.o pci_iommu.o pci-sysfs.o
obj-$(CONFIG_SRM_ENV) += srm_env.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_RTC_DRV_ALPHA) += rtc.o
ifdef CONFIG_ALPHA_GENERIC
diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c
index 89566b346c0f..f4c7ab6f43b0 100644
--- a/arch/alpha/kernel/alpha_ksyms.c
+++ b/arch/alpha/kernel/alpha_ksyms.c
@@ -40,6 +40,7 @@ EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(__memset);
+EXPORT_SYMBOL(___memset);
EXPORT_SYMBOL(__memsetw);
EXPORT_SYMBOL(__constant_c_memset);
EXPORT_SYMBOL(copy_page);
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 28e4429596f3..1c8625cb0e25 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -66,21 +66,7 @@ do_entInt(unsigned long type, unsigned long vector,
break;
case 1:
old_regs = set_irq_regs(regs);
-#ifdef CONFIG_SMP
- {
- long cpu;
-
- smp_percpu_timer_interrupt(regs);
- cpu = smp_processor_id();
- if (cpu != boot_cpuid) {
- kstat_incr_irqs_this_cpu(RTC_IRQ, irq_to_desc(RTC_IRQ));
- } else {
- handle_irq(RTC_IRQ);
- }
- }
-#else
handle_irq(RTC_IRQ);
-#endif
set_irq_regs(old_regs);
return;
case 2:
@@ -228,7 +214,7 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr,
*/
struct irqaction timer_irqaction = {
- .handler = timer_interrupt,
+ .handler = rtc_timer_interrupt,
.name = "timer",
};
diff --git a/arch/alpha/kernel/machvec_impl.h b/arch/alpha/kernel/machvec_impl.h
index 7fa62488bd16..f54bdf658cd0 100644
--- a/arch/alpha/kernel/machvec_impl.h
+++ b/arch/alpha/kernel/machvec_impl.h
@@ -43,10 +43,7 @@
#define CAT1(x,y) x##y
#define CAT(x,y) CAT1(x,y)
-#define DO_DEFAULT_RTC \
- .rtc_port = 0x70, \
- .rtc_get_time = common_get_rtc_time, \
- .rtc_set_time = common_set_rtc_time
+#define DO_DEFAULT_RTC .rtc_port = 0x70
#define DO_EV4_MMU \
.max_asn = EV4_MAX_ASN, \
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index d821b17047e0..c52e7f0ee5f6 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -83,6 +83,8 @@ struct alpha_pmu_t {
long pmc_left[3];
/* Subroutine for allocation of PMCs. Enforces constraints. */
int (*check_constraints)(struct perf_event **, unsigned long *, int);
+ /* Subroutine for checking validity of a raw event for this PMU. */
+ int (*raw_event_valid)(u64 config);
};
/*
@@ -203,6 +205,12 @@ success:
}
+static int ev67_raw_event_valid(u64 config)
+{
+ return config >= EV67_CYCLES && config < EV67_LAST_ET;
+};
+
+
static const struct alpha_pmu_t ev67_pmu = {
.event_map = ev67_perfmon_event_map,
.max_events = ARRAY_SIZE(ev67_perfmon_event_map),
@@ -211,7 +219,8 @@ static const struct alpha_pmu_t ev67_pmu = {
.pmc_count_mask = {EV67_PCTR_0_COUNT_MASK, EV67_PCTR_1_COUNT_MASK, 0},
.pmc_max_period = {(1UL<<20) - 1, (1UL<<20) - 1, 0},
.pmc_left = {16, 4, 0},
- .check_constraints = ev67_check_constraints
+ .check_constraints = ev67_check_constraints,
+ .raw_event_valid = ev67_raw_event_valid,
};
@@ -609,7 +618,9 @@ static int __hw_perf_event_init(struct perf_event *event)
} else if (attr->type == PERF_TYPE_HW_CACHE) {
return -EOPNOTSUPP;
} else if (attr->type == PERF_TYPE_RAW) {
- ev = attr->config & 0xff;
+ if (!alpha_pmu->raw_event_valid(attr->config))
+ return -EINVAL;
+ ev = attr->config;
} else {
return -EOPNOTSUPP;
}
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index f2360a74e5d5..1941a07b5811 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -46,6 +46,23 @@
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL(pm_power_off);
+#ifdef CONFIG_ALPHA_WTINT
+/*
+ * Sleep the CPU.
+ * EV6, LCA45 and QEMU know how to power down, skipping N timer interrupts.
+ */
+void arch_cpu_idle(void)
+{
+ wtint(0);
+ local_irq_enable();
+}
+
+void arch_cpu_idle_dead(void)
+{
+ wtint(INT_MAX);
+}
+#endif /* ALPHA_WTINT */
+
struct halt_info {
int mode;
char *restart_cmd;
diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h
index d3e52d3fd592..da2d6ec9c370 100644
--- a/arch/alpha/kernel/proto.h
+++ b/arch/alpha/kernel/proto.h
@@ -135,17 +135,15 @@ extern void unregister_srm_console(void);
/* smp.c */
extern void setup_smp(void);
extern void handle_ipi(struct pt_regs *);
-extern void smp_percpu_timer_interrupt(struct pt_regs *);
/* bios32.c */
/* extern void reset_for_srm(void); */
/* time.c */
-extern irqreturn_t timer_interrupt(int irq, void *dev);
+extern irqreturn_t rtc_timer_interrupt(int irq, void *dev);
+extern void init_clockevent(void);
extern void common_init_rtc(void);
extern unsigned long est_cycle_freq;
-extern unsigned int common_get_rtc_time(struct rtc_time *time);
-extern int common_set_rtc_time(struct rtc_time *time);
/* smc37c93x.c */
extern void SMC93x_Init(void);
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
new file mode 100644
index 000000000000..c8d284d8521f
--- /dev/null
+++ b/arch/alpha/kernel/rtc.c
@@ -0,0 +1,323 @@
+/*
+ * linux/arch/alpha/kernel/rtc.c
+ *
+ * Copyright (C) 1991, 1992, 1995, 1999, 2000 Linus Torvalds
+ *
+ * This file contains date handling.
+ */
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+#include <linux/platform_device.h>
+
+#include <asm/rtc.h>
+
+#include "proto.h"
+
+
+/*
+ * Support for the RTC device.
+ *
+ * We don't want to use the rtc-cmos driver, because we don't want to support
+ * alarms, as that would be indistinguishable from timer interrupts.
+ *
+ * Further, generic code is really, really tied to a 1900 epoch. This is
+ * true in __get_rtc_time as well as the users of struct rtc_time e.g.
+ * rtc_tm_to_time. Thankfully all of the other epochs in use are later
+ * than 1900, and so it's easy to adjust.
+ */
+
+static unsigned long rtc_epoch;
+
+static int __init
+specifiy_epoch(char *str)
+{
+ unsigned long epoch = simple_strtoul(str, NULL, 0);
+ if (epoch < 1900)
+ printk("Ignoring invalid user specified epoch %lu\n", epoch);
+ else
+ rtc_epoch = epoch;
+ return 1;
+}
+__setup("epoch=", specifiy_epoch);
+
+static void __init
+init_rtc_epoch(void)
+{
+ int epoch, year, ctrl;
+
+ if (rtc_epoch != 0) {
+ /* The epoch was specified on the command-line. */
+ return;
+ }
+
+ /* Detect the epoch in use on this computer. */
+ ctrl = CMOS_READ(RTC_CONTROL);
+ year = CMOS_READ(RTC_YEAR);
+ if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ year = bcd2bin(year);
+
+ /* PC-like is standard; used for year >= 70 */
+ epoch = 1900;
+ if (year < 20) {
+ epoch = 2000;
+ } else if (year >= 20 && year < 48) {
+ /* NT epoch */
+ epoch = 1980;
+ } else if (year >= 48 && year < 70) {
+ /* Digital UNIX epoch */
+ epoch = 1952;
+ }
+ rtc_epoch = epoch;
+
+ printk(KERN_INFO "Using epoch %d for rtc year %d\n", epoch, year);
+}
+
+static int
+alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+ __get_rtc_time(tm);
+
+ /* Adjust for non-default epochs. It's easier to depend on the
+ generic __get_rtc_time and adjust the epoch here than create
+ a copy of __get_rtc_time with the edits we need. */
+ if (rtc_epoch != 1900) {
+ int year = tm->tm_year;
+ /* Undo the century adjustment made in __get_rtc_time. */
+ if (year >= 100)
+ year -= 100;
+ year += rtc_epoch - 1900;
+ /* Redo the century adjustment with the epoch in place. */
+ if (year <= 69)
+ year += 100;
+ tm->tm_year = year;
+ }
+
+ return rtc_valid_tm(tm);
+}
+
+static int
+alpha_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+ struct rtc_time xtm;
+
+ if (rtc_epoch != 1900) {
+ xtm = *tm;
+ xtm.tm_year -= rtc_epoch - 1900;
+ tm = &xtm;
+ }
+
+ return __set_rtc_time(tm);
+}
+
+static int
+alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime)
+{
+ int retval = 0;
+ int real_seconds, real_minutes, cmos_minutes;
+ unsigned char save_control, save_freq_select;
+
+ /* Note: This code only updates minutes and seconds. Comments
+ indicate this was to avoid messing with unknown time zones,
+ and with the epoch nonsense described above. In order for
+ this to work, the existing clock cannot be off by more than
+ 15 minutes.
+
+ ??? This choice is may be out of date. The x86 port does
+ not have problems with timezones, and the epoch processing has
+ now been fixed in alpha_set_rtc_time.
+
+ In either case, one can always force a full rtc update with
+ the userland hwclock program, so surely 15 minute accuracy
+ is no real burden. */
+
+ /* In order to set the CMOS clock precisely, we have to be called
+ 500 ms after the second nowtime has started, because when
+ nowtime is written into the registers of the CMOS clock, it will
+ jump to the next second precisely 500 ms later. Check the Motorola
+ MC146818A or Dallas DS12887 data sheet for details. */
+
+ /* irq are locally disabled here */
+ spin_lock(&rtc_lock);
+ /* Tell the clock it's being set */
+ save_control = CMOS_READ(RTC_CONTROL);
+ CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+
+ /* Stop and reset prescaler */
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+ cmos_minutes = CMOS_READ(RTC_MINUTES);
+ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ cmos_minutes = bcd2bin(cmos_minutes);
+
+ real_seconds = nowtime % 60;
+ real_minutes = nowtime / 60;
+ if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1) {
+ /* correct for half hour time zone */
+ real_minutes += 30;
+ }
+ real_minutes %= 60;
+
+ if (abs(real_minutes - cmos_minutes) < 30) {
+ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ real_seconds = bin2bcd(real_seconds);
+ real_minutes = bin2bcd(real_minutes);
+ }
+ CMOS_WRITE(real_seconds,RTC_SECONDS);
+ CMOS_WRITE(real_minutes,RTC_MINUTES);
+ } else {
+ printk_once(KERN_NOTICE
+ "set_rtc_mmss: can't update from %d to %d\n",
+ cmos_minutes, real_minutes);
+ retval = -1;
+ }
+
+ /* The following flags have to be released exactly in this order,
+ * otherwise the DS12887 (popular MC146818A clone with integrated
+ * battery and quartz) will not reset the oscillator and will not
+ * update precisely 500 ms later. You won't find this mentioned in
+ * the Dallas Semiconductor data sheets, but who believes data
+ * sheets anyway ... -- Markus Kuhn
+ */
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ spin_unlock(&rtc_lock);
+
+ return retval;
+}
+
+static int
+alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case RTC_EPOCH_READ:
+ return put_user(rtc_epoch, (unsigned long __user *)arg);
+ case RTC_EPOCH_SET:
+ if (arg < 1900)
+ return -EINVAL;
+ rtc_epoch = arg;
+ return 0;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
+static const struct rtc_class_ops alpha_rtc_ops = {
+ .read_time = alpha_rtc_read_time,
+ .set_time = alpha_rtc_set_time,
+ .set_mmss = alpha_rtc_set_mmss,
+ .ioctl = alpha_rtc_ioctl,
+};
+
+/*
+ * Similarly, except do the actual CMOS access on the boot cpu only.
+ * This requires marshalling the data across an interprocessor call.
+ */
+
+#if defined(CONFIG_SMP) && \
+ (defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_MARVEL))
+# define HAVE_REMOTE_RTC 1
+
+union remote_data {
+ struct rtc_time *tm;
+ unsigned long now;
+ long retval;
+};
+
+static void
+do_remote_read(void *data)
+{
+ union remote_data *x = data;
+ x->retval = alpha_rtc_read_time(NULL, x->tm);
+}
+
+static int
+remote_read_time(struct device *dev, struct rtc_time *tm)
+{
+ union remote_data x;
+ if (smp_processor_id() != boot_cpuid) {
+ x.tm = tm;
+ smp_call_function_single(boot_cpuid, do_remote_read, &x, 1);
+ return x.retval;
+ }
+ return alpha_rtc_read_time(NULL, tm);
+}
+
+static void
+do_remote_set(void *data)
+{
+ union remote_data *x = data;
+ x->retval = alpha_rtc_set_time(NULL, x->tm);
+}
+
+static int
+remote_set_time(struct device *dev, struct rtc_time *tm)
+{
+ union remote_data x;
+ if (smp_processor_id() != boot_cpuid) {
+ x.tm = tm;
+ smp_call_function_single(boot_cpuid, do_remote_set, &x, 1);
+ return x.retval;
+ }
+ return alpha_rtc_set_time(NULL, tm);
+}
+
+static void
+do_remote_mmss(void *data)
+{
+ union remote_data *x = data;
+ x->retval = alpha_rtc_set_mmss(NULL, x->now);
+}
+
+static int
+remote_set_mmss(struct device *dev, unsigned long now)
+{
+ union remote_data x;
+ if (smp_processor_id() != boot_cpuid) {
+ x.now = now;
+ smp_call_function_single(boot_cpuid, do_remote_mmss, &x, 1);
+ return x.retval;
+ }
+ return alpha_rtc_set_mmss(NULL, now);
+}
+
+static const struct rtc_class_ops remote_rtc_ops = {
+ .read_time = remote_read_time,
+ .set_time = remote_set_time,
+ .set_mmss = remote_set_mmss,
+ .ioctl = alpha_rtc_ioctl,
+};
+#endif
+
+static int __init
+alpha_rtc_init(void)
+{
+ const struct rtc_class_ops *ops;
+ struct platform_device *pdev;
+ struct rtc_device *rtc;
+ const char *name;
+
+ init_rtc_epoch();
+ name = "rtc-alpha";
+ ops = &alpha_rtc_ops;
+
+#ifdef HAVE_REMOTE_RTC
+ if (alpha_mv.rtc_boot_cpu_only)
+ ops = &remote_rtc_ops;
+#endif
+
+ pdev = platform_device_register_simple(name, -1, NULL, 0);
+ rtc = devm_rtc_device_register(&pdev->dev, name, ops, THIS_MODULE);
+ if (IS_ERR(rtc))
+ return PTR_ERR(rtc);
+
+ platform_set_drvdata(pdev, rtc);
+ return 0;
+}
+device_initcall(alpha_rtc_init);
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 9e3107cc5ebb..b20af76f12c1 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -115,10 +115,17 @@ unsigned long alpha_agpgart_size = DEFAULT_AGP_APER_SIZE;
#ifdef CONFIG_ALPHA_GENERIC
struct alpha_machine_vector alpha_mv;
+#endif
+
+#ifndef alpha_using_srm
int alpha_using_srm;
EXPORT_SYMBOL(alpha_using_srm);
#endif
+#ifndef alpha_using_qemu
+int alpha_using_qemu;
+#endif
+
static struct alpha_machine_vector *get_sysvec(unsigned long, unsigned long,
unsigned long);
static struct alpha_machine_vector *get_sysvec_byname(const char *);
@@ -529,11 +536,15 @@ setup_arch(char **cmdline_p)
atomic_notifier_chain_register(&panic_notifier_list,
&alpha_panic_block);
-#ifdef CONFIG_ALPHA_GENERIC
+#ifndef alpha_using_srm
/* Assume that we've booted from SRM if we haven't booted from MILO.
Detect the later by looking for "MILO" in the system serial nr. */
alpha_using_srm = strncmp((const char *)hwrpb->ssn, "MILO", 4) != 0;
#endif
+#ifndef alpha_using_qemu
+ /* Similarly, look for QEMU. */
+ alpha_using_qemu = strstr((const char *)hwrpb->ssn, "QEMU") != 0;
+#endif
/* If we are using SRM, we want to allow callbacks
as early as possible, so do this NOW, and then
@@ -1207,6 +1218,7 @@ show_cpuinfo(struct seq_file *f, void *slot)
char *systype_name;
char *sysvariation_name;
int nr_processors;
+ unsigned long timer_freq;
cpu_index = (unsigned) (cpu->type - 1);
cpu_name = "Unknown";
@@ -1218,6 +1230,12 @@ show_cpuinfo(struct seq_file *f, void *slot)
nr_processors = get_nr_processors(cpu, hwrpb->nr_processors);
+#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200
+ timer_freq = (100UL * hwrpb->intr_freq) / 4096;
+#else
+ timer_freq = 100UL * CONFIG_HZ;
+#endif
+
seq_printf(f, "cpu\t\t\t: Alpha\n"
"cpu model\t\t: %s\n"
"cpu variation\t\t: %ld\n"
@@ -1243,8 +1261,7 @@ show_cpuinfo(struct seq_file *f, void *slot)
(char*)hwrpb->ssn,
est_cycle_freq ? : hwrpb->cycle_freq,
est_cycle_freq ? "est." : "",
- hwrpb->intr_freq / 4096,
- (100 * hwrpb->intr_freq / 4096) % 100,
+ timer_freq / 100, timer_freq % 100,
hwrpb->pagesize,
hwrpb->pa_bits,
hwrpb->max_asn,
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 9dbbcb3b9146..99ac36d5de4e 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -138,9 +138,11 @@ smp_callin(void)
/* Get our local ticker going. */
smp_setup_percpu_timer(cpuid);
+ init_clockevent();
/* Call platform-specific callin, if specified */
- if (alpha_mv.smp_callin) alpha_mv.smp_callin();
+ if (alpha_mv.smp_callin)
+ alpha_mv.smp_callin();
/* All kernel threads share the same mm context. */
atomic_inc(&init_mm.mm_count);
@@ -498,35 +500,6 @@ smp_cpus_done(unsigned int max_cpus)
((bogosum + 2500) / (5000/HZ)) % 100);
}
-
-void
-smp_percpu_timer_interrupt(struct pt_regs *regs)
-{
- struct pt_regs *old_regs;
- int cpu = smp_processor_id();
- unsigned long user = user_mode(regs);
- struct cpuinfo_alpha *data = &cpu_data[cpu];
-
- old_regs = set_irq_regs(regs);
-
- /* Record kernel PC. */
- profile_tick(CPU_PROFILING);
-
- if (!--data->prof_counter) {
- /* We need to make like a normal interrupt -- otherwise
- timer interrupts ignore the global interrupt lock,
- which would be a Bad Thing. */
- irq_enter();
-
- update_process_times(user);
-
- data->prof_counter = data->prof_multiplier;
-
- irq_exit();
- }
- set_irq_regs(old_regs);
-}
-
int
setup_profiling_timer(unsigned int multiplier)
{
diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c
index 5a0af11b3a61..608f2a7fa0a3 100644
--- a/arch/alpha/kernel/sys_jensen.c
+++ b/arch/alpha/kernel/sys_jensen.c
@@ -224,8 +224,6 @@ struct alpha_machine_vector jensen_mv __initmv = {
.machine_check = jensen_machine_check,
.max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS,
.rtc_port = 0x170,
- .rtc_get_time = common_get_rtc_time,
- .rtc_set_time = common_set_rtc_time,
.nr_irqs = 16,
.device_interrupt = jensen_device_interrupt,
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index c92e389ff219..f21d61fab678 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -22,7 +22,6 @@
#include <asm/hwrpb.h>
#include <asm/tlbflush.h>
#include <asm/vga.h>
-#include <asm/rtc.h>
#include "proto.h"
#include "err_impl.h"
@@ -400,57 +399,6 @@ marvel_init_rtc(void)
init_rtc_irq();
}
-struct marvel_rtc_time {
- struct rtc_time *time;
- int retval;
-};
-
-#ifdef CONFIG_SMP
-static void
-smp_get_rtc_time(void *data)
-{
- struct marvel_rtc_time *mrt = data;
- mrt->retval = __get_rtc_time(mrt->time);
-}
-
-static void
-smp_set_rtc_time(void *data)
-{
- struct marvel_rtc_time *mrt = data;
- mrt->retval = __set_rtc_time(mrt->time);
-}
-#endif
-
-static unsigned int
-marvel_get_rtc_time(struct rtc_time *time)
-{
-#ifdef CONFIG_SMP
- struct marvel_rtc_time mrt;
-
- if (smp_processor_id() != boot_cpuid) {
- mrt.time = time;
- smp_call_function_single(boot_cpuid, smp_get_rtc_time, &mrt, 1);
- return mrt.retval;
- }
-#endif
- return __get_rtc_time(time);
-}
-
-static int
-marvel_set_rtc_time(struct rtc_time *time)
-{
-#ifdef CONFIG_SMP
- struct marvel_rtc_time mrt;
-
- if (smp_processor_id() != boot_cpuid) {
- mrt.time = time;
- smp_call_function_single(boot_cpuid, smp_set_rtc_time, &mrt, 1);
- return mrt.retval;
- }
-#endif
- return __set_rtc_time(time);
-}
-
static void
marvel_smp_callin(void)
{
@@ -492,8 +440,7 @@ struct alpha_machine_vector marvel_ev7_mv __initmv = {
.vector_name = "MARVEL/EV7",
DO_EV7_MMU,
.rtc_port = 0x70,
- .rtc_get_time = marvel_get_rtc_time,
- .rtc_set_time = marvel_set_rtc_time,
+ .rtc_boot_cpu_only = 1,
DO_MARVEL_IO,
.machine_check = marvel_machine_check,
.max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS,
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index ea3395036556..ee39cee8064c 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -3,13 +3,7 @@
*
* Copyright (C) 1991, 1992, 1995, 1999, 2000 Linus Torvalds
*
- * This file contains the PC-specific time handling details:
- * reading the RTC at bootup, etc..
- * 1994-07-02 Alan Modra
- * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
- * 1995-03-26 Markus Kuhn
- * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
- * precision CMOS clock update
+ * This file contains the clocksource time handling.
* 1997-09-10 Updated NTP code according to technical memorandum Jan '96
* "A Kernel Model for Precision Timekeeping" by Dave Mills
* 1997-01-09 Adrian Sun
@@ -21,9 +15,6 @@
* 1999-04-16 Thorsten Kranzkowski (dl8bcu@gmx.net)
* fixed algorithm in do_gettimeofday() for calculating the precise time
* from processor cycle counter (now taking lost_ticks into account)
- * 2000-08-13 Jan-Benedict Glaw <jbglaw@lug-owl.de>
- * Fixed time_init to be aware of epoches != 1900. This prevents
- * booting up in 2048 for me;) Code is stolen from rtc.c.
* 2003-06-03 R. Scott Bailey <scott.bailey@eds.com>
* Tighten sanity in time_init from 1% (10,000 PPM) to 250 PPM
*/
@@ -46,40 +37,19 @@
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/hwrpb.h>
-#include <asm/rtc.h>
#include <linux/mc146818rtc.h>
#include <linux/time.h>
#include <linux/timex.h>
#include <linux/clocksource.h>
+#include <linux/clockchips.h>
#include "proto.h"
#include "irq_impl.h"
-static int set_rtc_mmss(unsigned long);
-
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
-#define TICK_SIZE (tick_nsec / 1000)
-
-/*
- * Shift amount by which scaled_ticks_per_cycle is scaled. Shifting
- * by 48 gives us 16 bits for HZ while keeping the accuracy good even
- * for large CPU clock rates.
- */
-#define FIX_SHIFT 48
-
-/* lump static variables together for more efficient access: */
-static struct {
- /* cycle counter last time it got invoked */
- __u32 last_time;
- /* ticks/cycle * 2^48 */
- unsigned long scaled_ticks_per_cycle;
- /* partial unused tick */
- unsigned long partial_tick;
-} state;
-
unsigned long est_cycle_freq;
#ifdef CONFIG_IRQ_WORK
@@ -108,109 +78,156 @@ static inline __u32 rpcc(void)
return __builtin_alpha_rpcc();
}
-int update_persistent_clock(struct timespec now)
-{
- return set_rtc_mmss(now.tv_sec);
-}
-void read_persistent_clock(struct timespec *ts)
+
+/*
+ * The RTC as a clock_event_device primitive.
+ */
+
+static DEFINE_PER_CPU(struct clock_event_device, cpu_ce);
+
+irqreturn_t
+rtc_timer_interrupt(int irq, void *dev)
{
- unsigned int year, mon, day, hour, min, sec, epoch;
-
- sec = CMOS_READ(RTC_SECONDS);
- min = CMOS_READ(RTC_MINUTES);
- hour = CMOS_READ(RTC_HOURS);
- day = CMOS_READ(RTC_DAY_OF_MONTH);
- mon = CMOS_READ(RTC_MONTH);
- year = CMOS_READ(RTC_YEAR);
-
- if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- sec = bcd2bin(sec);
- min = bcd2bin(min);
- hour = bcd2bin(hour);
- day = bcd2bin(day);
- mon = bcd2bin(mon);
- year = bcd2bin(year);
- }
+ int cpu = smp_processor_id();
+ struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
- /* PC-like is standard; used for year >= 70 */
- epoch = 1900;
- if (year < 20)
- epoch = 2000;
- else if (year >= 20 && year < 48)
- /* NT epoch */
- epoch = 1980;
- else if (year >= 48 && year < 70)
- /* Digital UNIX epoch */
- epoch = 1952;
+ /* Don't run the hook for UNUSED or SHUTDOWN. */
+ if (likely(ce->mode == CLOCK_EVT_MODE_PERIODIC))
+ ce->event_handler(ce);
- printk(KERN_INFO "Using epoch = %d\n", epoch);
+ if (test_irq_work_pending()) {
+ clear_irq_work_pending();
+ irq_work_run();
+ }
- if ((year += epoch) < 1970)
- year += 100;
+ return IRQ_HANDLED;
+}
- ts->tv_sec = mktime(year, mon, day, hour, min, sec);
- ts->tv_nsec = 0;
+static void
+rtc_ce_set_mode(enum clock_event_mode mode, struct clock_event_device *ce)
+{
+ /* The mode member of CE is updated in generic code.
+ Since we only support periodic events, nothing to do. */
+}
+
+static int
+rtc_ce_set_next_event(unsigned long evt, struct clock_event_device *ce)
+{
+ /* This hook is for oneshot mode, which we don't support. */
+ return -EINVAL;
}
+static void __init
+init_rtc_clockevent(void)
+{
+ int cpu = smp_processor_id();
+ struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
+
+ *ce = (struct clock_event_device){
+ .name = "rtc",
+ .features = CLOCK_EVT_FEAT_PERIODIC,
+ .rating = 100,
+ .cpumask = cpumask_of(cpu),
+ .set_mode = rtc_ce_set_mode,
+ .set_next_event = rtc_ce_set_next_event,
+ };
+ clockevents_config_and_register(ce, CONFIG_HZ, 0, 0);
+}
+
/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "xtime_update()" routine every clocktick
+ * The QEMU clock as a clocksource primitive.
*/
-irqreturn_t timer_interrupt(int irq, void *dev)
+
+static cycle_t
+qemu_cs_read(struct clocksource *cs)
{
- unsigned long delta;
- __u32 now;
- long nticks;
+ return qemu_get_vmtime();
+}
-#ifndef CONFIG_SMP
- /* Not SMP, do kernel PC profiling here. */
- profile_tick(CPU_PROFILING);
-#endif
+static struct clocksource qemu_cs = {
+ .name = "qemu",
+ .rating = 400,
+ .read = qemu_cs_read,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .max_idle_ns = LONG_MAX
+};
- /*
- * Calculate how many ticks have passed since the last update,
- * including any previous partial leftover. Save any resulting
- * fraction for the next pass.
- */
- now = rpcc();
- delta = now - state.last_time;
- state.last_time = now;
- delta = delta * state.scaled_ticks_per_cycle + state.partial_tick;
- state.partial_tick = delta & ((1UL << FIX_SHIFT) - 1);
- nticks = delta >> FIX_SHIFT;
- if (nticks)
- xtime_update(nticks);
+/*
+ * The QEMU alarm as a clock_event_device primitive.
+ */
- if (test_irq_work_pending()) {
- clear_irq_work_pending();
- irq_work_run();
- }
+static void
+qemu_ce_set_mode(enum clock_event_mode mode, struct clock_event_device *ce)
+{
+ /* The mode member of CE is updated for us in generic code.
+ Just make sure that the event is disabled. */
+ qemu_set_alarm_abs(0);
+}
-#ifndef CONFIG_SMP
- while (nticks--)
- update_process_times(user_mode(get_irq_regs()));
-#endif
+static int
+qemu_ce_set_next_event(unsigned long evt, struct clock_event_device *ce)
+{
+ qemu_set_alarm_rel(evt);
+ return 0;
+}
+static irqreturn_t
+qemu_timer_interrupt(int irq, void *dev)
+{
+ int cpu = smp_processor_id();
+ struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
+
+ ce->event_handler(ce);
return IRQ_HANDLED;
}
+static void __init
+init_qemu_clockevent(void)
+{
+ int cpu = smp_processor_id();
+ struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
+
+ *ce = (struct clock_event_device){
+ .name = "qemu",
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ .rating = 400,
+ .cpumask = cpumask_of(cpu),
+ .set_mode = qemu_ce_set_mode,
+ .set_next_event = qemu_ce_set_next_event,
+ };
+
+ clockevents_config_and_register(ce, NSEC_PER_SEC, 1000, LONG_MAX);
+}
+
+
void __init
common_init_rtc(void)
{
- unsigned char x;
+ unsigned char x, sel = 0;
/* Reset periodic interrupt frequency. */
- x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f;
- /* Test includes known working values on various platforms
- where 0x26 is wrong; we refuse to change those. */
- if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) {
- printk("Setting RTC_FREQ to 1024 Hz (%x)\n", x);
- CMOS_WRITE(0x26, RTC_FREQ_SELECT);
+#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200
+ x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f;
+ /* Test includes known working values on various platforms
+ where 0x26 is wrong; we refuse to change those. */
+ if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) {
+ sel = RTC_REF_CLCK_32KHZ + 6;
}
+#elif CONFIG_HZ == 256 || CONFIG_HZ == 128 || CONFIG_HZ == 64 || CONFIG_HZ == 32
+ sel = RTC_REF_CLCK_32KHZ + __builtin_ffs(32768 / CONFIG_HZ);
+#else
+# error "Unknown HZ from arch/alpha/Kconfig"
+#endif
+ if (sel) {
+ printk(KERN_INFO "Setting RTC_FREQ to %d Hz (%x)\n",
+ CONFIG_HZ, sel);
+ CMOS_WRITE(sel, RTC_FREQ_SELECT);
+ }
/* Turn on periodic interrupts. */
x = CMOS_READ(RTC_CONTROL);
@@ -233,16 +250,37 @@ common_init_rtc(void)
init_rtc_irq();
}
-unsigned int common_get_rtc_time(struct rtc_time *time)
-{
- return __get_rtc_time(time);
-}
+
+#ifndef CONFIG_ALPHA_WTINT
+/*
+ * The RPCC as a clocksource primitive.
+ *
+ * While we have free-running timecounters running on all CPUs, and we make
+ * a half-hearted attempt in init_rtc_rpcc_info to sync the timecounter
+ * with the wall clock, that initialization isn't kept up-to-date across
+ * different time counters in SMP mode. Therefore we can only use this
+ * method when there's only one CPU enabled.
+ *
+ * When using the WTINT PALcall, the RPCC may shift to a lower frequency,
+ * or stop altogether, while waiting for the interrupt. Therefore we cannot
+ * use this method when WTINT is in use.
+ */
-int common_set_rtc_time(struct rtc_time *time)
+static cycle_t read_rpcc(struct clocksource *cs)
{
- return __set_rtc_time(time);
+ return rpcc();
}
+static struct clocksource clocksource_rpcc = {
+ .name = "rpcc",
+ .rating = 300,
+ .read = read_rpcc,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS
+};
+#endif /* ALPHA_WTINT */
+
+
/* Validate a computed cycle counter result against the known bounds for
the given processor core. There's too much brokenness in the way of
timing hardware for any one method to work everywhere. :-(
@@ -353,33 +391,6 @@ rpcc_after_update_in_progress(void)
return rpcc();
}
-#ifndef CONFIG_SMP
-/* Until and unless we figure out how to get cpu cycle counters
- in sync and keep them there, we can't use the rpcc. */
-static cycle_t read_rpcc(struct clocksource *cs)
-{
- cycle_t ret = (cycle_t)rpcc();
- return ret;
-}
-
-static struct clocksource clocksource_rpcc = {
- .name = "rpcc",
- .rating = 300,
- .read = read_rpcc,
- .mask = CLOCKSOURCE_MASK(32),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS
-};
-
-static inline void register_rpcc_clocksource(long cycle_freq)
-{
- clocksource_register_hz(&clocksource_rpcc, cycle_freq);
-}
-#else /* !CONFIG_SMP */
-static inline void register_rpcc_clocksource(long cycle_freq)
-{
-}
-#endif /* !CONFIG_SMP */
-
void __init
time_init(void)
{
@@ -387,6 +398,15 @@ time_init(void)
unsigned long cycle_freq, tolerance;
long diff;
+ if (alpha_using_qemu) {
+ clocksource_register_hz(&qemu_cs, NSEC_PER_SEC);
+ init_qemu_clockevent();
+
+ timer_irqaction.handler = qemu_timer_interrupt;
+ init_rtc_irq();
+ return;
+ }
+
/* Calibrate CPU clock -- attempt #1. */
if (!est_cycle_freq)
est_cycle_freq = validate_cc_value(calibrate_cc_with_pit());
@@ -421,100 +441,25 @@ time_init(void)
"and unable to estimate a proper value!\n");
}
- /* From John Bowman <bowman@math.ualberta.ca>: allow the values
- to settle, as the Update-In-Progress bit going low isn't good
- enough on some hardware. 2ms is our guess; we haven't found
- bogomips yet, but this is close on a 500Mhz box. */
- __delay(1000000);
-
-
- if (HZ > (1<<16)) {
- extern void __you_loose (void);
- __you_loose();
- }
-
- register_rpcc_clocksource(cycle_freq);
-
- state.last_time = cc1;
- state.scaled_ticks_per_cycle
- = ((unsigned long) HZ << FIX_SHIFT) / cycle_freq;
- state.partial_tick = 0L;
+ /* See above for restrictions on using clocksource_rpcc. */
+#ifndef CONFIG_ALPHA_WTINT
+ if (hwrpb->nr_processors == 1)
+ clocksource_register_hz(&clocksource_rpcc, cycle_freq);
+#endif
/* Startup the timer source. */
alpha_mv.init_rtc();
+ init_rtc_clockevent();
}
-/*
- * In order to set the CMOS clock precisely, set_rtc_mmss has to be
- * called 500 ms after the second nowtime has started, because when
- * nowtime is written into the registers of the CMOS clock, it will
- * jump to the next second precisely 500 ms later. Check the Motorola
- * MC146818A or Dallas DS12887 data sheet for details.
- *
- * BUG: This routine does not handle hour overflow properly; it just
- * sets the minutes. Usually you won't notice until after reboot!
- */
-
-
-static int
-set_rtc_mmss(unsigned long nowtime)
+/* Initialize the clock_event_device for secondary cpus. */
+#ifdef CONFIG_SMP
+void __init
+init_clockevent(void)
{
- int retval = 0;
- int real_seconds, real_minutes, cmos_minutes;
- unsigned char save_control, save_freq_select;
-
- /* irq are locally disabled here */
- spin_lock(&rtc_lock);
- /* Tell the clock it's being set */
- save_control = CMOS_READ(RTC_CONTROL);
- CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
-
- /* Stop and reset prescaler */
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-
- cmos_minutes = CMOS_READ(RTC_MINUTES);
- if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
- cmos_minutes = bcd2bin(cmos_minutes);
-
- /*
- * since we're only adjusting minutes and seconds,
- * don't interfere with hour overflow. This avoids
- * messing with unknown time zones but requires your
- * RTC not to be off by more than 15 minutes
- */
- real_seconds = nowtime % 60;
- real_minutes = nowtime / 60;
- if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) {
- /* correct for half hour time zone */
- real_minutes += 30;
- }
- real_minutes %= 60;
-
- if (abs(real_minutes - cmos_minutes) < 30) {
- if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
- real_seconds = bin2bcd(real_seconds);
- real_minutes = bin2bcd(real_minutes);
- }
- CMOS_WRITE(real_seconds,RTC_SECONDS);
- CMOS_WRITE(real_minutes,RTC_MINUTES);
- } else {
- printk_once(KERN_NOTICE
- "set_rtc_mmss: can't update from %d to %d\n",
- cmos_minutes, real_minutes);
- retval = -1;
- }
-
- /* The following flags have to be released exactly in this order,
- * otherwise the DS12887 (popular MC146818A clone with integrated
- * battery and quartz) will not reset the oscillator and will not
- * update precisely 500 ms later. You won't find this mentioned in
- * the Dallas Semiconductor data sheets, but who believes data
- * sheets anyway ... -- Markus Kuhn
- */
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- spin_unlock(&rtc_lock);
-
- return retval;
+ if (alpha_using_qemu)
+ init_qemu_clockevent();
+ else
+ init_rtc_clockevent();
}
+#endif
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index bd0665cdc840..9c4c189eb22f 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -241,6 +241,21 @@ do_entIF(unsigned long type, struct pt_regs *regs)
(const char *)(data[1] | (long)data[2] << 32),
data[0]);
}
+#ifdef CONFIG_ALPHA_WTINT
+ if (type == 4) {
+ /* If CALL_PAL WTINT is totally unsupported by the
+ PALcode, e.g. MILO, "emulate" it by overwriting
+ the insn. */
+ unsigned int *pinsn
+ = (unsigned int *) regs->pc - 1;
+ if (*pinsn == PAL_wtint) {
+ *pinsn = 0x47e01400; /* mov 0,$0 */
+ imb();
+ regs->r0 = 0;
+ return;
+ }
+ }
+#endif /* ALPHA_WTINT */
die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"),
regs, type, NULL);
}
diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c
index ffb19b7da999..ff3c10721caf 100644
--- a/arch/alpha/lib/csum_partial_copy.c
+++ b/arch/alpha/lib/csum_partial_copy.c
@@ -130,7 +130,7 @@ csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst,
*dst = word | tmp;
checksum += carry;
}
- if (err) *errp = err;
+ if (err && errp) *errp = err;
return checksum;
}
@@ -185,7 +185,7 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src,
*dst = word | tmp;
checksum += carry;
}
- if (err) *errp = err;
+ if (err && errp) *errp = err;
return checksum;
}
@@ -242,7 +242,7 @@ csum_partial_cfu_src_aligned(const unsigned long __user *src,
stq_u(partial_dest | second_dest, dst);
out:
checksum += carry;
- if (err) *errp = err;
+ if (err && errp) *errp = err;
return checksum;
}
@@ -325,7 +325,7 @@ csum_partial_cfu_unaligned(const unsigned long __user * src,
stq_u(partial_dest | word | second_dest, dst);
checksum += carry;
}
- if (err) *errp = err;
+ if (err && errp) *errp = err;
return checksum;
}
@@ -339,7 +339,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len,
if (len) {
if (!access_ok(VERIFY_READ, src, len)) {
- *errp = -EFAULT;
+ if (errp) *errp = -EFAULT;
memset(dst, 0, len);
return sum;
}
diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S
index d8b94e1c7fca..356bb2fdd705 100644
--- a/arch/alpha/lib/ev6-memset.S
+++ b/arch/alpha/lib/ev6-memset.S
@@ -30,14 +30,15 @@
.set noat
.set noreorder
.text
+ .globl memset
.globl __memset
+ .globl ___memset
.globl __memsetw
.globl __constant_c_memset
- .globl memset
- .ent __memset
+ .ent ___memset
.align 5
-__memset:
+___memset:
.frame $30,0,$26,0
.prologue 0
@@ -227,7 +228,7 @@ end_b:
nop
nop
ret $31,($26),1 # L0 :
- .end __memset
+ .end ___memset
/*
* This is the original body of code, prior to replication and
@@ -594,4 +595,5 @@ end_w:
.end __memsetw
-memset = __memset
+memset = ___memset
+__memset = ___memset
diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S
index 311b8cfc6914..76ccc6d1f364 100644
--- a/arch/alpha/lib/memset.S
+++ b/arch/alpha/lib/memset.S
@@ -19,11 +19,13 @@
.text
.globl memset
.globl __memset
+ .globl ___memset
.globl __memsetw
.globl __constant_c_memset
- .ent __memset
+
+ .ent ___memset
.align 5
-__memset:
+___memset:
.frame $30,0,$26,0
.prologue 0
@@ -103,7 +105,7 @@ within_one_quad:
end:
ret $31,($26),1 /* E1 */
- .end __memset
+ .end ___memset
.align 5
.ent __memsetw
@@ -121,4 +123,5 @@ __memsetw:
.end __memsetw
-memset = __memset
+memset = ___memset
+__memset = ___memset
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 214b698cefea..c1f1a7eee953 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -25,7 +25,7 @@ config ARM
select HARDIRQS_SW_RESEND
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
select HAVE_ARCH_KGDB
- select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_TRACEHOOK
select HAVE_BPF_JIT
select HAVE_CONTEXT_TRACKING
@@ -1496,6 +1496,7 @@ config HAVE_ARM_ARCH_TIMER
bool "Architected timer support"
depends on CPU_V7
select ARM_ARCH_TIMER
+ select GENERIC_CLOCKEVENTS
help
This option enables support for the ARM architected timer
@@ -1719,7 +1720,6 @@ config AEABI
config OABI_COMPAT
bool "Allow old ABI binaries to run with this kernel (EXPERIMENTAL)"
depends on AEABI && !THUMB2_KERNEL
- default y
help
This option preserves the old syscall interface along with the
new (ARM EABI) one. It also provides a compatibility layer to
@@ -1727,11 +1727,16 @@ config OABI_COMPAT
in memory differs between the legacy ABI and the new ARM EABI
(only for non "thumb" binaries). This option adds a tiny
overhead to all syscalls and produces a slightly larger kernel.
+
+ The seccomp filter system will not be available when this is
+ selected, since there is no way yet to sensibly distinguish
+ between calling conventions during filtering.
+
If you know you'll be using only pure EABI user space then you
can say N here. If this option is not selected and you attempt
to execute a legacy ABI binary then the result will be
UNPREDICTABLE (in fact it can be predicted that it won't work
- at all). If in doubt say Y.
+ at all). If in doubt say N.
config ARCH_HAS_HOLES_MEMORYMODEL
bool
diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 8e1a0245907f..41bca32409fc 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -404,7 +404,7 @@ static irqreturn_t dma_irq_handler(int irq, void *data)
BIT(slot));
if (edma_cc[ctlr]->intr_data[channel].callback)
edma_cc[ctlr]->intr_data[channel].callback(
- channel, DMA_COMPLETE,
+ channel, EDMA_DMA_COMPLETE,
edma_cc[ctlr]->intr_data[channel].data);
}
} while (sh_ipr);
@@ -459,7 +459,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
callback) {
edma_cc[ctlr]->intr_data[k].
callback(k,
- DMA_CC_ERROR,
+ EDMA_DMA_CC_ERROR,
edma_cc[ctlr]->intr_data
[k].data);
}
diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h
index 9b28f1243bdc..240b29ef17db 100644
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ b/arch/arm/include/asm/hardware/iop3xx-adma.h
@@ -393,36 +393,6 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
return slot_cnt;
}
-static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
-{
- return 0;
-}
-
-static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
- struct iop_adma_chan *chan)
-{
- union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
- switch (chan->device->id) {
- case DMA0_ID:
- case DMA1_ID:
- return hw_desc.dma->dest_addr;
- case AAU_ID:
- return hw_desc.aau->dest_addr;
- default:
- BUG();
- }
- return 0;
-}
-
-
-static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
- struct iop_adma_chan *chan)
-{
- BUG();
- return 0;
-}
-
static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h
index 122f86d8c991..250760e08103 100644
--- a/arch/arm/include/asm/hardware/iop_adma.h
+++ b/arch/arm/include/asm/hardware/iop_adma.h
@@ -82,8 +82,6 @@ struct iop_adma_chan {
* @slot_cnt: total slots used in an transaction (group of operations)
* @slots_per_op: number of slots per operation
* @idx: pool index
- * @unmap_src_cnt: number of xor sources
- * @unmap_len: transaction bytecount
* @tx_list: list of descriptors that are associated with one operation
* @async_tx: support for the async_tx api
* @group_list: list of slots that make up a multi-descriptor transaction
@@ -99,8 +97,6 @@ struct iop_adma_desc_slot {
u16 slot_cnt;
u16 slots_per_op;
u16 idx;
- u16 unmap_src_cnt;
- size_t unmap_len;
struct list_head tx_list;
struct dma_async_tx_descriptor async_tx;
union {
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 4dd21457ef9d..9ecccc865046 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -226,7 +226,14 @@ static inline phys_addr_t __virt_to_phys(unsigned long x)
static inline unsigned long __phys_to_virt(phys_addr_t x)
{
unsigned long t;
- __pv_stub(x, t, "sub", __PV_BITS_31_24);
+
+ /*
+ * 'unsigned long' cast discard upper word when
+ * phys_addr_t is 64 bit, and makes sure that inline
+ * assembler expression receives 32 bit argument
+ * in place where 'r' 32 bit operand is expected.
+ */
+ __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24);
return t;
}
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 7801866e626a..11d59b32fb8d 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -508,6 +508,7 @@ __fixup_smp:
teq r0, #0x0 @ '0' on actual UP A9 hardware
beq __fixup_smp_on_up @ So its an A9 UP
ldr r0, [r0, #4] @ read SCU Config
+ARM_BE8(rev r0, r0) @ byteswap if big endian
and r0, r0, #0x3 @ number of CPUs
teq r0, #0x0 @ is 1?
movne pc, lr
@@ -644,7 +645,11 @@ ARM_BE8(rev16 ip, ip)
bcc 1b
bx lr
#else
+#ifdef CONFIG_CPU_ENDIAN_BE8
+ moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction
+#else
moveq r0, #0x400000 @ set bit 22, mov to mvn instruction
+#endif
b 2f
1: ldr ip, [r7, r3]
#ifdef CONFIG_CPU_ENDIAN_BE8
@@ -653,7 +658,7 @@ ARM_BE8(rev16 ip, ip)
tst ip, #0x000f0000 @ check the rotation field
orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24
biceq ip, ip, #0x00004000 @ clear bit 22
- orreq ip, ip, r0, lsl #24 @ mask in offset bits 7-0
+ orreq ip, ip, r0 @ mask in offset bits 7-0
#else
bic ip, ip, #0x000000ff
tst ip, #0xf00 @ check the rotation field
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 6125f259b7b5..dbf0923e8d76 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -856,7 +856,7 @@ static void __init kuser_init(void *vectors)
memcpy(vectors + 0xfe0, vectors + 0xfe8, 4);
}
#else
-static void __init kuser_init(void *vectors)
+static inline void __init kuser_init(void *vectors)
{
}
#endif
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 371958370de4..580906989db1 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -334,6 +334,17 @@ out:
return err;
}
+static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
+{
+ if (!is_vmalloc_addr(kaddr)) {
+ BUG_ON(!virt_addr_valid(kaddr));
+ return __pa(kaddr);
+ } else {
+ return page_to_phys(vmalloc_to_page(kaddr)) +
+ offset_in_page(kaddr);
+ }
+}
+
/**
* create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
* @from: The virtual kernel start address of the range
@@ -345,16 +356,27 @@ out:
*/
int create_hyp_mappings(void *from, void *to)
{
- unsigned long phys_addr = virt_to_phys(from);
+ phys_addr_t phys_addr;
+ unsigned long virt_addr;
unsigned long start = KERN_TO_HYP((unsigned long)from);
unsigned long end = KERN_TO_HYP((unsigned long)to);
- /* Check for a valid kernel memory mapping */
- if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
- return -EINVAL;
+ start = start & PAGE_MASK;
+ end = PAGE_ALIGN(end);
- return __create_hyp_mappings(hyp_pgd, start, end,
- __phys_to_pfn(phys_addr), PAGE_HYP);
+ for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
+ int err;
+
+ phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
+ err = __create_hyp_mappings(hyp_pgd, virt_addr,
+ virt_addr + PAGE_SIZE,
+ __phys_to_pfn(phys_addr),
+ PAGE_HYP);
+ if (err)
+ return err;
+ }
+
+ return 0;
}
/**
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index e0c68d5bb7dc..52886b89706c 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -10,7 +10,7 @@ UNWIND( .fnstart )
and r3, r0, #31 @ Get bit offset
mov r0, r0, lsr #5
add r1, r1, r0, lsl #2 @ Get word offset
-#if __LINUX_ARM_ARCH__ >= 7
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
.arch_extension mp
ALT_SMP(W(pldw) [r1])
ALT_UP(W(nop))
diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h
index 6d3782d85a9f..a86fd0ed7757 100644
--- a/arch/arm/mach-iop13xx/include/mach/adma.h
+++ b/arch/arm/mach-iop13xx/include/mach/adma.h
@@ -218,20 +218,6 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
#define iop_chan_pq_slot_count iop_chan_xor_slot_count
#define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count
-static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
- struct iop_adma_chan *chan)
-{
- struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
- return hw_desc->dest_addr;
-}
-
-static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
- struct iop_adma_chan *chan)
-{
- struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
- return hw_desc->q_dest_addr;
-}
-
static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *chan)
{
@@ -350,18 +336,6 @@ iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
hw_desc->desc_ctrl = u_desc_ctrl.value;
}
-static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
-{
- struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
- union {
- u32 value;
- struct iop13xx_adma_desc_ctrl field;
- } u_desc_ctrl;
-
- u_desc_ctrl.value = hw_desc->desc_ctrl;
- return u_desc_ctrl.field.pq_xfer_en;
-}
-
static inline void
iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
unsigned long flags)
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 78eeeca78f5a..580ef2de82d7 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -558,8 +558,8 @@ static void __init build_mem_type_table(void)
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
break;
}
- printk("Memory policy: ECC %sabled, Data cache %s\n",
- ecc_mask ? "en" : "dis", cp->policy);
+ pr_info("Memory policy: %sData cache %s\n",
+ ecc_mask ? "ECC enabled, " : "", cp->policy);
for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
struct mem_type *t = &mem_types[i];
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 5c668b7a31f9..55764a7ef1f0 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -18,6 +18,7 @@
#include <asm/mach/arch.h>
#include <asm/cputype.h>
#include <asm/mpu.h>
+#include <asm/procinfo.h>
#include "mm.h"
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 60920f62fdf5..bd1781979a39 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -92,7 +92,7 @@ ENDPROC(cpu_v7_dcache_clean_area)
/* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */
.globl cpu_v7_suspend_size
-.equ cpu_v7_suspend_size, 4 * 8
+.equ cpu_v7_suspend_size, 4 * 9
#ifdef CONFIG_ARM_CPU_SUSPEND
ENTRY(cpu_v7_do_suspend)
stmfd sp!, {r4 - r10, lr}
@@ -101,13 +101,17 @@ ENTRY(cpu_v7_do_suspend)
stmia r0!, {r4 - r5}
#ifdef CONFIG_MMU
mrc p15, 0, r6, c3, c0, 0 @ Domain ID
+#ifdef CONFIG_ARM_LPAE
+ mrrc p15, 1, r5, r7, c2 @ TTB 1
+#else
mrc p15, 0, r7, c2, c0, 1 @ TTB 1
+#endif
mrc p15, 0, r11, c2, c0, 2 @ TTB control register
#endif
mrc p15, 0, r8, c1, c0, 0 @ Control register
mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register
mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control
- stmia r0, {r6 - r11}
+ stmia r0, {r5 - r11}
ldmfd sp!, {r4 - r10, pc}
ENDPROC(cpu_v7_do_suspend)
@@ -118,16 +122,19 @@ ENTRY(cpu_v7_do_resume)
ldmia r0!, {r4 - r5}
mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID
mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID
- ldmia r0, {r6 - r11}
+ ldmia r0, {r5 - r11}
#ifdef CONFIG_MMU
mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs
mcr p15, 0, r6, c3, c0, 0 @ Domain ID
-#ifndef CONFIG_ARM_LPAE
+#ifdef CONFIG_ARM_LPAE
+ mcrr p15, 0, r1, ip, c2 @ TTB 0
+ mcrr p15, 1, r5, r7, c2 @ TTB 1
+#else
ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP)
ALT_UP(orr r1, r1, #TTB_FLAGS_UP)
-#endif
mcr p15, 0, r1, c2, c0, 0 @ TTB 0
mcr p15, 0, r7, c2, c0, 1 @ TTB 1
+#endif
mcr p15, 0, r11, c2, c0, 2 @ TTB control register
ldr r4, =PRRR @ PRRR
ldr r5, =NMRR @ NMRR
diff --git a/arch/avr32/boot/u-boot/head.S b/arch/avr32/boot/u-boot/head.S
index 4488fa27fe94..2ffc298f061b 100644
--- a/arch/avr32/boot/u-boot/head.S
+++ b/arch/avr32/boot/u-boot/head.S
@@ -8,6 +8,8 @@
* published by the Free Software Foundation.
*/
#include <asm/setup.h>
+#include <asm/thread_info.h>
+#include <asm/sysreg.h>
/*
* The kernel is loaded where we want it to be and all caches
@@ -20,11 +22,6 @@
.section .init.text,"ax"
.global _start
_start:
- /* Check if the boot loader actually provided a tag table */
- lddpc r0, magic_number
- cp.w r12, r0
- brne no_tag_table
-
/* Initialize .bss */
lddpc r2, bss_start_addr
lddpc r3, end_addr
@@ -34,6 +31,25 @@ _start:
cp r2, r3
brlo 1b
+ /* Initialize status register */
+ lddpc r0, init_sr
+ mtsr SYSREG_SR, r0
+
+ /* Set initial stack pointer */
+ lddpc sp, stack_addr
+ sub sp, -THREAD_SIZE
+
+#ifdef CONFIG_FRAME_POINTER
+ /* Mark last stack frame */
+ mov lr, 0
+ mov r7, 0
+#endif
+
+ /* Check if the boot loader actually provided a tag table */
+ lddpc r0, magic_number
+ cp.w r12, r0
+ brne no_tag_table
+
/*
* Save the tag table address for later use. This must be done
* _after_ .bss has been initialized...
@@ -53,8 +69,15 @@ bss_start_addr:
.long __bss_start
end_addr:
.long _end
+init_sr:
+ .long 0x007f0000 /* Supervisor mode, everything masked */
+stack_addr:
+ .long init_thread_union
+panic_addr:
+ .long panic
no_tag_table:
sub r12, pc, (. - 2f)
- bral panic
+ /* branch to panic() which can be far away with that construct */
+ lddpc pc, panic_addr
2: .asciz "Boot loader didn't provide correct magic number\n"
diff --git a/arch/avr32/include/asm/kprobes.h b/arch/avr32/include/asm/kprobes.h
index 996cb656474e..45f563ed73fd 100644
--- a/arch/avr32/include/asm/kprobes.h
+++ b/arch/avr32/include/asm/kprobes.h
@@ -16,6 +16,7 @@
typedef u16 kprobe_opcode_t;
#define BREAKPOINT_INSTRUCTION 0xd673 /* breakpoint */
#define MAX_INSN_SIZE 2
+#define MAX_STACK_SIZE 64 /* 32 would probably be OK */
#define kretprobe_blacklist_size 0
@@ -26,6 +27,19 @@ struct arch_specific_insn {
kprobe_opcode_t insn[MAX_INSN_SIZE];
};
+struct prev_kprobe {
+ struct kprobe *kp;
+ unsigned int status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+ unsigned int kprobe_status;
+ struct prev_kprobe prev_kprobe;
+ struct pt_regs jprobe_saved_regs;
+ char jprobes_stack[MAX_STACK_SIZE];
+};
+
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
diff --git a/arch/avr32/include/uapi/asm/Kbuild b/arch/avr32/include/uapi/asm/Kbuild
index 3b85eaddf525..08d8a3d76ea8 100644
--- a/arch/avr32/include/uapi/asm/Kbuild
+++ b/arch/avr32/include/uapi/asm/Kbuild
@@ -2,35 +2,35 @@
include include/uapi/asm-generic/Kbuild.asm
header-y += auxvec.h
-header-y += bitsperlong.h
header-y += byteorder.h
header-y += cachectl.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
header-y += msgbuf.h
header-y += param.h
-header-y += poll.h
header-y += posix_types.h
header-y += ptrace.h
-header-y += resource.h
header-y += sembuf.h
header-y += setup.h
header-y += shmbuf.h
header-y += sigcontext.h
-header-y += siginfo.h
header-y += signal.h
header-y += socket.h
header-y += sockios.h
header-y += stat.h
-header-y += statfs.h
header-y += swab.h
header-y += termbits.h
header-y += termios.h
header-y += types.h
header-y += unistd.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += siginfo.h
+generic-y += statfs.h
diff --git a/arch/avr32/include/uapi/asm/auxvec.h b/arch/avr32/include/uapi/asm/auxvec.h
index d5dd435bf8f4..4f02da3ffefa 100644
--- a/arch/avr32/include/uapi/asm/auxvec.h
+++ b/arch/avr32/include/uapi/asm/auxvec.h
@@ -1,4 +1,4 @@
-#ifndef __ASM_AVR32_AUXVEC_H
-#define __ASM_AVR32_AUXVEC_H
+#ifndef _UAPI__ASM_AVR32_AUXVEC_H
+#define _UAPI__ASM_AVR32_AUXVEC_H
-#endif /* __ASM_AVR32_AUXVEC_H */
+#endif /* _UAPI__ASM_AVR32_AUXVEC_H */
diff --git a/arch/avr32/include/uapi/asm/bitsperlong.h b/arch/avr32/include/uapi/asm/bitsperlong.h
deleted file mode 100644
index 6dc0bb0c13b2..000000000000
--- a/arch/avr32/include/uapi/asm/bitsperlong.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bitsperlong.h>
diff --git a/arch/avr32/include/uapi/asm/byteorder.h b/arch/avr32/include/uapi/asm/byteorder.h
index 50abc21619a8..71242f0d39c6 100644
--- a/arch/avr32/include/uapi/asm/byteorder.h
+++ b/arch/avr32/include/uapi/asm/byteorder.h
@@ -1,9 +1,9 @@
/*
* AVR32 endian-conversion functions.
*/
-#ifndef __ASM_AVR32_BYTEORDER_H
-#define __ASM_AVR32_BYTEORDER_H
+#ifndef _UAPI__ASM_AVR32_BYTEORDER_H
+#define _UAPI__ASM_AVR32_BYTEORDER_H
#include <linux/byteorder/big_endian.h>
-#endif /* __ASM_AVR32_BYTEORDER_H */
+#endif /* _UAPI__ASM_AVR32_BYTEORDER_H */
diff --git a/arch/avr32/include/uapi/asm/cachectl.h b/arch/avr32/include/uapi/asm/cachectl.h
index 4faf1ce60061..573a9584dd57 100644
--- a/arch/avr32/include/uapi/asm/cachectl.h
+++ b/arch/avr32/include/uapi/asm/cachectl.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_CACHECTL_H
-#define __ASM_AVR32_CACHECTL_H
+#ifndef _UAPI__ASM_AVR32_CACHECTL_H
+#define _UAPI__ASM_AVR32_CACHECTL_H
/*
* Operations that can be performed through the cacheflush system call
@@ -8,4 +8,4 @@
/* Clean the data cache, then invalidate the icache */
#define CACHE_IFLUSH 0
-#endif /* __ASM_AVR32_CACHECTL_H */
+#endif /* _UAPI__ASM_AVR32_CACHECTL_H */
diff --git a/arch/avr32/include/uapi/asm/errno.h b/arch/avr32/include/uapi/asm/errno.h
deleted file mode 100644
index 558a7249f06d..000000000000
--- a/arch/avr32/include/uapi/asm/errno.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_ERRNO_H
-#define __ASM_AVR32_ERRNO_H
-
-#include <asm-generic/errno.h>
-
-#endif /* __ASM_AVR32_ERRNO_H */
diff --git a/arch/avr32/include/uapi/asm/fcntl.h b/arch/avr32/include/uapi/asm/fcntl.h
deleted file mode 100644
index 14c0c4402b11..000000000000
--- a/arch/avr32/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_FCNTL_H
-#define __ASM_AVR32_FCNTL_H
-
-#include <asm-generic/fcntl.h>
-
-#endif /* __ASM_AVR32_FCNTL_H */
diff --git a/arch/avr32/include/uapi/asm/ioctl.h b/arch/avr32/include/uapi/asm/ioctl.h
deleted file mode 100644
index c8472c1398ef..000000000000
--- a/arch/avr32/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_IOCTL_H
-#define __ASM_AVR32_IOCTL_H
-
-#include <asm-generic/ioctl.h>
-
-#endif /* __ASM_AVR32_IOCTL_H */
diff --git a/arch/avr32/include/uapi/asm/ioctls.h b/arch/avr32/include/uapi/asm/ioctls.h
deleted file mode 100644
index 909cf66feaf5..000000000000
--- a/arch/avr32/include/uapi/asm/ioctls.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_IOCTLS_H
-#define __ASM_AVR32_IOCTLS_H
-
-#include <asm-generic/ioctls.h>
-
-#endif /* __ASM_AVR32_IOCTLS_H */
diff --git a/arch/avr32/include/uapi/asm/ipcbuf.h b/arch/avr32/include/uapi/asm/ipcbuf.h
deleted file mode 100644
index 84c7e51cb6d0..000000000000
--- a/arch/avr32/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/avr32/include/uapi/asm/kvm_para.h b/arch/avr32/include/uapi/asm/kvm_para.h
deleted file mode 100644
index 14fab8f0b957..000000000000
--- a/arch/avr32/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kvm_para.h>
diff --git a/arch/avr32/include/uapi/asm/mman.h b/arch/avr32/include/uapi/asm/mman.h
deleted file mode 100644
index 8eebf89f5ab1..000000000000
--- a/arch/avr32/include/uapi/asm/mman.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mman.h>
diff --git a/arch/avr32/include/uapi/asm/msgbuf.h b/arch/avr32/include/uapi/asm/msgbuf.h
index ac18bc4da7f7..9eae6effad14 100644
--- a/arch/avr32/include/uapi/asm/msgbuf.h
+++ b/arch/avr32/include/uapi/asm/msgbuf.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_MSGBUF_H
-#define __ASM_AVR32_MSGBUF_H
+#ifndef _UAPI__ASM_AVR32_MSGBUF_H
+#define _UAPI__ASM_AVR32_MSGBUF_H
/*
* The msqid64_ds structure for i386 architecture.
@@ -28,4 +28,4 @@ struct msqid64_ds {
unsigned long __unused5;
};
-#endif /* __ASM_AVR32_MSGBUF_H */
+#endif /* _UAPI__ASM_AVR32_MSGBUF_H */
diff --git a/arch/avr32/include/uapi/asm/poll.h b/arch/avr32/include/uapi/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/avr32/include/uapi/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/avr32/include/uapi/asm/posix_types.h b/arch/avr32/include/uapi/asm/posix_types.h
index 9ba9e749b3f3..5b813a8abf09 100644
--- a/arch/avr32/include/uapi/asm/posix_types.h
+++ b/arch/avr32/include/uapi/asm/posix_types.h
@@ -5,8 +5,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-#ifndef __ASM_AVR32_POSIX_TYPES_H
-#define __ASM_AVR32_POSIX_TYPES_H
+#ifndef _UAPI__ASM_AVR32_POSIX_TYPES_H
+#define _UAPI__ASM_AVR32_POSIX_TYPES_H
/*
* This file is generally used by user-level software, so you need to
@@ -34,4 +34,4 @@ typedef unsigned short __kernel_old_dev_t;
#include <asm-generic/posix_types.h>
-#endif /* __ASM_AVR32_POSIX_TYPES_H */
+#endif /* _UAPI__ASM_AVR32_POSIX_TYPES_H */
diff --git a/arch/avr32/include/uapi/asm/resource.h b/arch/avr32/include/uapi/asm/resource.h
deleted file mode 100644
index c6dd101472b1..000000000000
--- a/arch/avr32/include/uapi/asm/resource.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_RESOURCE_H
-#define __ASM_AVR32_RESOURCE_H
-
-#include <asm-generic/resource.h>
-
-#endif /* __ASM_AVR32_RESOURCE_H */
diff --git a/arch/avr32/include/uapi/asm/sembuf.h b/arch/avr32/include/uapi/asm/sembuf.h
index e472216e0c97..6c6f7cf1e75a 100644
--- a/arch/avr32/include/uapi/asm/sembuf.h
+++ b/arch/avr32/include/uapi/asm/sembuf.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SEMBUF_H
-#define __ASM_AVR32_SEMBUF_H
+#ifndef _UAPI__ASM_AVR32_SEMBUF_H
+#define _UAPI__ASM_AVR32_SEMBUF_H
/*
* The semid64_ds structure for AVR32 architecture.
@@ -22,4 +22,4 @@ struct semid64_ds {
unsigned long __unused4;
};
-#endif /* __ASM_AVR32_SEMBUF_H */
+#endif /* _UAPI__ASM_AVR32_SEMBUF_H */
diff --git a/arch/avr32/include/uapi/asm/setup.h b/arch/avr32/include/uapi/asm/setup.h
index e58aa9356faf..a654df7dba46 100644
--- a/arch/avr32/include/uapi/asm/setup.h
+++ b/arch/avr32/include/uapi/asm/setup.h
@@ -13,5 +13,4 @@
#define COMMAND_LINE_SIZE 256
-
#endif /* _UAPI__ASM_AVR32_SETUP_H__ */
diff --git a/arch/avr32/include/uapi/asm/shmbuf.h b/arch/avr32/include/uapi/asm/shmbuf.h
index c62fba41739a..b94cf8b60b73 100644
--- a/arch/avr32/include/uapi/asm/shmbuf.h
+++ b/arch/avr32/include/uapi/asm/shmbuf.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SHMBUF_H
-#define __ASM_AVR32_SHMBUF_H
+#ifndef _UAPI__ASM_AVR32_SHMBUF_H
+#define _UAPI__ASM_AVR32_SHMBUF_H
/*
* The shmid64_ds structure for i386 architecture.
@@ -39,4 +39,4 @@ struct shminfo64 {
unsigned long __unused4;
};
-#endif /* __ASM_AVR32_SHMBUF_H */
+#endif /* _UAPI__ASM_AVR32_SHMBUF_H */
diff --git a/arch/avr32/include/uapi/asm/sigcontext.h b/arch/avr32/include/uapi/asm/sigcontext.h
index e04062b5f39f..27e56bf6377f 100644
--- a/arch/avr32/include/uapi/asm/sigcontext.h
+++ b/arch/avr32/include/uapi/asm/sigcontext.h
@@ -5,8 +5,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-#ifndef __ASM_AVR32_SIGCONTEXT_H
-#define __ASM_AVR32_SIGCONTEXT_H
+#ifndef _UAPI__ASM_AVR32_SIGCONTEXT_H
+#define _UAPI__ASM_AVR32_SIGCONTEXT_H
struct sigcontext {
unsigned long oldmask;
@@ -31,4 +31,4 @@ struct sigcontext {
unsigned long r0;
};
-#endif /* __ASM_AVR32_SIGCONTEXT_H */
+#endif /* _UAPI__ASM_AVR32_SIGCONTEXT_H */
diff --git a/arch/avr32/include/uapi/asm/siginfo.h b/arch/avr32/include/uapi/asm/siginfo.h
deleted file mode 100644
index 5ee93f40a8a8..000000000000
--- a/arch/avr32/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _AVR32_SIGINFO_H
-#define _AVR32_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-#endif
diff --git a/arch/avr32/include/uapi/asm/signal.h b/arch/avr32/include/uapi/asm/signal.h
index 1b77a93eff50..ffe8c770cafd 100644
--- a/arch/avr32/include/uapi/asm/signal.h
+++ b/arch/avr32/include/uapi/asm/signal.h
@@ -118,5 +118,4 @@ typedef struct sigaltstack {
size_t ss_size;
} stack_t;
-
#endif /* _UAPI__ASM_AVR32_SIGNAL_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 439936421434..cbf902e4cd9e 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SOCKET_H
-#define __ASM_AVR32_SOCKET_H
+#ifndef _UAPI__ASM_AVR32_SOCKET_H
+#define _UAPI__ASM_AVR32_SOCKET_H
#include <asm/sockios.h>
@@ -78,4 +78,4 @@
#define SO_MAX_PACING_RATE 47
-#endif /* __ASM_AVR32_SOCKET_H */
+#endif /* _UAPI__ASM_AVR32_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/sockios.h b/arch/avr32/include/uapi/asm/sockios.h
index 0802d742f97d..d04785453532 100644
--- a/arch/avr32/include/uapi/asm/sockios.h
+++ b/arch/avr32/include/uapi/asm/sockios.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SOCKIOS_H
-#define __ASM_AVR32_SOCKIOS_H
+#ifndef _UAPI__ASM_AVR32_SOCKIOS_H
+#define _UAPI__ASM_AVR32_SOCKIOS_H
/* Socket-level I/O control calls. */
#define FIOSETOWN 0x8901
@@ -10,4 +10,4 @@
#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-#endif /* __ASM_AVR32_SOCKIOS_H */
+#endif /* _UAPI__ASM_AVR32_SOCKIOS_H */
diff --git a/arch/avr32/include/uapi/asm/stat.h b/arch/avr32/include/uapi/asm/stat.h
index e72881e10230..c06acef7fce7 100644
--- a/arch/avr32/include/uapi/asm/stat.h
+++ b/arch/avr32/include/uapi/asm/stat.h
@@ -5,8 +5,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-#ifndef __ASM_AVR32_STAT_H
-#define __ASM_AVR32_STAT_H
+#ifndef _UAPI__ASM_AVR32_STAT_H
+#define _UAPI__ASM_AVR32_STAT_H
struct __old_kernel_stat {
unsigned short st_dev;
@@ -76,4 +76,4 @@ struct stat64 {
unsigned long __unused2;
};
-#endif /* __ASM_AVR32_STAT_H */
+#endif /* _UAPI__ASM_AVR32_STAT_H */
diff --git a/arch/avr32/include/uapi/asm/statfs.h b/arch/avr32/include/uapi/asm/statfs.h
deleted file mode 100644
index 2961bd18c50e..000000000000
--- a/arch/avr32/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_STATFS_H
-#define __ASM_AVR32_STATFS_H
-
-#include <asm-generic/statfs.h>
-
-#endif /* __ASM_AVR32_STATFS_H */
diff --git a/arch/avr32/include/uapi/asm/swab.h b/arch/avr32/include/uapi/asm/swab.h
index 14cc737bbca6..1a03549e7dc5 100644
--- a/arch/avr32/include/uapi/asm/swab.h
+++ b/arch/avr32/include/uapi/asm/swab.h
@@ -1,8 +1,8 @@
/*
* AVR32 byteswapping functions.
*/
-#ifndef __ASM_AVR32_SWAB_H
-#define __ASM_AVR32_SWAB_H
+#ifndef _UAPI__ASM_AVR32_SWAB_H
+#define _UAPI__ASM_AVR32_SWAB_H
#include <linux/types.h>
#include <linux/compiler.h>
@@ -32,4 +32,4 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
#define __arch_swab32 __arch_swab32
#endif
-#endif /* __ASM_AVR32_SWAB_H */
+#endif /* _UAPI__ASM_AVR32_SWAB_H */
diff --git a/arch/avr32/include/uapi/asm/termbits.h b/arch/avr32/include/uapi/asm/termbits.h
index 366adc5ebb10..32789ccb38f8 100644
--- a/arch/avr32/include/uapi/asm/termbits.h
+++ b/arch/avr32/include/uapi/asm/termbits.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_TERMBITS_H
-#define __ASM_AVR32_TERMBITS_H
+#ifndef _UAPI__ASM_AVR32_TERMBITS_H
+#define _UAPI__ASM_AVR32_TERMBITS_H
#include <linux/posix_types.h>
@@ -193,4 +193,4 @@ struct ktermios {
#define TCSADRAIN 1
#define TCSAFLUSH 2
-#endif /* __ASM_AVR32_TERMBITS_H */
+#endif /* _UAPI__ASM_AVR32_TERMBITS_H */
diff --git a/arch/avr32/include/uapi/asm/termios.h b/arch/avr32/include/uapi/asm/termios.h
index b8ef8ea63352..c8a0081556c4 100644
--- a/arch/avr32/include/uapi/asm/termios.h
+++ b/arch/avr32/include/uapi/asm/termios.h
@@ -46,5 +46,4 @@ struct termio {
/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
#endif /* _UAPI__ASM_AVR32_TERMIOS_H */
diff --git a/arch/avr32/include/uapi/asm/types.h b/arch/avr32/include/uapi/asm/types.h
index bb34ad349dfc..7c986c4e99b5 100644
--- a/arch/avr32/include/uapi/asm/types.h
+++ b/arch/avr32/include/uapi/asm/types.h
@@ -5,4 +5,9 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#ifndef _UAPI__ASM_AVR32_TYPES_H
+#define _UAPI__ASM_AVR32_TYPES_H
+
#include <asm-generic/int-ll64.h>
+
+#endif /* _UAPI__ASM_AVR32_TYPES_H */
diff --git a/arch/avr32/include/uapi/asm/unistd.h b/arch/avr32/include/uapi/asm/unistd.h
index 3eaa68753adb..8822bf46ddc6 100644
--- a/arch/avr32/include/uapi/asm/unistd.h
+++ b/arch/avr32/include/uapi/asm/unistd.h
@@ -301,5 +301,4 @@
#define __NR_eventfd 281
#define __NR_setns 283
-
#endif /* _UAPI__ASM_AVR32_UNISTD_H */
diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S
index 9899d3cc6f03..7301f4806bbe 100644
--- a/arch/avr32/kernel/entry-avr32b.S
+++ b/arch/avr32/kernel/entry-avr32b.S
@@ -401,9 +401,10 @@ handle_critical:
/* We should never get here... */
bad_return:
sub r12, pc, (. - 1f)
- bral panic
+ lddpc pc, 2f
.align 2
1: .asciz "Return from critical exception!"
+2: .long panic
.align 1
do_bus_error_write:
diff --git a/arch/avr32/kernel/head.S b/arch/avr32/kernel/head.S
index 6163bd0acb95..59eae6dfbed2 100644
--- a/arch/avr32/kernel/head.S
+++ b/arch/avr32/kernel/head.S
@@ -10,33 +10,13 @@
#include <linux/linkage.h>
#include <asm/page.h>
-#include <asm/thread_info.h>
-#include <asm/sysreg.h>
.section .init.text,"ax"
.global kernel_entry
kernel_entry:
- /* Initialize status register */
- lddpc r0, init_sr
- mtsr SYSREG_SR, r0
-
- /* Set initial stack pointer */
- lddpc sp, stack_addr
- sub sp, -THREAD_SIZE
-
-#ifdef CONFIG_FRAME_POINTER
- /* Mark last stack frame */
- mov lr, 0
- mov r7, 0
-#endif
-
/* Start the show */
lddpc pc, kernel_start_addr
.align 2
-init_sr:
- .long 0x007f0000 /* Supervisor mode, everything masked */
-stack_addr:
- .long init_thread_union
kernel_start_addr:
.long start_kernel
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index d43daf192b21..4c530a82fc46 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1992,7 +1992,7 @@ sba_connect_bus(struct pci_bus *bus)
if (PCI_CONTROLLER(bus)->iommu)
return;
- handle = PCI_CONTROLLER(bus)->acpi_handle;
+ handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
if (!handle)
return;
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 80775f55f03f..71fbaaa495cc 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -95,7 +95,7 @@ struct iospace_resource {
};
struct pci_controller {
- void *acpi_handle;
+ struct acpi_device *companion;
void *iommu;
int segment;
int node; /* nearest node with memory or -1 for global allocation */
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 5a9ff1c3c3e9..cb592773c78b 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2166,12 +2166,6 @@ static const struct file_operations pfm_file_ops = {
.flush = pfm_flush
};
-static int
-pfmfs_delete_dentry(const struct dentry *dentry)
-{
- return 1;
-}
-
static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
{
return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
@@ -2179,7 +2173,7 @@ static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
}
static const struct dentry_operations pfmfs_dentry_operations = {
- .d_delete = pfmfs_delete_dentry,
+ .d_delete = always_delete_dentry,
.d_dname = pfmfs_dname,
};
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 2326790b7d8b..9e4938d8ca4d 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -436,9 +436,9 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
if (!controller)
return NULL;
- controller->acpi_handle = device->handle;
+ controller->companion = device;
- pxm = acpi_get_pxm(controller->acpi_handle);
+ pxm = acpi_get_pxm(device->handle);
#ifdef CONFIG_NUMA
if (pxm >= 0)
controller->node = pxm_to_node(pxm);
@@ -489,7 +489,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
struct pci_controller *controller = bridge->bus->sysdata;
- ACPI_HANDLE_SET(&bridge->dev, controller->acpi_handle);
+ ACPI_COMPANION_SET(&bridge->dev, controller->companion);
return 0;
}
diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c
index b1725398b5af..0640739cc20c 100644
--- a/arch/ia64/sn/kernel/io_acpi_init.c
+++ b/arch/ia64/sn/kernel/io_acpi_init.c
@@ -132,7 +132,7 @@ sn_get_bussoft_ptr(struct pci_bus *bus)
struct acpi_resource_vendor_typed *vendor;
- handle = PCI_CONTROLLER(bus)->acpi_handle;
+ handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
&sn_uuid, &buffer);
if (ACPI_FAILURE(status)) {
@@ -360,7 +360,7 @@ sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info,
acpi_status status;
struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
- rootbus_handle = PCI_CONTROLLER(dev)->acpi_handle;
+ rootbus_handle = acpi_device_handle(PCI_CONTROLLER(dev)->companion);
status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL,
&segment);
if (ACPI_SUCCESS(status)) {
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h
new file mode 100644
index 000000000000..748016cb122d
--- /dev/null
+++ b/arch/parisc/include/asm/socket.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_SOCKET_H
+#define _ASM_SOCKET_H
+
+#include <uapi/asm/socket.h>
+
+/* O_NONBLOCK clashes with the bits used for socket types. Therefore we
+ * have to define SOCK_NONBLOCK to a different value here.
+ */
+#define SOCK_NONBLOCK 0x40000000
+
+#endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 63f4dd0b49c2..4006964d8e12 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -4,14 +4,11 @@
/*
* User space memory access functions
*/
-#include <asm/processor.h>
#include <asm/page.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm-generic/uaccess-unaligned.h>
-#include <linux/sched.h>
-
#define VERIFY_READ 0
#define VERIFY_WRITE 1
@@ -36,43 +33,12 @@ extern int __get_user_bad(void);
extern int __put_kernel_bad(void);
extern int __put_user_bad(void);
-
-/*
- * Test whether a block of memory is a valid user space address.
- * Returns 0 if the range is valid, nonzero otherwise.
- */
-static inline int __range_not_ok(unsigned long addr, unsigned long size,
- unsigned long limit)
+static inline long access_ok(int type, const void __user * addr,
+ unsigned long size)
{
- unsigned long __newaddr = addr + size;
- return (__newaddr < addr || __newaddr > limit || size > limit);
+ return 1;
}
-/**
- * access_ok: - Checks if a user space pointer is valid
- * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
- * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
- * to write to a block, it is always safe to read from it.
- * @addr: User space pointer to start of block to check
- * @size: Size of block to check
- *
- * Context: User context only. This function may sleep.
- *
- * Checks if a pointer to a block of memory in user space is valid.
- *
- * Returns true (nonzero) if the memory block may be valid, false (zero)
- * if it is definitely invalid.
- *
- * Note that, depending on architecture, this function probably just
- * checks that the pointer is in the user space range - after calling
- * this function, memory access functions may still return -EFAULT.
- */
-#define access_ok(type, addr, size) \
-( __chk_user_ptr(addr), \
- !__range_not_ok((unsigned long) (__force void *) (addr), \
- size, user_addr_max()) \
-)
-
#define put_user __put_user
#define get_user __get_user
@@ -253,11 +219,7 @@ extern long lstrnlen_user(const char __user *,long);
/*
* Complex access routines -- macros
*/
-#ifdef CONFIG_COMPAT
-#define user_addr_max() (TASK_SIZE)
-#else
-#define user_addr_max() (DEFAULT_TASK_SIZE)
-#endif
+#define user_addr_max() (~0UL)
#define strnlen_user lstrnlen_user
#define strlen_user(str) lstrnlen_user(str, 0x7fffffffL)
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 7c614d01f1fa..f33113a6141e 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_SOCKET_H
-#define _ASM_SOCKET_H
+#ifndef _UAPI_ASM_SOCKET_H
+#define _UAPI_ASM_SOCKET_H
#include <asm/sockios.h>
@@ -77,9 +77,4 @@
#define SO_MAX_PACING_RATE 0x4048
-/* O_NONBLOCK clashes with the bits used for socket types. Therefore we
- * have to define SOCK_NONBLOCK to a different value here.
- */
-#define SOCK_NONBLOCK 0x40000000
-
-#endif /* _ASM_SOCKET_H */
+#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index b5507ec06b84..413dc1769299 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -161,7 +161,7 @@ static inline void prefetch_dst(const void *addr)
/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
* per loop. This code is derived from glibc.
*/
-static inline unsigned long copy_dstaligned(unsigned long dst,
+static noinline unsigned long copy_dstaligned(unsigned long dst,
unsigned long src, unsigned long len)
{
/* gcc complains that a2 and a3 may be uninitialized, but actually
@@ -276,7 +276,7 @@ handle_store_error:
/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR.
* In case of an access fault the faulty address can be read from the per_cpu
* exception data struct. */
-static unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
+static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
unsigned long len)
{
register unsigned long src, dst, t1, t2, t3;
@@ -529,7 +529,7 @@ long probe_kernel_read(void *dst, const void *src, size_t size)
{
unsigned long addr = (unsigned long)src;
- if (size < 0 || addr < PAGE_SIZE)
+ if (addr < PAGE_SIZE)
return -EFAULT;
/* check for I/O space F_EXTEND(0xfff00000) access as well? */
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 7584a5df0fa4..9d08c71a967e 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -282,16 +282,34 @@ bad_area:
#endif
switch (code) {
case 15: /* Data TLB miss fault/Data page fault */
+ /* send SIGSEGV when outside of vma */
+ if (!vma ||
+ address < vma->vm_start || address > vma->vm_end) {
+ si.si_signo = SIGSEGV;
+ si.si_code = SEGV_MAPERR;
+ break;
+ }
+
+ /* send SIGSEGV for wrong permissions */
+ if ((vma->vm_flags & acc_type) != acc_type) {
+ si.si_signo = SIGSEGV;
+ si.si_code = SEGV_ACCERR;
+ break;
+ }
+
+ /* probably address is outside of mapped file */
+ /* fall through */
case 17: /* NA data TLB miss / page fault */
case 18: /* Unaligned access - PCXS only */
si.si_signo = SIGBUS;
- si.si_code = BUS_ADRERR;
+ si.si_code = (code == 18) ? BUS_ADRALN : BUS_ADRERR;
break;
case 16: /* Non-access instruction TLB miss fault */
case 26: /* PCXL: Data memory access rights trap */
default:
si.si_signo = SIGSEGV;
- si.si_code = SEGV_MAPERR;
+ si.si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR;
+ break;
}
si.si_errno = 0;
si.si_addr = (void __user *) address;
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 607acf54a425..8a2463670a5b 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -111,6 +111,7 @@ endif
endif
CFLAGS-$(CONFIG_PPC64) := -mtraceback=no -mcall-aixdesc
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,-mminimal-toc)
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD)
diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
index 4c617bf8cdb2..4f6e48277c46 100644
--- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
@@ -223,13 +223,13 @@
reg = <0xe2000 0x1000>;
};
-/include/ "qoriq-dma-0.dtsi"
+/include/ "elo3-dma-0.dtsi"
dma@100300 {
fsl,iommu-parent = <&pamu0>;
fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
};
-/include/ "qoriq-dma-1.dtsi"
+/include/ "elo3-dma-1.dtsi"
dma@101300 {
fsl,iommu-parent = <&pamu0>;
fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi
new file mode 100644
index 000000000000..3c210e0d5201
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi
@@ -0,0 +1,82 @@
+/*
+ * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x100000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma0: dma@100300 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,elo3-dma";
+ reg = <0x100300 0x4>,
+ <0x100600 0x4>;
+ ranges = <0x0 0x100100 0x500>;
+ dma-channel@0 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x0 0x80>;
+ interrupts = <28 2 0 0>;
+ };
+ dma-channel@80 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x80 0x80>;
+ interrupts = <29 2 0 0>;
+ };
+ dma-channel@100 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x100 0x80>;
+ interrupts = <30 2 0 0>;
+ };
+ dma-channel@180 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x180 0x80>;
+ interrupts = <31 2 0 0>;
+ };
+ dma-channel@300 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x300 0x80>;
+ interrupts = <76 2 0 0>;
+ };
+ dma-channel@380 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x380 0x80>;
+ interrupts = <77 2 0 0>;
+ };
+ dma-channel@400 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x400 0x80>;
+ interrupts = <78 2 0 0>;
+ };
+ dma-channel@480 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x480 0x80>;
+ interrupts = <79 2 0 0>;
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi
new file mode 100644
index 000000000000..cccf3bb38224
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi
@@ -0,0 +1,82 @@
+/*
+ * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x101000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma1: dma@101300 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,elo3-dma";
+ reg = <0x101300 0x4>,
+ <0x101600 0x4>;
+ ranges = <0x0 0x101100 0x500>;
+ dma-channel@0 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x0 0x80>;
+ interrupts = <32 2 0 0>;
+ };
+ dma-channel@80 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x80 0x80>;
+ interrupts = <33 2 0 0>;
+ };
+ dma-channel@100 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x100 0x80>;
+ interrupts = <34 2 0 0>;
+ };
+ dma-channel@180 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x180 0x80>;
+ interrupts = <35 2 0 0>;
+ };
+ dma-channel@300 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x300 0x80>;
+ interrupts = <80 2 0 0>;
+ };
+ dma-channel@380 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x380 0x80>;
+ interrupts = <81 2 0 0>;
+ };
+ dma-channel@400 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x400 0x80>;
+ interrupts = <82 2 0 0>;
+ };
+ dma-channel@480 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x480 0x80>;
+ interrupts = <83 2 0 0>;
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
index 510afa362de1..4143a9733cd0 100644
--- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
@@ -387,8 +387,8 @@
reg = <0xea000 0x4000>;
};
-/include/ "qoriq-dma-0.dtsi"
-/include/ "qoriq-dma-1.dtsi"
+/include/ "elo3-dma-0.dtsi"
+/include/ "elo3-dma-1.dtsi"
/include/ "qoriq-espi-0.dtsi"
spi@110000 {
diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig
new file mode 100644
index 000000000000..62771e0adb7c
--- /dev/null
+++ b/arch/powerpc/configs/pseries_le_defconfig
@@ -0,0 +1,352 @@
+CONFIG_PPC64=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_AUDITSYSCALL=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PPC_SPLPAR=y
+CONFIG_SCANLOG=m
+CONFIG_PPC_SMLPAR=y
+CONFIG_DTL=y
+# CONFIG_PPC_PMAC is not set
+CONFIG_RTAS_FLASH=m
+CONFIG_IBMEBUS=y
+CONFIG_HZ_100=y
+CONFIG_BINFMT_MISC=m
+CONFIG_PPC_TRANSACTIONAL_MEM=y
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_CMA=y
+CONFIG_PPC_64K_PAGES=y
+CONFIG_PPC_SUBPAGE_PROT=y
+CONFIG_SCHED_SMT=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_RPA=m
+CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_PROC_DEVICETREE=y
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+CONFIG_BLK_DEV_FD=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_IBMVSCSI=y
+CONFIG_SCSI_IBMVFC=m
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_IPR=y
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_UEVENT=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_NETCONSOLE=y
+CONFIG_NETPOLL_TRAP=y
+CONFIG_TUN=m
+CONFIG_VIRTIO_NET=m
+CONFIG_VORTEX=y
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_PCNET32=y
+CONFIG_TIGON3=y
+CONFIG_CHELSIO_T1=m
+CONFIG_BE2NET=m
+CONFIG_S2IO=m
+CONFIG_IBMVETH=y
+CONFIG_EHEA=y
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_IXGB=m
+CONFIG_IXGBE=m
+CONFIG_MLX4_EN=m
+CONFIG_MYRI10GE=m
+CONFIG_QLGE=m
+CONFIG_NETXEN_NIC=m
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_EVDEV=m
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_PCSPKR=m
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_ICOM=m
+CONFIG_SERIAL_JSM=m
+CONFIG_HVC_CONSOLE=y
+CONFIG_HVC_RTAS=y
+CONFIG_HVCS=m
+CONFIG_VIRTIO_CONSOLE=m
+CONFIG_IBM_BSR=m
+CONFIG_GEN_RTC=y
+CONFIG_RAW_DRIVER=y
+CONFIG_MAX_RAW_DEVS=1024
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+CONFIG_FB_RADEON=y
+CONFIG_FB_IBM_GXT4500=y
+CONFIG_LCD_PLATFORM=m
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=m
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_EHCA=m
+CONFIG_INFINIBAND_CXGB3=m
+CONFIG_INFINIBAND_CXGB4=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_INFINIBAND_IPOIB_CM=y
+CONFIG_INFINIBAND_SRP=m
+CONFIG_INFINIBAND_ISER=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_PSTORE=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_CRC_T10DIF=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_CODE_PATCHING_SELFTEST=y
+CONFIG_FTR_FIXUP_SELFTEST=y
+CONFIG_MSI_BITMAP_SELFTEST=y
+CONFIG_XMON=y
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_LZO=m
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DEV_NX=y
+CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index cc0655a702a7..935b5e7a1436 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -31,6 +31,8 @@
extern unsigned long randomize_et_dyn(unsigned long base);
#define ELF_ET_DYN_BASE (randomize_et_dyn(0x20000000))
+#define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0)
+
/*
* Our registers are always unsigned longs, whether we're a 32 bit
* process or 64 bit, on either a 64 bit or 32 bit kernel.
@@ -86,6 +88,8 @@ typedef elf_vrregset_t elf_fpxregset_t;
#ifdef __powerpc64__
# define SET_PERSONALITY(ex) \
do { \
+ if (((ex).e_flags & 0x3) == 2) \
+ set_thread_flag(TIF_ELF2ABI); \
if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \
set_thread_flag(TIF_32BIT); \
else \
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 0c7f2bfcf134..d8b600b3f058 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -403,6 +403,8 @@ static inline unsigned long cmo_get_page_size(void)
extern long pSeries_enable_reloc_on_exc(void);
extern long pSeries_disable_reloc_on_exc(void);
+extern long pseries_big_endian_exceptions(void);
+
#else
#define pSeries_enable_reloc_on_exc() do {} while (0)
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index a63b045e707c..12c32c5f533d 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -287,6 +287,32 @@ static inline long disable_reloc_on_exceptions(void) {
return plpar_set_mode(0, 3, 0, 0);
}
+/*
+ * Take exceptions in big endian mode on this partition
+ *
+ * Note: this call has a partition wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long enable_big_endian_exceptions(void)
+{
+ /* mflags = 0: big endian exceptions */
+ return plpar_set_mode(0, 4, 0, 0);
+}
+
+/*
+ * Take exceptions in little endian mode on this partition
+ *
+ * Note: this call has a partition wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long enable_little_endian_exceptions(void)
+{
+ /* mflags = 1: little endian exceptions */
+ return plpar_set_mode(1, 4, 0, 0);
+}
+
static inline long plapr_set_ciabr(unsigned long ciabr)
{
return plpar_set_mode(0, 1, ciabr, 0);
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 98da78e0c2c0..084e0807db98 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -33,6 +33,7 @@ extern int boot_cpuid;
extern int spinning_secondaries;
extern void cpu_die(void);
+extern int cpu_to_chip_id(int cpu);
#ifdef CONFIG_SMP
@@ -112,7 +113,6 @@ static inline struct cpumask *cpu_core_mask(int cpu)
}
extern int cpu_to_core_id(int cpu);
-extern int cpu_to_chip_id(int cpu);
/* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
*
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 8fd6cf6dcee8..9854c564ac52 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -105,6 +105,9 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation
for stack store? */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
+#if defined(CONFIG_PPC64)
+#define TIF_ELF2ABI 18 /* function descriptors must die! */
+#endif
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -183,6 +186,12 @@ static inline bool test_thread_local_flags(unsigned int flags)
#define is_32bit_task() (1)
#endif
+#if defined(CONFIG_PPC64)
+#define is_elf2_task() (test_thread_flag(TIF_ELF2ABI))
+#else
+#define is_elf2_task() (0)
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 671302065347..4bd687d5e7aa 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -686,6 +686,15 @@ void eeh_save_bars(struct eeh_dev *edev)
for (i = 0; i < 16; i++)
eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
+
+ /*
+ * For PCI bridges including root port, we need enable bus
+ * master explicitly. Otherwise, it can't fetch IODA table
+ * entries correctly. So we cache the bit in advance so that
+ * we can restore it after reset, either PHB range or PE range.
+ */
+ if (edev->mode & EEH_DEV_BRIDGE)
+ edev->config_space[1] |= PCI_COMMAND_MASTER;
}
/**
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index d27c5afc90ae..72d748b56c86 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -74,8 +74,13 @@ static int eeh_event_handler(void * dummy)
pe = event->pe;
if (pe) {
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
- pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
- pe->phb->global_number, pe->addr);
+ if (pe->type & EEH_PE_PHB)
+ pr_info("EEH: Detected error on PHB#%d\n",
+ pe->phb->global_number);
+ else
+ pr_info("EEH: Detected PCI bus error on "
+ "PHB#%d-PE#%x\n",
+ pe->phb->global_number, pe->addr);
eeh_handle_event(pe);
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
} else {
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 75c2d1009985..3386d8ab7eb0 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -858,17 +858,21 @@ void show_regs(struct pt_regs * regs)
printk("MSR: "REG" ", regs->msr);
printbits(regs->msr, msr_bits);
printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
-#ifdef CONFIG_PPC64
- printk("SOFTE: %ld\n", regs->softe);
-#endif
trap = TRAP(regs);
if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
- printk("CFAR: "REG"\n", regs->orig_gpr3);
- if (trap == 0x300 || trap == 0x600)
+ printk("CFAR: "REG" ", regs->orig_gpr3);
+ if (trap == 0x200 || trap == 0x300 || trap == 0x600)
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr);
+ printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
#else
- printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr);
+ printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+#endif
+#ifdef CONFIG_PPC64
+ printk("SOFTE: %ld ", regs->softe);
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (MSR_TM_ACTIVE(regs->msr))
+ printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
#endif
for (i = 0; i < 32; i++) {
@@ -887,9 +891,6 @@ void show_regs(struct pt_regs * regs)
printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- printk("PACATMSCRATCH [%llx]\n", get_paca()->tm_scratch);
-#endif
show_stack(current, (unsigned long *) regs->gpr[1]);
if (!user_mode(regs))
show_instructions(regs);
@@ -1086,25 +1087,45 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
regs->msr = MSR_USER;
#else
if (!is_32bit_task()) {
- unsigned long entry, toc;
+ unsigned long entry;
- /* start is a relocated pointer to the function descriptor for
- * the elf _start routine. The first entry in the function
- * descriptor is the entry address of _start and the second
- * entry is the TOC value we need to use.
- */
- __get_user(entry, (unsigned long __user *)start);
- __get_user(toc, (unsigned long __user *)start+1);
+ if (is_elf2_task()) {
+ /* Look ma, no function descriptors! */
+ entry = start;
- /* Check whether the e_entry function descriptor entries
- * need to be relocated before we can use them.
- */
- if (load_addr != 0) {
- entry += load_addr;
- toc += load_addr;
+ /*
+ * Ulrich says:
+ * The latest iteration of the ABI requires that when
+ * calling a function (at its global entry point),
+ * the caller must ensure r12 holds the entry point
+ * address (so that the function can quickly
+ * establish addressability).
+ */
+ regs->gpr[12] = start;
+ /* Make sure that's restored on entry to userspace. */
+ set_thread_flag(TIF_RESTOREALL);
+ } else {
+ unsigned long toc;
+
+ /* start is a relocated pointer to the function
+ * descriptor for the elf _start routine. The first
+ * entry in the function descriptor is the entry
+ * address of _start and the second entry is the TOC
+ * value we need to use.
+ */
+ __get_user(entry, (unsigned long __user *)start);
+ __get_user(toc, (unsigned long __user *)start+1);
+
+ /* Check whether the e_entry function descriptor entries
+ * need to be relocated before we can use them.
+ */
+ if (load_addr != 0) {
+ entry += load_addr;
+ toc += load_addr;
+ }
+ regs->gpr[2] = toc;
}
regs->nip = entry;
- regs->gpr[2] = toc;
regs->msr = MSR_USER64;
} else {
regs->nip = start;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f3a47098fb8e..fa0ad8aafbcc 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -777,6 +777,26 @@ int of_get_ibm_chip_id(struct device_node *np)
return -1;
}
+/**
+ * cpu_to_chip_id - Return the cpus chip-id
+ * @cpu: The logical cpu number.
+ *
+ * Return the value of the ibm,chip-id property corresponding to the given
+ * logical cpu number. If the chip-id can not be found, returns -1.
+ */
+int cpu_to_chip_id(int cpu)
+{
+ struct device_node *np;
+
+ np = of_get_cpu_node(cpu, NULL);
+ if (!np)
+ return -1;
+
+ of_node_put(np);
+ return of_get_ibm_chip_id(np);
+}
+EXPORT_SYMBOL(cpu_to_chip_id);
+
#ifdef CONFIG_PPC_PSERIES
/*
* Fix up the uninitialized fields in a new device node:
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 749778e0a69d..1844298f5ea4 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -457,7 +457,15 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
if (copy_vsx_to_user(&frame->mc_vsregs, current))
return 1;
msr |= MSR_VSX;
- }
+ } else if (!ctx_has_vsx_region)
+ /*
+ * With a small context structure we can't hold the VSX
+ * registers, hence clear the MSR value to indicate the state
+ * was not saved.
+ */
+ msr &= ~MSR_VSX;
+
+
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
/* save spe registers */
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index b3c615764c9b..e66f67b8b9e6 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -701,12 +701,6 @@ badframe:
int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
- /* Handler is *really* a pointer to the function descriptor for
- * the signal routine. The first entry in the function
- * descriptor is the entry address of signal and the second
- * entry is the TOC value we need to use.
- */
- func_descr_t __user *funct_desc_ptr;
struct rt_sigframe __user *frame;
unsigned long newsp = 0;
long err = 0;
@@ -766,19 +760,32 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
goto badframe;
regs->link = (unsigned long) &frame->tramp[0];
}
- funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler;
/* Allocate a dummy caller frame for the signal handler. */
newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
/* Set up "regs" so we "return" to the signal handler. */
- err |= get_user(regs->nip, &funct_desc_ptr->entry);
+ if (is_elf2_task()) {
+ regs->nip = (unsigned long) ka->sa.sa_handler;
+ regs->gpr[12] = regs->nip;
+ } else {
+ /* Handler is *really* a pointer to the function descriptor for
+ * the signal routine. The first entry in the function
+ * descriptor is the entry address of signal and the second
+ * entry is the TOC value we need to use.
+ */
+ func_descr_t __user *funct_desc_ptr =
+ (func_descr_t __user *) ka->sa.sa_handler;
+
+ err |= get_user(regs->nip, &funct_desc_ptr->entry);
+ err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
+ }
+
/* enter the signal handler in native-endian mode */
regs->msr &= ~MSR_LE;
regs->msr |= (MSR_KERNEL & MSR_LE);
regs->gpr[1] = newsp;
- err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
regs->gpr[3] = signr;
regs->result = 0;
if (ka->sa.sa_flags & SA_SIGINFO) {
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 930cd8af3503..a3b64f3bf9a2 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -597,22 +597,6 @@ out:
return id;
}
-/* Return the value of the chip-id property corresponding
- * to the given logical cpu.
- */
-int cpu_to_chip_id(int cpu)
-{
- struct device_node *np;
-
- np = of_get_cpu_node(cpu, NULL);
- if (!np)
- return -1;
-
- of_node_put(np);
- return of_get_ibm_chip_id(np);
-}
-EXPORT_SYMBOL(cpu_to_chip_id);
-
/* Helper routines for cpu to core mapping */
int cpu_core_index_of_thread(int cpu)
{
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 192b051df97e..b3b144121cc9 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -213,8 +213,6 @@ static u64 scan_dispatch_log(u64 stop_tb)
if (i == be64_to_cpu(vpa->dtl_idx))
return 0;
while (i < be64_to_cpu(vpa->dtl_idx)) {
- if (dtl_consumer)
- dtl_consumer(dtl, i);
dtb = be64_to_cpu(dtl->timebase);
tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
be32_to_cpu(dtl->ready_to_enqueue_time);
@@ -227,6 +225,8 @@ static u64 scan_dispatch_log(u64 stop_tb)
}
if (dtb > stop_tb)
break;
+ if (dtl_consumer)
+ dtl_consumer(dtl, i);
stolen += tb_delta;
++i;
++dtl;
diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S
index 45ea281e9a21..542c6f422e4d 100644
--- a/arch/powerpc/kernel/vdso64/sigtramp.S
+++ b/arch/powerpc/kernel/vdso64/sigtramp.S
@@ -142,6 +142,13 @@ V_FUNCTION_END(__kernel_sigtramp_rt64)
/* Size of CR reg in DWARF unwind info. */
#define CRSIZE 4
+/* Offset of CR reg within a full word. */
+#ifdef __LITTLE_ENDIAN__
+#define CROFF 0
+#else
+#define CROFF (RSIZE - CRSIZE)
+#endif
+
/* This is the offset of the VMX reg pointer. */
#define VREGS 48*RSIZE+33*8
@@ -181,7 +188,14 @@ V_FUNCTION_END(__kernel_sigtramp_rt64)
rsave (31, 31*RSIZE); \
rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \
rsave (65, 36*RSIZE); /* lr */ \
- rsave (70, 38*RSIZE + (RSIZE - CRSIZE)) /* cr */
+ rsave (68, 38*RSIZE + CROFF); /* cr fields */ \
+ rsave (69, 38*RSIZE + CROFF); \
+ rsave (70, 38*RSIZE + CROFF); \
+ rsave (71, 38*RSIZE + CROFF); \
+ rsave (72, 38*RSIZE + CROFF); \
+ rsave (73, 38*RSIZE + CROFF); \
+ rsave (74, 38*RSIZE + CROFF); \
+ rsave (75, 38*RSIZE + CROFF)
/* Describe where the FP regs are saved. */
#define EH_FRAME_FP \
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index e7d0c88f621a..76a64821f4a2 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1419,7 +1419,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
/* needed to ensure proper operation of coherent allocations
* later, in case driver doesn't set it explicitly */
- dma_set_mask_and_coherent(&viodev->dev, DMA_BIT_MASK(64));
+ dma_coerce_mask_and_coherent(&viodev->dev, DMA_BIT_MASK(64));
}
/* register with generic device framework */
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index 6936547018b8..c5f734e20b0f 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -123,6 +123,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
unsigned long next;
+ unsigned long flags;
pgd_t *pgdp;
int nr = 0;
@@ -156,7 +157,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
* So long as we atomically load page table pointers versus teardown,
* we can follow the address down to the the page and take a ref on it.
*/
- local_irq_disable();
+ local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
@@ -179,7 +180,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
break;
} while (pgdp++, addr = next, addr != end);
- local_irq_enable();
+ local_irq_restore(flags);
return nr;
}
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 3e99c149271a..7ce9cf3b6988 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -258,7 +258,7 @@ static bool slice_scan_available(unsigned long addr,
slice = GET_HIGH_SLICE_INDEX(addr);
*boundary_addr = (slice + end) ?
((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
- return !!(available.high_slices & (1u << slice));
+ return !!(available.high_slices & (1ul << slice));
}
}
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index c2a566fb8bb8..132f8726a257 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -403,3 +403,14 @@ config PPC_DOORBELL
default n
endmenu
+
+config CPU_LITTLE_ENDIAN
+ bool "Build little endian kernel"
+ default n
+ help
+ This option selects whether a big endian or little endian kernel will
+ be built.
+
+ Note that if cross compiling a little endian kernel,
+ CROSS_COMPILE must point to a toolchain capable of targeting
+ little endian powerpc.
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 8844628915dc..1cb160dc1609 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -19,6 +19,7 @@
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/machdep.h>
+#include <asm/smp.h>
struct powernv_rng {
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 7fbc25b1813f..ccb633e077b1 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -189,8 +189,9 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
struct eeh_dev *edev;
struct eeh_pe pe;
struct pci_dn *pdn = PCI_DN(dn);
- const u32 *class_code, *vendor_id, *device_id;
- const u32 *regs;
+ const __be32 *classp, *vendorp, *devicep;
+ u32 class_code;
+ const __be32 *regs;
u32 pcie_flags;
int enable = 0;
int ret;
@@ -201,22 +202,24 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
return NULL;
/* Retrieve class/vendor/device IDs */
- class_code = of_get_property(dn, "class-code", NULL);
- vendor_id = of_get_property(dn, "vendor-id", NULL);
- device_id = of_get_property(dn, "device-id", NULL);
+ classp = of_get_property(dn, "class-code", NULL);
+ vendorp = of_get_property(dn, "vendor-id", NULL);
+ devicep = of_get_property(dn, "device-id", NULL);
/* Skip for bad OF node or PCI-ISA bridge */
- if (!class_code || !vendor_id || !device_id)
+ if (!classp || !vendorp || !devicep)
return NULL;
if (dn->type && !strcmp(dn->type, "isa"))
return NULL;
+ class_code = of_read_number(classp, 1);
+
/*
* Update class code and mode of eeh device. We need
* correctly reflects that current device is root port
* or PCIe switch downstream port.
*/
- edev->class_code = *class_code;
+ edev->class_code = class_code;
edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
edev->mode &= 0xFFFFFF00;
if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
@@ -243,12 +246,12 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
/* Initialize the fake PE */
memset(&pe, 0, sizeof(struct eeh_pe));
pe.phb = edev->phb;
- pe.config_addr = regs[0];
+ pe.config_addr = of_read_number(regs, 1);
/* Enable EEH on the device */
ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
if (!ret) {
- edev->config_addr = regs[0];
+ edev->config_addr = of_read_number(regs, 1);
/* Retrieve PE address */
edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
pe.addr = edev->pe_config_addr;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 356bc75ca74f..4fca3def9db9 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -245,6 +245,23 @@ static void pSeries_lpar_hptab_clear(void)
&(ptes[j].pteh), &(ptes[j].ptel));
}
}
+
+#ifdef __LITTLE_ENDIAN__
+ /* Reset exceptions to big endian */
+ if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+ long rc;
+
+ rc = pseries_big_endian_exceptions();
+ /*
+ * At this point it is unlikely panic() will get anything
+ * out to the user, but at least this will stop us from
+ * continuing on further and creating an even more
+ * difficult to debug situation.
+ */
+ if (rc)
+ panic("Could not enable big endian exceptions");
+ }
+#endif
}
/*
diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c
index a702f1c08242..72a102758d4e 100644
--- a/arch/powerpc/platforms/pseries/rng.c
+++ b/arch/powerpc/platforms/pseries/rng.c
@@ -13,6 +13,7 @@
#include <linux/of.h>
#include <asm/archrandom.h>
#include <asm/machdep.h>
+#include <asm/plpar_wrappers.h>
static int pseries_get_random_long(unsigned long *v)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 1f97e2b87a62..c1f190858701 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -442,6 +442,32 @@ static void pSeries_machine_kexec(struct kimage *image)
}
#endif
+#ifdef __LITTLE_ENDIAN__
+long pseries_big_endian_exceptions(void)
+{
+ long rc;
+
+ while (1) {
+ rc = enable_big_endian_exceptions();
+ if (!H_IS_LONG_BUSY(rc))
+ return rc;
+ mdelay(get_longbusy_msecs(rc));
+ }
+}
+
+static long pseries_little_endian_exceptions(void)
+{
+ long rc;
+
+ while (1) {
+ rc = enable_little_endian_exceptions();
+ if (!H_IS_LONG_BUSY(rc))
+ return rc;
+ mdelay(get_longbusy_msecs(rc));
+ }
+}
+#endif
+
static void __init pSeries_setup_arch(void)
{
panic_timeout = 10;
@@ -698,6 +724,22 @@ static int __init pSeries_probe(void)
/* Now try to figure out if we are running on LPAR */
of_scan_flat_dt(pseries_probe_fw_features, NULL);
+#ifdef __LITTLE_ENDIAN__
+ if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+ long rc;
+ /*
+ * Tell the hypervisor that we want our exceptions to
+ * be taken in little endian mode. If this fails we don't
+ * want to use BUG() because it will trigger an exception.
+ */
+ rc = pseries_little_endian_exceptions();
+ if (rc) {
+ ppc_md.progress("H_SET_MODE LE exception fail", 0);
+ panic("Could not enable little endian exceptions");
+ }
+ }
+#endif
+
if (firmware_has_feature(FW_FEATURE_LPAR))
hpte_init_lpar();
else
diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c
index 8ef53bc2e70e..aaa46b353715 100644
--- a/arch/powerpc/platforms/wsp/chroma.c
+++ b/arch/powerpc/platforms/wsp/chroma.c
@@ -15,6 +15,7 @@
#include <linux/of.h>
#include <linux/smp.h>
#include <linux/time.h>
+#include <linux/of_fdt.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/wsp/h8.c b/arch/powerpc/platforms/wsp/h8.c
index d18e6cc19df3..a3c87f395750 100644
--- a/arch/powerpc/platforms/wsp/h8.c
+++ b/arch/powerpc/platforms/wsp/h8.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/io.h>
+#include <linux/of_address.h>
#include "wsp.h"
diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c
index 2d3b1dd9571d..9cd92e645028 100644
--- a/arch/powerpc/platforms/wsp/ics.c
+++ b/arch/powerpc/platforms/wsp/ics.c
@@ -18,6 +18,8 @@
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/types.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#include <asm/io.h>
#include <asm/irq.h>
diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c
index cb565bf93650..3f6729807938 100644
--- a/arch/powerpc/platforms/wsp/opb_pic.c
+++ b/arch/powerpc/platforms/wsp/opb_pic.c
@@ -15,6 +15,8 @@
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/time.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#include <asm/reg_a2.h>
#include <asm/irq.h>
diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c
index 508ec8282b96..a87b414c766a 100644
--- a/arch/powerpc/platforms/wsp/psr2.c
+++ b/arch/powerpc/platforms/wsp/psr2.c
@@ -15,6 +15,7 @@
#include <linux/of.h>
#include <linux/smp.h>
#include <linux/time.h>
+#include <linux/of_fdt.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c
index 8928507affea..6538b4de34fc 100644
--- a/arch/powerpc/platforms/wsp/scom_wsp.c
+++ b/arch/powerpc/platforms/wsp/scom_wsp.c
@@ -14,6 +14,7 @@
#include <linux/of.h>
#include <linux/spinlock.h>
#include <linux/types.h>
+#include <linux/of_address.h>
#include <asm/cputhreads.h>
#include <asm/reg_a2.h>
diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c
index ddb6efe88914..58cd1f00e1ef 100644
--- a/arch/powerpc/platforms/wsp/wsp.c
+++ b/arch/powerpc/platforms/wsp/wsp.c
@@ -13,6 +13,7 @@
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/time.h>
+#include <linux/of_address.h>
#include <asm/scom.h>
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 7d7443283a9d..947b5c417e83 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -15,7 +15,7 @@ struct pci_sysdata {
int domain; /* PCI domain */
int node; /* NUMA node */
#ifdef CONFIG_ACPI
- void *acpi; /* ACPI-specific data */
+ struct acpi_device *companion; /* ACPI companion device */
#endif
#ifdef CONFIG_X86_64
void *iommu; /* IOMMU private data */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index b93e09a0fa21..37813b5ddc37 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -147,6 +147,8 @@
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
+#define MSR_CORE_C1_RES 0x00000660
+
#define MSR_AMD64_MC0_MASK 0xc0010044
#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index daff69e21150..1185fe7a7f47 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -296,4 +296,4 @@ static struct kernel_param_ops audit_param_ops = {
.get = param_get_bool,
};
-module_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
+arch_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index a7cccb6d7fec..c96314abd144 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -61,6 +61,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
#if PAGETABLE_LEVELS > 2
void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
+ struct page *page = virt_to_page(pmd);
paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
/*
* NOTE! For PAE, any changes to the top page-directory-pointer-table
@@ -69,7 +70,8 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
#ifdef CONFIG_X86_PAE
tlb->need_flush_all = 1;
#endif
- tlb_remove_page(tlb, virt_to_page(pmd));
+ pgtable_pmd_page_dtor(page);
+ tlb_remove_page(tlb, page);
}
#if PAGETABLE_LEVELS > 3
@@ -209,7 +211,7 @@ static int preallocate_pmds(pmd_t *pmds[])
if (!pmd)
failed = true;
if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
- free_page((unsigned long)pmds[i]);
+ free_page((unsigned long)pmd);
pmd = NULL;
failed = true;
}
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 7fb24e53d4c8..4f25ec077552 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -518,7 +518,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
sd = &info->sd;
sd->domain = domain;
sd->node = node;
- sd->acpi = device->handle;
+ sd->companion = device;
/*
* Maybe the desired pci bus has been already scanned. In such case
* it is unnecessary to scan the pci bus with the given domain,busnum.
@@ -589,7 +589,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
struct pci_sysdata *sd = bridge->bus->sysdata;
- ACPI_HANDLE_SET(&bridge->dev, sd->acpi);
+ ACPI_COMPANION_SET(&bridge->dev, sd->companion);
return 0;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 862f458d4760..cdc629cf075b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -171,9 +171,12 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
}
EXPORT_SYMBOL(blk_mq_can_queue);
-static void blk_mq_rq_ctx_init(struct blk_mq_ctx *ctx, struct request *rq,
- unsigned int rw_flags)
+static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
+ struct request *rq, unsigned int rw_flags)
{
+ if (blk_queue_io_stat(q))
+ rw_flags |= REQ_IO_STAT;
+
rq->mq_ctx = ctx;
rq->cmd_flags = rw_flags;
ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
@@ -197,7 +200,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved);
if (rq) {
- blk_mq_rq_ctx_init(ctx, rq, rw);
+ blk_mq_rq_ctx_init(q, ctx, rq, rw);
break;
} else if (!(gfp & __GFP_WAIT))
break;
@@ -718,6 +721,8 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
+ trace_block_rq_insert(hctx->queue, rq);
+
list_add_tail(&rq->queuelist, &ctx->rq_list);
blk_mq_hctx_mark_pending(hctx, ctx);
@@ -921,7 +926,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
trace_block_getrq(q, bio, rw);
rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
if (likely(rq))
- blk_mq_rq_ctx_init(ctx, rq, rw);
+ blk_mq_rq_ctx_init(q, ctx, rq, rw);
else {
blk_mq_put_ctx(ctx);
trace_block_sleeprq(q, bio, rw);
@@ -1377,6 +1382,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
q->queue_hw_ctx = hctxs;
q->mq_ops = reg->ops;
+ q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
blk_queue_make_request(q, blk_mq_make_request);
blk_queue_rq_timed_out(q, reg->ops->timeout);
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index a8287b49d062..dc51f467a560 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -96,6 +96,7 @@
* - Code works, detects all the partitions.
*
************************************************************/
+#include <linux/kernel.h>
#include <linux/crc32.h>
#include <linux/ctype.h>
#include <linux/math64.h>
@@ -715,8 +716,8 @@ int efi_partition(struct parsed_partitions *state)
efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid);
/* Naively convert UTF16-LE to 7 bits. */
- label_max = min(sizeof(info->volname) - 1,
- sizeof(ptes[i].partition_name));
+ label_max = min(ARRAY_SIZE(info->volname) - 1,
+ ARRAY_SIZE(ptes[i].partition_name));
info->volname[label_max] = 0;
while (label_count < label_max) {
u8 c = ptes[i].partition_name[label_count] & 0xff;
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 71f337aefa39..4ae5734fb473 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1402,6 +1402,9 @@ config CRYPTO_USER_API_SKCIPHER
This option enables the user-spaces interface for symmetric
key cipher algorithms.
+config CRYPTO_HASH_INFO
+ bool
+
source "drivers/crypto/Kconfig"
source crypto/asymmetric_keys/Kconfig
diff --git a/crypto/Makefile b/crypto/Makefile
index 80019ba8da3a..b3a7e807e08b 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -104,3 +104,4 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
obj-$(CONFIG_XOR_BLOCKS) += xor.o
obj-$(CONFIG_ASYNC_CORE) += async_tx/
obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
+obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o
diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index 6d2c2ea12559..03a6eb95ab50 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -12,6 +12,8 @@ if ASYMMETRIC_KEY_TYPE
config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
tristate "Asymmetric public-key crypto algorithm subtype"
select MPILIB
+ select PUBLIC_KEY_ALGO_RSA
+ select CRYPTO_HASH_INFO
help
This option provides support for asymmetric public key type handling.
If signature generation and/or verification are to be used,
@@ -20,8 +22,8 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
config PUBLIC_KEY_ALGO_RSA
tristate "RSA public-key algorithm"
- depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
select MPILIB_EXTRA
+ select MPILIB
help
This option enables support for the RSA algorithm (PKCS#1, RFC3447).
diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c
index cf807654d221..b77eb5304788 100644
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -209,6 +209,7 @@ struct key_type key_type_asymmetric = {
.match = asymmetric_key_match,
.destroy = asymmetric_key_destroy,
.describe = asymmetric_key_describe,
+ .def_lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE,
};
EXPORT_SYMBOL_GPL(key_type_asymmetric);
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index cb2e29180a87..97eb001960b9 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -22,29 +22,25 @@
MODULE_LICENSE("GPL");
-const char *const pkey_algo[PKEY_ALGO__LAST] = {
+const char *const pkey_algo_name[PKEY_ALGO__LAST] = {
[PKEY_ALGO_DSA] = "DSA",
[PKEY_ALGO_RSA] = "RSA",
};
-EXPORT_SYMBOL_GPL(pkey_algo);
+EXPORT_SYMBOL_GPL(pkey_algo_name);
-const char *const pkey_hash_algo[PKEY_HASH__LAST] = {
- [PKEY_HASH_MD4] = "md4",
- [PKEY_HASH_MD5] = "md5",
- [PKEY_HASH_SHA1] = "sha1",
- [PKEY_HASH_RIPE_MD_160] = "rmd160",
- [PKEY_HASH_SHA256] = "sha256",
- [PKEY_HASH_SHA384] = "sha384",
- [PKEY_HASH_SHA512] = "sha512",
- [PKEY_HASH_SHA224] = "sha224",
+const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST] = {
+#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \
+ defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE)
+ [PKEY_ALGO_RSA] = &RSA_public_key_algorithm,
+#endif
};
-EXPORT_SYMBOL_GPL(pkey_hash_algo);
+EXPORT_SYMBOL_GPL(pkey_algo);
-const char *const pkey_id_type[PKEY_ID_TYPE__LAST] = {
+const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST] = {
[PKEY_ID_PGP] = "PGP",
[PKEY_ID_X509] = "X509",
};
-EXPORT_SYMBOL_GPL(pkey_id_type);
+EXPORT_SYMBOL_GPL(pkey_id_type_name);
/*
* Provide a part of a description of the key for /proc/keys.
@@ -56,7 +52,7 @@ static void public_key_describe(const struct key *asymmetric_key,
if (key)
seq_printf(m, "%s.%s",
- pkey_id_type[key->id_type], key->algo->name);
+ pkey_id_type_name[key->id_type], key->algo->name);
}
/*
@@ -78,21 +74,45 @@ EXPORT_SYMBOL_GPL(public_key_destroy);
/*
* Verify a signature using a public key.
*/
-static int public_key_verify_signature(const struct key *key,
- const struct public_key_signature *sig)
+int public_key_verify_signature(const struct public_key *pk,
+ const struct public_key_signature *sig)
{
- const struct public_key *pk = key->payload.data;
+ const struct public_key_algorithm *algo;
+
+ BUG_ON(!pk);
+ BUG_ON(!pk->mpi[0]);
+ BUG_ON(!pk->mpi[1]);
+ BUG_ON(!sig);
+ BUG_ON(!sig->digest);
+ BUG_ON(!sig->mpi[0]);
+
+ algo = pk->algo;
+ if (!algo) {
+ if (pk->pkey_algo >= PKEY_ALGO__LAST)
+ return -ENOPKG;
+ algo = pkey_algo[pk->pkey_algo];
+ if (!algo)
+ return -ENOPKG;
+ }
- if (!pk->algo->verify_signature)
+ if (!algo->verify_signature)
return -ENOTSUPP;
- if (sig->nr_mpi != pk->algo->n_sig_mpi) {
+ if (sig->nr_mpi != algo->n_sig_mpi) {
pr_debug("Signature has %u MPI not %u\n",
- sig->nr_mpi, pk->algo->n_sig_mpi);
+ sig->nr_mpi, algo->n_sig_mpi);
return -EINVAL;
}
- return pk->algo->verify_signature(pk, sig);
+ return algo->verify_signature(pk, sig);
+}
+EXPORT_SYMBOL_GPL(public_key_verify_signature);
+
+static int public_key_verify_signature_2(const struct key *key,
+ const struct public_key_signature *sig)
+{
+ const struct public_key *pk = key->payload.data;
+ return public_key_verify_signature(pk, sig);
}
/*
@@ -103,6 +123,6 @@ struct asymmetric_key_subtype public_key_subtype = {
.name = "public_key",
.describe = public_key_describe,
.destroy = public_key_destroy,
- .verify_signature = public_key_verify_signature,
+ .verify_signature = public_key_verify_signature_2,
};
EXPORT_SYMBOL_GPL(public_key_subtype);
diff --git a/crypto/asymmetric_keys/public_key.h b/crypto/asymmetric_keys/public_key.h
index 5e5e35626899..5c37a22a0637 100644
--- a/crypto/asymmetric_keys/public_key.h
+++ b/crypto/asymmetric_keys/public_key.h
@@ -28,3 +28,9 @@ struct public_key_algorithm {
};
extern const struct public_key_algorithm RSA_public_key_algorithm;
+
+/*
+ * public_key.c
+ */
+extern int public_key_verify_signature(const struct public_key *pk,
+ const struct public_key_signature *sig);
diff --git a/crypto/asymmetric_keys/rsa.c b/crypto/asymmetric_keys/rsa.c
index 4a6a0696f8a3..90a17f59ba28 100644
--- a/crypto/asymmetric_keys/rsa.c
+++ b/crypto/asymmetric_keys/rsa.c
@@ -73,13 +73,13 @@ static const struct {
size_t size;
} RSA_ASN1_templates[PKEY_HASH__LAST] = {
#define _(X) { RSA_digest_info_##X, sizeof(RSA_digest_info_##X) }
- [PKEY_HASH_MD5] = _(MD5),
- [PKEY_HASH_SHA1] = _(SHA1),
- [PKEY_HASH_RIPE_MD_160] = _(RIPE_MD_160),
- [PKEY_HASH_SHA256] = _(SHA256),
- [PKEY_HASH_SHA384] = _(SHA384),
- [PKEY_HASH_SHA512] = _(SHA512),
- [PKEY_HASH_SHA224] = _(SHA224),
+ [HASH_ALGO_MD5] = _(MD5),
+ [HASH_ALGO_SHA1] = _(SHA1),
+ [HASH_ALGO_RIPE_MD_160] = _(RIPE_MD_160),
+ [HASH_ALGO_SHA256] = _(SHA256),
+ [HASH_ALGO_SHA384] = _(SHA384),
+ [HASH_ALGO_SHA512] = _(SHA512),
+ [HASH_ALGO_SHA224] = _(SHA224),
#undef _
};
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index facbf26bc6bb..29893162497c 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -47,6 +47,8 @@ void x509_free_certificate(struct x509_certificate *cert)
kfree(cert->subject);
kfree(cert->fingerprint);
kfree(cert->authority);
+ kfree(cert->sig.digest);
+ mpi_free(cert->sig.rsa.s);
kfree(cert);
}
}
@@ -152,33 +154,33 @@ int x509_note_pkey_algo(void *context, size_t hdrlen,
return -ENOPKG; /* Unsupported combination */
case OID_md4WithRSAEncryption:
- ctx->cert->sig_hash_algo = PKEY_HASH_MD5;
- ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+ ctx->cert->sig.pkey_hash_algo = HASH_ALGO_MD5;
+ ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
break;
case OID_sha1WithRSAEncryption:
- ctx->cert->sig_hash_algo = PKEY_HASH_SHA1;
- ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+ ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA1;
+ ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
break;
case OID_sha256WithRSAEncryption:
- ctx->cert->sig_hash_algo = PKEY_HASH_SHA256;
- ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+ ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA256;
+ ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
break;
case OID_sha384WithRSAEncryption:
- ctx->cert->sig_hash_algo = PKEY_HASH_SHA384;
- ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+ ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA384;
+ ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
break;
case OID_sha512WithRSAEncryption:
- ctx->cert->sig_hash_algo = PKEY_HASH_SHA512;
- ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+ ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA512;
+ ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
break;
case OID_sha224WithRSAEncryption:
- ctx->cert->sig_hash_algo = PKEY_HASH_SHA224;
- ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+ ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA224;
+ ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
break;
}
@@ -203,8 +205,8 @@ int x509_note_signature(void *context, size_t hdrlen,
return -EINVAL;
}
- ctx->cert->sig = value;
- ctx->cert->sig_size = vlen;
+ ctx->cert->raw_sig = value;
+ ctx->cert->raw_sig_size = vlen;
return 0;
}
@@ -343,8 +345,9 @@ int x509_extract_key_data(void *context, size_t hdrlen,
if (ctx->last_oid != OID_rsaEncryption)
return -ENOPKG;
- /* There seems to be an extraneous 0 byte on the front of the data */
- ctx->cert->pkey_algo = PKEY_ALGO_RSA;
+ ctx->cert->pub->pkey_algo = PKEY_ALGO_RSA;
+
+ /* Discard the BIT STRING metadata */
ctx->key = value + 1;
ctx->key_size = vlen - 1;
return 0;
diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h
index f86dc5fcc4ad..87d9cc26f630 100644
--- a/crypto/asymmetric_keys/x509_parser.h
+++ b/crypto/asymmetric_keys/x509_parser.h
@@ -9,6 +9,7 @@
* 2 of the Licence, or (at your option) any later version.
*/
+#include <linux/time.h>
#include <crypto/public_key.h>
struct x509_certificate {
@@ -20,13 +21,11 @@ struct x509_certificate {
char *authority; /* Authority key fingerprint as hex */
struct tm valid_from;
struct tm valid_to;
- enum pkey_algo pkey_algo : 8; /* Public key algorithm */
- enum pkey_algo sig_pkey_algo : 8; /* Signature public key algorithm */
- enum pkey_hash_algo sig_hash_algo : 8; /* Signature hash algorithm */
const void *tbs; /* Signed data */
- size_t tbs_size; /* Size of signed data */
- const void *sig; /* Signature data */
- size_t sig_size; /* Size of sigature */
+ unsigned tbs_size; /* Size of signed data */
+ unsigned raw_sig_size; /* Size of sigature */
+ const void *raw_sig; /* Signature data */
+ struct public_key_signature sig; /* Signature parameters */
};
/*
@@ -34,3 +33,10 @@ struct x509_certificate {
*/
extern void x509_free_certificate(struct x509_certificate *cert);
extern struct x509_certificate *x509_cert_parse(const void *data, size_t datalen);
+
+/*
+ * x509_public_key.c
+ */
+extern int x509_get_sig_params(struct x509_certificate *cert);
+extern int x509_check_signature(const struct public_key *pub,
+ struct x509_certificate *cert);
diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index 06007f0e880c..f83300b6e8c1 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -18,85 +18,162 @@
#include <linux/asn1_decoder.h>
#include <keys/asymmetric-subtype.h>
#include <keys/asymmetric-parser.h>
+#include <keys/system_keyring.h>
#include <crypto/hash.h>
#include "asymmetric_keys.h"
#include "public_key.h"
#include "x509_parser.h"
-static const
-struct public_key_algorithm *x509_public_key_algorithms[PKEY_ALGO__LAST] = {
- [PKEY_ALGO_DSA] = NULL,
-#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \
- defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE)
- [PKEY_ALGO_RSA] = &RSA_public_key_algorithm,
-#endif
-};
+/*
+ * Find a key in the given keyring by issuer and authority.
+ */
+static struct key *x509_request_asymmetric_key(
+ struct key *keyring,
+ const char *signer, size_t signer_len,
+ const char *authority, size_t auth_len)
+{
+ key_ref_t key;
+ char *id;
+
+ /* Construct an identifier. */
+ id = kmalloc(signer_len + 2 + auth_len + 1, GFP_KERNEL);
+ if (!id)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(id, signer, signer_len);
+ id[signer_len + 0] = ':';
+ id[signer_len + 1] = ' ';
+ memcpy(id + signer_len + 2, authority, auth_len);
+ id[signer_len + 2 + auth_len] = 0;
+
+ pr_debug("Look up: \"%s\"\n", id);
+
+ key = keyring_search(make_key_ref(keyring, 1),
+ &key_type_asymmetric, id);
+ if (IS_ERR(key))
+ pr_debug("Request for module key '%s' err %ld\n",
+ id, PTR_ERR(key));
+ kfree(id);
+
+ if (IS_ERR(key)) {
+ switch (PTR_ERR(key)) {
+ /* Hide some search errors */
+ case -EACCES:
+ case -ENOTDIR:
+ case -EAGAIN:
+ return ERR_PTR(-ENOKEY);
+ default:
+ return ERR_CAST(key);
+ }
+ }
+
+ pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key_ref_to_ptr(key)));
+ return key_ref_to_ptr(key);
+}
/*
- * Check the signature on a certificate using the provided public key
+ * Set up the signature parameters in an X.509 certificate. This involves
+ * digesting the signed data and extracting the signature.
*/
-static int x509_check_signature(const struct public_key *pub,
- const struct x509_certificate *cert)
+int x509_get_sig_params(struct x509_certificate *cert)
{
- struct public_key_signature *sig;
struct crypto_shash *tfm;
struct shash_desc *desc;
size_t digest_size, desc_size;
+ void *digest;
int ret;
pr_devel("==>%s()\n", __func__);
-
+
+ if (cert->sig.rsa.s)
+ return 0;
+
+ cert->sig.rsa.s = mpi_read_raw_data(cert->raw_sig, cert->raw_sig_size);
+ if (!cert->sig.rsa.s)
+ return -ENOMEM;
+ cert->sig.nr_mpi = 1;
+
/* Allocate the hashing algorithm we're going to need and find out how
* big the hash operational data will be.
*/
- tfm = crypto_alloc_shash(pkey_hash_algo[cert->sig_hash_algo], 0, 0);
+ tfm = crypto_alloc_shash(hash_algo_name[cert->sig.pkey_hash_algo], 0, 0);
if (IS_ERR(tfm))
return (PTR_ERR(tfm) == -ENOENT) ? -ENOPKG : PTR_ERR(tfm);
desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
digest_size = crypto_shash_digestsize(tfm);
- /* We allocate the hash operational data storage on the end of our
- * context data.
+ /* We allocate the hash operational data storage on the end of the
+ * digest storage space.
*/
ret = -ENOMEM;
- sig = kzalloc(sizeof(*sig) + desc_size + digest_size, GFP_KERNEL);
- if (!sig)
- goto error_no_sig;
+ digest = kzalloc(digest_size + desc_size, GFP_KERNEL);
+ if (!digest)
+ goto error;
- sig->pkey_hash_algo = cert->sig_hash_algo;
- sig->digest = (u8 *)sig + sizeof(*sig) + desc_size;
- sig->digest_size = digest_size;
+ cert->sig.digest = digest;
+ cert->sig.digest_size = digest_size;
- desc = (void *)sig + sizeof(*sig);
- desc->tfm = tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ desc = digest + digest_size;
+ desc->tfm = tfm;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
ret = crypto_shash_init(desc);
if (ret < 0)
goto error;
+ might_sleep();
+ ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, digest);
+error:
+ crypto_free_shash(tfm);
+ pr_devel("<==%s() = %d\n", __func__, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(x509_get_sig_params);
- ret = -ENOMEM;
- sig->rsa.s = mpi_read_raw_data(cert->sig, cert->sig_size);
- if (!sig->rsa.s)
- goto error;
+/*
+ * Check the signature on a certificate using the provided public key
+ */
+int x509_check_signature(const struct public_key *pub,
+ struct x509_certificate *cert)
+{
+ int ret;
- ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, sig->digest);
- if (ret < 0)
- goto error_mpi;
+ pr_devel("==>%s()\n", __func__);
- ret = pub->algo->verify_signature(pub, sig);
+ ret = x509_get_sig_params(cert);
+ if (ret < 0)
+ return ret;
+ ret = public_key_verify_signature(pub, &cert->sig);
pr_debug("Cert Verification: %d\n", ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(x509_check_signature);
-error_mpi:
- mpi_free(sig->rsa.s);
-error:
- kfree(sig);
-error_no_sig:
- crypto_free_shash(tfm);
+/*
+ * Check the new certificate against the ones in the trust keyring. If one of
+ * those is the signing key and validates the new certificate, then mark the
+ * new certificate as being trusted.
+ *
+ * Return 0 if the new certificate was successfully validated, 1 if we couldn't
+ * find a matching parent certificate in the trusted list and an error if there
+ * is a matching certificate but the signature check fails.
+ */
+static int x509_validate_trust(struct x509_certificate *cert,
+ struct key *trust_keyring)
+{
+ const struct public_key *pk;
+ struct key *key;
+ int ret = 1;
- pr_devel("<==%s() = %d\n", __func__, ret);
+ key = x509_request_asymmetric_key(trust_keyring,
+ cert->issuer, strlen(cert->issuer),
+ cert->authority,
+ strlen(cert->authority));
+ if (!IS_ERR(key)) {
+ pk = key->payload.data;
+ ret = x509_check_signature(pk, cert);
+ }
return ret;
}
@@ -106,7 +183,6 @@ error_no_sig:
static int x509_key_preparse(struct key_preparsed_payload *prep)
{
struct x509_certificate *cert;
- struct tm now;
size_t srlen, sulen;
char *desc = NULL;
int ret;
@@ -117,7 +193,18 @@ static int x509_key_preparse(struct key_preparsed_payload *prep)
pr_devel("Cert Issuer: %s\n", cert->issuer);
pr_devel("Cert Subject: %s\n", cert->subject);
- pr_devel("Cert Key Algo: %s\n", pkey_algo[cert->pkey_algo]);
+
+ if (cert->pub->pkey_algo >= PKEY_ALGO__LAST ||
+ cert->sig.pkey_algo >= PKEY_ALGO__LAST ||
+ cert->sig.pkey_hash_algo >= PKEY_HASH__LAST ||
+ !pkey_algo[cert->pub->pkey_algo] ||
+ !pkey_algo[cert->sig.pkey_algo] ||
+ !hash_algo_name[cert->sig.pkey_hash_algo]) {
+ ret = -ENOPKG;
+ goto error_free_cert;
+ }
+
+ pr_devel("Cert Key Algo: %s\n", pkey_algo_name[cert->pub->pkey_algo]);
pr_devel("Cert Valid From: %04ld-%02d-%02d %02d:%02d:%02d\n",
cert->valid_from.tm_year + 1900, cert->valid_from.tm_mon + 1,
cert->valid_from.tm_mday, cert->valid_from.tm_hour,
@@ -127,61 +214,29 @@ static int x509_key_preparse(struct key_preparsed_payload *prep)
cert->valid_to.tm_mday, cert->valid_to.tm_hour,
cert->valid_to.tm_min, cert->valid_to.tm_sec);
pr_devel("Cert Signature: %s + %s\n",
- pkey_algo[cert->sig_pkey_algo],
- pkey_hash_algo[cert->sig_hash_algo]);
+ pkey_algo_name[cert->sig.pkey_algo],
+ hash_algo_name[cert->sig.pkey_hash_algo]);
- if (!cert->fingerprint || !cert->authority) {
- pr_warn("Cert for '%s' must have SubjKeyId and AuthKeyId extensions\n",
+ if (!cert->fingerprint) {
+ pr_warn("Cert for '%s' must have a SubjKeyId extension\n",
cert->subject);
ret = -EKEYREJECTED;
goto error_free_cert;
}
- time_to_tm(CURRENT_TIME.tv_sec, 0, &now);
- pr_devel("Now: %04ld-%02d-%02d %02d:%02d:%02d\n",
- now.tm_year + 1900, now.tm_mon + 1, now.tm_mday,
- now.tm_hour, now.tm_min, now.tm_sec);
- if (now.tm_year < cert->valid_from.tm_year ||
- (now.tm_year == cert->valid_from.tm_year &&
- (now.tm_mon < cert->valid_from.tm_mon ||
- (now.tm_mon == cert->valid_from.tm_mon &&
- (now.tm_mday < cert->valid_from.tm_mday ||
- (now.tm_mday == cert->valid_from.tm_mday &&
- (now.tm_hour < cert->valid_from.tm_hour ||
- (now.tm_hour == cert->valid_from.tm_hour &&
- (now.tm_min < cert->valid_from.tm_min ||
- (now.tm_min == cert->valid_from.tm_min &&
- (now.tm_sec < cert->valid_from.tm_sec
- ))))))))))) {
- pr_warn("Cert %s is not yet valid\n", cert->fingerprint);
- ret = -EKEYREJECTED;
- goto error_free_cert;
- }
- if (now.tm_year > cert->valid_to.tm_year ||
- (now.tm_year == cert->valid_to.tm_year &&
- (now.tm_mon > cert->valid_to.tm_mon ||
- (now.tm_mon == cert->valid_to.tm_mon &&
- (now.tm_mday > cert->valid_to.tm_mday ||
- (now.tm_mday == cert->valid_to.tm_mday &&
- (now.tm_hour > cert->valid_to.tm_hour ||
- (now.tm_hour == cert->valid_to.tm_hour &&
- (now.tm_min > cert->valid_to.tm_min ||
- (now.tm_min == cert->valid_to.tm_min &&
- (now.tm_sec > cert->valid_to.tm_sec
- ))))))))))) {
- pr_warn("Cert %s has expired\n", cert->fingerprint);
- ret = -EKEYEXPIRED;
- goto error_free_cert;
- }
-
- cert->pub->algo = x509_public_key_algorithms[cert->pkey_algo];
+ cert->pub->algo = pkey_algo[cert->pub->pkey_algo];
cert->pub->id_type = PKEY_ID_X509;
- /* Check the signature on the key */
- if (strcmp(cert->fingerprint, cert->authority) == 0) {
- ret = x509_check_signature(cert->pub, cert);
+ /* Check the signature on the key if it appears to be self-signed */
+ if (!cert->authority ||
+ strcmp(cert->fingerprint, cert->authority) == 0) {
+ ret = x509_check_signature(cert->pub, cert); /* self-signed */
if (ret < 0)
goto error_free_cert;
+ } else {
+ ret = x509_validate_trust(cert, system_trusted_keyring);
+ if (!ret)
+ prep->trusted = 1;
}
/* Propose a description */
@@ -237,3 +292,6 @@ static void __exit x509_key_exit(void)
module_init(x509_key_init);
module_exit(x509_key_exit);
+
+MODULE_DESCRIPTION("X.509 certificate parser");
+MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 9e62feffb374..f8c0b8dbeb75 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -50,33 +50,36 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
&dest, 1, &src, 1, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
+ struct dmaengine_unmap_data *unmap = NULL;
- if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
- dma_addr_t dma_dest, dma_src;
+ if (device)
+ unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
+
+ if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
unsigned long dma_prep_flags = 0;
if (submit->cb_fn)
dma_prep_flags |= DMA_PREP_INTERRUPT;
if (submit->flags & ASYNC_TX_FENCE)
dma_prep_flags |= DMA_PREP_FENCE;
- dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
- DMA_FROM_DEVICE);
-
- dma_src = dma_map_page(device->dev, src, src_offset, len,
- DMA_TO_DEVICE);
-
- tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
- len, dma_prep_flags);
- if (!tx) {
- dma_unmap_page(device->dev, dma_dest, len,
- DMA_FROM_DEVICE);
- dma_unmap_page(device->dev, dma_src, len,
- DMA_TO_DEVICE);
- }
+
+ unmap->to_cnt = 1;
+ unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len,
+ DMA_TO_DEVICE);
+ unmap->from_cnt = 1;
+ unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len,
+ DMA_FROM_DEVICE);
+ unmap->len = len;
+
+ tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+ unmap->addr[0], len,
+ dma_prep_flags);
}
if (tx) {
pr_debug("%s: (async) len: %zu\n", __func__, len);
+
+ dma_set_unmap(tx, unmap);
async_tx_submit(chan, tx, submit);
} else {
void *dest_buf, *src_buf;
@@ -96,6 +99,8 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
async_tx_sync_epilog(submit);
}
+ dmaengine_unmap_put(unmap);
+
return tx;
}
EXPORT_SYMBOL_GPL(async_memcpy);
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 91d5d385899e..d05327caf69d 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -46,49 +46,24 @@ static struct page *pq_scribble_page;
* do_async_gen_syndrome - asynchronously calculate P and/or Q
*/
static __async_inline struct dma_async_tx_descriptor *
-do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
- const unsigned char *scfs, unsigned int offset, int disks,
- size_t len, dma_addr_t *dma_src,
+do_async_gen_syndrome(struct dma_chan *chan,
+ const unsigned char *scfs, int disks,
+ struct dmaengine_unmap_data *unmap,
+ enum dma_ctrl_flags dma_flags,
struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct dma_device *dma = chan->device;
- enum dma_ctrl_flags dma_flags = 0;
enum async_tx_flags flags_orig = submit->flags;
dma_async_tx_callback cb_fn_orig = submit->cb_fn;
dma_async_tx_callback cb_param_orig = submit->cb_param;
int src_cnt = disks - 2;
- unsigned char coefs[src_cnt];
unsigned short pq_src_cnt;
dma_addr_t dma_dest[2];
int src_off = 0;
- int idx;
- int i;
- /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
- if (P(blocks, disks))
- dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
- len, DMA_BIDIRECTIONAL);
- else
- dma_flags |= DMA_PREP_PQ_DISABLE_P;
- if (Q(blocks, disks))
- dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
- len, DMA_BIDIRECTIONAL);
- else
- dma_flags |= DMA_PREP_PQ_DISABLE_Q;
-
- /* convert source addresses being careful to collapse 'empty'
- * sources and update the coefficients accordingly
- */
- for (i = 0, idx = 0; i < src_cnt; i++) {
- if (blocks[i] == NULL)
- continue;
- dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
- DMA_TO_DEVICE);
- coefs[idx] = scfs[i];
- idx++;
- }
- src_cnt = idx;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
while (src_cnt > 0) {
submit->flags = flags_orig;
@@ -100,28 +75,25 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
if (src_cnt > pq_src_cnt) {
submit->flags &= ~ASYNC_TX_ACK;
submit->flags |= ASYNC_TX_FENCE;
- dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
submit->cb_fn = NULL;
submit->cb_param = NULL;
} else {
- dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
submit->cb_fn = cb_fn_orig;
submit->cb_param = cb_param_orig;
if (cb_fn_orig)
dma_flags |= DMA_PREP_INTERRUPT;
}
- if (submit->flags & ASYNC_TX_FENCE)
- dma_flags |= DMA_PREP_FENCE;
- /* Since we have clobbered the src_list we are committed
- * to doing this asynchronously. Drivers force forward
- * progress in case they can not provide a descriptor
+ /* Drivers force forward progress in case they can not provide
+ * a descriptor
*/
for (;;) {
+ dma_dest[0] = unmap->addr[disks - 2];
+ dma_dest[1] = unmap->addr[disks - 1];
tx = dma->device_prep_dma_pq(chan, dma_dest,
- &dma_src[src_off],
+ &unmap->addr[src_off],
pq_src_cnt,
- &coefs[src_off], len,
+ &scfs[src_off], unmap->len,
dma_flags);
if (likely(tx))
break;
@@ -129,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
dma_async_issue_pending(chan);
}
+ dma_set_unmap(tx, unmap);
async_tx_submit(chan, tx, submit);
submit->depend_tx = tx;
@@ -188,10 +161,6 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
* set to NULL those buffers will be replaced with the raid6_zero_page
* in the synchronous path and omitted in the hardware-asynchronous
* path.
- *
- * 'blocks' note: if submit->scribble is NULL then the contents of
- * 'blocks' may be overwritten to perform address conversions
- * (dma_map_page() or page_address()).
*/
struct dma_async_tx_descriptor *
async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
@@ -202,26 +171,69 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
&P(blocks, disks), 2,
blocks, src_cnt, len);
struct dma_device *device = chan ? chan->device : NULL;
- dma_addr_t *dma_src = NULL;
+ struct dmaengine_unmap_data *unmap = NULL;
BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
- if (submit->scribble)
- dma_src = submit->scribble;
- else if (sizeof(dma_addr_t) <= sizeof(struct page *))
- dma_src = (dma_addr_t *) blocks;
+ if (device)
+ unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
- if (dma_src && device &&
+ if (unmap &&
(src_cnt <= dma_maxpq(device, 0) ||
dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
is_dma_pq_aligned(device, offset, 0, len)) {
+ struct dma_async_tx_descriptor *tx;
+ enum dma_ctrl_flags dma_flags = 0;
+ unsigned char coefs[src_cnt];
+ int i, j;
+
/* run the p+q asynchronously */
pr_debug("%s: (async) disks: %d len: %zu\n",
__func__, disks, len);
- return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
- disks, len, dma_src, submit);
+
+ /* convert source addresses being careful to collapse 'empty'
+ * sources and update the coefficients accordingly
+ */
+ unmap->len = len;
+ for (i = 0, j = 0; i < src_cnt; i++) {
+ if (blocks[i] == NULL)
+ continue;
+ unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset,
+ len, DMA_TO_DEVICE);
+ coefs[j] = raid6_gfexp[i];
+ unmap->to_cnt++;
+ j++;
+ }
+
+ /*
+ * DMAs use destinations as sources,
+ * so use BIDIRECTIONAL mapping
+ */
+ unmap->bidi_cnt++;
+ if (P(blocks, disks))
+ unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks),
+ offset, len, DMA_BIDIRECTIONAL);
+ else {
+ unmap->addr[j++] = 0;
+ dma_flags |= DMA_PREP_PQ_DISABLE_P;
+ }
+
+ unmap->bidi_cnt++;
+ if (Q(blocks, disks))
+ unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks),
+ offset, len, DMA_BIDIRECTIONAL);
+ else {
+ unmap->addr[j++] = 0;
+ dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+ }
+
+ tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit);
+ dmaengine_unmap_put(unmap);
+ return tx;
}
+ dmaengine_unmap_put(unmap);
+
/* run the pq synchronously */
pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
@@ -277,50 +289,60 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
struct dma_async_tx_descriptor *tx;
unsigned char coefs[disks-2];
enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
- dma_addr_t *dma_src = NULL;
- int src_cnt = 0;
+ struct dmaengine_unmap_data *unmap = NULL;
BUG_ON(disks < 4);
- if (submit->scribble)
- dma_src = submit->scribble;
- else if (sizeof(dma_addr_t) <= sizeof(struct page *))
- dma_src = (dma_addr_t *) blocks;
+ if (device)
+ unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
- if (dma_src && device && disks <= dma_maxpq(device, 0) &&
+ if (unmap && disks <= dma_maxpq(device, 0) &&
is_dma_pq_aligned(device, offset, 0, len)) {
struct device *dev = device->dev;
- dma_addr_t *pq = &dma_src[disks-2];
- int i;
+ dma_addr_t pq[2];
+ int i, j = 0, src_cnt = 0;
pr_debug("%s: (async) disks: %d len: %zu\n",
__func__, disks, len);
- if (!P(blocks, disks))
+
+ unmap->len = len;
+ for (i = 0; i < disks-2; i++)
+ if (likely(blocks[i])) {
+ unmap->addr[j] = dma_map_page(dev, blocks[i],
+ offset, len,
+ DMA_TO_DEVICE);
+ coefs[j] = raid6_gfexp[i];
+ unmap->to_cnt++;
+ src_cnt++;
+ j++;
+ }
+
+ if (!P(blocks, disks)) {
+ pq[0] = 0;
dma_flags |= DMA_PREP_PQ_DISABLE_P;
- else
+ } else {
pq[0] = dma_map_page(dev, P(blocks, disks),
offset, len,
DMA_TO_DEVICE);
- if (!Q(blocks, disks))
+ unmap->addr[j++] = pq[0];
+ unmap->to_cnt++;
+ }
+ if (!Q(blocks, disks)) {
+ pq[1] = 0;
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
- else
+ } else {
pq[1] = dma_map_page(dev, Q(blocks, disks),
offset, len,
DMA_TO_DEVICE);
+ unmap->addr[j++] = pq[1];
+ unmap->to_cnt++;
+ }
if (submit->flags & ASYNC_TX_FENCE)
dma_flags |= DMA_PREP_FENCE;
- for (i = 0; i < disks-2; i++)
- if (likely(blocks[i])) {
- dma_src[src_cnt] = dma_map_page(dev, blocks[i],
- offset, len,
- DMA_TO_DEVICE);
- coefs[src_cnt] = raid6_gfexp[i];
- src_cnt++;
- }
-
for (;;) {
- tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
+ tx = device->device_prep_dma_pq_val(chan, pq,
+ unmap->addr,
src_cnt,
coefs,
len, pqres,
@@ -330,6 +352,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
async_tx_quiesce(&submit->depend_tx);
dma_async_issue_pending(chan);
}
+
+ dma_set_unmap(tx, unmap);
async_tx_submit(chan, tx, submit);
return tx;
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index a9f08a6a582e..934a84981495 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -26,6 +26,7 @@
#include <linux/dma-mapping.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
+#include <linux/dmaengine.h>
static struct dma_async_tx_descriptor *
async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
@@ -34,35 +35,45 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
&dest, 1, srcs, 2, len);
struct dma_device *dma = chan ? chan->device : NULL;
+ struct dmaengine_unmap_data *unmap = NULL;
const u8 *amul, *bmul;
u8 ax, bx;
u8 *a, *b, *c;
- if (dma) {
- dma_addr_t dma_dest[2];
- dma_addr_t dma_src[2];
+ if (dma)
+ unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+
+ if (unmap) {
struct device *dev = dma->dev;
+ dma_addr_t pq[2];
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
if (submit->flags & ASYNC_TX_FENCE)
dma_flags |= DMA_PREP_FENCE;
- dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
- dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
- dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
- tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
+ unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
+ unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
+ unmap->to_cnt = 2;
+
+ unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+ unmap->bidi_cnt = 1;
+ /* engine only looks at Q, but expects it to follow P */
+ pq[1] = unmap->addr[2];
+
+ unmap->len = len;
+ tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef,
len, dma_flags);
if (tx) {
+ dma_set_unmap(tx, unmap);
async_tx_submit(chan, tx, submit);
+ dmaengine_unmap_put(unmap);
return tx;
}
/* could not get a descriptor, unmap and fall through to
* the synchronous path
*/
- dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
- dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
- dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
+ dmaengine_unmap_put(unmap);
}
/* run the operation synchronously */
@@ -89,23 +100,38 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
&dest, 1, &src, 1, len);
struct dma_device *dma = chan ? chan->device : NULL;
+ struct dmaengine_unmap_data *unmap = NULL;
const u8 *qmul; /* Q multiplier table */
u8 *d, *s;
- if (dma) {
+ if (dma)
+ unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+
+ if (unmap) {
dma_addr_t dma_dest[2];
- dma_addr_t dma_src[1];
struct device *dev = dma->dev;
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
if (submit->flags & ASYNC_TX_FENCE)
dma_flags |= DMA_PREP_FENCE;
- dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
- dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
- tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
- len, dma_flags);
+ unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
+ unmap->to_cnt++;
+ unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+ dma_dest[1] = unmap->addr[1];
+ unmap->bidi_cnt++;
+ unmap->len = len;
+
+ /* this looks funny, but the engine looks for Q at
+ * dma_dest[1] and ignores dma_dest[0] as a dest
+ * due to DMA_PREP_PQ_DISABLE_P
+ */
+ tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr,
+ 1, &coef, len, dma_flags);
+
if (tx) {
+ dma_set_unmap(tx, unmap);
+ dmaengine_unmap_put(unmap);
async_tx_submit(chan, tx, submit);
return tx;
}
@@ -113,8 +139,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
/* could not get a descriptor, unmap and fall through to
* the synchronous path
*/
- dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
- dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+ dmaengine_unmap_put(unmap);
}
/* no channel available, or failed to allocate a descriptor, so
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 7be34248b450..39ea4791a3c9 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -128,7 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
}
device->device_issue_pending(chan);
} else {
- if (dma_wait_for_async_tx(depend_tx) != DMA_SUCCESS)
+ if (dma_wait_for_async_tx(depend_tx) != DMA_COMPLETE)
panic("%s: DMA error waiting for depend_tx\n",
__func__);
tx->tx_submit(tx);
@@ -280,7 +280,7 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
* we are referring to the correct operation
*/
BUG_ON(async_tx_test_ack(*tx));
- if (dma_wait_for_async_tx(*tx) != DMA_SUCCESS)
+ if (dma_wait_for_async_tx(*tx) != DMA_COMPLETE)
panic("%s: DMA error waiting for transaction\n",
__func__);
async_tx_ack(*tx);
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 8ade0a0481c6..3c562f5a60bb 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -33,48 +33,31 @@
/* do_async_xor - dma map the pages and perform the xor with an engine */
static __async_inline struct dma_async_tx_descriptor *
-do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
- unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
+do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,
struct async_submit_ctl *submit)
{
struct dma_device *dma = chan->device;
struct dma_async_tx_descriptor *tx = NULL;
- int src_off = 0;
- int i;
dma_async_tx_callback cb_fn_orig = submit->cb_fn;
void *cb_param_orig = submit->cb_param;
enum async_tx_flags flags_orig = submit->flags;
- enum dma_ctrl_flags dma_flags;
- int xor_src_cnt = 0;
- dma_addr_t dma_dest;
-
- /* map the dest bidrectional in case it is re-used as a source */
- dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL);
- for (i = 0; i < src_cnt; i++) {
- /* only map the dest once */
- if (!src_list[i])
- continue;
- if (unlikely(src_list[i] == dest)) {
- dma_src[xor_src_cnt++] = dma_dest;
- continue;
- }
- dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset,
- len, DMA_TO_DEVICE);
- }
- src_cnt = xor_src_cnt;
+ enum dma_ctrl_flags dma_flags = 0;
+ int src_cnt = unmap->to_cnt;
+ int xor_src_cnt;
+ dma_addr_t dma_dest = unmap->addr[unmap->to_cnt];
+ dma_addr_t *src_list = unmap->addr;
while (src_cnt) {
+ dma_addr_t tmp;
+
submit->flags = flags_orig;
- dma_flags = 0;
xor_src_cnt = min(src_cnt, (int)dma->max_xor);
- /* if we are submitting additional xors, leave the chain open,
- * clear the callback parameters, and leave the destination
- * buffer mapped
+ /* if we are submitting additional xors, leave the chain open
+ * and clear the callback parameters
*/
if (src_cnt > xor_src_cnt) {
submit->flags &= ~ASYNC_TX_ACK;
submit->flags |= ASYNC_TX_FENCE;
- dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
submit->cb_fn = NULL;
submit->cb_param = NULL;
} else {
@@ -85,12 +68,18 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
dma_flags |= DMA_PREP_INTERRUPT;
if (submit->flags & ASYNC_TX_FENCE)
dma_flags |= DMA_PREP_FENCE;
- /* Since we have clobbered the src_list we are committed
- * to doing this asynchronously. Drivers force forward progress
- * in case they can not provide a descriptor
+
+ /* Drivers force forward progress in case they can not provide a
+ * descriptor
*/
- tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off],
- xor_src_cnt, len, dma_flags);
+ tmp = src_list[0];
+ if (src_list > unmap->addr)
+ src_list[0] = dma_dest;
+ tx = dma->device_prep_dma_xor(chan, dma_dest, src_list,
+ xor_src_cnt, unmap->len,
+ dma_flags);
+ src_list[0] = tmp;
+
if (unlikely(!tx))
async_tx_quiesce(&submit->depend_tx);
@@ -99,22 +88,21 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
while (unlikely(!tx)) {
dma_async_issue_pending(chan);
tx = dma->device_prep_dma_xor(chan, dma_dest,
- &dma_src[src_off],
- xor_src_cnt, len,
+ src_list,
+ xor_src_cnt, unmap->len,
dma_flags);
}
+ dma_set_unmap(tx, unmap);
async_tx_submit(chan, tx, submit);
submit->depend_tx = tx;
if (src_cnt > xor_src_cnt) {
/* drop completed sources */
src_cnt -= xor_src_cnt;
- src_off += xor_src_cnt;
-
/* use the intermediate result a source */
- dma_src[--src_off] = dma_dest;
src_cnt++;
+ src_list += xor_src_cnt - 1;
} else
break;
}
@@ -189,22 +177,40 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
&dest, 1, src_list,
src_cnt, len);
- dma_addr_t *dma_src = NULL;
+ struct dma_device *device = chan ? chan->device : NULL;
+ struct dmaengine_unmap_data *unmap = NULL;
BUG_ON(src_cnt <= 1);
- if (submit->scribble)
- dma_src = submit->scribble;
- else if (sizeof(dma_addr_t) <= sizeof(struct page *))
- dma_src = (dma_addr_t *) src_list;
+ if (device)
+ unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO);
+
+ if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
+ struct dma_async_tx_descriptor *tx;
+ int i, j;
- if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
/* run the xor asynchronously */
pr_debug("%s (async): len: %zu\n", __func__, len);
- return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
- dma_src, submit);
+ unmap->len = len;
+ for (i = 0, j = 0; i < src_cnt; i++) {
+ if (!src_list[i])
+ continue;
+ unmap->to_cnt++;
+ unmap->addr[j++] = dma_map_page(device->dev, src_list[i],
+ offset, len, DMA_TO_DEVICE);
+ }
+
+ /* map it bidirectional as it may be re-used as a source */
+ unmap->addr[j] = dma_map_page(device->dev, dest, offset, len,
+ DMA_BIDIRECTIONAL);
+ unmap->bidi_cnt = 1;
+
+ tx = do_async_xor(chan, unmap, submit);
+ dmaengine_unmap_put(unmap);
+ return tx;
} else {
+ dmaengine_unmap_put(unmap);
/* run the xor synchronously */
pr_debug("%s (sync): len: %zu\n", __func__, len);
WARN_ONCE(chan, "%s: no space for dma address conversion\n",
@@ -268,16 +274,14 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
- dma_addr_t *dma_src = NULL;
+ struct dmaengine_unmap_data *unmap = NULL;
BUG_ON(src_cnt <= 1);
- if (submit->scribble)
- dma_src = submit->scribble;
- else if (sizeof(dma_addr_t) <= sizeof(struct page *))
- dma_src = (dma_addr_t *) src_list;
+ if (device)
+ unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO);
- if (dma_src && device && src_cnt <= device->max_xor &&
+ if (unmap && src_cnt <= device->max_xor &&
is_dma_xor_aligned(device, offset, 0, len)) {
unsigned long dma_prep_flags = 0;
int i;
@@ -288,11 +292,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
dma_prep_flags |= DMA_PREP_INTERRUPT;
if (submit->flags & ASYNC_TX_FENCE)
dma_prep_flags |= DMA_PREP_FENCE;
- for (i = 0; i < src_cnt; i++)
- dma_src[i] = dma_map_page(device->dev, src_list[i],
- offset, len, DMA_TO_DEVICE);
- tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
+ for (i = 0; i < src_cnt; i++) {
+ unmap->addr[i] = dma_map_page(device->dev, src_list[i],
+ offset, len, DMA_TO_DEVICE);
+ unmap->to_cnt++;
+ }
+ unmap->len = len;
+
+ tx = device->device_prep_dma_xor_val(chan, unmap->addr, src_cnt,
len, result,
dma_prep_flags);
if (unlikely(!tx)) {
@@ -301,11 +309,11 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
while (!tx) {
dma_async_issue_pending(chan);
tx = device->device_prep_dma_xor_val(chan,
- dma_src, src_cnt, len, result,
+ unmap->addr, src_cnt, len, result,
dma_prep_flags);
}
}
-
+ dma_set_unmap(tx, unmap);
async_tx_submit(chan, tx, submit);
} else {
enum async_tx_flags flags_orig = submit->flags;
@@ -327,6 +335,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
async_tx_sync_epilog(submit);
submit->flags = flags_orig;
}
+ dmaengine_unmap_put(unmap);
return tx;
}
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
index 4a92bac744dc..dad95f45b88f 100644
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -28,7 +28,7 @@
#undef pr
#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
-#define NDISKS 16 /* Including P and Q */
+#define NDISKS 64 /* Including P and Q */
static struct page *dataptrs[NDISKS];
static addr_conv_t addr_conv[NDISKS];
@@ -219,6 +219,14 @@ static int raid6_test(void)
err += test(11, &tests);
err += test(12, &tests);
}
+
+ /* the 24 disk case is special for ioatdma as it is the boudary point
+ * at which it needs to switch from 8-source ops to 16-source
+ * ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set)
+ */
+ if (NDISKS > 24)
+ err += test(24, &tests);
+
err += test(NDISKS, &tests);
pr("\n");
diff --git a/crypto/hash_info.c b/crypto/hash_info.c
new file mode 100644
index 000000000000..3e7ff46f26e8
--- /dev/null
+++ b/crypto/hash_info.c
@@ -0,0 +1,56 @@
+/*
+ * Hash Info: Hash algorithms information
+ *
+ * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/export.h>
+#include <crypto/hash_info.h>
+
+const char *const hash_algo_name[HASH_ALGO__LAST] = {
+ [HASH_ALGO_MD4] = "md4",
+ [HASH_ALGO_MD5] = "md5",
+ [HASH_ALGO_SHA1] = "sha1",
+ [HASH_ALGO_RIPE_MD_160] = "rmd160",
+ [HASH_ALGO_SHA256] = "sha256",
+ [HASH_ALGO_SHA384] = "sha384",
+ [HASH_ALGO_SHA512] = "sha512",
+ [HASH_ALGO_SHA224] = "sha224",
+ [HASH_ALGO_RIPE_MD_128] = "rmd128",
+ [HASH_ALGO_RIPE_MD_256] = "rmd256",
+ [HASH_ALGO_RIPE_MD_320] = "rmd320",
+ [HASH_ALGO_WP_256] = "wp256",
+ [HASH_ALGO_WP_384] = "wp384",
+ [HASH_ALGO_WP_512] = "wp512",
+ [HASH_ALGO_TGR_128] = "tgr128",
+ [HASH_ALGO_TGR_160] = "tgr160",
+ [HASH_ALGO_TGR_192] = "tgr192",
+};
+EXPORT_SYMBOL_GPL(hash_algo_name);
+
+const int hash_digest_size[HASH_ALGO__LAST] = {
+ [HASH_ALGO_MD4] = MD5_DIGEST_SIZE,
+ [HASH_ALGO_MD5] = MD5_DIGEST_SIZE,
+ [HASH_ALGO_SHA1] = SHA1_DIGEST_SIZE,
+ [HASH_ALGO_RIPE_MD_160] = RMD160_DIGEST_SIZE,
+ [HASH_ALGO_SHA256] = SHA256_DIGEST_SIZE,
+ [HASH_ALGO_SHA384] = SHA384_DIGEST_SIZE,
+ [HASH_ALGO_SHA512] = SHA512_DIGEST_SIZE,
+ [HASH_ALGO_SHA224] = SHA224_DIGEST_SIZE,
+ [HASH_ALGO_RIPE_MD_128] = RMD128_DIGEST_SIZE,
+ [HASH_ALGO_RIPE_MD_256] = RMD256_DIGEST_SIZE,
+ [HASH_ALGO_RIPE_MD_320] = RMD320_DIGEST_SIZE,
+ [HASH_ALGO_WP_256] = WP256_DIGEST_SIZE,
+ [HASH_ALGO_WP_384] = WP384_DIGEST_SIZE,
+ [HASH_ALGO_WP_512] = WP512_DIGEST_SIZE,
+ [HASH_ALGO_TGR_128] = TGR128_DIGEST_SIZE,
+ [HASH_ALGO_TGR_160] = TGR160_DIGEST_SIZE,
+ [HASH_ALGO_TGR_192] = TGR192_DIGEST_SIZE,
+};
+EXPORT_SYMBOL_GPL(hash_digest_size);
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index c95df0b8c880..5d9248526d78 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -235,17 +235,6 @@ config ACPI_INITRD_TABLE_OVERRIDE
initrd, therefore it's safe to say Y.
See Documentation/acpi/initrd_table_override.txt for details
-config ACPI_BLACKLIST_YEAR
- int "Disable ACPI for systems before Jan 1st this year" if X86_32
- default 0
- help
- Enter a 4-digit year, e.g., 2001, to disable ACPI by default
- on platforms with DMI BIOS date before January 1st that year.
- "acpi=force" can be used to override this mechanism.
-
- Enter 0 to disable this mechanism and allow ACPI to
- run by default no matter what the year. (default)
-
config ACPI_DEBUG
bool "Debug Statements"
default n
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index b9f0d5f4bba5..8711e3797165 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -56,7 +56,6 @@ static int ac_sleep_before_get_state_ms;
struct acpi_ac {
struct power_supply charger;
- struct acpi_device *adev;
struct platform_device *pdev;
unsigned long long state;
};
@@ -70,8 +69,9 @@ struct acpi_ac {
static int acpi_ac_get_state(struct acpi_ac *ac)
{
acpi_status status;
+ acpi_handle handle = ACPI_HANDLE(&ac->pdev->dev);
- status = acpi_evaluate_integer(ac->adev->handle, "_PSR", NULL,
+ status = acpi_evaluate_integer(handle, "_PSR", NULL,
&ac->state);
if (ACPI_FAILURE(status)) {
ACPI_EXCEPTION((AE_INFO, status,
@@ -119,6 +119,7 @@ static enum power_supply_property ac_props[] = {
static void acpi_ac_notify_handler(acpi_handle handle, u32 event, void *data)
{
struct acpi_ac *ac = data;
+ struct acpi_device *adev;
if (!ac)
return;
@@ -141,10 +142,11 @@ static void acpi_ac_notify_handler(acpi_handle handle, u32 event, void *data)
msleep(ac_sleep_before_get_state_ms);
acpi_ac_get_state(ac);
- acpi_bus_generate_netlink_event(ac->adev->pnp.device_class,
+ adev = ACPI_COMPANION(&ac->pdev->dev);
+ acpi_bus_generate_netlink_event(adev->pnp.device_class,
dev_name(&ac->pdev->dev),
event, (u32) ac->state);
- acpi_notifier_call_chain(ac->adev, event, (u32) ac->state);
+ acpi_notifier_call_chain(adev, event, (u32) ac->state);
kobject_uevent(&ac->charger.dev->kobj, KOBJ_CHANGE);
}
@@ -178,8 +180,8 @@ static int acpi_ac_probe(struct platform_device *pdev)
if (!pdev)
return -EINVAL;
- result = acpi_bus_get_device(ACPI_HANDLE(&pdev->dev), &adev);
- if (result)
+ adev = ACPI_COMPANION(&pdev->dev);
+ if (!adev)
return -ENODEV;
ac = kzalloc(sizeof(struct acpi_ac), GFP_KERNEL);
@@ -188,7 +190,6 @@ static int acpi_ac_probe(struct platform_device *pdev)
strcpy(acpi_device_name(adev), ACPI_AC_DEVICE_NAME);
strcpy(acpi_device_class(adev), ACPI_AC_CLASS);
- ac->adev = adev;
ac->pdev = pdev;
platform_set_drvdata(pdev, ac);
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index d3961014aad7..6745fe137b9e 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -163,6 +163,15 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
{ "80860F41", (unsigned long)&byt_i2c_dev_desc },
{ "INT33B2", },
+ { "INT3430", (unsigned long)&lpt_dev_desc },
+ { "INT3431", (unsigned long)&lpt_dev_desc },
+ { "INT3432", (unsigned long)&lpt_dev_desc },
+ { "INT3433", (unsigned long)&lpt_dev_desc },
+ { "INT3434", (unsigned long)&lpt_uart_dev_desc },
+ { "INT3435", (unsigned long)&lpt_uart_dev_desc },
+ { "INT3436", (unsigned long)&lpt_sdio_dev_desc },
+ { "INT3437", },
+
{ }
};
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 8a4cfc7e71f0..dbfe49e5fd63 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -111,7 +111,7 @@ int acpi_create_platform_device(struct acpi_device *adev,
pdevinfo.id = -1;
pdevinfo.res = resources;
pdevinfo.num_res = count;
- pdevinfo.acpi_node.handle = adev->handle;
+ pdevinfo.acpi_node.companion = adev;
pdev = platform_device_register_full(&pdevinfo);
if (IS_ERR(pdev)) {
dev_err(&adev->dev, "platform device creation failed: %ld\n",
diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index fb848378d582..078c4f7fe2dd 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -75,39 +75,6 @@ static struct acpi_blacklist_item acpi_blacklist[] __initdata = {
{""}
};
-#if CONFIG_ACPI_BLACKLIST_YEAR
-
-static int __init blacklist_by_year(void)
-{
- int year;
-
- /* Doesn't exist? Likely an old system */
- if (!dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL)) {
- printk(KERN_ERR PREFIX "no DMI BIOS year, "
- "acpi=force is required to enable ACPI\n" );
- return 1;
- }
- /* 0? Likely a buggy new BIOS */
- if (year == 0) {
- printk(KERN_ERR PREFIX "DMI BIOS year==0, "
- "assuming ACPI-capable machine\n" );
- return 0;
- }
- if (year < CONFIG_ACPI_BLACKLIST_YEAR) {
- printk(KERN_ERR PREFIX "BIOS age (%d) fails cutoff (%d), "
- "acpi=force is required to enable ACPI\n",
- year, CONFIG_ACPI_BLACKLIST_YEAR);
- return 1;
- }
- return 0;
-}
-#else
-static inline int blacklist_by_year(void)
-{
- return 0;
-}
-#endif
-
int __init acpi_blacklisted(void)
{
int i = 0;
@@ -166,8 +133,6 @@ int __init acpi_blacklisted(void)
}
}
- blacklisted += blacklist_by_year();
-
dmi_check_system(acpi_osi_dmi_table);
return blacklisted;
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index d42b2fb5a7e9..b3480cf7db1a 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -22,16 +22,12 @@
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-#include <linux/device.h>
+#include <linux/acpi.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/pm_qos.h>
#include <linux/pm_runtime.h>
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/acpi_drivers.h>
-
#include "internal.h"
#define _COMPONENT ACPI_POWER_COMPONENT
@@ -548,7 +544,7 @@ static int acpi_dev_pm_get_state(struct device *dev, struct acpi_device *adev,
*/
int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p, int d_max_in)
{
- acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
+ acpi_handle handle = ACPI_HANDLE(dev);
struct acpi_device *adev;
int ret, d_min, d_max;
@@ -656,7 +652,7 @@ int acpi_pm_device_run_wake(struct device *phys_dev, bool enable)
if (!device_run_wake(phys_dev))
return -EINVAL;
- handle = DEVICE_ACPI_HANDLE(phys_dev);
+ handle = ACPI_HANDLE(phys_dev);
if (!handle || acpi_bus_get_device(handle, &adev)) {
dev_dbg(phys_dev, "ACPI handle without context in %s!\n",
__func__);
@@ -700,7 +696,7 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
if (!device_can_wakeup(dev))
return -EINVAL;
- handle = DEVICE_ACPI_HANDLE(dev);
+ handle = ACPI_HANDLE(dev);
if (!handle || acpi_bus_get_device(handle, &adev)) {
dev_dbg(dev, "ACPI handle without context in %s!\n", __func__);
return -ENODEV;
@@ -722,7 +718,7 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
*/
struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
{
- acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
+ acpi_handle handle = ACPI_HANDLE(dev);
struct acpi_device *adev;
return handle && !acpi_bus_get_device(handle, &adev) ? adev : NULL;
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index d5309fd49458..ba5b56db9d27 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -173,9 +173,10 @@ static void start_transaction(struct acpi_ec *ec)
static void advance_transaction(struct acpi_ec *ec, u8 status)
{
unsigned long flags;
- struct transaction *t = ec->curr;
+ struct transaction *t;
spin_lock_irqsave(&ec->lock, flags);
+ t = ec->curr;
if (!t)
goto unlock;
if (t->wlen > t->wi) {
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 10f0f40587bb..a22a295edb69 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -197,30 +197,28 @@ static void acpi_physnode_link_name(char *buf, unsigned int node_id)
int acpi_bind_one(struct device *dev, acpi_handle handle)
{
- struct acpi_device *acpi_dev;
- acpi_status status;
+ struct acpi_device *acpi_dev = NULL;
struct acpi_device_physical_node *physical_node, *pn;
char physical_node_name[PHYSICAL_NODE_NAME_SIZE];
struct list_head *physnode_list;
unsigned int node_id;
int retval = -EINVAL;
- if (ACPI_HANDLE(dev)) {
+ if (ACPI_COMPANION(dev)) {
if (handle) {
- dev_warn(dev, "ACPI handle is already set\n");
+ dev_warn(dev, "ACPI companion already set\n");
return -EINVAL;
} else {
- handle = ACPI_HANDLE(dev);
+ acpi_dev = ACPI_COMPANION(dev);
}
+ } else {
+ acpi_bus_get_device(handle, &acpi_dev);
}
- if (!handle)
+ if (!acpi_dev)
return -EINVAL;
+ get_device(&acpi_dev->dev);
get_device(dev);
- status = acpi_bus_get_device(handle, &acpi_dev);
- if (ACPI_FAILURE(status))
- goto err;
-
physical_node = kzalloc(sizeof(*physical_node), GFP_KERNEL);
if (!physical_node) {
retval = -ENOMEM;
@@ -242,10 +240,11 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
dev_warn(dev, "Already associated with ACPI node\n");
kfree(physical_node);
- if (ACPI_HANDLE(dev) != handle)
+ if (ACPI_COMPANION(dev) != acpi_dev)
goto err;
put_device(dev);
+ put_device(&acpi_dev->dev);
return 0;
}
if (pn->node_id == node_id) {
@@ -259,8 +258,8 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
list_add(&physical_node->node, physnode_list);
acpi_dev->physical_node_count++;
- if (!ACPI_HANDLE(dev))
- ACPI_HANDLE_SET(dev, acpi_dev->handle);
+ if (!ACPI_COMPANION(dev))
+ ACPI_COMPANION_SET(dev, acpi_dev);
acpi_physnode_link_name(physical_node_name, node_id);
retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj,
@@ -283,27 +282,21 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
return 0;
err:
- ACPI_HANDLE_SET(dev, NULL);
+ ACPI_COMPANION_SET(dev, NULL);
put_device(dev);
+ put_device(&acpi_dev->dev);
return retval;
}
EXPORT_SYMBOL_GPL(acpi_bind_one);
int acpi_unbind_one(struct device *dev)
{
+ struct acpi_device *acpi_dev = ACPI_COMPANION(dev);
struct acpi_device_physical_node *entry;
- struct acpi_device *acpi_dev;
- acpi_status status;
- if (!ACPI_HANDLE(dev))
+ if (!acpi_dev)
return 0;
- status = acpi_bus_get_device(ACPI_HANDLE(dev), &acpi_dev);
- if (ACPI_FAILURE(status)) {
- dev_err(dev, "Oops, ACPI handle corrupt in %s()\n", __func__);
- return -EINVAL;
- }
-
mutex_lock(&acpi_dev->physical_node_lock);
list_for_each_entry(entry, &acpi_dev->physical_node_list, node)
@@ -316,9 +309,10 @@ int acpi_unbind_one(struct device *dev)
acpi_physnode_link_name(physnode_name, entry->node_id);
sysfs_remove_link(&acpi_dev->dev.kobj, physnode_name);
sysfs_remove_link(&dev->kobj, "firmware_node");
- ACPI_HANDLE_SET(dev, NULL);
- /* acpi_bind_one() increase refcnt by one. */
+ ACPI_COMPANION_SET(dev, NULL);
+ /* Drop references taken by acpi_bind_one(). */
put_device(dev);
+ put_device(&acpi_dev->dev);
kfree(entry);
break;
}
@@ -328,6 +322,15 @@ int acpi_unbind_one(struct device *dev)
}
EXPORT_SYMBOL_GPL(acpi_unbind_one);
+void acpi_preset_companion(struct device *dev, acpi_handle parent, u64 addr)
+{
+ struct acpi_device *adev;
+
+ if (!acpi_bus_get_device(acpi_get_child(parent, addr), &adev))
+ ACPI_COMPANION_SET(dev, adev);
+}
+EXPORT_SYMBOL_GPL(acpi_preset_companion);
+
static int acpi_platform_notify(struct device *dev)
{
struct acpi_bus_type *type = acpi_get_bus_type(dev);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 56f05869b08d..0703bff5e60e 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -575,6 +575,7 @@ static int acpi_pci_root_add(struct acpi_device *device,
dev_err(&device->dev,
"Bus %04x:%02x not present in PCI namespace\n",
root->segment, (unsigned int)root->secondary.start);
+ device->driver_data = NULL;
result = -ENODEV;
goto end;
}
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 55f9dedbbf9f..15daa21fcd05 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -289,24 +289,17 @@ void acpi_bus_device_eject(void *data, u32 ost_src)
{
struct acpi_device *device = data;
acpi_handle handle = device->handle;
- struct acpi_scan_handler *handler;
u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
int error;
lock_device_hotplug();
mutex_lock(&acpi_scan_lock);
- handler = device->handler;
- if (!handler || !handler->hotplug.enabled) {
- put_device(&device->dev);
- goto err_support;
- }
-
if (ost_src == ACPI_NOTIFY_EJECT_REQUEST)
acpi_evaluate_hotplug_ost(handle, ACPI_NOTIFY_EJECT_REQUEST,
ACPI_OST_SC_EJECT_IN_PROGRESS, NULL);
- if (handler->hotplug.mode == AHM_CONTAINER)
+ if (device->handler && device->handler->hotplug.mode == AHM_CONTAINER)
kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
error = acpi_scan_hot_remove(device);
@@ -411,8 +404,7 @@ static void acpi_hotplug_notify_cb(acpi_handle handle, u32 type, void *data)
break;
case ACPI_NOTIFY_EJECT_REQUEST:
acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
- status = acpi_bus_get_device(handle, &adev);
- if (ACPI_FAILURE(status))
+ if (acpi_bus_get_device(handle, &adev))
goto err_out;
get_device(&adev->dev);
@@ -1997,6 +1989,7 @@ static int acpi_bus_scan_fixed(void)
if (result)
return result;
+ device->flags.match_driver = true;
result = device_attach(&device->dev);
if (result < 0)
return result;
@@ -2013,6 +2006,7 @@ static int acpi_bus_scan_fixed(void)
if (result)
return result;
+ device->flags.match_driver = true;
result = device_attach(&device->dev);
}
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 18dbdff4656e..995e91bcb97b 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -82,13 +82,6 @@ static bool allow_duplicates;
module_param(allow_duplicates, bool, 0644);
/*
- * Some BIOSes claim they use minimum backlight at boot,
- * and this may bring dimming screen after boot
- */
-static bool use_bios_initial_backlight = 1;
-module_param(use_bios_initial_backlight, bool, 0644);
-
-/*
* For Windows 8 systems: if set ture and the GPU driver has
* registered a backlight interface, skip registering ACPI video's.
*/
@@ -406,12 +399,6 @@ static int __init video_set_bqc_offset(const struct dmi_system_id *d)
return 0;
}
-static int video_ignore_initial_backlight(const struct dmi_system_id *d)
-{
- use_bios_initial_backlight = 0;
- return 0;
-}
-
static struct dmi_system_id video_dmi_table[] __initdata = {
/*
* Broken _BQC workaround http://bugzilla.kernel.org/show_bug.cgi?id=13121
@@ -456,54 +443,6 @@ static struct dmi_system_id video_dmi_table[] __initdata = {
DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7720"),
},
},
- {
- .callback = video_ignore_initial_backlight,
- .ident = "HP Folio 13-2000",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13 - 2000 Notebook PC"),
- },
- },
- {
- .callback = video_ignore_initial_backlight,
- .ident = "Fujitsu E753",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "FUJITSU"),
- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E753"),
- },
- },
- {
- .callback = video_ignore_initial_backlight,
- .ident = "HP Pavilion dm4",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dm4 Notebook PC"),
- },
- },
- {
- .callback = video_ignore_initial_backlight,
- .ident = "HP Pavilion g6 Notebook PC",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion g6 Notebook PC"),
- },
- },
- {
- .callback = video_ignore_initial_backlight,
- .ident = "HP 1000 Notebook PC",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP 1000 Notebook PC"),
- },
- },
- {
- .callback = video_ignore_initial_backlight,
- .ident = "HP Pavilion m4",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion m4 Notebook PC"),
- },
- },
{}
};
@@ -839,20 +778,18 @@ acpi_video_init_brightness(struct acpi_video_device *device)
if (!device->cap._BQC)
goto set_level;
- if (use_bios_initial_backlight) {
- level = acpi_video_bqc_value_to_level(device, level_old);
- /*
- * On some buggy laptops, _BQC returns an uninitialized
- * value when invoked for the first time, i.e.
- * level_old is invalid (no matter whether it's a level
- * or an index). Set the backlight to max_level in this case.
- */
- for (i = 2; i < br->count; i++)
- if (level == br->levels[i])
- break;
- if (i == br->count || !level)
- level = max_level;
- }
+ level = acpi_video_bqc_value_to_level(device, level_old);
+ /*
+ * On some buggy laptops, _BQC returns an uninitialized
+ * value when invoked for the first time, i.e.
+ * level_old is invalid (no matter whether it's a level
+ * or an index). Set the backlight to max_level in this case.
+ */
+ for (i = 2; i < br->count; i++)
+ if (level == br->levels[i])
+ break;
+ if (i == br->count || !level)
+ level = max_level;
set_level:
result = acpi_video_device_lcd_set_level(device, level);
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index ab714d2ad978..4372cfa883c9 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -185,7 +185,7 @@ void ata_acpi_bind_port(struct ata_port *ap)
if (libata_noacpi || ap->flags & ATA_FLAG_ACPI_SATA || !host_handle)
return;
- ACPI_HANDLE_SET(&ap->tdev, acpi_get_child(host_handle, ap->port_no));
+ acpi_preset_companion(&ap->tdev, host_handle, ap->port_no);
if (ata_acpi_gtm(ap, &ap->__acpi_init_gtm) == 0)
ap->pflags |= ATA_PFLAG_INIT_GTM_VALID;
@@ -222,7 +222,7 @@ void ata_acpi_bind_dev(struct ata_device *dev)
parent_handle = port_handle;
}
- ACPI_HANDLE_SET(&dev->tdev, acpi_get_child(parent_handle, adr));
+ acpi_preset_companion(&dev->tdev, parent_handle, adr);
register_hotplug_dock_device(ata_dev_acpi_handle(dev),
&ata_acpi_dev_dock_ops, dev, NULL, NULL);
diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c
index 853f610af28f..e88690ebfd82 100644
--- a/drivers/ata/pata_arasan_cf.c
+++ b/drivers/ata/pata_arasan_cf.c
@@ -396,8 +396,7 @@ dma_xfer(struct arasan_cf_dev *acdev, dma_addr_t src, dma_addr_t dest, u32 len)
struct dma_async_tx_descriptor *tx;
struct dma_chan *chan = acdev->dma_chan;
dma_cookie_t cookie;
- unsigned long flags = DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
- DMA_COMPL_SKIP_DEST_UNMAP;
+ unsigned long flags = DMA_PREP_INTERRUPT;
int ret = 0;
tx = chan->device->device_prep_dma_memcpy(chan, dest, src, len, flags);
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 47051cd25113..3a94b799f166 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -432,7 +432,7 @@ struct platform_device *platform_device_register_full(
goto err_alloc;
pdev->dev.parent = pdevinfo->parent;
- ACPI_HANDLE_SET(&pdev->dev, pdevinfo->acpi_node.handle);
+ ACPI_COMPANION_SET(&pdev->dev, pdevinfo->acpi_node.companion);
if (pdevinfo->dma_mask) {
/*
@@ -463,7 +463,7 @@ struct platform_device *platform_device_register_full(
ret = platform_device_add(pdev);
if (ret) {
err:
- ACPI_HANDLE_SET(&pdev->dev, NULL);
+ ACPI_COMPANION_SET(&pdev->dev, NULL);
kfree(pdev->dev.dma_mask);
err_alloc:
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index c12e9b9556be..1b41fca3d65a 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1350,6 +1350,9 @@ static int device_prepare(struct device *dev, pm_message_t state)
device_unlock(dev);
+ if (error)
+ pm_runtime_put(dev);
+
return error;
}
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index b5d842370cc9..ea192ec029c4 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -223,7 +223,7 @@ static void null_softirq_done_fn(struct request *rq)
blk_end_request_all(rq, 0);
}
-#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#ifdef CONFIG_SMP
static void null_ipi_cmd_end_io(void *data)
{
@@ -260,7 +260,7 @@ static void null_cmd_end_ipi(struct nullb_cmd *cmd)
put_cpu();
}
-#endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+#endif /* CONFIG_SMP */
static inline void null_handle_cmd(struct nullb_cmd *cmd)
{
@@ -270,7 +270,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
end_cmd(cmd);
break;
case NULL_IRQ_SOFTIRQ:
-#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#ifdef CONFIG_SMP
null_cmd_end_ipi(cmd);
#else
end_cmd(cmd);
@@ -571,7 +571,7 @@ static int __init null_init(void)
{
unsigned int i;
-#if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#if !defined(CONFIG_SMP)
if (irqmode == NULL_IRQ_SOFTIRQ) {
pr_warn("null_blk: softirq completions not available.\n");
pr_warn("null_blk: using direct completions.\n");
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 588479d58f52..6a680d4de7f1 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -199,15 +199,16 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
spin_lock_irqsave(&vblk->vq_lock, flags);
if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
+ virtqueue_kick(vblk->vq);
spin_unlock_irqrestore(&vblk->vq_lock, flags);
blk_mq_stop_hw_queue(hctx);
- virtqueue_kick(vblk->vq);
return BLK_MQ_RQ_QUEUE_BUSY;
}
- spin_unlock_irqrestore(&vblk->vq_lock, flags);
if (last)
virtqueue_kick(vblk->vq);
+
+ spin_unlock_irqrestore(&vblk->vq_lock, flags);
return BLK_MQ_RQ_QUEUE_OK;
}
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index 94c0c74434ea..1a65838888cd 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -33,6 +33,15 @@ config TCG_TIS
from within Linux. To compile this driver as a module, choose
M here; the module will be called tpm_tis.
+config TCG_TIS_I2C_ATMEL
+ tristate "TPM Interface Specification 1.2 Interface (I2C - Atmel)"
+ depends on I2C
+ ---help---
+ If you have an Atmel I2C TPM security chip say Yes and it will be
+ accessible from within Linux.
+ To compile this driver as a module, choose M here; the module will
+ be called tpm_tis_i2c_atmel.
+
config TCG_TIS_I2C_INFINEON
tristate "TPM Interface Specification 1.2 Interface (I2C - Infineon)"
depends on I2C
@@ -42,7 +51,17 @@ config TCG_TIS_I2C_INFINEON
Specification 0.20 say Yes and it will be accessible from within
Linux.
To compile this driver as a module, choose M here; the module
- will be called tpm_tis_i2c_infineon.
+ will be called tpm_i2c_infineon.
+
+config TCG_TIS_I2C_NUVOTON
+ tristate "TPM Interface Specification 1.2 Interface (I2C - Nuvoton)"
+ depends on I2C
+ ---help---
+ If you have a TPM security chip with an I2C interface from
+ Nuvoton Technology Corp. say Yes and it will be accessible
+ from within Linux.
+ To compile this driver as a module, choose M here; the module
+ will be called tpm_i2c_nuvoton.
config TCG_NSC
tristate "National Semiconductor TPM Interface"
@@ -82,14 +101,14 @@ config TCG_IBMVTPM
as a module, choose M here; the module will be called tpm_ibmvtpm.
config TCG_ST33_I2C
- tristate "STMicroelectronics ST33 I2C TPM"
- depends on I2C
- depends on GPIOLIB
- ---help---
- If you have a TPM security chip from STMicroelectronics working with
- an I2C bus say Yes and it will be accessible from within Linux.
- To compile this driver as a module, choose M here; the module will be
- called tpm_stm_st33_i2c.
+ tristate "STMicroelectronics ST33 I2C TPM"
+ depends on I2C
+ depends on GPIOLIB
+ ---help---
+ If you have a TPM security chip from STMicroelectronics working with
+ an I2C bus say Yes and it will be accessible from within Linux.
+ To compile this driver as a module, choose M here; the module will be
+ called tpm_stm_st33_i2c.
config TCG_XEN
tristate "XEN TPM Interface"
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index eb41ff97d0ad..b80a4000daee 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -2,17 +2,20 @@
# Makefile for the kernel tpm device drivers.
#
obj-$(CONFIG_TCG_TPM) += tpm.o
+tpm-y := tpm-interface.o
+tpm-$(CONFIG_ACPI) += tpm_ppi.o
+
ifdef CONFIG_ACPI
- obj-$(CONFIG_TCG_TPM) += tpm_bios.o
- tpm_bios-objs += tpm_eventlog.o tpm_acpi.o tpm_ppi.o
+ tpm-y += tpm_eventlog.o tpm_acpi.o
else
ifdef CONFIG_TCG_IBMVTPM
- obj-$(CONFIG_TCG_TPM) += tpm_bios.o
- tpm_bios-objs += tpm_eventlog.o tpm_of.o
+ tpm-y += tpm_eventlog.o tpm_of.o
endif
endif
obj-$(CONFIG_TCG_TIS) += tpm_tis.o
+obj-$(CONFIG_TCG_TIS_I2C_ATMEL) += tpm_i2c_atmel.o
obj-$(CONFIG_TCG_TIS_I2C_INFINEON) += tpm_i2c_infineon.o
+obj-$(CONFIG_TCG_TIS_I2C_NUVOTON) += tpm_i2c_nuvoton.o
obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm-interface.c
index e3c974a6c522..6ae41d337630 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -10,13 +10,13 @@
* Maintained by: <tpmdd-devel@lists.sourceforge.net>
*
* Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org
+ * Specifications at www.trustedcomputinggroup.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, version 2 of the
* License.
- *
+ *
* Note, the TPM chip is not interrupt driven (only polling)
* and can have very long timeouts (minutes!). Hence the unusual
* calls to msleep.
@@ -371,13 +371,14 @@ static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
return -ENODATA;
if (count > bufsiz) {
dev_err(chip->dev,
- "invalid count value %x %zx \n", count, bufsiz);
+ "invalid count value %x %zx\n", count, bufsiz);
return -E2BIG;
}
mutex_lock(&chip->tpm_mutex);
- if ((rc = chip->vendor.send(chip, (u8 *) buf, count)) < 0) {
+ rc = chip->vendor.send(chip, (u8 *) buf, count);
+ if (rc < 0) {
dev_err(chip->dev,
"tpm_transmit: tpm_send: error %zd\n", rc);
goto out;
@@ -444,7 +445,7 @@ static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd,
{
int err;
- len = tpm_transmit(chip,(u8 *) cmd, len);
+ len = tpm_transmit(chip, (u8 *) cmd, len);
if (len < 0)
return len;
else if (len < TPM_HEADER_SIZE)
@@ -658,7 +659,7 @@ static int tpm_continue_selftest(struct tpm_chip *chip)
return rc;
}
-ssize_t tpm_show_enabled(struct device * dev, struct device_attribute * attr,
+ssize_t tpm_show_enabled(struct device *dev, struct device_attribute *attr,
char *buf)
{
cap_t cap;
@@ -674,7 +675,7 @@ ssize_t tpm_show_enabled(struct device * dev, struct device_attribute * attr,
}
EXPORT_SYMBOL_GPL(tpm_show_enabled);
-ssize_t tpm_show_active(struct device * dev, struct device_attribute * attr,
+ssize_t tpm_show_active(struct device *dev, struct device_attribute *attr,
char *buf)
{
cap_t cap;
@@ -690,7 +691,7 @@ ssize_t tpm_show_active(struct device * dev, struct device_attribute * attr,
}
EXPORT_SYMBOL_GPL(tpm_show_active);
-ssize_t tpm_show_owned(struct device * dev, struct device_attribute * attr,
+ssize_t tpm_show_owned(struct device *dev, struct device_attribute *attr,
char *buf)
{
cap_t cap;
@@ -706,8 +707,8 @@ ssize_t tpm_show_owned(struct device * dev, struct device_attribute * attr,
}
EXPORT_SYMBOL_GPL(tpm_show_owned);
-ssize_t tpm_show_temp_deactivated(struct device * dev,
- struct device_attribute * attr, char *buf)
+ssize_t tpm_show_temp_deactivated(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
cap_t cap;
ssize_t rc;
@@ -769,10 +770,10 @@ static int __tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf)
/**
* tpm_pcr_read - read a pcr value
- * @chip_num: tpm idx # or ANY
+ * @chip_num: tpm idx # or ANY
* @pcr_idx: pcr idx to retrieve
- * @res_buf: TPM_PCR value
- * size of res_buf is 20 bytes (or NULL if you don't care)
+ * @res_buf: TPM_PCR value
+ * size of res_buf is 20 bytes (or NULL if you don't care)
*
* The TPM driver should be built-in, but for whatever reason it
* isn't, protect against the chip disappearing, by incrementing
@@ -794,9 +795,9 @@ EXPORT_SYMBOL_GPL(tpm_pcr_read);
/**
* tpm_pcr_extend - extend pcr value with hash
- * @chip_num: tpm idx # or AN&
+ * @chip_num: tpm idx # or AN&
* @pcr_idx: pcr idx to extend
- * @hash: hash value used to extend pcr value
+ * @hash: hash value used to extend pcr value
*
* The TPM driver should be built-in, but for whatever reason it
* isn't, protect against the chip disappearing, by incrementing
@@ -847,8 +848,7 @@ int tpm_do_selftest(struct tpm_chip *chip)
unsigned long duration;
struct tpm_cmd_t cmd;
- duration = tpm_calc_ordinal_duration(chip,
- TPM_ORD_CONTINUE_SELFTEST);
+ duration = tpm_calc_ordinal_duration(chip, TPM_ORD_CONTINUE_SELFTEST);
loops = jiffies_to_msecs(duration) / delay_msec;
@@ -965,12 +965,12 @@ ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr,
if (err)
goto out;
- /*
+ /*
ignore header 10 bytes
algorithm 32 bits (1 == RSA )
encscheme 16 bits
sigscheme 16 bits
- parameters (RSA 12->bytes: keybit, #primes, expbit)
+ parameters (RSA 12->bytes: keybit, #primes, expbit)
keylenbytes 32 bits
256 byte modulus
ignore checksum 20 bytes
@@ -1020,43 +1020,33 @@ ssize_t tpm_show_caps(struct device *dev, struct device_attribute *attr,
str += sprintf(str, "Manufacturer: 0x%x\n",
be32_to_cpu(cap.manufacturer_id));
- rc = tpm_getcap(dev, CAP_VERSION_1_1, &cap,
- "attempting to determine the 1.1 version");
- if (rc)
- return 0;
- str += sprintf(str,
- "TCG version: %d.%d\nFirmware version: %d.%d\n",
- cap.tpm_version.Major, cap.tpm_version.Minor,
- cap.tpm_version.revMajor, cap.tpm_version.revMinor);
- return str - buf;
-}
-EXPORT_SYMBOL_GPL(tpm_show_caps);
-
-ssize_t tpm_show_caps_1_2(struct device * dev,
- struct device_attribute * attr, char *buf)
-{
- cap_t cap;
- ssize_t rc;
- char *str = buf;
-
- rc = tpm_getcap(dev, TPM_CAP_PROP_MANUFACTURER, &cap,
- "attempting to determine the manufacturer");
- if (rc)
- return 0;
- str += sprintf(str, "Manufacturer: 0x%x\n",
- be32_to_cpu(cap.manufacturer_id));
+ /* Try to get a TPM version 1.2 TPM_CAP_VERSION_INFO */
rc = tpm_getcap(dev, CAP_VERSION_1_2, &cap,
"attempting to determine the 1.2 version");
- if (rc)
- return 0;
- str += sprintf(str,
- "TCG version: %d.%d\nFirmware version: %d.%d\n",
- cap.tpm_version_1_2.Major, cap.tpm_version_1_2.Minor,
- cap.tpm_version_1_2.revMajor,
- cap.tpm_version_1_2.revMinor);
+ if (!rc) {
+ str += sprintf(str,
+ "TCG version: %d.%d\nFirmware version: %d.%d\n",
+ cap.tpm_version_1_2.Major,
+ cap.tpm_version_1_2.Minor,
+ cap.tpm_version_1_2.revMajor,
+ cap.tpm_version_1_2.revMinor);
+ } else {
+ /* Otherwise just use TPM_STRUCT_VER */
+ rc = tpm_getcap(dev, CAP_VERSION_1_1, &cap,
+ "attempting to determine the 1.1 version");
+ if (rc)
+ return 0;
+ str += sprintf(str,
+ "TCG version: %d.%d\nFirmware version: %d.%d\n",
+ cap.tpm_version.Major,
+ cap.tpm_version.Minor,
+ cap.tpm_version.revMajor,
+ cap.tpm_version.revMinor);
+ }
+
return str - buf;
}
-EXPORT_SYMBOL_GPL(tpm_show_caps_1_2);
+EXPORT_SYMBOL_GPL(tpm_show_caps);
ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -1102,8 +1092,8 @@ ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
}
EXPORT_SYMBOL_GPL(tpm_store_cancel);
-static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask, bool check_cancel,
- bool *canceled)
+static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask,
+ bool check_cancel, bool *canceled)
{
u8 status = chip->vendor.status(chip);
@@ -1170,38 +1160,25 @@ EXPORT_SYMBOL_GPL(wait_for_tpm_stat);
*/
int tpm_open(struct inode *inode, struct file *file)
{
- int minor = iminor(inode);
- struct tpm_chip *chip = NULL, *pos;
-
- rcu_read_lock();
- list_for_each_entry_rcu(pos, &tpm_chip_list, list) {
- if (pos->vendor.miscdev.minor == minor) {
- chip = pos;
- get_device(chip->dev);
- break;
- }
- }
- rcu_read_unlock();
-
- if (!chip)
- return -ENODEV;
+ struct miscdevice *misc = file->private_data;
+ struct tpm_chip *chip = container_of(misc, struct tpm_chip,
+ vendor.miscdev);
if (test_and_set_bit(0, &chip->is_open)) {
dev_dbg(chip->dev, "Another process owns this TPM\n");
- put_device(chip->dev);
return -EBUSY;
}
chip->data_buffer = kzalloc(TPM_BUFSIZE, GFP_KERNEL);
if (chip->data_buffer == NULL) {
clear_bit(0, &chip->is_open);
- put_device(chip->dev);
return -ENOMEM;
}
atomic_set(&chip->data_pending, 0);
file->private_data = chip;
+ get_device(chip->dev);
return 0;
}
EXPORT_SYMBOL_GPL(tpm_open);
@@ -1463,7 +1440,6 @@ void tpm_dev_vendor_release(struct tpm_chip *chip)
chip->vendor.release(chip->dev);
clear_bit(chip->dev_num, dev_mask);
- kfree(chip->vendor.miscdev.name);
}
EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
@@ -1487,7 +1463,7 @@ void tpm_dev_release(struct device *dev)
EXPORT_SYMBOL_GPL(tpm_dev_release);
/*
- * Called from tpm_<specific>.c probe function only for devices
+ * Called from tpm_<specific>.c probe function only for devices
* the driver has determined it should claim. Prior to calling
* this function the specific probe function has called pci_enable_device
* upon errant exit from this function specific probe function should call
@@ -1496,17 +1472,13 @@ EXPORT_SYMBOL_GPL(tpm_dev_release);
struct tpm_chip *tpm_register_hardware(struct device *dev,
const struct tpm_vendor_specific *entry)
{
-#define DEVNAME_SIZE 7
-
- char *devname;
struct tpm_chip *chip;
/* Driver specific per-device data */
chip = kzalloc(sizeof(*chip), GFP_KERNEL);
- devname = kmalloc(DEVNAME_SIZE, GFP_KERNEL);
- if (chip == NULL || devname == NULL)
- goto out_free;
+ if (chip == NULL)
+ return NULL;
mutex_init(&chip->buffer_mutex);
mutex_init(&chip->tpm_mutex);
@@ -1531,8 +1503,9 @@ struct tpm_chip *tpm_register_hardware(struct device *dev,
set_bit(chip->dev_num, dev_mask);
- scnprintf(devname, DEVNAME_SIZE, "%s%d", "tpm", chip->dev_num);
- chip->vendor.miscdev.name = devname;
+ scnprintf(chip->devname, sizeof(chip->devname), "%s%d", "tpm",
+ chip->dev_num);
+ chip->vendor.miscdev.name = chip->devname;
chip->vendor.miscdev.parent = dev;
chip->dev = get_device(dev);
@@ -1558,7 +1531,7 @@ struct tpm_chip *tpm_register_hardware(struct device *dev,
goto put_device;
}
- chip->bios_dir = tpm_bios_log_setup(devname);
+ chip->bios_dir = tpm_bios_log_setup(chip->devname);
/* Make chip available */
spin_lock(&driver_lock);
@@ -1571,7 +1544,6 @@ put_device:
put_device(chip->dev);
out_free:
kfree(chip);
- kfree(devname);
return NULL;
}
EXPORT_SYMBOL_GPL(tpm_register_hardware);
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index a7bfc176ed43..f32847872193 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -59,8 +59,6 @@ extern ssize_t tpm_show_pcrs(struct device *, struct device_attribute *attr,
char *);
extern ssize_t tpm_show_caps(struct device *, struct device_attribute *attr,
char *);
-extern ssize_t tpm_show_caps_1_2(struct device *, struct device_attribute *attr,
- char *);
extern ssize_t tpm_store_cancel(struct device *, struct device_attribute *attr,
const char *, size_t);
extern ssize_t tpm_show_enabled(struct device *, struct device_attribute *attr,
@@ -122,6 +120,7 @@ struct tpm_chip {
struct device *dev; /* Device stuff */
int dev_num; /* /dev/tpm# */
+ char devname[7];
unsigned long is_open; /* only one allowed */
int time_expired;
diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c
index 99d6820c611d..c9a528d25d22 100644
--- a/drivers/char/tpm/tpm_atmel.c
+++ b/drivers/char/tpm/tpm_atmel.c
@@ -202,7 +202,7 @@ static int __init init_atmel(void)
have_region =
(atmel_request_region
- (tpm_atmel.base, region_size, "tpm_atmel0") == NULL) ? 0 : 1;
+ (base, region_size, "tpm_atmel0") == NULL) ? 0 : 1;
pdev = platform_device_register_simple("tpm_atmel", -1, NULL, 0);
if (IS_ERR(pdev)) {
diff --git a/drivers/char/tpm/tpm_eventlog.c b/drivers/char/tpm/tpm_eventlog.c
index 84ddc557b8f8..59f7cb28260b 100644
--- a/drivers/char/tpm/tpm_eventlog.c
+++ b/drivers/char/tpm/tpm_eventlog.c
@@ -406,7 +406,6 @@ out_tpm:
out:
return NULL;
}
-EXPORT_SYMBOL_GPL(tpm_bios_log_setup);
void tpm_bios_log_teardown(struct dentry **lst)
{
@@ -415,5 +414,3 @@ void tpm_bios_log_teardown(struct dentry **lst)
for (i = 0; i < 3; i++)
securityfs_remove(lst[i]);
}
-EXPORT_SYMBOL_GPL(tpm_bios_log_teardown);
-MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c
new file mode 100644
index 000000000000..c3cd7fe481a1
--- /dev/null
+++ b/drivers/char/tpm/tpm_i2c_atmel.c
@@ -0,0 +1,284 @@
+/*
+ * ATMEL I2C TPM AT97SC3204T
+ *
+ * Copyright (C) 2012 V Lab Technologies
+ * Teddy Reed <teddy@prosauce.org>
+ * Copyright (C) 2013, Obsidian Research Corp.
+ * Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+ * Device driver for ATMEL I2C TPMs.
+ *
+ * Teddy Reed determined the basic I2C command flow, unlike other I2C TPM
+ * devices the raw TCG formatted TPM command data is written via I2C and then
+ * raw TCG formatted TPM command data is returned via I2C.
+ *
+ * TGC status/locality/etc functions seen in the LPC implementation do not
+ * seem to be present.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/>.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include "tpm.h"
+
+#define I2C_DRIVER_NAME "tpm_i2c_atmel"
+
+#define TPM_I2C_SHORT_TIMEOUT 750 /* ms */
+#define TPM_I2C_LONG_TIMEOUT 2000 /* 2 sec */
+
+#define ATMEL_STS_OK 1
+
+struct priv_data {
+ size_t len;
+ /* This is the amount we read on the first try. 25 was chosen to fit a
+ * fair number of read responses in the buffer so a 2nd retry can be
+ * avoided in small message cases. */
+ u8 buffer[sizeof(struct tpm_output_header) + 25];
+};
+
+static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+ struct priv_data *priv = chip->vendor.priv;
+ struct i2c_client *client = to_i2c_client(chip->dev);
+ s32 status;
+
+ priv->len = 0;
+
+ if (len <= 2)
+ return -EIO;
+
+ status = i2c_master_send(client, buf, len);
+
+ dev_dbg(chip->dev,
+ "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__,
+ (int)min_t(size_t, 64, len), buf, len, status);
+ return status;
+}
+
+static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+ struct priv_data *priv = chip->vendor.priv;
+ struct i2c_client *client = to_i2c_client(chip->dev);
+ struct tpm_output_header *hdr =
+ (struct tpm_output_header *)priv->buffer;
+ u32 expected_len;
+ int rc;
+
+ if (priv->len == 0)
+ return -EIO;
+
+ /* Get the message size from the message header, if we didn't get the
+ * whole message in read_status then we need to re-read the
+ * message. */
+ expected_len = be32_to_cpu(hdr->length);
+ if (expected_len > count)
+ return -ENOMEM;
+
+ if (priv->len >= expected_len) {
+ dev_dbg(chip->dev,
+ "%s early(buf=%*ph count=%0zx) -> ret=%d\n", __func__,
+ (int)min_t(size_t, 64, expected_len), buf, count,
+ expected_len);
+ memcpy(buf, priv->buffer, expected_len);
+ return expected_len;
+ }
+
+ rc = i2c_master_recv(client, buf, expected_len);
+ dev_dbg(chip->dev,
+ "%s reread(buf=%*ph count=%0zx) -> ret=%d\n", __func__,
+ (int)min_t(size_t, 64, expected_len), buf, count,
+ expected_len);
+ return rc;
+}
+
+static void i2c_atmel_cancel(struct tpm_chip *chip)
+{
+ dev_err(chip->dev, "TPM operation cancellation was requested, but is not supported");
+}
+
+static u8 i2c_atmel_read_status(struct tpm_chip *chip)
+{
+ struct priv_data *priv = chip->vendor.priv;
+ struct i2c_client *client = to_i2c_client(chip->dev);
+ int rc;
+
+ /* The TPM fails the I2C read until it is ready, so we do the entire
+ * transfer here and buffer it locally. This way the common code can
+ * properly handle the timeouts. */
+ priv->len = 0;
+ memset(priv->buffer, 0, sizeof(priv->buffer));
+
+
+ /* Once the TPM has completed the command the command remains readable
+ * until another command is issued. */
+ rc = i2c_master_recv(client, priv->buffer, sizeof(priv->buffer));
+ dev_dbg(chip->dev,
+ "%s: sts=%d", __func__, rc);
+ if (rc <= 0)
+ return 0;
+
+ priv->len = rc;
+
+ return ATMEL_STS_OK;
+}
+
+static const struct file_operations i2c_atmel_ops = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .open = tpm_open,
+ .read = tpm_read,
+ .write = tpm_write,
+ .release = tpm_release,
+};
+
+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
+
+static struct attribute *i2c_atmel_attrs[] = {
+ &dev_attr_pubek.attr,
+ &dev_attr_pcrs.attr,
+ &dev_attr_enabled.attr,
+ &dev_attr_active.attr,
+ &dev_attr_owned.attr,
+ &dev_attr_temp_deactivated.attr,
+ &dev_attr_caps.attr,
+ &dev_attr_cancel.attr,
+ &dev_attr_durations.attr,
+ &dev_attr_timeouts.attr,
+ NULL,
+};
+
+static struct attribute_group i2c_atmel_attr_grp = {
+ .attrs = i2c_atmel_attrs
+};
+
+static bool i2c_atmel_req_canceled(struct tpm_chip *chip, u8 status)
+{
+ return 0;
+}
+
+static const struct tpm_vendor_specific i2c_atmel = {
+ .status = i2c_atmel_read_status,
+ .recv = i2c_atmel_recv,
+ .send = i2c_atmel_send,
+ .cancel = i2c_atmel_cancel,
+ .req_complete_mask = ATMEL_STS_OK,
+ .req_complete_val = ATMEL_STS_OK,
+ .req_canceled = i2c_atmel_req_canceled,
+ .attr_group = &i2c_atmel_attr_grp,
+ .miscdev.fops = &i2c_atmel_ops,
+};
+
+static int i2c_atmel_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ int rc;
+ struct tpm_chip *chip;
+ struct device *dev = &client->dev;
+
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+ return -ENODEV;
+
+ chip = tpm_register_hardware(dev, &i2c_atmel);
+ if (!chip) {
+ dev_err(dev, "%s() error in tpm_register_hardware\n", __func__);
+ return -ENODEV;
+ }
+
+ chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data),
+ GFP_KERNEL);
+
+ /* Default timeouts */
+ chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+ chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT);
+ chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+ chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+ chip->vendor.irq = 0;
+
+ /* There is no known way to probe for this device, and all version
+ * information seems to be read via TPM commands. Thus we rely on the
+ * TPM startup process in the common code to detect the device. */
+ if (tpm_get_timeouts(chip)) {
+ rc = -ENODEV;
+ goto out_err;
+ }
+
+ if (tpm_do_selftest(chip)) {
+ rc = -ENODEV;
+ goto out_err;
+ }
+
+ return 0;
+
+out_err:
+ tpm_dev_vendor_release(chip);
+ tpm_remove_hardware(chip->dev);
+ return rc;
+}
+
+static int i2c_atmel_remove(struct i2c_client *client)
+{
+ struct device *dev = &(client->dev);
+ struct tpm_chip *chip = dev_get_drvdata(dev);
+
+ if (chip)
+ tpm_dev_vendor_release(chip);
+ tpm_remove_hardware(dev);
+ kfree(chip);
+ return 0;
+}
+
+static const struct i2c_device_id i2c_atmel_id[] = {
+ {I2C_DRIVER_NAME, 0},
+ {}
+};
+MODULE_DEVICE_TABLE(i2c, i2c_atmel_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id i2c_atmel_of_match[] = {
+ {.compatible = "atmel,at97sc3204t"},
+ {},
+};
+MODULE_DEVICE_TABLE(of, i2c_atmel_of_match);
+#endif
+
+static SIMPLE_DEV_PM_OPS(i2c_atmel_pm_ops, tpm_pm_suspend, tpm_pm_resume);
+
+static struct i2c_driver i2c_atmel_driver = {
+ .id_table = i2c_atmel_id,
+ .probe = i2c_atmel_probe,
+ .remove = i2c_atmel_remove,
+ .driver = {
+ .name = I2C_DRIVER_NAME,
+ .owner = THIS_MODULE,
+ .pm = &i2c_atmel_pm_ops,
+ .of_match_table = of_match_ptr(i2c_atmel_of_match),
+ },
+};
+
+module_i2c_driver(i2c_atmel_driver);
+
+MODULE_AUTHOR("Jason Gunthorpe <jgunthorpe@obsidianresearch.com>");
+MODULE_DESCRIPTION("Atmel TPM I2C Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c
index b8735de8ce95..fefd2aa5c81e 100644
--- a/drivers/char/tpm/tpm_i2c_infineon.c
+++ b/drivers/char/tpm/tpm_i2c_infineon.c
@@ -581,7 +581,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
@@ -685,7 +685,6 @@ out_vendor:
chip->dev->release = NULL;
chip->release = NULL;
tpm_dev.client = NULL;
- dev_set_drvdata(chip->dev, chip);
out_err:
return rc;
}
@@ -766,7 +765,6 @@ static int tpm_tis_i2c_remove(struct i2c_client *client)
chip->dev->release = NULL;
chip->release = NULL;
tpm_dev.client = NULL;
- dev_set_drvdata(chip->dev, chip);
return 0;
}
diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c
new file mode 100644
index 000000000000..6276fea01ff0
--- /dev/null
+++ b/drivers/char/tpm/tpm_i2c_nuvoton.c
@@ -0,0 +1,710 @@
+/******************************************************************************
+ * Nuvoton TPM I2C Device Driver Interface for WPCT301/NPCT501,
+ * based on the TCG TPM Interface Spec version 1.2.
+ * Specifications at www.trustedcomputinggroup.org
+ *
+ * Copyright (C) 2011, Nuvoton Technology Corporation.
+ * Dan Morav <dan.morav@nuvoton.com>
+ * Copyright (C) 2013, Obsidian Research Corp.
+ * Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/>.
+ *
+ * Nuvoton contact information: APC.Support@nuvoton.com
+ *****************************************************************************/
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/i2c.h>
+#include "tpm.h"
+
+/* I2C interface offsets */
+#define TPM_STS 0x00
+#define TPM_BURST_COUNT 0x01
+#define TPM_DATA_FIFO_W 0x20
+#define TPM_DATA_FIFO_R 0x40
+#define TPM_VID_DID_RID 0x60
+/* TPM command header size */
+#define TPM_HEADER_SIZE 10
+#define TPM_RETRY 5
+/*
+ * I2C bus device maximum buffer size w/o counting I2C address or command
+ * i.e. max size required for I2C write is 34 = addr, command, 32 bytes data
+ */
+#define TPM_I2C_MAX_BUF_SIZE 32
+#define TPM_I2C_RETRY_COUNT 32
+#define TPM_I2C_BUS_DELAY 1 /* msec */
+#define TPM_I2C_RETRY_DELAY_SHORT 2 /* msec */
+#define TPM_I2C_RETRY_DELAY_LONG 10 /* msec */
+
+#define I2C_DRIVER_NAME "tpm_i2c_nuvoton"
+
+struct priv_data {
+ unsigned int intrs;
+};
+
+static s32 i2c_nuvoton_read_buf(struct i2c_client *client, u8 offset, u8 size,
+ u8 *data)
+{
+ s32 status;
+
+ status = i2c_smbus_read_i2c_block_data(client, offset, size, data);
+ dev_dbg(&client->dev,
+ "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__,
+ offset, size, (int)size, data, status);
+ return status;
+}
+
+static s32 i2c_nuvoton_write_buf(struct i2c_client *client, u8 offset, u8 size,
+ u8 *data)
+{
+ s32 status;
+
+ status = i2c_smbus_write_i2c_block_data(client, offset, size, data);
+ dev_dbg(&client->dev,
+ "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__,
+ offset, size, (int)size, data, status);
+ return status;
+}
+
+#define TPM_STS_VALID 0x80
+#define TPM_STS_COMMAND_READY 0x40
+#define TPM_STS_GO 0x20
+#define TPM_STS_DATA_AVAIL 0x10
+#define TPM_STS_EXPECT 0x08
+#define TPM_STS_RESPONSE_RETRY 0x02
+#define TPM_STS_ERR_VAL 0x07 /* bit2...bit0 reads always 0 */
+
+#define TPM_I2C_SHORT_TIMEOUT 750 /* ms */
+#define TPM_I2C_LONG_TIMEOUT 2000 /* 2 sec */
+
+/* read TPM_STS register */
+static u8 i2c_nuvoton_read_status(struct tpm_chip *chip)
+{
+ struct i2c_client *client = to_i2c_client(chip->dev);
+ s32 status;
+ u8 data;
+
+ status = i2c_nuvoton_read_buf(client, TPM_STS, 1, &data);
+ if (status <= 0) {
+ dev_err(chip->dev, "%s() error return %d\n", __func__,
+ status);
+ data = TPM_STS_ERR_VAL;
+ }
+
+ return data;
+}
+
+/* write byte to TPM_STS register */
+static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data)
+{
+ s32 status;
+ int i;
+
+ /* this causes the current command to be aborted */
+ for (i = 0, status = -1; i < TPM_I2C_RETRY_COUNT && status < 0; i++) {
+ status = i2c_nuvoton_write_buf(client, TPM_STS, 1, &data);
+ msleep(TPM_I2C_BUS_DELAY);
+ }
+ return status;
+}
+
+/* write commandReady to TPM_STS register */
+static void i2c_nuvoton_ready(struct tpm_chip *chip)
+{
+ struct i2c_client *client = to_i2c_client(chip->dev);
+ s32 status;
+
+ /* this causes the current command to be aborted */
+ status = i2c_nuvoton_write_status(client, TPM_STS_COMMAND_READY);
+ if (status < 0)
+ dev_err(chip->dev,
+ "%s() fail to write TPM_STS.commandReady\n", __func__);
+}
+
+/* read burstCount field from TPM_STS register
+ * return -1 on fail to read */
+static int i2c_nuvoton_get_burstcount(struct i2c_client *client,
+ struct tpm_chip *chip)
+{
+ unsigned long stop = jiffies + chip->vendor.timeout_d;
+ s32 status;
+ int burst_count = -1;
+ u8 data;
+
+ /* wait for burstcount to be non-zero */
+ do {
+ /* in I2C burstCount is 1 byte */
+ status = i2c_nuvoton_read_buf(client, TPM_BURST_COUNT, 1,
+ &data);
+ if (status > 0 && data > 0) {
+ burst_count = min_t(u8, TPM_I2C_MAX_BUF_SIZE, data);
+ break;
+ }
+ msleep(TPM_I2C_BUS_DELAY);
+ } while (time_before(jiffies, stop));
+
+ return burst_count;
+}
+
+/*
+ * WPCT301/NPCT501 SINT# supports only dataAvail
+ * any call to this function which is not waiting for dataAvail will
+ * set queue to NULL to avoid waiting for interrupt
+ */
+static bool i2c_nuvoton_check_status(struct tpm_chip *chip, u8 mask, u8 value)
+{
+ u8 status = i2c_nuvoton_read_status(chip);
+ return (status != TPM_STS_ERR_VAL) && ((status & mask) == value);
+}
+
+static int i2c_nuvoton_wait_for_stat(struct tpm_chip *chip, u8 mask, u8 value,
+ u32 timeout, wait_queue_head_t *queue)
+{
+ if (chip->vendor.irq && queue) {
+ s32 rc;
+ DEFINE_WAIT(wait);
+ struct priv_data *priv = chip->vendor.priv;
+ unsigned int cur_intrs = priv->intrs;
+
+ enable_irq(chip->vendor.irq);
+ rc = wait_event_interruptible_timeout(*queue,
+ cur_intrs != priv->intrs,
+ timeout);
+ if (rc > 0)
+ return 0;
+ /* At this point we know that the SINT pin is asserted, so we
+ * do not need to do i2c_nuvoton_check_status */
+ } else {
+ unsigned long ten_msec, stop;
+ bool status_valid;
+
+ /* check current status */
+ status_valid = i2c_nuvoton_check_status(chip, mask, value);
+ if (status_valid)
+ return 0;
+
+ /* use polling to wait for the event */
+ ten_msec = jiffies + msecs_to_jiffies(TPM_I2C_RETRY_DELAY_LONG);
+ stop = jiffies + timeout;
+ do {
+ if (time_before(jiffies, ten_msec))
+ msleep(TPM_I2C_RETRY_DELAY_SHORT);
+ else
+ msleep(TPM_I2C_RETRY_DELAY_LONG);
+ status_valid = i2c_nuvoton_check_status(chip, mask,
+ value);
+ if (status_valid)
+ return 0;
+ } while (time_before(jiffies, stop));
+ }
+ dev_err(chip->dev, "%s(%02x, %02x) -> timeout\n", __func__, mask,
+ value);
+ return -ETIMEDOUT;
+}
+
+/* wait for dataAvail field to be set in the TPM_STS register */
+static int i2c_nuvoton_wait_for_data_avail(struct tpm_chip *chip, u32 timeout,
+ wait_queue_head_t *queue)
+{
+ return i2c_nuvoton_wait_for_stat(chip,
+ TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+ TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+ timeout, queue);
+}
+
+/* Read @count bytes into @buf from TPM_RD_FIFO register */
+static int i2c_nuvoton_recv_data(struct i2c_client *client,
+ struct tpm_chip *chip, u8 *buf, size_t count)
+{
+ s32 rc;
+ int burst_count, bytes2read, size = 0;
+
+ while (size < count &&
+ i2c_nuvoton_wait_for_data_avail(chip,
+ chip->vendor.timeout_c,
+ &chip->vendor.read_queue) == 0) {
+ burst_count = i2c_nuvoton_get_burstcount(client, chip);
+ if (burst_count < 0) {
+ dev_err(chip->dev,
+ "%s() fail to read burstCount=%d\n", __func__,
+ burst_count);
+ return -EIO;
+ }
+ bytes2read = min_t(size_t, burst_count, count - size);
+ rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_R,
+ bytes2read, &buf[size]);
+ if (rc < 0) {
+ dev_err(chip->dev,
+ "%s() fail on i2c_nuvoton_read_buf()=%d\n",
+ __func__, rc);
+ return -EIO;
+ }
+ dev_dbg(chip->dev, "%s(%d):", __func__, bytes2read);
+ size += bytes2read;
+ }
+
+ return size;
+}
+
+/* Read TPM command results */
+static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+ struct device *dev = chip->dev;
+ struct i2c_client *client = to_i2c_client(dev);
+ s32 rc;
+ int expected, status, burst_count, retries, size = 0;
+
+ if (count < TPM_HEADER_SIZE) {
+ i2c_nuvoton_ready(chip); /* return to idle */
+ dev_err(dev, "%s() count < header size\n", __func__);
+ return -EIO;
+ }
+ for (retries = 0; retries < TPM_RETRY; retries++) {
+ if (retries > 0) {
+ /* if this is not the first trial, set responseRetry */
+ i2c_nuvoton_write_status(client,
+ TPM_STS_RESPONSE_RETRY);
+ }
+ /*
+ * read first available (> 10 bytes), including:
+ * tag, paramsize, and result
+ */
+ status = i2c_nuvoton_wait_for_data_avail(
+ chip, chip->vendor.timeout_c, &chip->vendor.read_queue);
+ if (status != 0) {
+ dev_err(dev, "%s() timeout on dataAvail\n", __func__);
+ size = -ETIMEDOUT;
+ continue;
+ }
+ burst_count = i2c_nuvoton_get_burstcount(client, chip);
+ if (burst_count < 0) {
+ dev_err(dev, "%s() fail to get burstCount\n", __func__);
+ size = -EIO;
+ continue;
+ }
+ size = i2c_nuvoton_recv_data(client, chip, buf,
+ burst_count);
+ if (size < TPM_HEADER_SIZE) {
+ dev_err(dev, "%s() fail to read header\n", __func__);
+ size = -EIO;
+ continue;
+ }
+ /*
+ * convert number of expected bytes field from big endian 32 bit
+ * to machine native
+ */
+ expected = be32_to_cpu(*(__be32 *) (buf + 2));
+ if (expected > count) {
+ dev_err(dev, "%s() expected > count\n", __func__);
+ size = -EIO;
+ continue;
+ }
+ rc = i2c_nuvoton_recv_data(client, chip, &buf[size],
+ expected - size);
+ size += rc;
+ if (rc < 0 || size < expected) {
+ dev_err(dev, "%s() fail to read remainder of result\n",
+ __func__);
+ size = -EIO;
+ continue;
+ }
+ if (i2c_nuvoton_wait_for_stat(
+ chip, TPM_STS_VALID | TPM_STS_DATA_AVAIL,
+ TPM_STS_VALID, chip->vendor.timeout_c,
+ NULL)) {
+ dev_err(dev, "%s() error left over data\n", __func__);
+ size = -ETIMEDOUT;
+ continue;
+ }
+ break;
+ }
+ i2c_nuvoton_ready(chip);
+ dev_dbg(chip->dev, "%s() -> %d\n", __func__, size);
+ return size;
+}
+
+/*
+ * Send TPM command.
+ *
+ * If interrupts are used (signaled by an irq set in the vendor structure)
+ * tpm.c can skip polling for the data to be available as the interrupt is
+ * waited for here
+ */
+static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+ struct device *dev = chip->dev;
+ struct i2c_client *client = to_i2c_client(dev);
+ u32 ordinal;
+ size_t count = 0;
+ int burst_count, bytes2write, retries, rc = -EIO;
+
+ for (retries = 0; retries < TPM_RETRY; retries++) {
+ i2c_nuvoton_ready(chip);
+ if (i2c_nuvoton_wait_for_stat(chip, TPM_STS_COMMAND_READY,
+ TPM_STS_COMMAND_READY,
+ chip->vendor.timeout_b, NULL)) {
+ dev_err(dev, "%s() timeout on commandReady\n",
+ __func__);
+ rc = -EIO;
+ continue;
+ }
+ rc = 0;
+ while (count < len - 1) {
+ burst_count = i2c_nuvoton_get_burstcount(client,
+ chip);
+ if (burst_count < 0) {
+ dev_err(dev, "%s() fail get burstCount\n",
+ __func__);
+ rc = -EIO;
+ break;
+ }
+ bytes2write = min_t(size_t, burst_count,
+ len - 1 - count);
+ rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W,
+ bytes2write, &buf[count]);
+ if (rc < 0) {
+ dev_err(dev, "%s() fail i2cWriteBuf\n",
+ __func__);
+ break;
+ }
+ dev_dbg(dev, "%s(%d):", __func__, bytes2write);
+ count += bytes2write;
+ rc = i2c_nuvoton_wait_for_stat(chip,
+ TPM_STS_VALID |
+ TPM_STS_EXPECT,
+ TPM_STS_VALID |
+ TPM_STS_EXPECT,
+ chip->vendor.timeout_c,
+ NULL);
+ if (rc < 0) {
+ dev_err(dev, "%s() timeout on Expect\n",
+ __func__);
+ rc = -ETIMEDOUT;
+ break;
+ }
+ }
+ if (rc < 0)
+ continue;
+
+ /* write last byte */
+ rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W, 1,
+ &buf[count]);
+ if (rc < 0) {
+ dev_err(dev, "%s() fail to write last byte\n",
+ __func__);
+ rc = -EIO;
+ continue;
+ }
+ dev_dbg(dev, "%s(last): %02x", __func__, buf[count]);
+ rc = i2c_nuvoton_wait_for_stat(chip,
+ TPM_STS_VALID | TPM_STS_EXPECT,
+ TPM_STS_VALID,
+ chip->vendor.timeout_c, NULL);
+ if (rc) {
+ dev_err(dev, "%s() timeout on Expect to clear\n",
+ __func__);
+ rc = -ETIMEDOUT;
+ continue;
+ }
+ break;
+ }
+ if (rc < 0) {
+ /* retries == TPM_RETRY */
+ i2c_nuvoton_ready(chip);
+ return rc;
+ }
+ /* execute the TPM command */
+ rc = i2c_nuvoton_write_status(client, TPM_STS_GO);
+ if (rc < 0) {
+ dev_err(dev, "%s() fail to write Go\n", __func__);
+ i2c_nuvoton_ready(chip);
+ return rc;
+ }
+ ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
+ rc = i2c_nuvoton_wait_for_data_avail(chip,
+ tpm_calc_ordinal_duration(chip,
+ ordinal),
+ &chip->vendor.read_queue);
+ if (rc) {
+ dev_err(dev, "%s() timeout command duration\n", __func__);
+ i2c_nuvoton_ready(chip);
+ return rc;
+ }
+
+ dev_dbg(dev, "%s() -> %zd\n", __func__, len);
+ return len;
+}
+
+static bool i2c_nuvoton_req_canceled(struct tpm_chip *chip, u8 status)
+{
+ return (status == TPM_STS_COMMAND_READY);
+}
+
+static const struct file_operations i2c_nuvoton_ops = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .open = tpm_open,
+ .read = tpm_read,
+ .write = tpm_write,
+ .release = tpm_release,
+};
+
+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
+
+static struct attribute *i2c_nuvoton_attrs[] = {
+ &dev_attr_pubek.attr,
+ &dev_attr_pcrs.attr,
+ &dev_attr_enabled.attr,
+ &dev_attr_active.attr,
+ &dev_attr_owned.attr,
+ &dev_attr_temp_deactivated.attr,
+ &dev_attr_caps.attr,
+ &dev_attr_cancel.attr,
+ &dev_attr_durations.attr,
+ &dev_attr_timeouts.attr,
+ NULL,
+};
+
+static struct attribute_group i2c_nuvoton_attr_grp = {
+ .attrs = i2c_nuvoton_attrs
+};
+
+static const struct tpm_vendor_specific tpm_i2c = {
+ .status = i2c_nuvoton_read_status,
+ .recv = i2c_nuvoton_recv,
+ .send = i2c_nuvoton_send,
+ .cancel = i2c_nuvoton_ready,
+ .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+ .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+ .req_canceled = i2c_nuvoton_req_canceled,
+ .attr_group = &i2c_nuvoton_attr_grp,
+ .miscdev.fops = &i2c_nuvoton_ops,
+};
+
+/* The only purpose for the handler is to signal to any waiting threads that
+ * the interrupt is currently being asserted. The driver does not do any
+ * processing triggered by interrupts, and the chip provides no way to mask at
+ * the source (plus that would be slow over I2C). Run the IRQ as a one-shot,
+ * this means it cannot be shared. */
+static irqreturn_t i2c_nuvoton_int_handler(int dummy, void *dev_id)
+{
+ struct tpm_chip *chip = dev_id;
+ struct priv_data *priv = chip->vendor.priv;
+
+ priv->intrs++;
+ wake_up(&chip->vendor.read_queue);
+ disable_irq_nosync(chip->vendor.irq);
+ return IRQ_HANDLED;
+}
+
+static int get_vid(struct i2c_client *client, u32 *res)
+{
+ static const u8 vid_did_rid_value[] = { 0x50, 0x10, 0xfe };
+ u32 temp;
+ s32 rc;
+
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+ return -ENODEV;
+ rc = i2c_nuvoton_read_buf(client, TPM_VID_DID_RID, 4, (u8 *)&temp);
+ if (rc < 0)
+ return rc;
+
+ /* check WPCT301 values - ignore RID */
+ if (memcmp(&temp, vid_did_rid_value, sizeof(vid_did_rid_value))) {
+ /*
+ * f/w rev 2.81 has an issue where the VID_DID_RID is not
+ * reporting the right value. so give it another chance at
+ * offset 0x20 (FIFO_W).
+ */
+ rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_W, 4,
+ (u8 *) (&temp));
+ if (rc < 0)
+ return rc;
+
+ /* check WPCT301 values - ignore RID */
+ if (memcmp(&temp, vid_did_rid_value,
+ sizeof(vid_did_rid_value)))
+ return -ENODEV;
+ }
+
+ *res = temp;
+ return 0;
+}
+
+static int i2c_nuvoton_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ int rc;
+ struct tpm_chip *chip;
+ struct device *dev = &client->dev;
+ u32 vid = 0;
+
+ rc = get_vid(client, &vid);
+ if (rc)
+ return rc;
+
+ dev_info(dev, "VID: %04X DID: %02X RID: %02X\n", (u16) vid,
+ (u8) (vid >> 16), (u8) (vid >> 24));
+
+ chip = tpm_register_hardware(dev, &tpm_i2c);
+ if (!chip) {
+ dev_err(dev, "%s() error in tpm_register_hardware\n", __func__);
+ return -ENODEV;
+ }
+
+ chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data),
+ GFP_KERNEL);
+ init_waitqueue_head(&chip->vendor.read_queue);
+ init_waitqueue_head(&chip->vendor.int_queue);
+
+ /* Default timeouts */
+ chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+ chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT);
+ chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+ chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+
+ /*
+ * I2C intfcaps (interrupt capabilitieis) in the chip are hard coded to:
+ * TPM_INTF_INT_LEVEL_LOW | TPM_INTF_DATA_AVAIL_INT
+ * The IRQ should be set in the i2c_board_info (which is done
+ * automatically in of_i2c_register_devices, for device tree users */
+ chip->vendor.irq = client->irq;
+
+ if (chip->vendor.irq) {
+ dev_dbg(dev, "%s() chip-vendor.irq\n", __func__);
+ rc = devm_request_irq(dev, chip->vendor.irq,
+ i2c_nuvoton_int_handler,
+ IRQF_TRIGGER_LOW,
+ chip->vendor.miscdev.name,
+ chip);
+ if (rc) {
+ dev_err(dev, "%s() Unable to request irq: %d for use\n",
+ __func__, chip->vendor.irq);
+ chip->vendor.irq = 0;
+ } else {
+ /* Clear any pending interrupt */
+ i2c_nuvoton_ready(chip);
+ /* - wait for TPM_STS==0xA0 (stsValid, commandReady) */
+ rc = i2c_nuvoton_wait_for_stat(chip,
+ TPM_STS_COMMAND_READY,
+ TPM_STS_COMMAND_READY,
+ chip->vendor.timeout_b,
+ NULL);
+ if (rc == 0) {
+ /*
+ * TIS is in ready state
+ * write dummy byte to enter reception state
+ * TPM_DATA_FIFO_W <- rc (0)
+ */
+ rc = i2c_nuvoton_write_buf(client,
+ TPM_DATA_FIFO_W,
+ 1, (u8 *) (&rc));
+ if (rc < 0)
+ goto out_err;
+ /* TPM_STS <- 0x40 (commandReady) */
+ i2c_nuvoton_ready(chip);
+ } else {
+ /*
+ * timeout_b reached - command was
+ * aborted. TIS should now be in idle state -
+ * only TPM_STS_VALID should be set
+ */
+ if (i2c_nuvoton_read_status(chip) !=
+ TPM_STS_VALID) {
+ rc = -EIO;
+ goto out_err;
+ }
+ }
+ }
+ }
+
+ if (tpm_get_timeouts(chip)) {
+ rc = -ENODEV;
+ goto out_err;
+ }
+
+ if (tpm_do_selftest(chip)) {
+ rc = -ENODEV;
+ goto out_err;
+ }
+
+ return 0;
+
+out_err:
+ tpm_dev_vendor_release(chip);
+ tpm_remove_hardware(chip->dev);
+ return rc;
+}
+
+static int i2c_nuvoton_remove(struct i2c_client *client)
+{
+ struct device *dev = &(client->dev);
+ struct tpm_chip *chip = dev_get_drvdata(dev);
+
+ if (chip)
+ tpm_dev_vendor_release(chip);
+ tpm_remove_hardware(dev);
+ kfree(chip);
+ return 0;
+}
+
+
+static const struct i2c_device_id i2c_nuvoton_id[] = {
+ {I2C_DRIVER_NAME, 0},
+ {}
+};
+MODULE_DEVICE_TABLE(i2c, i2c_nuvoton_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id i2c_nuvoton_of_match[] = {
+ {.compatible = "nuvoton,npct501"},
+ {.compatible = "winbond,wpct301"},
+ {},
+};
+MODULE_DEVICE_TABLE(of, i2c_nuvoton_of_match);
+#endif
+
+static SIMPLE_DEV_PM_OPS(i2c_nuvoton_pm_ops, tpm_pm_suspend, tpm_pm_resume);
+
+static struct i2c_driver i2c_nuvoton_driver = {
+ .id_table = i2c_nuvoton_id,
+ .probe = i2c_nuvoton_probe,
+ .remove = i2c_nuvoton_remove,
+ .driver = {
+ .name = I2C_DRIVER_NAME,
+ .owner = THIS_MODULE,
+ .pm = &i2c_nuvoton_pm_ops,
+ .of_match_table = of_match_ptr(i2c_nuvoton_of_match),
+ },
+};
+
+module_i2c_driver(i2c_nuvoton_driver);
+
+MODULE_AUTHOR("Dan Morav (dan.morav@nuvoton.com)");
+MODULE_DESCRIPTION("Nuvoton TPM I2C Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c
index 5bb8e2ddd3b3..a0d6ceb5d005 100644
--- a/drivers/char/tpm/tpm_i2c_stm_st33.c
+++ b/drivers/char/tpm/tpm_i2c_stm_st33.c
@@ -584,7 +584,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
static struct attribute *stm_tpm_attrs[] = {
@@ -746,8 +746,6 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
tpm_get_timeouts(chip);
- i2c_set_clientdata(client, chip);
-
dev_info(chip->dev, "TPM I2C Initialized\n");
return 0;
_irq_set:
@@ -807,24 +805,18 @@ static int tpm_st33_i2c_remove(struct i2c_client *client)
#ifdef CONFIG_PM_SLEEP
/*
* tpm_st33_i2c_pm_suspend suspend the TPM device
- * Added: Work around when suspend and no tpm application is running, suspend
- * may fail because chip->data_buffer is not set (only set in tpm_open in Linux
- * TPM core)
* @param: client, the i2c_client drescription (TPM I2C description).
* @param: mesg, the power management message.
* @return: 0 in case of success.
*/
static int tpm_st33_i2c_pm_suspend(struct device *dev)
{
- struct tpm_chip *chip = dev_get_drvdata(dev);
struct st33zp24_platform_data *pin_infos = dev->platform_data;
int ret = 0;
if (power_mgt) {
gpio_set_value(pin_infos->io_lpcpd, 0);
} else {
- if (chip->data_buffer == NULL)
- chip->data_buffer = pin_infos->tpm_i2c_buffer[0];
ret = tpm_pm_suspend(dev);
}
return ret;
@@ -849,8 +841,6 @@ static int tpm_st33_i2c_pm_resume(struct device *dev)
TPM_STS_VALID) == TPM_STS_VALID,
chip->vendor.timeout_b);
} else {
- if (chip->data_buffer == NULL)
- chip->data_buffer = pin_infos->tpm_i2c_buffer[0];
ret = tpm_pm_resume(dev);
if (!ret)
tpm_do_selftest(chip);
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 56b07c35a13e..2783a42aa732 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -98,7 +98,7 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
if (count < len) {
dev_err(ibmvtpm->dev,
- "Invalid size in recv: count=%ld, crq_size=%d\n",
+ "Invalid size in recv: count=%zd, crq_size=%d\n",
count, len);
return -EIO;
}
@@ -136,7 +136,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
if (count > ibmvtpm->rtce_size) {
dev_err(ibmvtpm->dev,
- "Invalid size in send: count=%ld, rtce_size=%d\n",
+ "Invalid size in send: count=%zd, rtce_size=%d\n",
count, ibmvtpm->rtce_size);
return -EIO;
}
@@ -419,7 +419,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c
index 2168d15bc728..8e562dc65601 100644
--- a/drivers/char/tpm/tpm_ppi.c
+++ b/drivers/char/tpm/tpm_ppi.c
@@ -452,12 +452,8 @@ int tpm_add_ppi(struct kobject *parent)
{
return sysfs_create_group(parent, &ppi_attr_grp);
}
-EXPORT_SYMBOL_GPL(tpm_add_ppi);
void tpm_remove_ppi(struct kobject *parent)
{
sysfs_remove_group(parent, &ppi_attr_grp);
}
-EXPORT_SYMBOL_GPL(tpm_remove_ppi);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 5796d0157ce0..1b74459c0723 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -448,7 +448,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 94c280d36e8b..c8ff4df81779 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -351,8 +351,6 @@ static int tpmfront_probe(struct xenbus_device *dev,
tpm_get_timeouts(priv->chip);
- dev_set_drvdata(&dev->dev, priv->chip);
-
return rv;
}
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 218460fcd2e4..25a70d06c5bf 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -68,6 +68,9 @@ static void cs_check_cpu(int cpu, unsigned int load)
dbs_info->requested_freq += get_freq_target(cs_tuners, policy);
+ if (dbs_info->requested_freq > policy->max)
+ dbs_info->requested_freq = policy->max;
+
__cpufreq_driver_target(policy, dbs_info->requested_freq,
CPUFREQ_RELATION_H);
return;
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 0806c31e5764..e6be63561fa6 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -328,10 +328,6 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
dbs_data->cdata->gov_dbs_timer);
}
- /*
- * conservative does not implement micro like ondemand
- * governor, thus we are bound to jiffes/HZ
- */
if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
cs_dbs_info->down_skip = 0;
cs_dbs_info->enable = 1;
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index be6d14307aa8..a0acd0bfba40 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -53,6 +53,7 @@ static unsigned int omap_getspeed(unsigned int cpu)
static int omap_target(struct cpufreq_policy *policy, unsigned int index)
{
+ int r, ret;
struct dev_pm_opp *opp;
unsigned long freq, volt = 0, volt_old = 0, tol = 0;
unsigned int old_freq, new_freq;
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index dd2874ec1927..446687cc2334 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -89,14 +89,15 @@ config AT_HDMAC
Support the Atmel AHB DMA controller.
config FSL_DMA
- tristate "Freescale Elo and Elo Plus DMA support"
+ tristate "Freescale Elo series DMA support"
depends on FSL_SOC
select DMA_ENGINE
select ASYNC_TX_ENABLE_CHANNEL_SWITCH
---help---
- Enable support for the Freescale Elo and Elo Plus DMA controllers.
- The Elo is the DMA controller on some 82xx and 83xx parts, and the
- Elo Plus is the DMA controller on 85xx and 86xx parts.
+ Enable support for the Freescale Elo series DMA controllers.
+ The Elo is the DMA controller on some mpc82xx and mpc83xx parts, the
+ EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on
+ some Txxx and Bxxx parts.
config MPC512X_DMA
tristate "Freescale MPC512x built-in DMA engine support"
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index e51a9832ef0d..16a2aa28f856 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1164,42 +1164,12 @@ static void pl08x_free_txd(struct pl08x_driver_data *pl08x,
kfree(txd);
}
-static void pl08x_unmap_buffers(struct pl08x_txd *txd)
-{
- struct device *dev = txd->vd.tx.chan->device->dev;
- struct pl08x_sg *dsg;
-
- if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- if (txd->vd.tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
- list_for_each_entry(dsg, &txd->dsg_list, node)
- dma_unmap_single(dev, dsg->src_addr, dsg->len,
- DMA_TO_DEVICE);
- else {
- list_for_each_entry(dsg, &txd->dsg_list, node)
- dma_unmap_page(dev, dsg->src_addr, dsg->len,
- DMA_TO_DEVICE);
- }
- }
- if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- if (txd->vd.tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
- list_for_each_entry(dsg, &txd->dsg_list, node)
- dma_unmap_single(dev, dsg->dst_addr, dsg->len,
- DMA_FROM_DEVICE);
- else
- list_for_each_entry(dsg, &txd->dsg_list, node)
- dma_unmap_page(dev, dsg->dst_addr, dsg->len,
- DMA_FROM_DEVICE);
- }
-}
-
static void pl08x_desc_free(struct virt_dma_desc *vd)
{
struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
struct pl08x_dma_chan *plchan = to_pl08x_chan(vd->tx.chan);
- if (!plchan->slave)
- pl08x_unmap_buffers(txd);
-
+ dma_descriptor_unmap(txd);
if (!txd->done)
pl08x_release_mux(plchan);
@@ -1252,7 +1222,7 @@ static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
size_t bytes = 0;
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret == DMA_SUCCESS)
+ if (ret == DMA_COMPLETE)
return ret;
/*
@@ -1267,7 +1237,7 @@ static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
spin_lock_irqsave(&plchan->vc.lock, flags);
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret != DMA_SUCCESS) {
+ if (ret != DMA_COMPLETE) {
vd = vchan_find_desc(&plchan->vc, cookie);
if (vd) {
/* On the issued list, so hasn't been processed yet */
@@ -2138,8 +2108,7 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
- ret = request_irq(adev->irq[0], pl08x_irq, IRQF_DISABLED,
- DRIVER_NAME, pl08x);
+ ret = request_irq(adev->irq[0], pl08x_irq, 0, DRIVER_NAME, pl08x);
if (ret) {
dev_err(&adev->dev, "%s failed to request interrupt %d\n",
__func__, adev->irq[0]);
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index c787f38a186a..e2c04dc81e2a 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -344,31 +344,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
/* move myself to free_list */
list_move(&desc->desc_node, &atchan->free_list);
- /* unmap dma addresses (not on slave channels) */
- if (!atchan->chan_common.private) {
- struct device *parent = chan2parent(&atchan->chan_common);
- if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
- dma_unmap_single(parent,
- desc->lli.daddr,
- desc->len, DMA_FROM_DEVICE);
- else
- dma_unmap_page(parent,
- desc->lli.daddr,
- desc->len, DMA_FROM_DEVICE);
- }
- if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
- dma_unmap_single(parent,
- desc->lli.saddr,
- desc->len, DMA_TO_DEVICE);
- else
- dma_unmap_page(parent,
- desc->lli.saddr,
- desc->len, DMA_TO_DEVICE);
- }
- }
-
+ dma_descriptor_unmap(txd);
/* for cyclic transfers,
* no need to replay callback function while stopping */
if (!atc_chan_is_cyclic(atchan)) {
@@ -1102,7 +1078,7 @@ atc_tx_status(struct dma_chan *chan,
int bytes = 0;
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret == DMA_SUCCESS)
+ if (ret == DMA_COMPLETE)
return ret;
/*
* There's no point calculating the residue if there's
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index 31011d2a26fc..3c6716e0b78e 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -2369,7 +2369,7 @@ coh901318_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
enum dma_status ret;
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret == DMA_SUCCESS)
+ if (ret == DMA_COMPLETE)
return ret;
dma_set_residue(txstate, coh901318_get_bytes_left(chan));
@@ -2694,7 +2694,7 @@ static int __init coh901318_probe(struct platform_device *pdev)
if (irq < 0)
return irq;
- err = devm_request_irq(&pdev->dev, irq, dma_irq_handler, IRQF_DISABLED,
+ err = devm_request_irq(&pdev->dev, irq, dma_irq_handler, 0,
"coh901318", base);
if (err)
return err;
diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 7c82b92f9b16..c29dacff66fa 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -141,6 +141,9 @@ struct cppi41_dd {
const struct chan_queues *queues_rx;
const struct chan_queues *queues_tx;
struct chan_queues td_queue;
+
+ /* context for suspend/resume */
+ unsigned int dma_tdfdq;
};
#define FIST_COMPLETION_QUEUE 93
@@ -263,6 +266,15 @@ static u32 pd_trans_len(u32 val)
return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
}
+static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
+{
+ u32 desc;
+
+ desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
+ desc &= ~0x1f;
+ return desc;
+}
+
static irqreturn_t cppi41_irq(int irq, void *data)
{
struct cppi41_dd *cdd = data;
@@ -300,8 +312,7 @@ static irqreturn_t cppi41_irq(int irq, void *data)
q_num = __fls(val);
val &= ~(1 << q_num);
q_num += 32 * i;
- desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(q_num));
- desc &= ~0x1f;
+ desc = cppi41_pop_desc(cdd, q_num);
c = desc_to_chan(cdd, desc);
if (WARN_ON(!c)) {
pr_err("%s() q %d desc %08x\n", __func__,
@@ -353,7 +364,7 @@ static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
/* lock */
ret = dma_cookie_status(chan, cookie, txstate);
- if (txstate && ret == DMA_SUCCESS)
+ if (txstate && ret == DMA_COMPLETE)
txstate->residue = c->residue;
/* unlock */
@@ -517,15 +528,6 @@ static void cppi41_compute_td_desc(struct cppi41_desc *d)
d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
}
-static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
-{
- u32 desc;
-
- desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
- desc &= ~0x1f;
- return desc;
-}
-
static int cppi41_tear_down_chan(struct cppi41_channel *c)
{
struct cppi41_dd *cdd = c->cdd;
@@ -561,36 +563,26 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
c->td_retry = 100;
}
- if (!c->td_seen) {
- unsigned td_comp_queue;
+ if (!c->td_seen || !c->td_desc_seen) {
- if (c->is_tx)
- td_comp_queue = cdd->td_queue.complete;
- else
- td_comp_queue = c->q_comp_num;
+ desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
+ if (!desc_phys)
+ desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
- desc_phys = cppi41_pop_desc(cdd, td_comp_queue);
- if (desc_phys) {
- __iormb();
+ if (desc_phys == c->desc_phys) {
+ c->td_desc_seen = 1;
+
+ } else if (desc_phys == td_desc_phys) {
+ u32 pd0;
- if (desc_phys == td_desc_phys) {
- u32 pd0;
- pd0 = td->pd0;
- WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
- WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
- WARN_ON((pd0 & 0x1f) != c->port_num);
- } else {
- WARN_ON_ONCE(1);
- }
- c->td_seen = 1;
- }
- }
- if (!c->td_desc_seen) {
- desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
- if (desc_phys) {
__iormb();
- WARN_ON(c->desc_phys != desc_phys);
- c->td_desc_seen = 1;
+ pd0 = td->pd0;
+ WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
+ WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
+ WARN_ON((pd0 & 0x1f) != c->port_num);
+ c->td_seen = 1;
+ } else if (desc_phys) {
+ WARN_ON_ONCE(1);
}
}
c->td_retry--;
@@ -609,7 +601,7 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
WARN_ON(!c->td_retry);
if (!c->td_desc_seen) {
- desc_phys = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
+ desc_phys = cppi41_pop_desc(cdd, c->q_num);
WARN_ON(!desc_phys);
}
@@ -674,14 +666,14 @@ static void cleanup_chans(struct cppi41_dd *cdd)
}
}
-static int cppi41_add_chans(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
{
struct cppi41_channel *cchan;
int i;
int ret;
u32 n_chans;
- ret = of_property_read_u32(pdev->dev.of_node, "#dma-channels",
+ ret = of_property_read_u32(dev->of_node, "#dma-channels",
&n_chans);
if (ret)
return ret;
@@ -719,7 +711,7 @@ err:
return -ENOMEM;
}
-static void purge_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
+static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
{
unsigned int mem_decs;
int i;
@@ -731,7 +723,7 @@ static void purge_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));
- dma_free_coherent(&pdev->dev, mem_decs, cdd->cd,
+ dma_free_coherent(dev, mem_decs, cdd->cd,
cdd->descs_phys);
}
}
@@ -741,19 +733,19 @@ static void disable_sched(struct cppi41_dd *cdd)
cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
}
-static void deinit_cpii41(struct platform_device *pdev, struct cppi41_dd *cdd)
+static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
disable_sched(cdd);
- purge_descs(pdev, cdd);
+ purge_descs(dev, cdd);
cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
- dma_free_coherent(&pdev->dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
+ dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
cdd->scratch_phys);
}
-static int init_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int init_descs(struct device *dev, struct cppi41_dd *cdd)
{
unsigned int desc_size;
unsigned int mem_decs;
@@ -777,7 +769,7 @@ static int init_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
reg |= ilog2(ALLOC_DECS_NUM) - 5;
BUILD_BUG_ON(DESCS_AREAS != 1);
- cdd->cd = dma_alloc_coherent(&pdev->dev, mem_decs,
+ cdd->cd = dma_alloc_coherent(dev, mem_decs,
&cdd->descs_phys, GFP_KERNEL);
if (!cdd->cd)
return -ENOMEM;
@@ -813,12 +805,12 @@ static void init_sched(struct cppi41_dd *cdd)
cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
}
-static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
int ret;
BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
- cdd->qmgr_scratch = dma_alloc_coherent(&pdev->dev, QMGR_SCRATCH_SIZE,
+ cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
&cdd->scratch_phys, GFP_KERNEL);
if (!cdd->qmgr_scratch)
return -ENOMEM;
@@ -827,7 +819,7 @@ static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
- ret = init_descs(pdev, cdd);
+ ret = init_descs(dev, cdd);
if (ret)
goto err_td;
@@ -835,7 +827,7 @@ static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
init_sched(cdd);
return 0;
err_td:
- deinit_cpii41(pdev, cdd);
+ deinit_cppi41(dev, cdd);
return ret;
}
@@ -914,11 +906,11 @@ static const struct of_device_id cppi41_dma_ids[] = {
};
MODULE_DEVICE_TABLE(of, cppi41_dma_ids);
-static const struct cppi_glue_infos *get_glue_info(struct platform_device *pdev)
+static const struct cppi_glue_infos *get_glue_info(struct device *dev)
{
const struct of_device_id *of_id;
- of_id = of_match_node(cppi41_dma_ids, pdev->dev.of_node);
+ of_id = of_match_node(cppi41_dma_ids, dev->of_node);
if (!of_id)
return NULL;
return of_id->data;
@@ -927,11 +919,12 @@ static const struct cppi_glue_infos *get_glue_info(struct platform_device *pdev)
static int cppi41_dma_probe(struct platform_device *pdev)
{
struct cppi41_dd *cdd;
+ struct device *dev = &pdev->dev;
const struct cppi_glue_infos *glue_info;
int irq;
int ret;
- glue_info = get_glue_info(pdev);
+ glue_info = get_glue_info(dev);
if (!glue_info)
return -EINVAL;
@@ -946,14 +939,14 @@ static int cppi41_dma_probe(struct platform_device *pdev)
cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
cdd->ddev.device_control = cppi41_dma_control;
- cdd->ddev.dev = &pdev->dev;
+ cdd->ddev.dev = dev;
INIT_LIST_HEAD(&cdd->ddev.channels);
cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
- cdd->usbss_mem = of_iomap(pdev->dev.of_node, 0);
- cdd->ctrl_mem = of_iomap(pdev->dev.of_node, 1);
- cdd->sched_mem = of_iomap(pdev->dev.of_node, 2);
- cdd->qmgr_mem = of_iomap(pdev->dev.of_node, 3);
+ cdd->usbss_mem = of_iomap(dev->of_node, 0);
+ cdd->ctrl_mem = of_iomap(dev->of_node, 1);
+ cdd->sched_mem = of_iomap(dev->of_node, 2);
+ cdd->qmgr_mem = of_iomap(dev->of_node, 3);
if (!cdd->usbss_mem || !cdd->ctrl_mem || !cdd->sched_mem ||
!cdd->qmgr_mem) {
@@ -961,31 +954,31 @@ static int cppi41_dma_probe(struct platform_device *pdev)
goto err_remap;
}
- pm_runtime_enable(&pdev->dev);
- ret = pm_runtime_get_sync(&pdev->dev);
- if (ret)
+ pm_runtime_enable(dev);
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0)
goto err_get_sync;
cdd->queues_rx = glue_info->queues_rx;
cdd->queues_tx = glue_info->queues_tx;
cdd->td_queue = glue_info->td_queue;
- ret = init_cppi41(pdev, cdd);
+ ret = init_cppi41(dev, cdd);
if (ret)
goto err_init_cppi;
- ret = cppi41_add_chans(pdev, cdd);
+ ret = cppi41_add_chans(dev, cdd);
if (ret)
goto err_chans;
- irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+ irq = irq_of_parse_and_map(dev->of_node, 0);
if (!irq)
goto err_irq;
cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER);
ret = request_irq(irq, glue_info->isr, IRQF_SHARED,
- dev_name(&pdev->dev), cdd);
+ dev_name(dev), cdd);
if (ret)
goto err_irq;
cdd->irq = irq;
@@ -994,7 +987,7 @@ static int cppi41_dma_probe(struct platform_device *pdev)
if (ret)
goto err_dma_reg;
- ret = of_dma_controller_register(pdev->dev.of_node,
+ ret = of_dma_controller_register(dev->of_node,
cppi41_dma_xlate, &cpp41_dma_info);
if (ret)
goto err_of;
@@ -1009,11 +1002,11 @@ err_irq:
cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
cleanup_chans(cdd);
err_chans:
- deinit_cpii41(pdev, cdd);
+ deinit_cppi41(dev, cdd);
err_init_cppi:
- pm_runtime_put(&pdev->dev);
+ pm_runtime_put(dev);
err_get_sync:
- pm_runtime_disable(&pdev->dev);
+ pm_runtime_disable(dev);
iounmap(cdd->usbss_mem);
iounmap(cdd->ctrl_mem);
iounmap(cdd->sched_mem);
@@ -1033,7 +1026,7 @@ static int cppi41_dma_remove(struct platform_device *pdev)
cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
free_irq(cdd->irq, cdd);
cleanup_chans(cdd);
- deinit_cpii41(pdev, cdd);
+ deinit_cppi41(&pdev->dev, cdd);
iounmap(cdd->usbss_mem);
iounmap(cdd->ctrl_mem);
iounmap(cdd->sched_mem);
@@ -1044,12 +1037,53 @@ static int cppi41_dma_remove(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_PM_SLEEP
+static int cppi41_suspend(struct device *dev)
+{
+ struct cppi41_dd *cdd = dev_get_drvdata(dev);
+
+ cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
+ cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
+ disable_sched(cdd);
+
+ return 0;
+}
+
+static int cppi41_resume(struct device *dev)
+{
+ struct cppi41_dd *cdd = dev_get_drvdata(dev);
+ struct cppi41_channel *c;
+ int i;
+
+ for (i = 0; i < DESCS_AREAS; i++)
+ cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
+
+ list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
+ if (!c->is_tx)
+ cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);
+
+ init_sched(cdd);
+
+ cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
+ cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+ cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
+ cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
+
+ cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER);
+
+ return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(cppi41_pm_ops, cppi41_suspend, cppi41_resume);
+
static struct platform_driver cpp41_dma_driver = {
.probe = cppi41_dma_probe,
.remove = cppi41_dma_remove,
.driver = {
.name = "cppi41-dma-engine",
.owner = THIS_MODULE,
+ .pm = &cppi41_pm_ops,
.of_match_table = of_match_ptr(cppi41_dma_ids),
},
};
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c
index b0c0c8268d42..94c380f07538 100644
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -491,7 +491,7 @@ static enum dma_status jz4740_dma_tx_status(struct dma_chan *c,
unsigned long flags;
status = dma_cookie_status(c, cookie, state);
- if (status == DMA_SUCCESS || !state)
+ if (status == DMA_COMPLETE || !state)
return status;
spin_lock_irqsave(&chan->vchan.lock, flags);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 9162ac80c18f..ea806bdc12ef 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -65,6 +65,7 @@
#include <linux/acpi.h>
#include <linux/acpi_dma.h>
#include <linux/of_dma.h>
+#include <linux/mempool.h>
static DEFINE_MUTEX(dma_list_mutex);
static DEFINE_IDR(dma_idr);
@@ -901,98 +902,132 @@ void dma_async_device_unregister(struct dma_device *device)
}
EXPORT_SYMBOL(dma_async_device_unregister);
-/**
- * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
- * @chan: DMA channel to offload copy to
- * @dest: destination address (virtual)
- * @src: source address (virtual)
- * @len: length
- *
- * Both @dest and @src must be mappable to a bus address according to the
- * DMA mapping API rules for streaming mappings.
- * Both @dest and @src must stay memory resident (kernel memory or locked
- * user space pages).
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
- void *src, size_t len)
-{
- struct dma_device *dev = chan->device;
- struct dma_async_tx_descriptor *tx;
- dma_addr_t dma_dest, dma_src;
- dma_cookie_t cookie;
- unsigned long flags;
+struct dmaengine_unmap_pool {
+ struct kmem_cache *cache;
+ const char *name;
+ mempool_t *pool;
+ size_t size;
+};
- dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
- dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
- flags = DMA_CTRL_ACK |
- DMA_COMPL_SRC_UNMAP_SINGLE |
- DMA_COMPL_DEST_UNMAP_SINGLE;
- tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+#define __UNMAP_POOL(x) { .size = x, .name = "dmaengine-unmap-" __stringify(x) }
+static struct dmaengine_unmap_pool unmap_pool[] = {
+ __UNMAP_POOL(2),
+ #if IS_ENABLED(CONFIG_ASYNC_TX_DMA)
+ __UNMAP_POOL(16),
+ __UNMAP_POOL(128),
+ __UNMAP_POOL(256),
+ #endif
+};
- if (!tx) {
- dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
- dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
- return -ENOMEM;
+static struct dmaengine_unmap_pool *__get_unmap_pool(int nr)
+{
+ int order = get_count_order(nr);
+
+ switch (order) {
+ case 0 ... 1:
+ return &unmap_pool[0];
+ case 2 ... 4:
+ return &unmap_pool[1];
+ case 5 ... 7:
+ return &unmap_pool[2];
+ case 8:
+ return &unmap_pool[3];
+ default:
+ BUG();
+ return NULL;
}
+}
- tx->callback = NULL;
- cookie = tx->tx_submit(tx);
+static void dmaengine_unmap(struct kref *kref)
+{
+ struct dmaengine_unmap_data *unmap = container_of(kref, typeof(*unmap), kref);
+ struct device *dev = unmap->dev;
+ int cnt, i;
+
+ cnt = unmap->to_cnt;
+ for (i = 0; i < cnt; i++)
+ dma_unmap_page(dev, unmap->addr[i], unmap->len,
+ DMA_TO_DEVICE);
+ cnt += unmap->from_cnt;
+ for (; i < cnt; i++)
+ dma_unmap_page(dev, unmap->addr[i], unmap->len,
+ DMA_FROM_DEVICE);
+ cnt += unmap->bidi_cnt;
+ for (; i < cnt; i++) {
+ if (unmap->addr[i] == 0)
+ continue;
+ dma_unmap_page(dev, unmap->addr[i], unmap->len,
+ DMA_BIDIRECTIONAL);
+ }
+ mempool_free(unmap, __get_unmap_pool(cnt)->pool);
+}
- preempt_disable();
- __this_cpu_add(chan->local->bytes_transferred, len);
- __this_cpu_inc(chan->local->memcpy_count);
- preempt_enable();
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+ if (unmap)
+ kref_put(&unmap->kref, dmaengine_unmap);
+}
+EXPORT_SYMBOL_GPL(dmaengine_unmap_put);
- return cookie;
+static void dmaengine_destroy_unmap_pool(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+ struct dmaengine_unmap_pool *p = &unmap_pool[i];
+
+ if (p->pool)
+ mempool_destroy(p->pool);
+ p->pool = NULL;
+ if (p->cache)
+ kmem_cache_destroy(p->cache);
+ p->cache = NULL;
+ }
}
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
-/**
- * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
- * @chan: DMA channel to offload copy to
- * @page: destination page
- * @offset: offset in page to copy to
- * @kdata: source address (virtual)
- * @len: length
- *
- * Both @page/@offset and @kdata must be mappable to a bus address according
- * to the DMA mapping API rules for streaming mappings.
- * Both @page/@offset and @kdata must stay memory resident (kernel memory or
- * locked user space pages)
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
- unsigned int offset, void *kdata, size_t len)
+static int __init dmaengine_init_unmap_pool(void)
{
- struct dma_device *dev = chan->device;
- struct dma_async_tx_descriptor *tx;
- dma_addr_t dma_dest, dma_src;
- dma_cookie_t cookie;
- unsigned long flags;
+ int i;
- dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
- dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
- flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE;
- tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+ for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+ struct dmaengine_unmap_pool *p = &unmap_pool[i];
+ size_t size;
- if (!tx) {
- dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
- dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
- return -ENOMEM;
+ size = sizeof(struct dmaengine_unmap_data) +
+ sizeof(dma_addr_t) * p->size;
+
+ p->cache = kmem_cache_create(p->name, size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!p->cache)
+ break;
+ p->pool = mempool_create_slab_pool(1, p->cache);
+ if (!p->pool)
+ break;
}
- tx->callback = NULL;
- cookie = tx->tx_submit(tx);
+ if (i == ARRAY_SIZE(unmap_pool))
+ return 0;
- preempt_disable();
- __this_cpu_add(chan->local->bytes_transferred, len);
- __this_cpu_inc(chan->local->memcpy_count);
- preempt_enable();
+ dmaengine_destroy_unmap_pool();
+ return -ENOMEM;
+}
- return cookie;
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+ struct dmaengine_unmap_data *unmap;
+
+ unmap = mempool_alloc(__get_unmap_pool(nr)->pool, flags);
+ if (!unmap)
+ return NULL;
+
+ memset(unmap, 0, sizeof(*unmap));
+ kref_init(&unmap->kref);
+ unmap->dev = dev;
+
+ return unmap;
}
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+EXPORT_SYMBOL(dmaengine_get_unmap_data);
/**
* dma_async_memcpy_pg_to_pg - offloaded copy from page to page
@@ -1015,24 +1050,33 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
{
struct dma_device *dev = chan->device;
struct dma_async_tx_descriptor *tx;
- dma_addr_t dma_dest, dma_src;
+ struct dmaengine_unmap_data *unmap;
dma_cookie_t cookie;
unsigned long flags;
- dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
- dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
- DMA_FROM_DEVICE);
+ unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOIO);
+ if (!unmap)
+ return -ENOMEM;
+
+ unmap->to_cnt = 1;
+ unmap->from_cnt = 1;
+ unmap->addr[0] = dma_map_page(dev->dev, src_pg, src_off, len,
+ DMA_TO_DEVICE);
+ unmap->addr[1] = dma_map_page(dev->dev, dest_pg, dest_off, len,
+ DMA_FROM_DEVICE);
+ unmap->len = len;
flags = DMA_CTRL_ACK;
- tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+ tx = dev->device_prep_dma_memcpy(chan, unmap->addr[1], unmap->addr[0],
+ len, flags);
if (!tx) {
- dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
- dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+ dmaengine_unmap_put(unmap);
return -ENOMEM;
}
- tx->callback = NULL;
+ dma_set_unmap(tx, unmap);
cookie = tx->tx_submit(tx);
+ dmaengine_unmap_put(unmap);
preempt_disable();
__this_cpu_add(chan->local->bytes_transferred, len);
@@ -1043,6 +1087,52 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
}
EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
+/**
+ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
+ * @chan: DMA channel to offload copy to
+ * @dest: destination address (virtual)
+ * @src: source address (virtual)
+ * @len: length
+ *
+ * Both @dest and @src must be mappable to a bus address according to the
+ * DMA mapping API rules for streaming mappings.
+ * Both @dest and @src must stay memory resident (kernel memory or locked
+ * user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
+ void *src, size_t len)
+{
+ return dma_async_memcpy_pg_to_pg(chan, virt_to_page(dest),
+ (unsigned long) dest & ~PAGE_MASK,
+ virt_to_page(src),
+ (unsigned long) src & ~PAGE_MASK, len);
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
+
+/**
+ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
+ * @chan: DMA channel to offload copy to
+ * @page: destination page
+ * @offset: offset in page to copy to
+ * @kdata: source address (virtual)
+ * @len: length
+ *
+ * Both @page/@offset and @kdata must be mappable to a bus address according
+ * to the DMA mapping API rules for streaming mappings.
+ * Both @page/@offset and @kdata must stay memory resident (kernel memory or
+ * locked user space pages)
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
+ unsigned int offset, void *kdata, size_t len)
+{
+ return dma_async_memcpy_pg_to_pg(chan, page, offset,
+ virt_to_page(kdata),
+ (unsigned long) kdata & ~PAGE_MASK, len);
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+
void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
struct dma_chan *chan)
{
@@ -1062,7 +1152,7 @@ dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
if (!tx)
- return DMA_SUCCESS;
+ return DMA_COMPLETE;
while (tx->cookie == -EBUSY) {
if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
@@ -1116,6 +1206,10 @@ EXPORT_SYMBOL_GPL(dma_run_dependencies);
static int __init dma_bus_init(void)
{
+ int err = dmaengine_init_unmap_pool();
+
+ if (err)
+ return err;
return class_register(&dma_devclass);
}
arch_initcall(dma_bus_init);
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 92f796cdc6ab..20f9a3aaf926 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -8,6 +8,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
@@ -19,10 +21,6 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/wait.h>
-#include <linux/ctype.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-#include <linux/seq_file.h>
static unsigned int test_buf_size = 16384;
module_param(test_buf_size, uint, S_IRUGO | S_IWUSR);
@@ -68,92 +66,13 @@ module_param(timeout, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
"Pass -1 for infinite timeout");
-/* Maximum amount of mismatched bytes in buffer to print */
-#define MAX_ERROR_COUNT 32
-
-/*
- * Initialization patterns. All bytes in the source buffer has bit 7
- * set, all bytes in the destination buffer has bit 7 cleared.
- *
- * Bit 6 is set for all bytes which are to be copied by the DMA
- * engine. Bit 5 is set for all bytes which are to be overwritten by
- * the DMA engine.
- *
- * The remaining bits are the inverse of a counter which increments by
- * one for each byte address.
- */
-#define PATTERN_SRC 0x80
-#define PATTERN_DST 0x00
-#define PATTERN_COPY 0x40
-#define PATTERN_OVERWRITE 0x20
-#define PATTERN_COUNT_MASK 0x1f
-
-enum dmatest_error_type {
- DMATEST_ET_OK,
- DMATEST_ET_MAP_SRC,
- DMATEST_ET_MAP_DST,
- DMATEST_ET_PREP,
- DMATEST_ET_SUBMIT,
- DMATEST_ET_TIMEOUT,
- DMATEST_ET_DMA_ERROR,
- DMATEST_ET_DMA_IN_PROGRESS,
- DMATEST_ET_VERIFY,
- DMATEST_ET_VERIFY_BUF,
-};
-
-struct dmatest_verify_buffer {
- unsigned int index;
- u8 expected;
- u8 actual;
-};
-
-struct dmatest_verify_result {
- unsigned int error_count;
- struct dmatest_verify_buffer data[MAX_ERROR_COUNT];
- u8 pattern;
- bool is_srcbuf;
-};
-
-struct dmatest_thread_result {
- struct list_head node;
- unsigned int n;
- unsigned int src_off;
- unsigned int dst_off;
- unsigned int len;
- enum dmatest_error_type type;
- union {
- unsigned long data;
- dma_cookie_t cookie;
- enum dma_status status;
- int error;
- struct dmatest_verify_result *vr;
- };
-};
-
-struct dmatest_result {
- struct list_head node;
- char *name;
- struct list_head results;
-};
-
-struct dmatest_info;
-
-struct dmatest_thread {
- struct list_head node;
- struct dmatest_info *info;
- struct task_struct *task;
- struct dma_chan *chan;
- u8 **srcs;
- u8 **dsts;
- enum dma_transaction_type type;
- bool done;
-};
+static bool noverify;
+module_param(noverify, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(noverify, "Disable random data setup and verification");
-struct dmatest_chan {
- struct list_head node;
- struct dma_chan *chan;
- struct list_head threads;
-};
+static bool verbose;
+module_param(verbose, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(verbose, "Enable \"success\" result messages (default: off)");
/**
* struct dmatest_params - test parameters.
@@ -177,6 +96,7 @@ struct dmatest_params {
unsigned int xor_sources;
unsigned int pq_sources;
int timeout;
+ bool noverify;
};
/**
@@ -184,7 +104,7 @@ struct dmatest_params {
* @params: test parameters
* @lock: access protection to the fields of this structure
*/
-struct dmatest_info {
+static struct dmatest_info {
/* Test parameters */
struct dmatest_params params;
@@ -192,16 +112,95 @@ struct dmatest_info {
struct list_head channels;
unsigned int nr_channels;
struct mutex lock;
+ bool did_init;
+} test_info = {
+ .channels = LIST_HEAD_INIT(test_info.channels),
+ .lock = __MUTEX_INITIALIZER(test_info.lock),
+};
+
+static int dmatest_run_set(const char *val, const struct kernel_param *kp);
+static int dmatest_run_get(char *val, const struct kernel_param *kp);
+static struct kernel_param_ops run_ops = {
+ .set = dmatest_run_set,
+ .get = dmatest_run_get,
+};
+static bool dmatest_run;
+module_param_cb(run, &run_ops, &dmatest_run, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(run, "Run the test (default: false)");
+
+/* Maximum amount of mismatched bytes in buffer to print */
+#define MAX_ERROR_COUNT 32
+
+/*
+ * Initialization patterns. All bytes in the source buffer has bit 7
+ * set, all bytes in the destination buffer has bit 7 cleared.
+ *
+ * Bit 6 is set for all bytes which are to be copied by the DMA
+ * engine. Bit 5 is set for all bytes which are to be overwritten by
+ * the DMA engine.
+ *
+ * The remaining bits are the inverse of a counter which increments by
+ * one for each byte address.
+ */
+#define PATTERN_SRC 0x80
+#define PATTERN_DST 0x00
+#define PATTERN_COPY 0x40
+#define PATTERN_OVERWRITE 0x20
+#define PATTERN_COUNT_MASK 0x1f
- /* debugfs related stuff */
- struct dentry *root;
+struct dmatest_thread {
+ struct list_head node;
+ struct dmatest_info *info;
+ struct task_struct *task;
+ struct dma_chan *chan;
+ u8 **srcs;
+ u8 **dsts;
+ enum dma_transaction_type type;
+ bool done;
+};
- /* Test results */
- struct list_head results;
- struct mutex results_lock;
+struct dmatest_chan {
+ struct list_head node;
+ struct dma_chan *chan;
+ struct list_head threads;
};
-static struct dmatest_info test_info;
+static DECLARE_WAIT_QUEUE_HEAD(thread_wait);
+static bool wait;
+
+static bool is_threaded_test_run(struct dmatest_info *info)
+{
+ struct dmatest_chan *dtc;
+
+ list_for_each_entry(dtc, &info->channels, node) {
+ struct dmatest_thread *thread;
+
+ list_for_each_entry(thread, &dtc->threads, node) {
+ if (!thread->done)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static int dmatest_wait_get(char *val, const struct kernel_param *kp)
+{
+ struct dmatest_info *info = &test_info;
+ struct dmatest_params *params = &info->params;
+
+ if (params->iterations)
+ wait_event(thread_wait, !is_threaded_test_run(info));
+ wait = true;
+ return param_get_bool(val, kp);
+}
+
+static struct kernel_param_ops wait_ops = {
+ .get = dmatest_wait_get,
+ .set = param_set_bool,
+};
+module_param_cb(wait, &wait_ops, &wait, S_IRUGO);
+MODULE_PARM_DESC(wait, "Wait for tests to complete (default: false)");
static bool dmatest_match_channel(struct dmatest_params *params,
struct dma_chan *chan)
@@ -223,7 +222,7 @@ static unsigned long dmatest_random(void)
{
unsigned long buf;
- get_random_bytes(&buf, sizeof(buf));
+ prandom_bytes(&buf, sizeof(buf));
return buf;
}
@@ -262,9 +261,31 @@ static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len,
}
}
-static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
- unsigned int start, unsigned int end, unsigned int counter,
- u8 pattern, bool is_srcbuf)
+static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
+ unsigned int counter, bool is_srcbuf)
+{
+ u8 diff = actual ^ pattern;
+ u8 expected = pattern | (~counter & PATTERN_COUNT_MASK);
+ const char *thread_name = current->comm;
+
+ if (is_srcbuf)
+ pr_warn("%s: srcbuf[0x%x] overwritten! Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else if ((pattern & PATTERN_COPY)
+ && (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
+ pr_warn("%s: dstbuf[0x%x] not copied! Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else if (diff & PATTERN_SRC)
+ pr_warn("%s: dstbuf[0x%x] was copied! Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else
+ pr_warn("%s: dstbuf[0x%x] mismatch! Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+}
+
+static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
+ unsigned int end, unsigned int counter, u8 pattern,
+ bool is_srcbuf)
{
unsigned int i;
unsigned int error_count = 0;
@@ -272,7 +293,6 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
u8 expected;
u8 *buf;
unsigned int counter_orig = counter;
- struct dmatest_verify_buffer *vb;
for (; (buf = *bufs); bufs++) {
counter = counter_orig;
@@ -280,12 +300,9 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
actual = buf[i];
expected = pattern | (~counter & PATTERN_COUNT_MASK);
if (actual != expected) {
- if (error_count < MAX_ERROR_COUNT && vr) {
- vb = &vr->data[error_count];
- vb->index = i;
- vb->expected = expected;
- vb->actual = actual;
- }
+ if (error_count < MAX_ERROR_COUNT)
+ dmatest_mismatch(actual, pattern, i,
+ counter, is_srcbuf);
error_count++;
}
counter++;
@@ -293,7 +310,7 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
}
if (error_count > MAX_ERROR_COUNT)
- pr_warning("%s: %u errors suppressed\n",
+ pr_warn("%s: %u errors suppressed\n",
current->comm, error_count - MAX_ERROR_COUNT);
return error_count;
@@ -313,20 +330,6 @@ static void dmatest_callback(void *arg)
wake_up_all(done->wait);
}
-static inline void unmap_src(struct device *dev, dma_addr_t *addr, size_t len,
- unsigned int count)
-{
- while (count--)
- dma_unmap_single(dev, addr[count], len, DMA_TO_DEVICE);
-}
-
-static inline void unmap_dst(struct device *dev, dma_addr_t *addr, size_t len,
- unsigned int count)
-{
- while (count--)
- dma_unmap_single(dev, addr[count], len, DMA_BIDIRECTIONAL);
-}
-
static unsigned int min_odd(unsigned int x, unsigned int y)
{
unsigned int val = min(x, y);
@@ -334,172 +337,49 @@ static unsigned int min_odd(unsigned int x, unsigned int y)
return val % 2 ? val : val - 1;
}
-static char *verify_result_get_one(struct dmatest_verify_result *vr,
- unsigned int i)
+static void result(const char *err, unsigned int n, unsigned int src_off,
+ unsigned int dst_off, unsigned int len, unsigned long data)
{
- struct dmatest_verify_buffer *vb = &vr->data[i];
- u8 diff = vb->actual ^ vr->pattern;
- static char buf[512];
- char *msg;
-
- if (vr->is_srcbuf)
- msg = "srcbuf overwritten!";
- else if ((vr->pattern & PATTERN_COPY)
- && (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
- msg = "dstbuf not copied!";
- else if (diff & PATTERN_SRC)
- msg = "dstbuf was copied!";
- else
- msg = "dstbuf mismatch!";
-
- snprintf(buf, sizeof(buf) - 1, "%s [0x%x] Expected %02x, got %02x", msg,
- vb->index, vb->expected, vb->actual);
-
- return buf;
+ pr_info("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)",
+ current->comm, n, err, src_off, dst_off, len, data);
}
-static char *thread_result_get(const char *name,
- struct dmatest_thread_result *tr)
+static void dbg_result(const char *err, unsigned int n, unsigned int src_off,
+ unsigned int dst_off, unsigned int len,
+ unsigned long data)
{
- static const char * const messages[] = {
- [DMATEST_ET_OK] = "No errors",
- [DMATEST_ET_MAP_SRC] = "src mapping error",
- [DMATEST_ET_MAP_DST] = "dst mapping error",
- [DMATEST_ET_PREP] = "prep error",
- [DMATEST_ET_SUBMIT] = "submit error",
- [DMATEST_ET_TIMEOUT] = "test timed out",
- [DMATEST_ET_DMA_ERROR] =
- "got completion callback (DMA_ERROR)",
- [DMATEST_ET_DMA_IN_PROGRESS] =
- "got completion callback (DMA_IN_PROGRESS)",
- [DMATEST_ET_VERIFY] = "errors",
- [DMATEST_ET_VERIFY_BUF] = "verify errors",
- };
- static char buf[512];
-
- snprintf(buf, sizeof(buf) - 1,
- "%s: #%u: %s with src_off=0x%x ""dst_off=0x%x len=0x%x (%lu)",
- name, tr->n, messages[tr->type], tr->src_off, tr->dst_off,
- tr->len, tr->data);
-
- return buf;
+ pr_debug("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)",
+ current->comm, n, err, src_off, dst_off, len, data);
}
-static int thread_result_add(struct dmatest_info *info,
- struct dmatest_result *r, enum dmatest_error_type type,
- unsigned int n, unsigned int src_off, unsigned int dst_off,
- unsigned int len, unsigned long data)
-{
- struct dmatest_thread_result *tr;
-
- tr = kzalloc(sizeof(*tr), GFP_KERNEL);
- if (!tr)
- return -ENOMEM;
-
- tr->type = type;
- tr->n = n;
- tr->src_off = src_off;
- tr->dst_off = dst_off;
- tr->len = len;
- tr->data = data;
+#define verbose_result(err, n, src_off, dst_off, len, data) ({ \
+ if (verbose) \
+ result(err, n, src_off, dst_off, len, data); \
+ else \
+ dbg_result(err, n, src_off, dst_off, len, data); \
+})
- mutex_lock(&info->results_lock);
- list_add_tail(&tr->node, &r->results);
- mutex_unlock(&info->results_lock);
-
- if (tr->type == DMATEST_ET_OK)
- pr_debug("%s\n", thread_result_get(r->name, tr));
- else
- pr_warn("%s\n", thread_result_get(r->name, tr));
-
- return 0;
-}
-
-static unsigned int verify_result_add(struct dmatest_info *info,
- struct dmatest_result *r, unsigned int n,
- unsigned int src_off, unsigned int dst_off, unsigned int len,
- u8 **bufs, int whence, unsigned int counter, u8 pattern,
- bool is_srcbuf)
+static unsigned long long dmatest_persec(s64 runtime, unsigned int val)
{
- struct dmatest_verify_result *vr;
- unsigned int error_count;
- unsigned int buf_off = is_srcbuf ? src_off : dst_off;
- unsigned int start, end;
-
- if (whence < 0) {
- start = 0;
- end = buf_off;
- } else if (whence > 0) {
- start = buf_off + len;
- end = info->params.buf_size;
- } else {
- start = buf_off;
- end = buf_off + len;
- }
+ unsigned long long per_sec = 1000000;
- vr = kmalloc(sizeof(*vr), GFP_KERNEL);
- if (!vr) {
- pr_warn("dmatest: No memory to store verify result\n");
- return dmatest_verify(NULL, bufs, start, end, counter, pattern,
- is_srcbuf);
- }
-
- vr->pattern = pattern;
- vr->is_srcbuf = is_srcbuf;
-
- error_count = dmatest_verify(vr, bufs, start, end, counter, pattern,
- is_srcbuf);
- if (error_count) {
- vr->error_count = error_count;
- thread_result_add(info, r, DMATEST_ET_VERIFY_BUF, n, src_off,
- dst_off, len, (unsigned long)vr);
- return error_count;
- }
-
- kfree(vr);
- return 0;
-}
-
-static void result_free(struct dmatest_info *info, const char *name)
-{
- struct dmatest_result *r, *_r;
-
- mutex_lock(&info->results_lock);
- list_for_each_entry_safe(r, _r, &info->results, node) {
- struct dmatest_thread_result *tr, *_tr;
-
- if (name && strcmp(r->name, name))
- continue;
-
- list_for_each_entry_safe(tr, _tr, &r->results, node) {
- if (tr->type == DMATEST_ET_VERIFY_BUF)
- kfree(tr->vr);
- list_del(&tr->node);
- kfree(tr);
- }
+ if (runtime <= 0)
+ return 0;
- kfree(r->name);
- list_del(&r->node);
- kfree(r);
+ /* drop precision until runtime is 32-bits */
+ while (runtime > UINT_MAX) {
+ runtime >>= 1;
+ per_sec <<= 1;
}
- mutex_unlock(&info->results_lock);
+ per_sec *= val;
+ do_div(per_sec, runtime);
+ return per_sec;
}
-static struct dmatest_result *result_init(struct dmatest_info *info,
- const char *name)
+static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
{
- struct dmatest_result *r;
-
- r = kzalloc(sizeof(*r), GFP_KERNEL);
- if (r) {
- r->name = kstrdup(name, GFP_KERNEL);
- INIT_LIST_HEAD(&r->results);
- mutex_lock(&info->results_lock);
- list_add_tail(&r->node, &info->results);
- mutex_unlock(&info->results_lock);
- }
- return r;
+ return dmatest_persec(runtime, len >> 10);
}
/*
@@ -525,7 +405,6 @@ static int dmatest_func(void *data)
struct dmatest_params *params;
struct dma_chan *chan;
struct dma_device *dev;
- const char *thread_name;
unsigned int src_off, dst_off, len;
unsigned int error_count;
unsigned int failed_tests = 0;
@@ -538,9 +417,10 @@ static int dmatest_func(void *data)
int src_cnt;
int dst_cnt;
int i;
- struct dmatest_result *result;
+ ktime_t ktime;
+ s64 runtime = 0;
+ unsigned long long total_len = 0;
- thread_name = current->comm;
set_freezable();
ret = -ENOMEM;
@@ -570,10 +450,6 @@ static int dmatest_func(void *data)
} else
goto err_thread_type;
- result = result_init(info, thread_name);
- if (!result)
- goto err_srcs;
-
thread->srcs = kcalloc(src_cnt+1, sizeof(u8 *), GFP_KERNEL);
if (!thread->srcs)
goto err_srcs;
@@ -597,17 +473,17 @@ static int dmatest_func(void *data)
set_user_nice(current, 10);
/*
- * src buffers are freed by the DMAEngine code with dma_unmap_single()
- * dst buffers are freed by ourselves below
+ * src and dst buffers are freed by ourselves below
*/
- flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT
- | DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SRC_UNMAP_SINGLE;
+ flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
+ ktime = ktime_get();
while (!kthread_should_stop()
&& !(params->iterations && total_tests >= params->iterations)) {
struct dma_async_tx_descriptor *tx = NULL;
- dma_addr_t dma_srcs[src_cnt];
- dma_addr_t dma_dsts[dst_cnt];
+ struct dmaengine_unmap_data *um;
+ dma_addr_t srcs[src_cnt];
+ dma_addr_t *dsts;
u8 align = 0;
total_tests++;
@@ -626,81 +502,103 @@ static int dmatest_func(void *data)
break;
}
- len = dmatest_random() % params->buf_size + 1;
+ if (params->noverify) {
+ len = params->buf_size;
+ src_off = 0;
+ dst_off = 0;
+ } else {
+ len = dmatest_random() % params->buf_size + 1;
+ len = (len >> align) << align;
+ if (!len)
+ len = 1 << align;
+ src_off = dmatest_random() % (params->buf_size - len + 1);
+ dst_off = dmatest_random() % (params->buf_size - len + 1);
+
+ src_off = (src_off >> align) << align;
+ dst_off = (dst_off >> align) << align;
+
+ dmatest_init_srcs(thread->srcs, src_off, len,
+ params->buf_size);
+ dmatest_init_dsts(thread->dsts, dst_off, len,
+ params->buf_size);
+ }
+
len = (len >> align) << align;
if (!len)
len = 1 << align;
- src_off = dmatest_random() % (params->buf_size - len + 1);
- dst_off = dmatest_random() % (params->buf_size - len + 1);
+ total_len += len;
- src_off = (src_off >> align) << align;
- dst_off = (dst_off >> align) << align;
-
- dmatest_init_srcs(thread->srcs, src_off, len, params->buf_size);
- dmatest_init_dsts(thread->dsts, dst_off, len, params->buf_size);
+ um = dmaengine_get_unmap_data(dev->dev, src_cnt+dst_cnt,
+ GFP_KERNEL);
+ if (!um) {
+ failed_tests++;
+ result("unmap data NULL", total_tests,
+ src_off, dst_off, len, ret);
+ continue;
+ }
+ um->len = params->buf_size;
for (i = 0; i < src_cnt; i++) {
- u8 *buf = thread->srcs[i] + src_off;
-
- dma_srcs[i] = dma_map_single(dev->dev, buf, len,
- DMA_TO_DEVICE);
- ret = dma_mapping_error(dev->dev, dma_srcs[i]);
+ unsigned long buf = (unsigned long) thread->srcs[i];
+ struct page *pg = virt_to_page(buf);
+ unsigned pg_off = buf & ~PAGE_MASK;
+
+ um->addr[i] = dma_map_page(dev->dev, pg, pg_off,
+ um->len, DMA_TO_DEVICE);
+ srcs[i] = um->addr[i] + src_off;
+ ret = dma_mapping_error(dev->dev, um->addr[i]);
if (ret) {
- unmap_src(dev->dev, dma_srcs, len, i);
- thread_result_add(info, result,
- DMATEST_ET_MAP_SRC,
- total_tests, src_off, dst_off,
- len, ret);
+ dmaengine_unmap_put(um);
+ result("src mapping error", total_tests,
+ src_off, dst_off, len, ret);
failed_tests++;
continue;
}
+ um->to_cnt++;
}
/* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
+ dsts = &um->addr[src_cnt];
for (i = 0; i < dst_cnt; i++) {
- dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i],
- params->buf_size,
- DMA_BIDIRECTIONAL);
- ret = dma_mapping_error(dev->dev, dma_dsts[i]);
+ unsigned long buf = (unsigned long) thread->dsts[i];
+ struct page *pg = virt_to_page(buf);
+ unsigned pg_off = buf & ~PAGE_MASK;
+
+ dsts[i] = dma_map_page(dev->dev, pg, pg_off, um->len,
+ DMA_BIDIRECTIONAL);
+ ret = dma_mapping_error(dev->dev, dsts[i]);
if (ret) {
- unmap_src(dev->dev, dma_srcs, len, src_cnt);
- unmap_dst(dev->dev, dma_dsts, params->buf_size,
- i);
- thread_result_add(info, result,
- DMATEST_ET_MAP_DST,
- total_tests, src_off, dst_off,
- len, ret);
+ dmaengine_unmap_put(um);
+ result("dst mapping error", total_tests,
+ src_off, dst_off, len, ret);
failed_tests++;
continue;
}
+ um->bidi_cnt++;
}
if (thread->type == DMA_MEMCPY)
tx = dev->device_prep_dma_memcpy(chan,
- dma_dsts[0] + dst_off,
- dma_srcs[0], len,
- flags);
+ dsts[0] + dst_off,
+ srcs[0], len, flags);
else if (thread->type == DMA_XOR)
tx = dev->device_prep_dma_xor(chan,
- dma_dsts[0] + dst_off,
- dma_srcs, src_cnt,
+ dsts[0] + dst_off,
+ srcs, src_cnt,
len, flags);
else if (thread->type == DMA_PQ) {
dma_addr_t dma_pq[dst_cnt];
for (i = 0; i < dst_cnt; i++)
- dma_pq[i] = dma_dsts[i] + dst_off;
- tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+ dma_pq[i] = dsts[i] + dst_off;
+ tx = dev->device_prep_dma_pq(chan, dma_pq, srcs,
src_cnt, pq_coefs,
len, flags);
}
if (!tx) {
- unmap_src(dev->dev, dma_srcs, len, src_cnt);
- unmap_dst(dev->dev, dma_dsts, params->buf_size,
- dst_cnt);
- thread_result_add(info, result, DMATEST_ET_PREP,
- total_tests, src_off, dst_off,
- len, 0);
+ dmaengine_unmap_put(um);
+ result("prep error", total_tests, src_off,
+ dst_off, len, ret);
msleep(100);
failed_tests++;
continue;
@@ -712,9 +610,9 @@ static int dmatest_func(void *data)
cookie = tx->tx_submit(tx);
if (dma_submit_error(cookie)) {
- thread_result_add(info, result, DMATEST_ET_SUBMIT,
- total_tests, src_off, dst_off,
- len, cookie);
+ dmaengine_unmap_put(um);
+ result("submit error", total_tests, src_off,
+ dst_off, len, ret);
msleep(100);
failed_tests++;
continue;
@@ -735,59 +633,59 @@ static int dmatest_func(void *data)
* free it this time?" dancing. For now, just
* leave it dangling.
*/
- thread_result_add(info, result, DMATEST_ET_TIMEOUT,
- total_tests, src_off, dst_off,
- len, 0);
+ dmaengine_unmap_put(um);
+ result("test timed out", total_tests, src_off, dst_off,
+ len, 0);
failed_tests++;
continue;
- } else if (status != DMA_SUCCESS) {
- enum dmatest_error_type type = (status == DMA_ERROR) ?
- DMATEST_ET_DMA_ERROR : DMATEST_ET_DMA_IN_PROGRESS;
- thread_result_add(info, result, type,
- total_tests, src_off, dst_off,
- len, status);
+ } else if (status != DMA_COMPLETE) {
+ dmaengine_unmap_put(um);
+ result(status == DMA_ERROR ?
+ "completion error status" :
+ "completion busy status", total_tests, src_off,
+ dst_off, len, ret);
failed_tests++;
continue;
}
- /* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */
- unmap_dst(dev->dev, dma_dsts, params->buf_size, dst_cnt);
+ dmaengine_unmap_put(um);
- error_count = 0;
+ if (params->noverify) {
+ verbose_result("test passed", total_tests, src_off,
+ dst_off, len, 0);
+ continue;
+ }
- pr_debug("%s: verifying source buffer...\n", thread_name);
- error_count += verify_result_add(info, result, total_tests,
- src_off, dst_off, len, thread->srcs, -1,
+ pr_debug("%s: verifying source buffer...\n", current->comm);
+ error_count = dmatest_verify(thread->srcs, 0, src_off,
0, PATTERN_SRC, true);
- error_count += verify_result_add(info, result, total_tests,
- src_off, dst_off, len, thread->srcs, 0,
- src_off, PATTERN_SRC | PATTERN_COPY, true);
- error_count += verify_result_add(info, result, total_tests,
- src_off, dst_off, len, thread->srcs, 1,
- src_off + len, PATTERN_SRC, true);
-
- pr_debug("%s: verifying dest buffer...\n", thread_name);
- error_count += verify_result_add(info, result, total_tests,
- src_off, dst_off, len, thread->dsts, -1,
+ error_count += dmatest_verify(thread->srcs, src_off,
+ src_off + len, src_off,
+ PATTERN_SRC | PATTERN_COPY, true);
+ error_count += dmatest_verify(thread->srcs, src_off + len,
+ params->buf_size, src_off + len,
+ PATTERN_SRC, true);
+
+ pr_debug("%s: verifying dest buffer...\n", current->comm);
+ error_count += dmatest_verify(thread->dsts, 0, dst_off,
0, PATTERN_DST, false);
- error_count += verify_result_add(info, result, total_tests,
- src_off, dst_off, len, thread->dsts, 0,
- src_off, PATTERN_SRC | PATTERN_COPY, false);
- error_count += verify_result_add(info, result, total_tests,
- src_off, dst_off, len, thread->dsts, 1,
- dst_off + len, PATTERN_DST, false);
+ error_count += dmatest_verify(thread->dsts, dst_off,
+ dst_off + len, src_off,
+ PATTERN_SRC | PATTERN_COPY, false);
+ error_count += dmatest_verify(thread->dsts, dst_off + len,
+ params->buf_size, dst_off + len,
+ PATTERN_DST, false);
if (error_count) {
- thread_result_add(info, result, DMATEST_ET_VERIFY,
- total_tests, src_off, dst_off,
- len, error_count);
+ result("data error", total_tests, src_off, dst_off,
+ len, error_count);
failed_tests++;
} else {
- thread_result_add(info, result, DMATEST_ET_OK,
- total_tests, src_off, dst_off,
- len, 0);
+ verbose_result("test passed", total_tests, src_off,
+ dst_off, len, 0);
}
}
+ runtime = ktime_us_delta(ktime_get(), ktime);
ret = 0;
for (i = 0; thread->dsts[i]; i++)
@@ -802,20 +700,17 @@ err_srcbuf:
err_srcs:
kfree(pq_coefs);
err_thread_type:
- pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
- thread_name, total_tests, failed_tests, ret);
+ pr_info("%s: summary %u tests, %u failures %llu iops %llu KB/s (%d)\n",
+ current->comm, total_tests, failed_tests,
+ dmatest_persec(runtime, total_tests),
+ dmatest_KBs(runtime, total_len), ret);
/* terminate all transfers on specified channels */
if (ret)
dmaengine_terminate_all(chan);
thread->done = true;
-
- if (params->iterations > 0)
- while (!kthread_should_stop()) {
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit);
- interruptible_sleep_on(&wait_dmatest_exit);
- }
+ wake_up(&thread_wait);
return ret;
}
@@ -828,9 +723,10 @@ static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
ret = kthread_stop(thread->task);
- pr_debug("dmatest: thread %s exited with status %d\n",
- thread->task->comm, ret);
+ pr_debug("thread %s exited with status %d\n",
+ thread->task->comm, ret);
list_del(&thread->node);
+ put_task_struct(thread->task);
kfree(thread);
}
@@ -861,27 +757,27 @@ static int dmatest_add_threads(struct dmatest_info *info,
for (i = 0; i < params->threads_per_chan; i++) {
thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
if (!thread) {
- pr_warning("dmatest: No memory for %s-%s%u\n",
- dma_chan_name(chan), op, i);
-
+ pr_warn("No memory for %s-%s%u\n",
+ dma_chan_name(chan), op, i);
break;
}
thread->info = info;
thread->chan = dtc->chan;
thread->type = type;
smp_wmb();
- thread->task = kthread_run(dmatest_func, thread, "%s-%s%u",
+ thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
dma_chan_name(chan), op, i);
if (IS_ERR(thread->task)) {
- pr_warning("dmatest: Failed to run thread %s-%s%u\n",
- dma_chan_name(chan), op, i);
+ pr_warn("Failed to create thread %s-%s%u\n",
+ dma_chan_name(chan), op, i);
kfree(thread);
break;
}
/* srcbuf and dstbuf are allocated by the thread itself */
-
+ get_task_struct(thread->task);
list_add_tail(&thread->node, &dtc->threads);
+ wake_up_process(thread->task);
}
return i;
@@ -897,7 +793,7 @@ static int dmatest_add_channel(struct dmatest_info *info,
dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
if (!dtc) {
- pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan));
+ pr_warn("No memory for %s\n", dma_chan_name(chan));
return -ENOMEM;
}
@@ -917,7 +813,7 @@ static int dmatest_add_channel(struct dmatest_info *info,
thread_count += cnt > 0 ? cnt : 0;
}
- pr_info("dmatest: Started %u threads using %s\n",
+ pr_info("Started %u threads using %s\n",
thread_count, dma_chan_name(chan));
list_add_tail(&dtc->node, &info->channels);
@@ -937,20 +833,20 @@ static bool filter(struct dma_chan *chan, void *param)
return true;
}
-static int __run_threaded_test(struct dmatest_info *info)
+static void request_channels(struct dmatest_info *info,
+ enum dma_transaction_type type)
{
dma_cap_mask_t mask;
- struct dma_chan *chan;
- struct dmatest_params *params = &info->params;
- int err = 0;
dma_cap_zero(mask);
- dma_cap_set(DMA_MEMCPY, mask);
+ dma_cap_set(type, mask);
for (;;) {
+ struct dmatest_params *params = &info->params;
+ struct dma_chan *chan;
+
chan = dma_request_channel(mask, filter, params);
if (chan) {
- err = dmatest_add_channel(info, chan);
- if (err) {
+ if (dmatest_add_channel(info, chan)) {
dma_release_channel(chan);
break; /* add_channel failed, punt */
}
@@ -960,22 +856,30 @@ static int __run_threaded_test(struct dmatest_info *info)
info->nr_channels >= params->max_channels)
break; /* we have all we need */
}
- return err;
}
-#ifndef MODULE
-static int run_threaded_test(struct dmatest_info *info)
+static void run_threaded_test(struct dmatest_info *info)
{
- int ret;
+ struct dmatest_params *params = &info->params;
- mutex_lock(&info->lock);
- ret = __run_threaded_test(info);
- mutex_unlock(&info->lock);
- return ret;
+ /* Copy test parameters */
+ params->buf_size = test_buf_size;
+ strlcpy(params->channel, strim(test_channel), sizeof(params->channel));
+ strlcpy(params->device, strim(test_device), sizeof(params->device));
+ params->threads_per_chan = threads_per_chan;
+ params->max_channels = max_channels;
+ params->iterations = iterations;
+ params->xor_sources = xor_sources;
+ params->pq_sources = pq_sources;
+ params->timeout = timeout;
+ params->noverify = noverify;
+
+ request_channels(info, DMA_MEMCPY);
+ request_channels(info, DMA_XOR);
+ request_channels(info, DMA_PQ);
}
-#endif
-static void __stop_threaded_test(struct dmatest_info *info)
+static void stop_threaded_test(struct dmatest_info *info)
{
struct dmatest_chan *dtc, *_dtc;
struct dma_chan *chan;
@@ -984,203 +888,86 @@ static void __stop_threaded_test(struct dmatest_info *info)
list_del(&dtc->node);
chan = dtc->chan;
dmatest_cleanup_channel(dtc);
- pr_debug("dmatest: dropped channel %s\n", dma_chan_name(chan));
+ pr_debug("dropped channel %s\n", dma_chan_name(chan));
dma_release_channel(chan);
}
info->nr_channels = 0;
}
-static void stop_threaded_test(struct dmatest_info *info)
+static void restart_threaded_test(struct dmatest_info *info, bool run)
{
- mutex_lock(&info->lock);
- __stop_threaded_test(info);
- mutex_unlock(&info->lock);
-}
-
-static int __restart_threaded_test(struct dmatest_info *info, bool run)
-{
- struct dmatest_params *params = &info->params;
+ /* we might be called early to set run=, defer running until all
+ * parameters have been evaluated
+ */
+ if (!info->did_init)
+ return;
/* Stop any running test first */
- __stop_threaded_test(info);
-
- if (run == false)
- return 0;
-
- /* Clear results from previous run */
- result_free(info, NULL);
-
- /* Copy test parameters */
- params->buf_size = test_buf_size;
- strlcpy(params->channel, strim(test_channel), sizeof(params->channel));
- strlcpy(params->device, strim(test_device), sizeof(params->device));
- params->threads_per_chan = threads_per_chan;
- params->max_channels = max_channels;
- params->iterations = iterations;
- params->xor_sources = xor_sources;
- params->pq_sources = pq_sources;
- params->timeout = timeout;
+ stop_threaded_test(info);
/* Run test with new parameters */
- return __run_threaded_test(info);
-}
-
-static bool __is_threaded_test_run(struct dmatest_info *info)
-{
- struct dmatest_chan *dtc;
-
- list_for_each_entry(dtc, &info->channels, node) {
- struct dmatest_thread *thread;
-
- list_for_each_entry(thread, &dtc->threads, node) {
- if (!thread->done)
- return true;
- }
- }
-
- return false;
+ run_threaded_test(info);
}
-static ssize_t dtf_read_run(struct file *file, char __user *user_buf,
- size_t count, loff_t *ppos)
+static int dmatest_run_get(char *val, const struct kernel_param *kp)
{
- struct dmatest_info *info = file->private_data;
- char buf[3];
+ struct dmatest_info *info = &test_info;
mutex_lock(&info->lock);
-
- if (__is_threaded_test_run(info)) {
- buf[0] = 'Y';
+ if (is_threaded_test_run(info)) {
+ dmatest_run = true;
} else {
- __stop_threaded_test(info);
- buf[0] = 'N';
+ stop_threaded_test(info);
+ dmatest_run = false;
}
-
mutex_unlock(&info->lock);
- buf[1] = '\n';
- buf[2] = 0x00;
- return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
-}
-
-static ssize_t dtf_write_run(struct file *file, const char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct dmatest_info *info = file->private_data;
- char buf[16];
- bool bv;
- int ret = 0;
- if (copy_from_user(buf, user_buf, min(count, (sizeof(buf) - 1))))
- return -EFAULT;
-
- if (strtobool(buf, &bv) == 0) {
- mutex_lock(&info->lock);
-
- if (__is_threaded_test_run(info))
- ret = -EBUSY;
- else
- ret = __restart_threaded_test(info, bv);
-
- mutex_unlock(&info->lock);
- }
-
- return ret ? ret : count;
+ return param_get_bool(val, kp);
}
-static const struct file_operations dtf_run_fops = {
- .read = dtf_read_run,
- .write = dtf_write_run,
- .open = simple_open,
- .llseek = default_llseek,
-};
-
-static int dtf_results_show(struct seq_file *sf, void *data)
+static int dmatest_run_set(const char *val, const struct kernel_param *kp)
{
- struct dmatest_info *info = sf->private;
- struct dmatest_result *result;
- struct dmatest_thread_result *tr;
- unsigned int i;
+ struct dmatest_info *info = &test_info;
+ int ret;
- mutex_lock(&info->results_lock);
- list_for_each_entry(result, &info->results, node) {
- list_for_each_entry(tr, &result->results, node) {
- seq_printf(sf, "%s\n",
- thread_result_get(result->name, tr));
- if (tr->type == DMATEST_ET_VERIFY_BUF) {
- for (i = 0; i < tr->vr->error_count; i++) {
- seq_printf(sf, "\t%s\n",
- verify_result_get_one(tr->vr, i));
- }
- }
- }
+ mutex_lock(&info->lock);
+ ret = param_set_bool(val, kp);
+ if (ret) {
+ mutex_unlock(&info->lock);
+ return ret;
}
- mutex_unlock(&info->results_lock);
- return 0;
-}
-
-static int dtf_results_open(struct inode *inode, struct file *file)
-{
- return single_open(file, dtf_results_show, inode->i_private);
-}
-
-static const struct file_operations dtf_results_fops = {
- .open = dtf_results_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int dmatest_register_dbgfs(struct dmatest_info *info)
-{
- struct dentry *d;
-
- d = debugfs_create_dir("dmatest", NULL);
- if (IS_ERR(d))
- return PTR_ERR(d);
- if (!d)
- goto err_root;
+ if (is_threaded_test_run(info))
+ ret = -EBUSY;
+ else if (dmatest_run)
+ restart_threaded_test(info, dmatest_run);
- info->root = d;
-
- /* Run or stop threaded test */
- debugfs_create_file("run", S_IWUSR | S_IRUGO, info->root, info,
- &dtf_run_fops);
-
- /* Results of test in progress */
- debugfs_create_file("results", S_IRUGO, info->root, info,
- &dtf_results_fops);
-
- return 0;
+ mutex_unlock(&info->lock);
-err_root:
- pr_err("dmatest: Failed to initialize debugfs\n");
- return -ENOMEM;
+ return ret;
}
static int __init dmatest_init(void)
{
struct dmatest_info *info = &test_info;
- int ret;
-
- memset(info, 0, sizeof(*info));
+ struct dmatest_params *params = &info->params;
- mutex_init(&info->lock);
- INIT_LIST_HEAD(&info->channels);
+ if (dmatest_run) {
+ mutex_lock(&info->lock);
+ run_threaded_test(info);
+ mutex_unlock(&info->lock);
+ }
- mutex_init(&info->results_lock);
- INIT_LIST_HEAD(&info->results);
+ if (params->iterations && wait)
+ wait_event(thread_wait, !is_threaded_test_run(info));
- ret = dmatest_register_dbgfs(info);
- if (ret)
- return ret;
+ /* module parameters are stable, inittime tests are started,
+ * let userspace take over 'run' control
+ */
+ info->did_init = true;
-#ifdef MODULE
return 0;
-#else
- return run_threaded_test(info);
-#endif
}
/* when compiled-in wait for drivers to load first */
late_initcall(dmatest_init);
@@ -1189,9 +976,9 @@ static void __exit dmatest_exit(void)
{
struct dmatest_info *info = &test_info;
- debugfs_remove_recursive(info->root);
+ mutex_lock(&info->lock);
stop_threaded_test(info);
- result_free(info, NULL);
+ mutex_unlock(&info->lock);
}
module_exit(dmatest_exit);
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index 89eb89f22284..7516be4677cf 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -85,10 +85,6 @@ static struct device *chan2dev(struct dma_chan *chan)
{
return &chan->dev->device;
}
-static struct device *chan2parent(struct dma_chan *chan)
-{
- return chan->dev->device.parent;
-}
static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
{
@@ -311,26 +307,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc,
list_splice_init(&desc->tx_list, &dwc->free_list);
list_move(&desc->desc_node, &dwc->free_list);
- if (!is_slave_direction(dwc->direction)) {
- struct device *parent = chan2parent(&dwc->chan);
- if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
- dma_unmap_single(parent, desc->lli.dar,
- desc->total_len, DMA_FROM_DEVICE);
- else
- dma_unmap_page(parent, desc->lli.dar,
- desc->total_len, DMA_FROM_DEVICE);
- }
- if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
- dma_unmap_single(parent, desc->lli.sar,
- desc->total_len, DMA_TO_DEVICE);
- else
- dma_unmap_page(parent, desc->lli.sar,
- desc->total_len, DMA_TO_DEVICE);
- }
- }
-
+ dma_descriptor_unmap(txd);
spin_unlock_irqrestore(&dwc->lock, flags);
if (callback)
@@ -1098,13 +1075,13 @@ dwc_tx_status(struct dma_chan *chan,
enum dma_status ret;
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret == DMA_SUCCESS)
+ if (ret == DMA_COMPLETE)
return ret;
dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret != DMA_SUCCESS)
+ if (ret != DMA_COMPLETE)
dma_set_residue(txstate, dwc_get_residue(dwc));
if (dwc->paused && ret == DMA_IN_PROGRESS)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index bef8a368c8dd..2539ea0cbc63 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -46,14 +46,21 @@
#define EDMA_CHANS 64
#endif /* CONFIG_ARCH_DAVINCI_DA8XX */
-/* Max of 16 segments per channel to conserve PaRAM slots */
-#define MAX_NR_SG 16
+/*
+ * Max of 20 segments per channel to conserve PaRAM slots
+ * Also note that MAX_NR_SG should be atleast the no.of periods
+ * that are required for ASoC, otherwise DMA prep calls will
+ * fail. Today davinci-pcm is the only user of this driver and
+ * requires atleast 17 slots, so we setup the default to 20.
+ */
+#define MAX_NR_SG 20
#define EDMA_MAX_SLOTS MAX_NR_SG
#define EDMA_DESCRIPTORS 16
struct edma_desc {
struct virt_dma_desc vdesc;
struct list_head node;
+ int cyclic;
int absync;
int pset_nr;
int processed;
@@ -167,8 +174,13 @@ static void edma_execute(struct edma_chan *echan)
* then setup a link to the dummy slot, this results in all future
* events being absorbed and that's OK because we're done
*/
- if (edesc->processed == edesc->pset_nr)
- edma_link(echan->slot[nslots-1], echan->ecc->dummy_slot);
+ if (edesc->processed == edesc->pset_nr) {
+ if (edesc->cyclic)
+ edma_link(echan->slot[nslots-1], echan->slot[1]);
+ else
+ edma_link(echan->slot[nslots-1],
+ echan->ecc->dummy_slot);
+ }
edma_resume(echan->ch_num);
@@ -250,6 +262,117 @@ static int edma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
return ret;
}
+/*
+ * A PaRAM set configuration abstraction used by other modes
+ * @chan: Channel who's PaRAM set we're configuring
+ * @pset: PaRAM set to initialize and setup.
+ * @src_addr: Source address of the DMA
+ * @dst_addr: Destination address of the DMA
+ * @burst: In units of dev_width, how much to send
+ * @dev_width: How much is the dev_width
+ * @dma_length: Total length of the DMA transfer
+ * @direction: Direction of the transfer
+ */
+static int edma_config_pset(struct dma_chan *chan, struct edmacc_param *pset,
+ dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
+ enum dma_slave_buswidth dev_width, unsigned int dma_length,
+ enum dma_transfer_direction direction)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+ struct device *dev = chan->device->dev;
+ int acnt, bcnt, ccnt, cidx;
+ int src_bidx, dst_bidx, src_cidx, dst_cidx;
+ int absync;
+
+ acnt = dev_width;
+ /*
+ * If the maxburst is equal to the fifo width, use
+ * A-synced transfers. This allows for large contiguous
+ * buffer transfers using only one PaRAM set.
+ */
+ if (burst == 1) {
+ /*
+ * For the A-sync case, bcnt and ccnt are the remainder
+ * and quotient respectively of the division of:
+ * (dma_length / acnt) by (SZ_64K -1). This is so
+ * that in case bcnt over flows, we have ccnt to use.
+ * Note: In A-sync tranfer only, bcntrld is used, but it
+ * only applies for sg_dma_len(sg) >= SZ_64K.
+ * In this case, the best way adopted is- bccnt for the
+ * first frame will be the remainder below. Then for
+ * every successive frame, bcnt will be SZ_64K-1. This
+ * is assured as bcntrld = 0xffff in end of function.
+ */
+ absync = false;
+ ccnt = dma_length / acnt / (SZ_64K - 1);
+ bcnt = dma_length / acnt - ccnt * (SZ_64K - 1);
+ /*
+ * If bcnt is non-zero, we have a remainder and hence an
+ * extra frame to transfer, so increment ccnt.
+ */
+ if (bcnt)
+ ccnt++;
+ else
+ bcnt = SZ_64K - 1;
+ cidx = acnt;
+ } else {
+ /*
+ * If maxburst is greater than the fifo address_width,
+ * use AB-synced transfers where A count is the fifo
+ * address_width and B count is the maxburst. In this
+ * case, we are limited to transfers of C count frames
+ * of (address_width * maxburst) where C count is limited
+ * to SZ_64K-1. This places an upper bound on the length
+ * of an SG segment that can be handled.
+ */
+ absync = true;
+ bcnt = burst;
+ ccnt = dma_length / (acnt * bcnt);
+ if (ccnt > (SZ_64K - 1)) {
+ dev_err(dev, "Exceeded max SG segment size\n");
+ return -EINVAL;
+ }
+ cidx = acnt * bcnt;
+ }
+
+ if (direction == DMA_MEM_TO_DEV) {
+ src_bidx = acnt;
+ src_cidx = cidx;
+ dst_bidx = 0;
+ dst_cidx = 0;
+ } else if (direction == DMA_DEV_TO_MEM) {
+ src_bidx = 0;
+ src_cidx = 0;
+ dst_bidx = acnt;
+ dst_cidx = cidx;
+ } else {
+ dev_err(dev, "%s: direction not implemented yet\n", __func__);
+ return -EINVAL;
+ }
+
+ pset->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
+ /* Configure A or AB synchronized transfers */
+ if (absync)
+ pset->opt |= SYNCDIM;
+
+ pset->src = src_addr;
+ pset->dst = dst_addr;
+
+ pset->src_dst_bidx = (dst_bidx << 16) | src_bidx;
+ pset->src_dst_cidx = (dst_cidx << 16) | src_cidx;
+
+ pset->a_b_cnt = bcnt << 16 | acnt;
+ pset->ccnt = ccnt;
+ /*
+ * Only time when (bcntrld) auto reload is required is for
+ * A-sync case, and in this case, a requirement of reload value
+ * of SZ_64K-1 only is assured. 'link' is initially set to NULL
+ * and then later will be populated by edma_execute.
+ */
+ pset->link_bcntrld = 0xffffffff;
+ return absync;
+}
+
static struct dma_async_tx_descriptor *edma_prep_slave_sg(
struct dma_chan *chan, struct scatterlist *sgl,
unsigned int sg_len, enum dma_transfer_direction direction,
@@ -258,23 +381,21 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
struct edma_chan *echan = to_edma_chan(chan);
struct device *dev = chan->device->dev;
struct edma_desc *edesc;
- dma_addr_t dev_addr;
+ dma_addr_t src_addr = 0, dst_addr = 0;
enum dma_slave_buswidth dev_width;
u32 burst;
struct scatterlist *sg;
- int acnt, bcnt, ccnt, src, dst, cidx;
- int src_bidx, dst_bidx, src_cidx, dst_cidx;
- int i, nslots;
+ int i, nslots, ret;
if (unlikely(!echan || !sgl || !sg_len))
return NULL;
if (direction == DMA_DEV_TO_MEM) {
- dev_addr = echan->cfg.src_addr;
+ src_addr = echan->cfg.src_addr;
dev_width = echan->cfg.src_addr_width;
burst = echan->cfg.src_maxburst;
} else if (direction == DMA_MEM_TO_DEV) {
- dev_addr = echan->cfg.dst_addr;
+ dst_addr = echan->cfg.dst_addr;
dev_width = echan->cfg.dst_addr_width;
burst = echan->cfg.dst_maxburst;
} else {
@@ -307,7 +428,6 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
if (echan->slot[i] < 0) {
kfree(edesc);
dev_err(dev, "Failed to allocate slot\n");
- kfree(edesc);
return NULL;
}
}
@@ -315,64 +435,21 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
/* Configure PaRAM sets for each SG */
for_each_sg(sgl, sg, sg_len, i) {
-
- acnt = dev_width;
-
- /*
- * If the maxburst is equal to the fifo width, use
- * A-synced transfers. This allows for large contiguous
- * buffer transfers using only one PaRAM set.
- */
- if (burst == 1) {
- edesc->absync = false;
- ccnt = sg_dma_len(sg) / acnt / (SZ_64K - 1);
- bcnt = sg_dma_len(sg) / acnt - ccnt * (SZ_64K - 1);
- if (bcnt)
- ccnt++;
- else
- bcnt = SZ_64K - 1;
- cidx = acnt;
- /*
- * If maxburst is greater than the fifo address_width,
- * use AB-synced transfers where A count is the fifo
- * address_width and B count is the maxburst. In this
- * case, we are limited to transfers of C count frames
- * of (address_width * maxburst) where C count is limited
- * to SZ_64K-1. This places an upper bound on the length
- * of an SG segment that can be handled.
- */
- } else {
- edesc->absync = true;
- bcnt = burst;
- ccnt = sg_dma_len(sg) / (acnt * bcnt);
- if (ccnt > (SZ_64K - 1)) {
- dev_err(dev, "Exceeded max SG segment size\n");
- kfree(edesc);
- return NULL;
- }
- cidx = acnt * bcnt;
+ /* Get address for each SG */
+ if (direction == DMA_DEV_TO_MEM)
+ dst_addr = sg_dma_address(sg);
+ else
+ src_addr = sg_dma_address(sg);
+
+ ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+ dst_addr, burst, dev_width,
+ sg_dma_len(sg), direction);
+ if (ret < 0) {
+ kfree(edesc);
+ return NULL;
}
- if (direction == DMA_MEM_TO_DEV) {
- src = sg_dma_address(sg);
- dst = dev_addr;
- src_bidx = acnt;
- src_cidx = cidx;
- dst_bidx = 0;
- dst_cidx = 0;
- } else {
- src = dev_addr;
- dst = sg_dma_address(sg);
- src_bidx = 0;
- src_cidx = 0;
- dst_bidx = acnt;
- dst_cidx = cidx;
- }
-
- edesc->pset[i].opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
- /* Configure A or AB synchronized transfers */
- if (edesc->absync)
- edesc->pset[i].opt |= SYNCDIM;
+ edesc->absync = ret;
/* If this is the last in a current SG set of transactions,
enable interrupts so that next set is processed */
@@ -382,17 +459,138 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
/* If this is the last set, enable completion interrupt flag */
if (i == sg_len - 1)
edesc->pset[i].opt |= TCINTEN;
+ }
- edesc->pset[i].src = src;
- edesc->pset[i].dst = dst;
+ return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
- edesc->pset[i].src_dst_bidx = (dst_bidx << 16) | src_bidx;
- edesc->pset[i].src_dst_cidx = (dst_cidx << 16) | src_cidx;
+static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long tx_flags, void *context)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+ struct device *dev = chan->device->dev;
+ struct edma_desc *edesc;
+ dma_addr_t src_addr, dst_addr;
+ enum dma_slave_buswidth dev_width;
+ u32 burst;
+ int i, ret, nslots;
+
+ if (unlikely(!echan || !buf_len || !period_len))
+ return NULL;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ src_addr = echan->cfg.src_addr;
+ dst_addr = buf_addr;
+ dev_width = echan->cfg.src_addr_width;
+ burst = echan->cfg.src_maxburst;
+ } else if (direction == DMA_MEM_TO_DEV) {
+ src_addr = buf_addr;
+ dst_addr = echan->cfg.dst_addr;
+ dev_width = echan->cfg.dst_addr_width;
+ burst = echan->cfg.dst_maxburst;
+ } else {
+ dev_err(dev, "%s: bad direction?\n", __func__);
+ return NULL;
+ }
+
+ if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+ dev_err(dev, "Undefined slave buswidth\n");
+ return NULL;
+ }
+
+ if (unlikely(buf_len % period_len)) {
+ dev_err(dev, "Period should be multiple of Buffer length\n");
+ return NULL;
+ }
+
+ nslots = (buf_len / period_len) + 1;
+
+ /*
+ * Cyclic DMA users such as audio cannot tolerate delays introduced
+ * by cases where the number of periods is more than the maximum
+ * number of SGs the EDMA driver can handle at a time. For DMA types
+ * such as Slave SGs, such delays are tolerable and synchronized,
+ * but the synchronization is difficult to achieve with Cyclic and
+ * cannot be guaranteed, so we error out early.
+ */
+ if (nslots > MAX_NR_SG)
+ return NULL;
+
+ edesc = kzalloc(sizeof(*edesc) + nslots *
+ sizeof(edesc->pset[0]), GFP_ATOMIC);
+ if (!edesc) {
+ dev_dbg(dev, "Failed to allocate a descriptor\n");
+ return NULL;
+ }
+
+ edesc->cyclic = 1;
+ edesc->pset_nr = nslots;
+
+ dev_dbg(dev, "%s: nslots=%d\n", __func__, nslots);
+ dev_dbg(dev, "%s: period_len=%d\n", __func__, period_len);
+ dev_dbg(dev, "%s: buf_len=%d\n", __func__, buf_len);
+
+ for (i = 0; i < nslots; i++) {
+ /* Allocate a PaRAM slot, if needed */
+ if (echan->slot[i] < 0) {
+ echan->slot[i] =
+ edma_alloc_slot(EDMA_CTLR(echan->ch_num),
+ EDMA_SLOT_ANY);
+ if (echan->slot[i] < 0) {
+ dev_err(dev, "Failed to allocate slot\n");
+ return NULL;
+ }
+ }
+
+ if (i == nslots - 1) {
+ memcpy(&edesc->pset[i], &edesc->pset[0],
+ sizeof(edesc->pset[0]));
+ break;
+ }
+
+ ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+ dst_addr, burst, dev_width, period_len,
+ direction);
+ if (ret < 0)
+ return NULL;
- edesc->pset[i].a_b_cnt = bcnt << 16 | acnt;
- edesc->pset[i].ccnt = ccnt;
- edesc->pset[i].link_bcntrld = 0xffffffff;
+ if (direction == DMA_DEV_TO_MEM)
+ dst_addr += period_len;
+ else
+ src_addr += period_len;
+ dev_dbg(dev, "%s: Configure period %d of buf:\n", __func__, i);
+ dev_dbg(dev,
+ "\n pset[%d]:\n"
+ " chnum\t%d\n"
+ " slot\t%d\n"
+ " opt\t%08x\n"
+ " src\t%08x\n"
+ " dst\t%08x\n"
+ " abcnt\t%08x\n"
+ " ccnt\t%08x\n"
+ " bidx\t%08x\n"
+ " cidx\t%08x\n"
+ " lkrld\t%08x\n",
+ i, echan->ch_num, echan->slot[i],
+ edesc->pset[i].opt,
+ edesc->pset[i].src,
+ edesc->pset[i].dst,
+ edesc->pset[i].a_b_cnt,
+ edesc->pset[i].ccnt,
+ edesc->pset[i].src_dst_bidx,
+ edesc->pset[i].src_dst_cidx,
+ edesc->pset[i].link_bcntrld);
+
+ edesc->absync = ret;
+
+ /*
+ * Enable interrupts for every period because callback
+ * has to be called for every period.
+ */
+ edesc->pset[i].opt |= TCINTEN;
}
return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
@@ -406,30 +604,34 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
unsigned long flags;
struct edmacc_param p;
- /* Pause the channel */
- edma_pause(echan->ch_num);
+ edesc = echan->edesc;
+
+ /* Pause the channel for non-cyclic */
+ if (!edesc || (edesc && !edesc->cyclic))
+ edma_pause(echan->ch_num);
switch (ch_status) {
- case DMA_COMPLETE:
+ case EDMA_DMA_COMPLETE:
spin_lock_irqsave(&echan->vchan.lock, flags);
- edesc = echan->edesc;
if (edesc) {
- if (edesc->processed == edesc->pset_nr) {
+ if (edesc->cyclic) {
+ vchan_cyclic_callback(&edesc->vdesc);
+ } else if (edesc->processed == edesc->pset_nr) {
dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num);
edma_stop(echan->ch_num);
vchan_cookie_complete(&edesc->vdesc);
+ edma_execute(echan);
} else {
dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num);
+ edma_execute(echan);
}
-
- edma_execute(echan);
}
spin_unlock_irqrestore(&echan->vchan.lock, flags);
break;
- case DMA_CC_ERROR:
+ case EDMA_DMA_CC_ERROR:
spin_lock_irqsave(&echan->vchan.lock, flags);
edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p);
@@ -579,7 +781,7 @@ static enum dma_status edma_tx_status(struct dma_chan *chan,
unsigned long flags;
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret == DMA_SUCCESS || !txstate)
+ if (ret == DMA_COMPLETE || !txstate)
return ret;
spin_lock_irqsave(&echan->vchan.lock, flags);
@@ -619,6 +821,7 @@ static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma,
struct device *dev)
{
dma->device_prep_slave_sg = edma_prep_slave_sg;
+ dma->device_prep_dma_cyclic = edma_prep_dma_cyclic;
dma->device_alloc_chan_resources = edma_alloc_chan_resources;
dma->device_free_chan_resources = edma_free_chan_resources;
dma->device_issue_pending = edma_issue_pending;
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index 591cd8c63abb..cb4bf682a708 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -733,28 +733,6 @@ static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac)
spin_unlock_irqrestore(&edmac->lock, flags);
}
-static void ep93xx_dma_unmap_buffers(struct ep93xx_dma_desc *desc)
-{
- struct device *dev = desc->txd.chan->device->dev;
-
- if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
- dma_unmap_single(dev, desc->src_addr, desc->size,
- DMA_TO_DEVICE);
- else
- dma_unmap_page(dev, desc->src_addr, desc->size,
- DMA_TO_DEVICE);
- }
- if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
- dma_unmap_single(dev, desc->dst_addr, desc->size,
- DMA_FROM_DEVICE);
- else
- dma_unmap_page(dev, desc->dst_addr, desc->size,
- DMA_FROM_DEVICE);
- }
-}
-
static void ep93xx_dma_tasklet(unsigned long data)
{
struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data;
@@ -787,13 +765,7 @@ static void ep93xx_dma_tasklet(unsigned long data)
/* Now we can release all the chained descriptors */
list_for_each_entry_safe(desc, d, &list, node) {
- /*
- * For the memcpy channels the API requires us to unmap the
- * buffers unless requested otherwise.
- */
- if (!edmac->chan.private)
- ep93xx_dma_unmap_buffers(desc);
-
+ dma_descriptor_unmap(&desc->txd);
ep93xx_dma_desc_put(edmac, desc);
}
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 61517dd0d0b7..7086a16a55f2 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -870,22 +870,7 @@ static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
/* Run any dependencies */
dma_run_dependencies(txd);
- /* Unmap the dst buffer, if requested */
- if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
- dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
- else
- dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
- }
-
- /* Unmap the src buffer, if requested */
- if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
- dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
- else
- dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
- }
-
+ dma_descriptor_unmap(txd);
#ifdef FSL_DMA_LD_DEBUG
chan_dbg(chan, "LD %p free\n", desc);
#endif
@@ -1255,7 +1240,9 @@ static int fsl_dma_chan_probe(struct fsldma_device *fdev,
WARN_ON(fdev->feature != chan->feature);
chan->dev = fdev->dev;
- chan->id = ((res.start - 0x100) & 0xfff) >> 7;
+ chan->id = (res.start & 0xfff) < 0x300 ?
+ ((res.start - 0x100) & 0xfff) >> 7 :
+ ((res.start - 0x200) & 0xfff) >> 7;
if (chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
dev_err(fdev->dev, "too many channels for device\n");
err = -EINVAL;
@@ -1428,6 +1415,7 @@ static int fsldma_of_remove(struct platform_device *op)
}
static const struct of_device_id fsldma_of_ids[] = {
+ { .compatible = "fsl,elo3-dma", },
{ .compatible = "fsl,eloplus-dma", },
{ .compatible = "fsl,elo-dma", },
{}
@@ -1449,7 +1437,7 @@ static struct platform_driver fsldma_of_driver = {
static __init int fsldma_init(void)
{
- pr_info("Freescale Elo / Elo Plus DMA driver\n");
+ pr_info("Freescale Elo series DMA driver\n");
return platform_driver_register(&fsldma_of_driver);
}
@@ -1461,5 +1449,5 @@ static void __exit fsldma_exit(void)
subsys_initcall(fsldma_init);
module_exit(fsldma_exit);
-MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_DESCRIPTION("Freescale Elo series DMA driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index f5c38791fc74..1ffc24484d23 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -112,7 +112,7 @@ struct fsldma_chan_regs {
};
struct fsldma_chan;
-#define FSL_DMA_MAX_CHANS_PER_DEVICE 4
+#define FSL_DMA_MAX_CHANS_PER_DEVICE 8
struct fsldma_device {
void __iomem *regs; /* DGSR register base */
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 55852c026791..6f9ac2022abd 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -572,9 +572,11 @@ static int imxdma_xfer_desc(struct imxdma_desc *d)
imx_dmav1_writel(imxdma, d->len, DMA_CNTR(imxdmac->channel));
- dev_dbg(imxdma->dev, "%s channel: %d dest=0x%08x src=0x%08x "
- "dma_length=%d\n", __func__, imxdmac->channel,
- d->dest, d->src, d->len);
+ dev_dbg(imxdma->dev,
+ "%s channel: %d dest=0x%08llx src=0x%08llx dma_length=%zu\n",
+ __func__, imxdmac->channel,
+ (unsigned long long)d->dest,
+ (unsigned long long)d->src, d->len);
break;
/* Cyclic transfer is the same as slave_sg with special sg configuration. */
@@ -586,20 +588,22 @@ static int imxdma_xfer_desc(struct imxdma_desc *d)
imx_dmav1_writel(imxdma, imxdmac->ccr_from_device,
DMA_CCR(imxdmac->channel));
- dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
- "total length=%d dev_addr=0x%08x (dev2mem)\n",
- __func__, imxdmac->channel, d->sg, d->sgcount,
- d->len, imxdmac->per_address);
+ dev_dbg(imxdma->dev,
+ "%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (dev2mem)\n",
+ __func__, imxdmac->channel,
+ d->sg, d->sgcount, d->len,
+ (unsigned long long)imxdmac->per_address);
} else if (d->direction == DMA_MEM_TO_DEV) {
imx_dmav1_writel(imxdma, imxdmac->per_address,
DMA_DAR(imxdmac->channel));
imx_dmav1_writel(imxdma, imxdmac->ccr_to_device,
DMA_CCR(imxdmac->channel));
- dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
- "total length=%d dev_addr=0x%08x (mem2dev)\n",
- __func__, imxdmac->channel, d->sg, d->sgcount,
- d->len, imxdmac->per_address);
+ dev_dbg(imxdma->dev,
+ "%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (mem2dev)\n",
+ __func__, imxdmac->channel,
+ d->sg, d->sgcount, d->len,
+ (unsigned long long)imxdmac->per_address);
} else {
dev_err(imxdma->dev, "%s channel: %d bad dma mode\n",
__func__, imxdmac->channel);
@@ -771,7 +775,7 @@ static int imxdma_alloc_chan_resources(struct dma_chan *chan)
desc->desc.tx_submit = imxdma_tx_submit;
/* txd.flags will be overwritten in prep funcs */
desc->desc.flags = DMA_CTRL_ACK;
- desc->status = DMA_SUCCESS;
+ desc->status = DMA_COMPLETE;
list_add_tail(&desc->node, &imxdmac->ld_free);
imxdmac->descs_allocated++;
@@ -870,7 +874,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
int i;
unsigned int periods = buf_len / period_len;
- dev_dbg(imxdma->dev, "%s channel: %d buf_len=%d period_len=%d\n",
+ dev_dbg(imxdma->dev, "%s channel: %d buf_len=%zu period_len=%zu\n",
__func__, imxdmac->channel, buf_len, period_len);
if (list_empty(&imxdmac->ld_free) ||
@@ -926,8 +930,9 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_memcpy(
struct imxdma_engine *imxdma = imxdmac->imxdma;
struct imxdma_desc *desc;
- dev_dbg(imxdma->dev, "%s channel: %d src=0x%x dst=0x%x len=%d\n",
- __func__, imxdmac->channel, src, dest, len);
+ dev_dbg(imxdma->dev, "%s channel: %d src=0x%llx dst=0x%llx len=%zu\n",
+ __func__, imxdmac->channel, (unsigned long long)src,
+ (unsigned long long)dest, len);
if (list_empty(&imxdmac->ld_free) ||
imxdma_chan_is_doing_cyclic(imxdmac))
@@ -956,9 +961,10 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved(
struct imxdma_engine *imxdma = imxdmac->imxdma;
struct imxdma_desc *desc;
- dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%x dst_start=0x%x\n"
- " src_sgl=%s dst_sgl=%s numf=%d frame_size=%d\n", __func__,
- imxdmac->channel, xt->src_start, xt->dst_start,
+ dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%llx dst_start=0x%llx\n"
+ " src_sgl=%s dst_sgl=%s numf=%zu frame_size=%zu\n", __func__,
+ imxdmac->channel, (unsigned long long)xt->src_start,
+ (unsigned long long) xt->dst_start,
xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false",
xt->numf, xt->frame_size);
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index c1fd504cae28..c75679d42028 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -638,7 +638,7 @@ static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
if (error)
sdmac->status = DMA_ERROR;
else
- sdmac->status = DMA_SUCCESS;
+ sdmac->status = DMA_COMPLETE;
dma_cookie_complete(&sdmac->desc);
if (sdmac->desc.callback)
@@ -1089,8 +1089,8 @@ static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
param &= ~BD_CONT;
}
- dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
- i, count, sg->dma_address,
+ dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n",
+ i, count, (u64)sg->dma_address,
param & BD_WRAP ? "wrap" : "",
param & BD_INTR ? " intr" : "");
@@ -1163,8 +1163,8 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
if (i + 1 == num_periods)
param |= BD_WRAP;
- dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
- i, period_len, dma_addr,
+ dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n",
+ i, period_len, (u64)dma_addr,
param & BD_WRAP ? "wrap" : "",
param & BD_INTR ? " intr" : "");
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index a975ebebea8a..1aab8130efa1 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -309,7 +309,7 @@ static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
callback_txd(param_txd);
}
if (midc->raw_tfr) {
- desc->status = DMA_SUCCESS;
+ desc->status = DMA_COMPLETE;
if (desc->lli != NULL) {
pci_pool_free(desc->lli_pool, desc->lli,
desc->lli_phys);
@@ -481,7 +481,7 @@ static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan,
enum dma_status ret;
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret != DMA_SUCCESS) {
+ if (ret != DMA_COMPLETE) {
spin_lock_bh(&midc->lock);
midc_scan_descriptors(to_middma_device(chan->device), midc);
spin_unlock_bh(&midc->lock);
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 5ff6fc1819dc..1a49c777607c 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -531,21 +531,6 @@ static void ioat1_cleanup_event(unsigned long data)
writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
}
-void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
- size_t len, struct ioat_dma_descriptor *hw)
-{
- struct pci_dev *pdev = chan->device->pdev;
- size_t offset = len - hw->size;
-
- if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
- ioat_unmap(pdev, hw->dst_addr - offset, len,
- PCI_DMA_FROMDEVICE, flags, 1);
-
- if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
- ioat_unmap(pdev, hw->src_addr - offset, len,
- PCI_DMA_TODEVICE, flags, 0);
-}
-
dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan)
{
dma_addr_t phys_complete;
@@ -602,7 +587,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete)
dump_desc_dbg(ioat, desc);
if (tx->cookie) {
dma_cookie_complete(tx);
- ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+ dma_descriptor_unmap(tx);
ioat->active -= desc->hw->tx_cnt;
if (tx->callback) {
tx->callback(tx->callback_param);
@@ -733,7 +718,7 @@ ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
enum dma_status ret;
ret = dma_cookie_status(c, cookie, txstate);
- if (ret == DMA_SUCCESS)
+ if (ret == DMA_COMPLETE)
return ret;
device->cleanup_fn((unsigned long) c);
@@ -833,8 +818,7 @@ int ioat_dma_self_test(struct ioatdma_device *device)
dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
- flags = DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP |
- DMA_PREP_INTERRUPT;
+ flags = DMA_PREP_INTERRUPT;
tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
IOAT_TEST_SIZE, flags);
if (!tx) {
@@ -859,7 +843,7 @@ int ioat_dma_self_test(struct ioatdma_device *device)
if (tmo == 0 ||
dma->device_tx_status(dma_chan, cookie, NULL)
- != DMA_SUCCESS) {
+ != DMA_COMPLETE) {
dev_err(dev, "Self-test copy timed out, disabling\n");
err = -ENODEV;
goto unmap_dma;
@@ -885,8 +869,7 @@ static char ioat_interrupt_style[32] = "msix";
module_param_string(ioat_interrupt_style, ioat_interrupt_style,
sizeof(ioat_interrupt_style), 0644);
MODULE_PARM_DESC(ioat_interrupt_style,
- "set ioat interrupt style: msix (default), "
- "msix-single-vector, msi, intx)");
+ "set ioat interrupt style: msix (default), msi, intx");
/**
* ioat_dma_setup_interrupts - setup interrupt handler
@@ -904,8 +887,6 @@ int ioat_dma_setup_interrupts(struct ioatdma_device *device)
if (!strcmp(ioat_interrupt_style, "msix"))
goto msix;
- if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
- goto msix_single_vector;
if (!strcmp(ioat_interrupt_style, "msi"))
goto msi;
if (!strcmp(ioat_interrupt_style, "intx"))
@@ -920,10 +901,8 @@ msix:
device->msix_entries[i].entry = i;
err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
- if (err < 0)
+ if (err)
goto msi;
- if (err > 0)
- goto msix_single_vector;
for (i = 0; i < msixcnt; i++) {
msix = &device->msix_entries[i];
@@ -937,29 +916,13 @@ msix:
chan = ioat_chan_by_index(device, j);
devm_free_irq(dev, msix->vector, chan);
}
- goto msix_single_vector;
+ goto msi;
}
}
intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
device->irq_mode = IOAT_MSIX;
goto done;
-msix_single_vector:
- msix = &device->msix_entries[0];
- msix->entry = 0;
- err = pci_enable_msix(pdev, device->msix_entries, 1);
- if (err)
- goto msi;
-
- err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
- "ioat-msix", device);
- if (err) {
- pci_disable_msix(pdev);
- goto msi;
- }
- device->irq_mode = IOAT_MSIX_SINGLE;
- goto done;
-
msi:
err = pci_enable_msi(pdev);
if (err)
@@ -971,7 +934,7 @@ msi:
pci_disable_msi(pdev);
goto intx;
}
- device->irq_mode = IOAT_MSIX;
+ device->irq_mode = IOAT_MSI;
goto done;
intx:
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 54fb7b9ff9aa..11fb877ddca9 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -52,7 +52,6 @@
enum ioat_irq_mode {
IOAT_NOIRQ = 0,
IOAT_MSIX,
- IOAT_MSIX_SINGLE,
IOAT_MSI,
IOAT_INTX
};
@@ -83,7 +82,6 @@ struct ioatdma_device {
struct pci_pool *completion_pool;
#define MAX_SED_POOLS 5
struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
- struct kmem_cache *sed_pool;
struct dma_device common;
u8 version;
struct msix_entry msix_entries[4];
@@ -342,16 +340,6 @@ static inline bool is_ioat_bug(unsigned long err)
return !!err;
}
-static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
- int direction, enum dma_ctrl_flags flags, bool dst)
-{
- if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
- (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
- pci_unmap_single(pdev, addr, len, direction);
- else
- pci_unmap_page(pdev, addr, len, direction);
-}
-
int ioat_probe(struct ioatdma_device *device);
int ioat_register(struct ioatdma_device *device);
int ioat1_dma_probe(struct ioatdma_device *dev, int dca);
@@ -363,8 +351,6 @@ void ioat_init_channel(struct ioatdma_device *device,
struct ioat_chan_common *chan, int idx);
enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
struct dma_tx_state *txstate);
-void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
- size_t len, struct ioat_dma_descriptor *hw);
bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
dma_addr_t *phys_complete);
void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index b925e1b1d139..5d3affe7e976 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -148,7 +148,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
tx = &desc->txd;
dump_desc_dbg(ioat, desc);
if (tx->cookie) {
- ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+ dma_descriptor_unmap(tx);
dma_cookie_complete(tx);
if (tx->callback) {
tx->callback(tx->callback_param);
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
index 212d584fe427..470292767e68 100644
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -157,7 +157,6 @@ static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
int ioat2_dma_probe(struct ioatdma_device *dev, int dca);
int ioat3_dma_probe(struct ioatdma_device *dev, int dca);
-void ioat3_dma_remove(struct ioatdma_device *dev);
struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs);
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index d8ececaf1b57..820817e97e62 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -67,6 +67,8 @@
#include "dma.h"
#include "dma_v2.h"
+extern struct kmem_cache *ioat3_sed_cache;
+
/* ioat hardware assumes at least two sources for raid operations */
#define src_cnt_to_sw(x) ((x) + 2)
#define src_cnt_to_hw(x) ((x) - 2)
@@ -87,22 +89,8 @@ static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
0, 1, 2, 3, 4, 5, 6 };
-/*
- * technically sources 1 and 2 do not require SED, but the op will have
- * at least 9 descriptors so that's irrelevant.
- */
-static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 1, 1, 1 };
-
static void ioat3_eh(struct ioat2_dma_chan *ioat);
-static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
-{
- struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
-
- return raw->field[xor_idx_to_field[idx]];
-}
-
static void xor_set_src(struct ioat_raw_descriptor *descs[2],
dma_addr_t addr, u32 offset, int idx)
{
@@ -135,12 +123,6 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2],
pq->coef[idx] = coef;
}
-static int sed_get_pq16_pool_idx(int src_cnt)
-{
-
- return pq16_idx_to_sed[src_cnt];
-}
-
static bool is_jf_ioat(struct pci_dev *pdev)
{
switch (pdev->device) {
@@ -272,7 +254,7 @@ ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
struct ioat_sed_ent *sed;
gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
- sed = kmem_cache_alloc(device->sed_pool, flags);
+ sed = kmem_cache_alloc(ioat3_sed_cache, flags);
if (!sed)
return NULL;
@@ -280,7 +262,7 @@ ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool],
flags, &sed->dma);
if (!sed->hw) {
- kmem_cache_free(device->sed_pool, sed);
+ kmem_cache_free(ioat3_sed_cache, sed);
return NULL;
}
@@ -293,165 +275,7 @@ static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *s
return;
dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
- kmem_cache_free(device->sed_pool, sed);
-}
-
-static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
- struct ioat_ring_ent *desc, int idx)
-{
- struct ioat_chan_common *chan = &ioat->base;
- struct pci_dev *pdev = chan->device->pdev;
- size_t len = desc->len;
- size_t offset = len - desc->hw->size;
- struct dma_async_tx_descriptor *tx = &desc->txd;
- enum dma_ctrl_flags flags = tx->flags;
-
- switch (desc->hw->ctl_f.op) {
- case IOAT_OP_COPY:
- if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
- ioat_dma_unmap(chan, flags, len, desc->hw);
- break;
- case IOAT_OP_XOR_VAL:
- case IOAT_OP_XOR: {
- struct ioat_xor_descriptor *xor = desc->xor;
- struct ioat_ring_ent *ext;
- struct ioat_xor_ext_descriptor *xor_ex = NULL;
- int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
- struct ioat_raw_descriptor *descs[2];
- int i;
-
- if (src_cnt > 5) {
- ext = ioat2_get_ring_ent(ioat, idx + 1);
- xor_ex = ext->xor_ex;
- }
-
- if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- descs[0] = (struct ioat_raw_descriptor *) xor;
- descs[1] = (struct ioat_raw_descriptor *) xor_ex;
- for (i = 0; i < src_cnt; i++) {
- dma_addr_t src = xor_get_src(descs, i);
-
- ioat_unmap(pdev, src - offset, len,
- PCI_DMA_TODEVICE, flags, 0);
- }
-
- /* dest is a source in xor validate operations */
- if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
- ioat_unmap(pdev, xor->dst_addr - offset, len,
- PCI_DMA_TODEVICE, flags, 1);
- break;
- }
- }
-
- if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
- ioat_unmap(pdev, xor->dst_addr - offset, len,
- PCI_DMA_FROMDEVICE, flags, 1);
- break;
- }
- case IOAT_OP_PQ_VAL:
- case IOAT_OP_PQ: {
- struct ioat_pq_descriptor *pq = desc->pq;
- struct ioat_ring_ent *ext;
- struct ioat_pq_ext_descriptor *pq_ex = NULL;
- int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
- struct ioat_raw_descriptor *descs[2];
- int i;
-
- if (src_cnt > 3) {
- ext = ioat2_get_ring_ent(ioat, idx + 1);
- pq_ex = ext->pq_ex;
- }
-
- /* in the 'continue' case don't unmap the dests as sources */
- if (dmaf_p_disabled_continue(flags))
- src_cnt--;
- else if (dmaf_continue(flags))
- src_cnt -= 3;
-
- if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- descs[0] = (struct ioat_raw_descriptor *) pq;
- descs[1] = (struct ioat_raw_descriptor *) pq_ex;
- for (i = 0; i < src_cnt; i++) {
- dma_addr_t src = pq_get_src(descs, i);
-
- ioat_unmap(pdev, src - offset, len,
- PCI_DMA_TODEVICE, flags, 0);
- }
-
- /* the dests are sources in pq validate operations */
- if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
- if (!(flags & DMA_PREP_PQ_DISABLE_P))
- ioat_unmap(pdev, pq->p_addr - offset,
- len, PCI_DMA_TODEVICE, flags, 0);
- if (!(flags & DMA_PREP_PQ_DISABLE_Q))
- ioat_unmap(pdev, pq->q_addr - offset,
- len, PCI_DMA_TODEVICE, flags, 0);
- break;
- }
- }
-
- if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- if (!(flags & DMA_PREP_PQ_DISABLE_P))
- ioat_unmap(pdev, pq->p_addr - offset, len,
- PCI_DMA_BIDIRECTIONAL, flags, 1);
- if (!(flags & DMA_PREP_PQ_DISABLE_Q))
- ioat_unmap(pdev, pq->q_addr - offset, len,
- PCI_DMA_BIDIRECTIONAL, flags, 1);
- }
- break;
- }
- case IOAT_OP_PQ_16S:
- case IOAT_OP_PQ_VAL_16S: {
- struct ioat_pq_descriptor *pq = desc->pq;
- int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
- struct ioat_raw_descriptor *descs[4];
- int i;
-
- /* in the 'continue' case don't unmap the dests as sources */
- if (dmaf_p_disabled_continue(flags))
- src_cnt--;
- else if (dmaf_continue(flags))
- src_cnt -= 3;
-
- if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- descs[0] = (struct ioat_raw_descriptor *)pq;
- descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw);
- descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]);
- for (i = 0; i < src_cnt; i++) {
- dma_addr_t src = pq16_get_src(descs, i);
-
- ioat_unmap(pdev, src - offset, len,
- PCI_DMA_TODEVICE, flags, 0);
- }
-
- /* the dests are sources in pq validate operations */
- if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
- if (!(flags & DMA_PREP_PQ_DISABLE_P))
- ioat_unmap(pdev, pq->p_addr - offset,
- len, PCI_DMA_TODEVICE,
- flags, 0);
- if (!(flags & DMA_PREP_PQ_DISABLE_Q))
- ioat_unmap(pdev, pq->q_addr - offset,
- len, PCI_DMA_TODEVICE,
- flags, 0);
- break;
- }
- }
-
- if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- if (!(flags & DMA_PREP_PQ_DISABLE_P))
- ioat_unmap(pdev, pq->p_addr - offset, len,
- PCI_DMA_BIDIRECTIONAL, flags, 1);
- if (!(flags & DMA_PREP_PQ_DISABLE_Q))
- ioat_unmap(pdev, pq->q_addr - offset, len,
- PCI_DMA_BIDIRECTIONAL, flags, 1);
- }
- break;
- }
- default:
- dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
- __func__, desc->hw->ctl_f.op);
- }
+ kmem_cache_free(ioat3_sed_cache, sed);
}
static bool desc_has_ext(struct ioat_ring_ent *desc)
@@ -577,7 +401,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
tx = &desc->txd;
if (tx->cookie) {
dma_cookie_complete(tx);
- ioat3_dma_unmap(ioat, desc, idx + i);
+ dma_descriptor_unmap(tx);
if (tx->callback) {
tx->callback(tx->callback_param);
tx->callback = NULL;
@@ -807,7 +631,7 @@ ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
enum dma_status ret;
ret = dma_cookie_status(c, cookie, txstate);
- if (ret == DMA_SUCCESS)
+ if (ret == DMA_COMPLETE)
return ret;
ioat3_cleanup(ioat);
@@ -1129,9 +953,6 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
u8 op;
int i, s, idx, num_descs;
- /* this function only handles src_cnt 9 - 16 */
- BUG_ON(src_cnt < 9);
-
/* this function is only called with 9-16 sources */
op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
@@ -1159,8 +980,7 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
descs[0] = (struct ioat_raw_descriptor *) pq;
- desc->sed = ioat3_alloc_sed(device,
- sed_get_pq16_pool_idx(src_cnt));
+ desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3);
if (!desc->sed) {
dev_err(to_dev(chan),
"%s: no free sed entries\n", __func__);
@@ -1218,13 +1038,21 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
return &desc->txd;
}
+static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
+{
+ if (dmaf_p_disabled_continue(flags))
+ return src_cnt + 1;
+ else if (dmaf_continue(flags))
+ return src_cnt + 3;
+ else
+ return src_cnt;
+}
+
static struct dma_async_tx_descriptor *
ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
unsigned int src_cnt, const unsigned char *scf, size_t len,
unsigned long flags)
{
- struct dma_device *dma = chan->device;
-
/* specify valid address for disabled result */
if (flags & DMA_PREP_PQ_DISABLE_P)
dst[0] = dst[1];
@@ -1244,7 +1072,7 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
single_source_coef[0] = scf[0];
single_source_coef[1] = 0;
- return (src_cnt > 8) && (dma->max_pq > 8) ?
+ return src_cnt_flags(src_cnt, flags) > 8 ?
__ioat3_prep_pq16_lock(chan, NULL, dst, single_source,
2, single_source_coef, len,
flags) :
@@ -1252,7 +1080,7 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
single_source_coef, len, flags);
} else {
- return (src_cnt > 8) && (dma->max_pq > 8) ?
+ return src_cnt_flags(src_cnt, flags) > 8 ?
__ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
scf, len, flags) :
__ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt,
@@ -1265,8 +1093,6 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
unsigned int src_cnt, const unsigned char *scf, size_t len,
enum sum_check_flags *pqres, unsigned long flags)
{
- struct dma_device *dma = chan->device;
-
/* specify valid address for disabled result */
if (flags & DMA_PREP_PQ_DISABLE_P)
pq[0] = pq[1];
@@ -1278,7 +1104,7 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
*/
*pqres = 0;
- return (src_cnt > 8) && (dma->max_pq > 8) ?
+ return src_cnt_flags(src_cnt, flags) > 8 ?
__ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
flags) :
__ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
@@ -1289,7 +1115,6 @@ static struct dma_async_tx_descriptor *
ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
unsigned int src_cnt, size_t len, unsigned long flags)
{
- struct dma_device *dma = chan->device;
unsigned char scf[src_cnt];
dma_addr_t pq[2];
@@ -1298,7 +1123,7 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
flags |= DMA_PREP_PQ_DISABLE_Q;
pq[1] = dst; /* specify valid address for disabled result */
- return (src_cnt > 8) && (dma->max_pq > 8) ?
+ return src_cnt_flags(src_cnt, flags) > 8 ?
__ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
flags) :
__ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
@@ -1310,7 +1135,6 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
unsigned int src_cnt, size_t len,
enum sum_check_flags *result, unsigned long flags)
{
- struct dma_device *dma = chan->device;
unsigned char scf[src_cnt];
dma_addr_t pq[2];
@@ -1324,8 +1148,7 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
flags |= DMA_PREP_PQ_DISABLE_Q;
pq[1] = pq[0]; /* specify valid address for disabled result */
-
- return (src_cnt > 8) && (dma->max_pq > 8) ?
+ return src_cnt_flags(src_cnt, flags) > 8 ?
__ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
scf, len, flags) :
__ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
@@ -1444,9 +1267,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
DMA_TO_DEVICE);
tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
IOAT_NUM_SRC_TEST, PAGE_SIZE,
- DMA_PREP_INTERRUPT |
- DMA_COMPL_SKIP_SRC_UNMAP |
- DMA_COMPL_SKIP_DEST_UNMAP);
+ DMA_PREP_INTERRUPT);
if (!tx) {
dev_err(dev, "Self-test xor prep failed\n");
@@ -1468,7 +1289,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
- if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+ if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
dev_err(dev, "Self-test xor timed out\n");
err = -ENODEV;
goto dma_unmap;
@@ -1507,9 +1328,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
DMA_TO_DEVICE);
tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
- &xor_val_result, DMA_PREP_INTERRUPT |
- DMA_COMPL_SKIP_SRC_UNMAP |
- DMA_COMPL_SKIP_DEST_UNMAP);
+ &xor_val_result, DMA_PREP_INTERRUPT);
if (!tx) {
dev_err(dev, "Self-test zero prep failed\n");
err = -ENODEV;
@@ -1530,7 +1349,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
- if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+ if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
dev_err(dev, "Self-test validate timed out\n");
err = -ENODEV;
goto dma_unmap;
@@ -1545,6 +1364,8 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
goto free_resources;
}
+ memset(page_address(dest), 0, PAGE_SIZE);
+
/* test for non-zero parity sum */
op = IOAT_OP_XOR_VAL;
@@ -1554,9 +1375,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
DMA_TO_DEVICE);
tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
- &xor_val_result, DMA_PREP_INTERRUPT |
- DMA_COMPL_SKIP_SRC_UNMAP |
- DMA_COMPL_SKIP_DEST_UNMAP);
+ &xor_val_result, DMA_PREP_INTERRUPT);
if (!tx) {
dev_err(dev, "Self-test 2nd zero prep failed\n");
err = -ENODEV;
@@ -1577,7 +1396,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
- if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+ if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
dev_err(dev, "Self-test 2nd validate timed out\n");
err = -ENODEV;
goto dma_unmap;
@@ -1630,52 +1449,36 @@ static int ioat3_dma_self_test(struct ioatdma_device *device)
static int ioat3_irq_reinit(struct ioatdma_device *device)
{
- int msixcnt = device->common.chancnt;
struct pci_dev *pdev = device->pdev;
- int i;
- struct msix_entry *msix;
- struct ioat_chan_common *chan;
- int err = 0;
+ int irq = pdev->irq, i;
+
+ if (!is_bwd_ioat(pdev))
+ return 0;
switch (device->irq_mode) {
case IOAT_MSIX:
+ for (i = 0; i < device->common.chancnt; i++) {
+ struct msix_entry *msix = &device->msix_entries[i];
+ struct ioat_chan_common *chan;
- for (i = 0; i < msixcnt; i++) {
- msix = &device->msix_entries[i];
chan = ioat_chan_by_index(device, i);
devm_free_irq(&pdev->dev, msix->vector, chan);
}
pci_disable_msix(pdev);
break;
-
- case IOAT_MSIX_SINGLE:
- msix = &device->msix_entries[0];
- chan = ioat_chan_by_index(device, 0);
- devm_free_irq(&pdev->dev, msix->vector, chan);
- pci_disable_msix(pdev);
- break;
-
case IOAT_MSI:
- chan = ioat_chan_by_index(device, 0);
- devm_free_irq(&pdev->dev, pdev->irq, chan);
pci_disable_msi(pdev);
- break;
-
+ /* fall through */
case IOAT_INTX:
- chan = ioat_chan_by_index(device, 0);
- devm_free_irq(&pdev->dev, pdev->irq, chan);
+ devm_free_irq(&pdev->dev, irq, device);
break;
-
default:
return 0;
}
-
device->irq_mode = IOAT_NOIRQ;
- err = ioat_dma_setup_interrupts(device);
-
- return err;
+ return ioat_dma_setup_interrupts(device);
}
static int ioat3_reset_hw(struct ioat_chan_common *chan)
@@ -1718,14 +1521,12 @@ static int ioat3_reset_hw(struct ioat_chan_common *chan)
}
err = ioat2_reset_sync(chan, msecs_to_jiffies(200));
- if (err) {
- dev_err(&pdev->dev, "Failed to reset!\n");
- return err;
- }
-
- if (device->irq_mode != IOAT_NOIRQ && is_bwd_ioat(pdev))
+ if (!err)
err = ioat3_irq_reinit(device);
+ if (err)
+ dev_err(&pdev->dev, "Failed to reset: %d\n", err);
+
return err;
}
@@ -1835,21 +1636,15 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
char pool_name[14];
int i;
- /* allocate sw descriptor pool for SED */
- device->sed_pool = kmem_cache_create("ioat_sed",
- sizeof(struct ioat_sed_ent), 0, 0, NULL);
- if (!device->sed_pool)
- return -ENOMEM;
-
for (i = 0; i < MAX_SED_POOLS; i++) {
snprintf(pool_name, 14, "ioat_hw%d_sed", i);
/* allocate SED DMA pool */
- device->sed_hw_pool[i] = dma_pool_create(pool_name,
+ device->sed_hw_pool[i] = dmam_pool_create(pool_name,
&pdev->dev,
SED_SIZE * (i + 1), 64, 0);
if (!device->sed_hw_pool[i])
- goto sed_pool_cleanup;
+ return -ENOMEM;
}
}
@@ -1875,28 +1670,4 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
device->dca = ioat3_dca_init(pdev, device->reg_base);
return 0;
-
-sed_pool_cleanup:
- if (device->sed_pool) {
- int i;
- kmem_cache_destroy(device->sed_pool);
-
- for (i = 0; i < MAX_SED_POOLS; i++)
- if (device->sed_hw_pool[i])
- dma_pool_destroy(device->sed_hw_pool[i]);
- }
-
- return -ENOMEM;
-}
-
-void ioat3_dma_remove(struct ioatdma_device *device)
-{
- if (device->sed_pool) {
- int i;
- kmem_cache_destroy(device->sed_pool);
-
- for (i = 0; i < MAX_SED_POOLS; i++)
- if (device->sed_hw_pool[i])
- dma_pool_destroy(device->sed_hw_pool[i]);
- }
}
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index 2c8d560e6334..1d051cd045db 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -123,6 +123,7 @@ module_param(ioat_dca_enabled, int, 0644);
MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
struct kmem_cache *ioat2_cache;
+struct kmem_cache *ioat3_sed_cache;
#define DRV_NAME "ioatdma"
@@ -207,9 +208,6 @@ static void ioat_remove(struct pci_dev *pdev)
if (!device)
return;
- if (device->version >= IOAT_VER_3_0)
- ioat3_dma_remove(device);
-
dev_err(&pdev->dev, "Removing dma and dca services\n");
if (device->dca) {
unregister_dca_provider(device->dca, &pdev->dev);
@@ -221,7 +219,7 @@ static void ioat_remove(struct pci_dev *pdev)
static int __init ioat_init_module(void)
{
- int err;
+ int err = -ENOMEM;
pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
DRV_NAME, IOAT_DMA_VERSION);
@@ -231,9 +229,21 @@ static int __init ioat_init_module(void)
if (!ioat2_cache)
return -ENOMEM;
+ ioat3_sed_cache = KMEM_CACHE(ioat_sed_ent, 0);
+ if (!ioat3_sed_cache)
+ goto err_ioat2_cache;
+
err = pci_register_driver(&ioat_pci_driver);
if (err)
- kmem_cache_destroy(ioat2_cache);
+ goto err_ioat3_cache;
+
+ return 0;
+
+ err_ioat3_cache:
+ kmem_cache_destroy(ioat3_sed_cache);
+
+ err_ioat2_cache:
+ kmem_cache_destroy(ioat2_cache);
return err;
}
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index dd8b44a56e5d..c56137bc3868 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -61,80 +61,6 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
}
}
-static void
-iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
-{
- struct dma_async_tx_descriptor *tx = &desc->async_tx;
- struct iop_adma_desc_slot *unmap = desc->group_head;
- struct device *dev = &iop_chan->device->pdev->dev;
- u32 len = unmap->unmap_len;
- enum dma_ctrl_flags flags = tx->flags;
- u32 src_cnt;
- dma_addr_t addr;
- dma_addr_t dest;
-
- src_cnt = unmap->unmap_src_cnt;
- dest = iop_desc_get_dest_addr(unmap, iop_chan);
- if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- enum dma_data_direction dir;
-
- if (src_cnt > 1) /* is xor? */
- dir = DMA_BIDIRECTIONAL;
- else
- dir = DMA_FROM_DEVICE;
-
- dma_unmap_page(dev, dest, len, dir);
- }
-
- if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- while (src_cnt--) {
- addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
- if (addr == dest)
- continue;
- dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
- }
- }
- desc->group_head = NULL;
-}
-
-static void
-iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
-{
- struct dma_async_tx_descriptor *tx = &desc->async_tx;
- struct iop_adma_desc_slot *unmap = desc->group_head;
- struct device *dev = &iop_chan->device->pdev->dev;
- u32 len = unmap->unmap_len;
- enum dma_ctrl_flags flags = tx->flags;
- u32 src_cnt = unmap->unmap_src_cnt;
- dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
- dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
- int i;
-
- if (tx->flags & DMA_PREP_CONTINUE)
- src_cnt -= 3;
-
- if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
- dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
- dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
- }
-
- if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- dma_addr_t addr;
-
- for (i = 0; i < src_cnt; i++) {
- addr = iop_desc_get_src_addr(unmap, iop_chan, i);
- dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
- }
- if (desc->pq_check_result) {
- dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
- dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
- }
- }
-
- desc->group_head = NULL;
-}
-
-
static dma_cookie_t
iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
@@ -152,15 +78,9 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
if (tx->callback)
tx->callback(tx->callback_param);
- /* unmap dma addresses
- * (unmap_single vs unmap_page?)
- */
- if (desc->group_head && desc->unmap_len) {
- if (iop_desc_is_pq(desc))
- iop_desc_unmap_pq(iop_chan, desc);
- else
- iop_desc_unmap(iop_chan, desc);
- }
+ dma_descriptor_unmap(tx);
+ if (desc->group_head)
+ desc->group_head = NULL;
}
/* run dependent operations */
@@ -591,7 +511,6 @@ iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
if (sw_desc) {
grp_start = sw_desc->group_head;
iop_desc_init_interrupt(grp_start, iop_chan);
- grp_start->unmap_len = 0;
sw_desc->async_tx.flags = flags;
}
spin_unlock_bh(&iop_chan->lock);
@@ -623,8 +542,6 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
iop_desc_set_byte_count(grp_start, iop_chan, len);
iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
iop_desc_set_memcpy_src_addr(grp_start, dma_src);
- sw_desc->unmap_src_cnt = 1;
- sw_desc->unmap_len = len;
sw_desc->async_tx.flags = flags;
}
spin_unlock_bh(&iop_chan->lock);
@@ -657,8 +574,6 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
iop_desc_init_xor(grp_start, src_cnt, flags);
iop_desc_set_byte_count(grp_start, iop_chan, len);
iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
- sw_desc->unmap_src_cnt = src_cnt;
- sw_desc->unmap_len = len;
sw_desc->async_tx.flags = flags;
while (src_cnt--)
iop_desc_set_xor_src_addr(grp_start, src_cnt,
@@ -694,8 +609,6 @@ iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
grp_start->xor_check_result = result;
pr_debug("\t%s: grp_start->xor_check_result: %p\n",
__func__, grp_start->xor_check_result);
- sw_desc->unmap_src_cnt = src_cnt;
- sw_desc->unmap_len = len;
sw_desc->async_tx.flags = flags;
while (src_cnt--)
iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
@@ -748,8 +661,6 @@ iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
dst[0] = dst[1] & 0x7;
iop_desc_set_pq_addr(g, dst);
- sw_desc->unmap_src_cnt = src_cnt;
- sw_desc->unmap_len = len;
sw_desc->async_tx.flags = flags;
for (i = 0; i < src_cnt; i++)
iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
@@ -804,8 +715,6 @@ iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
g->pq_check_result = pqres;
pr_debug("\t%s: g->pq_check_result: %p\n",
__func__, g->pq_check_result);
- sw_desc->unmap_src_cnt = src_cnt+2;
- sw_desc->unmap_len = len;
sw_desc->async_tx.flags = flags;
while (src_cnt--)
iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
@@ -864,7 +773,7 @@ static enum dma_status iop_adma_status(struct dma_chan *chan,
int ret;
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret == DMA_SUCCESS)
+ if (ret == DMA_COMPLETE)
return ret;
iop_adma_slot_cleanup(iop_chan);
@@ -983,7 +892,7 @@ static int iop_adma_memcpy_self_test(struct iop_adma_device *device)
msleep(1);
if (iop_adma_status(dma_chan, cookie, NULL) !=
- DMA_SUCCESS) {
+ DMA_COMPLETE) {
dev_err(dma_chan->device->dev,
"Self-test copy timed out, disabling\n");
err = -ENODEV;
@@ -1083,7 +992,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device)
msleep(8);
if (iop_adma_status(dma_chan, cookie, NULL) !=
- DMA_SUCCESS) {
+ DMA_COMPLETE) {
dev_err(dma_chan->device->dev,
"Self-test xor timed out, disabling\n");
err = -ENODEV;
@@ -1129,7 +1038,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device)
iop_adma_issue_pending(dma_chan);
msleep(8);
- if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+ if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
dev_err(dma_chan->device->dev,
"Self-test zero sum timed out, disabling\n");
err = -ENODEV;
@@ -1158,7 +1067,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device)
iop_adma_issue_pending(dma_chan);
msleep(8);
- if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+ if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
dev_err(dma_chan->device->dev,
"Self-test non-zero sum timed out, disabling\n");
err = -ENODEV;
@@ -1254,7 +1163,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
msleep(8);
if (iop_adma_status(dma_chan, cookie, NULL) !=
- DMA_SUCCESS) {
+ DMA_COMPLETE) {
dev_err(dev, "Self-test pq timed out, disabling\n");
err = -ENODEV;
goto free_resources;
@@ -1291,7 +1200,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
msleep(8);
if (iop_adma_status(dma_chan, cookie, NULL) !=
- DMA_SUCCESS) {
+ DMA_COMPLETE) {
dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
err = -ENODEV;
goto free_resources;
@@ -1323,7 +1232,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
msleep(8);
if (iop_adma_status(dma_chan, cookie, NULL) !=
- DMA_SUCCESS) {
+ DMA_COMPLETE) {
dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
err = -ENODEV;
goto free_resources;
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index cb9c0bc317e8..128ca143486d 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -1232,8 +1232,10 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id)
desc = list_entry(ichan->queue.next, struct idmac_tx_desc, list);
descnew = desc;
- dev_dbg(dev, "IDMAC irq %d, dma 0x%08x, next dma 0x%08x, current %d, curbuf 0x%08x\n",
- irq, sg_dma_address(*sg), sgnext ? sg_dma_address(sgnext) : 0, ichan->active_buffer, curbuf);
+ dev_dbg(dev, "IDMAC irq %d, dma %#llx, next dma %#llx, current %d, curbuf %#x\n",
+ irq, (u64)sg_dma_address(*sg),
+ sgnext ? (u64)sg_dma_address(sgnext) : 0,
+ ichan->active_buffer, curbuf);
/* Find the descriptor of sgnext */
sgnew = idmac_sg_next(ichan, &descnew, *sg);
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index a2c330f5f952..e26075408e9b 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -344,7 +344,7 @@ static enum dma_status k3_dma_tx_status(struct dma_chan *chan,
size_t bytes = 0;
ret = dma_cookie_status(&c->vc.chan, cookie, state);
- if (ret == DMA_SUCCESS)
+ if (ret == DMA_COMPLETE)
return ret;
spin_lock_irqsave(&c->vc.lock, flags);
@@ -693,7 +693,7 @@ static int k3_dma_probe(struct platform_device *op)
irq = platform_get_irq(op, 0);
ret = devm_request_irq(&op->dev, irq,
- k3_dma_int_handler, IRQF_DISABLED, DRIVER_NAME, d);
+ k3_dma_int_handler, 0, DRIVER_NAME, d);
if (ret)
return ret;
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index ff8d7827f8cb..dcb1e05149a7 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -798,8 +798,7 @@ static void dma_do_tasklet(unsigned long data)
* move the descriptors to a temporary list so we can drop
* the lock during the entire cleanup operation
*/
- list_del(&desc->node);
- list_add(&desc->node, &chain_cleanup);
+ list_move(&desc->node, &chain_cleanup);
/*
* Look for the first list entry which has the ENDIRQEN flag
@@ -863,7 +862,7 @@ static int mmp_pdma_chan_init(struct mmp_pdma_device *pdev,
if (irq) {
ret = devm_request_irq(pdev->dev, irq,
- mmp_pdma_chan_handler, IRQF_DISABLED, "pdma", phy);
+ mmp_pdma_chan_handler, 0, "pdma", phy);
if (ret) {
dev_err(pdev->dev, "channel request irq fail!\n");
return ret;
@@ -970,7 +969,7 @@ static int mmp_pdma_probe(struct platform_device *op)
/* all chan share one irq, demux inside */
irq = platform_get_irq(op, 0);
ret = devm_request_irq(pdev->dev, irq,
- mmp_pdma_int_handler, IRQF_DISABLED, "pdma", pdev);
+ mmp_pdma_int_handler, 0, "pdma", pdev);
if (ret)
return ret;
}
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index d3b6358e5a27..3ddacc14a736 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -62,6 +62,11 @@
#define TDCR_BURSTSZ_16B (0x3 << 6)
#define TDCR_BURSTSZ_32B (0x6 << 6)
#define TDCR_BURSTSZ_64B (0x7 << 6)
+#define TDCR_BURSTSZ_SQU_1B (0x5 << 6)
+#define TDCR_BURSTSZ_SQU_2B (0x6 << 6)
+#define TDCR_BURSTSZ_SQU_4B (0x0 << 6)
+#define TDCR_BURSTSZ_SQU_8B (0x1 << 6)
+#define TDCR_BURSTSZ_SQU_16B (0x3 << 6)
#define TDCR_BURSTSZ_SQU_32B (0x7 << 6)
#define TDCR_BURSTSZ_128B (0x5 << 6)
#define TDCR_DSTDIR_MSK (0x3 << 4) /* Dst Direction */
@@ -158,7 +163,7 @@ static void mmp_tdma_disable_chan(struct mmp_tdma_chan *tdmac)
/* disable irq */
writel(0, tdmac->reg_base + TDIMR);
- tdmac->status = DMA_SUCCESS;
+ tdmac->status = DMA_COMPLETE;
}
static void mmp_tdma_resume_chan(struct mmp_tdma_chan *tdmac)
@@ -228,8 +233,31 @@ static int mmp_tdma_config_chan(struct mmp_tdma_chan *tdmac)
return -EINVAL;
}
} else if (tdmac->type == PXA910_SQU) {
- tdcr |= TDCR_BURSTSZ_SQU_32B;
tdcr |= TDCR_SSPMOD;
+
+ switch (tdmac->burst_sz) {
+ case 1:
+ tdcr |= TDCR_BURSTSZ_SQU_1B;
+ break;
+ case 2:
+ tdcr |= TDCR_BURSTSZ_SQU_2B;
+ break;
+ case 4:
+ tdcr |= TDCR_BURSTSZ_SQU_4B;
+ break;
+ case 8:
+ tdcr |= TDCR_BURSTSZ_SQU_8B;
+ break;
+ case 16:
+ tdcr |= TDCR_BURSTSZ_SQU_16B;
+ break;
+ case 32:
+ tdcr |= TDCR_BURSTSZ_SQU_32B;
+ break;
+ default:
+ dev_err(tdmac->dev, "mmp_tdma: unknown burst size.\n");
+ return -EINVAL;
+ }
}
writel(tdcr, tdmac->reg_base + TDCR);
@@ -324,7 +352,7 @@ static int mmp_tdma_alloc_chan_resources(struct dma_chan *chan)
if (tdmac->irq) {
ret = devm_request_irq(tdmac->dev, tdmac->irq,
- mmp_tdma_chan_handler, IRQF_DISABLED, "tdma", tdmac);
+ mmp_tdma_chan_handler, 0, "tdma", tdmac);
if (ret)
return ret;
}
@@ -365,7 +393,7 @@ static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic(
int num_periods = buf_len / period_len;
int i = 0, buf = 0;
- if (tdmac->status != DMA_SUCCESS)
+ if (tdmac->status != DMA_COMPLETE)
return NULL;
if (period_len > TDMA_MAX_XFER_BYTES) {
@@ -499,7 +527,7 @@ static int mmp_tdma_chan_init(struct mmp_tdma_device *tdev,
tdmac->idx = idx;
tdmac->type = type;
tdmac->reg_base = (unsigned long)tdev->base + idx * 4;
- tdmac->status = DMA_SUCCESS;
+ tdmac->status = DMA_COMPLETE;
tdev->tdmac[tdmac->idx] = tdmac;
tasklet_init(&tdmac->tasklet, dma_do_tasklet, (unsigned long)tdmac);
@@ -554,7 +582,7 @@ static int mmp_tdma_probe(struct platform_device *pdev)
if (irq_num != chan_num) {
irq = platform_get_irq(pdev, 0);
ret = devm_request_irq(&pdev->dev, irq,
- mmp_tdma_int_handler, IRQF_DISABLED, "tdma", tdev);
+ mmp_tdma_int_handler, 0, "tdma", tdev);
if (ret)
return ret;
}
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 536dcb8ba5fd..7807f0ef4e20 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -60,14 +60,6 @@ static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
return hw_desc->phy_dest_addr;
}
-static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
- int src_idx)
-{
- struct mv_xor_desc *hw_desc = desc->hw_desc;
- return hw_desc->phy_src_addr[mv_phy_src_idx(src_idx)];
-}
-
-
static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
u32 byte_count)
{
@@ -278,42 +270,9 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
desc->async_tx.callback(
desc->async_tx.callback_param);
- /* unmap dma addresses
- * (unmap_single vs unmap_page?)
- */
- if (desc->group_head && desc->unmap_len) {
- struct mv_xor_desc_slot *unmap = desc->group_head;
- struct device *dev = mv_chan_to_devp(mv_chan);
- u32 len = unmap->unmap_len;
- enum dma_ctrl_flags flags = desc->async_tx.flags;
- u32 src_cnt;
- dma_addr_t addr;
- dma_addr_t dest;
-
- src_cnt = unmap->unmap_src_cnt;
- dest = mv_desc_get_dest_addr(unmap);
- if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- enum dma_data_direction dir;
-
- if (src_cnt > 1) /* is xor ? */
- dir = DMA_BIDIRECTIONAL;
- else
- dir = DMA_FROM_DEVICE;
- dma_unmap_page(dev, dest, len, dir);
- }
-
- if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- while (src_cnt--) {
- addr = mv_desc_get_src_addr(unmap,
- src_cnt);
- if (addr == dest)
- continue;
- dma_unmap_page(dev, addr, len,
- DMA_TO_DEVICE);
- }
- }
+ dma_descriptor_unmap(&desc->async_tx);
+ if (desc->group_head)
desc->group_head = NULL;
- }
}
/* run dependent operations */
@@ -749,7 +708,7 @@ static enum dma_status mv_xor_status(struct dma_chan *chan,
enum dma_status ret;
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret == DMA_SUCCESS) {
+ if (ret == DMA_COMPLETE) {
mv_xor_clean_completed_slots(mv_chan);
return ret;
}
@@ -874,7 +833,7 @@ static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan)
msleep(1);
if (mv_xor_status(dma_chan, cookie, NULL) !=
- DMA_SUCCESS) {
+ DMA_COMPLETE) {
dev_err(dma_chan->device->dev,
"Self-test copy timed out, disabling\n");
err = -ENODEV;
@@ -968,7 +927,7 @@ mv_xor_xor_self_test(struct mv_xor_chan *mv_chan)
msleep(8);
if (mv_xor_status(dma_chan, cookie, NULL) !=
- DMA_SUCCESS) {
+ DMA_COMPLETE) {
dev_err(dma_chan->device->dev,
"Self-test xor timed out, disabling\n");
err = -ENODEV;
@@ -1076,10 +1035,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
}
mv_chan->mmr_base = xordev->xor_base;
- if (!mv_chan->mmr_base) {
- ret = -ENOMEM;
- goto err_free_dma;
- }
+ mv_chan->mmr_high_base = xordev->xor_high_base;
tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
mv_chan);
@@ -1138,7 +1094,7 @@ static void
mv_xor_conf_mbus_windows(struct mv_xor_device *xordev,
const struct mbus_dram_target_info *dram)
{
- void __iomem *base = xordev->xor_base;
+ void __iomem *base = xordev->xor_high_base;
u32 win_enable = 0;
int i;
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index 06b067f24c9b..d0749229c875 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -34,13 +34,13 @@
#define XOR_OPERATION_MODE_MEMCPY 2
#define XOR_DESCRIPTOR_SWAP BIT(14)
-#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4))
-#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4))
-#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx * 4))
-#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4))
-#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx * 4))
-#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0)
-#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4)
+#define XOR_CURR_DESC(chan) (chan->mmr_high_base + 0x10 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan) (chan->mmr_high_base + 0x00 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan) (chan->mmr_high_base + 0x20 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan) (chan->mmr_high_base + 0xB0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan) (chan->mmr_high_base + 0xC0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_high_base + 0xE0)
+#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_high_base + 0xE4)
#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4))
#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4))
@@ -50,11 +50,11 @@
#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60)
#define XOR_INTR_MASK_VALUE 0x3F5
-#define WINDOW_BASE(w) (0x250 + ((w) << 2))
-#define WINDOW_SIZE(w) (0x270 + ((w) << 2))
-#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2))
-#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2))
-#define WINDOW_OVERRIDE_CTRL(chan) (0x2A0 + ((chan) << 2))
+#define WINDOW_BASE(w) (0x50 + ((w) << 2))
+#define WINDOW_SIZE(w) (0x70 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w) (0x90 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan) (0x40 + ((chan) << 2))
+#define WINDOW_OVERRIDE_CTRL(chan) (0xA0 + ((chan) << 2))
struct mv_xor_device {
void __iomem *xor_base;
@@ -82,6 +82,7 @@ struct mv_xor_chan {
int pending;
spinlock_t lock; /* protects the descriptor slot pool */
void __iomem *mmr_base;
+ void __iomem *mmr_high_base;
unsigned int idx;
int irq;
enum dma_transaction_type current_type;
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index ccd13df841db..ead491346da7 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -27,6 +27,7 @@
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_dma.h>
+#include <linux/list.h>
#include <asm/irq.h>
@@ -57,6 +58,9 @@
(((dma_is_apbh(d) && apbh_is_old(d)) ? 0x050 : 0x110) + (n) * 0x70)
#define HW_APBHX_CHn_SEMA(d, n) \
(((dma_is_apbh(d) && apbh_is_old(d)) ? 0x080 : 0x140) + (n) * 0x70)
+#define HW_APBHX_CHn_BAR(d, n) \
+ (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x070 : 0x130) + (n) * 0x70)
+#define HW_APBX_CHn_DEBUG1(d, n) (0x150 + (n) * 0x70)
/*
* ccw bits definitions
@@ -115,7 +119,9 @@ struct mxs_dma_chan {
int desc_count;
enum dma_status status;
unsigned int flags;
+ bool reset;
#define MXS_DMA_SG_LOOP (1 << 0)
+#define MXS_DMA_USE_SEMAPHORE (1 << 1)
};
#define MXS_DMA_CHANNELS 16
@@ -201,12 +207,47 @@ static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan)
struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
int chan_id = mxs_chan->chan.chan_id;
- if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma))
+ /*
+ * mxs dma channel resets can cause a channel stall. To recover from a
+ * channel stall, we have to reset the whole DMA engine. To avoid this,
+ * we use cyclic DMA with semaphores, that are enhanced in
+ * mxs_dma_int_handler. To reset the channel, we can simply stop writing
+ * into the semaphore counter.
+ */
+ if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+ mxs_chan->flags & MXS_DMA_SG_LOOP) {
+ mxs_chan->reset = true;
+ } else if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma)) {
writel(1 << (chan_id + BP_APBH_CTRL0_RESET_CHANNEL),
mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
- else
+ } else {
+ unsigned long elapsed = 0;
+ const unsigned long max_wait = 50000; /* 50ms */
+ void __iomem *reg_dbg1 = mxs_dma->base +
+ HW_APBX_CHn_DEBUG1(mxs_dma, chan_id);
+
+ /*
+ * On i.MX28 APBX, the DMA channel can stop working if we reset
+ * the channel while it is in READ_FLUSH (0x08) state.
+ * We wait here until we leave the state. Then we trigger the
+ * reset. Waiting a maximum of 50ms, the kernel shouldn't crash
+ * because of this.
+ */
+ while ((readl(reg_dbg1) & 0xf) == 0x8 && elapsed < max_wait) {
+ udelay(100);
+ elapsed += 100;
+ }
+
+ if (elapsed >= max_wait)
+ dev_err(&mxs_chan->mxs_dma->pdev->dev,
+ "Failed waiting for the DMA channel %d to leave state READ_FLUSH, trying to reset channel in READ_FLUSH state now\n",
+ chan_id);
+
writel(1 << (chan_id + BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL),
mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_SET);
+ }
+
+ mxs_chan->status = DMA_COMPLETE;
}
static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan)
@@ -219,12 +260,21 @@ static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan)
mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(mxs_dma, chan_id));
/* write 1 to SEMA to kick off the channel */
- writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+ if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+ mxs_chan->flags & MXS_DMA_SG_LOOP) {
+ /* A cyclic DMA consists of at least 2 segments, so initialize
+ * the semaphore with 2 so we have enough time to add 1 to the
+ * semaphore if we need to */
+ writel(2, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+ } else {
+ writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+ }
+ mxs_chan->reset = false;
}
static void mxs_dma_disable_chan(struct mxs_dma_chan *mxs_chan)
{
- mxs_chan->status = DMA_SUCCESS;
+ mxs_chan->status = DMA_COMPLETE;
}
static void mxs_dma_pause_chan(struct mxs_dma_chan *mxs_chan)
@@ -272,58 +322,88 @@ static void mxs_dma_tasklet(unsigned long data)
mxs_chan->desc.callback(mxs_chan->desc.callback_param);
}
+static int mxs_dma_irq_to_chan(struct mxs_dma_engine *mxs_dma, int irq)
+{
+ int i;
+
+ for (i = 0; i != mxs_dma->nr_channels; ++i)
+ if (mxs_dma->mxs_chans[i].chan_irq == irq)
+ return i;
+
+ return -EINVAL;
+}
+
static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id)
{
struct mxs_dma_engine *mxs_dma = dev_id;
- u32 stat1, stat2;
+ struct mxs_dma_chan *mxs_chan;
+ u32 completed;
+ u32 err;
+ int chan = mxs_dma_irq_to_chan(mxs_dma, irq);
+
+ if (chan < 0)
+ return IRQ_NONE;
/* completion status */
- stat1 = readl(mxs_dma->base + HW_APBHX_CTRL1);
- stat1 &= MXS_DMA_CHANNELS_MASK;
- writel(stat1, mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_CLR);
+ completed = readl(mxs_dma->base + HW_APBHX_CTRL1);
+ completed = (completed >> chan) & 0x1;
+
+ /* Clear interrupt */
+ writel((1 << chan),
+ mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_CLR);
/* error status */
- stat2 = readl(mxs_dma->base + HW_APBHX_CTRL2);
- writel(stat2, mxs_dma->base + HW_APBHX_CTRL2 + STMP_OFFSET_REG_CLR);
+ err = readl(mxs_dma->base + HW_APBHX_CTRL2);
+ err &= (1 << (MXS_DMA_CHANNELS + chan)) | (1 << chan);
+
+ /*
+ * error status bit is in the upper 16 bits, error irq bit in the lower
+ * 16 bits. We transform it into a simpler error code:
+ * err: 0x00 = no error, 0x01 = TERMINATION, 0x02 = BUS_ERROR
+ */
+ err = (err >> (MXS_DMA_CHANNELS + chan)) + (err >> chan);
+
+ /* Clear error irq */
+ writel((1 << chan),
+ mxs_dma->base + HW_APBHX_CTRL2 + STMP_OFFSET_REG_CLR);
/*
* When both completion and error of termination bits set at the
* same time, we do not take it as an error. IOW, it only becomes
- * an error we need to handle here in case of either it's (1) a bus
- * error or (2) a termination error with no completion.
+ * an error we need to handle here in case of either it's a bus
+ * error or a termination error with no completion. 0x01 is termination
+ * error, so we can subtract err & completed to get the real error case.
*/
- stat2 = ((stat2 >> MXS_DMA_CHANNELS) & stat2) | /* (1) */
- (~(stat2 >> MXS_DMA_CHANNELS) & stat2 & ~stat1); /* (2) */
-
- /* combine error and completion status for checking */
- stat1 = (stat2 << MXS_DMA_CHANNELS) | stat1;
- while (stat1) {
- int channel = fls(stat1) - 1;
- struct mxs_dma_chan *mxs_chan =
- &mxs_dma->mxs_chans[channel % MXS_DMA_CHANNELS];
-
- if (channel >= MXS_DMA_CHANNELS) {
- dev_dbg(mxs_dma->dma_device.dev,
- "%s: error in channel %d\n", __func__,
- channel - MXS_DMA_CHANNELS);
- mxs_chan->status = DMA_ERROR;
- mxs_dma_reset_chan(mxs_chan);
- } else {
- if (mxs_chan->flags & MXS_DMA_SG_LOOP)
- mxs_chan->status = DMA_IN_PROGRESS;
- else
- mxs_chan->status = DMA_SUCCESS;
- }
+ err -= err & completed;
- stat1 &= ~(1 << channel);
+ mxs_chan = &mxs_dma->mxs_chans[chan];
- if (mxs_chan->status == DMA_SUCCESS)
- dma_cookie_complete(&mxs_chan->desc);
+ if (err) {
+ dev_dbg(mxs_dma->dma_device.dev,
+ "%s: error in channel %d\n", __func__,
+ chan);
+ mxs_chan->status = DMA_ERROR;
+ mxs_dma_reset_chan(mxs_chan);
+ } else if (mxs_chan->status != DMA_COMPLETE) {
+ if (mxs_chan->flags & MXS_DMA_SG_LOOP) {
+ mxs_chan->status = DMA_IN_PROGRESS;
+ if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE)
+ writel(1, mxs_dma->base +
+ HW_APBHX_CHn_SEMA(mxs_dma, chan));
+ } else {
+ mxs_chan->status = DMA_COMPLETE;
+ }
+ }
- /* schedule tasklet on this channel */
- tasklet_schedule(&mxs_chan->tasklet);
+ if (mxs_chan->status == DMA_COMPLETE) {
+ if (mxs_chan->reset)
+ return IRQ_HANDLED;
+ dma_cookie_complete(&mxs_chan->desc);
}
+ /* schedule tasklet on this channel */
+ tasklet_schedule(&mxs_chan->tasklet);
+
return IRQ_HANDLED;
}
@@ -523,6 +603,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
mxs_chan->status = DMA_IN_PROGRESS;
mxs_chan->flags |= MXS_DMA_SG_LOOP;
+ mxs_chan->flags |= MXS_DMA_USE_SEMAPHORE;
if (num_periods > NUM_CCW) {
dev_err(mxs_dma->dma_device.dev,
@@ -554,6 +635,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
ccw->bits |= CCW_IRQ;
ccw->bits |= CCW_HALT_ON_TERM;
ccw->bits |= CCW_TERM_FLUSH;
+ ccw->bits |= CCW_DEC_SEM;
ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
@@ -599,8 +681,24 @@ static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
dma_cookie_t cookie, struct dma_tx_state *txstate)
{
struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+ struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+ u32 residue = 0;
+
+ if (mxs_chan->status == DMA_IN_PROGRESS &&
+ mxs_chan->flags & MXS_DMA_SG_LOOP) {
+ struct mxs_dma_ccw *last_ccw;
+ u32 bar;
+
+ last_ccw = &mxs_chan->ccw[mxs_chan->desc_count - 1];
+ residue = last_ccw->xfer_bytes + last_ccw->bufaddr;
+
+ bar = readl(mxs_dma->base +
+ HW_APBHX_CHn_BAR(mxs_dma, chan->chan_id));
+ residue -= bar;
+ }
- dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0);
+ dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+ residue);
return mxs_chan->status;
}
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index ec3fc4fd9160..2f66cf4e54fe 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -248,7 +248,7 @@ static enum dma_status omap_dma_tx_status(struct dma_chan *chan,
unsigned long flags;
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret == DMA_SUCCESS || !txstate)
+ if (ret == DMA_COMPLETE || !txstate)
return ret;
spin_lock_irqsave(&c->vc.lock, flags);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index df8b10fd1726..cdf0483b8f2d 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2268,6 +2268,8 @@ static void pl330_tasklet(unsigned long data)
list_move_tail(&desc->node, &pch->dmac->desc_pool);
}
+ dma_descriptor_unmap(&desc->txd);
+
if (callback) {
spin_unlock_irqrestore(&pch->lock, flags);
callback(callback_param);
@@ -2314,7 +2316,7 @@ bool pl330_filter(struct dma_chan *chan, void *param)
return false;
peri_id = chan->private;
- return *peri_id == (unsigned)param;
+ return *peri_id == (unsigned long)param;
}
EXPORT_SYMBOL(pl330_filter);
@@ -2926,16 +2928,23 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
amba_set_drvdata(adev, pdmac);
- irq = adev->irq[0];
- ret = request_irq(irq, pl330_irq_handler, 0,
- dev_name(&adev->dev), pi);
- if (ret)
- return ret;
+ for (i = 0; i < AMBA_NR_IRQS; i++) {
+ irq = adev->irq[i];
+ if (irq) {
+ ret = devm_request_irq(&adev->dev, irq,
+ pl330_irq_handler, 0,
+ dev_name(&adev->dev), pi);
+ if (ret)
+ return ret;
+ } else {
+ break;
+ }
+ }
pi->pcfg.periph_id = adev->periphid;
ret = pl330_add(pi);
if (ret)
- goto probe_err1;
+ return ret;
INIT_LIST_HEAD(&pdmac->desc_pool);
spin_lock_init(&pdmac->pool_lock);
@@ -3033,8 +3042,6 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
return 0;
probe_err3:
- amba_set_drvdata(adev, NULL);
-
/* Idle the DMAC */
list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
chan.device_node) {
@@ -3048,8 +3055,6 @@ probe_err3:
}
probe_err2:
pl330_del(pi);
-probe_err1:
- free_irq(irq, pi);
return ret;
}
@@ -3059,7 +3064,6 @@ static int pl330_remove(struct amba_device *adev)
struct dma_pl330_dmac *pdmac = amba_get_drvdata(adev);
struct dma_pl330_chan *pch, *_p;
struct pl330_info *pi;
- int irq;
if (!pdmac)
return 0;
@@ -3068,7 +3072,6 @@ static int pl330_remove(struct amba_device *adev)
of_dma_controller_free(adev->dev.of_node);
dma_async_device_unregister(&pdmac->ddma);
- amba_set_drvdata(adev, NULL);
/* Idle the DMAC */
list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
@@ -3086,9 +3089,6 @@ static int pl330_remove(struct amba_device *adev)
pl330_del(pi);
- irq = adev->irq[0];
- free_irq(irq, pi);
-
return 0;
}
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index e24b5ef486b5..8da48c6b2a38 100644
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -804,218 +804,6 @@ static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
}
/**
- * ppc440spe_desc_get_src_addr - extract the source address from the descriptor
- */
-static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc,
- struct ppc440spe_adma_chan *chan, int src_idx)
-{
- struct dma_cdb *dma_hw_desc;
- struct xor_cb *xor_hw_desc;
-
- switch (chan->device->id) {
- case PPC440SPE_DMA0_ID:
- case PPC440SPE_DMA1_ID:
- dma_hw_desc = desc->hw_desc;
- /* May have 0, 1, 2, or 3 sources */
- switch (dma_hw_desc->opc) {
- case DMA_CDB_OPC_NO_OP:
- case DMA_CDB_OPC_DFILL128:
- return 0;
- case DMA_CDB_OPC_DCHECK128:
- if (unlikely(src_idx)) {
- printk(KERN_ERR "%s: try to get %d source for"
- " DCHECK128\n", __func__, src_idx);
- BUG();
- }
- return le32_to_cpu(dma_hw_desc->sg1l);
- case DMA_CDB_OPC_MULTICAST:
- case DMA_CDB_OPC_MV_SG1_SG2:
- if (unlikely(src_idx > 2)) {
- printk(KERN_ERR "%s: try to get %d source from"
- " DMA descr\n", __func__, src_idx);
- BUG();
- }
- if (src_idx) {
- if (le32_to_cpu(dma_hw_desc->sg1u) &
- DMA_CUED_XOR_WIN_MSK) {
- u8 region;
-
- if (src_idx == 1)
- return le32_to_cpu(
- dma_hw_desc->sg1l) +
- desc->unmap_len;
-
- region = (le32_to_cpu(
- dma_hw_desc->sg1u)) >>
- DMA_CUED_REGION_OFF;
-
- region &= DMA_CUED_REGION_MSK;
- switch (region) {
- case DMA_RXOR123:
- return le32_to_cpu(
- dma_hw_desc->sg1l) +
- (desc->unmap_len << 1);
- case DMA_RXOR124:
- return le32_to_cpu(
- dma_hw_desc->sg1l) +
- (desc->unmap_len * 3);
- case DMA_RXOR125:
- return le32_to_cpu(
- dma_hw_desc->sg1l) +
- (desc->unmap_len << 2);
- default:
- printk(KERN_ERR
- "%s: try to"
- " get src3 for region %02x"
- "PPC440SPE_DESC_RXOR12?\n",
- __func__, region);
- BUG();
- }
- } else {
- printk(KERN_ERR
- "%s: try to get %d"
- " source for non-cued descr\n",
- __func__, src_idx);
- BUG();
- }
- }
- return le32_to_cpu(dma_hw_desc->sg1l);
- default:
- printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
- __func__, dma_hw_desc->opc);
- BUG();
- }
- return le32_to_cpu(dma_hw_desc->sg1l);
- case PPC440SPE_XOR_ID:
- /* May have up to 16 sources */
- xor_hw_desc = desc->hw_desc;
- return xor_hw_desc->ops[src_idx].l;
- }
- return 0;
-}
-
-/**
- * ppc440spe_desc_get_dest_addr - extract the destination address from the
- * descriptor
- */
-static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc,
- struct ppc440spe_adma_chan *chan, int idx)
-{
- struct dma_cdb *dma_hw_desc;
- struct xor_cb *xor_hw_desc;
-
- switch (chan->device->id) {
- case PPC440SPE_DMA0_ID:
- case PPC440SPE_DMA1_ID:
- dma_hw_desc = desc->hw_desc;
-
- if (likely(!idx))
- return le32_to_cpu(dma_hw_desc->sg2l);
- return le32_to_cpu(dma_hw_desc->sg3l);
- case PPC440SPE_XOR_ID:
- xor_hw_desc = desc->hw_desc;
- return xor_hw_desc->cbtal;
- }
- return 0;
-}
-
-/**
- * ppc440spe_desc_get_src_num - extract the number of source addresses from
- * the descriptor
- */
-static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc,
- struct ppc440spe_adma_chan *chan)
-{
- struct dma_cdb *dma_hw_desc;
- struct xor_cb *xor_hw_desc;
-
- switch (chan->device->id) {
- case PPC440SPE_DMA0_ID:
- case PPC440SPE_DMA1_ID:
- dma_hw_desc = desc->hw_desc;
-
- switch (dma_hw_desc->opc) {
- case DMA_CDB_OPC_NO_OP:
- case DMA_CDB_OPC_DFILL128:
- return 0;
- case DMA_CDB_OPC_DCHECK128:
- return 1;
- case DMA_CDB_OPC_MV_SG1_SG2:
- case DMA_CDB_OPC_MULTICAST:
- /*
- * Only for RXOR operations we have more than
- * one source
- */
- if (le32_to_cpu(dma_hw_desc->sg1u) &
- DMA_CUED_XOR_WIN_MSK) {
- /* RXOR op, there are 2 or 3 sources */
- if (((le32_to_cpu(dma_hw_desc->sg1u) >>
- DMA_CUED_REGION_OFF) &
- DMA_CUED_REGION_MSK) == DMA_RXOR12) {
- /* RXOR 1-2 */
- return 2;
- } else {
- /* RXOR 1-2-3/1-2-4/1-2-5 */
- return 3;
- }
- }
- return 1;
- default:
- printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
- __func__, dma_hw_desc->opc);
- BUG();
- }
- case PPC440SPE_XOR_ID:
- /* up to 16 sources */
- xor_hw_desc = desc->hw_desc;
- return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK;
- default:
- BUG();
- }
- return 0;
-}
-
-/**
- * ppc440spe_desc_get_dst_num - get the number of destination addresses in
- * this descriptor
- */
-static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc,
- struct ppc440spe_adma_chan *chan)
-{
- struct dma_cdb *dma_hw_desc;
-
- switch (chan->device->id) {
- case PPC440SPE_DMA0_ID:
- case PPC440SPE_DMA1_ID:
- /* May be 1 or 2 destinations */
- dma_hw_desc = desc->hw_desc;
- switch (dma_hw_desc->opc) {
- case DMA_CDB_OPC_NO_OP:
- case DMA_CDB_OPC_DCHECK128:
- return 0;
- case DMA_CDB_OPC_MV_SG1_SG2:
- case DMA_CDB_OPC_DFILL128:
- return 1;
- case DMA_CDB_OPC_MULTICAST:
- if (desc->dst_cnt == 2)
- return 2;
- else
- return 1;
- default:
- printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
- __func__, dma_hw_desc->opc);
- BUG();
- }
- case PPC440SPE_XOR_ID:
- /* Always only 1 destination */
- return 1;
- default:
- BUG();
- }
- return 0;
-}
-
-/**
* ppc440spe_desc_get_link - get the address of the descriptor that
* follows this one
*/
@@ -1707,43 +1495,6 @@ static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
}
}
-static void ppc440spe_adma_unmap(struct ppc440spe_adma_chan *chan,
- struct ppc440spe_adma_desc_slot *desc)
-{
- u32 src_cnt, dst_cnt;
- dma_addr_t addr;
-
- /*
- * get the number of sources & destination
- * included in this descriptor and unmap
- * them all
- */
- src_cnt = ppc440spe_desc_get_src_num(desc, chan);
- dst_cnt = ppc440spe_desc_get_dst_num(desc, chan);
-
- /* unmap destinations */
- if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- while (dst_cnt--) {
- addr = ppc440spe_desc_get_dest_addr(
- desc, chan, dst_cnt);
- dma_unmap_page(chan->device->dev,
- addr, desc->unmap_len,
- DMA_FROM_DEVICE);
- }
- }
-
- /* unmap sources */
- if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- while (src_cnt--) {
- addr = ppc440spe_desc_get_src_addr(
- desc, chan, src_cnt);
- dma_unmap_page(chan->device->dev,
- addr, desc->unmap_len,
- DMA_TO_DEVICE);
- }
- }
-}
-
/**
* ppc440spe_adma_run_tx_complete_actions - call functions to be called
* upon completion
@@ -1767,26 +1518,7 @@ static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
desc->async_tx.callback(
desc->async_tx.callback_param);
- /* unmap dma addresses
- * (unmap_single vs unmap_page?)
- *
- * actually, ppc's dma_unmap_page() functions are empty, so
- * the following code is just for the sake of completeness
- */
- if (chan && chan->needs_unmap && desc->group_head &&
- desc->unmap_len) {
- struct ppc440spe_adma_desc_slot *unmap =
- desc->group_head;
- /* assume 1 slot per op always */
- u32 slot_count = unmap->slot_cnt;
-
- /* Run through the group list and unmap addresses */
- for (i = 0; i < slot_count; i++) {
- BUG_ON(!unmap);
- ppc440spe_adma_unmap(chan, unmap);
- unmap = unmap->hw_next;
- }
- }
+ dma_descriptor_unmap(&desc->async_tx);
}
/* run dependent operations */
@@ -3893,7 +3625,7 @@ static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan,
ppc440spe_chan = to_ppc440spe_adma_chan(chan);
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret == DMA_SUCCESS)
+ if (ret == DMA_COMPLETE)
return ret;
ppc440spe_adma_slot_cleanup(ppc440spe_chan);
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c
index 461a91ab70bb..ab26d46bbe15 100644
--- a/drivers/dma/sa11x0-dma.c
+++ b/drivers/dma/sa11x0-dma.c
@@ -436,7 +436,7 @@ static enum dma_status sa11x0_dma_tx_status(struct dma_chan *chan,
enum dma_status ret;
ret = dma_cookie_status(&c->vc.chan, cookie, state);
- if (ret == DMA_SUCCESS)
+ if (ret == DMA_COMPLETE)
return ret;
if (!state)
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index d94ab592cc1b..2e7b394def80 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -724,7 +724,7 @@ static enum dma_status shdma_tx_status(struct dma_chan *chan,
* If we don't find cookie on the queue, it has been aborted and we have
* to report error
*/
- if (status != DMA_SUCCESS) {
+ if (status != DMA_COMPLETE) {
struct shdma_desc *sdesc;
status = DMA_ERROR;
list_for_each_entry(sdesc, &schan->ld_queue, node)
diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c
index 1069e8869f20..0d765c0e21ec 100644
--- a/drivers/dma/sh/shdmac.c
+++ b/drivers/dma/sh/shdmac.c
@@ -685,7 +685,7 @@ MODULE_DEVICE_TABLE(of, sh_dmae_of_match);
static int sh_dmae_probe(struct platform_device *pdev)
{
const struct sh_dmae_pdata *pdata;
- unsigned long irqflags = IRQF_DISABLED,
+ unsigned long irqflags = 0,
chan_flag[SH_DMAE_MAX_CHANNELS] = {};
int errirq, chan_irq[SH_DMAE_MAX_CHANNELS];
int err, i, irq_cnt = 0, irqres = 0, irq_cap = 0;
@@ -838,7 +838,7 @@ static int sh_dmae_probe(struct platform_device *pdev)
IORESOURCE_IRQ_SHAREABLE)
chan_flag[irq_cnt] = IRQF_SHARED;
else
- chan_flag[irq_cnt] = IRQF_DISABLED;
+ chan_flag[irq_cnt] = 0;
dev_dbg(&pdev->dev,
"Found IRQ %d for channel %d\n",
i, irq_cnt);
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 82d2b97ad942..b8c031b7de4e 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -14,6 +14,7 @@
#include <linux/platform_device.h>
#include <linux/clk.h>
#include <linux/delay.h>
+#include <linux/log2.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>
#include <linux/err.h>
@@ -2626,7 +2627,7 @@ static enum dma_status d40_tx_status(struct dma_chan *chan,
}
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret != DMA_SUCCESS)
+ if (ret != DMA_COMPLETE)
dma_set_residue(txstate, stedma40_residue(chan));
if (d40_is_paused(d40c))
@@ -2796,8 +2797,8 @@ static int d40_set_runtime_config(struct dma_chan *chan,
src_addr_width > DMA_SLAVE_BUSWIDTH_8_BYTES ||
dst_addr_width <= DMA_SLAVE_BUSWIDTH_UNDEFINED ||
dst_addr_width > DMA_SLAVE_BUSWIDTH_8_BYTES ||
- ((src_addr_width > 1) && (src_addr_width & 1)) ||
- ((dst_addr_width > 1) && (dst_addr_width & 1)))
+ !is_power_of_2(src_addr_width) ||
+ !is_power_of_2(dst_addr_width))
return -EINVAL;
cfg->src_info.data_width = src_addr_width;
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 5d4986e5f5fa..73654e33f13b 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -570,7 +570,7 @@ static void handle_once_dma_done(struct tegra_dma_channel *tdc,
list_del(&sgreq->node);
if (sgreq->last_sg) {
- dma_desc->dma_status = DMA_SUCCESS;
+ dma_desc->dma_status = DMA_COMPLETE;
dma_cookie_complete(&dma_desc->txd);
if (!dma_desc->cb_count)
list_add_tail(&dma_desc->cb_node, &tdc->cb_desc);
@@ -768,7 +768,7 @@ static enum dma_status tegra_dma_tx_status(struct dma_chan *dc,
unsigned int residual;
ret = dma_cookie_status(dc, cookie, txstate);
- if (ret == DMA_SUCCESS)
+ if (ret == DMA_COMPLETE)
return ret;
spin_lock_irqsave(&tdc->lock, flags);
@@ -1018,7 +1018,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg(
return &dma_desc->txd;
}
-struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
+static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
size_t period_len, enum dma_transfer_direction direction,
unsigned long flags, void *context)
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index 28af214fce04..4506a7b4f972 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -154,38 +154,6 @@ static bool __td_dma_done_ack(struct timb_dma_chan *td_chan)
return done;
}
-static void __td_unmap_desc(struct timb_dma_chan *td_chan, const u8 *dma_desc,
- bool single)
-{
- dma_addr_t addr;
- int len;
-
- addr = (dma_desc[7] << 24) | (dma_desc[6] << 16) | (dma_desc[5] << 8) |
- dma_desc[4];
-
- len = (dma_desc[3] << 8) | dma_desc[2];
-
- if (single)
- dma_unmap_single(chan2dev(&td_chan->chan), addr, len,
- DMA_TO_DEVICE);
- else
- dma_unmap_page(chan2dev(&td_chan->chan), addr, len,
- DMA_TO_DEVICE);
-}
-
-static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single)
-{
- struct timb_dma_chan *td_chan = container_of(td_desc->txd.chan,
- struct timb_dma_chan, chan);
- u8 *descs;
-
- for (descs = td_desc->desc_list; ; descs += TIMB_DMA_DESC_SIZE) {
- __td_unmap_desc(td_chan, descs, single);
- if (descs[0] & 0x02)
- break;
- }
-}
-
static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc,
struct scatterlist *sg, bool last)
{
@@ -293,10 +261,7 @@ static void __td_finish(struct timb_dma_chan *td_chan)
list_move(&td_desc->desc_node, &td_chan->free_list);
- if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
- __td_unmap_descs(td_desc,
- txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE);
-
+ dma_descriptor_unmap(txd);
/*
* The API requires that no submissions are done from a
* callback, so we don't need to drop the lock here
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index 71e8e775189e..bae6c29f5502 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -419,30 +419,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
list_splice_init(&desc->tx_list, &dc->free_list);
list_move(&desc->desc_node, &dc->free_list);
- if (!ds) {
- dma_addr_t dmaaddr;
- if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
- dmaaddr = is_dmac64(dc) ?
- desc->hwdesc.DAR : desc->hwdesc32.DAR;
- if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
- dma_unmap_single(chan2parent(&dc->chan),
- dmaaddr, desc->len, DMA_FROM_DEVICE);
- else
- dma_unmap_page(chan2parent(&dc->chan),
- dmaaddr, desc->len, DMA_FROM_DEVICE);
- }
- if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
- dmaaddr = is_dmac64(dc) ?
- desc->hwdesc.SAR : desc->hwdesc32.SAR;
- if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
- dma_unmap_single(chan2parent(&dc->chan),
- dmaaddr, desc->len, DMA_TO_DEVICE);
- else
- dma_unmap_page(chan2parent(&dc->chan),
- dmaaddr, desc->len, DMA_TO_DEVICE);
- }
- }
-
+ dma_descriptor_unmap(txd);
/*
* The API requires that no submissions are done from a
* callback, so we don't need to drop the lock here
@@ -962,8 +939,8 @@ txx9dmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
enum dma_status ret;
ret = dma_cookie_status(chan, cookie, txstate);
- if (ret == DMA_SUCCESS)
- return DMA_SUCCESS;
+ if (ret == DMA_COMPLETE)
+ return DMA_COMPLETE;
spin_lock_bh(&dc->lock);
txx9dmac_scan_descriptors(dc);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 7dd446150294..4e10b10d3ddd 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -13,6 +13,7 @@
#include <linux/acpi_gpio.h>
#include <linux/idr.h>
#include <linux/slab.h>
+#include <linux/acpi.h>
#define CREATE_TRACE_POINTS
#include <trace/events/gpio.h>
diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c
index 43959edd4291..dfff0907f70e 100644
--- a/drivers/gpu/drm/i915/intel_acpi.c
+++ b/drivers/gpu/drm/i915/intel_acpi.c
@@ -196,7 +196,7 @@ static bool intel_dsm_pci_probe(struct pci_dev *pdev)
acpi_handle dhandle;
int ret;
- dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+ dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
return false;
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index 1b2f41c3f191..6d69a9bad865 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -638,7 +638,7 @@ static void intel_didl_outputs(struct drm_device *dev)
u32 temp;
int i = 0;
- handle = DEVICE_ACPI_HANDLE(&dev->pdev->dev);
+ handle = ACPI_HANDLE(&dev->pdev->dev);
if (!handle || acpi_bus_get_device(handle, &acpi_dev))
return;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
index e286e132c7e7..129120473f6c 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
@@ -116,7 +116,7 @@ mxm_shadow_dsm(struct nouveau_mxm *mxm, u8 version)
acpi_handle handle;
int ret;
- handle = DEVICE_ACPI_HANDLE(&device->pdev->dev);
+ handle = ACPI_HANDLE(&device->pdev->dev);
if (!handle)
return false;
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 07273a2ae62f..95c740454049 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -256,7 +256,7 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev)
acpi_handle dhandle;
int retval = 0;
- dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+ dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
return false;
@@ -414,7 +414,7 @@ bool nouveau_acpi_rom_supported(struct pci_dev *pdev)
if (!nouveau_dsm_priv.dsm_detected && !nouveau_dsm_priv.optimus_detected)
return false;
- dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+ dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
return false;
@@ -448,7 +448,7 @@ nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector)
return NULL;
}
- handle = DEVICE_ACPI_HANDLE(&dev->pdev->dev);
+ handle = ACPI_HANDLE(&dev->pdev->dev);
if (!handle)
return NULL;
diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c
index 10f98c7742d8..98a9074b306b 100644
--- a/drivers/gpu/drm/radeon/radeon_acpi.c
+++ b/drivers/gpu/drm/radeon/radeon_acpi.c
@@ -369,7 +369,7 @@ int radeon_atif_handler(struct radeon_device *rdev,
return NOTIFY_DONE;
/* Check pending SBIOS requests */
- handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+ handle = ACPI_HANDLE(&rdev->pdev->dev);
count = radeon_atif_get_sbios_requests(handle, &req);
if (count <= 0)
@@ -556,7 +556,7 @@ int radeon_acpi_pcie_notify_device_ready(struct radeon_device *rdev)
struct radeon_atcs *atcs = &rdev->atcs;
/* Get the device handle */
- handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+ handle = ACPI_HANDLE(&rdev->pdev->dev);
if (!handle)
return -EINVAL;
@@ -596,7 +596,7 @@ int radeon_acpi_pcie_performance_request(struct radeon_device *rdev,
u32 retry = 3;
/* Get the device handle */
- handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+ handle = ACPI_HANDLE(&rdev->pdev->dev);
if (!handle)
return -EINVAL;
@@ -699,7 +699,7 @@ int radeon_acpi_init(struct radeon_device *rdev)
int ret;
/* Get the device handle */
- handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+ handle = ACPI_HANDLE(&rdev->pdev->dev);
/* No need to proceed if we're sure that ATIF is not supported */
if (!ASIC_IS_AVIVO(rdev) || !rdev->bios || !handle)
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 6153ec18943a..9d302eaeea15 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -8,8 +8,7 @@
*/
#include <linux/vga_switcheroo.h>
#include <linux/slab.h>
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
+#include <linux/acpi.h>
#include <linux/pci.h>
#include "radeon_acpi.h"
@@ -447,7 +446,7 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev)
acpi_handle dhandle, atpx_handle;
acpi_status status;
- dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+ dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
return false;
@@ -493,7 +492,7 @@ static int radeon_atpx_init(void)
*/
static int radeon_atpx_get_client_id(struct pci_dev *pdev)
{
- if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev))
+ if (radeon_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
return VGA_SWITCHEROO_IGD;
else
return VGA_SWITCHEROO_DIS;
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index c155d6f3fa68..b3633d9a5317 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -185,7 +185,7 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev)
return false;
while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
- dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+ dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
continue;
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index ae48d18ee315..5f7e55f4b7f0 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -1008,7 +1008,7 @@ static int i2c_hid_probe(struct i2c_client *client,
hid->hid_get_raw_report = i2c_hid_get_raw_report;
hid->hid_output_raw_report = i2c_hid_output_raw_report;
hid->dev.parent = &client->dev;
- ACPI_HANDLE_SET(&hid->dev, ACPI_HANDLE(&client->dev));
+ ACPI_COMPANION_SET(&hid->dev, ACPI_COMPANION(&client->dev));
hid->bus = BUS_I2C;
hid->version = le16_to_cpu(ihid->hdesc.bcdVersion);
hid->vendor = le16_to_cpu(ihid->hdesc.wVendorID);
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 5923cfa390c8..d74c0b34248e 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -615,6 +615,22 @@ void i2c_unlock_adapter(struct i2c_adapter *adapter)
}
EXPORT_SYMBOL_GPL(i2c_unlock_adapter);
+static void i2c_dev_set_name(struct i2c_adapter *adap,
+ struct i2c_client *client)
+{
+ struct acpi_device *adev = ACPI_COMPANION(&client->dev);
+
+ if (adev) {
+ dev_set_name(&client->dev, "i2c-%s", acpi_dev_name(adev));
+ return;
+ }
+
+ /* For 10-bit clients, add an arbitrary offset to avoid collisions */
+ dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
+ client->addr | ((client->flags & I2C_CLIENT_TEN)
+ ? 0xa000 : 0));
+}
+
/**
* i2c_new_device - instantiate an i2c device
* @adap: the adapter managing the device
@@ -671,12 +687,9 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
client->dev.bus = &i2c_bus_type;
client->dev.type = &i2c_client_type;
client->dev.of_node = info->of_node;
- ACPI_HANDLE_SET(&client->dev, info->acpi_node.handle);
+ ACPI_COMPANION_SET(&client->dev, info->acpi_node.companion);
- /* For 10-bit clients, add an arbitrary offset to avoid collisions */
- dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
- client->addr | ((client->flags & I2C_CLIENT_TEN)
- ? 0xa000 : 0));
+ i2c_dev_set_name(adap, client);
status = device_register(&client->dev);
if (status)
goto out_err;
@@ -1100,7 +1113,7 @@ static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level,
return AE_OK;
memset(&info, 0, sizeof(info));
- info.acpi_node.handle = handle;
+ info.acpi_node.companion = adev;
info.irq = -1;
INIT_LIST_HEAD(&resource_list);
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index 140c8ef50529..d9e1f7ccfe6f 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -7,6 +7,7 @@
* Copyright (C) 2006 Hannes Reinecke
*/
+#include <linux/acpi.h>
#include <linux/ata.h>
#include <linux/delay.h>
#include <linux/device.h>
@@ -19,8 +20,6 @@
#include <linux/dmi.h>
#include <linux/module.h>
-#include <acpi/acpi_bus.h>
-
#define REGS_PER_GTF 7
struct GTM_buffer {
@@ -128,7 +127,7 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle,
DEBPRINT("ENTER: pci %02x:%02x.%01x\n", bus, devnum, func);
- dev_handle = DEVICE_ACPI_HANDLE(dev);
+ dev_handle = ACPI_HANDLE(dev);
if (!dev_handle) {
DEBPRINT("no acpi handle for device\n");
goto err;
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 3226ce98fb18..cbd4e9abc47e 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1,7 +1,7 @@
/*
* intel_idle.c - native hardware idle loop for modern Intel processors
*
- * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2013, Intel Corporation.
* Len Brown <len.brown@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
@@ -329,6 +329,22 @@ static struct cpuidle_state atom_cstates[] __initdata = {
{
.enter = NULL }
};
+static struct cpuidle_state avn_cstates[CPUIDLE_STATE_MAX] = {
+ {
+ .name = "C1-AVN",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 2,
+ .target_residency = 2,
+ .enter = &intel_idle },
+ {
+ .name = "C6-AVN",
+ .desc = "MWAIT 0x51",
+ .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 15,
+ .target_residency = 45,
+ .enter = &intel_idle },
+};
/**
* intel_idle
@@ -462,6 +478,11 @@ static const struct idle_cpu idle_cpu_hsw = {
.disable_promotion_to_c1e = true,
};
+static const struct idle_cpu idle_cpu_avn = {
+ .state_table = avn_cstates,
+ .disable_promotion_to_c1e = true,
+};
+
#define ICPU(model, cpu) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
@@ -483,6 +504,7 @@ static const struct x86_cpu_id intel_idle_ids[] = {
ICPU(0x3f, idle_cpu_hsw),
ICPU(0x45, idle_cpu_hsw),
ICPU(0x46, idle_cpu_hsw),
+ ICPU(0x4D, idle_cpu_avn),
{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8766eabb0014..b6b7a2866c9e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -112,7 +112,7 @@ static inline int speed_max(struct mddev *mddev)
static struct ctl_table_header *raid_table_header;
-static ctl_table raid_table[] = {
+static struct ctl_table raid_table[] = {
{
.procname = "speed_limit_min",
.data = &sysctl_speed_limit_min,
@@ -130,7 +130,7 @@ static ctl_table raid_table[] = {
{ }
};
-static ctl_table raid_dir_table[] = {
+static struct ctl_table raid_dir_table[] = {
{
.procname = "raid",
.maxlen = 0,
@@ -140,7 +140,7 @@ static ctl_table raid_dir_table[] = {
{ }
};
-static ctl_table raid_root_table[] = {
+static struct ctl_table raid_root_table[] = {
{
.procname = "dev",
.maxlen = 0,
@@ -562,11 +562,19 @@ static struct mddev * mddev_find(dev_t unit)
goto retry;
}
-static inline int mddev_lock(struct mddev * mddev)
+static inline int __must_check mddev_lock(struct mddev * mddev)
{
return mutex_lock_interruptible(&mddev->reconfig_mutex);
}
+/* Sometimes we need to take the lock in a situation where
+ * failure due to interrupts is not acceptable.
+ */
+static inline void mddev_lock_nointr(struct mddev * mddev)
+{
+ mutex_lock(&mddev->reconfig_mutex);
+}
+
static inline int mddev_is_locked(struct mddev *mddev)
{
return mutex_is_locked(&mddev->reconfig_mutex);
@@ -2978,7 +2986,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
for_each_mddev(mddev, tmp) {
struct md_rdev *rdev2;
- mddev_lock(mddev);
+ mddev_lock_nointr(mddev);
rdev_for_each(rdev2, mddev)
if (rdev->bdev == rdev2->bdev &&
rdev != rdev2 &&
@@ -2994,7 +3002,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
break;
}
}
- mddev_lock(my_mddev);
+ mddev_lock_nointr(my_mddev);
if (overlap) {
/* Someone else could have slipped in a size
* change here, but doing so is just silly.
@@ -3580,6 +3588,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->in_sync = 1;
del_timer_sync(&mddev->safemode_timer);
}
+ blk_set_stacking_limits(&mddev->queue->limits);
pers->run(mddev);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
mddev_resume(mddev);
@@ -5258,7 +5267,7 @@ static void __md_stop_writes(struct mddev *mddev)
void md_stop_writes(struct mddev *mddev)
{
- mddev_lock(mddev);
+ mddev_lock_nointr(mddev);
__md_stop_writes(mddev);
mddev_unlock(mddev);
}
@@ -5291,20 +5300,35 @@ EXPORT_SYMBOL_GPL(md_stop);
static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
{
int err = 0;
+ int did_freeze = 0;
+
+ if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+ did_freeze = 1;
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ }
+ if (mddev->sync_thread) {
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ /* Thread might be blocked waiting for metadata update
+ * which will now never happen */
+ wake_up_process(mddev->sync_thread->tsk);
+ }
+ mddev_unlock(mddev);
+ wait_event(resync_wait, mddev->sync_thread == NULL);
+ mddev_lock_nointr(mddev);
+
mutex_lock(&mddev->open_mutex);
- if (atomic_read(&mddev->openers) > !!bdev) {
+ if (atomic_read(&mddev->openers) > !!bdev ||
+ mddev->sync_thread ||
+ (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
printk("md: %s still in use.\n",mdname(mddev));
+ if (did_freeze) {
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ }
err = -EBUSY;
goto out;
}
- if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
- /* Someone opened the device since we flushed it
- * so page cache could be dirty and it is too late
- * to flush. So abort
- */
- mutex_unlock(&mddev->open_mutex);
- return -EBUSY;
- }
if (mddev->pers) {
__md_stop_writes(mddev);
@@ -5315,7 +5339,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
set_disk_ro(mddev->gendisk, 1);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
sysfs_notify_dirent_safe(mddev->sysfs_state);
- err = 0;
+ err = 0;
}
out:
mutex_unlock(&mddev->open_mutex);
@@ -5331,20 +5355,34 @@ static int do_md_stop(struct mddev * mddev, int mode,
{
struct gendisk *disk = mddev->gendisk;
struct md_rdev *rdev;
+ int did_freeze = 0;
+
+ if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+ did_freeze = 1;
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ }
+ if (mddev->sync_thread) {
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ /* Thread might be blocked waiting for metadata update
+ * which will now never happen */
+ wake_up_process(mddev->sync_thread->tsk);
+ }
+ mddev_unlock(mddev);
+ wait_event(resync_wait, mddev->sync_thread == NULL);
+ mddev_lock_nointr(mddev);
mutex_lock(&mddev->open_mutex);
if (atomic_read(&mddev->openers) > !!bdev ||
- mddev->sysfs_active) {
+ mddev->sysfs_active ||
+ mddev->sync_thread ||
+ (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
printk("md: %s still in use.\n",mdname(mddev));
mutex_unlock(&mddev->open_mutex);
- return -EBUSY;
- }
- if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
- /* Someone opened the device since we flushed it
- * so page cache could be dirty and it is too late
- * to flush. So abort
- */
- mutex_unlock(&mddev->open_mutex);
+ if (did_freeze) {
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ }
return -EBUSY;
}
if (mddev->pers) {
@@ -6551,7 +6589,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
wait_event(mddev->sb_wait,
!test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
!test_bit(MD_CHANGE_PENDING, &mddev->flags));
- mddev_lock(mddev);
+ mddev_lock_nointr(mddev);
}
} else {
err = -EROFS;
@@ -7361,9 +7399,6 @@ void md_do_sync(struct md_thread *thread)
mddev->curr_resync = 2;
try_again:
- if (kthread_should_stop())
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
goto skip;
for_each_mddev(mddev2, tmp) {
@@ -7388,7 +7423,7 @@ void md_do_sync(struct md_thread *thread)
* be caught by 'softlockup'
*/
prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
- if (!kthread_should_stop() &&
+ if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
mddev2->curr_resync >= mddev->curr_resync) {
printk(KERN_INFO "md: delaying %s of %s"
" until %s has finished (they"
@@ -7464,7 +7499,7 @@ void md_do_sync(struct md_thread *thread)
last_check = 0;
if (j>2) {
- printk(KERN_INFO
+ printk(KERN_INFO
"md: resuming %s of %s from checkpoint.\n",
desc, mdname(mddev));
mddev->curr_resync = j;
@@ -7501,7 +7536,8 @@ void md_do_sync(struct md_thread *thread)
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
- while (j >= mddev->resync_max && !kthread_should_stop()) {
+ while (j >= mddev->resync_max &&
+ !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* As this condition is controlled by user-space,
* we can block indefinitely, so use '_interruptible'
* to avoid triggering warnings.
@@ -7509,17 +7545,18 @@ void md_do_sync(struct md_thread *thread)
flush_signals(current); /* just in case */
wait_event_interruptible(mddev->recovery_wait,
mddev->resync_max > j
- || kthread_should_stop());
+ || test_bit(MD_RECOVERY_INTR,
+ &mddev->recovery));
}
- if (kthread_should_stop())
- goto interrupted;
+ if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+ break;
sectors = mddev->pers->sync_request(mddev, j, &skipped,
currspeed < speed_min(mddev));
if (sectors == 0) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- goto out;
+ break;
}
if (!skipped) { /* actual IO requested */
@@ -7556,10 +7593,8 @@ void md_do_sync(struct md_thread *thread)
last_mark = next;
}
-
- if (kthread_should_stop())
- goto interrupted;
-
+ if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+ break;
/*
* this loop exits only if either when we are slower than
@@ -7582,11 +7617,12 @@ void md_do_sync(struct md_thread *thread)
}
}
}
- printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
+ printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
+ test_bit(MD_RECOVERY_INTR, &mddev->recovery)
+ ? "interrupted" : "done");
/*
* this also signals 'finished resyncing' to md_stop
*/
- out:
blk_finish_plug(&plug);
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
@@ -7640,16 +7676,6 @@ void md_do_sync(struct md_thread *thread)
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread);
return;
-
- interrupted:
- /*
- * got a signal, exit.
- */
- printk(KERN_INFO
- "md: md_do_sync() got signal ... exiting\n");
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- goto out;
-
}
EXPORT_SYMBOL_GPL(md_do_sync);
@@ -7894,6 +7920,7 @@ void md_reap_sync_thread(struct mddev *mddev)
/* resync has finished, collect result */
md_unregister_thread(&mddev->sync_thread);
+ wake_up(&resync_wait);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* success...*/
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index af6681b19776..1e5a540995e9 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -66,7 +66,8 @@
*/
static int max_queued_requests = 1024;
-static void allow_barrier(struct r1conf *conf);
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+ sector_t bi_sector);
static void lower_barrier(struct r1conf *conf);
static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -84,10 +85,12 @@ static void r1bio_pool_free(void *r1_bio, void *data)
}
#define RESYNC_BLOCK_SIZE (64*1024)
-//#define RESYNC_BLOCK_SIZE PAGE_SIZE
+#define RESYNC_DEPTH 32
#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
-#define RESYNC_WINDOW (2048*1024)
+#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
+#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
+#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
{
@@ -225,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
struct bio *bio = r1_bio->master_bio;
int done;
struct r1conf *conf = r1_bio->mddev->private;
+ sector_t start_next_window = r1_bio->start_next_window;
+ sector_t bi_sector = bio->bi_sector;
if (bio->bi_phys_segments) {
unsigned long flags;
@@ -232,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio)
bio->bi_phys_segments--;
done = (bio->bi_phys_segments == 0);
spin_unlock_irqrestore(&conf->device_lock, flags);
+ /*
+ * make_request() might be waiting for
+ * bi_phys_segments to decrease
+ */
+ wake_up(&conf->wait_barrier);
} else
done = 1;
@@ -243,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
* Wake up any possible resync thread that waits for the device
* to go idle.
*/
- allow_barrier(conf);
+ allow_barrier(conf, start_next_window, bi_sector);
}
}
@@ -814,8 +824,6 @@ static void flush_pending_writes(struct r1conf *conf)
* there is no normal IO happeing. It must arrange to call
* lower_barrier when the particular background IO completes.
*/
-#define RESYNC_DEPTH 32
-
static void raise_barrier(struct r1conf *conf)
{
spin_lock_irq(&conf->resync_lock);
@@ -827,9 +835,19 @@ static void raise_barrier(struct r1conf *conf)
/* block any new IO from starting */
conf->barrier++;
- /* Now wait for all pending IO to complete */
+ /* For these conditions we must wait:
+ * A: while the array is in frozen state
+ * B: while barrier >= RESYNC_DEPTH, meaning resync reach
+ * the max count which allowed.
+ * C: next_resync + RESYNC_SECTORS > start_next_window, meaning
+ * next resync will reach to the window which normal bios are
+ * handling.
+ */
wait_event_lock_irq(conf->wait_barrier,
- !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+ !conf->array_frozen &&
+ conf->barrier < RESYNC_DEPTH &&
+ (conf->start_next_window >=
+ conf->next_resync + RESYNC_SECTORS),
conf->resync_lock);
spin_unlock_irq(&conf->resync_lock);
@@ -845,10 +863,33 @@ static void lower_barrier(struct r1conf *conf)
wake_up(&conf->wait_barrier);
}
-static void wait_barrier(struct r1conf *conf)
+static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
{
+ bool wait = false;
+
+ if (conf->array_frozen || !bio)
+ wait = true;
+ else if (conf->barrier && bio_data_dir(bio) == WRITE) {
+ if (conf->next_resync < RESYNC_WINDOW_SECTORS)
+ wait = true;
+ else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
+ >= bio_end_sector(bio)) ||
+ (conf->next_resync + NEXT_NORMALIO_DISTANCE
+ <= bio->bi_sector))
+ wait = false;
+ else
+ wait = true;
+ }
+
+ return wait;
+}
+
+static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
+{
+ sector_t sector = 0;
+
spin_lock_irq(&conf->resync_lock);
- if (conf->barrier) {
+ if (need_to_wait_for_sync(conf, bio)) {
conf->nr_waiting++;
/* Wait for the barrier to drop.
* However if there are already pending
@@ -860,22 +901,67 @@ static void wait_barrier(struct r1conf *conf)
* count down.
*/
wait_event_lock_irq(conf->wait_barrier,
- !conf->barrier ||
- (conf->nr_pending &&
+ !conf->array_frozen &&
+ (!conf->barrier ||
+ ((conf->start_next_window <
+ conf->next_resync + RESYNC_SECTORS) &&
current->bio_list &&
- !bio_list_empty(current->bio_list)),
+ !bio_list_empty(current->bio_list))),
conf->resync_lock);
conf->nr_waiting--;
}
+
+ if (bio && bio_data_dir(bio) == WRITE) {
+ if (conf->next_resync + NEXT_NORMALIO_DISTANCE
+ <= bio->bi_sector) {
+ if (conf->start_next_window == MaxSector)
+ conf->start_next_window =
+ conf->next_resync +
+ NEXT_NORMALIO_DISTANCE;
+
+ if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
+ <= bio->bi_sector)
+ conf->next_window_requests++;
+ else
+ conf->current_window_requests++;
+ }
+ if (bio->bi_sector >= conf->start_next_window)
+ sector = conf->start_next_window;
+ }
+
conf->nr_pending++;
spin_unlock_irq(&conf->resync_lock);
+ return sector;
}
-static void allow_barrier(struct r1conf *conf)
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+ sector_t bi_sector)
{
unsigned long flags;
+
spin_lock_irqsave(&conf->resync_lock, flags);
conf->nr_pending--;
+ if (start_next_window) {
+ if (start_next_window == conf->start_next_window) {
+ if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
+ <= bi_sector)
+ conf->next_window_requests--;
+ else
+ conf->current_window_requests--;
+ } else
+ conf->current_window_requests--;
+
+ if (!conf->current_window_requests) {
+ if (conf->next_window_requests) {
+ conf->current_window_requests =
+ conf->next_window_requests;
+ conf->next_window_requests = 0;
+ conf->start_next_window +=
+ NEXT_NORMALIO_DISTANCE;
+ } else
+ conf->start_next_window = MaxSector;
+ }
+ }
spin_unlock_irqrestore(&conf->resync_lock, flags);
wake_up(&conf->wait_barrier);
}
@@ -884,8 +970,7 @@ static void freeze_array(struct r1conf *conf, int extra)
{
/* stop syncio and normal IO and wait for everything to
* go quite.
- * We increment barrier and nr_waiting, and then
- * wait until nr_pending match nr_queued+extra
+ * We wait until nr_pending match nr_queued+extra
* This is called in the context of one normal IO request
* that has failed. Thus any sync request that might be pending
* will be blocked by nr_pending, and we need to wait for
@@ -895,8 +980,7 @@ static void freeze_array(struct r1conf *conf, int extra)
* we continue.
*/
spin_lock_irq(&conf->resync_lock);
- conf->barrier++;
- conf->nr_waiting++;
+ conf->array_frozen = 1;
wait_event_lock_irq_cmd(conf->wait_barrier,
conf->nr_pending == conf->nr_queued+extra,
conf->resync_lock,
@@ -907,8 +991,7 @@ static void unfreeze_array(struct r1conf *conf)
{
/* reverse the effect of the freeze */
spin_lock_irq(&conf->resync_lock);
- conf->barrier--;
- conf->nr_waiting--;
+ conf->array_frozen = 0;
wake_up(&conf->wait_barrier);
spin_unlock_irq(&conf->resync_lock);
}
@@ -1013,6 +1096,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
int first_clone;
int sectors_handled;
int max_sectors;
+ sector_t start_next_window;
/*
* Register the new request and wait if the reconstruction
@@ -1042,7 +1126,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
finish_wait(&conf->wait_barrier, &w);
}
- wait_barrier(conf);
+ start_next_window = wait_barrier(conf, bio);
bitmap = mddev->bitmap;
@@ -1163,6 +1247,7 @@ read_again:
disks = conf->raid_disks * 2;
retry_write:
+ r1_bio->start_next_window = start_next_window;
blocked_rdev = NULL;
rcu_read_lock();
max_sectors = r1_bio->sectors;
@@ -1231,14 +1316,24 @@ read_again:
if (unlikely(blocked_rdev)) {
/* Wait for this device to become unblocked */
int j;
+ sector_t old = start_next_window;
for (j = 0; j < i; j++)
if (r1_bio->bios[j])
rdev_dec_pending(conf->mirrors[j].rdev, mddev);
r1_bio->state = 0;
- allow_barrier(conf);
+ allow_barrier(conf, start_next_window, bio->bi_sector);
md_wait_for_blocked_rdev(blocked_rdev, mddev);
- wait_barrier(conf);
+ start_next_window = wait_barrier(conf, bio);
+ /*
+ * We must make sure the multi r1bios of bio have
+ * the same value of bi_phys_segments
+ */
+ if (bio->bi_phys_segments && old &&
+ old != start_next_window)
+ /* Wait for the former r1bio(s) to complete */
+ wait_event(conf->wait_barrier,
+ bio->bi_phys_segments == 1);
goto retry_write;
}
@@ -1438,11 +1533,14 @@ static void print_conf(struct r1conf *conf)
static void close_sync(struct r1conf *conf)
{
- wait_barrier(conf);
- allow_barrier(conf);
+ wait_barrier(conf, NULL);
+ allow_barrier(conf, 0, 0);
mempool_destroy(conf->r1buf_pool);
conf->r1buf_pool = NULL;
+
+ conf->next_resync = 0;
+ conf->start_next_window = MaxSector;
}
static int raid1_spare_active(struct mddev *mddev)
@@ -2714,6 +2812,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
conf->pending_count = 0;
conf->recovery_disabled = mddev->recovery_disabled - 1;
+ conf->start_next_window = MaxSector;
+ conf->current_window_requests = conf->next_window_requests = 0;
+
err = -EIO;
for (i = 0; i < conf->raid_disks * 2; i++) {
@@ -2871,8 +2972,8 @@ static int stop(struct mddev *mddev)
atomic_read(&bitmap->behind_writes) == 0);
}
- raise_barrier(conf);
- lower_barrier(conf);
+ freeze_array(conf, 0);
+ unfreeze_array(conf);
md_unregister_thread(&mddev->thread);
if (conf->r1bio_pool)
@@ -3031,10 +3132,10 @@ static void raid1_quiesce(struct mddev *mddev, int state)
wake_up(&conf->wait_barrier);
break;
case 1:
- raise_barrier(conf);
+ freeze_array(conf, 0);
break;
case 0:
- lower_barrier(conf);
+ unfreeze_array(conf);
break;
}
}
@@ -3051,7 +3152,8 @@ static void *raid1_takeover(struct mddev *mddev)
mddev->new_chunk_sectors = 0;
conf = setup_conf(mddev);
if (!IS_ERR(conf))
- conf->barrier = 1;
+ /* Array must appear to be quiesced */
+ conf->array_frozen = 1;
return conf;
}
return ERR_PTR(-EINVAL);
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 0ff3715fb7eb..9bebca7bff2f 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -41,6 +41,19 @@ struct r1conf {
*/
sector_t next_resync;
+ /* When raid1 starts resync, we divide array into four partitions
+ * |---------|--------------|---------------------|-------------|
+ * next_resync start_next_window end_window
+ * start_next_window = next_resync + NEXT_NORMALIO_DISTANCE
+ * end_window = start_next_window + NEXT_NORMALIO_DISTANCE
+ * current_window_requests means the count of normalIO between
+ * start_next_window and end_window.
+ * next_window_requests means the count of normalIO after end_window.
+ * */
+ sector_t start_next_window;
+ int current_window_requests;
+ int next_window_requests;
+
spinlock_t device_lock;
/* list of 'struct r1bio' that need to be processed by raid1d,
@@ -65,6 +78,7 @@ struct r1conf {
int nr_waiting;
int nr_queued;
int barrier;
+ int array_frozen;
/* Set to 1 if a full sync is needed, (fresh device added).
* Cleared when a sync completes.
@@ -111,6 +125,7 @@ struct r1bio {
* in this BehindIO request
*/
sector_t sector;
+ sector_t start_next_window;
int sectors;
unsigned long state;
struct mddev *mddev;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7c3508abb5e1..c504e8389e69 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4384,7 +4384,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait, mddev->flags == 0 ||
- kthread_should_stop());
+ test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+ if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+ allow_barrier(conf);
+ return sectors_done;
+ }
conf->reshape_safe = mddev->reshape_position;
allow_barrier(conf);
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7f0e17a27aeb..47da0af6322b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -85,6 +85,42 @@ static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
return &conf->stripe_hashtbl[hash];
}
+static inline int stripe_hash_locks_hash(sector_t sect)
+{
+ return (sect >> STRIPE_SHIFT) & STRIPE_HASH_LOCKS_MASK;
+}
+
+static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
+{
+ spin_lock_irq(conf->hash_locks + hash);
+ spin_lock(&conf->device_lock);
+}
+
+static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
+{
+ spin_unlock(&conf->device_lock);
+ spin_unlock_irq(conf->hash_locks + hash);
+}
+
+static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
+{
+ int i;
+ local_irq_disable();
+ spin_lock(conf->hash_locks);
+ for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
+ spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
+ spin_lock(&conf->device_lock);
+}
+
+static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
+{
+ int i;
+ spin_unlock(&conf->device_lock);
+ for (i = NR_STRIPE_HASH_LOCKS; i; i--)
+ spin_unlock(conf->hash_locks + i - 1);
+ local_irq_enable();
+}
+
/* bio's attached to a stripe+device for I/O are linked together in bi_sector
* order without overlap. There may be several bio's per stripe+device, and
* a bio could span several devices.
@@ -249,7 +285,8 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
}
}
-static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
+ struct list_head *temp_inactive_list)
{
BUG_ON(!list_empty(&sh->lru));
BUG_ON(atomic_read(&conf->active_stripes)==0);
@@ -278,23 +315,68 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
< IO_THRESHOLD)
md_wakeup_thread(conf->mddev->thread);
atomic_dec(&conf->active_stripes);
- if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
- list_add_tail(&sh->lru, &conf->inactive_list);
- wake_up(&conf->wait_for_stripe);
- if (conf->retry_read_aligned)
- md_wakeup_thread(conf->mddev->thread);
- }
+ if (!test_bit(STRIPE_EXPANDING, &sh->state))
+ list_add_tail(&sh->lru, temp_inactive_list);
}
}
-static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
+ struct list_head *temp_inactive_list)
{
if (atomic_dec_and_test(&sh->count))
- do_release_stripe(conf, sh);
+ do_release_stripe(conf, sh, temp_inactive_list);
+}
+
+/*
+ * @hash could be NR_STRIPE_HASH_LOCKS, then we have a list of inactive_list
+ *
+ * Be careful: Only one task can add/delete stripes from temp_inactive_list at
+ * given time. Adding stripes only takes device lock, while deleting stripes
+ * only takes hash lock.
+ */
+static void release_inactive_stripe_list(struct r5conf *conf,
+ struct list_head *temp_inactive_list,
+ int hash)
+{
+ int size;
+ bool do_wakeup = false;
+ unsigned long flags;
+
+ if (hash == NR_STRIPE_HASH_LOCKS) {
+ size = NR_STRIPE_HASH_LOCKS;
+ hash = NR_STRIPE_HASH_LOCKS - 1;
+ } else
+ size = 1;
+ while (size) {
+ struct list_head *list = &temp_inactive_list[size - 1];
+
+ /*
+ * We don't hold any lock here yet, get_active_stripe() might
+ * remove stripes from the list
+ */
+ if (!list_empty_careful(list)) {
+ spin_lock_irqsave(conf->hash_locks + hash, flags);
+ if (list_empty(conf->inactive_list + hash) &&
+ !list_empty(list))
+ atomic_dec(&conf->empty_inactive_list_nr);
+ list_splice_tail_init(list, conf->inactive_list + hash);
+ do_wakeup = true;
+ spin_unlock_irqrestore(conf->hash_locks + hash, flags);
+ }
+ size--;
+ hash--;
+ }
+
+ if (do_wakeup) {
+ wake_up(&conf->wait_for_stripe);
+ if (conf->retry_read_aligned)
+ md_wakeup_thread(conf->mddev->thread);
+ }
}
/* should hold conf->device_lock already */
-static int release_stripe_list(struct r5conf *conf)
+static int release_stripe_list(struct r5conf *conf,
+ struct list_head *temp_inactive_list)
{
struct stripe_head *sh;
int count = 0;
@@ -303,6 +385,8 @@ static int release_stripe_list(struct r5conf *conf)
head = llist_del_all(&conf->released_stripes);
head = llist_reverse_order(head);
while (head) {
+ int hash;
+
sh = llist_entry(head, struct stripe_head, release_list);
head = llist_next(head);
/* sh could be readded after STRIPE_ON_RELEASE_LIST is cleard */
@@ -313,7 +397,8 @@ static int release_stripe_list(struct r5conf *conf)
* again, the count is always > 1. This is true for
* STRIPE_ON_UNPLUG_LIST bit too.
*/
- __release_stripe(conf, sh);
+ hash = sh->hash_lock_index;
+ __release_stripe(conf, sh, &temp_inactive_list[hash]);
count++;
}
@@ -324,9 +409,12 @@ static void release_stripe(struct stripe_head *sh)
{
struct r5conf *conf = sh->raid_conf;
unsigned long flags;
+ struct list_head list;
+ int hash;
bool wakeup;
- if (test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
+ if (unlikely(!conf->mddev->thread) ||
+ test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
goto slow_path;
wakeup = llist_add(&sh->release_list, &conf->released_stripes);
if (wakeup)
@@ -336,8 +424,11 @@ slow_path:
local_irq_save(flags);
/* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
- do_release_stripe(conf, sh);
+ INIT_LIST_HEAD(&list);
+ hash = sh->hash_lock_index;
+ do_release_stripe(conf, sh, &list);
spin_unlock(&conf->device_lock);
+ release_inactive_stripe_list(conf, &list, hash);
}
local_irq_restore(flags);
}
@@ -362,18 +453,21 @@ static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh)
/* find an idle stripe, make sure it is unhashed, and return it. */
-static struct stripe_head *get_free_stripe(struct r5conf *conf)
+static struct stripe_head *get_free_stripe(struct r5conf *conf, int hash)
{
struct stripe_head *sh = NULL;
struct list_head *first;
- if (list_empty(&conf->inactive_list))
+ if (list_empty(conf->inactive_list + hash))
goto out;
- first = conf->inactive_list.next;
+ first = (conf->inactive_list + hash)->next;
sh = list_entry(first, struct stripe_head, lru);
list_del_init(first);
remove_hash(sh);
atomic_inc(&conf->active_stripes);
+ BUG_ON(hash != sh->hash_lock_index);
+ if (list_empty(conf->inactive_list + hash))
+ atomic_inc(&conf->empty_inactive_list_nr);
out:
return sh;
}
@@ -416,7 +510,7 @@ static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
{
struct r5conf *conf = sh->raid_conf;
- int i;
+ int i, seq;
BUG_ON(atomic_read(&sh->count) != 0);
BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
@@ -426,7 +520,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
(unsigned long long)sh->sector);
remove_hash(sh);
-
+retry:
+ seq = read_seqcount_begin(&conf->gen_lock);
sh->generation = conf->generation - previous;
sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
sh->sector = sector;
@@ -448,6 +543,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
dev->flags = 0;
raid5_build_block(sh, i, previous);
}
+ if (read_seqcount_retry(&conf->gen_lock, seq))
+ goto retry;
insert_hash(conf, sh);
sh->cpu = smp_processor_id();
}
@@ -552,29 +649,31 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
int previous, int noblock, int noquiesce)
{
struct stripe_head *sh;
+ int hash = stripe_hash_locks_hash(sector);
pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
- spin_lock_irq(&conf->device_lock);
+ spin_lock_irq(conf->hash_locks + hash);
do {
wait_event_lock_irq(conf->wait_for_stripe,
conf->quiesce == 0 || noquiesce,
- conf->device_lock);
+ *(conf->hash_locks + hash));
sh = __find_stripe(conf, sector, conf->generation - previous);
if (!sh) {
if (!conf->inactive_blocked)
- sh = get_free_stripe(conf);
+ sh = get_free_stripe(conf, hash);
if (noblock && sh == NULL)
break;
if (!sh) {
conf->inactive_blocked = 1;
- wait_event_lock_irq(conf->wait_for_stripe,
- !list_empty(&conf->inactive_list) &&
- (atomic_read(&conf->active_stripes)
- < (conf->max_nr_stripes *3/4)
- || !conf->inactive_blocked),
- conf->device_lock);
+ wait_event_lock_irq(
+ conf->wait_for_stripe,
+ !list_empty(conf->inactive_list + hash) &&
+ (atomic_read(&conf->active_stripes)
+ < (conf->max_nr_stripes * 3 / 4)
+ || !conf->inactive_blocked),
+ *(conf->hash_locks + hash));
conf->inactive_blocked = 0;
} else
init_stripe(sh, sector, previous);
@@ -585,9 +684,11 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
&& !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
&& !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
} else {
+ spin_lock(&conf->device_lock);
if (!test_bit(STRIPE_HANDLE, &sh->state))
atomic_inc(&conf->active_stripes);
if (list_empty(&sh->lru) &&
+ !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state) &&
!test_bit(STRIPE_EXPANDING, &sh->state))
BUG();
list_del_init(&sh->lru);
@@ -595,6 +696,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
sh->group->stripes_cnt--;
sh->group = NULL;
}
+ spin_unlock(&conf->device_lock);
}
}
} while (sh == NULL);
@@ -602,7 +704,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
if (sh)
atomic_inc(&sh->count);
- spin_unlock_irq(&conf->device_lock);
+ spin_unlock_irq(conf->hash_locks + hash);
return sh;
}
@@ -758,7 +860,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
bi->bi_sector = (sh->sector
+ rdev->data_offset);
if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
- bi->bi_rw |= REQ_FLUSH;
+ bi->bi_rw |= REQ_NOMERGE;
bi->bi_vcnt = 1;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -1582,7 +1684,7 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
put_cpu();
}
-static int grow_one_stripe(struct r5conf *conf)
+static int grow_one_stripe(struct r5conf *conf, int hash)
{
struct stripe_head *sh;
sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL);
@@ -1598,6 +1700,7 @@ static int grow_one_stripe(struct r5conf *conf)
kmem_cache_free(conf->slab_cache, sh);
return 0;
}
+ sh->hash_lock_index = hash;
/* we just created an active stripe so... */
atomic_set(&sh->count, 1);
atomic_inc(&conf->active_stripes);
@@ -1610,6 +1713,7 @@ static int grow_stripes(struct r5conf *conf, int num)
{
struct kmem_cache *sc;
int devs = max(conf->raid_disks, conf->previous_raid_disks);
+ int hash;
if (conf->mddev->gendisk)
sprintf(conf->cache_name[0],
@@ -1627,9 +1731,13 @@ static int grow_stripes(struct r5conf *conf, int num)
return 1;
conf->slab_cache = sc;
conf->pool_size = devs;
- while (num--)
- if (!grow_one_stripe(conf))
+ hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
+ while (num--) {
+ if (!grow_one_stripe(conf, hash))
return 1;
+ conf->max_nr_stripes++;
+ hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
+ }
return 0;
}
@@ -1687,6 +1795,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
int err;
struct kmem_cache *sc;
int i;
+ int hash, cnt;
if (newsize <= conf->pool_size)
return 0; /* never bother to shrink */
@@ -1726,19 +1835,29 @@ static int resize_stripes(struct r5conf *conf, int newsize)
* OK, we have enough stripes, start collecting inactive
* stripes and copying them over
*/
+ hash = 0;
+ cnt = 0;
list_for_each_entry(nsh, &newstripes, lru) {
- spin_lock_irq(&conf->device_lock);
- wait_event_lock_irq(conf->wait_for_stripe,
- !list_empty(&conf->inactive_list),
- conf->device_lock);
- osh = get_free_stripe(conf);
- spin_unlock_irq(&conf->device_lock);
+ lock_device_hash_lock(conf, hash);
+ wait_event_cmd(conf->wait_for_stripe,
+ !list_empty(conf->inactive_list + hash),
+ unlock_device_hash_lock(conf, hash),
+ lock_device_hash_lock(conf, hash));
+ osh = get_free_stripe(conf, hash);
+ unlock_device_hash_lock(conf, hash);
atomic_set(&nsh->count, 1);
for(i=0; i<conf->pool_size; i++)
nsh->dev[i].page = osh->dev[i].page;
for( ; i<newsize; i++)
nsh->dev[i].page = NULL;
+ nsh->hash_lock_index = hash;
kmem_cache_free(conf->slab_cache, osh);
+ cnt++;
+ if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
+ !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
+ hash++;
+ cnt = 0;
+ }
}
kmem_cache_destroy(conf->slab_cache);
@@ -1797,13 +1916,13 @@ static int resize_stripes(struct r5conf *conf, int newsize)
return err;
}
-static int drop_one_stripe(struct r5conf *conf)
+static int drop_one_stripe(struct r5conf *conf, int hash)
{
struct stripe_head *sh;
- spin_lock_irq(&conf->device_lock);
- sh = get_free_stripe(conf);
- spin_unlock_irq(&conf->device_lock);
+ spin_lock_irq(conf->hash_locks + hash);
+ sh = get_free_stripe(conf, hash);
+ spin_unlock_irq(conf->hash_locks + hash);
if (!sh)
return 0;
BUG_ON(atomic_read(&sh->count));
@@ -1815,8 +1934,10 @@ static int drop_one_stripe(struct r5conf *conf)
static void shrink_stripes(struct r5conf *conf)
{
- while (drop_one_stripe(conf))
- ;
+ int hash;
+ for (hash = 0; hash < NR_STRIPE_HASH_LOCKS; hash++)
+ while (drop_one_stripe(conf, hash))
+ ;
if (conf->slab_cache)
kmem_cache_destroy(conf->slab_cache);
@@ -1921,6 +2042,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
mdname(conf->mddev), bdn);
else
retry = 1;
+ if (set_bad && test_bit(In_sync, &rdev->flags)
+ && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
+ retry = 1;
if (retry)
if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
set_bit(R5_ReadError, &sh->dev[i].flags);
@@ -3900,7 +4024,8 @@ static void raid5_activate_delayed(struct r5conf *conf)
}
}
-static void activate_bit_delay(struct r5conf *conf)
+static void activate_bit_delay(struct r5conf *conf,
+ struct list_head *temp_inactive_list)
{
/* device_lock is held */
struct list_head head;
@@ -3908,9 +4033,11 @@ static void activate_bit_delay(struct r5conf *conf)
list_del_init(&conf->bitmap_list);
while (!list_empty(&head)) {
struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru);
+ int hash;
list_del_init(&sh->lru);
atomic_inc(&sh->count);
- __release_stripe(conf, sh);
+ hash = sh->hash_lock_index;
+ __release_stripe(conf, sh, &temp_inactive_list[hash]);
}
}
@@ -3926,7 +4053,7 @@ int md_raid5_congested(struct mddev *mddev, int bits)
return 1;
if (conf->quiesce)
return 1;
- if (list_empty_careful(&conf->inactive_list))
+ if (atomic_read(&conf->empty_inactive_list_nr))
return 1;
return 0;
@@ -4256,6 +4383,7 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
struct raid5_plug_cb {
struct blk_plug_cb cb;
struct list_head list;
+ struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS];
};
static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
@@ -4266,6 +4394,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
struct mddev *mddev = cb->cb.data;
struct r5conf *conf = mddev->private;
int cnt = 0;
+ int hash;
if (cb->list.next && !list_empty(&cb->list)) {
spin_lock_irq(&conf->device_lock);
@@ -4283,11 +4412,14 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
* STRIPE_ON_RELEASE_LIST could be set here. In that
* case, the count is always > 1 here
*/
- __release_stripe(conf, sh);
+ hash = sh->hash_lock_index;
+ __release_stripe(conf, sh, &cb->temp_inactive_list[hash]);
cnt++;
}
spin_unlock_irq(&conf->device_lock);
}
+ release_inactive_stripe_list(conf, cb->temp_inactive_list,
+ NR_STRIPE_HASH_LOCKS);
if (mddev->queue)
trace_block_unplug(mddev->queue, cnt, !from_schedule);
kfree(cb);
@@ -4308,8 +4440,12 @@ static void release_stripe_plug(struct mddev *mddev,
cb = container_of(blk_cb, struct raid5_plug_cb, cb);
- if (cb->list.next == NULL)
+ if (cb->list.next == NULL) {
+ int i;
INIT_LIST_HEAD(&cb->list);
+ for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+ INIT_LIST_HEAD(cb->temp_inactive_list + i);
+ }
if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
list_add_tail(&sh->lru, &cb->list);
@@ -4692,14 +4828,19 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
/* Cannot proceed until we've updated the superblock... */
wait_event(conf->wait_for_overlap,
- atomic_read(&conf->reshape_stripes)==0);
+ atomic_read(&conf->reshape_stripes)==0
+ || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+ if (atomic_read(&conf->reshape_stripes) != 0)
+ return 0;
mddev->reshape_position = conf->reshape_progress;
mddev->curr_resync_completed = sector_nr;
conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait, mddev->flags == 0 ||
- kthread_should_stop());
+ test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+ if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+ return 0;
spin_lock_irq(&conf->device_lock);
conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock);
@@ -4782,7 +4923,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
>= mddev->resync_max - mddev->curr_resync_completed) {
/* Cannot proceed until we've updated the superblock... */
wait_event(conf->wait_for_overlap,
- atomic_read(&conf->reshape_stripes) == 0);
+ atomic_read(&conf->reshape_stripes) == 0
+ || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+ if (atomic_read(&conf->reshape_stripes) != 0)
+ goto ret;
mddev->reshape_position = conf->reshape_progress;
mddev->curr_resync_completed = sector_nr;
conf->reshape_checkpoint = jiffies;
@@ -4790,13 +4934,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait,
!test_bit(MD_CHANGE_DEVS, &mddev->flags)
- || kthread_should_stop());
+ || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+ if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+ goto ret;
spin_lock_irq(&conf->device_lock);
conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap);
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
+ret:
return reshape_sectors;
}
@@ -4954,27 +5101,45 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
}
static int handle_active_stripes(struct r5conf *conf, int group,
- struct r5worker *worker)
+ struct r5worker *worker,
+ struct list_head *temp_inactive_list)
{
struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
- int i, batch_size = 0;
+ int i, batch_size = 0, hash;
+ bool release_inactive = false;
while (batch_size < MAX_STRIPE_BATCH &&
(sh = __get_priority_stripe(conf, group)) != NULL)
batch[batch_size++] = sh;
- if (batch_size == 0)
- return batch_size;
+ if (batch_size == 0) {
+ for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+ if (!list_empty(temp_inactive_list + i))
+ break;
+ if (i == NR_STRIPE_HASH_LOCKS)
+ return batch_size;
+ release_inactive = true;
+ }
spin_unlock_irq(&conf->device_lock);
+ release_inactive_stripe_list(conf, temp_inactive_list,
+ NR_STRIPE_HASH_LOCKS);
+
+ if (release_inactive) {
+ spin_lock_irq(&conf->device_lock);
+ return 0;
+ }
+
for (i = 0; i < batch_size; i++)
handle_stripe(batch[i]);
cond_resched();
spin_lock_irq(&conf->device_lock);
- for (i = 0; i < batch_size; i++)
- __release_stripe(conf, batch[i]);
+ for (i = 0; i < batch_size; i++) {
+ hash = batch[i]->hash_lock_index;
+ __release_stripe(conf, batch[i], &temp_inactive_list[hash]);
+ }
return batch_size;
}
@@ -4995,9 +5160,10 @@ static void raid5_do_work(struct work_struct *work)
while (1) {
int batch_size, released;
- released = release_stripe_list(conf);
+ released = release_stripe_list(conf, worker->temp_inactive_list);
- batch_size = handle_active_stripes(conf, group_id, worker);
+ batch_size = handle_active_stripes(conf, group_id, worker,
+ worker->temp_inactive_list);
worker->working = false;
if (!batch_size && !released)
break;
@@ -5036,7 +5202,7 @@ static void raid5d(struct md_thread *thread)
struct bio *bio;
int batch_size, released;
- released = release_stripe_list(conf);
+ released = release_stripe_list(conf, conf->temp_inactive_list);
if (
!list_empty(&conf->bitmap_list)) {
@@ -5046,7 +5212,7 @@ static void raid5d(struct md_thread *thread)
bitmap_unplug(mddev->bitmap);
spin_lock_irq(&conf->device_lock);
conf->seq_write = conf->seq_flush;
- activate_bit_delay(conf);
+ activate_bit_delay(conf, conf->temp_inactive_list);
}
raid5_activate_delayed(conf);
@@ -5060,7 +5226,8 @@ static void raid5d(struct md_thread *thread)
handled++;
}
- batch_size = handle_active_stripes(conf, ANY_GROUP, NULL);
+ batch_size = handle_active_stripes(conf, ANY_GROUP, NULL,
+ conf->temp_inactive_list);
if (!batch_size && !released)
break;
handled += batch_size;
@@ -5096,22 +5263,29 @@ raid5_set_cache_size(struct mddev *mddev, int size)
{
struct r5conf *conf = mddev->private;
int err;
+ int hash;
if (size <= 16 || size > 32768)
return -EINVAL;
+ hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
while (size < conf->max_nr_stripes) {
- if (drop_one_stripe(conf))
+ if (drop_one_stripe(conf, hash))
conf->max_nr_stripes--;
else
break;
+ hash--;
+ if (hash < 0)
+ hash = NR_STRIPE_HASH_LOCKS - 1;
}
err = md_allow_write(mddev);
if (err)
return err;
+ hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
while (size > conf->max_nr_stripes) {
- if (grow_one_stripe(conf))
+ if (grow_one_stripe(conf, hash))
conf->max_nr_stripes++;
else break;
+ hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
}
return 0;
}
@@ -5199,15 +5373,18 @@ raid5_show_group_thread_cnt(struct mddev *mddev, char *page)
return 0;
}
-static int alloc_thread_groups(struct r5conf *conf, int cnt);
+static int alloc_thread_groups(struct r5conf *conf, int cnt,
+ int *group_cnt,
+ int *worker_cnt_per_group,
+ struct r5worker_group **worker_groups);
static ssize_t
raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
{
struct r5conf *conf = mddev->private;
unsigned long new;
int err;
- struct r5worker_group *old_groups;
- int old_group_cnt;
+ struct r5worker_group *new_groups, *old_groups;
+ int group_cnt, worker_cnt_per_group;
if (len >= PAGE_SIZE)
return -EINVAL;
@@ -5223,14 +5400,19 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
mddev_suspend(mddev);
old_groups = conf->worker_groups;
- old_group_cnt = conf->worker_cnt_per_group;
+ if (old_groups)
+ flush_workqueue(raid5_wq);
+
+ err = alloc_thread_groups(conf, new,
+ &group_cnt, &worker_cnt_per_group,
+ &new_groups);
+ if (!err) {
+ spin_lock_irq(&conf->device_lock);
+ conf->group_cnt = group_cnt;
+ conf->worker_cnt_per_group = worker_cnt_per_group;
+ conf->worker_groups = new_groups;
+ spin_unlock_irq(&conf->device_lock);
- conf->worker_groups = NULL;
- err = alloc_thread_groups(conf, new);
- if (err) {
- conf->worker_groups = old_groups;
- conf->worker_cnt_per_group = old_group_cnt;
- } else {
if (old_groups)
kfree(old_groups[0].workers);
kfree(old_groups);
@@ -5260,40 +5442,47 @@ static struct attribute_group raid5_attrs_group = {
.attrs = raid5_attrs,
};
-static int alloc_thread_groups(struct r5conf *conf, int cnt)
+static int alloc_thread_groups(struct r5conf *conf, int cnt,
+ int *group_cnt,
+ int *worker_cnt_per_group,
+ struct r5worker_group **worker_groups)
{
- int i, j;
+ int i, j, k;
ssize_t size;
struct r5worker *workers;
- conf->worker_cnt_per_group = cnt;
+ *worker_cnt_per_group = cnt;
if (cnt == 0) {
- conf->worker_groups = NULL;
+ *group_cnt = 0;
+ *worker_groups = NULL;
return 0;
}
- conf->group_cnt = num_possible_nodes();
+ *group_cnt = num_possible_nodes();
size = sizeof(struct r5worker) * cnt;
- workers = kzalloc(size * conf->group_cnt, GFP_NOIO);
- conf->worker_groups = kzalloc(sizeof(struct r5worker_group) *
- conf->group_cnt, GFP_NOIO);
- if (!conf->worker_groups || !workers) {
+ workers = kzalloc(size * *group_cnt, GFP_NOIO);
+ *worker_groups = kzalloc(sizeof(struct r5worker_group) *
+ *group_cnt, GFP_NOIO);
+ if (!*worker_groups || !workers) {
kfree(workers);
- kfree(conf->worker_groups);
- conf->worker_groups = NULL;
+ kfree(*worker_groups);
return -ENOMEM;
}
- for (i = 0; i < conf->group_cnt; i++) {
+ for (i = 0; i < *group_cnt; i++) {
struct r5worker_group *group;
- group = &conf->worker_groups[i];
+ group = worker_groups[i];
INIT_LIST_HEAD(&group->handle_list);
group->conf = conf;
group->workers = workers + i * cnt;
for (j = 0; j < cnt; j++) {
- group->workers[j].group = group;
- INIT_WORK(&group->workers[j].work, raid5_do_work);
+ struct r5worker *worker = group->workers + j;
+ worker->group = group;
+ INIT_WORK(&worker->work, raid5_do_work);
+
+ for (k = 0; k < NR_STRIPE_HASH_LOCKS; k++)
+ INIT_LIST_HEAD(worker->temp_inactive_list + k);
}
}
@@ -5444,6 +5633,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
struct md_rdev *rdev;
struct disk_info *disk;
char pers_name[6];
+ int i;
+ int group_cnt, worker_cnt_per_group;
+ struct r5worker_group *new_group;
if (mddev->new_level != 5
&& mddev->new_level != 4
@@ -5478,7 +5670,12 @@ static struct r5conf *setup_conf(struct mddev *mddev)
if (conf == NULL)
goto abort;
/* Don't enable multi-threading by default*/
- if (alloc_thread_groups(conf, 0))
+ if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group,
+ &new_group)) {
+ conf->group_cnt = group_cnt;
+ conf->worker_cnt_per_group = worker_cnt_per_group;
+ conf->worker_groups = new_group;
+ } else
goto abort;
spin_lock_init(&conf->device_lock);
seqcount_init(&conf->gen_lock);
@@ -5488,7 +5685,6 @@ static struct r5conf *setup_conf(struct mddev *mddev)
INIT_LIST_HEAD(&conf->hold_list);
INIT_LIST_HEAD(&conf->delayed_list);
INIT_LIST_HEAD(&conf->bitmap_list);
- INIT_LIST_HEAD(&conf->inactive_list);
init_llist_head(&conf->released_stripes);
atomic_set(&conf->active_stripes, 0);
atomic_set(&conf->preread_active_stripes, 0);
@@ -5514,6 +5710,21 @@ static struct r5conf *setup_conf(struct mddev *mddev)
if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
goto abort;
+ /* We init hash_locks[0] separately to that it can be used
+ * as the reference lock in the spin_lock_nest_lock() call
+ * in lock_all_device_hash_locks_irq in order to convince
+ * lockdep that we know what we are doing.
+ */
+ spin_lock_init(conf->hash_locks);
+ for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
+ spin_lock_init(conf->hash_locks + i);
+
+ for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+ INIT_LIST_HEAD(conf->inactive_list + i);
+
+ for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+ INIT_LIST_HEAD(conf->temp_inactive_list + i);
+
conf->level = mddev->new_level;
if (raid5_alloc_percpu(conf) != 0)
goto abort;
@@ -5554,7 +5765,6 @@ static struct r5conf *setup_conf(struct mddev *mddev)
else
conf->max_degraded = 1;
conf->algorithm = mddev->new_layout;
- conf->max_nr_stripes = NR_STRIPES;
conf->reshape_progress = mddev->reshape_position;
if (conf->reshape_progress != MaxSector) {
conf->prev_chunk_sectors = mddev->chunk_sectors;
@@ -5563,7 +5773,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
- if (grow_stripes(conf, conf->max_nr_stripes)) {
+ atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
+ if (grow_stripes(conf, NR_STRIPES)) {
printk(KERN_ERR
"md/raid:%s: couldn't allocate %dkB for buffers\n",
mdname(mddev), memory);
@@ -6369,12 +6580,18 @@ static int raid5_start_reshape(struct mddev *mddev)
if (!mddev->sync_thread) {
mddev->recovery = 0;
spin_lock_irq(&conf->device_lock);
+ write_seqcount_begin(&conf->gen_lock);
mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
+ mddev->new_chunk_sectors =
+ conf->chunk_sectors = conf->prev_chunk_sectors;
+ mddev->new_layout = conf->algorithm = conf->prev_algo;
rdev_for_each(rdev, mddev)
rdev->new_data_offset = rdev->data_offset;
smp_wmb();
+ conf->generation --;
conf->reshape_progress = MaxSector;
mddev->reshape_position = MaxSector;
+ write_seqcount_end(&conf->gen_lock);
spin_unlock_irq(&conf->device_lock);
return -EAGAIN;
}
@@ -6462,27 +6679,28 @@ static void raid5_quiesce(struct mddev *mddev, int state)
break;
case 1: /* stop all writes */
- spin_lock_irq(&conf->device_lock);
+ lock_all_device_hash_locks_irq(conf);
/* '2' tells resync/reshape to pause so that all
* active stripes can drain
*/
conf->quiesce = 2;
- wait_event_lock_irq(conf->wait_for_stripe,
+ wait_event_cmd(conf->wait_for_stripe,
atomic_read(&conf->active_stripes) == 0 &&
atomic_read(&conf->active_aligned_reads) == 0,
- conf->device_lock);
+ unlock_all_device_hash_locks_irq(conf),
+ lock_all_device_hash_locks_irq(conf));
conf->quiesce = 1;
- spin_unlock_irq(&conf->device_lock);
+ unlock_all_device_hash_locks_irq(conf);
/* allow reshape to continue */
wake_up(&conf->wait_for_overlap);
break;
case 0: /* re-enable writes */
- spin_lock_irq(&conf->device_lock);
+ lock_all_device_hash_locks_irq(conf);
conf->quiesce = 0;
wake_up(&conf->wait_for_stripe);
wake_up(&conf->wait_for_overlap);
- spin_unlock_irq(&conf->device_lock);
+ unlock_all_device_hash_locks_irq(conf);
break;
}
}
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index b42e6b462eda..01ad8ae8f578 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -205,6 +205,7 @@ struct stripe_head {
short pd_idx; /* parity disk index */
short qd_idx; /* 'Q' disk index for raid6 */
short ddf_layout;/* use DDF ordering to calculate Q */
+ short hash_lock_index;
unsigned long state; /* state flags */
atomic_t count; /* nr of active thread/requests */
int bm_seq; /* sequence number for bitmap flushes */
@@ -367,9 +368,18 @@ struct disk_info {
struct md_rdev *rdev, *replacement;
};
+/* NOTE NR_STRIPE_HASH_LOCKS must remain below 64.
+ * This is because we sometimes take all the spinlocks
+ * and creating that much locking depth can cause
+ * problems.
+ */
+#define NR_STRIPE_HASH_LOCKS 8
+#define STRIPE_HASH_LOCKS_MASK (NR_STRIPE_HASH_LOCKS - 1)
+
struct r5worker {
struct work_struct work;
struct r5worker_group *group;
+ struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS];
bool working;
};
@@ -382,6 +392,8 @@ struct r5worker_group {
struct r5conf {
struct hlist_head *stripe_hashtbl;
+ /* only protect corresponding hash list and inactive_list */
+ spinlock_t hash_locks[NR_STRIPE_HASH_LOCKS];
struct mddev *mddev;
int chunk_sectors;
int level, algorithm;
@@ -462,7 +474,8 @@ struct r5conf {
* Free stripes pool
*/
atomic_t active_stripes;
- struct list_head inactive_list;
+ struct list_head inactive_list[NR_STRIPE_HASH_LOCKS];
+ atomic_t empty_inactive_list_nr;
struct llist_head released_stripes;
wait_queue_head_t wait_for_stripe;
wait_queue_head_t wait_for_overlap;
@@ -477,6 +490,7 @@ struct r5conf {
* the new thread here until we fully activate the array.
*/
struct md_thread *thread;
+ struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS];
struct r5worker_group *worker_groups;
int group_cnt;
int worker_cnt_per_group;
diff --git a/drivers/media/platform/m2m-deinterlace.c b/drivers/media/platform/m2m-deinterlace.c
index 36513e896413..65cab70fefcb 100644
--- a/drivers/media/platform/m2m-deinterlace.c
+++ b/drivers/media/platform/m2m-deinterlace.c
@@ -341,8 +341,7 @@ static void deinterlace_issue_dma(struct deinterlace_ctx *ctx, int op,
ctx->xt->dir = DMA_MEM_TO_MEM;
ctx->xt->src_sgl = false;
ctx->xt->dst_sgl = true;
- flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT |
- DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SKIP_SRC_UNMAP;
+ flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
tx = dmadev->device_prep_interleaved_dma(chan, ctx->xt, flags);
if (tx == NULL) {
diff --git a/drivers/media/platform/timblogiw.c b/drivers/media/platform/timblogiw.c
index 6a74ce040d28..ccdadd623a3a 100644
--- a/drivers/media/platform/timblogiw.c
+++ b/drivers/media/platform/timblogiw.c
@@ -565,7 +565,7 @@ static void buffer_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb)
desc = dmaengine_prep_slave_sg(fh->chan,
buf->sg, sg_elems, DMA_DEV_TO_MEM,
- DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+ DMA_PREP_INTERRUPT);
if (!desc) {
spin_lock_irq(&fh->queue_lock);
list_del_init(&vb->queue);
diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c
index 08b18f3f5264..9e2b985293fc 100644
--- a/drivers/misc/carma/carma-fpga.c
+++ b/drivers/misc/carma/carma-fpga.c
@@ -633,8 +633,7 @@ static int data_submit_dma(struct fpga_device *priv, struct data_buf *buf)
struct dma_async_tx_descriptor *tx;
dma_cookie_t cookie;
dma_addr_t dst, src;
- unsigned long dma_flags = DMA_COMPL_SKIP_DEST_UNMAP |
- DMA_COMPL_SKIP_SRC_UNMAP;
+ unsigned long dma_flags = 0;
dst_sg = buf->vb.sglist;
dst_nents = buf->vb.sglen;
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index ef8956568c3a..157b570ba343 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -308,8 +308,7 @@ static void sdio_acpi_set_handle(struct sdio_func *func)
struct mmc_host *host = func->card->host;
u64 addr = (host->slotno << 16) | func->num;
- ACPI_HANDLE_SET(&func->dev,
- acpi_get_child(ACPI_HANDLE(host->parent), addr));
+ acpi_preset_companion(&func->dev, ACPI_HANDLE(host->parent), addr);
}
#else
static inline void sdio_acpi_set_handle(struct sdio_func *func) {}
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index d78a97d4153a..59f08c44abdb 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -375,8 +375,7 @@ static int atmel_nand_dma_op(struct mtd_info *mtd, void *buf, int len,
dma_dev = host->dma_chan->device;
- flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
- DMA_COMPL_SKIP_DEST_UNMAP;
+ flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
phys_addr = dma_map_single(dma_dev->dev, p, len, dir);
if (dma_mapping_error(dma_dev->dev, phys_addr)) {
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 3dc1a7564d87..8b2752263db9 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -573,8 +573,6 @@ static int dma_xfer(struct fsmc_nand_data *host, void *buffer, int len,
dma_dev = chan->device;
dma_addr = dma_map_single(dma_dev->dev, buffer, len, direction);
- flags |= DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP;
-
if (direction == DMA_TO_DEVICE) {
dma_src = dma_addr;
dma_dst = host->data_pa;
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index 0951f7aca1ef..822616e3c375 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -459,8 +459,7 @@ static int ks8842_tx_frame_dma(struct sk_buff *skb, struct net_device *netdev)
sg_dma_len(&ctl->sg) += 4 - sg_dma_len(&ctl->sg) % 4;
ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
- &ctl->sg, 1, DMA_MEM_TO_DEV,
- DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+ &ctl->sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
if (!ctl->adesc)
return NETDEV_TX_BUSY;
@@ -571,8 +570,7 @@ static int __ks8842_start_new_rx_dma(struct net_device *netdev)
sg_dma_len(sg) = DMA_BUFFER_SIZE;
ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
- sg, 1, DMA_DEV_TO_MEM,
- DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+ sg, 1, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
if (!ctl->adesc)
goto out;
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 12a9e83c008b..d0222f13d154 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -1034,10 +1034,9 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
struct dma_chan *chan = qp->dma_chan;
struct dma_device *device;
size_t pay_off, buff_off;
- dma_addr_t src, dest;
+ struct dmaengine_unmap_data *unmap;
dma_cookie_t cookie;
void *buf = entry->buf;
- unsigned long flags;
entry->len = len;
@@ -1045,35 +1044,49 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
goto err;
if (len < copy_bytes)
- goto err1;
+ goto err_wait;
device = chan->device;
pay_off = (size_t) offset & ~PAGE_MASK;
buff_off = (size_t) buf & ~PAGE_MASK;
if (!is_dma_copy_aligned(device, pay_off, buff_off, len))
- goto err1;
+ goto err_wait;
- dest = dma_map_single(device->dev, buf, len, DMA_FROM_DEVICE);
- if (dma_mapping_error(device->dev, dest))
- goto err1;
+ unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
+ if (!unmap)
+ goto err_wait;
- src = dma_map_single(device->dev, offset, len, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dev, src))
- goto err2;
+ unmap->len = len;
+ unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset),
+ pay_off, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(device->dev, unmap->addr[0]))
+ goto err_get_unmap;
+
+ unmap->to_cnt = 1;
- flags = DMA_COMPL_DEST_UNMAP_SINGLE | DMA_COMPL_SRC_UNMAP_SINGLE |
- DMA_PREP_INTERRUPT;
- txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+ unmap->addr[1] = dma_map_page(device->dev, virt_to_page(buf),
+ buff_off, len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(device->dev, unmap->addr[1]))
+ goto err_get_unmap;
+
+ unmap->from_cnt = 1;
+
+ txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+ unmap->addr[0], len,
+ DMA_PREP_INTERRUPT);
if (!txd)
- goto err3;
+ goto err_get_unmap;
txd->callback = ntb_rx_copy_callback;
txd->callback_param = entry;
+ dma_set_unmap(txd, unmap);
cookie = dmaengine_submit(txd);
if (dma_submit_error(cookie))
- goto err3;
+ goto err_set_unmap;
+
+ dmaengine_unmap_put(unmap);
qp->last_cookie = cookie;
@@ -1081,11 +1094,11 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
return;
-err3:
- dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
-err2:
- dma_unmap_single(device->dev, dest, len, DMA_FROM_DEVICE);
-err1:
+err_set_unmap:
+ dmaengine_unmap_put(unmap);
+err_get_unmap:
+ dmaengine_unmap_put(unmap);
+err_wait:
/* If the callbacks come out of order, the writing of the index to the
* last completed will be out of order. This may result in the
* receive stalling forever.
@@ -1245,12 +1258,12 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
struct dma_chan *chan = qp->dma_chan;
struct dma_device *device;
size_t dest_off, buff_off;
- dma_addr_t src, dest;
+ struct dmaengine_unmap_data *unmap;
+ dma_addr_t dest;
dma_cookie_t cookie;
void __iomem *offset;
size_t len = entry->len;
void *buf = entry->buf;
- unsigned long flags;
offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1273,28 +1286,41 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
if (!is_dma_copy_aligned(device, buff_off, dest_off, len))
goto err;
- src = dma_map_single(device->dev, buf, len, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dev, src))
+ unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+ if (!unmap)
goto err;
- flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_PREP_INTERRUPT;
- txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+ unmap->len = len;
+ unmap->addr[0] = dma_map_page(device->dev, virt_to_page(buf),
+ buff_off, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(device->dev, unmap->addr[0]))
+ goto err_get_unmap;
+
+ unmap->to_cnt = 1;
+
+ txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
+ DMA_PREP_INTERRUPT);
if (!txd)
- goto err1;
+ goto err_get_unmap;
txd->callback = ntb_tx_copy_callback;
txd->callback_param = entry;
+ dma_set_unmap(txd, unmap);
cookie = dmaengine_submit(txd);
if (dma_submit_error(cookie))
- goto err1;
+ goto err_set_unmap;
+
+ dmaengine_unmap_put(unmap);
dma_async_issue_pending(chan);
qp->tx_async++;
return;
-err1:
- dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
+err_set_unmap:
+ dmaengine_unmap_put(unmap);
+err_get_unmap:
+ dmaengine_unmap_put(unmap);
err:
ntb_memcpy_tx(entry, offset);
qp->tx_memcpy++;
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 1ce8ee054f1a..a94d850ae228 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -367,7 +367,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL };
}
- handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+ handle = ACPI_HANDLE(&pdev->dev);
if (!handle) {
/*
* This hotplug controller was not listed in the ACPI name
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index 26100f510b10..1592dbe4f904 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -176,7 +176,6 @@ u8 acpiphp_get_latch_status(struct acpiphp_slot *slot);
u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot);
/* variables */
-extern bool acpiphp_debug;
extern bool acpiphp_disabled;
#endif /* _ACPIPHP_H */
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index ead7c534095e..cff7cadfc2e4 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -54,7 +54,7 @@ int pciehp_acpi_slot_detection_check(struct pci_dev *dev)
{
if (slot_detection_mode != PCIEHP_DETECT_ACPI)
return 0;
- if (acpi_pci_detect_ejectable(DEVICE_ACPI_HANDLE(&dev->dev)))
+ if (acpi_pci_detect_ejectable(ACPI_HANDLE(&dev->dev)))
return 0;
return -ENODEV;
}
@@ -96,7 +96,7 @@ static int __init dummy_probe(struct pcie_device *dev)
dup_slot_id++;
}
list_add_tail(&slot->list, &dummy_slots);
- handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+ handle = ACPI_HANDLE(&pdev->dev);
if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle))
acpi_slot_detected = 1;
return -ENODEV; /* dummy driver always returns error */
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index b2781dfe60e9..5b05a68cca6c 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -9,6 +9,7 @@
* Work to add BIOS PROM support was completed by Mike Habeck.
*/
+#include <linux/acpi.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -29,7 +30,6 @@
#include <asm/sn/sn_feature_sets.h>
#include <asm/sn/sn_sal.h>
#include <asm/sn/types.h>
-#include <linux/acpi.h>
#include <asm/sn/acpi.h>
#include "../pci.h"
@@ -414,7 +414,7 @@ static int enable_slot(struct hotplug_slot *bss_hotplug_slot)
acpi_handle rethandle;
acpi_status ret;
- phandle = PCI_CONTROLLER(slot->pci_bus)->acpi_handle;
+ phandle = acpi_device_handle(PCI_CONTROLLER(slot->pci_bus)->companion);
if (acpi_bus_get_device(phandle, &pdevice)) {
dev_dbg(&slot->pci_bus->self->dev,
@@ -495,7 +495,7 @@ static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
/* free the ACPI resources for the slot */
if (SN_ACPI_BASE_SUPPORT() &&
- PCI_CONTROLLER(slot->pci_bus)->acpi_handle) {
+ PCI_CONTROLLER(slot->pci_bus)->companion) {
unsigned long long adr;
struct acpi_device *device;
acpi_handle phandle;
@@ -504,7 +504,7 @@ static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
acpi_status ret;
/* Get the rootbus node pointer */
- phandle = PCI_CONTROLLER(slot->pci_bus)->acpi_handle;
+ phandle = acpi_device_handle(PCI_CONTROLLER(slot->pci_bus)->companion);
acpi_scan_lock_acquire();
/*
diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c
index 1b90579b233a..50ce68098298 100644
--- a/drivers/pci/ioapic.c
+++ b/drivers/pci/ioapic.c
@@ -37,7 +37,7 @@ static int ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent)
char *type;
struct resource *res;
- handle = DEVICE_ACPI_HANDLE(&dev->dev);
+ handle = ACPI_HANDLE(&dev->dev);
if (!handle)
return -EINVAL;
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index dfd1f59de729..f166126e28d1 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -173,14 +173,14 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
static bool acpi_pci_power_manageable(struct pci_dev *dev)
{
- acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+ acpi_handle handle = ACPI_HANDLE(&dev->dev);
return handle ? acpi_bus_power_manageable(handle) : false;
}
static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
{
- acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+ acpi_handle handle = ACPI_HANDLE(&dev->dev);
static const u8 state_conv[] = {
[PCI_D0] = ACPI_STATE_D0,
[PCI_D1] = ACPI_STATE_D1,
@@ -217,7 +217,7 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
static bool acpi_pci_can_wakeup(struct pci_dev *dev)
{
- acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+ acpi_handle handle = ACPI_HANDLE(&dev->dev);
return handle ? acpi_bus_can_wakeup(handle) : false;
}
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index edaed6f4da6c..d51f45aa669e 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -263,7 +263,7 @@ device_has_dsm(struct device *dev)
acpi_handle handle;
struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
- handle = DEVICE_ACPI_HANDLE(dev);
+ handle = ACPI_HANDLE(dev);
if (!handle)
return FALSE;
@@ -295,7 +295,7 @@ acpilabel_show(struct device *dev, struct device_attribute *attr, char *buf)
acpi_handle handle;
int length;
- handle = DEVICE_ACPI_HANDLE(dev);
+ handle = ACPI_HANDLE(dev);
if (!handle)
return -1;
@@ -316,7 +316,7 @@ acpiindex_show(struct device *dev, struct device_attribute *attr, char *buf)
acpi_handle handle;
int length;
- handle = DEVICE_ACPI_HANDLE(dev);
+ handle = ACPI_HANDLE(dev);
if (!handle)
return -1;
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index 605a9be55129..b9429fbf1cd8 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -519,7 +519,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
gmux_data->power_state = VGA_SWITCHEROO_ON;
- gmux_data->dhandle = DEVICE_ACPI_HANDLE(&pnp->dev);
+ gmux_data->dhandle = ACPI_HANDLE(&pnp->dev);
if (!gmux_data->dhandle) {
pr_err("Cannot find acpi handle for pnp device %s\n",
dev_name(&pnp->dev));
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index 747826d99059..14655a0f0431 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -89,7 +89,7 @@ static int pnpacpi_set_resources(struct pnp_dev *dev)
pnp_dbg(&dev->dev, "set resources\n");
- handle = DEVICE_ACPI_HANDLE(&dev->dev);
+ handle = ACPI_HANDLE(&dev->dev);
if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
return -ENODEV;
@@ -122,7 +122,7 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev)
dev_dbg(&dev->dev, "disable resources\n");
- handle = DEVICE_ACPI_HANDLE(&dev->dev);
+ handle = ACPI_HANDLE(&dev->dev);
if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
return 0;
@@ -144,7 +144,7 @@ static bool pnpacpi_can_wakeup(struct pnp_dev *dev)
struct acpi_device *acpi_dev;
acpi_handle handle;
- handle = DEVICE_ACPI_HANDLE(&dev->dev);
+ handle = ACPI_HANDLE(&dev->dev);
if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
return false;
@@ -159,7 +159,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
acpi_handle handle;
int error = 0;
- handle = DEVICE_ACPI_HANDLE(&dev->dev);
+ handle = ACPI_HANDLE(&dev->dev);
if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
return 0;
@@ -194,7 +194,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
static int pnpacpi_resume(struct pnp_dev *dev)
{
struct acpi_device *acpi_dev;
- acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+ acpi_handle handle = ACPI_HANDLE(&dev->dev);
int error = 0;
if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 15f166a470a7..007730222116 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -626,7 +626,7 @@ comment "Platform RTC drivers"
config RTC_DRV_CMOS
tristate "PC-style 'CMOS'"
- depends on X86 || ALPHA || ARM || M32R || ATARI || PPC || MIPS || SPARC64
+ depends on X86 || ARM || M32R || ATARI || PPC || MIPS || SPARC64
default y if X86
help
Say "yes" here to get direct support for the real time clock
@@ -643,6 +643,14 @@ config RTC_DRV_CMOS
This driver can also be built as a module. If so, the module
will be called rtc-cmos.
+config RTC_DRV_ALPHA
+ bool "Alpha PC-style CMOS"
+ depends on ALPHA
+ default y
+ help
+ Direct support for the real-time clock found on every Alpha
+ system, specifically MC146818 compatibles. If in doubt, say Y.
+
config RTC_DRV_VRTC
tristate "Virtual RTC for Intel MID platforms"
depends on X86_INTEL_MID
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 8b2cd8a5a2ff..c0da95e95702 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -428,6 +428,14 @@ static int __exit at91_rtc_remove(struct platform_device *pdev)
return 0;
}
+static void at91_rtc_shutdown(struct platform_device *pdev)
+{
+ /* Disable all interrupts */
+ at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM |
+ AT91_RTC_SECEV | AT91_RTC_TIMEV |
+ AT91_RTC_CALEV);
+}
+
#ifdef CONFIG_PM_SLEEP
/* AT91RM9200 RTC Power management control */
@@ -466,6 +474,7 @@ static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume);
static struct platform_driver at91_rtc_driver = {
.remove = __exit_p(at91_rtc_remove),
+ .shutdown = at91_rtc_shutdown,
.driver = {
.name = "at91_rtc",
.owner = THIS_MODULE,
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index b9f0192758d6..6d207afec8cb 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -150,7 +150,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
&dws->tx_sgl,
1,
DMA_MEM_TO_DEV,
- DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+ DMA_PREP_INTERRUPT);
txdesc->callback = dw_spi_dma_done;
txdesc->callback_param = dws;
@@ -173,7 +173,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
&dws->rx_sgl,
1,
DMA_DEV_TO_MEM,
- DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+ DMA_PREP_INTERRUPT);
rxdesc->callback = dw_spi_dma_done;
rxdesc->callback_param = dws;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8d85ddc46011..18cc625d887f 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -357,6 +357,19 @@ struct spi_device *spi_alloc_device(struct spi_master *master)
}
EXPORT_SYMBOL_GPL(spi_alloc_device);
+static void spi_dev_set_name(struct spi_device *spi)
+{
+ struct acpi_device *adev = ACPI_COMPANION(&spi->dev);
+
+ if (adev) {
+ dev_set_name(&spi->dev, "spi-%s", acpi_dev_name(adev));
+ return;
+ }
+
+ dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->master->dev),
+ spi->chip_select);
+}
+
/**
* spi_add_device - Add spi_device allocated with spi_alloc_device
* @spi: spi_device to register
@@ -383,9 +396,7 @@ int spi_add_device(struct spi_device *spi)
}
/* Set the bus ID string */
- dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->master->dev),
- spi->chip_select);
-
+ spi_dev_set_name(spi);
/* We need to make sure there's no other device with this
* chipselect **BEFORE** we call setup(), else we'll trash
@@ -1144,7 +1155,7 @@ static acpi_status acpi_spi_add_device(acpi_handle handle, u32 level,
return AE_NO_MEMORY;
}
- ACPI_HANDLE_SET(&spi->dev, handle);
+ ACPI_COMPANION_SET(&spi->dev, adev);
spi->irq = -1;
INIT_LIST_HEAD(&resource_list);
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 537750261aaa..7d8103cd3e2e 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1433,7 +1433,7 @@ static void work_fn_rx(struct work_struct *work)
desc = s->desc_rx[new];
if (dma_async_is_tx_complete(s->chan_rx, s->active_rx, NULL, NULL) !=
- DMA_SUCCESS) {
+ DMA_COMPLETE) {
/* Handle incomplete DMA receive */
struct dma_chan *chan = s->chan_rx;
struct shdma_desc *sh_desc = container_of(desc,
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 06cec635e703..a7c04e24ca48 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5501,6 +5501,6 @@ acpi_handle usb_get_hub_port_acpi_handle(struct usb_device *hdev,
if (!hub)
return NULL;
- return DEVICE_ACPI_HANDLE(&hub->ports[port1 - 1]->dev);
+ return ACPI_HANDLE(&hub->ports[port1 - 1]->dev);
}
#endif
diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c
index 255c14464bf2..4e243c37f17f 100644
--- a/drivers/usb/core/usb-acpi.c
+++ b/drivers/usb/core/usb-acpi.c
@@ -173,7 +173,7 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle)
}
/* root hub's parent is the usb hcd. */
- parent_handle = DEVICE_ACPI_HANDLE(dev->parent);
+ parent_handle = ACPI_HANDLE(dev->parent);
*handle = acpi_get_child(parent_handle, udev->portnum);
if (!*handle)
return -ENODEV;
@@ -194,7 +194,7 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle)
raw_port_num = usb_hcd_find_raw_port_number(hcd,
port_num);
- *handle = acpi_get_child(DEVICE_ACPI_HANDLE(&udev->dev),
+ *handle = acpi_get_child(ACPI_HANDLE(&udev->dev),
raw_port_num);
if (!*handle)
return -ENODEV;
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index d15f6e80479f..188825122aae 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -59,12 +59,12 @@ static int xen_add_device(struct device *dev)
add.flags = XEN_PCI_DEV_EXTFN;
#ifdef CONFIG_ACPI
- handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+ handle = ACPI_HANDLE(&pci_dev->dev);
if (!handle && pci_dev->bus->bridge)
- handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+ handle = ACPI_HANDLE(pci_dev->bus->bridge);
#ifdef CONFIG_PCI_IOV
if (!handle && pci_dev->is_virtfn)
- handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+ handle = ACPI_HANDLE(physfn->bus->bridge);
#endif
if (handle) {
acpi_status status;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index f039b104a98e..b03dd23feda8 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -43,23 +43,6 @@
#include "fid.h"
/**
- * v9fs_dentry_delete - called when dentry refcount equals 0
- * @dentry: dentry in question
- *
- * By returning 1 here we should remove cacheing of unused
- * dentry components.
- *
- */
-
-static int v9fs_dentry_delete(const struct dentry *dentry)
-{
- p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
- dentry->d_name.name, dentry);
-
- return 1;
-}
-
-/**
* v9fs_cached_dentry_delete - called when dentry refcount equals 0
* @dentry: dentry in question
*
@@ -134,6 +117,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = {
};
const struct dentry_operations v9fs_dentry_operations = {
- .d_delete = v9fs_dentry_delete,
+ .d_delete = always_delete_dentry,
.d_release = v9fs_dentry_release,
};
diff --git a/fs/aio.c b/fs/aio.c
index 823efcbb6ccd..08159ed13649 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -80,6 +80,8 @@ struct kioctx {
struct percpu_ref users;
atomic_t dead;
+ struct percpu_ref reqs;
+
unsigned long user_id;
struct __percpu kioctx_cpu *cpu;
@@ -107,7 +109,6 @@ struct kioctx {
struct page **ring_pages;
long nr_pages;
- struct rcu_head rcu_head;
struct work_struct free_work;
struct {
@@ -250,8 +251,10 @@ static void aio_free_ring(struct kioctx *ctx)
put_aio_ring_file(ctx);
- if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
+ if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
kfree(ctx->ring_pages);
+ ctx->ring_pages = NULL;
+ }
}
static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
@@ -463,26 +466,34 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
return cancel(kiocb);
}
-static void free_ioctx_rcu(struct rcu_head *head)
+static void free_ioctx(struct work_struct *work)
{
- struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+ struct kioctx *ctx = container_of(work, struct kioctx, free_work);
+ pr_debug("freeing %p\n", ctx);
+
+ aio_free_ring(ctx);
free_percpu(ctx->cpu);
kmem_cache_free(kioctx_cachep, ctx);
}
+static void free_ioctx_reqs(struct percpu_ref *ref)
+{
+ struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
+
+ INIT_WORK(&ctx->free_work, free_ioctx);
+ schedule_work(&ctx->free_work);
+}
+
/*
* When this function runs, the kioctx has been removed from the "hash table"
* and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
* now it's safe to cancel any that need to be.
*/
-static void free_ioctx(struct work_struct *work)
+static void free_ioctx_users(struct percpu_ref *ref)
{
- struct kioctx *ctx = container_of(work, struct kioctx, free_work);
- struct aio_ring *ring;
+ struct kioctx *ctx = container_of(ref, struct kioctx, users);
struct kiocb *req;
- unsigned cpu, avail;
- DEFINE_WAIT(wait);
spin_lock_irq(&ctx->ctx_lock);
@@ -496,54 +507,8 @@ static void free_ioctx(struct work_struct *work)
spin_unlock_irq(&ctx->ctx_lock);
- for_each_possible_cpu(cpu) {
- struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu);
-
- atomic_add(kcpu->reqs_available, &ctx->reqs_available);
- kcpu->reqs_available = 0;
- }
-
- while (1) {
- prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE);
-
- ring = kmap_atomic(ctx->ring_pages[0]);
- avail = (ring->head <= ring->tail)
- ? ring->tail - ring->head
- : ctx->nr_events - ring->head + ring->tail;
-
- atomic_add(avail, &ctx->reqs_available);
- ring->head = ring->tail;
- kunmap_atomic(ring);
-
- if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1)
- break;
-
- schedule();
- }
- finish_wait(&ctx->wait, &wait);
-
- WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1);
-
- aio_free_ring(ctx);
-
- pr_debug("freeing %p\n", ctx);
-
- /*
- * Here the call_rcu() is between the wait_event() for reqs_active to
- * hit 0, and freeing the ioctx.
- *
- * aio_complete() decrements reqs_active, but it has to touch the ioctx
- * after to issue a wakeup so we use rcu.
- */
- call_rcu(&ctx->rcu_head, free_ioctx_rcu);
-}
-
-static void free_ioctx_ref(struct percpu_ref *ref)
-{
- struct kioctx *ctx = container_of(ref, struct kioctx, users);
-
- INIT_WORK(&ctx->free_work, free_ioctx);
- schedule_work(&ctx->free_work);
+ percpu_ref_kill(&ctx->reqs);
+ percpu_ref_put(&ctx->reqs);
}
static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
@@ -602,6 +567,16 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
}
}
+static void aio_nr_sub(unsigned nr)
+{
+ spin_lock(&aio_nr_lock);
+ if (WARN_ON(aio_nr - nr > aio_nr))
+ aio_nr = 0;
+ else
+ aio_nr -= nr;
+ spin_unlock(&aio_nr_lock);
+}
+
/* ioctx_alloc
* Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
*/
@@ -639,8 +614,11 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
ctx->max_reqs = nr_events;
- if (percpu_ref_init(&ctx->users, free_ioctx_ref))
- goto out_freectx;
+ if (percpu_ref_init(&ctx->users, free_ioctx_users))
+ goto err;
+
+ if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+ goto err;
spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->completion_lock);
@@ -651,10 +629,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
ctx->cpu = alloc_percpu(struct kioctx_cpu);
if (!ctx->cpu)
- goto out_freeref;
+ goto err;
if (aio_setup_ring(ctx) < 0)
- goto out_freepcpu;
+ goto err;
atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
@@ -666,7 +644,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
aio_nr + nr_events < aio_nr) {
spin_unlock(&aio_nr_lock);
- goto out_cleanup;
+ err = -EAGAIN;
+ goto err;
}
aio_nr += ctx->max_reqs;
spin_unlock(&aio_nr_lock);
@@ -675,23 +654,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
err = ioctx_add_table(ctx, mm);
if (err)
- goto out_cleanup_put;
+ goto err_cleanup;
pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
ctx, ctx->user_id, mm, ctx->nr_events);
return ctx;
-out_cleanup_put:
- percpu_ref_put(&ctx->users);
-out_cleanup:
- err = -EAGAIN;
- aio_free_ring(ctx);
-out_freepcpu:
+err_cleanup:
+ aio_nr_sub(ctx->max_reqs);
+err:
free_percpu(ctx->cpu);
-out_freeref:
+ free_percpu(ctx->reqs.pcpu_count);
free_percpu(ctx->users.pcpu_count);
-out_freectx:
- put_aio_ring_file(ctx);
kmem_cache_free(kioctx_cachep, ctx);
pr_debug("error allocating ioctx %d\n", err);
return ERR_PTR(err);
@@ -726,10 +700,7 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
* -EAGAIN with no ioctxs actually in use (as far as userspace
* could tell).
*/
- spin_lock(&aio_nr_lock);
- BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
- aio_nr -= ctx->max_reqs;
- spin_unlock(&aio_nr_lock);
+ aio_nr_sub(ctx->max_reqs);
if (ctx->mmap_size)
vm_munmap(ctx->mmap_base, ctx->mmap_size);
@@ -861,6 +832,8 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
if (unlikely(!req))
goto out_put;
+ percpu_ref_get(&ctx->reqs);
+
req->ki_ctx = ctx;
return req;
out_put:
@@ -930,12 +903,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
return;
}
- /*
- * Take rcu_read_lock() in case the kioctx is being destroyed, as we
- * need to issue a wakeup after incrementing reqs_available.
- */
- rcu_read_lock();
-
if (iocb->ki_list.next) {
unsigned long flags;
@@ -1010,7 +977,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
- rcu_read_unlock();
+ percpu_ref_put(&ctx->reqs);
}
EXPORT_SYMBOL(aio_complete);
@@ -1421,6 +1388,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
return 0;
out_put_req:
put_reqs_available(ctx, 1);
+ percpu_ref_put(&ctx->reqs);
kiocb_free(req);
return ret;
}
diff --git a/fs/bio.c b/fs/bio.c
index 2bdb4e25ee77..33d79a4eb92d 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
*page, unsigned int len, unsigned int offset,
- unsigned short max_sectors)
+ unsigned int max_sectors)
{
int retried_segments = 0;
struct bio_vec *bvec;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index f9d5094e1029..aa976eced2d2 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -9,12 +9,17 @@ config BTRFS_FS
select XOR_BLOCKS
help
- Btrfs is a new filesystem with extents, writable snapshotting,
- support for multiple devices and many more features.
+ Btrfs is a general purpose copy-on-write filesystem with extents,
+ writable snapshotting, support for multiple devices and many more
+ features focused on fault tolerance, repair and easy administration.
- Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET
- FINALIZED. You should say N here unless you are interested in
- testing Btrfs with non-critical data.
+ The filesystem disk format is no longer unstable, and it's not
+ expected to change unless there are strong reasons to do so. If there
+ is a format change, file systems with a unchanged format will
+ continue to be mountable and usable by newer kernels.
+
+ For more information, please see the web pages at
+ http://btrfs.wiki.kernel.org.
To compile this file system support as a module, choose M here. The
module will be called btrfs.
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 8aec751fa464..c1e0b0caf9cc 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -495,6 +495,7 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
spin_lock_irq(&workers->lock);
if (workers->stopping) {
spin_unlock_irq(&workers->lock);
+ ret = -EINVAL;
goto fail_kthread;
}
list_add_tail(&worker->worker_list, &workers->idle_list);
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index e0aab4456974..b50764bef141 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -77,6 +77,15 @@
* the integrity of (super)-block write requests, do not
* enable the config option BTRFS_FS_CHECK_INTEGRITY to
* include and compile the integrity check tool.
+ *
+ * Expect millions of lines of information in the kernel log with an
+ * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
+ * kernel config to at least 26 (which is 64MB). Usually the value is
+ * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
+ * changed like this before LOG_BUF_SHIFT can be set to a high value:
+ * config LOG_BUF_SHIFT
+ * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
+ * range 12 30
*/
#include <linux/sched.h>
@@ -124,6 +133,7 @@
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
+#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000
struct btrfsic_dev_state;
struct btrfsic_state;
@@ -3015,6 +3025,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
(rw & WRITE) && NULL != bio->bi_io_vec) {
unsigned int i;
u64 dev_bytenr;
+ u64 cur_bytenr;
int bio_is_patched;
char **mapped_datav;
@@ -3033,6 +3044,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
GFP_NOFS);
if (!mapped_datav)
goto leave;
+ cur_bytenr = dev_bytenr;
for (i = 0; i < bio->bi_vcnt; i++) {
BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
@@ -3044,16 +3056,13 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
kfree(mapped_datav);
goto leave;
}
- if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
- BTRFSIC_PRINT_MASK_VERBOSE) ==
- (dev_state->state->print_mask &
- (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
- BTRFSIC_PRINT_MASK_VERBOSE)))
+ if (dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
printk(KERN_INFO
- "#%u: page=%p, len=%u, offset=%u\n",
- i, bio->bi_io_vec[i].bv_page,
- bio->bi_io_vec[i].bv_len,
+ "#%u: bytenr=%llu, len=%u, offset=%u\n",
+ i, cur_bytenr, bio->bi_io_vec[i].bv_len,
bio->bi_io_vec[i].bv_offset);
+ cur_bytenr += bio->bi_io_vec[i].bv_len;
}
btrfsic_process_written_block(dev_state, dev_bytenr,
mapped_datav, bio->bi_vcnt,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f9aeb2759a64..54ab86127f7a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3613,9 +3613,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum *sums);
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 file_start, int contig);
-int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- u64 isize);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
/* inode.c */
@@ -3744,9 +3741,6 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int skip_pinned);
-int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace,
- u64 start, u64 end, int skip_pinned,
- int modified);
extern const struct file_operations btrfs_file_operations;
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 342f9fd411e3..2cfc3dfff64f 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -366,7 +366,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
dev_replace->tgtdev = tgt_device;
printk_in_rcu(KERN_INFO
- "btrfs: dev_replace from %s (devid %llu) to %s) started\n",
+ "btrfs: dev_replace from %s (devid %llu) to %s started\n",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4c4ed0bb3da1..8072cfa8a3b1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3517,7 +3517,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
int btrfs_commit_super(struct btrfs_root *root)
{
struct btrfs_trans_handle *trans;
- int ret;
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
@@ -3531,25 +3530,7 @@ int btrfs_commit_super(struct btrfs_root *root)
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- return ret;
- /* run commit again to drop the original snapshot */
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- return ret;
- ret = btrfs_write_and_wait_transaction(NULL, root);
- if (ret) {
- btrfs_error(root->fs_info, ret,
- "Failed to sync btree inode to disk.");
- return ret;
- }
-
- ret = write_ctree_super(NULL, root, 0);
- return ret;
+ return btrfs_commit_transaction(trans, root);
}
int close_ctree(struct btrfs_root *root)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 856bc2b2192c..8e457fca0a0b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1980,6 +1980,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
int ret;
+ ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
BUG_ON(!mirror_num);
/* we can't repair anything in raid56 yet */
@@ -2036,6 +2037,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
int ret = 0;
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
@@ -2057,12 +2061,12 @@ static int clean_io_failure(u64 start, struct page *page)
u64 private;
u64 private_failure;
struct io_failure_record *failrec;
- struct btrfs_fs_info *fs_info;
+ struct inode *inode = page->mapping->host;
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct extent_state *state;
int num_copies;
int did_repair = 0;
int ret;
- struct inode *inode = page->mapping->host;
private = 0;
ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
@@ -2085,6 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page)
did_repair = 1;
goto out;
}
+ if (fs_info->sb->s_flags & MS_RDONLY)
+ goto out;
spin_lock(&BTRFS_I(inode)->io_tree.lock);
state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
@@ -2094,7 +2100,6 @@ static int clean_io_failure(u64 start, struct page *page)
if (state && state->start <= failrec->start &&
state->end >= failrec->start + failrec->len - 1) {
- fs_info = BTRFS_I(inode)->root->fs_info;
num_copies = btrfs_num_copies(fs_info, failrec->logical,
failrec->len);
if (num_copies > 1) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index da8d2f696ac5..f1a77449d032 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2129,7 +2129,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
old->extent_offset, fs_info,
path, record_one_backref,
old);
- BUG_ON(ret < 0 && ret != -ENOENT);
+ if (ret < 0 && ret != -ENOENT)
+ return false;
/* no backref to be processed for this extent */
if (!old->count) {
@@ -6186,8 +6187,7 @@ insert:
write_unlock(&em_tree->lock);
out:
- if (em)
- trace_btrfs_get_extent(root, em);
+ trace_btrfs_get_extent(root, em);
if (path)
btrfs_free_path(path);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 25a8f3812f14..69582d5b69d1 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -638,6 +638,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
WARN_ON(nr < 0);
}
}
+ list_splice_tail(&splice, &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
}
@@ -803,7 +804,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
btrfs_put_ordered_extent(ordered);
break;
}
- if (ordered->file_offset + ordered->len < start) {
+ if (ordered->file_offset + ordered->len <= start) {
btrfs_put_ordered_extent(ordered);
break;
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 2544805544f0..561e2f16ba3e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -938,8 +938,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
BTRFS_DEV_STAT_CORRUPTION_ERRS);
}
- if (sctx->readonly && !sctx->is_dev_replace)
- goto did_not_correct_error;
+ if (sctx->readonly) {
+ ASSERT(!sctx->is_dev_replace);
+ goto out;
+ }
if (!is_metadata && !have_csum) {
struct scrub_fixup_nodatasum *fixup_nodatasum;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 57c16b46afbd..c6a872a8a468 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1480,7 +1480,7 @@ static void do_async_commit(struct work_struct *work)
* We've got freeze protection passed with the transaction.
* Tell lockdep about it.
*/
- if (ac->newtrans->type < TRANS_JOIN_NOLOCK)
+ if (ac->newtrans->type & __TRANS_FREEZABLE)
rwsem_acquire_read(
&ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
0, 1, _THIS_IP_);
@@ -1521,7 +1521,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
* Tell lockdep we've released the freeze rwsem, since the
* async commit thread will be the one to unlock it.
*/
- if (trans->type < TRANS_JOIN_NOLOCK)
+ if (ac->newtrans->type & __TRANS_FREEZABLE)
rwsem_release(
&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1, _THIS_IP_);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 744553c83fe2..9f7fc51ca334 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3697,7 +3697,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
ret = btrfs_truncate_inode_items(trans, log,
inode, 0, 0);
} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
- &BTRFS_I(inode)->runtime_flags)) {
+ &BTRFS_I(inode)->runtime_flags) ||
+ inode_only == LOG_INODE_EXISTS) {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
max_key.type = BTRFS_XATTR_ITEM_KEY;
@@ -3801,7 +3802,7 @@ log_extents:
err = ret;
goto out_unlock;
}
- } else {
+ } else if (inode_only == LOG_INODE_ALL) {
struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em, *n;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0db637097862..92303f42baaa 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5394,7 +5394,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
{
struct bio_vec *prev;
struct request_queue *q = bdev_get_queue(bdev);
- unsigned short max_sectors = queue_max_sectors(q);
+ unsigned int max_sectors = queue_max_sectors(q);
struct bvec_merge_data bvm = {
.bi_bdev = bdev,
.bi_sector = sector,
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 277bd1be21fd..e081acbac2e7 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -56,29 +56,28 @@ static void configfs_d_iput(struct dentry * dentry,
struct configfs_dirent *sd = dentry->d_fsdata;
if (sd) {
- BUG_ON(sd->s_dentry != dentry);
/* Coordinate with configfs_readdir */
spin_lock(&configfs_dirent_lock);
- sd->s_dentry = NULL;
+ /* Coordinate with configfs_attach_attr where will increase
+ * sd->s_count and update sd->s_dentry to new allocated one.
+ * Only set sd->dentry to null when this dentry is the only
+ * sd owner.
+ * If not do so, configfs_d_iput may run just after
+ * configfs_attach_attr and set sd->s_dentry to null
+ * even it's still in use.
+ */
+ if (atomic_read(&sd->s_count) <= 2)
+ sd->s_dentry = NULL;
+
spin_unlock(&configfs_dirent_lock);
configfs_put(sd);
}
iput(inode);
}
-/*
- * We _must_ delete our dentries on last dput, as the chain-to-parent
- * behavior is required to clear the parents of default_groups.
- */
-static int configfs_d_delete(const struct dentry *dentry)
-{
- return 1;
-}
-
const struct dentry_operations configfs_dentry_ops = {
.d_iput = configfs_d_iput,
- /* simple_delete_dentry() isn't exported */
- .d_delete = configfs_d_delete,
+ .d_delete = always_delete_dentry,
};
#ifdef CONFIG_LOCKDEP
@@ -426,8 +425,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
struct configfs_attribute * attr = sd->s_element;
int error;
+ spin_lock(&configfs_dirent_lock);
dentry->d_fsdata = configfs_get(sd);
sd->s_dentry = dentry;
+ spin_unlock(&configfs_dirent_lock);
+
error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
configfs_init_file);
if (error) {
diff --git a/fs/coredump.c b/fs/coredump.c
index 62406b6959b6..bc3fbcd32558 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -695,7 +695,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
while (nr) {
if (dump_interrupted())
return 0;
- n = vfs_write(file, addr, nr, &pos);
+ n = __kernel_write(file, addr, nr, &pos);
if (n <= 0)
return 0;
file->f_pos = pos;
@@ -733,7 +733,7 @@ int dump_align(struct coredump_params *cprm, int align)
{
unsigned mod = cprm->written & (align - 1);
if (align & (align - 1))
- return -EINVAL;
- return mod ? dump_skip(cprm, align - mod) : 0;
+ return 0;
+ return mod ? dump_skip(cprm, align - mod) : 1;
}
EXPORT_SYMBOL(dump_align);
diff --git a/fs/dcache.c b/fs/dcache.c
index 0a38ef8d7f00..4bdb300b16e2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -88,35 +88,6 @@ EXPORT_SYMBOL(rename_lock);
static struct kmem_cache *dentry_cache __read_mostly;
-/**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
- *
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
- */
-static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
-{
- if (!(*seq & 1)) /* Even */
- *seq = read_seqbegin(lock);
- else /* Odd */
- read_seqlock_excl(lock);
-}
-
-static inline int need_seqretry(seqlock_t *lock, int seq)
-{
- return !(seq & 1) && read_seqretry(lock, seq);
-}
-
-static inline void done_seqretry(seqlock_t *lock, int seq)
-{
- if (seq & 1)
- read_sequnlock_excl(lock);
-}
-
/*
* This is the single most critical data structure when it comes
* to the dcache: the hashtable for lookups. Somebody should try
@@ -125,8 +96,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq)
* This hash-function tries to avoid losing too many bits of hash
* information, yet avoid using a prime hash-size or similar.
*/
-#define D_HASHBITS d_hash_shift
-#define D_HASHMASK d_hash_mask
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
@@ -137,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
unsigned int hash)
{
hash += (unsigned long) parent / L1_CACHE_BYTES;
- hash = hash + (hash >> D_HASHBITS);
- return dentry_hashtable + (hash & D_HASHMASK);
+ hash = hash + (hash >> d_hash_shift);
+ return dentry_hashtable + (hash & d_hash_mask);
}
/* Statistics gathering. */
@@ -469,7 +438,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
{
list_del(&dentry->d_u.d_child);
/*
- * Inform try_to_ascend() that we are no longer attached to the
+ * Inform d_walk() that we are no longer attached to the
* dentry tree
*/
dentry->d_flags |= DCACHE_DENTRY_KILLED;
@@ -1069,34 +1038,6 @@ void shrink_dcache_sb(struct super_block *sb)
}
EXPORT_SYMBOL(shrink_dcache_sb);
-/*
- * This tries to ascend one level of parenthood, but
- * we can race with renaming, so we need to re-check
- * the parenthood after dropping the lock and check
- * that the sequence number still matches.
- */
-static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
-{
- struct dentry *new = old->d_parent;
-
- rcu_read_lock();
- spin_unlock(&old->d_lock);
- spin_lock(&new->d_lock);
-
- /*
- * might go back up the wrong parent if we have had a rename
- * or deletion
- */
- if (new != old->d_parent ||
- (old->d_flags & DCACHE_DENTRY_KILLED) ||
- need_seqretry(&rename_lock, seq)) {
- spin_unlock(&new->d_lock);
- new = NULL;
- }
- rcu_read_unlock();
- return new;
-}
-
/**
* enum d_walk_ret - action to talke during tree walk
* @D_WALK_CONTINUE: contrinue walk
@@ -1185,9 +1126,24 @@ resume:
*/
if (this_parent != parent) {
struct dentry *child = this_parent;
- this_parent = try_to_ascend(this_parent, seq);
- if (!this_parent)
+ this_parent = child->d_parent;
+
+ rcu_read_lock();
+ spin_unlock(&child->d_lock);
+ spin_lock(&this_parent->d_lock);
+
+ /*
+ * might go back up the wrong parent if we have had a rename
+ * or deletion
+ */
+ if (this_parent != child->d_parent ||
+ (child->d_flags & DCACHE_DENTRY_KILLED) ||
+ need_seqretry(&rename_lock, seq)) {
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
goto rename_retry;
+ }
+ rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index a8766b880c07..becc725a1953 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -83,19 +83,10 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr)
return 0;
}
-/*
- * Retaining negative dentries for an in-memory filesystem just wastes
- * memory and lookup time: arrange for them to be deleted immediately.
- */
-static int efivarfs_delete_dentry(const struct dentry *dentry)
-{
- return 1;
-}
-
static struct dentry_operations efivarfs_d_ops = {
.d_compare = efivarfs_d_compare,
.d_hash = efivarfs_d_hash,
- .d_delete = efivarfs_delete_dentry,
+ .d_delete = always_delete_dentry,
};
static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name)
diff --git a/fs/exec.c b/fs/exec.c
index 977319fd77f3..7ea097f6b341 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1380,10 +1380,6 @@ int search_binary_handler(struct linux_binprm *bprm)
if (retval)
return retval;
- retval = audit_bprm(bprm);
- if (retval)
- return retval;
-
retval = -ENOENT;
retry:
read_lock(&binfmt_lock);
@@ -1431,6 +1427,7 @@ static int exec_binprm(struct linux_binprm *bprm)
ret = search_binary_handler(bprm);
if (ret >= 0) {
+ audit_bprm(bprm);
trace_sched_process_exec(current, old_pid, bprm);
ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
current->did_exec = 1;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e66a8009aff1..c8420f7e4db6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1899,7 +1899,8 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
gi->nhash = 0;
}
/* Skip entries for other sb and dead entries */
- } while (gi->sdp != gi->gl->gl_sbd || __lockref_is_dead(&gl->gl_lockref));
+ } while (gi->sdp != gi->gl->gl_sbd ||
+ __lockref_is_dead(&gi->gl->gl_lockref));
return 0;
}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 1615df16cf4e..7119504159f1 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1171,8 +1171,11 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
if (d != NULL)
dentry = d;
if (dentry->d_inode) {
- if (!(*opened & FILE_OPENED))
+ if (!(*opened & FILE_OPENED)) {
+ if (d == NULL)
+ dget(dentry);
return finish_no_open(file, dentry);
+ }
dput(d);
return 0;
}
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index c8423d6de6c3..2a6ba06bee6f 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -466,19 +466,19 @@ static void gdlm_cancel(struct gfs2_glock *gl)
static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
char *lvb_bits)
{
- uint32_t gen;
+ __le32 gen;
memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
- memcpy(&gen, lvb_bits, sizeof(uint32_t));
+ memcpy(&gen, lvb_bits, sizeof(__le32));
*lvb_gen = le32_to_cpu(gen);
}
static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
char *lvb_bits)
{
- uint32_t gen;
+ __le32 gen;
memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
gen = cpu_to_le32(lvb_gen);
- memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t));
+ memcpy(ls->ls_control_lvb, &gen, sizeof(__le32));
}
static int all_jid_bits_clear(char *lvb)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 453b50eaddec..98236d0df3ca 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -667,7 +667,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
struct buffer_head *bh;
struct page *page;
void *kaddr, *ptr;
- struct gfs2_quota q, *qp;
+ struct gfs2_quota q;
int err, nbytes;
u64 size;
@@ -683,28 +683,25 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
return err;
err = -EIO;
- qp = &q;
- qp->qu_value = be64_to_cpu(qp->qu_value);
- qp->qu_value += change;
- qp->qu_value = cpu_to_be64(qp->qu_value);
- qd->qd_qb.qb_value = qp->qu_value;
+ be64_add_cpu(&q.qu_value, change);
+ qd->qd_qb.qb_value = q.qu_value;
if (fdq) {
if (fdq->d_fieldmask & FS_DQ_BSOFT) {
- qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
- qd->qd_qb.qb_warn = qp->qu_warn;
+ q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
+ qd->qd_qb.qb_warn = q.qu_warn;
}
if (fdq->d_fieldmask & FS_DQ_BHARD) {
- qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
- qd->qd_qb.qb_limit = qp->qu_limit;
+ q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
+ qd->qd_qb.qb_limit = q.qu_limit;
}
if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
- qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
- qd->qd_qb.qb_value = qp->qu_value;
+ q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
+ qd->qd_qb.qb_value = q.qu_value;
}
}
/* Write the quota into the quota file on disk */
- ptr = qp;
+ ptr = &q;
nbytes = sizeof(struct gfs2_quota);
get_a_page:
page = find_or_create_page(mapping, index, GFP_NOFS);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 4d83abdd5635..c8d6161bd682 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1127,7 +1127,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
rgd->rd_free_clone = rgd->rd_free;
}
- if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
+ if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
rgd->rd_bits[0].bi_bh->b_data);
@@ -1161,7 +1161,7 @@ int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
if (rgd->rd_flags & GFS2_RDF_UPTODATE)
return 0;
- if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
+ if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
return gfs2_rgrp_bh_get(rgd);
rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 25437280a207..db23ce1bd903 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -33,15 +33,6 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
#define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file))
-static int hostfs_d_delete(const struct dentry *dentry)
-{
- return 1;
-}
-
-static const struct dentry_operations hostfs_dentry_ops = {
- .d_delete = hostfs_d_delete,
-};
-
/* Changed in hostfs_args before the kernel starts running */
static char *root_ino = "";
static int append = 0;
@@ -925,7 +916,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
sb->s_blocksize_bits = 10;
sb->s_magic = HOSTFS_SUPER_MAGIC;
sb->s_op = &hostfs_sbops;
- sb->s_d_op = &hostfs_dentry_ops;
+ sb->s_d_op = &simple_dentry_operations;
sb->s_maxbytes = MAX_LFS_FILESIZE;
/* NULL is printed as <NULL> by sprintf: avoid that. */
diff --git a/fs/libfs.c b/fs/libfs.c
index 5de06947ba5e..a1844244246f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -47,10 +47,16 @@ EXPORT_SYMBOL(simple_statfs);
* Retaining negative dentries for an in-memory filesystem just wastes
* memory and lookup time: arrange for them to be deleted immediately.
*/
-static int simple_delete_dentry(const struct dentry *dentry)
+int always_delete_dentry(const struct dentry *dentry)
{
return 1;
}
+EXPORT_SYMBOL(always_delete_dentry);
+
+const struct dentry_operations simple_dentry_operations = {
+ .d_delete = always_delete_dentry,
+};
+EXPORT_SYMBOL(simple_dentry_operations);
/*
* Lookup the data. This is trivial - if the dentry didn't already
@@ -58,10 +64,6 @@ static int simple_delete_dentry(const struct dentry *dentry)
*/
struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
- static const struct dentry_operations simple_dentry_operations = {
- .d_delete = simple_delete_dentry,
- };
-
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
if (!dentry->d_sb->s_d_op)
diff --git a/fs/namei.c b/fs/namei.c
index e029a4cbff7d..8f77a8cea289 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2435,6 +2435,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
*/
static inline int may_create(struct inode *dir, struct dentry *child)
{
+ audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
if (child->d_inode)
return -EEXIST;
if (IS_DEADDIR(dir))
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 088de1355e93..ee7237f99f54 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -141,8 +141,8 @@ xdr_error: \
static void next_decode_page(struct nfsd4_compoundargs *argp)
{
- argp->pagelist++;
argp->p = page_address(argp->pagelist[0]);
+ argp->pagelist++;
if (argp->pagelen < PAGE_SIZE) {
argp->end = argp->p + (argp->pagelen>>2);
argp->pagelen = 0;
@@ -1229,6 +1229,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
len -= pages * PAGE_SIZE;
argp->p = (__be32 *)page_address(argp->pagelist[0]);
+ argp->pagelist++;
argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
}
argp->p += XDR_QUADLEN(len);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 94b5f5d2bfed..7eea63cada1d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -298,41 +298,12 @@ commit_metadata(struct svc_fh *fhp)
}
/*
- * Set various file attributes.
- * N.B. After this call fhp needs an fh_put
+ * Go over the attributes and take care of the small differences between
+ * NFS semantics and what Linux expects.
*/
-__be32
-nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
- int check_guard, time_t guardtime)
+static void
+nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
- struct dentry *dentry;
- struct inode *inode;
- int accmode = NFSD_MAY_SATTR;
- umode_t ftype = 0;
- __be32 err;
- int host_err;
- int size_change = 0;
-
- if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
- accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
- if (iap->ia_valid & ATTR_SIZE)
- ftype = S_IFREG;
-
- /* Get inode */
- err = fh_verify(rqstp, fhp, ftype, accmode);
- if (err)
- goto out;
-
- dentry = fhp->fh_dentry;
- inode = dentry->d_inode;
-
- /* Ignore any mode updates on symlinks */
- if (S_ISLNK(inode->i_mode))
- iap->ia_valid &= ~ATTR_MODE;
-
- if (!iap->ia_valid)
- goto out;
-
/*
* NFSv2 does not differentiate between "set-[ac]time-to-now"
* which only requires access, and "set-[ac]time-to-X" which
@@ -342,8 +313,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
* convert to "set to now" instead of "set to explicit time"
*
* We only call inode_change_ok as the last test as technically
- * it is not an interface that we should be using. It is only
- * valid if the filesystem does not define it's own i_op->setattr.
+ * it is not an interface that we should be using.
*/
#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
#define MAX_TOUCH_TIME_ERROR (30*60)
@@ -369,30 +339,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid &= ~BOTH_TIME_SET;
}
}
-
- /*
- * The size case is special.
- * It changes the file as well as the attributes.
- */
- if (iap->ia_valid & ATTR_SIZE) {
- if (iap->ia_size < inode->i_size) {
- err = nfsd_permission(rqstp, fhp->fh_export, dentry,
- NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
- if (err)
- goto out;
- }
-
- host_err = get_write_access(inode);
- if (host_err)
- goto out_nfserr;
-
- size_change = 1;
- host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
- if (host_err) {
- put_write_access(inode);
- goto out_nfserr;
- }
- }
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
@@ -415,32 +361,111 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
}
}
+}
- /* Change the attributes. */
+static __be32
+nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct iattr *iap)
+{
+ struct inode *inode = fhp->fh_dentry->d_inode;
+ int host_err;
- iap->ia_valid |= ATTR_CTIME;
+ if (iap->ia_size < inode->i_size) {
+ __be32 err;
- err = nfserr_notsync;
- if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
- host_err = nfsd_break_lease(inode);
- if (host_err)
- goto out_nfserr;
- fh_lock(fhp);
+ err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+ NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
+ if (err)
+ return err;
+ }
- host_err = notify_change(dentry, iap, NULL);
- err = nfserrno(host_err);
- fh_unlock(fhp);
+ host_err = get_write_access(inode);
+ if (host_err)
+ goto out_nfserrno;
+
+ host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
+ if (host_err)
+ goto out_put_write_access;
+ return 0;
+
+out_put_write_access:
+ put_write_access(inode);
+out_nfserrno:
+ return nfserrno(host_err);
+}
+
+/*
+ * Set various file attributes. After this call fhp needs an fh_put.
+ */
+__be32
+nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
+ int check_guard, time_t guardtime)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+ int accmode = NFSD_MAY_SATTR;
+ umode_t ftype = 0;
+ __be32 err;
+ int host_err;
+ int size_change = 0;
+
+ if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+ accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
+ if (iap->ia_valid & ATTR_SIZE)
+ ftype = S_IFREG;
+
+ /* Get inode */
+ err = fh_verify(rqstp, fhp, ftype, accmode);
+ if (err)
+ goto out;
+
+ dentry = fhp->fh_dentry;
+ inode = dentry->d_inode;
+
+ /* Ignore any mode updates on symlinks */
+ if (S_ISLNK(inode->i_mode))
+ iap->ia_valid &= ~ATTR_MODE;
+
+ if (!iap->ia_valid)
+ goto out;
+
+ nfsd_sanitize_attrs(inode, iap);
+
+ /*
+ * The size case is special, it changes the file in addition to the
+ * attributes.
+ */
+ if (iap->ia_valid & ATTR_SIZE) {
+ err = nfsd_get_write_access(rqstp, fhp, iap);
+ if (err)
+ goto out;
+ size_change = 1;
}
+
+ iap->ia_valid |= ATTR_CTIME;
+
+ if (check_guard && guardtime != inode->i_ctime.tv_sec) {
+ err = nfserr_notsync;
+ goto out_put_write_access;
+ }
+
+ host_err = nfsd_break_lease(inode);
+ if (host_err)
+ goto out_put_write_access_nfserror;
+
+ fh_lock(fhp);
+ host_err = notify_change(dentry, iap, NULL);
+ fh_unlock(fhp);
+
+out_put_write_access_nfserror:
+ err = nfserrno(host_err);
+out_put_write_access:
if (size_change)
put_write_access(inode);
if (!err)
commit_metadata(fhp);
out:
return err;
-
-out_nfserr:
- err = nfserrno(host_err);
- goto out;
}
#if defined(CONFIG_NFSD_V2_ACL) || \
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1485e38daaa3..03c8d747be48 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1151,10 +1151,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
goto out_free_page;
}
- kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
- if (!uid_valid(kloginuid)) {
- length = -EINVAL;
- goto out_free_page;
+
+ /* is userspace tring to explicitly UNSET the loginuid? */
+ if (loginuid == AUDIT_UID_UNSET) {
+ kloginuid = INVALID_UID;
+ } else {
+ kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
+ if (!uid_valid(kloginuid)) {
+ length = -EINVAL;
+ goto out_free_page;
+ }
}
length = audit_set_loginuid(kloginuid);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 737e15615b04..cca93b6fb9a9 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -175,22 +175,6 @@ static const struct inode_operations proc_link_inode_operations = {
};
/*
- * As some entries in /proc are volatile, we want to
- * get rid of unused dentries. This could be made
- * smarter: we could keep a "volatile" flag in the
- * inode to indicate which ones to keep.
- */
-static int proc_delete_dentry(const struct dentry * dentry)
-{
- return 1;
-}
-
-static const struct dentry_operations proc_dentry_operations =
-{
- .d_delete = proc_delete_dentry,
-};
-
-/*
* Don't create negative dentries here, return -ENOENT by hand
* instead.
*/
@@ -209,7 +193,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
inode = proc_get_inode(dir->i_sb, de);
if (!inode)
return ERR_PTR(-ENOMEM);
- d_set_d_op(dentry, &proc_dentry_operations);
+ d_set_d_op(dentry, &simple_dentry_operations);
d_add(dentry, inode);
return NULL;
}
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 49a7fff2e83a..9ae46b87470d 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = {
.setattr = proc_setattr,
};
-static int ns_delete_dentry(const struct dentry *dentry)
-{
- /* Don't cache namespace inodes when not in use */
- return 1;
-}
-
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
{
struct inode *inode = dentry->d_inode;
@@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
const struct dentry_operations ns_dentry_operations =
{
- .d_delete = ns_delete_dentry,
+ .d_delete = always_delete_dentry,
.d_dname = ns_dname,
};
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index c70111ebefd4..b6fa8657dcbc 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -25,6 +25,78 @@ config SQUASHFS
If unsure, say N.
+choice
+ prompt "File decompression options"
+ depends on SQUASHFS
+ help
+ Squashfs now supports two options for decompressing file
+ data. Traditionally Squashfs has decompressed into an
+ intermediate buffer and then memcopied it into the page cache.
+ Squashfs now supports the ability to decompress directly into
+ the page cache.
+
+ If unsure, select "Decompress file data into an intermediate buffer"
+
+config SQUASHFS_FILE_CACHE
+ bool "Decompress file data into an intermediate buffer"
+ help
+ Decompress file data into an intermediate buffer and then
+ memcopy it into the page cache.
+
+config SQUASHFS_FILE_DIRECT
+ bool "Decompress files directly into the page cache"
+ help
+ Directly decompress file data into the page cache.
+ Doing so can significantly improve performance because
+ it eliminates a memcpy and it also removes the lock contention
+ on the single buffer.
+
+endchoice
+
+choice
+ prompt "Decompressor parallelisation options"
+ depends on SQUASHFS
+ help
+ Squashfs now supports three parallelisation options for
+ decompression. Each one exhibits various trade-offs between
+ decompression performance and CPU and memory usage.
+
+ If in doubt, select "Single threaded compression"
+
+config SQUASHFS_DECOMP_SINGLE
+ bool "Single threaded compression"
+ help
+ Traditionally Squashfs has used single-threaded decompression.
+ Only one block (data or metadata) can be decompressed at any
+ one time. This limits CPU and memory usage to a minimum.
+
+config SQUASHFS_DECOMP_MULTI
+ bool "Use multiple decompressors for parallel I/O"
+ help
+ By default Squashfs uses a single decompressor but it gives
+ poor performance on parallel I/O workloads when using multiple CPU
+ machines due to waiting on decompressor availability.
+
+ If you have a parallel I/O workload and your system has enough memory,
+ using this option may improve overall I/O performance.
+
+ This decompressor implementation uses up to two parallel
+ decompressors per core. It dynamically allocates decompressors
+ on a demand basis.
+
+config SQUASHFS_DECOMP_MULTI_PERCPU
+ bool "Use percpu multiple decompressors for parallel I/O"
+ help
+ By default Squashfs uses a single decompressor but it gives
+ poor performance on parallel I/O workloads when using multiple CPU
+ machines due to waiting on decompressor availability.
+
+ This decompressor implementation uses a maximum of one
+ decompressor per core. It uses percpu variables to ensure
+ decompression is load-balanced across the cores.
+
+endchoice
+
config SQUASHFS_XATTR
bool "Squashfs XATTR support"
depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 110b0476f3b4..4132520b4ff2 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,6 +5,11 @@
obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o decompressor.o
+squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
+squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 41d108ecc9be..0cea9b9236d0 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -36,6 +36,7 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
+#include "page_actor.h"
/*
* Read the metadata block length, this is stored in the first two
@@ -86,16 +87,16 @@ static struct buffer_head *get_block_length(struct super_block *sb,
* generated a larger block - this does occasionally happen with compression
* algorithms).
*/
-int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
- int length, u64 *next_index, int srclength, int pages)
+int squashfs_read_data(struct super_block *sb, u64 index, int length,
+ u64 *next_index, struct squashfs_page_actor *output)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
struct buffer_head **bh;
int offset = index & ((1 << msblk->devblksize_log2) - 1);
u64 cur_index = index >> msblk->devblksize_log2;
- int bytes, compressed, b = 0, k = 0, page = 0, avail;
+ int bytes, compressed, b = 0, k = 0, avail, i;
- bh = kcalloc(((srclength + msblk->devblksize - 1)
+ bh = kcalloc(((output->length + msblk->devblksize - 1)
>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
if (bh == NULL)
return -ENOMEM;
@@ -111,9 +112,9 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
*next_index = index + length;
TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
- index, compressed ? "" : "un", length, srclength);
+ index, compressed ? "" : "un", length, output->length);
- if (length < 0 || length > srclength ||
+ if (length < 0 || length > output->length ||
(index + length) > msblk->bytes_used)
goto read_failure;
@@ -145,7 +146,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
compressed ? "" : "un", length);
- if (length < 0 || length > srclength ||
+ if (length < 0 || length > output->length ||
(index + length) > msblk->bytes_used)
goto block_release;
@@ -158,9 +159,15 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
ll_rw_block(READ, b - 1, bh + 1);
}
+ for (i = 0; i < b; i++) {
+ wait_on_buffer(bh[i]);
+ if (!buffer_uptodate(bh[i]))
+ goto block_release;
+ }
+
if (compressed) {
- length = squashfs_decompress(msblk, buffer, bh, b, offset,
- length, srclength, pages);
+ length = squashfs_decompress(msblk, bh, b, offset, length,
+ output);
if (length < 0)
goto read_failure;
} else {
@@ -168,22 +175,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
* Block is uncompressed.
*/
int in, pg_offset = 0;
+ void *data = squashfs_first_page(output);
for (bytes = length; k < b; k++) {
in = min(bytes, msblk->devblksize - offset);
bytes -= in;
- wait_on_buffer(bh[k]);
- if (!buffer_uptodate(bh[k]))
- goto block_release;
while (in) {
if (pg_offset == PAGE_CACHE_SIZE) {
- page++;
+ data = squashfs_next_page(output);
pg_offset = 0;
}
avail = min_t(int, in, PAGE_CACHE_SIZE -
pg_offset);
- memcpy(buffer[page] + pg_offset,
- bh[k]->b_data + offset, avail);
+ memcpy(data + pg_offset, bh[k]->b_data + offset,
+ avail);
in -= avail;
pg_offset += avail;
offset += avail;
@@ -191,6 +196,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
offset = 0;
put_bh(bh[k]);
}
+ squashfs_finish_page(output);
}
kfree(bh);
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index af0b73802592..1cb70a0b2168 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -56,6 +56,7 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs.h"
+#include "page_actor.h"
/*
* Look-up block in cache, and increment usage count. If not in cache, read
@@ -119,9 +120,8 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
entry->error = 0;
spin_unlock(&cache->lock);
- entry->length = squashfs_read_data(sb, entry->data,
- block, length, &entry->next_index,
- cache->block_size, cache->pages);
+ entry->length = squashfs_read_data(sb, block, length,
+ &entry->next_index, entry->actor);
spin_lock(&cache->lock);
@@ -220,6 +220,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
kfree(cache->entry[i].data[j]);
kfree(cache->entry[i].data);
}
+ kfree(cache->entry[i].actor);
}
kfree(cache->entry);
@@ -280,6 +281,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
goto cleanup;
}
}
+
+ entry->actor = squashfs_page_actor_init(entry->data,
+ cache->pages, 0);
+ if (entry->actor == NULL) {
+ ERROR("Failed to allocate %s cache entry\n", name);
+ goto cleanup;
+ }
}
return cache;
@@ -410,6 +418,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
int i, res;
void *table, *buffer, **data;
+ struct squashfs_page_actor *actor;
table = buffer = kmalloc(length, GFP_KERNEL);
if (table == NULL)
@@ -421,19 +430,28 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
goto failed;
}
+ actor = squashfs_page_actor_init(data, pages, length);
+ if (actor == NULL) {
+ res = -ENOMEM;
+ goto failed2;
+ }
+
for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
data[i] = buffer;
- res = squashfs_read_data(sb, data, block, length |
- SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
+ res = squashfs_read_data(sb, block, length |
+ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor);
kfree(data);
+ kfree(actor);
if (res < 0)
goto failed;
return table;
+failed2:
+ kfree(data);
failed:
kfree(table);
return ERR_PTR(res);
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 3f6271d86abc..ac22fe73b0ad 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -30,6 +30,7 @@
#include "squashfs_fs_sb.h"
#include "decompressor.h"
#include "squashfs.h"
+#include "page_actor.h"
/*
* This file (and decompressor.h) implements a decompressor framework for
@@ -37,29 +38,29 @@
*/
static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
- NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
+ NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
};
#ifndef CONFIG_SQUASHFS_LZO
static const struct squashfs_decompressor squashfs_lzo_comp_ops = {
- NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
+ NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
};
#endif
#ifndef CONFIG_SQUASHFS_XZ
static const struct squashfs_decompressor squashfs_xz_comp_ops = {
- NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
+ NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
};
#endif
#ifndef CONFIG_SQUASHFS_ZLIB
static const struct squashfs_decompressor squashfs_zlib_comp_ops = {
- NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
+ NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
};
#endif
static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
- NULL, NULL, NULL, 0, "unknown", 0
+ NULL, NULL, NULL, NULL, 0, "unknown", 0
};
static const struct squashfs_decompressor *decompressor[] = {
@@ -83,10 +84,11 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
}
-void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
+static void *get_comp_opts(struct super_block *sb, unsigned short flags)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- void *strm, *buffer = NULL;
+ void *buffer = NULL, *comp_opts;
+ struct squashfs_page_actor *actor = NULL;
int length = 0;
/*
@@ -94,23 +96,46 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
*/
if (SQUASHFS_COMP_OPTS(flags)) {
buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
- if (buffer == NULL)
- return ERR_PTR(-ENOMEM);
+ if (buffer == NULL) {
+ comp_opts = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ actor = squashfs_page_actor_init(&buffer, 1, 0);
+ if (actor == NULL) {
+ comp_opts = ERR_PTR(-ENOMEM);
+ goto out;
+ }
- length = squashfs_read_data(sb, &buffer,
- sizeof(struct squashfs_super_block), 0, NULL,
- PAGE_CACHE_SIZE, 1);
+ length = squashfs_read_data(sb,
+ sizeof(struct squashfs_super_block), 0, NULL, actor);
if (length < 0) {
- strm = ERR_PTR(length);
- goto finished;
+ comp_opts = ERR_PTR(length);
+ goto out;
}
}
- strm = msblk->decompressor->init(msblk, buffer, length);
+ comp_opts = squashfs_comp_opts(msblk, buffer, length);
-finished:
+out:
+ kfree(actor);
kfree(buffer);
+ return comp_opts;
+}
+
+
+void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ void *stream, *comp_opts = get_comp_opts(sb, flags);
+
+ if (IS_ERR(comp_opts))
+ return comp_opts;
+
+ stream = squashfs_decompressor_create(msblk, comp_opts);
+ if (IS_ERR(stream))
+ kfree(comp_opts);
- return strm;
+ return stream;
}
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 330073e29029..af0985321808 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -24,28 +24,22 @@
*/
struct squashfs_decompressor {
- void *(*init)(struct squashfs_sb_info *, void *, int);
+ void *(*init)(struct squashfs_sb_info *, void *);
+ void *(*comp_opts)(struct squashfs_sb_info *, void *, int);
void (*free)(void *);
- int (*decompress)(struct squashfs_sb_info *, void **,
- struct buffer_head **, int, int, int, int, int);
+ int (*decompress)(struct squashfs_sb_info *, void *,
+ struct buffer_head **, int, int, int,
+ struct squashfs_page_actor *);
int id;
char *name;
int supported;
};
-static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
- void *s)
+static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk,
+ void *buff, int length)
{
- if (msblk->decompressor)
- msblk->decompressor->free(s);
-}
-
-static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
- void **buffer, struct buffer_head **bh, int b, int offset, int length,
- int srclength, int pages)
-{
- return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
- length, srclength, pages);
+ return msblk->decompressor->comp_opts ?
+ msblk->decompressor->comp_opts(msblk, buff, length) : NULL;
}
#ifdef CONFIG_SQUASHFS_XZ
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c
new file mode 100644
index 000000000000..d6008a636479
--- /dev/null
+++ b/fs/squashfs/decompressor_multi.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2013
+ * Minchan Kim <minchan@kernel.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/cpumask.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression in the
+ * decompressor framework
+ */
+
+
+/*
+ * The reason that multiply two is that a CPU can request new I/O
+ * while it is waiting previous request.
+ */
+#define MAX_DECOMPRESSOR (num_online_cpus() * 2)
+
+
+int squashfs_max_decompressors(void)
+{
+ return MAX_DECOMPRESSOR;
+}
+
+
+struct squashfs_stream {
+ void *comp_opts;
+ struct list_head strm_list;
+ struct mutex mutex;
+ int avail_decomp;
+ wait_queue_head_t wait;
+};
+
+
+struct decomp_stream {
+ void *stream;
+ struct list_head list;
+};
+
+
+static void put_decomp_stream(struct decomp_stream *decomp_strm,
+ struct squashfs_stream *stream)
+{
+ mutex_lock(&stream->mutex);
+ list_add(&decomp_strm->list, &stream->strm_list);
+ mutex_unlock(&stream->mutex);
+ wake_up(&stream->wait);
+}
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+ void *comp_opts)
+{
+ struct squashfs_stream *stream;
+ struct decomp_stream *decomp_strm = NULL;
+ int err = -ENOMEM;
+
+ stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+ if (!stream)
+ goto out;
+
+ stream->comp_opts = comp_opts;
+ mutex_init(&stream->mutex);
+ INIT_LIST_HEAD(&stream->strm_list);
+ init_waitqueue_head(&stream->wait);
+
+ /*
+ * We should have a decompressor at least as default
+ * so if we fail to allocate new decompressor dynamically,
+ * we could always fall back to default decompressor and
+ * file system works.
+ */
+ decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+ if (!decomp_strm)
+ goto out;
+
+ decomp_strm->stream = msblk->decompressor->init(msblk,
+ stream->comp_opts);
+ if (IS_ERR(decomp_strm->stream)) {
+ err = PTR_ERR(decomp_strm->stream);
+ goto out;
+ }
+
+ list_add(&decomp_strm->list, &stream->strm_list);
+ stream->avail_decomp = 1;
+ return stream;
+
+out:
+ kfree(decomp_strm);
+ kfree(stream);
+ return ERR_PTR(err);
+}
+
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+ struct squashfs_stream *stream = msblk->stream;
+ if (stream) {
+ struct decomp_stream *decomp_strm;
+
+ while (!list_empty(&stream->strm_list)) {
+ decomp_strm = list_entry(stream->strm_list.prev,
+ struct decomp_stream, list);
+ list_del(&decomp_strm->list);
+ msblk->decompressor->free(decomp_strm->stream);
+ kfree(decomp_strm);
+ stream->avail_decomp--;
+ }
+ WARN_ON(stream->avail_decomp);
+ kfree(stream->comp_opts);
+ kfree(stream);
+ }
+}
+
+
+static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk,
+ struct squashfs_stream *stream)
+{
+ struct decomp_stream *decomp_strm;
+
+ while (1) {
+ mutex_lock(&stream->mutex);
+
+ /* There is available decomp_stream */
+ if (!list_empty(&stream->strm_list)) {
+ decomp_strm = list_entry(stream->strm_list.prev,
+ struct decomp_stream, list);
+ list_del(&decomp_strm->list);
+ mutex_unlock(&stream->mutex);
+ break;
+ }
+
+ /*
+ * If there is no available decomp and already full,
+ * let's wait for releasing decomp from other users.
+ */
+ if (stream->avail_decomp >= MAX_DECOMPRESSOR)
+ goto wait;
+
+ /* Let's allocate new decomp */
+ decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+ if (!decomp_strm)
+ goto wait;
+
+ decomp_strm->stream = msblk->decompressor->init(msblk,
+ stream->comp_opts);
+ if (IS_ERR(decomp_strm->stream)) {
+ kfree(decomp_strm);
+ goto wait;
+ }
+
+ stream->avail_decomp++;
+ WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR);
+
+ mutex_unlock(&stream->mutex);
+ break;
+wait:
+ /*
+ * If system memory is tough, let's for other's
+ * releasing instead of hurting VM because it could
+ * make page cache thrashing.
+ */
+ mutex_unlock(&stream->mutex);
+ wait_event(stream->wait,
+ !list_empty(&stream->strm_list));
+ }
+
+ return decomp_strm;
+}
+
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+ int b, int offset, int length, struct squashfs_page_actor *output)
+{
+ int res;
+ struct squashfs_stream *stream = msblk->stream;
+ struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream);
+ res = msblk->decompressor->decompress(msblk, decomp_stream->stream,
+ bh, b, offset, length, output);
+ put_decomp_stream(decomp_stream, stream);
+ if (res < 0)
+ ERROR("%s decompression failed, data probably corrupt\n",
+ msblk->decompressor->name);
+ return res;
+}
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c
new file mode 100644
index 000000000000..23a9c28ad8ea
--- /dev/null
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/percpu.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression using percpu
+ * variables, one thread per cpu core.
+ */
+
+struct squashfs_stream {
+ void *stream;
+};
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+ void *comp_opts)
+{
+ struct squashfs_stream *stream;
+ struct squashfs_stream __percpu *percpu;
+ int err, cpu;
+
+ percpu = alloc_percpu(struct squashfs_stream);
+ if (percpu == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ for_each_possible_cpu(cpu) {
+ stream = per_cpu_ptr(percpu, cpu);
+ stream->stream = msblk->decompressor->init(msblk, comp_opts);
+ if (IS_ERR(stream->stream)) {
+ err = PTR_ERR(stream->stream);
+ goto out;
+ }
+ }
+
+ kfree(comp_opts);
+ return (__force void *) percpu;
+
+out:
+ for_each_possible_cpu(cpu) {
+ stream = per_cpu_ptr(percpu, cpu);
+ if (!IS_ERR_OR_NULL(stream->stream))
+ msblk->decompressor->free(stream->stream);
+ }
+ free_percpu(percpu);
+ return ERR_PTR(err);
+}
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+ struct squashfs_stream __percpu *percpu =
+ (struct squashfs_stream __percpu *) msblk->stream;
+ struct squashfs_stream *stream;
+ int cpu;
+
+ if (msblk->stream) {
+ for_each_possible_cpu(cpu) {
+ stream = per_cpu_ptr(percpu, cpu);
+ msblk->decompressor->free(stream->stream);
+ }
+ free_percpu(percpu);
+ }
+}
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+ int b, int offset, int length, struct squashfs_page_actor *output)
+{
+ struct squashfs_stream __percpu *percpu =
+ (struct squashfs_stream __percpu *) msblk->stream;
+ struct squashfs_stream *stream = get_cpu_ptr(percpu);
+ int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+ offset, length, output);
+ put_cpu_ptr(stream);
+
+ if (res < 0)
+ ERROR("%s decompression failed, data probably corrupt\n",
+ msblk->decompressor->name);
+
+ return res;
+}
+
+int squashfs_max_decompressors(void)
+{
+ return num_possible_cpus();
+}
diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c
new file mode 100644
index 000000000000..a6c75929a00e
--- /dev/null
+++ b/fs/squashfs/decompressor_single.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements single-threaded decompression in the
+ * decompressor framework
+ */
+
+struct squashfs_stream {
+ void *stream;
+ struct mutex mutex;
+};
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+ void *comp_opts)
+{
+ struct squashfs_stream *stream;
+ int err = -ENOMEM;
+
+ stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+ if (stream == NULL)
+ goto out;
+
+ stream->stream = msblk->decompressor->init(msblk, comp_opts);
+ if (IS_ERR(stream->stream)) {
+ err = PTR_ERR(stream->stream);
+ goto out;
+ }
+
+ kfree(comp_opts);
+ mutex_init(&stream->mutex);
+ return stream;
+
+out:
+ kfree(stream);
+ return ERR_PTR(err);
+}
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+ struct squashfs_stream *stream = msblk->stream;
+
+ if (stream) {
+ msblk->decompressor->free(stream->stream);
+ kfree(stream);
+ }
+}
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+ int b, int offset, int length, struct squashfs_page_actor *output)
+{
+ int res;
+ struct squashfs_stream *stream = msblk->stream;
+
+ mutex_lock(&stream->mutex);
+ res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+ offset, length, output);
+ mutex_unlock(&stream->mutex);
+
+ if (res < 0)
+ ERROR("%s decompression failed, data probably corrupt\n",
+ msblk->decompressor->name);
+
+ return res;
+}
+
+int squashfs_max_decompressors(void)
+{
+ return 1;
+}
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 8ca62c28fe12..e5c9689062ba 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -370,77 +370,15 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
return le32_to_cpu(size);
}
-
-static int squashfs_readpage(struct file *file, struct page *page)
+/* Copy data into page cache */
+void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
+ int bytes, int offset)
{
struct inode *inode = page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int bytes, i, offset = 0, sparse = 0;
- struct squashfs_cache_entry *buffer = NULL;
void *pageaddr;
-
- int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
- int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
- int start_index = page->index & ~mask;
- int end_index = start_index | mask;
- int file_end = i_size_read(inode) >> msblk->block_log;
-
- TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
- page->index, squashfs_i(inode)->start);
-
- if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT))
- goto out;
-
- if (index < file_end || squashfs_i(inode)->fragment_block ==
- SQUASHFS_INVALID_BLK) {
- /*
- * Reading a datablock from disk. Need to read block list
- * to get location and block size.
- */
- u64 block = 0;
- int bsize = read_blocklist(inode, index, &block);
- if (bsize < 0)
- goto error_out;
-
- if (bsize == 0) { /* hole */
- bytes = index == file_end ?
- (i_size_read(inode) & (msblk->block_size - 1)) :
- msblk->block_size;
- sparse = 1;
- } else {
- /*
- * Read and decompress datablock.
- */
- buffer = squashfs_get_datablock(inode->i_sb,
- block, bsize);
- if (buffer->error) {
- ERROR("Unable to read page, block %llx, size %x"
- "\n", block, bsize);
- squashfs_cache_put(buffer);
- goto error_out;
- }
- bytes = buffer->length;
- }
- } else {
- /*
- * Datablock is stored inside a fragment (tail-end packed
- * block).
- */
- buffer = squashfs_get_fragment(inode->i_sb,
- squashfs_i(inode)->fragment_block,
- squashfs_i(inode)->fragment_size);
-
- if (buffer->error) {
- ERROR("Unable to read page, block %llx, size %x\n",
- squashfs_i(inode)->fragment_block,
- squashfs_i(inode)->fragment_size);
- squashfs_cache_put(buffer);
- goto error_out;
- }
- bytes = i_size_read(inode) & (msblk->block_size - 1);
- offset = squashfs_i(inode)->fragment_offset;
- }
+ int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ int start_index = page->index & ~mask, end_index = start_index | mask;
/*
* Loop copying datablock into pages. As the datablock likely covers
@@ -451,7 +389,7 @@ static int squashfs_readpage(struct file *file, struct page *page)
for (i = start_index; i <= end_index && bytes > 0; i++,
bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
struct page *push_page;
- int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE);
+ int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0;
TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
@@ -475,11 +413,75 @@ skip_page:
if (i != page->index)
page_cache_release(push_page);
}
+}
+
+/* Read datablock stored packed inside a fragment (tail-end packed block) */
+static int squashfs_readpage_fragment(struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
+ squashfs_i(inode)->fragment_block,
+ squashfs_i(inode)->fragment_size);
+ int res = buffer->error;
+
+ if (res)
+ ERROR("Unable to read page, block %llx, size %x\n",
+ squashfs_i(inode)->fragment_block,
+ squashfs_i(inode)->fragment_size);
+ else
+ squashfs_copy_cache(page, buffer, i_size_read(inode) &
+ (msblk->block_size - 1),
+ squashfs_i(inode)->fragment_offset);
+
+ squashfs_cache_put(buffer);
+ return res;
+}
- if (!sparse)
- squashfs_cache_put(buffer);
+static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
+{
+ struct inode *inode = page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int bytes = index == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+ squashfs_copy_cache(page, NULL, bytes, 0);
return 0;
+}
+
+static int squashfs_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+ int file_end = i_size_read(inode) >> msblk->block_log;
+ int res;
+ void *pageaddr;
+
+ TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
+ page->index, squashfs_i(inode)->start);
+
+ if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT))
+ goto out;
+
+ if (index < file_end || squashfs_i(inode)->fragment_block ==
+ SQUASHFS_INVALID_BLK) {
+ u64 block = 0;
+ int bsize = read_blocklist(inode, index, &block);
+ if (bsize < 0)
+ goto error_out;
+
+ if (bsize == 0)
+ res = squashfs_readpage_sparse(page, index, file_end);
+ else
+ res = squashfs_readpage_block(page, block, bsize);
+ } else
+ res = squashfs_readpage_fragment(page);
+
+ if (!res)
+ return 0;
error_out:
SetPageError(page);
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
new file mode 100644
index 000000000000..f2310d2a2019
--- /dev/null
+++ b/fs/squashfs/file_cache.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/* Read separately compressed datablock and memcopy into page cache */
+int squashfs_readpage_block(struct page *page, u64 block, int bsize)
+{
+ struct inode *i = page->mapping->host;
+ struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+ block, bsize);
+ int res = buffer->error;
+
+ if (res)
+ ERROR("Unable to read page, block %llx, size %x\n", block,
+ bsize);
+ else
+ squashfs_copy_cache(page, buffer, buffer->length, 0);
+
+ squashfs_cache_put(buffer);
+ return res;
+}
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
new file mode 100644
index 000000000000..2943b2bfae48
--- /dev/null
+++ b/fs/squashfs/file_direct.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+#include "page_actor.h"
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+ int pages, struct page **page);
+
+/* Read separately compressed datablock directly into page cache */
+int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
+
+{
+ struct inode *inode = target_page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+
+ int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+ int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ int start_index = target_page->index & ~mask;
+ int end_index = start_index | mask;
+ int i, n, pages, missing_pages, bytes, res = -ENOMEM;
+ struct page **page;
+ struct squashfs_page_actor *actor;
+ void *pageaddr;
+
+ if (end_index > file_end)
+ end_index = file_end;
+
+ pages = end_index - start_index + 1;
+
+ page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
+ if (page == NULL)
+ return res;
+
+ /*
+ * Create a "page actor" which will kmap and kunmap the
+ * page cache pages appropriately within the decompressor
+ */
+ actor = squashfs_page_actor_init_special(page, pages, 0);
+ if (actor == NULL)
+ goto out;
+
+ /* Try to grab all the pages covered by the Squashfs block */
+ for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
+ page[i] = (n == target_page->index) ? target_page :
+ grab_cache_page_nowait(target_page->mapping, n);
+
+ if (page[i] == NULL) {
+ missing_pages++;
+ continue;
+ }
+
+ if (PageUptodate(page[i])) {
+ unlock_page(page[i]);
+ page_cache_release(page[i]);
+ page[i] = NULL;
+ missing_pages++;
+ }
+ }
+
+ if (missing_pages) {
+ /*
+ * Couldn't get one or more pages, this page has either
+ * been VM reclaimed, but others are still in the page cache
+ * and uptodate, or we're racing with another thread in
+ * squashfs_readpage also trying to grab them. Fall back to
+ * using an intermediate buffer.
+ */
+ res = squashfs_read_cache(target_page, block, bsize, pages,
+ page);
+ goto out;
+ }
+
+ /* Decompress directly into the page cache buffers */
+ res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
+ if (res < 0)
+ goto mark_errored;
+
+ /* Last page may have trailing bytes not filled */
+ bytes = res % PAGE_CACHE_SIZE;
+ if (bytes) {
+ pageaddr = kmap_atomic(page[pages - 1]);
+ memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+ kunmap_atomic(pageaddr);
+ }
+
+ /* Mark pages as uptodate, unlock and release */
+ for (i = 0; i < pages; i++) {
+ flush_dcache_page(page[i]);
+ SetPageUptodate(page[i]);
+ unlock_page(page[i]);
+ if (page[i] != target_page)
+ page_cache_release(page[i]);
+ }
+
+ kfree(actor);
+ kfree(page);
+
+ return 0;
+
+mark_errored:
+ /* Decompression failed, mark pages as errored. Target_page is
+ * dealt with by the caller
+ */
+ for (i = 0; i < pages; i++) {
+ if (page[i] == target_page)
+ continue;
+ flush_dcache_page(page[i]);
+ SetPageError(page[i]);
+ unlock_page(page[i]);
+ page_cache_release(page[i]);
+ }
+
+out:
+ kfree(actor);
+ kfree(page);
+ return res;
+}
+
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+ int pages, struct page **page)
+{
+ struct inode *i = target_page->mapping->host;
+ struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+ block, bsize);
+ int bytes = buffer->length, res = buffer->error, n, offset = 0;
+ void *pageaddr;
+
+ if (res) {
+ ERROR("Unable to read page, block %llx, size %x\n", block,
+ bsize);
+ goto out;
+ }
+
+ for (n = 0; n < pages && bytes > 0; n++,
+ bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+ int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+
+ if (page[n] == NULL)
+ continue;
+
+ pageaddr = kmap_atomic(page[n]);
+ squashfs_copy_data(pageaddr, buffer, offset, avail);
+ memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(page[n]);
+ SetPageUptodate(page[n]);
+ unlock_page(page[n]);
+ if (page[n] != target_page)
+ page_cache_release(page[n]);
+ }
+
+out:
+ squashfs_cache_put(buffer);
+ return res;
+}
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 00f4dfc5f088..244b9fbfff7b 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -31,13 +31,14 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
+#include "page_actor.h"
struct squashfs_lzo {
void *input;
void *output;
};
-static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
+static void *lzo_init(struct squashfs_sb_info *msblk, void *buff)
{
int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
@@ -74,22 +75,16 @@ static void lzo_free(void *strm)
}
-static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
- struct buffer_head **bh, int b, int offset, int length, int srclength,
- int pages)
+static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ struct buffer_head **bh, int b, int offset, int length,
+ struct squashfs_page_actor *output)
{
- struct squashfs_lzo *stream = msblk->stream;
- void *buff = stream->input;
+ struct squashfs_lzo *stream = strm;
+ void *buff = stream->input, *data;
int avail, i, bytes = length, res;
- size_t out_len = srclength;
-
- mutex_lock(&msblk->read_data_mutex);
+ size_t out_len = output->length;
for (i = 0; i < b; i++) {
- wait_on_buffer(bh[i]);
- if (!buffer_uptodate(bh[i]))
- goto block_release;
-
avail = min(bytes, msblk->devblksize - offset);
memcpy(buff, bh[i]->b_data + offset, avail);
buff += avail;
@@ -104,24 +99,24 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
goto failed;
res = bytes = (int)out_len;
- for (i = 0, buff = stream->output; bytes && i < pages; i++) {
- avail = min_t(int, bytes, PAGE_CACHE_SIZE);
- memcpy(buffer[i], buff, avail);
- buff += avail;
- bytes -= avail;
+ data = squashfs_first_page(output);
+ buff = stream->output;
+ while (data) {
+ if (bytes <= PAGE_CACHE_SIZE) {
+ memcpy(data, buff, bytes);
+ break;
+ } else {
+ memcpy(data, buff, PAGE_CACHE_SIZE);
+ buff += PAGE_CACHE_SIZE;
+ bytes -= PAGE_CACHE_SIZE;
+ data = squashfs_next_page(output);
+ }
}
+ squashfs_finish_page(output);
- mutex_unlock(&msblk->read_data_mutex);
return res;
-block_release:
- for (; i < b; i++)
- put_bh(bh[i]);
-
failed:
- mutex_unlock(&msblk->read_data_mutex);
-
- ERROR("lzo decompression failed, data probably corrupt\n");
return -EIO;
}
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
new file mode 100644
index 000000000000..5a1c11f56441
--- /dev/null
+++ b/fs/squashfs/page_actor.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include "page_actor.h"
+
+/*
+ * This file contains implementations of page_actor for decompressing into
+ * an intermediate buffer, and for decompressing directly into the
+ * page cache.
+ *
+ * Calling code should avoid sleeping between calls to squashfs_first_page()
+ * and squashfs_finish_page().
+ */
+
+/* Implementation of page_actor for decompressing into intermediate buffer */
+static void *cache_first_page(struct squashfs_page_actor *actor)
+{
+ actor->next_page = 1;
+ return actor->buffer[0];
+}
+
+static void *cache_next_page(struct squashfs_page_actor *actor)
+{
+ if (actor->next_page == actor->pages)
+ return NULL;
+
+ return actor->buffer[actor->next_page++];
+}
+
+static void cache_finish_page(struct squashfs_page_actor *actor)
+{
+ /* empty */
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
+ int pages, int length)
+{
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+ if (actor == NULL)
+ return NULL;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->buffer = buffer;
+ actor->pages = pages;
+ actor->next_page = 0;
+ actor->squashfs_first_page = cache_first_page;
+ actor->squashfs_next_page = cache_next_page;
+ actor->squashfs_finish_page = cache_finish_page;
+ return actor;
+}
+
+/* Implementation of page_actor for decompressing directly into page cache. */
+static void *direct_first_page(struct squashfs_page_actor *actor)
+{
+ actor->next_page = 1;
+ return actor->pageaddr = kmap_atomic(actor->page[0]);
+}
+
+static void *direct_next_page(struct squashfs_page_actor *actor)
+{
+ if (actor->pageaddr)
+ kunmap_atomic(actor->pageaddr);
+
+ return actor->pageaddr = actor->next_page == actor->pages ? NULL :
+ kmap_atomic(actor->page[actor->next_page++]);
+}
+
+static void direct_finish_page(struct squashfs_page_actor *actor)
+{
+ if (actor->pageaddr)
+ kunmap_atomic(actor->pageaddr);
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
+ int pages, int length)
+{
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+ if (actor == NULL)
+ return NULL;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->page = page;
+ actor->pages = pages;
+ actor->next_page = 0;
+ actor->pageaddr = NULL;
+ actor->squashfs_first_page = direct_first_page;
+ actor->squashfs_next_page = direct_next_page;
+ actor->squashfs_finish_page = direct_finish_page;
+ return actor;
+}
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
new file mode 100644
index 000000000000..26dd82008b82
--- /dev/null
+++ b/fs/squashfs/page_actor.h
@@ -0,0 +1,81 @@
+#ifndef PAGE_ACTOR_H
+#define PAGE_ACTOR_H
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef CONFIG_SQUASHFS_FILE_DIRECT
+struct squashfs_page_actor {
+ void **page;
+ int pages;
+ int length;
+ int next_page;
+};
+
+static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
+ int pages, int length)
+{
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+ if (actor == NULL)
+ return NULL;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->page = page;
+ actor->pages = pages;
+ actor->next_page = 0;
+ return actor;
+}
+
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+ actor->next_page = 1;
+ return actor->page[0];
+}
+
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+ return actor->next_page == actor->pages ? NULL :
+ actor->page[actor->next_page++];
+}
+
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+ /* empty */
+}
+#else
+struct squashfs_page_actor {
+ union {
+ void **buffer;
+ struct page **page;
+ };
+ void *pageaddr;
+ void *(*squashfs_first_page)(struct squashfs_page_actor *);
+ void *(*squashfs_next_page)(struct squashfs_page_actor *);
+ void (*squashfs_finish_page)(struct squashfs_page_actor *);
+ int pages;
+ int length;
+ int next_page;
+};
+
+extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int);
+extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page
+ **, int, int);
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+ return actor->squashfs_first_page(actor);
+}
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+ return actor->squashfs_next_page(actor);
+}
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+ actor->squashfs_finish_page(actor);
+}
+#endif
+#endif
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index d1266516ed08..9e1bb79f7e6f 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -28,8 +28,8 @@
#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args)
/* block.c */
-extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
- int, int);
+extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
+ struct squashfs_page_actor *);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -48,7 +48,14 @@ extern void *squashfs_read_table(struct super_block *, u64, int);
/* decompressor.c */
extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
-extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
+extern void *squashfs_decompressor_setup(struct super_block *, unsigned short);
+
+/* decompressor_xxx.c */
+extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *);
+extern void squashfs_decompressor_destroy(struct squashfs_sb_info *);
+extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **,
+ int, int, int, struct squashfs_page_actor *);
+extern int squashfs_max_decompressors(void);
/* export.c */
extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
@@ -59,6 +66,13 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *);
extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
u64, u64, unsigned int);
+/* file.c */
+void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
+ int);
+
+/* file_xxx.c */
+extern int squashfs_readpage_block(struct page *, u64, int);
+
/* id.c */
extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64,
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 52934a22f296..1da565cb50c3 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -50,6 +50,7 @@ struct squashfs_cache_entry {
wait_queue_head_t wait_queue;
struct squashfs_cache *cache;
void **data;
+ struct squashfs_page_actor *actor;
};
struct squashfs_sb_info {
@@ -63,10 +64,9 @@ struct squashfs_sb_info {
__le64 *id_table;
__le64 *fragment_index;
__le64 *xattr_id_table;
- struct mutex read_data_mutex;
struct mutex meta_index_mutex;
struct meta_index *meta_index;
- void *stream;
+ struct squashfs_stream *stream;
__le64 *inode_lookup_table;
u64 inode_table;
u64 directory_table;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 60553a9053ca..202df6312d4e 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -98,7 +98,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
msblk->devblksize_log2 = ffz(~msblk->devblksize);
- mutex_init(&msblk->read_data_mutex);
mutex_init(&msblk->meta_index_mutex);
/*
@@ -206,13 +205,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
/* Allocate read_page block */
- msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size);
+ msblk->read_page = squashfs_cache_init("data",
+ squashfs_max_decompressors(), msblk->block_size);
if (msblk->read_page == NULL) {
ERROR("Failed to allocate read_page block\n");
goto failed_mount;
}
- msblk->stream = squashfs_decompressor_init(sb, flags);
+ msblk->stream = squashfs_decompressor_setup(sb, flags);
if (IS_ERR(msblk->stream)) {
err = PTR_ERR(msblk->stream);
msblk->stream = NULL;
@@ -336,7 +336,7 @@ failed_mount:
squashfs_cache_delete(msblk->block_cache);
squashfs_cache_delete(msblk->fragment_cache);
squashfs_cache_delete(msblk->read_page);
- squashfs_decompressor_free(msblk, msblk->stream);
+ squashfs_decompressor_destroy(msblk);
kfree(msblk->inode_lookup_table);
kfree(msblk->fragment_index);
kfree(msblk->id_table);
@@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb)
squashfs_cache_delete(sbi->block_cache);
squashfs_cache_delete(sbi->fragment_cache);
squashfs_cache_delete(sbi->read_page);
- squashfs_decompressor_free(sbi, sbi->stream);
+ squashfs_decompressor_destroy(sbi);
kfree(sbi->id_table);
kfree(sbi->fragment_index);
kfree(sbi->meta_index);
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 1760b7d108f6..c609624e4b8a 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -32,44 +32,70 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
+#include "page_actor.h"
struct squashfs_xz {
struct xz_dec *state;
struct xz_buf buf;
};
-struct comp_opts {
+struct disk_comp_opts {
__le32 dictionary_size;
__le32 flags;
};
-static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
- int len)
+struct comp_opts {
+ int dict_size;
+};
+
+static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk,
+ void *buff, int len)
{
- struct comp_opts *comp_opts = buff;
- struct squashfs_xz *stream;
- int dict_size = msblk->block_size;
- int err, n;
+ struct disk_comp_opts *comp_opts = buff;
+ struct comp_opts *opts;
+ int err = 0, n;
+
+ opts = kmalloc(sizeof(*opts), GFP_KERNEL);
+ if (opts == NULL) {
+ err = -ENOMEM;
+ goto out2;
+ }
if (comp_opts) {
/* check compressor options are the expected length */
if (len < sizeof(*comp_opts)) {
err = -EIO;
- goto failed;
+ goto out;
}
- dict_size = le32_to_cpu(comp_opts->dictionary_size);
+ opts->dict_size = le32_to_cpu(comp_opts->dictionary_size);
/* the dictionary size should be 2^n or 2^n+2^(n+1) */
- n = ffs(dict_size) - 1;
- if (dict_size != (1 << n) && dict_size != (1 << n) +
+ n = ffs(opts->dict_size) - 1;
+ if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) +
(1 << (n + 1))) {
err = -EIO;
- goto failed;
+ goto out;
}
- }
+ } else
+ /* use defaults */
+ opts->dict_size = max_t(int, msblk->block_size,
+ SQUASHFS_METADATA_SIZE);
+
+ return opts;
+
+out:
+ kfree(opts);
+out2:
+ return ERR_PTR(err);
+}
+
- dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
+static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff)
+{
+ struct comp_opts *comp_opts = buff;
+ struct squashfs_xz *stream;
+ int err;
stream = kmalloc(sizeof(*stream), GFP_KERNEL);
if (stream == NULL) {
@@ -77,7 +103,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
goto failed;
}
- stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
+ stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size);
if (stream->state == NULL) {
kfree(stream);
err = -ENOMEM;
@@ -103,42 +129,37 @@ static void squashfs_xz_free(void *strm)
}
-static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
- struct buffer_head **bh, int b, int offset, int length, int srclength,
- int pages)
+static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ struct buffer_head **bh, int b, int offset, int length,
+ struct squashfs_page_actor *output)
{
enum xz_ret xz_err;
- int avail, total = 0, k = 0, page = 0;
- struct squashfs_xz *stream = msblk->stream;
-
- mutex_lock(&msblk->read_data_mutex);
+ int avail, total = 0, k = 0;
+ struct squashfs_xz *stream = strm;
xz_dec_reset(stream->state);
stream->buf.in_pos = 0;
stream->buf.in_size = 0;
stream->buf.out_pos = 0;
stream->buf.out_size = PAGE_CACHE_SIZE;
- stream->buf.out = buffer[page++];
+ stream->buf.out = squashfs_first_page(output);
do {
if (stream->buf.in_pos == stream->buf.in_size && k < b) {
avail = min(length, msblk->devblksize - offset);
length -= avail;
- wait_on_buffer(bh[k]);
- if (!buffer_uptodate(bh[k]))
- goto release_mutex;
-
stream->buf.in = bh[k]->b_data + offset;
stream->buf.in_size = avail;
stream->buf.in_pos = 0;
offset = 0;
}
- if (stream->buf.out_pos == stream->buf.out_size
- && page < pages) {
- stream->buf.out = buffer[page++];
- stream->buf.out_pos = 0;
- total += PAGE_CACHE_SIZE;
+ if (stream->buf.out_pos == stream->buf.out_size) {
+ stream->buf.out = squashfs_next_page(output);
+ if (stream->buf.out != NULL) {
+ stream->buf.out_pos = 0;
+ total += PAGE_CACHE_SIZE;
+ }
}
xz_err = xz_dec_run(stream->state, &stream->buf);
@@ -147,23 +168,14 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
put_bh(bh[k++]);
} while (xz_err == XZ_OK);
- if (xz_err != XZ_STREAM_END) {
- ERROR("xz_dec_run error, data probably corrupt\n");
- goto release_mutex;
- }
-
- if (k < b) {
- ERROR("xz_uncompress error, input remaining\n");
- goto release_mutex;
- }
+ squashfs_finish_page(output);
- total += stream->buf.out_pos;
- mutex_unlock(&msblk->read_data_mutex);
- return total;
+ if (xz_err != XZ_STREAM_END || k < b)
+ goto out;
-release_mutex:
- mutex_unlock(&msblk->read_data_mutex);
+ return total + stream->buf.out_pos;
+out:
for (; k < b; k++)
put_bh(bh[k]);
@@ -172,6 +184,7 @@ release_mutex:
const struct squashfs_decompressor squashfs_xz_comp_ops = {
.init = squashfs_xz_init,
+ .comp_opts = squashfs_xz_comp_opts,
.free = squashfs_xz_free,
.decompress = squashfs_xz_uncompress,
.id = XZ_COMPRESSION,
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 55d918fd2d86..8727caba6882 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -32,8 +32,9 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
+#include "page_actor.h"
-static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
+static void *zlib_init(struct squashfs_sb_info *dummy, void *buff)
{
z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
if (stream == NULL)
@@ -61,44 +62,37 @@ static void zlib_free(void *strm)
}
-static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
- struct buffer_head **bh, int b, int offset, int length, int srclength,
- int pages)
+static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ struct buffer_head **bh, int b, int offset, int length,
+ struct squashfs_page_actor *output)
{
- int zlib_err, zlib_init = 0;
- int k = 0, page = 0;
- z_stream *stream = msblk->stream;
-
- mutex_lock(&msblk->read_data_mutex);
+ int zlib_err, zlib_init = 0, k = 0;
+ z_stream *stream = strm;
- stream->avail_out = 0;
+ stream->avail_out = PAGE_CACHE_SIZE;
+ stream->next_out = squashfs_first_page(output);
stream->avail_in = 0;
do {
if (stream->avail_in == 0 && k < b) {
int avail = min(length, msblk->devblksize - offset);
length -= avail;
- wait_on_buffer(bh[k]);
- if (!buffer_uptodate(bh[k]))
- goto release_mutex;
-
stream->next_in = bh[k]->b_data + offset;
stream->avail_in = avail;
offset = 0;
}
- if (stream->avail_out == 0 && page < pages) {
- stream->next_out = buffer[page++];
- stream->avail_out = PAGE_CACHE_SIZE;
+ if (stream->avail_out == 0) {
+ stream->next_out = squashfs_next_page(output);
+ if (stream->next_out != NULL)
+ stream->avail_out = PAGE_CACHE_SIZE;
}
if (!zlib_init) {
zlib_err = zlib_inflateInit(stream);
if (zlib_err != Z_OK) {
- ERROR("zlib_inflateInit returned unexpected "
- "result 0x%x, srclength %d\n",
- zlib_err, srclength);
- goto release_mutex;
+ squashfs_finish_page(output);
+ goto out;
}
zlib_init = 1;
}
@@ -109,29 +103,21 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
put_bh(bh[k++]);
} while (zlib_err == Z_OK);
- if (zlib_err != Z_STREAM_END) {
- ERROR("zlib_inflate error, data probably corrupt\n");
- goto release_mutex;
- }
+ squashfs_finish_page(output);
- zlib_err = zlib_inflateEnd(stream);
- if (zlib_err != Z_OK) {
- ERROR("zlib_inflate error, data probably corrupt\n");
- goto release_mutex;
- }
+ if (zlib_err != Z_STREAM_END)
+ goto out;
- if (k < b) {
- ERROR("zlib_uncompress error, data remaining\n");
- goto release_mutex;
- }
+ zlib_err = zlib_inflateEnd(stream);
+ if (zlib_err != Z_OK)
+ goto out;
- length = stream->total_out;
- mutex_unlock(&msblk->read_data_mutex);
- return length;
+ if (k < b)
+ goto out;
-release_mutex:
- mutex_unlock(&msblk->read_data_mutex);
+ return stream->total_out;
+out:
for (; k < b; k++)
put_bh(bh[k]);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 1c02da8bb7df..3ef11b22e750 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1137,6 +1137,7 @@ xfs_bmap_add_attrfork(
int committed; /* xaction was committed */
int logflags; /* logging flags */
int error; /* error return value */
+ int cancel_flags = 0;
ASSERT(XFS_IFORK_Q(ip) == 0);
@@ -1147,19 +1148,20 @@ xfs_bmap_add_attrfork(
if (rsvd)
tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
- if (error)
- goto error0;
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return error;
+ }
+ cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
- if (error) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
- return error;
- }
+ if (error)
+ goto trans_cancel;
+ cancel_flags |= XFS_TRANS_ABORT;
if (XFS_IFORK_Q(ip))
- goto error1;
+ goto trans_cancel;
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
/*
* For inodes coming from pre-6.2 filesystems.
@@ -1169,7 +1171,7 @@ xfs_bmap_add_attrfork(
}
ASSERT(ip->i_d.di_anextents == 0);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
switch (ip->i_d.di_format) {
@@ -1191,7 +1193,7 @@ xfs_bmap_add_attrfork(
default:
ASSERT(0);
error = XFS_ERROR(EINVAL);
- goto error1;
+ goto trans_cancel;
}
ASSERT(ip->i_afp == NULL);
@@ -1219,7 +1221,7 @@ xfs_bmap_add_attrfork(
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
if (error)
- goto error2;
+ goto bmap_cancel;
if (!xfs_sb_version_hasattr(&mp->m_sb) ||
(!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
__int64_t sbfields = 0;
@@ -1242,14 +1244,16 @@ xfs_bmap_add_attrfork(
error = xfs_bmap_finish(&tp, &flist, &committed);
if (error)
- goto error2;
- return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-error2:
+ goto bmap_cancel;
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+
+bmap_cancel:
xfs_bmap_cancel(&flist);
-error1:
+trans_cancel:
+ xfs_trans_cancel(tp, cancel_flags);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
-error0:
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
return error;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da88f167af78..02df7b408a26 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -41,6 +41,7 @@
#include "xfs_fsops.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
+#include "xfs_dinode.h"
#ifdef HAVE_PERCPU_SB
@@ -718,8 +719,22 @@ xfs_mountfs(
* Set the inode cluster size.
* This may still be overridden by the file system
* block size if it is larger than the chosen cluster size.
+ *
+ * For v5 filesystems, scale the cluster size with the inode size to
+ * keep a constant ratio of inode per cluster buffer, but only if mkfs
+ * has set the inode alignment value appropriately for larger cluster
+ * sizes.
*/
mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ int new_size = mp->m_inode_cluster_size;
+
+ new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
+ if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
+ mp->m_inode_cluster_size = new_size;
+ xfs_info(mp, "Using inode cluster size of %d bytes",
+ mp->m_inode_cluster_size);
+ }
/*
* Set inode alignment fields
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1d8101a10d8e..a466c5e5826e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -112,7 +112,7 @@ typedef struct xfs_mount {
__uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
__uint8_t m_agno_log; /* log #ag's */
__uint8_t m_agino_log; /* #bits for agino in inum */
- __uint16_t m_inode_cluster_size;/* min inode buf size */
+ uint m_inode_cluster_size;/* min inode buf size */
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
uint m_blockwmask; /* blockwsize-1 */
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 1bba7f60d94c..50c3f5614288 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -111,12 +111,14 @@ xfs_trans_log_inode(
/*
* First time we log the inode in a transaction, bump the inode change
- * counter if it is configured for this to occur.
+ * counter if it is configured for this to occur. We don't use
+ * inode_inc_version() because there is no need for extra locking around
+ * i_version as we already hold the inode locked exclusively for
+ * metadata modification.
*/
if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) &&
IS_I_VERSION(VFS_I(ip))) {
- inode_inc_iversion(VFS_I(ip));
- ip->i_d.di_changecount = VFS_I(ip)->i_version;
+ ip->i_d.di_changecount = ++VFS_I(ip)->i_version;
flags |= XFS_ILOG_CORE;
}
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
index d53d9f0627a7..2fd59c0dae66 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c
@@ -385,8 +385,7 @@ xfs_calc_ifree_reservation(
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
- MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
- XFS_INODE_CLUSTER_SIZE(mp)) +
+ max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) +
xfs_calc_buf_res(1, 0) +
xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
mp->m_in_maxlevels, 0) +
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 89c60b0f6408..7b2de026a4f3 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -431,9 +431,9 @@ static inline acpi_handle acpi_get_child(acpi_handle handle, u64 addr)
{
return acpi_find_child(handle, addr, false);
}
+void acpi_preset_companion(struct device *dev, acpi_handle parent, u64 addr);
int acpi_is_root_bridge(acpi_handle);
struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
-#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev))
int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
int acpi_disable_wakeup_device_power(struct acpi_device *dev);
diff --git a/include/crypto/hash_info.h b/include/crypto/hash_info.h
new file mode 100644
index 000000000000..e1e5a3e5dd1b
--- /dev/null
+++ b/include/crypto/hash_info.h
@@ -0,0 +1,40 @@
+/*
+ * Hash Info: Hash algorithms information
+ *
+ * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_HASH_INFO_H
+#define _CRYPTO_HASH_INFO_H
+
+#include <crypto/sha.h>
+#include <crypto/md5.h>
+
+#include <uapi/linux/hash_info.h>
+
+/* not defined in include/crypto/ */
+#define RMD128_DIGEST_SIZE 16
+#define RMD160_DIGEST_SIZE 20
+#define RMD256_DIGEST_SIZE 32
+#define RMD320_DIGEST_SIZE 40
+
+/* not defined in include/crypto/ */
+#define WP512_DIGEST_SIZE 64
+#define WP384_DIGEST_SIZE 48
+#define WP256_DIGEST_SIZE 32
+
+/* not defined in include/crypto/ */
+#define TGR128_DIGEST_SIZE 16
+#define TGR160_DIGEST_SIZE 20
+#define TGR192_DIGEST_SIZE 24
+
+extern const char *const hash_algo_name[HASH_ALGO__LAST];
+extern const int hash_digest_size[HASH_ALGO__LAST];
+
+#endif /* _CRYPTO_HASH_INFO_H */
diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h
index f5b0224c9967..fc09732613ad 100644
--- a/include/crypto/public_key.h
+++ b/include/crypto/public_key.h
@@ -15,6 +15,7 @@
#define _LINUX_PUBLIC_KEY_H
#include <linux/mpi.h>
+#include <crypto/hash_info.h>
enum pkey_algo {
PKEY_ALGO_DSA,
@@ -22,21 +23,11 @@ enum pkey_algo {
PKEY_ALGO__LAST
};
-extern const char *const pkey_algo[PKEY_ALGO__LAST];
+extern const char *const pkey_algo_name[PKEY_ALGO__LAST];
+extern const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST];
-enum pkey_hash_algo {
- PKEY_HASH_MD4,
- PKEY_HASH_MD5,
- PKEY_HASH_SHA1,
- PKEY_HASH_RIPE_MD_160,
- PKEY_HASH_SHA256,
- PKEY_HASH_SHA384,
- PKEY_HASH_SHA512,
- PKEY_HASH_SHA224,
- PKEY_HASH__LAST
-};
-
-extern const char *const pkey_hash_algo[PKEY_HASH__LAST];
+/* asymmetric key implementation supports only up to SHA224 */
+#define PKEY_HASH__LAST (HASH_ALGO_SHA224 + 1)
enum pkey_id_type {
PKEY_ID_PGP, /* OpenPGP generated key ID */
@@ -44,7 +35,7 @@ enum pkey_id_type {
PKEY_ID_TYPE__LAST
};
-extern const char *const pkey_id_type[PKEY_ID_TYPE__LAST];
+extern const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST];
/*
* Cryptographic data for the public-key subtype of the asymmetric key type.
@@ -59,6 +50,7 @@ struct public_key {
#define PKEY_CAN_DECRYPT 0x02
#define PKEY_CAN_SIGN 0x04
#define PKEY_CAN_VERIFY 0x08
+ enum pkey_algo pkey_algo : 8;
enum pkey_id_type id_type : 8;
union {
MPI mpi[5];
@@ -88,7 +80,8 @@ struct public_key_signature {
u8 *digest;
u8 digest_size; /* Number of bytes in digest */
u8 nr_mpi; /* Occupancy of mpi[] */
- enum pkey_hash_algo pkey_hash_algo : 8;
+ enum pkey_algo pkey_algo : 8;
+ enum hash_algo pkey_hash_algo : 8;
union {
MPI mpi[2];
struct {
diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h
new file mode 100644
index 000000000000..d69bc8af3292
--- /dev/null
+++ b/include/keys/big_key-type.h
@@ -0,0 +1,25 @@
+/* Big capacity key type.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_BIG_KEY_TYPE_H
+#define _KEYS_BIG_KEY_TYPE_H
+
+#include <linux/key-type.h>
+
+extern struct key_type key_type_big_key;
+
+extern int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep);
+extern void big_key_revoke(struct key *key);
+extern void big_key_destroy(struct key *key);
+extern void big_key_describe(const struct key *big_key, struct seq_file *m);
+extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen);
+
+#endif /* _KEYS_BIG_KEY_TYPE_H */
diff --git a/include/keys/keyring-type.h b/include/keys/keyring-type.h
index cf49159b0e3a..fca5c62340a4 100644
--- a/include/keys/keyring-type.h
+++ b/include/keys/keyring-type.h
@@ -1,6 +1,6 @@
/* Keyring key type
*
- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2008, 2013 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -13,19 +13,6 @@
#define _KEYS_KEYRING_TYPE_H
#include <linux/key.h>
-#include <linux/rcupdate.h>
-
-/*
- * the keyring payload contains a list of the keys to which the keyring is
- * subscribed
- */
-struct keyring_list {
- struct rcu_head rcu; /* RCU deletion hook */
- unsigned short maxkeys; /* max keys this list can hold */
- unsigned short nkeys; /* number of keys currently held */
- unsigned short delkey; /* key to be unlinked by RCU */
- struct key __rcu *keys[0];
-};
-
+#include <linux/assoc_array.h>
#endif /* _KEYS_KEYRING_TYPE_H */
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
new file mode 100644
index 000000000000..8dabc399bd1d
--- /dev/null
+++ b/include/keys/system_keyring.h
@@ -0,0 +1,23 @@
+/* System keyring containing trusted public keys.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_SYSTEM_KEYRING_H
+#define _KEYS_SYSTEM_KEYRING_H
+
+#ifdef CONFIG_SYSTEM_TRUSTED_KEYRING
+
+#include <linux/key.h>
+
+extern struct key *system_trusted_keyring;
+
+#endif
+
+#endif /* _KEYS_SYSTEM_KEYRING_H */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b0972c4ce81c..d9099b15b472 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -44,6 +44,20 @@
#include <acpi/acpi_numa.h>
#include <asm/acpi.h>
+static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
+{
+ return adev ? adev->handle : NULL;
+}
+
+#define ACPI_COMPANION(dev) ((dev)->acpi_node.companion)
+#define ACPI_COMPANION_SET(dev, adev) ACPI_COMPANION(dev) = (adev)
+#define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev))
+
+static inline const char *acpi_dev_name(struct acpi_device *adev)
+{
+ return dev_name(&adev->dev);
+}
+
enum acpi_irq_model_id {
ACPI_IRQ_MODEL_PIC = 0,
ACPI_IRQ_MODEL_IOAPIC,
@@ -401,6 +415,15 @@ static inline bool acpi_driver_match_device(struct device *dev,
#define acpi_disabled 1
+#define ACPI_COMPANION(dev) (NULL)
+#define ACPI_COMPANION_SET(dev, adev) do { } while (0)
+#define ACPI_HANDLE(dev) (NULL)
+
+static inline const char *acpi_dev_name(struct acpi_device *adev)
+{
+ return NULL;
+}
+
static inline void acpi_early_init(void) { }
static inline int early_acpi_boot_init(void)
diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h
new file mode 100644
index 000000000000..9a193b84238a
--- /dev/null
+++ b/include/linux/assoc_array.h
@@ -0,0 +1,92 @@
+/* Generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_ASSOC_ARRAY_H
+#define _LINUX_ASSOC_ARRAY_H
+
+#ifdef CONFIG_ASSOCIATIVE_ARRAY
+
+#include <linux/types.h>
+
+#define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */
+
+/*
+ * Generic associative array.
+ */
+struct assoc_array {
+ struct assoc_array_ptr *root; /* The node at the root of the tree */
+ unsigned long nr_leaves_on_tree;
+};
+
+/*
+ * Operations on objects and index keys for use by array manipulation routines.
+ */
+struct assoc_array_ops {
+ /* Method to get a chunk of an index key from caller-supplied data */
+ unsigned long (*get_key_chunk)(const void *index_key, int level);
+
+ /* Method to get a piece of an object's index key */
+ unsigned long (*get_object_key_chunk)(const void *object, int level);
+
+ /* Is this the object we're looking for? */
+ bool (*compare_object)(const void *object, const void *index_key);
+
+ /* How different are two objects, to a bit position in their keys? (or
+ * -1 if they're the same)
+ */
+ int (*diff_objects)(const void *a, const void *b);
+
+ /* Method to free an object. */
+ void (*free_object)(void *object);
+};
+
+/*
+ * Access and manipulation functions.
+ */
+struct assoc_array_edit;
+
+static inline void assoc_array_init(struct assoc_array *array)
+{
+ array->root = NULL;
+ array->nr_leaves_on_tree = 0;
+}
+
+extern int assoc_array_iterate(const struct assoc_array *array,
+ int (*iterator)(const void *object,
+ void *iterator_data),
+ void *iterator_data);
+extern void *assoc_array_find(const struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key);
+extern void assoc_array_destroy(struct assoc_array *array,
+ const struct assoc_array_ops *ops);
+extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key,
+ void *object);
+extern void assoc_array_insert_set_object(struct assoc_array_edit *edit,
+ void *object);
+extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key);
+extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
+ const struct assoc_array_ops *ops);
+extern void assoc_array_apply_edit(struct assoc_array_edit *edit);
+extern void assoc_array_cancel_edit(struct assoc_array_edit *edit);
+extern int assoc_array_gc(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ bool (*iterator)(void *object, void *iterator_data),
+ void *iterator_data);
+
+#endif /* CONFIG_ASSOCIATIVE_ARRAY */
+#endif /* _LINUX_ASSOC_ARRAY_H */
diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h
new file mode 100644
index 000000000000..711275e6681c
--- /dev/null
+++ b/include/linux/assoc_array_priv.h
@@ -0,0 +1,182 @@
+/* Private definitions for the generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_ASSOC_ARRAY_PRIV_H
+#define _LINUX_ASSOC_ARRAY_PRIV_H
+
+#ifdef CONFIG_ASSOCIATIVE_ARRAY
+
+#include <linux/assoc_array.h>
+
+#define ASSOC_ARRAY_FAN_OUT 16 /* Number of slots per node */
+#define ASSOC_ARRAY_FAN_MASK (ASSOC_ARRAY_FAN_OUT - 1)
+#define ASSOC_ARRAY_LEVEL_STEP (ilog2(ASSOC_ARRAY_FAN_OUT))
+#define ASSOC_ARRAY_LEVEL_STEP_MASK (ASSOC_ARRAY_LEVEL_STEP - 1)
+#define ASSOC_ARRAY_KEY_CHUNK_MASK (ASSOC_ARRAY_KEY_CHUNK_SIZE - 1)
+#define ASSOC_ARRAY_KEY_CHUNK_SHIFT (ilog2(BITS_PER_LONG))
+
+/*
+ * Undefined type representing a pointer with type information in the bottom
+ * two bits.
+ */
+struct assoc_array_ptr;
+
+/*
+ * An N-way node in the tree.
+ *
+ * Each slot contains one of four things:
+ *
+ * (1) Nothing (NULL).
+ *
+ * (2) A leaf object (pointer types 0).
+ *
+ * (3) A next-level node (pointer type 1, subtype 0).
+ *
+ * (4) A shortcut (pointer type 1, subtype 1).
+ *
+ * The tree is optimised for search-by-ID, but permits reasonable iteration
+ * also.
+ *
+ * The tree is navigated by constructing an index key consisting of an array of
+ * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size.
+ *
+ * The segments correspond to levels of the tree (the first segment is used at
+ * level 0, the second at level 1, etc.).
+ */
+struct assoc_array_node {
+ struct assoc_array_ptr *back_pointer;
+ u8 parent_slot;
+ struct assoc_array_ptr *slots[ASSOC_ARRAY_FAN_OUT];
+ unsigned long nr_leaves_on_branch;
+};
+
+/*
+ * A shortcut through the index space out to where a collection of nodes/leaves
+ * with the same IDs live.
+ */
+struct assoc_array_shortcut {
+ struct assoc_array_ptr *back_pointer;
+ int parent_slot;
+ int skip_to_level;
+ struct assoc_array_ptr *next_node;
+ unsigned long index_key[];
+};
+
+/*
+ * Preallocation cache.
+ */
+struct assoc_array_edit {
+ struct rcu_head rcu;
+ struct assoc_array *array;
+ const struct assoc_array_ops *ops;
+ const struct assoc_array_ops *ops_for_excised_subtree;
+ struct assoc_array_ptr *leaf;
+ struct assoc_array_ptr **leaf_p;
+ struct assoc_array_ptr *dead_leaf;
+ struct assoc_array_ptr *new_meta[3];
+ struct assoc_array_ptr *excised_meta[1];
+ struct assoc_array_ptr *excised_subtree;
+ struct assoc_array_ptr **set_backpointers[ASSOC_ARRAY_FAN_OUT];
+ struct assoc_array_ptr *set_backpointers_to;
+ struct assoc_array_node *adjust_count_on;
+ long adjust_count_by;
+ struct {
+ struct assoc_array_ptr **ptr;
+ struct assoc_array_ptr *to;
+ } set[2];
+ struct {
+ u8 *p;
+ u8 to;
+ } set_parent_slot[1];
+ u8 segment_cache[ASSOC_ARRAY_FAN_OUT + 1];
+};
+
+/*
+ * Internal tree member pointers are marked in the bottom one or two bits to
+ * indicate what type they are so that we don't have to look behind every
+ * pointer to see what it points to.
+ *
+ * We provide functions to test type annotations and to create and translate
+ * the annotated pointers.
+ */
+#define ASSOC_ARRAY_PTR_TYPE_MASK 0x1UL
+#define ASSOC_ARRAY_PTR_LEAF_TYPE 0x0UL /* Points to leaf (or nowhere) */
+#define ASSOC_ARRAY_PTR_META_TYPE 0x1UL /* Points to node or shortcut */
+#define ASSOC_ARRAY_PTR_SUBTYPE_MASK 0x2UL
+#define ASSOC_ARRAY_PTR_NODE_SUBTYPE 0x0UL
+#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL
+
+static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *x)
+{
+ return (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK;
+}
+static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x)
+{
+ return !assoc_array_ptr_is_meta(x);
+}
+static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x)
+{
+ return (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK;
+}
+static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *x)
+{
+ return !assoc_array_ptr_is_shortcut(x);
+}
+
+static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x)
+{
+ return (void *)((unsigned long)x & ~ASSOC_ARRAY_PTR_TYPE_MASK);
+}
+
+static inline
+unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x)
+{
+ return (unsigned long)x &
+ ~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK);
+}
+static inline
+struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x)
+{
+ return (struct assoc_array_node *)__assoc_array_ptr_to_meta(x);
+}
+static inline
+struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x)
+{
+ return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(x);
+}
+
+static inline
+struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t)
+{
+ return (struct assoc_array_ptr *)((unsigned long)p | t);
+}
+static inline
+struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p)
+{
+ return __assoc_array_x_to_ptr(p, ASSOC_ARRAY_PTR_LEAF_TYPE);
+}
+static inline
+struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *p)
+{
+ return __assoc_array_x_to_ptr(
+ p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE);
+}
+static inline
+struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p)
+{
+ return __assoc_array_x_to_ptr(
+ p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE);
+}
+
+#endif /* CONFIG_ASSOCIATIVE_ARRAY */
+#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 729a4d165bcc..a40641954c29 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -73,6 +73,8 @@ struct audit_field {
void *lsm_rule;
};
+extern int is_audit_feature_set(int which);
+
extern int __init audit_register_class(int class, unsigned *list);
extern int audit_classify_syscall(int abi, unsigned syscall);
extern int audit_classify_arch(int arch);
@@ -207,7 +209,7 @@ static inline int audit_get_sessionid(struct task_struct *tsk)
extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
-extern int __audit_bprm(struct linux_binprm *bprm);
+extern void __audit_bprm(struct linux_binprm *bprm);
extern int __audit_socketcall(int nargs, unsigned long *args);
extern int __audit_sockaddr(int len, void *addr);
extern void __audit_fd_pair(int fd1, int fd2);
@@ -236,11 +238,10 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
if (unlikely(!audit_dummy_context()))
__audit_ipc_set_perm(qbytes, uid, gid, mode);
}
-static inline int audit_bprm(struct linux_binprm *bprm)
+static inline void audit_bprm(struct linux_binprm *bprm)
{
if (unlikely(!audit_dummy_context()))
- return __audit_bprm(bprm);
- return 0;
+ __audit_bprm(bprm);
}
static inline int audit_socketcall(int nargs, unsigned long *args)
{
@@ -367,10 +368,8 @@ static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
gid_t gid, umode_t mode)
{ }
-static inline int audit_bprm(struct linux_binprm *bprm)
-{
- return 0;
-}
+static inline void audit_bprm(struct linux_binprm *bprm)
+{ }
static inline int audit_socketcall(int nargs, unsigned long *args)
{
return 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f26ec20f6354..1b135d49b279 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -505,6 +505,9 @@ struct request_queue {
(1 << QUEUE_FLAG_SAME_COMP) | \
(1 << QUEUE_FLAG_ADD_RANDOM))
+#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
+ (1 << QUEUE_FLAG_SAME_COMP))
+
static inline void queue_lockdep_assert_held(struct request_queue *q)
{
if (q->queue_lock)
diff --git a/include/linux/device.h b/include/linux/device.h
index b025925df7f7..952b01033c32 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -644,9 +644,11 @@ struct device_dma_parameters {
unsigned long segment_boundary_mask;
};
+struct acpi_device;
+
struct acpi_dev_node {
#ifdef CONFIG_ACPI
- void *handle;
+ struct acpi_device *companion;
#endif
};
@@ -790,14 +792,6 @@ static inline struct device *kobj_to_dev(struct kobject *kobj)
return container_of(kobj, struct device, kobj);
}
-#ifdef CONFIG_ACPI
-#define ACPI_HANDLE(dev) ((dev)->acpi_node.handle)
-#define ACPI_HANDLE_SET(dev, _handle_) (dev)->acpi_node.handle = (_handle_)
-#else
-#define ACPI_HANDLE(dev) (NULL)
-#define ACPI_HANDLE_SET(dev, _handle_) do { } while (0)
-#endif
-
/* Get the wakeup routines, which depend on struct device */
#include <linux/pm_wakeup.h>
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 0bc727534108..41cf0c399288 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -45,13 +45,13 @@ static inline int dma_submit_error(dma_cookie_t cookie)
/**
* enum dma_status - DMA transaction status
- * @DMA_SUCCESS: transaction completed successfully
+ * @DMA_COMPLETE: transaction completed
* @DMA_IN_PROGRESS: transaction not yet processed
* @DMA_PAUSED: transaction is paused
* @DMA_ERROR: transaction failed
*/
enum dma_status {
- DMA_SUCCESS,
+ DMA_COMPLETE,
DMA_IN_PROGRESS,
DMA_PAUSED,
DMA_ERROR,
@@ -171,12 +171,6 @@ struct dma_interleaved_template {
* @DMA_CTRL_ACK - if clear, the descriptor cannot be reused until the client
* acknowledges receipt, i.e. has has a chance to establish any dependency
* chains
- * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
- * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
- * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
- * (if not set, do the source dma-unmapping as page)
- * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
- * (if not set, do the destination dma-unmapping as page)
* @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
* @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
* @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
@@ -188,14 +182,10 @@ struct dma_interleaved_template {
enum dma_ctrl_flags {
DMA_PREP_INTERRUPT = (1 << 0),
DMA_CTRL_ACK = (1 << 1),
- DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
- DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
- DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
- DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
- DMA_PREP_PQ_DISABLE_P = (1 << 6),
- DMA_PREP_PQ_DISABLE_Q = (1 << 7),
- DMA_PREP_CONTINUE = (1 << 8),
- DMA_PREP_FENCE = (1 << 9),
+ DMA_PREP_PQ_DISABLE_P = (1 << 2),
+ DMA_PREP_PQ_DISABLE_Q = (1 << 3),
+ DMA_PREP_CONTINUE = (1 << 4),
+ DMA_PREP_FENCE = (1 << 5),
};
/**
@@ -413,6 +403,17 @@ void dma_chan_cleanup(struct kref *kref);
typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
typedef void (*dma_async_tx_callback)(void *dma_async_param);
+
+struct dmaengine_unmap_data {
+ u8 to_cnt;
+ u8 from_cnt;
+ u8 bidi_cnt;
+ struct device *dev;
+ struct kref kref;
+ size_t len;
+ dma_addr_t addr[0];
+};
+
/**
* struct dma_async_tx_descriptor - async transaction descriptor
* ---dma generic offload fields---
@@ -438,6 +439,7 @@ struct dma_async_tx_descriptor {
dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
dma_async_tx_callback callback;
void *callback_param;
+ struct dmaengine_unmap_data *unmap;
#ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
struct dma_async_tx_descriptor *next;
struct dma_async_tx_descriptor *parent;
@@ -445,6 +447,40 @@ struct dma_async_tx_descriptor {
#endif
};
+#ifdef CONFIG_DMA_ENGINE
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+ struct dmaengine_unmap_data *unmap)
+{
+ kref_get(&unmap->kref);
+ tx->unmap = unmap;
+}
+
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags);
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap);
+#else
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+ struct dmaengine_unmap_data *unmap)
+{
+}
+static inline struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+ return NULL;
+}
+static inline void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+}
+#endif
+
+static inline void dma_descriptor_unmap(struct dma_async_tx_descriptor *tx)
+{
+ if (tx->unmap) {
+ dmaengine_unmap_put(tx->unmap);
+ tx->unmap = NULL;
+ }
+}
+
#ifndef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
static inline void txd_lock(struct dma_async_tx_descriptor *txd)
{
@@ -979,10 +1015,10 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
{
if (last_complete <= last_used) {
if ((cookie <= last_complete) || (cookie > last_used))
- return DMA_SUCCESS;
+ return DMA_COMPLETE;
} else {
if ((cookie <= last_complete) && (cookie > last_used))
- return DMA_SUCCESS;
+ return DMA_COMPLETE;
}
return DMA_IN_PROGRESS;
}
@@ -1013,11 +1049,11 @@ static inline struct dma_chan *dma_find_channel(enum dma_transaction_type tx_typ
}
static inline enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
{
- return DMA_SUCCESS;
+ return DMA_COMPLETE;
}
static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{
- return DMA_SUCCESS;
+ return DMA_COMPLETE;
}
static inline void dma_issue_pending_all(void)
{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bf5d574ebdf4..121f11f001c0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2622,7 +2622,9 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
extern int simple_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
+extern int always_delete_dentry(const struct dentry *);
extern struct inode *alloc_anon_inode(struct super_block *);
+extern const struct dentry_operations simple_dentry_operations;
extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index acd2010328f3..9649ff0c63f8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -31,6 +31,7 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
void hugepage_put_subpool(struct hugepage_subpool *spool);
int PageHuge(struct page *page);
+int PageHeadHuge(struct page *page_head);
void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -69,7 +70,6 @@ int dequeue_hwpoisoned_huge_page(struct page *page);
bool isolate_huge_page(struct page *page, struct list_head *list);
void putback_active_hugepage(struct page *page);
bool is_hugepage_active(struct page *page);
-void copy_huge_page(struct page *dst, struct page *src);
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
@@ -104,6 +104,11 @@ static inline int PageHuge(struct page *page)
return 0;
}
+static inline int PageHeadHuge(struct page *page_head)
+{
+ return 0;
+}
+
static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
{
}
@@ -140,9 +145,6 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page)
#define isolate_huge_page(p, l) false
#define putback_active_hugepage(p) do {} while (0)
#define is_hugepage_active(x) false
-static inline void copy_huge_page(struct page *dst, struct page *src)
-{
-}
static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot)
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 518a53afb9ea..a74c3a84dfdd 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -45,6 +45,7 @@ struct key_preparsed_payload {
const void *data; /* Raw data */
size_t datalen; /* Raw datalen */
size_t quotalen; /* Quota length for proposed payload */
+ bool trusted; /* True if key is trusted */
};
typedef int (*request_key_actor_t)(struct key_construction *key,
@@ -63,6 +64,11 @@ struct key_type {
*/
size_t def_datalen;
+ /* Default key search algorithm. */
+ unsigned def_lookup_type;
+#define KEYRING_SEARCH_LOOKUP_DIRECT 0x0000 /* Direct lookup by description. */
+#define KEYRING_SEARCH_LOOKUP_ITERATE 0x0001 /* Iterative search. */
+
/* vet a description */
int (*vet_description)(const char *description);
diff --git a/include/linux/key.h b/include/linux/key.h
index 4dfde1161c5e..80d677483e31 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -22,6 +22,7 @@
#include <linux/sysctl.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
+#include <linux/assoc_array.h>
#ifdef __KERNEL__
#include <linux/uidgid.h>
@@ -82,6 +83,12 @@ struct key_owner;
struct keyring_list;
struct keyring_name;
+struct keyring_index_key {
+ struct key_type *type;
+ const char *description;
+ size_t desc_len;
+};
+
/*****************************************************************************/
/*
* key reference with possession attribute handling
@@ -99,7 +106,7 @@ struct keyring_name;
typedef struct __key_reference_with_attributes *key_ref_t;
static inline key_ref_t make_key_ref(const struct key *key,
- unsigned long possession)
+ bool possession)
{
return (key_ref_t) ((unsigned long) key | possession);
}
@@ -109,7 +116,7 @@ static inline struct key *key_ref_to_ptr(const key_ref_t key_ref)
return (struct key *) ((unsigned long) key_ref & ~1UL);
}
-static inline unsigned long is_key_possessed(const key_ref_t key_ref)
+static inline bool is_key_possessed(const key_ref_t key_ref)
{
return (unsigned long) key_ref & 1UL;
}
@@ -129,7 +136,6 @@ struct key {
struct list_head graveyard_link;
struct rb_node serial_node;
};
- struct key_type *type; /* type of key */
struct rw_semaphore sem; /* change vs change sem */
struct key_user *user; /* owner of this key */
void *security; /* security data for this key */
@@ -162,13 +168,21 @@ struct key {
#define KEY_FLAG_NEGATIVE 5 /* set if key is negative */
#define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */
#define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */
+#define KEY_FLAG_TRUSTED 8 /* set if key is trusted */
+#define KEY_FLAG_TRUSTED_ONLY 9 /* set if keyring only accepts links to trusted keys */
- /* the description string
- * - this is used to match a key against search criteria
- * - this should be a printable string
+ /* the key type and key description string
+ * - the desc is used to match a key against search criteria
+ * - it should be a printable string
* - eg: for krb5 AFS, this might be "afs@REDHAT.COM"
*/
- char *description;
+ union {
+ struct keyring_index_key index_key;
+ struct {
+ struct key_type *type; /* type of key */
+ char *description;
+ };
+ };
/* type specific data
* - this is used by the keyring type to index the name
@@ -185,11 +199,14 @@ struct key {
* whatever
*/
union {
- unsigned long value;
- void __rcu *rcudata;
- void *data;
- struct keyring_list __rcu *subscriptions;
- } payload;
+ union {
+ unsigned long value;
+ void __rcu *rcudata;
+ void *data;
+ void *data2[2];
+ } payload;
+ struct assoc_array keys;
+ };
};
extern struct key *key_alloc(struct key_type *type,
@@ -203,18 +220,23 @@ extern struct key *key_alloc(struct key_type *type,
#define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */
#define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */
#define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */
+#define KEY_ALLOC_TRUSTED 0x0004 /* Key should be flagged as trusted */
extern void key_revoke(struct key *key);
extern void key_invalidate(struct key *key);
extern void key_put(struct key *key);
-static inline struct key *key_get(struct key *key)
+static inline struct key *__key_get(struct key *key)
{
- if (key)
- atomic_inc(&key->usage);
+ atomic_inc(&key->usage);
return key;
}
+static inline struct key *key_get(struct key *key)
+{
+ return key ? __key_get(key) : key;
+}
+
static inline void key_ref_put(key_ref_t key_ref)
{
key_put(key_ref_to_ptr(key_ref));
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0548eb201e05..1cedd000cf29 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1318,7 +1318,6 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
#if USE_SPLIT_PTE_PTLOCKS
#if BLOATED_SPINLOCKS
-void __init ptlock_cache_init(void);
extern bool ptlock_alloc(struct page *page);
extern void ptlock_free(struct page *page);
@@ -1327,7 +1326,6 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
return page->ptl;
}
#else /* BLOATED_SPINLOCKS */
-static inline void ptlock_cache_init(void) {}
static inline bool ptlock_alloc(struct page *page)
{
return true;
@@ -1380,17 +1378,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
return &mm->page_table_lock;
}
-static inline void ptlock_cache_init(void) {}
static inline bool ptlock_init(struct page *page) { return true; }
static inline void pte_lock_deinit(struct page *page) {}
#endif /* USE_SPLIT_PTE_PTLOCKS */
-static inline void pgtable_init(void)
-{
- ptlock_cache_init();
- pgtable_cache_init();
-}
-
static inline bool pgtable_page_ctor(struct page *page)
{
inc_zone_page_state(page, NR_PAGETABLE);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 10f5a7272b80..bd299418a934 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -44,18 +44,22 @@ struct page {
/* First double word block */
unsigned long flags; /* Atomic flags, some possibly
* updated asynchronously */
- struct address_space *mapping; /* If low bit clear, points to
- * inode address_space, or NULL.
- * If page mapped as anonymous
- * memory, low bit is set, and
- * it points to anon_vma object:
- * see PAGE_MAPPING_ANON below.
- */
+ union {
+ struct address_space *mapping; /* If low bit clear, points to
+ * inode address_space, or NULL.
+ * If page mapped as anonymous
+ * memory, low bit is set, and
+ * it points to anon_vma object:
+ * see PAGE_MAPPING_ANON below.
+ */
+ void *s_mem; /* slab first object */
+ };
+
/* Second double word */
struct {
union {
pgoff_t index; /* Our offset within mapping. */
- void *freelist; /* slub/slob first free object */
+ void *freelist; /* sl[aou]b first free object */
bool pfmemalloc; /* If set by the page allocator,
* ALLOC_NO_WATERMARKS was set
* and the low watermark was not
@@ -65,9 +69,6 @@ struct page {
* this page is only used to
* free other pages.
*/
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
- pgtable_t pmd_huge_pte; /* protected by page->ptl */
-#endif
};
union {
@@ -114,6 +115,7 @@ struct page {
};
atomic_t _count; /* Usage count, see below. */
};
+ unsigned int active; /* SLAB */
};
};
@@ -135,6 +137,12 @@ struct page {
struct list_head list; /* slobs list of pages */
struct slab *slab_page; /* slab fields */
+ struct rcu_head rcu_head; /* Used by SLAB
+ * when destroying via RCU
+ */
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
+ pgtable_t pmd_huge_pte; /* protected by page->ptl */
+#endif
};
/* Remainder is not double word aligned */
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index d006f0ca60f4..5a462c4e5009 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -27,7 +27,7 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
while (!pci_is_root_bus(pbus))
pbus = pbus->parent;
- return DEVICE_ACPI_HANDLE(pbus->bridge);
+ return ACPI_HANDLE(pbus->bridge);
}
static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
@@ -39,7 +39,7 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
else
dev = &pbus->self->dev;
- return DEVICE_ACPI_HANDLE(dev);
+ return ACPI_HANDLE(dev);
}
void acpi_pci_add_bus(struct pci_bus *bus);
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index 179fb91bb5f2..f50821cb64be 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -67,10 +67,10 @@ struct edmacc_param {
#define ITCCHEN BIT(23)
/*ch_status paramater of callback function possible values*/
-#define DMA_COMPLETE 1
-#define DMA_CC_ERROR 2
-#define DMA_TC1_ERROR 3
-#define DMA_TC2_ERROR 4
+#define EDMA_DMA_COMPLETE 1
+#define EDMA_DMA_CC_ERROR 2
+#define EDMA_DMA_TC1_ERROR 3
+#define EDMA_DMA_TC2_ERROR 4
enum address_mode {
INCR = 0,
diff --git a/include/linux/security.h b/include/linux/security.h
index 9d37e2b9d3ec..5623a7f965b7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1052,17 +1052,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
* @xfrm_policy_delete_security:
* @ctx contains the xfrm_sec_ctx.
* Authorize deletion of xp->security.
- * @xfrm_state_alloc_security:
+ * @xfrm_state_alloc:
* @x contains the xfrm_state being added to the Security Association
* Database by the XFRM system.
* @sec_ctx contains the security context information being provided by
* the user-level SA generation program (e.g., setkey or racoon).
- * @secid contains the secid from which to take the mls portion of the context.
* Allocate a security structure to the x->security field; the security
* field is initialized to NULL when the xfrm_state is allocated. Set the
- * context to correspond to either sec_ctx or polsec, with the mls portion
- * taken from secid in the latter case.
- * Return 0 if operation was successful (memory to allocate, legal context).
+ * context to correspond to sec_ctx. Return 0 if operation was successful
+ * (memory to allocate, legal context).
+ * @xfrm_state_alloc_acquire:
+ * @x contains the xfrm_state being added to the Security Association
+ * Database by the XFRM system.
+ * @polsec contains the policy's security context.
+ * @secid contains the secid from which to take the mls portion of the
+ * context.
+ * Allocate a security structure to the x->security field; the security
+ * field is initialized to NULL when the xfrm_state is allocated. Set the
+ * context to correspond to secid. Return 0 if operation was successful
+ * (memory to allocate, legal context).
* @xfrm_state_free_security:
* @x contains the xfrm_state.
* Deallocate x->security.
@@ -1679,9 +1687,11 @@ struct security_operations {
int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx);
void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx);
int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx);
- int (*xfrm_state_alloc_security) (struct xfrm_state *x,
- struct xfrm_user_sec_ctx *sec_ctx,
- u32 secid);
+ int (*xfrm_state_alloc) (struct xfrm_state *x,
+ struct xfrm_user_sec_ctx *sec_ctx);
+ int (*xfrm_state_alloc_acquire) (struct xfrm_state *x,
+ struct xfrm_sec_ctx *polsec,
+ u32 secid);
void (*xfrm_state_free_security) (struct xfrm_state *x);
int (*xfrm_state_delete_security) (struct xfrm_state *x);
int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 1e8a8b6e837d..cf87a24c0f92 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -354,6 +354,35 @@ static inline void read_sequnlock_excl(seqlock_t *sl)
spin_unlock(&sl->lock);
}
+/**
+ * read_seqbegin_or_lock - begin a sequence number check or locking block
+ * @lock: sequence lock
+ * @seq : sequence number to be checked
+ *
+ * First try it once optimistically without taking the lock. If that fails,
+ * take the lock. The sequence number is also used as a marker for deciding
+ * whether to be a reader (even) or writer (odd).
+ * N.B. seq must be initialized to an even number to begin with.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+ if (!(*seq & 1)) /* Even */
+ *seq = read_seqbegin(lock);
+ else /* Odd */
+ read_seqlock_excl(lock);
+}
+
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{
+ return !(seq & 1) && read_seqretry(lock, seq);
+}
+
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{
+ if (seq & 1)
+ read_sequnlock_excl(lock);
+}
+
static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
spin_lock_bh(&sl->lock);
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 74f105847d13..c2bba248fa63 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -53,7 +53,14 @@
* }
* rcu_read_unlock();
*
- * See also the comment on struct slab_rcu in mm/slab.c.
+ * This is useful if we need to approach a kernel structure obliquely,
+ * from its address obtained without the usual locking. We can lock
+ * the structure to stabilize it and check it's still at the given address,
+ * only if we can be sure that the memory has not been meanwhile reused
+ * for some other kind of object (which our subsystem's lock might corrupt).
+ *
+ * rcu_read_lock before reading the address, then rcu_read_unlock after
+ * taking the spinlock within the structure expected at that address.
*/
#define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */
#define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index e9346b4f1ef4..09bfffb08a56 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -27,8 +27,8 @@ struct kmem_cache {
size_t colour; /* cache colouring range */
unsigned int colour_off; /* colour offset */
- struct kmem_cache *slabp_cache;
- unsigned int slab_size;
+ struct kmem_cache *freelist_cache;
+ unsigned int freelist_size;
/* constructor func */
void (*ctor)(void *obj);
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index cc0b67eada42..f56bfa9e4526 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -11,7 +11,7 @@
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
- FREE_FASTPATH, /* Free to cpu slub */
+ FREE_FASTPATH, /* Free to cpu slab */
FREE_SLOWPATH, /* Freeing not to cpu slab */
FREE_FROZEN, /* Freeing to frozen slab */
FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4db29859464f..4836ba3c1cd8 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -27,6 +27,12 @@ struct user_namespace {
kuid_t owner;
kgid_t group;
unsigned int proc_inum;
+
+ /* Register of per-UID persistent keyrings for this namespace */
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+ struct key *persistent_keyring_register;
+ struct rw_semaphore persistent_keyring_register_sem;
+#endif
};
extern struct user_namespace init_user_ns;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 61939ba30aa0..eaa00b10abaa 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -278,6 +278,31 @@ do { \
__ret; \
})
+#define __wait_event_cmd(wq, condition, cmd1, cmd2) \
+ (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
+ cmd1; schedule(); cmd2)
+
+/**
+ * wait_event_cmd - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * cmd1: the command will be executed before sleep
+ * cmd2: the command will be executed after sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
+#define wait_event_cmd(wq, condition, cmd1, cmd2) \
+do { \
+ if (condition) \
+ break; \
+ __wait_event_cmd(wq, condition, cmd1, cmd2); \
+} while (0)
+
#define __wait_event_interruptible(wq, condition) \
___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
schedule())
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index f18b3b76e01e..4832d75dcbae 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -162,12 +162,14 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict,
{ EXTENT_FLAG_LOGGING, "LOGGING" }, \
{ EXTENT_FLAG_FILLING, "FILLING" })
-TRACE_EVENT(btrfs_get_extent,
+TRACE_EVENT_CONDITION(btrfs_get_extent,
TP_PROTO(struct btrfs_root *root, struct extent_map *map),
TP_ARGS(root, map),
+ TP_CONDITION(map),
+
TP_STRUCT__entry(
__field( u64, root_objectid )
__field( u64, start )
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index db0b825b4810..44b05a09f193 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -68,6 +68,9 @@
#define AUDIT_MAKE_EQUIV 1015 /* Append to watched tree */
#define AUDIT_TTY_GET 1016 /* Get TTY auditing status */
#define AUDIT_TTY_SET 1017 /* Set TTY auditing status */
+#define AUDIT_SET_FEATURE 1018 /* Turn an audit feature on or off */
+#define AUDIT_GET_FEATURE 1019 /* Get which features are enabled */
+#define AUDIT_FEATURE_CHANGE 1020 /* audit log listing feature changes */
#define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */
#define AUDIT_USER_AVC 1107 /* We filter this differently */
@@ -357,6 +360,12 @@ enum {
#define AUDIT_PERM_READ 4
#define AUDIT_PERM_ATTR 8
+/* MAX_AUDIT_MESSAGE_LENGTH is set in audit:lib/libaudit.h as:
+ * 8970 // PATH_MAX*2+CONTEXT_SIZE*2+11+256+1
+ * max header+body+tailer: 44 + 29 + 32 + 262 + 7 + pad
+ */
+#define AUDIT_MESSAGE_TEXT_MAX 8560
+
struct audit_status {
__u32 mask; /* Bit mask for valid entries */
__u32 enabled; /* 1 = enabled, 0 = disabled */
@@ -368,11 +377,28 @@ struct audit_status {
__u32 backlog; /* messages waiting in queue */
};
+struct audit_features {
+#define AUDIT_FEATURE_VERSION 1
+ __u32 vers;
+ __u32 mask; /* which bits we are dealing with */
+ __u32 features; /* which feature to enable/disable */
+ __u32 lock; /* which features to lock */
+};
+
+#define AUDIT_FEATURE_ONLY_UNSET_LOGINUID 0
+#define AUDIT_FEATURE_LOGINUID_IMMUTABLE 1
+#define AUDIT_LAST_FEATURE AUDIT_FEATURE_LOGINUID_IMMUTABLE
+
+#define audit_feature_valid(x) ((x) >= 0 && (x) <= AUDIT_LAST_FEATURE)
+#define AUDIT_FEATURE_TO_MASK(x) (1 << ((x) & 31)) /* mask for __u32 */
+
struct audit_tty_status {
__u32 enabled; /* 1 = enabled, 0 = disabled */
__u32 log_passwd; /* 1 = enabled, 0 = disabled */
};
+#define AUDIT_UID_UNSET (unsigned int)-1
+
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
* AUDIT_LIST_RULES requests.
diff --git a/include/uapi/linux/hash_info.h b/include/uapi/linux/hash_info.h
new file mode 100644
index 000000000000..ca18c45f8304
--- /dev/null
+++ b/include/uapi/linux/hash_info.h
@@ -0,0 +1,37 @@
+/*
+ * Hash Info: Hash algorithms information
+ *
+ * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _UAPI_LINUX_HASH_INFO_H
+#define _UAPI_LINUX_HASH_INFO_H
+
+enum hash_algo {
+ HASH_ALGO_MD4,
+ HASH_ALGO_MD5,
+ HASH_ALGO_SHA1,
+ HASH_ALGO_RIPE_MD_160,
+ HASH_ALGO_SHA256,
+ HASH_ALGO_SHA384,
+ HASH_ALGO_SHA512,
+ HASH_ALGO_SHA224,
+ HASH_ALGO_RIPE_MD_128,
+ HASH_ALGO_RIPE_MD_256,
+ HASH_ALGO_RIPE_MD_320,
+ HASH_ALGO_WP_256,
+ HASH_ALGO_WP_384,
+ HASH_ALGO_WP_512,
+ HASH_ALGO_TGR_128,
+ HASH_ALGO_TGR_160,
+ HASH_ALGO_TGR_192,
+ HASH_ALGO__LAST
+};
+
+#endif /* _UAPI_LINUX_HASH_INFO_H */
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index c9b7f4faf97a..840cb990abe2 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -56,5 +56,6 @@
#define KEYCTL_REJECT 19 /* reject a partially constructed key */
#define KEYCTL_INSTANTIATE_IOV 20 /* instantiate a partially constructed key */
#define KEYCTL_INVALIDATE 21 /* invalidate a key */
+#define KEYCTL_GET_PERSISTENT 22 /* get a user's persistent keyring */
#endif /* _LINUX_KEYCTL_H */
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index fe1a5406d4d9..f7cf7f351144 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -16,6 +16,7 @@
#define _MD_P_H
#include <linux/types.h>
+#include <asm/byteorder.h>
/*
* RAID superblock.
diff --git a/init/Kconfig b/init/Kconfig
index 3fc8a2f2fac4..79383d3aa5dc 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -301,20 +301,6 @@ config AUDIT_TREE
depends on AUDITSYSCALL
select FSNOTIFY
-config AUDIT_LOGINUID_IMMUTABLE
- bool "Make audit loginuid immutable"
- depends on AUDIT
- help
- The config option toggles if a task setting its loginuid requires
- CAP_SYS_AUDITCONTROL or if that task should require no special permissions
- but should instead only allow setting its loginuid if it was never
- previously set. On systems which use systemd or a similar central
- process to restart login services this should be set to true. On older
- systems in which an admin would typically have to directly stop and
- start processes this should be set to false. Setting this to true allows
- one to drop potentially dangerous capabilites from the login tasks,
- but may not be backwards compatible with older init systems.
-
source "kernel/irq/Kconfig"
source "kernel/time/Kconfig"
@@ -1669,6 +1655,18 @@ config BASE_SMALL
default 0 if BASE_FULL
default 1 if !BASE_FULL
+config SYSTEM_TRUSTED_KEYRING
+ bool "Provide system-wide ring of trusted keys"
+ depends on KEYS
+ help
+ Provide a system keyring to which trusted keys can be added. Keys in
+ the keyring are considered to be trusted. Keys may be added at will
+ by the kernel from compiled-in data and from hardware key stores, but
+ userspace may only add extra keys if those keys can be verified by
+ keys already in the keyring.
+
+ Keys in this keyring are used by module signature checking.
+
menuconfig MODULES
bool "Enable loadable module support"
option modules
@@ -1742,6 +1740,7 @@ config MODULE_SRCVERSION_ALL
config MODULE_SIG
bool "Module signature verification"
depends on MODULES
+ select SYSTEM_TRUSTED_KEYRING
select KEYS
select CRYPTO
select ASYMMETRIC_KEY_TYPE
diff --git a/init/main.c b/init/main.c
index 01573fdfa186..febc511e078a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -476,7 +476,7 @@ static void __init mm_init(void)
mem_init();
kmem_cache_init();
percpu_init_late();
- pgtable_init();
+ pgtable_cache_init();
vmalloc_init();
}
diff --git a/ipc/shm.c b/ipc/shm.c
index d69739610fd4..7a51443a51d6 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -208,15 +208,18 @@ static void shm_open(struct vm_area_struct *vma)
*/
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
+ struct file *shm_file;
+
+ shm_file = shp->shm_file;
+ shp->shm_file = NULL;
ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
shm_rmid(ns, shp);
shm_unlock(shp);
- if (!is_file_hugepages(shp->shm_file))
- shmem_lock(shp->shm_file, 0, shp->mlock_user);
+ if (!is_file_hugepages(shm_file))
+ shmem_lock(shm_file, 0, shp->mlock_user);
else if (shp->mlock_user)
- user_shm_unlock(file_inode(shp->shm_file)->i_size,
- shp->mlock_user);
- fput (shp->shm_file);
+ user_shm_unlock(file_inode(shm_file)->i_size, shp->mlock_user);
+ fput(shm_file);
ipc_rcu_putref(shp, shm_rcu_free);
}
@@ -974,15 +977,25 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
ipc_lock_object(&shp->shm_perm);
if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
kuid_t euid = current_euid();
- err = -EPERM;
if (!uid_eq(euid, shp->shm_perm.uid) &&
- !uid_eq(euid, shp->shm_perm.cuid))
+ !uid_eq(euid, shp->shm_perm.cuid)) {
+ err = -EPERM;
goto out_unlock0;
- if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
+ }
+ if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
+ err = -EPERM;
goto out_unlock0;
+ }
}
shm_file = shp->shm_file;
+
+ /* check if shm_destroy() is tearing down shp */
+ if (shm_file == NULL) {
+ err = -EIDRM;
+ goto out_unlock0;
+ }
+
if (is_file_hugepages(shm_file))
goto out_unlock0;
@@ -1101,6 +1114,14 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
goto out_unlock;
ipc_lock_object(&shp->shm_perm);
+
+ /* check if shm_destroy() is tearing down shp */
+ if (shp->shm_file == NULL) {
+ ipc_unlock_object(&shp->shm_perm);
+ err = -EIDRM;
+ goto out_unlock;
+ }
+
path = shp->shm_file->f_path;
path_get(&path);
shp->shm_nattch++;
diff --git a/kernel/Makefile b/kernel/Makefile
index 09a9c94f42bd..bbaf7d59c1bb 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -41,8 +41,9 @@ ifneq ($(CONFIG_SMP),y)
obj-y += up.o
endif
obj-$(CONFIG_UID16) += uid16.o
+obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o
+obj-$(CONFIG_MODULE_SIG) += module_signing.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
@@ -122,19 +123,52 @@ targets += timeconst.h
$(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
$(call if_changed,bc)
-ifeq ($(CONFIG_MODULE_SIG),y)
+###############################################################################
+#
+# Roll all the X.509 certificates that we can find together and pull them into
+# the kernel so that they get loaded into the system trusted keyring during
+# boot.
#
-# Pull the signing certificate and any extra certificates into the kernel
+# We look in the source root and the build root for all files whose name ends
+# in ".x509". Unfortunately, this will generate duplicate filenames, so we
+# have make canonicalise the pathnames and then sort them to discard the
+# duplicates.
#
+###############################################################################
+ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y)
+X509_CERTIFICATES-y := $(wildcard *.x509) $(wildcard $(srctree)/*.x509)
+X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += signing_key.x509
+X509_CERTIFICATES := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \
+ $(or $(realpath $(CERT)),$(CERT))))
+
+ifeq ($(X509_CERTIFICATES),)
+$(warning *** No X.509 certificates found ***)
+endif
+
+ifneq ($(wildcard $(obj)/.x509.list),)
+ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES))
+$(info X.509 certificate list changed)
+$(shell rm $(obj)/.x509.list)
+endif
+endif
+
+kernel/system_certificates.o: $(obj)/x509_certificate_list
-quiet_cmd_touch = TOUCH $@
- cmd_touch = touch $@
+quiet_cmd_x509certs = CERTS $@
+ cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; echo " - Including cert $(X509)")
-extra_certificates:
- $(call cmd,touch)
+targets += $(obj)/x509_certificate_list
+$(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list
+ $(call if_changed,x509certs)
-kernel/modsign_certificate.o: signing_key.x509 extra_certificates
+targets += $(obj)/.x509.list
+$(obj)/.x509.list:
+ @echo $(X509_CERTIFICATES) >$@
+clean-files := x509_certificate_list .x509.list
+endif
+
+ifeq ($(CONFIG_MODULE_SIG),y)
###############################################################################
#
# If module signing is requested, say by allyesconfig, but a key has not been
diff --git a/kernel/audit.c b/kernel/audit.c
index 7b0e23a740ce..906ae5a0233a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -60,7 +60,6 @@
#ifdef CONFIG_SECURITY
#include <linux/security.h>
#endif
-#include <net/netlink.h>
#include <linux/freezer.h>
#include <linux/tty.h>
#include <linux/pid_namespace.h>
@@ -140,6 +139,17 @@ static struct task_struct *kauditd_task;
static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
+static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
+ .mask = -1,
+ .features = 0,
+ .lock = 0,};
+
+static char *audit_feature_names[2] = {
+ "only_unset_loginuid",
+ "loginuid_immutable",
+};
+
+
/* Serialize requests from userspace. */
DEFINE_MUTEX(audit_cmd_mutex);
@@ -584,6 +594,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
return -EOPNOTSUPP;
case AUDIT_GET:
case AUDIT_SET:
+ case AUDIT_GET_FEATURE:
+ case AUDIT_SET_FEATURE:
case AUDIT_LIST_RULES:
case AUDIT_ADD_RULE:
case AUDIT_DEL_RULE:
@@ -613,7 +625,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
int rc = 0;
uid_t uid = from_kuid(&init_user_ns, current_uid());
- if (!audit_enabled) {
+ if (!audit_enabled && msg_type != AUDIT_USER_AVC) {
*ab = NULL;
return rc;
}
@@ -628,6 +640,94 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
return rc;
}
+int is_audit_feature_set(int i)
+{
+ return af.features & AUDIT_FEATURE_TO_MASK(i);
+}
+
+
+static int audit_get_feature(struct sk_buff *skb)
+{
+ u32 seq;
+
+ seq = nlmsg_hdr(skb)->nlmsg_seq;
+
+ audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0,
+ &af, sizeof(af));
+
+ return 0;
+}
+
+static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature,
+ u32 old_lock, u32 new_lock, int res)
+{
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
+ audit_log_format(ab, "feature=%s new=%d old=%d old_lock=%d new_lock=%d res=%d",
+ audit_feature_names[which], !!old_feature, !!new_feature,
+ !!old_lock, !!new_lock, res);
+ audit_log_end(ab);
+}
+
+static int audit_set_feature(struct sk_buff *skb)
+{
+ struct audit_features *uaf;
+ int i;
+
+ BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > sizeof(audit_feature_names)/sizeof(audit_feature_names[0]));
+ uaf = nlmsg_data(nlmsg_hdr(skb));
+
+ /* if there is ever a version 2 we should handle that here */
+
+ for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+ u32 feature = AUDIT_FEATURE_TO_MASK(i);
+ u32 old_feature, new_feature, old_lock, new_lock;
+
+ /* if we are not changing this feature, move along */
+ if (!(feature & uaf->mask))
+ continue;
+
+ old_feature = af.features & feature;
+ new_feature = uaf->features & feature;
+ new_lock = (uaf->lock | af.lock) & feature;
+ old_lock = af.lock & feature;
+
+ /* are we changing a locked feature? */
+ if ((af.lock & feature) && (new_feature != old_feature)) {
+ audit_log_feature_change(i, old_feature, new_feature,
+ old_lock, new_lock, 0);
+ return -EPERM;
+ }
+ }
+ /* nothing invalid, do the changes */
+ for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+ u32 feature = AUDIT_FEATURE_TO_MASK(i);
+ u32 old_feature, new_feature, old_lock, new_lock;
+
+ /* if we are not changing this feature, move along */
+ if (!(feature & uaf->mask))
+ continue;
+
+ old_feature = af.features & feature;
+ new_feature = uaf->features & feature;
+ old_lock = af.lock & feature;
+ new_lock = (uaf->lock | af.lock) & feature;
+
+ if (new_feature != old_feature)
+ audit_log_feature_change(i, old_feature, new_feature,
+ old_lock, new_lock, 1);
+
+ if (new_feature)
+ af.features |= feature;
+ else
+ af.features &= ~feature;
+ af.lock |= new_lock;
+ }
+
+ return 0;
+}
+
static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
u32 seq;
@@ -659,6 +759,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
switch (msg_type) {
case AUDIT_GET:
+ memset(&status_set, 0, sizeof(status_set));
status_set.enabled = audit_enabled;
status_set.failure = audit_failure;
status_set.pid = audit_pid;
@@ -670,7 +771,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
&status_set, sizeof(status_set));
break;
case AUDIT_SET:
- if (nlh->nlmsg_len < sizeof(struct audit_status))
+ if (nlmsg_len(nlh) < sizeof(struct audit_status))
return -EINVAL;
status_get = (struct audit_status *)data;
if (status_get->mask & AUDIT_STATUS_ENABLED) {
@@ -699,6 +800,16 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
err = audit_set_backlog_limit(status_get->backlog_limit);
break;
+ case AUDIT_GET_FEATURE:
+ err = audit_get_feature(skb);
+ if (err)
+ return err;
+ break;
+ case AUDIT_SET_FEATURE:
+ err = audit_set_feature(skb);
+ if (err)
+ return err;
+ break;
case AUDIT_USER:
case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
@@ -715,7 +826,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
}
audit_log_common_recv_msg(&ab, msg_type);
if (msg_type != AUDIT_USER_TTY)
- audit_log_format(ab, " msg='%.1024s'",
+ audit_log_format(ab, " msg='%.*s'",
+ AUDIT_MESSAGE_TEXT_MAX,
(char *)data);
else {
int size;
@@ -818,7 +930,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
struct task_struct *tsk = current;
spin_lock(&tsk->sighand->siglock);
- s.enabled = tsk->signal->audit_tty != 0;
+ s.enabled = tsk->signal->audit_tty;
s.log_passwd = tsk->signal->audit_tty_log_passwd;
spin_unlock(&tsk->sighand->siglock);
@@ -832,7 +944,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
memset(&s, 0, sizeof(s));
/* guard against past and future API changes */
- memcpy(&s, data, min(sizeof(s), (size_t)nlh->nlmsg_len));
+ memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
if ((s.enabled != 0 && s.enabled != 1) ||
(s.log_passwd != 0 && s.log_passwd != 1))
return -EINVAL;
@@ -1067,13 +1179,6 @@ static void wait_for_auditd(unsigned long sleep_time)
remove_wait_queue(&audit_backlog_wait, &wait);
}
-/* Obtain an audit buffer. This routine does locking to obtain the
- * audit buffer, but then no locking is required for calls to
- * audit_log_*format. If the tsk is a task that is currently in a
- * syscall, then the syscall is marked as auditable and an audit record
- * will be written at syscall exit. If there is no associated task, tsk
- * should be NULL. */
-
/**
* audit_log_start - obtain an audit buffer
* @ctx: audit_context (may be NULL)
@@ -1389,7 +1494,7 @@ void audit_log_session_info(struct audit_buffer *ab)
u32 sessionid = audit_get_sessionid(current);
uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current));
- audit_log_format(ab, " auid=%u ses=%u\n", auid, sessionid);
+ audit_log_format(ab, " auid=%u ses=%u", auid, sessionid);
}
void audit_log_key(struct audit_buffer *ab, char *key)
@@ -1536,6 +1641,26 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
}
}
+ /* log the audit_names record type */
+ audit_log_format(ab, " nametype=");
+ switch(n->type) {
+ case AUDIT_TYPE_NORMAL:
+ audit_log_format(ab, "NORMAL");
+ break;
+ case AUDIT_TYPE_PARENT:
+ audit_log_format(ab, "PARENT");
+ break;
+ case AUDIT_TYPE_CHILD_DELETE:
+ audit_log_format(ab, "DELETE");
+ break;
+ case AUDIT_TYPE_CHILD_CREATE:
+ audit_log_format(ab, "CREATE");
+ break;
+ default:
+ audit_log_format(ab, "UNKNOWN");
+ break;
+ }
+
audit_log_fcaps(ab, n);
audit_log_end(ab);
}
diff --git a/kernel/audit.h b/kernel/audit.h
index 123c9b7c3979..b779642b29af 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -197,6 +197,9 @@ struct audit_context {
int fd;
int flags;
} mmap;
+ struct {
+ int argc;
+ } execve;
};
int fds[2];
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index f7aee8be7fb2..51f3fd4c1ed3 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -343,6 +343,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
case AUDIT_DEVMINOR:
case AUDIT_EXIT:
case AUDIT_SUCCESS:
+ case AUDIT_INODE:
/* bit ops are only useful on syscall args */
if (f->op == Audit_bitmask || f->op == Audit_bittest)
return -EINVAL;
@@ -423,7 +424,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
f->lsm_rule = NULL;
/* Support legacy tests for a valid loginuid */
- if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) {
+ if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
f->type = AUDIT_LOGINUID_SET;
f->val = 0;
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9845cb32b60a..90594c9f7552 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -95,13 +95,6 @@ struct audit_aux_data {
/* Number of target pids per aux struct. */
#define AUDIT_AUX_PIDS 16
-struct audit_aux_data_execve {
- struct audit_aux_data d;
- int argc;
- int envc;
- struct mm_struct *mm;
-};
-
struct audit_aux_data_pids {
struct audit_aux_data d;
pid_t target_pid[AUDIT_AUX_PIDS];
@@ -121,12 +114,6 @@ struct audit_aux_data_bprm_fcaps {
struct audit_cap_data new_pcap;
};
-struct audit_aux_data_capset {
- struct audit_aux_data d;
- pid_t pid;
- struct audit_cap_data cap;
-};
-
struct audit_tree_refs {
struct audit_tree_refs *next;
struct audit_chunk *c[31];
@@ -566,7 +553,7 @@ static int audit_filter_rules(struct task_struct *tsk,
break;
case AUDIT_INODE:
if (name)
- result = (name->ino == f->val);
+ result = audit_comparator(name->ino, f->op, f->val);
else if (ctx) {
list_for_each_entry(n, &ctx->names_list, list) {
if (audit_comparator(n->ino, f->op, f->val)) {
@@ -943,8 +930,10 @@ int audit_alloc(struct task_struct *tsk)
return 0; /* Return if not auditing. */
state = audit_filter_task(tsk, &key);
- if (state == AUDIT_DISABLED)
+ if (state == AUDIT_DISABLED) {
+ clear_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
return 0;
+ }
if (!(context = audit_alloc_context(state))) {
kfree(key);
@@ -1149,20 +1138,16 @@ static int audit_log_single_execve_arg(struct audit_context *context,
}
static void audit_log_execve_info(struct audit_context *context,
- struct audit_buffer **ab,
- struct audit_aux_data_execve *axi)
+ struct audit_buffer **ab)
{
int i, len;
size_t len_sent = 0;
const char __user *p;
char *buf;
- if (axi->mm != current->mm)
- return; /* execve failed, no additional info */
-
- p = (const char __user *)axi->mm->arg_start;
+ p = (const char __user *)current->mm->arg_start;
- audit_log_format(*ab, "argc=%d", axi->argc);
+ audit_log_format(*ab, "argc=%d", context->execve.argc);
/*
* we need some kernel buffer to hold the userspace args. Just
@@ -1176,7 +1161,7 @@ static void audit_log_execve_info(struct audit_context *context,
return;
}
- for (i = 0; i < axi->argc; i++) {
+ for (i = 0; i < context->execve.argc; i++) {
len = audit_log_single_execve_arg(context, ab, i,
&len_sent, p, buf);
if (len <= 0)
@@ -1279,6 +1264,9 @@ static void show_special(struct audit_context *context, int *call_panic)
audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd,
context->mmap.flags);
break; }
+ case AUDIT_EXECVE: {
+ audit_log_execve_info(context, &ab);
+ break; }
}
audit_log_end(ab);
}
@@ -1325,11 +1313,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
switch (aux->type) {
- case AUDIT_EXECVE: {
- struct audit_aux_data_execve *axi = (void *)aux;
- audit_log_execve_info(context, &ab, axi);
- break; }
-
case AUDIT_BPRM_FCAPS: {
struct audit_aux_data_bprm_fcaps *axs = (void *)aux;
audit_log_format(ab, "fver=%x", axs->fcap_ver);
@@ -1964,6 +1947,43 @@ int auditsc_get_stamp(struct audit_context *ctx,
/* global counter which is incremented every time something logs in */
static atomic_t session_id = ATOMIC_INIT(0);
+static int audit_set_loginuid_perm(kuid_t loginuid)
+{
+ /* if we are unset, we don't need privs */
+ if (!audit_loginuid_set(current))
+ return 0;
+ /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/
+ if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
+ return -EPERM;
+ /* it is set, you need permission */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* reject if this is not an unset and we don't allow that */
+ if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
+ unsigned int oldsessionid, unsigned int sessionid,
+ int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid, ologinuid, nloginuid;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ ologinuid = from_kuid(&init_user_ns, koldloginuid);
+ nloginuid = from_kuid(&init_user_ns, kloginuid),
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+ if (!ab)
+ return;
+ audit_log_format(ab, "pid=%d uid=%u old auid=%u new auid=%u old "
+ "ses=%u new ses=%u res=%d", current->pid, uid, ologinuid,
+ nloginuid, oldsessionid, sessionid, !rc);
+ audit_log_end(ab);
+}
+
/**
* audit_set_loginuid - set current task's audit_context loginuid
* @loginuid: loginuid value
@@ -1975,37 +1995,26 @@ static atomic_t session_id = ATOMIC_INIT(0);
int audit_set_loginuid(kuid_t loginuid)
{
struct task_struct *task = current;
- struct audit_context *context = task->audit_context;
- unsigned int sessionid;
+ unsigned int oldsessionid, sessionid = (unsigned int)-1;
+ kuid_t oldloginuid;
+ int rc;
-#ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE
- if (audit_loginuid_set(task))
- return -EPERM;
-#else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
- if (!capable(CAP_AUDIT_CONTROL))
- return -EPERM;
-#endif /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
+ oldloginuid = audit_get_loginuid(current);
+ oldsessionid = audit_get_sessionid(current);
- sessionid = atomic_inc_return(&session_id);
- if (context && context->in_syscall) {
- struct audit_buffer *ab;
+ rc = audit_set_loginuid_perm(loginuid);
+ if (rc)
+ goto out;
+
+ /* are we setting or clearing? */
+ if (uid_valid(loginuid))
+ sessionid = atomic_inc_return(&session_id);
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
- if (ab) {
- audit_log_format(ab, "login pid=%d uid=%u "
- "old auid=%u new auid=%u"
- " old ses=%u new ses=%u",
- task->pid,
- from_kuid(&init_user_ns, task_uid(task)),
- from_kuid(&init_user_ns, task->loginuid),
- from_kuid(&init_user_ns, loginuid),
- task->sessionid, sessionid);
- audit_log_end(ab);
- }
- }
task->sessionid = sessionid;
task->loginuid = loginuid;
- return 0;
+out:
+ audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc);
+ return rc;
}
/**
@@ -2126,22 +2135,12 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo
context->ipc.has_perm = 1;
}
-int __audit_bprm(struct linux_binprm *bprm)
+void __audit_bprm(struct linux_binprm *bprm)
{
- struct audit_aux_data_execve *ax;
struct audit_context *context = current->audit_context;
- ax = kmalloc(sizeof(*ax), GFP_KERNEL);
- if (!ax)
- return -ENOMEM;
-
- ax->argc = bprm->argc;
- ax->envc = bprm->envc;
- ax->mm = bprm->mm;
- ax->d.type = AUDIT_EXECVE;
- ax->d.next = context->aux;
- context->aux = (void *)ax;
- return 0;
+ context->type = AUDIT_EXECVE;
+ context->execve.argc = bprm->argc;
}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e0839bcd48c8..4c62513fe19f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -895,11 +895,6 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
iput(inode);
}
-static int cgroup_delete(const struct dentry *d)
-{
- return 1;
-}
-
static void remove_dir(struct dentry *d)
{
struct dentry *parent = dget(d->d_parent);
@@ -1486,7 +1481,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
{
static const struct dentry_operations cgroup_dops = {
.d_iput = cgroup_diput,
- .d_delete = cgroup_delete,
+ .d_delete = always_delete_dentry,
};
struct inode *inode =
diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S
deleted file mode 100644
index 4a9a86d12c8b..000000000000
--- a/kernel/modsign_certificate.S
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <linux/export.h>
-
-#define GLOBAL(name) \
- .globl VMLINUX_SYMBOL(name); \
- VMLINUX_SYMBOL(name):
-
- .section ".init.data","aw"
-
-GLOBAL(modsign_certificate_list)
- .incbin "signing_key.x509"
- .incbin "extra_certificates"
-GLOBAL(modsign_certificate_list_end)
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
deleted file mode 100644
index 7cbd4507a7e6..000000000000
--- a/kernel/modsign_pubkey.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/* Public keys for module signature verification
- *
- * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/cred.h>
-#include <linux/err.h>
-#include <keys/asymmetric-type.h>
-#include "module-internal.h"
-
-struct key *modsign_keyring;
-
-extern __initconst const u8 modsign_certificate_list[];
-extern __initconst const u8 modsign_certificate_list_end[];
-
-/*
- * We need to make sure ccache doesn't cache the .o file as it doesn't notice
- * if modsign.pub changes.
- */
-static __initconst const char annoy_ccache[] = __TIME__ "foo";
-
-/*
- * Load the compiled-in keys
- */
-static __init int module_verify_init(void)
-{
- pr_notice("Initialise module verification\n");
-
- modsign_keyring = keyring_alloc(".module_sign",
- KUIDT_INIT(0), KGIDT_INIT(0),
- current_cred(),
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
- KEY_ALLOC_NOT_IN_QUOTA, NULL);
- if (IS_ERR(modsign_keyring))
- panic("Can't allocate module signing keyring\n");
-
- return 0;
-}
-
-/*
- * Must be initialised before we try and load the keys into the keyring.
- */
-device_initcall(module_verify_init);
-
-/*
- * Load the compiled-in keys
- */
-static __init int load_module_signing_keys(void)
-{
- key_ref_t key;
- const u8 *p, *end;
- size_t plen;
-
- pr_notice("Loading module verification certificates\n");
-
- end = modsign_certificate_list_end;
- p = modsign_certificate_list;
- while (p < end) {
- /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
- * than 256 bytes in size.
- */
- if (end - p < 4)
- goto dodgy_cert;
- if (p[0] != 0x30 &&
- p[1] != 0x82)
- goto dodgy_cert;
- plen = (p[2] << 8) | p[3];
- plen += 4;
- if (plen > end - p)
- goto dodgy_cert;
-
- key = key_create_or_update(make_key_ref(modsign_keyring, 1),
- "asymmetric",
- NULL,
- p,
- plen,
- (KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW,
- KEY_ALLOC_NOT_IN_QUOTA);
- if (IS_ERR(key))
- pr_err("MODSIGN: Problem loading in-kernel X.509 certificate (%ld)\n",
- PTR_ERR(key));
- else
- pr_notice("MODSIGN: Loaded cert '%s'\n",
- key_ref_to_ptr(key)->description);
- p += plen;
- }
-
- return 0;
-
-dodgy_cert:
- pr_err("MODSIGN: Problem parsing in-kernel X.509 certificate list\n");
- return 0;
-}
-late_initcall(load_module_signing_keys);
diff --git a/kernel/module-internal.h b/kernel/module-internal.h
index 24f9247b7d02..915e123a430f 100644
--- a/kernel/module-internal.h
+++ b/kernel/module-internal.h
@@ -9,6 +9,4 @@
* 2 of the Licence, or (at your option) any later version.
*/
-extern struct key *modsign_keyring;
-
extern int mod_verify_sig(const void *mod, unsigned long *_modlen);
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index f2970bddc5ea..be5b8fac4bd0 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -14,6 +14,7 @@
#include <crypto/public_key.h>
#include <crypto/hash.h>
#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
#include "module-internal.h"
/*
@@ -28,7 +29,7 @@
*/
struct module_signature {
u8 algo; /* Public-key crypto algorithm [enum pkey_algo] */
- u8 hash; /* Digest algorithm [enum pkey_hash_algo] */
+ u8 hash; /* Digest algorithm [enum hash_algo] */
u8 id_type; /* Key identifier type [enum pkey_id_type] */
u8 signer_len; /* Length of signer's name */
u8 key_id_len; /* Length of key identifier */
@@ -39,7 +40,7 @@ struct module_signature {
/*
* Digest the module contents.
*/
-static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
+static struct public_key_signature *mod_make_digest(enum hash_algo hash,
const void *mod,
unsigned long modlen)
{
@@ -54,7 +55,7 @@ static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
/* Allocate the hashing algorithm we're going to need and find out how
* big the hash operational data will be.
*/
- tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0);
+ tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0);
if (IS_ERR(tfm))
return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm);
@@ -157,7 +158,7 @@ static struct key *request_asymmetric_key(const char *signer, size_t signer_len,
pr_debug("Look up: \"%s\"\n", id);
- key = keyring_search(make_key_ref(modsign_keyring, 1),
+ key = keyring_search(make_key_ref(system_trusted_keyring, 1),
&key_type_asymmetric, id);
if (IS_ERR(key))
pr_warn("Request for unknown module key '%s' err %ld\n",
@@ -217,7 +218,7 @@ int mod_verify_sig(const void *mod, unsigned long *_modlen)
return -ENOPKG;
if (ms.hash >= PKEY_HASH__LAST ||
- !pkey_hash_algo[ms.hash])
+ !hash_algo_name[ms.hash])
return -ENOPKG;
key = request_asymmetric_key(sig, ms.signer_len,
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 10c22cae83a0..b38109e204af 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -792,7 +792,8 @@ void free_basic_memory_bitmaps(void)
{
struct memory_bitmap *bm1, *bm2;
- BUG_ON(!(forbidden_pages_map && free_pages_map));
+ if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
+ return;
bm1 = forbidden_pages_map;
bm2 = free_pages_map;
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 24850270c802..98d357584cd6 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -70,6 +70,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
data->swap = swsusp_resume_device ?
swap_type_of(swsusp_resume_device, 0, NULL) : -1;
data->mode = O_RDONLY;
+ data->free_bitmaps = false;
error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
if (error)
pm_notifier_call_chain(PM_POST_HIBERNATION);
diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S
new file mode 100644
index 000000000000..4aef390671cb
--- /dev/null
+++ b/kernel/system_certificates.S
@@ -0,0 +1,10 @@
+#include <linux/export.h>
+#include <linux/init.h>
+
+ __INITRODATA
+
+ .globl VMLINUX_SYMBOL(system_certificate_list)
+VMLINUX_SYMBOL(system_certificate_list):
+ .incbin "kernel/x509_certificate_list"
+ .globl VMLINUX_SYMBOL(system_certificate_list_end)
+VMLINUX_SYMBOL(system_certificate_list_end):
diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c
new file mode 100644
index 000000000000..564dd93430a2
--- /dev/null
+++ b/kernel/system_keyring.c
@@ -0,0 +1,105 @@
+/* System trusted keyring for trusted public keys
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
+#include "module-internal.h"
+
+struct key *system_trusted_keyring;
+EXPORT_SYMBOL_GPL(system_trusted_keyring);
+
+extern __initconst const u8 system_certificate_list[];
+extern __initconst const u8 system_certificate_list_end[];
+
+/*
+ * Load the compiled-in keys
+ */
+static __init int system_trusted_keyring_init(void)
+{
+ pr_notice("Initialise system trusted keyring\n");
+
+ system_trusted_keyring =
+ keyring_alloc(".system_keyring",
+ KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+ KEY_ALLOC_NOT_IN_QUOTA, NULL);
+ if (IS_ERR(system_trusted_keyring))
+ panic("Can't allocate system trusted keyring\n");
+
+ set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags);
+ return 0;
+}
+
+/*
+ * Must be initialised before we try and load the keys into the keyring.
+ */
+device_initcall(system_trusted_keyring_init);
+
+/*
+ * Load the compiled-in list of X.509 certificates.
+ */
+static __init int load_system_certificate_list(void)
+{
+ key_ref_t key;
+ const u8 *p, *end;
+ size_t plen;
+
+ pr_notice("Loading compiled-in X.509 certificates\n");
+
+ end = system_certificate_list_end;
+ p = system_certificate_list;
+ while (p < end) {
+ /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
+ * than 256 bytes in size.
+ */
+ if (end - p < 4)
+ goto dodgy_cert;
+ if (p[0] != 0x30 &&
+ p[1] != 0x82)
+ goto dodgy_cert;
+ plen = (p[2] << 8) | p[3];
+ plen += 4;
+ if (plen > end - p)
+ goto dodgy_cert;
+
+ key = key_create_or_update(make_key_ref(system_trusted_keyring, 1),
+ "asymmetric",
+ NULL,
+ p,
+ plen,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
+ KEY_ALLOC_NOT_IN_QUOTA |
+ KEY_ALLOC_TRUSTED);
+ if (IS_ERR(key)) {
+ pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+ PTR_ERR(key));
+ } else {
+ pr_notice("Loaded X.509 cert '%s'\n",
+ key_ref_to_ptr(key)->description);
+ key_ref_put(key);
+ }
+ p += plen;
+ }
+
+ return 0;
+
+dodgy_cert:
+ pr_err("Problem parsing in-kernel X.509 certificate list\n");
+ return 0;
+}
+late_initcall(load_system_certificate_list);
diff --git a/kernel/user.c b/kernel/user.c
index 5bbb91988e69..a3a0dbfda329 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,6 +51,10 @@ struct user_namespace init_user_ns = {
.owner = GLOBAL_ROOT_UID,
.group = GLOBAL_ROOT_GID,
.proc_inum = PROC_USER_INIT_INO,
+#ifdef CONFIG_KEYS_KERBEROS_CACHE
+ .krb_cache_register_sem =
+ __RWSEM_INITIALIZER(init_user_ns.krb_cache_register_sem),
+#endif
};
EXPORT_SYMBOL_GPL(init_user_ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 13fb1134ba58..240fb62cf394 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -101,6 +101,9 @@ int create_user_ns(struct cred *new)
set_cred_user_ns(new, ns);
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+ init_rwsem(&ns->persistent_keyring_register_sem);
+#endif
return 0;
}
@@ -130,6 +133,9 @@ void free_user_ns(struct user_namespace *ns)
do {
parent = ns->parent;
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+ key_put(ns->persistent_keyring_register);
+#endif
proc_free_inum(ns->proc_inum);
kmem_cache_free(user_ns_cachep, ns);
ns = parent;
diff --git a/lib/Kconfig b/lib/Kconfig
index 06dc74200a51..991c98bc4a3f 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -322,6 +322,20 @@ config TEXTSEARCH_FSM
config BTREE
boolean
+config ASSOCIATIVE_ARRAY
+ bool
+ help
+ Generic associative array. Can be searched and iterated over whilst
+ it is being modified. It is also reasonably quick to search and
+ modify. The algorithms are non-recursive, and the trees are highly
+ capacious.
+
+ See:
+
+ Documentation/assoc_array.txt
+
+ for more information.
+
config HAS_IOMEM
boolean
depends on !NO_IOMEM
diff --git a/lib/Makefile b/lib/Makefile
index d480a8c92385..b46065fd67a4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,6 +47,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
new file mode 100644
index 000000000000..17edeaf19180
--- /dev/null
+++ b/lib/assoc_array.c
@@ -0,0 +1,1746 @@
+/* Generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+//#define DEBUG
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/assoc_array_priv.h>
+
+/*
+ * Iterate over an associative array. The caller must hold the RCU read lock
+ * or better.
+ */
+static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
+ const struct assoc_array_ptr *stop,
+ int (*iterator)(const void *leaf,
+ void *iterator_data),
+ void *iterator_data)
+{
+ const struct assoc_array_shortcut *shortcut;
+ const struct assoc_array_node *node;
+ const struct assoc_array_ptr *cursor, *ptr, *parent;
+ unsigned long has_meta;
+ int slot, ret;
+
+ cursor = root;
+
+begin_node:
+ if (assoc_array_ptr_is_shortcut(cursor)) {
+ /* Descend through a shortcut */
+ shortcut = assoc_array_ptr_to_shortcut(cursor);
+ smp_read_barrier_depends();
+ cursor = ACCESS_ONCE(shortcut->next_node);
+ }
+
+ node = assoc_array_ptr_to_node(cursor);
+ smp_read_barrier_depends();
+ slot = 0;
+
+ /* We perform two passes of each node.
+ *
+ * The first pass does all the leaves in this node. This means we
+ * don't miss any leaves if the node is split up by insertion whilst
+ * we're iterating over the branches rooted here (we may, however, see
+ * some leaves twice).
+ */
+ has_meta = 0;
+ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+ ptr = ACCESS_ONCE(node->slots[slot]);
+ has_meta |= (unsigned long)ptr;
+ if (ptr && assoc_array_ptr_is_leaf(ptr)) {
+ /* We need a barrier between the read of the pointer
+ * and dereferencing the pointer - but only if we are
+ * actually going to dereference it.
+ */
+ smp_read_barrier_depends();
+
+ /* Invoke the callback */
+ ret = iterator(assoc_array_ptr_to_leaf(ptr),
+ iterator_data);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* The second pass attends to all the metadata pointers. If we follow
+ * one of these we may find that we don't come back here, but rather go
+ * back to a replacement node with the leaves in a different layout.
+ *
+ * We are guaranteed to make progress, however, as the slot number for
+ * a particular portion of the key space cannot change - and we
+ * continue at the back pointer + 1.
+ */
+ if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
+ goto finished_node;
+ slot = 0;
+
+continue_node:
+ node = assoc_array_ptr_to_node(cursor);
+ smp_read_barrier_depends();
+
+ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+ ptr = ACCESS_ONCE(node->slots[slot]);
+ if (assoc_array_ptr_is_meta(ptr)) {
+ cursor = ptr;
+ goto begin_node;
+ }
+ }
+
+finished_node:
+ /* Move up to the parent (may need to skip back over a shortcut) */
+ parent = ACCESS_ONCE(node->back_pointer);
+ slot = node->parent_slot;
+ if (parent == stop)
+ return 0;
+
+ if (assoc_array_ptr_is_shortcut(parent)) {
+ shortcut = assoc_array_ptr_to_shortcut(parent);
+ smp_read_barrier_depends();
+ cursor = parent;
+ parent = ACCESS_ONCE(shortcut->back_pointer);
+ slot = shortcut->parent_slot;
+ if (parent == stop)
+ return 0;
+ }
+
+ /* Ascend to next slot in parent node */
+ cursor = parent;
+ slot++;
+ goto continue_node;
+}
+
+/**
+ * assoc_array_iterate - Pass all objects in the array to a callback
+ * @array: The array to iterate over.
+ * @iterator: The callback function.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Iterate over all the objects in an associative array. Each one will be
+ * presented to the iterator function.
+ *
+ * If the array is being modified concurrently with the iteration then it is
+ * possible that some objects in the array will be passed to the iterator
+ * callback more than once - though every object should be passed at least
+ * once. If this is undesirable then the caller must lock against modification
+ * for the duration of this function.
+ *
+ * The function will return 0 if no objects were in the array or else it will
+ * return the result of the last iterator function called. Iteration stops
+ * immediately if any call to the iteration function results in a non-zero
+ * return.
+ *
+ * The caller should hold the RCU read lock or better if concurrent
+ * modification is possible.
+ */
+int assoc_array_iterate(const struct assoc_array *array,
+ int (*iterator)(const void *object,
+ void *iterator_data),
+ void *iterator_data)
+{
+ struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
+
+ if (!root)
+ return 0;
+ return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
+}
+
+enum assoc_array_walk_status {
+ assoc_array_walk_tree_empty,
+ assoc_array_walk_found_terminal_node,
+ assoc_array_walk_found_wrong_shortcut,
+} status;
+
+struct assoc_array_walk_result {
+ struct {
+ struct assoc_array_node *node; /* Node in which leaf might be found */
+ int level;
+ int slot;
+ } terminal_node;
+ struct {
+ struct assoc_array_shortcut *shortcut;
+ int level;
+ int sc_level;
+ unsigned long sc_segments;
+ unsigned long dissimilarity;
+ } wrong_shortcut;
+};
+
+/*
+ * Navigate through the internal tree looking for the closest node to the key.
+ */
+static enum assoc_array_walk_status
+assoc_array_walk(const struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key,
+ struct assoc_array_walk_result *result)
+{
+ struct assoc_array_shortcut *shortcut;
+ struct assoc_array_node *node;
+ struct assoc_array_ptr *cursor, *ptr;
+ unsigned long sc_segments, dissimilarity;
+ unsigned long segments;
+ int level, sc_level, next_sc_level;
+ int slot;
+
+ pr_devel("-->%s()\n", __func__);
+
+ cursor = ACCESS_ONCE(array->root);
+ if (!cursor)
+ return assoc_array_walk_tree_empty;
+
+ level = 0;
+
+ /* Use segments from the key for the new leaf to navigate through the
+ * internal tree, skipping through nodes and shortcuts that are on
+ * route to the destination. Eventually we'll come to a slot that is
+ * either empty or contains a leaf at which point we've found a node in
+ * which the leaf we're looking for might be found or into which it
+ * should be inserted.
+ */
+jumped:
+ segments = ops->get_key_chunk(index_key, level);
+ pr_devel("segments[%d]: %lx\n", level, segments);
+
+ if (assoc_array_ptr_is_shortcut(cursor))
+ goto follow_shortcut;
+
+consider_node:
+ node = assoc_array_ptr_to_node(cursor);
+ smp_read_barrier_depends();
+
+ slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
+ slot &= ASSOC_ARRAY_FAN_MASK;
+ ptr = ACCESS_ONCE(node->slots[slot]);
+
+ pr_devel("consider slot %x [ix=%d type=%lu]\n",
+ slot, level, (unsigned long)ptr & 3);
+
+ if (!assoc_array_ptr_is_meta(ptr)) {
+ /* The node doesn't have a node/shortcut pointer in the slot
+ * corresponding to the index key that we have to follow.
+ */
+ result->terminal_node.node = node;
+ result->terminal_node.level = level;
+ result->terminal_node.slot = slot;
+ pr_devel("<--%s() = terminal_node\n", __func__);
+ return assoc_array_walk_found_terminal_node;
+ }
+
+ if (assoc_array_ptr_is_node(ptr)) {
+ /* There is a pointer to a node in the slot corresponding to
+ * this index key segment, so we need to follow it.
+ */
+ cursor = ptr;
+ level += ASSOC_ARRAY_LEVEL_STEP;
+ if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
+ goto consider_node;
+ goto jumped;
+ }
+
+ /* There is a shortcut in the slot corresponding to the index key
+ * segment. We follow the shortcut if its partial index key matches
+ * this leaf's. Otherwise we need to split the shortcut.
+ */
+ cursor = ptr;
+follow_shortcut:
+ shortcut = assoc_array_ptr_to_shortcut(cursor);
+ smp_read_barrier_depends();
+ pr_devel("shortcut to %d\n", shortcut->skip_to_level);
+ sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
+ BUG_ON(sc_level > shortcut->skip_to_level);
+
+ do {
+ /* Check the leaf against the shortcut's index key a word at a
+ * time, trimming the final word (the shortcut stores the index
+ * key completely from the root to the shortcut's target).
+ */
+ if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
+ segments = ops->get_key_chunk(index_key, sc_level);
+
+ sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
+ dissimilarity = segments ^ sc_segments;
+
+ if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
+ /* Trim segments that are beyond the shortcut */
+ int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+ dissimilarity &= ~(ULONG_MAX << shift);
+ next_sc_level = shortcut->skip_to_level;
+ } else {
+ next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
+ next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+ }
+
+ if (dissimilarity != 0) {
+ /* This shortcut points elsewhere */
+ result->wrong_shortcut.shortcut = shortcut;
+ result->wrong_shortcut.level = level;
+ result->wrong_shortcut.sc_level = sc_level;
+ result->wrong_shortcut.sc_segments = sc_segments;
+ result->wrong_shortcut.dissimilarity = dissimilarity;
+ return assoc_array_walk_found_wrong_shortcut;
+ }
+
+ sc_level = next_sc_level;
+ } while (sc_level < shortcut->skip_to_level);
+
+ /* The shortcut matches the leaf's index to this point. */
+ cursor = ACCESS_ONCE(shortcut->next_node);
+ if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
+ level = sc_level;
+ goto jumped;
+ } else {
+ level = sc_level;
+ goto consider_node;
+ }
+}
+
+/**
+ * assoc_array_find - Find an object by index key
+ * @array: The associative array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Find an object in an associative array by walking through the internal tree
+ * to the node that should contain the object and then searching the leaves
+ * there. NULL is returned if the requested object was not found in the array.
+ *
+ * The caller must hold the RCU read lock or better.
+ */
+void *assoc_array_find(const struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key)
+{
+ struct assoc_array_walk_result result;
+ const struct assoc_array_node *node;
+ const struct assoc_array_ptr *ptr;
+ const void *leaf;
+ int slot;
+
+ if (assoc_array_walk(array, ops, index_key, &result) !=
+ assoc_array_walk_found_terminal_node)
+ return NULL;
+
+ node = result.terminal_node.node;
+ smp_read_barrier_depends();
+
+ /* If the target key is available to us, it's has to be pointed to by
+ * the terminal node.
+ */
+ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+ ptr = ACCESS_ONCE(node->slots[slot]);
+ if (ptr && assoc_array_ptr_is_leaf(ptr)) {
+ /* We need a barrier between the read of the pointer
+ * and dereferencing the pointer - but only if we are
+ * actually going to dereference it.
+ */
+ leaf = assoc_array_ptr_to_leaf(ptr);
+ smp_read_barrier_depends();
+ if (ops->compare_object(leaf, index_key))
+ return (void *)leaf;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Destructively iterate over an associative array. The caller must prevent
+ * other simultaneous accesses.
+ */
+static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
+ const struct assoc_array_ops *ops)
+{
+ struct assoc_array_shortcut *shortcut;
+ struct assoc_array_node *node;
+ struct assoc_array_ptr *cursor, *parent = NULL;
+ int slot = -1;
+
+ pr_devel("-->%s()\n", __func__);
+
+ cursor = root;
+ if (!cursor) {
+ pr_devel("empty\n");
+ return;
+ }
+
+move_to_meta:
+ if (assoc_array_ptr_is_shortcut(cursor)) {
+ /* Descend through a shortcut */
+ pr_devel("[%d] shortcut\n", slot);
+ BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
+ shortcut = assoc_array_ptr_to_shortcut(cursor);
+ BUG_ON(shortcut->back_pointer != parent);
+ BUG_ON(slot != -1 && shortcut->parent_slot != slot);
+ parent = cursor;
+ cursor = shortcut->next_node;
+ slot = -1;
+ BUG_ON(!assoc_array_ptr_is_node(cursor));
+ }
+
+ pr_devel("[%d] node\n", slot);
+ node = assoc_array_ptr_to_node(cursor);
+ BUG_ON(node->back_pointer != parent);
+ BUG_ON(slot != -1 && node->parent_slot != slot);
+ slot = 0;
+
+continue_node:
+ pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
+ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+ struct assoc_array_ptr *ptr = node->slots[slot];
+ if (!ptr)
+ continue;
+ if (assoc_array_ptr_is_meta(ptr)) {
+ parent = cursor;
+ cursor = ptr;
+ goto move_to_meta;
+ }
+
+ if (ops) {
+ pr_devel("[%d] free leaf\n", slot);
+ ops->free_object(assoc_array_ptr_to_leaf(ptr));
+ }
+ }
+
+ parent = node->back_pointer;
+ slot = node->parent_slot;
+ pr_devel("free node\n");
+ kfree(node);
+ if (!parent)
+ return; /* Done */
+
+ /* Move back up to the parent (may need to free a shortcut on
+ * the way up) */
+ if (assoc_array_ptr_is_shortcut(parent)) {
+ shortcut = assoc_array_ptr_to_shortcut(parent);
+ BUG_ON(shortcut->next_node != cursor);
+ cursor = parent;
+ parent = shortcut->back_pointer;
+ slot = shortcut->parent_slot;
+ pr_devel("free shortcut\n");
+ kfree(shortcut);
+ if (!parent)
+ return;
+
+ BUG_ON(!assoc_array_ptr_is_node(parent));
+ }
+
+ /* Ascend to next slot in parent node */
+ pr_devel("ascend to %p[%d]\n", parent, slot);
+ cursor = parent;
+ node = assoc_array_ptr_to_node(cursor);
+ slot++;
+ goto continue_node;
+}
+
+/**
+ * assoc_array_destroy - Destroy an associative array
+ * @array: The array to destroy.
+ * @ops: The operations to use.
+ *
+ * Discard all metadata and free all objects in an associative array. The
+ * array will be empty and ready to use again upon completion. This function
+ * cannot fail.
+ *
+ * The caller must prevent all other accesses whilst this takes place as no
+ * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
+ * accesses to continue. On the other hand, no memory allocation is required.
+ */
+void assoc_array_destroy(struct assoc_array *array,
+ const struct assoc_array_ops *ops)
+{
+ assoc_array_destroy_subtree(array->root, ops);
+ array->root = NULL;
+}
+
+/*
+ * Handle insertion into an empty tree.
+ */
+static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
+{
+ struct assoc_array_node *new_n0;
+
+ pr_devel("-->%s()\n", __func__);
+
+ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+ if (!new_n0)
+ return false;
+
+ edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+ edit->leaf_p = &new_n0->slots[0];
+ edit->adjust_count_on = new_n0;
+ edit->set[0].ptr = &edit->array->root;
+ edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+
+ pr_devel("<--%s() = ok [no root]\n", __func__);
+ return true;
+}
+
+/*
+ * Handle insertion into a terminal node.
+ */
+static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
+ const struct assoc_array_ops *ops,
+ const void *index_key,
+ struct assoc_array_walk_result *result)
+{
+ struct assoc_array_shortcut *shortcut, *new_s0;
+ struct assoc_array_node *node, *new_n0, *new_n1, *side;
+ struct assoc_array_ptr *ptr;
+ unsigned long dissimilarity, base_seg, blank;
+ size_t keylen;
+ bool have_meta;
+ int level, diff;
+ int slot, next_slot, free_slot, i, j;
+
+ node = result->terminal_node.node;
+ level = result->terminal_node.level;
+ edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
+
+ pr_devel("-->%s()\n", __func__);
+
+ /* We arrived at a node which doesn't have an onward node or shortcut
+ * pointer that we have to follow. This means that (a) the leaf we
+ * want must go here (either by insertion or replacement) or (b) we
+ * need to split this node and insert in one of the fragments.
+ */
+ free_slot = -1;
+
+ /* Firstly, we have to check the leaves in this node to see if there's
+ * a matching one we should replace in place.
+ */
+ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+ ptr = node->slots[i];
+ if (!ptr) {
+ free_slot = i;
+ continue;
+ }
+ if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
+ pr_devel("replace in slot %d\n", i);
+ edit->leaf_p = &node->slots[i];
+ edit->dead_leaf = node->slots[i];
+ pr_devel("<--%s() = ok [replace]\n", __func__);
+ return true;
+ }
+ }
+
+ /* If there is a free slot in this node then we can just insert the
+ * leaf here.
+ */
+ if (free_slot >= 0) {
+ pr_devel("insert in free slot %d\n", free_slot);
+ edit->leaf_p = &node->slots[free_slot];
+ edit->adjust_count_on = node;
+ pr_devel("<--%s() = ok [insert]\n", __func__);
+ return true;
+ }
+
+ /* The node has no spare slots - so we're either going to have to split
+ * it or insert another node before it.
+ *
+ * Whatever, we're going to need at least two new nodes - so allocate
+ * those now. We may also need a new shortcut, but we deal with that
+ * when we need it.
+ */
+ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+ if (!new_n0)
+ return false;
+ edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+ new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+ if (!new_n1)
+ return false;
+ edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
+
+ /* We need to find out how similar the leaves are. */
+ pr_devel("no spare slots\n");
+ have_meta = false;
+ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+ ptr = node->slots[i];
+ if (assoc_array_ptr_is_meta(ptr)) {
+ edit->segment_cache[i] = 0xff;
+ have_meta = true;
+ continue;
+ }
+ base_seg = ops->get_object_key_chunk(
+ assoc_array_ptr_to_leaf(ptr), level);
+ base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+ edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+ }
+
+ if (have_meta) {
+ pr_devel("have meta\n");
+ goto split_node;
+ }
+
+ /* The node contains only leaves */
+ dissimilarity = 0;
+ base_seg = edit->segment_cache[0];
+ for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
+ dissimilarity |= edit->segment_cache[i] ^ base_seg;
+
+ pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
+
+ if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
+ /* The old leaves all cluster in the same slot. We will need
+ * to insert a shortcut if the new node wants to cluster with them.
+ */
+ if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
+ goto all_leaves_cluster_together;
+
+ /* Otherwise we can just insert a new node ahead of the old
+ * one.
+ */
+ goto present_leaves_cluster_but_not_new_leaf;
+ }
+
+split_node:
+ pr_devel("split node\n");
+
+ /* We need to split the current node; we know that the node doesn't
+ * simply contain a full set of leaves that cluster together (it
+ * contains meta pointers and/or non-clustering leaves).
+ *
+ * We need to expel at least two leaves out of a set consisting of the
+ * leaves in the node and the new leaf.
+ *
+ * We need a new node (n0) to replace the current one and a new node to
+ * take the expelled nodes (n1).
+ */
+ edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+ new_n0->back_pointer = node->back_pointer;
+ new_n0->parent_slot = node->parent_slot;
+ new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+ new_n1->parent_slot = -1; /* Need to calculate this */
+
+do_split_node:
+ pr_devel("do_split_node\n");
+
+ new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+ new_n1->nr_leaves_on_branch = 0;
+
+ /* Begin by finding two matching leaves. There have to be at least two
+ * that match - even if there are meta pointers - because any leaf that
+ * would match a slot with a meta pointer in it must be somewhere
+ * behind that meta pointer and cannot be here. Further, given N
+ * remaining leaf slots, we now have N+1 leaves to go in them.
+ */
+ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+ slot = edit->segment_cache[i];
+ if (slot != 0xff)
+ for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
+ if (edit->segment_cache[j] == slot)
+ goto found_slot_for_multiple_occupancy;
+ }
+found_slot_for_multiple_occupancy:
+ pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
+ BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
+ BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
+ BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
+
+ new_n1->parent_slot = slot;
+
+ /* Metadata pointers cannot change slot */
+ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
+ if (assoc_array_ptr_is_meta(node->slots[i]))
+ new_n0->slots[i] = node->slots[i];
+ else
+ new_n0->slots[i] = NULL;
+ BUG_ON(new_n0->slots[slot] != NULL);
+ new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
+
+ /* Filter the leaf pointers between the new nodes */
+ free_slot = -1;
+ next_slot = 0;
+ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+ if (assoc_array_ptr_is_meta(node->slots[i]))
+ continue;
+ if (edit->segment_cache[i] == slot) {
+ new_n1->slots[next_slot++] = node->slots[i];
+ new_n1->nr_leaves_on_branch++;
+ } else {
+ do {
+ free_slot++;
+ } while (new_n0->slots[free_slot] != NULL);
+ new_n0->slots[free_slot] = node->slots[i];
+ }
+ }
+
+ pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
+
+ if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
+ do {
+ free_slot++;
+ } while (new_n0->slots[free_slot] != NULL);
+ edit->leaf_p = &new_n0->slots[free_slot];
+ edit->adjust_count_on = new_n0;
+ } else {
+ edit->leaf_p = &new_n1->slots[next_slot++];
+ edit->adjust_count_on = new_n1;
+ }
+
+ BUG_ON(next_slot <= 1);
+
+ edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
+ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+ if (edit->segment_cache[i] == 0xff) {
+ ptr = node->slots[i];
+ BUG_ON(assoc_array_ptr_is_leaf(ptr));
+ if (assoc_array_ptr_is_node(ptr)) {
+ side = assoc_array_ptr_to_node(ptr);
+ edit->set_backpointers[i] = &side->back_pointer;
+ } else {
+ shortcut = assoc_array_ptr_to_shortcut(ptr);
+ edit->set_backpointers[i] = &shortcut->back_pointer;
+ }
+ }
+ }
+
+ ptr = node->back_pointer;
+ if (!ptr)
+ edit->set[0].ptr = &edit->array->root;
+ else if (assoc_array_ptr_is_node(ptr))
+ edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
+ else
+ edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
+ edit->excised_meta[0] = assoc_array_node_to_ptr(node);
+ pr_devel("<--%s() = ok [split node]\n", __func__);
+ return true;
+
+present_leaves_cluster_but_not_new_leaf:
+ /* All the old leaves cluster in the same slot, but the new leaf wants
+ * to go into a different slot, so we create a new node to hold the new
+ * leaf and a pointer to a new node holding all the old leaves.
+ */
+ pr_devel("present leaves cluster but not new leaf\n");
+
+ new_n0->back_pointer = node->back_pointer;
+ new_n0->parent_slot = node->parent_slot;
+ new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+ new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+ new_n1->parent_slot = edit->segment_cache[0];
+ new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
+ edit->adjust_count_on = new_n0;
+
+ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
+ new_n1->slots[i] = node->slots[i];
+
+ new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0);
+ edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
+
+ edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot];
+ edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+ edit->excised_meta[0] = assoc_array_node_to_ptr(node);
+ pr_devel("<--%s() = ok [insert node before]\n", __func__);
+ return true;
+
+all_leaves_cluster_together:
+ /* All the leaves, new and old, want to cluster together in this node
+ * in the same slot, so we have to replace this node with a shortcut to
+ * skip over the identical parts of the key and then place a pair of
+ * nodes, one inside the other, at the end of the shortcut and
+ * distribute the keys between them.
+ *
+ * Firstly we need to work out where the leaves start diverging as a
+ * bit position into their keys so that we know how big the shortcut
+ * needs to be.
+ *
+ * We only need to make a single pass of N of the N+1 leaves because if
+ * any keys differ between themselves at bit X then at least one of
+ * them must also differ with the base key at bit X or before.
+ */
+ pr_devel("all leaves cluster together\n");
+ diff = INT_MAX;
+ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+ int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
+ assoc_array_ptr_to_leaf(node->slots[i]));
+ if (x < diff) {
+ BUG_ON(x < 0);
+ diff = x;
+ }
+ }
+ BUG_ON(diff == INT_MAX);
+ BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
+
+ keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+ keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+ new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+ keylen * sizeof(unsigned long), GFP_KERNEL);
+ if (!new_s0)
+ return false;
+ edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
+
+ edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+ new_s0->back_pointer = node->back_pointer;
+ new_s0->parent_slot = node->parent_slot;
+ new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+ new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+ new_n0->parent_slot = 0;
+ new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+ new_n1->parent_slot = -1; /* Need to calculate this */
+
+ new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+ pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
+ BUG_ON(level <= 0);
+
+ for (i = 0; i < keylen; i++)
+ new_s0->index_key[i] =
+ ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
+
+ blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
+ pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
+ new_s0->index_key[keylen - 1] &= ~blank;
+
+ /* This now reduces to a node splitting exercise for which we'll need
+ * to regenerate the disparity table.
+ */
+ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+ ptr = node->slots[i];
+ base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
+ level);
+ base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+ edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+ }
+
+ base_seg = ops->get_key_chunk(index_key, level);
+ base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+ edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
+ goto do_split_node;
+}
+
+/*
+ * Handle insertion into the middle of a shortcut.
+ */
+static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
+ const struct assoc_array_ops *ops,
+ struct assoc_array_walk_result *result)
+{
+ struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
+ struct assoc_array_node *node, *new_n0, *side;
+ unsigned long sc_segments, dissimilarity, blank;
+ size_t keylen;
+ int level, sc_level, diff;
+ int sc_slot;
+
+ shortcut = result->wrong_shortcut.shortcut;
+ level = result->wrong_shortcut.level;
+ sc_level = result->wrong_shortcut.sc_level;
+ sc_segments = result->wrong_shortcut.sc_segments;
+ dissimilarity = result->wrong_shortcut.dissimilarity;
+
+ pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
+ __func__, level, dissimilarity, sc_level);
+
+ /* We need to split a shortcut and insert a node between the two
+ * pieces. Zero-length pieces will be dispensed with entirely.
+ *
+ * First of all, we need to find out in which level the first
+ * difference was.
+ */
+ diff = __ffs(dissimilarity);
+ diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+ diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
+ pr_devel("diff=%d\n", diff);
+
+ if (!shortcut->back_pointer) {
+ edit->set[0].ptr = &edit->array->root;
+ } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
+ node = assoc_array_ptr_to_node(shortcut->back_pointer);
+ edit->set[0].ptr = &node->slots[shortcut->parent_slot];
+ } else {
+ BUG();
+ }
+
+ edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
+
+ /* Create a new node now since we're going to need it anyway */
+ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+ if (!new_n0)
+ return false;
+ edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+ edit->adjust_count_on = new_n0;
+
+ /* Insert a new shortcut before the new node if this segment isn't of
+ * zero length - otherwise we just connect the new node directly to the
+ * parent.
+ */
+ level += ASSOC_ARRAY_LEVEL_STEP;
+ if (diff > level) {
+ pr_devel("pre-shortcut %d...%d\n", level, diff);
+ keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+ keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+ new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+ keylen * sizeof(unsigned long), GFP_KERNEL);
+ if (!new_s0)
+ return false;
+ edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
+ edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+ new_s0->back_pointer = shortcut->back_pointer;
+ new_s0->parent_slot = shortcut->parent_slot;
+ new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+ new_s0->skip_to_level = diff;
+
+ new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+ new_n0->parent_slot = 0;
+
+ memcpy(new_s0->index_key, shortcut->index_key,
+ keylen * sizeof(unsigned long));
+
+ blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+ pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
+ new_s0->index_key[keylen - 1] &= ~blank;
+ } else {
+ pr_devel("no pre-shortcut\n");
+ edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+ new_n0->back_pointer = shortcut->back_pointer;
+ new_n0->parent_slot = shortcut->parent_slot;
+ }
+
+ side = assoc_array_ptr_to_node(shortcut->next_node);
+ new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
+
+ /* We need to know which slot in the new node is going to take a
+ * metadata pointer.
+ */
+ sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+ sc_slot &= ASSOC_ARRAY_FAN_MASK;
+
+ pr_devel("new slot %lx >> %d -> %d\n",
+ sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
+
+ /* Determine whether we need to follow the new node with a replacement
+ * for the current shortcut. We could in theory reuse the current
+ * shortcut if its parent slot number doesn't change - but that's a
+ * 1-in-16 chance so not worth expending the code upon.
+ */
+ level = diff + ASSOC_ARRAY_LEVEL_STEP;
+ if (level < shortcut->skip_to_level) {
+ pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
+ keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+ keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+ new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
+ keylen * sizeof(unsigned long), GFP_KERNEL);
+ if (!new_s1)
+ return false;
+ edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
+
+ new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
+ new_s1->parent_slot = sc_slot;
+ new_s1->next_node = shortcut->next_node;
+ new_s1->skip_to_level = shortcut->skip_to_level;
+
+ new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
+
+ memcpy(new_s1->index_key, shortcut->index_key,
+ keylen * sizeof(unsigned long));
+
+ edit->set[1].ptr = &side->back_pointer;
+ edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
+ } else {
+ pr_devel("no post-shortcut\n");
+
+ /* We don't have to replace the pointed-to node as long as we
+ * use memory barriers to make sure the parent slot number is
+ * changed before the back pointer (the parent slot number is
+ * irrelevant to the old parent shortcut).
+ */
+ new_n0->slots[sc_slot] = shortcut->next_node;
+ edit->set_parent_slot[0].p = &side->parent_slot;
+ edit->set_parent_slot[0].to = sc_slot;
+ edit->set[1].ptr = &side->back_pointer;
+ edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+ }
+
+ /* Install the new leaf in a spare slot in the new node. */
+ if (sc_slot == 0)
+ edit->leaf_p = &new_n0->slots[1];
+ else
+ edit->leaf_p = &new_n0->slots[0];
+
+ pr_devel("<--%s() = ok [split shortcut]\n", __func__);
+ return edit;
+}
+
+/**
+ * assoc_array_insert - Script insertion of an object into an associative array
+ * @array: The array to insert into.
+ * @ops: The operations to use.
+ * @index_key: The key to insert at.
+ * @object: The object to insert.
+ *
+ * Precalculate and preallocate a script for the insertion or replacement of an
+ * object in an associative array. This results in an edit script that can
+ * either be applied or cancelled.
+ *
+ * The function returns a pointer to an edit script or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key,
+ void *object)
+{
+ struct assoc_array_walk_result result;
+ struct assoc_array_edit *edit;
+
+ pr_devel("-->%s()\n", __func__);
+
+ /* The leaf pointer we're given must not have the bottom bit set as we
+ * use those for type-marking the pointer. NULL pointers are also not
+ * allowed as they indicate an empty slot but we have to allow them
+ * here as they can be updated later.
+ */
+ BUG_ON(assoc_array_ptr_is_meta(object));
+
+ edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+ if (!edit)
+ return ERR_PTR(-ENOMEM);
+ edit->array = array;
+ edit->ops = ops;
+ edit->leaf = assoc_array_leaf_to_ptr(object);
+ edit->adjust_count_by = 1;
+
+ switch (assoc_array_walk(array, ops, index_key, &result)) {
+ case assoc_array_walk_tree_empty:
+ /* Allocate a root node if there isn't one yet */
+ if (!assoc_array_insert_in_empty_tree(edit))
+ goto enomem;
+ return edit;
+
+ case assoc_array_walk_found_terminal_node:
+ /* We found a node that doesn't have a node/shortcut pointer in
+ * the slot corresponding to the index key that we have to
+ * follow.
+ */
+ if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
+ &result))
+ goto enomem;
+ return edit;
+
+ case assoc_array_walk_found_wrong_shortcut:
+ /* We found a shortcut that didn't match our key in a slot we
+ * needed to follow.
+ */
+ if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
+ goto enomem;
+ return edit;
+ }
+
+enomem:
+ /* Clean up after an out of memory error */
+ pr_devel("enomem\n");
+ assoc_array_cancel_edit(edit);
+ return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_insert_set_object - Set the new object pointer in an edit script
+ * @edit: The edit script to modify.
+ * @object: The object pointer to set.
+ *
+ * Change the object to be inserted in an edit script. The object pointed to
+ * by the old object is not freed. This must be done prior to applying the
+ * script.
+ */
+void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
+{
+ BUG_ON(!object);
+ edit->leaf = assoc_array_leaf_to_ptr(object);
+}
+
+struct assoc_array_delete_collapse_context {
+ struct assoc_array_node *node;
+ const void *skip_leaf;
+ int slot;
+};
+
+/*
+ * Subtree collapse to node iterator.
+ */
+static int assoc_array_delete_collapse_iterator(const void *leaf,
+ void *iterator_data)
+{
+ struct assoc_array_delete_collapse_context *collapse = iterator_data;
+
+ if (leaf == collapse->skip_leaf)
+ return 0;
+
+ BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
+
+ collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
+ return 0;
+}
+
+/**
+ * assoc_array_delete - Script deletion of an object from an associative array
+ * @array: The array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Precalculate and preallocate a script for the deletion of an object from an
+ * associative array. This results in an edit script that can either be
+ * applied or cancelled.
+ *
+ * The function returns a pointer to an edit script if the object was found,
+ * NULL if the object was not found or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key)
+{
+ struct assoc_array_delete_collapse_context collapse;
+ struct assoc_array_walk_result result;
+ struct assoc_array_node *node, *new_n0;
+ struct assoc_array_edit *edit;
+ struct assoc_array_ptr *ptr;
+ bool has_meta;
+ int slot, i;
+
+ pr_devel("-->%s()\n", __func__);
+
+ edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+ if (!edit)
+ return ERR_PTR(-ENOMEM);
+ edit->array = array;
+ edit->ops = ops;
+ edit->adjust_count_by = -1;
+
+ switch (assoc_array_walk(array, ops, index_key, &result)) {
+ case assoc_array_walk_found_terminal_node:
+ /* We found a node that should contain the leaf we've been
+ * asked to remove - *if* it's in the tree.
+ */
+ pr_devel("terminal_node\n");
+ node = result.terminal_node.node;
+
+ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+ ptr = node->slots[slot];
+ if (ptr &&
+ assoc_array_ptr_is_leaf(ptr) &&
+ ops->compare_object(assoc_array_ptr_to_leaf(ptr),
+ index_key))
+ goto found_leaf;
+ }
+ case assoc_array_walk_tree_empty:
+ case assoc_array_walk_found_wrong_shortcut:
+ default:
+ assoc_array_cancel_edit(edit);
+ pr_devel("not found\n");
+ return NULL;
+ }
+
+found_leaf:
+ BUG_ON(array->nr_leaves_on_tree <= 0);
+
+ /* In the simplest form of deletion we just clear the slot and release
+ * the leaf after a suitable interval.
+ */
+ edit->dead_leaf = node->slots[slot];
+ edit->set[0].ptr = &node->slots[slot];
+ edit->set[0].to = NULL;
+ edit->adjust_count_on = node;
+
+ /* If that concludes erasure of the last leaf, then delete the entire
+ * internal array.
+ */
+ if (array->nr_leaves_on_tree == 1) {
+ edit->set[1].ptr = &array->root;
+ edit->set[1].to = NULL;
+ edit->adjust_count_on = NULL;
+ edit->excised_subtree = array->root;
+ pr_devel("all gone\n");
+ return edit;
+ }
+
+ /* However, we'd also like to clear up some metadata blocks if we
+ * possibly can.
+ *
+ * We go for a simple algorithm of: if this node has FAN_OUT or fewer
+ * leaves in it, then attempt to collapse it - and attempt to
+ * recursively collapse up the tree.
+ *
+ * We could also try and collapse in partially filled subtrees to take
+ * up space in this node.
+ */
+ if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+ struct assoc_array_node *parent, *grandparent;
+ struct assoc_array_ptr *ptr;
+
+ /* First of all, we need to know if this node has metadata so
+ * that we don't try collapsing if all the leaves are already
+ * here.
+ */
+ has_meta = false;
+ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+ ptr = node->slots[i];
+ if (assoc_array_ptr_is_meta(ptr)) {
+ has_meta = true;
+ break;
+ }
+ }
+
+ pr_devel("leaves: %ld [m=%d]\n",
+ node->nr_leaves_on_branch - 1, has_meta);
+
+ /* Look further up the tree to see if we can collapse this node
+ * into a more proximal node too.
+ */
+ parent = node;
+ collapse_up:
+ pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
+
+ ptr = parent->back_pointer;
+ if (!ptr)
+ goto do_collapse;
+ if (assoc_array_ptr_is_shortcut(ptr)) {
+ struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
+ ptr = s->back_pointer;
+ if (!ptr)
+ goto do_collapse;
+ }
+
+ grandparent = assoc_array_ptr_to_node(ptr);
+ if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+ parent = grandparent;
+ goto collapse_up;
+ }
+
+ do_collapse:
+ /* There's no point collapsing if the original node has no meta
+ * pointers to discard and if we didn't merge into one of that
+ * node's ancestry.
+ */
+ if (has_meta || parent != node) {
+ node = parent;
+
+ /* Create a new node to collapse into */
+ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+ if (!new_n0)
+ goto enomem;
+ edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+
+ new_n0->back_pointer = node->back_pointer;
+ new_n0->parent_slot = node->parent_slot;
+ new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+ edit->adjust_count_on = new_n0;
+
+ collapse.node = new_n0;
+ collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
+ collapse.slot = 0;
+ assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
+ node->back_pointer,
+ assoc_array_delete_collapse_iterator,
+ &collapse);
+ pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
+ BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
+
+ if (!node->back_pointer) {
+ edit->set[1].ptr = &array->root;
+ } else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
+ BUG();
+ } else if (assoc_array_ptr_is_node(node->back_pointer)) {
+ struct assoc_array_node *p =
+ assoc_array_ptr_to_node(node->back_pointer);
+ edit->set[1].ptr = &p->slots[node->parent_slot];
+ } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
+ struct assoc_array_shortcut *s =
+ assoc_array_ptr_to_shortcut(node->back_pointer);
+ edit->set[1].ptr = &s->next_node;
+ }
+ edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+ edit->excised_subtree = assoc_array_node_to_ptr(node);
+ }
+ }
+
+ return edit;
+
+enomem:
+ /* Clean up after an out of memory error */
+ pr_devel("enomem\n");
+ assoc_array_cancel_edit(edit);
+ return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_clear - Script deletion of all objects from an associative array
+ * @array: The array to clear.
+ * @ops: The operations to use.
+ *
+ * Precalculate and preallocate a script for the deletion of all the objects
+ * from an associative array. This results in an edit script that can either
+ * be applied or cancelled.
+ *
+ * The function returns a pointer to an edit script if there are objects to be
+ * deleted, NULL if there are no objects in the array or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
+ const struct assoc_array_ops *ops)
+{
+ struct assoc_array_edit *edit;
+
+ pr_devel("-->%s()\n", __func__);
+
+ if (!array->root)
+ return NULL;
+
+ edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+ if (!edit)
+ return ERR_PTR(-ENOMEM);
+ edit->array = array;
+ edit->ops = ops;
+ edit->set[1].ptr = &array->root;
+ edit->set[1].to = NULL;
+ edit->excised_subtree = array->root;
+ edit->ops_for_excised_subtree = ops;
+ pr_devel("all gone\n");
+ return edit;
+}
+
+/*
+ * Handle the deferred destruction after an applied edit.
+ */
+static void assoc_array_rcu_cleanup(struct rcu_head *head)
+{
+ struct assoc_array_edit *edit =
+ container_of(head, struct assoc_array_edit, rcu);
+ int i;
+
+ pr_devel("-->%s()\n", __func__);
+
+ if (edit->dead_leaf)
+ edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
+ for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
+ if (edit->excised_meta[i])
+ kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
+
+ if (edit->excised_subtree) {
+ BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
+ if (assoc_array_ptr_is_node(edit->excised_subtree)) {
+ struct assoc_array_node *n =
+ assoc_array_ptr_to_node(edit->excised_subtree);
+ n->back_pointer = NULL;
+ } else {
+ struct assoc_array_shortcut *s =
+ assoc_array_ptr_to_shortcut(edit->excised_subtree);
+ s->back_pointer = NULL;
+ }
+ assoc_array_destroy_subtree(edit->excised_subtree,
+ edit->ops_for_excised_subtree);
+ }
+
+ kfree(edit);
+}
+
+/**
+ * assoc_array_apply_edit - Apply an edit script to an associative array
+ * @edit: The script to apply.
+ *
+ * Apply an edit script to an associative array to effect an insertion,
+ * deletion or clearance. As the edit script includes preallocated memory,
+ * this is guaranteed not to fail.
+ *
+ * The edit script, dead objects and dead metadata will be scheduled for
+ * destruction after an RCU grace period to permit those doing read-only
+ * accesses on the array to continue to do so under the RCU read lock whilst
+ * the edit is taking place.
+ */
+void assoc_array_apply_edit(struct assoc_array_edit *edit)
+{
+ struct assoc_array_shortcut *shortcut;
+ struct assoc_array_node *node;
+ struct assoc_array_ptr *ptr;
+ int i;
+
+ pr_devel("-->%s()\n", __func__);
+
+ smp_wmb();
+ if (edit->leaf_p)
+ *edit->leaf_p = edit->leaf;
+
+ smp_wmb();
+ for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
+ if (edit->set_parent_slot[i].p)
+ *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
+
+ smp_wmb();
+ for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
+ if (edit->set_backpointers[i])
+ *edit->set_backpointers[i] = edit->set_backpointers_to;
+
+ smp_wmb();
+ for (i = 0; i < ARRAY_SIZE(edit->set); i++)
+ if (edit->set[i].ptr)
+ *edit->set[i].ptr = edit->set[i].to;
+
+ if (edit->array->root == NULL) {
+ edit->array->nr_leaves_on_tree = 0;
+ } else if (edit->adjust_count_on) {
+ node = edit->adjust_count_on;
+ for (;;) {
+ node->nr_leaves_on_branch += edit->adjust_count_by;
+
+ ptr = node->back_pointer;
+ if (!ptr)
+ break;
+ if (assoc_array_ptr_is_shortcut(ptr)) {
+ shortcut = assoc_array_ptr_to_shortcut(ptr);
+ ptr = shortcut->back_pointer;
+ if (!ptr)
+ break;
+ }
+ BUG_ON(!assoc_array_ptr_is_node(ptr));
+ node = assoc_array_ptr_to_node(ptr);
+ }
+
+ edit->array->nr_leaves_on_tree += edit->adjust_count_by;
+ }
+
+ call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
+}
+
+/**
+ * assoc_array_cancel_edit - Discard an edit script.
+ * @edit: The script to discard.
+ *
+ * Free an edit script and all the preallocated data it holds without making
+ * any changes to the associative array it was intended for.
+ *
+ * NOTE! In the case of an insertion script, this does _not_ release the leaf
+ * that was to be inserted. That is left to the caller.
+ */
+void assoc_array_cancel_edit(struct assoc_array_edit *edit)
+{
+ struct assoc_array_ptr *ptr;
+ int i;
+
+ pr_devel("-->%s()\n", __func__);
+
+ /* Clean up after an out of memory error */
+ for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
+ ptr = edit->new_meta[i];
+ if (ptr) {
+ if (assoc_array_ptr_is_node(ptr))
+ kfree(assoc_array_ptr_to_node(ptr));
+ else
+ kfree(assoc_array_ptr_to_shortcut(ptr));
+ }
+ }
+ kfree(edit);
+}
+
+/**
+ * assoc_array_gc - Garbage collect an associative array.
+ * @array: The array to clean.
+ * @ops: The operations to use.
+ * @iterator: A callback function to pass judgement on each object.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Collect garbage from an associative array and pack down the internal tree to
+ * save memory.
+ *
+ * The iterator function is asked to pass judgement upon each object in the
+ * array. If it returns false, the object is discard and if it returns true,
+ * the object is kept. If it returns true, it must increment the object's
+ * usage count (or whatever it needs to do to retain it) before returning.
+ *
+ * This function returns 0 if successful or -ENOMEM if out of memory. In the
+ * latter case, the array is not changed.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+int assoc_array_gc(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ bool (*iterator)(void *object, void *iterator_data),
+ void *iterator_data)
+{
+ struct assoc_array_shortcut *shortcut, *new_s;
+ struct assoc_array_node *node, *new_n;
+ struct assoc_array_edit *edit;
+ struct assoc_array_ptr *cursor, *ptr;
+ struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
+ unsigned long nr_leaves_on_tree;
+ int keylen, slot, nr_free, next_slot, i;
+
+ pr_devel("-->%s()\n", __func__);
+
+ if (!array->root)
+ return 0;
+
+ edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+ if (!edit)
+ return -ENOMEM;
+ edit->array = array;
+ edit->ops = ops;
+ edit->ops_for_excised_subtree = ops;
+ edit->set[0].ptr = &array->root;
+ edit->excised_subtree = array->root;
+
+ new_root = new_parent = NULL;
+ new_ptr_pp = &new_root;
+ cursor = array->root;
+
+descend:
+ /* If this point is a shortcut, then we need to duplicate it and
+ * advance the target cursor.
+ */
+ if (assoc_array_ptr_is_shortcut(cursor)) {
+ shortcut = assoc_array_ptr_to_shortcut(cursor);
+ keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+ keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+ new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
+ keylen * sizeof(unsigned long), GFP_KERNEL);
+ if (!new_s)
+ goto enomem;
+ pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
+ memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
+ keylen * sizeof(unsigned long)));
+ new_s->back_pointer = new_parent;
+ new_s->parent_slot = shortcut->parent_slot;
+ *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
+ new_ptr_pp = &new_s->next_node;
+ cursor = shortcut->next_node;
+ }
+
+ /* Duplicate the node at this position */
+ node = assoc_array_ptr_to_node(cursor);
+ new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+ if (!new_n)
+ goto enomem;
+ pr_devel("dup node %p -> %p\n", node, new_n);
+ new_n->back_pointer = new_parent;
+ new_n->parent_slot = node->parent_slot;
+ *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
+ new_ptr_pp = NULL;
+ slot = 0;
+
+continue_node:
+ /* Filter across any leaves and gc any subtrees */
+ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+ ptr = node->slots[slot];
+ if (!ptr)
+ continue;
+
+ if (assoc_array_ptr_is_leaf(ptr)) {
+ if (iterator(assoc_array_ptr_to_leaf(ptr),
+ iterator_data))
+ /* The iterator will have done any reference
+ * counting on the object for us.
+ */
+ new_n->slots[slot] = ptr;
+ continue;
+ }
+
+ new_ptr_pp = &new_n->slots[slot];
+ cursor = ptr;
+ goto descend;
+ }
+
+ pr_devel("-- compress node %p --\n", new_n);
+
+ /* Count up the number of empty slots in this node and work out the
+ * subtree leaf count.
+ */
+ new_n->nr_leaves_on_branch = 0;
+ nr_free = 0;
+ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+ ptr = new_n->slots[slot];
+ if (!ptr)
+ nr_free++;
+ else if (assoc_array_ptr_is_leaf(ptr))
+ new_n->nr_leaves_on_branch++;
+ }
+ pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
+
+ /* See what we can fold in */
+ next_slot = 0;
+ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+ struct assoc_array_shortcut *s;
+ struct assoc_array_node *child;
+
+ ptr = new_n->slots[slot];
+ if (!ptr || assoc_array_ptr_is_leaf(ptr))
+ continue;
+
+ s = NULL;
+ if (assoc_array_ptr_is_shortcut(ptr)) {
+ s = assoc_array_ptr_to_shortcut(ptr);
+ ptr = s->next_node;
+ }
+
+ child = assoc_array_ptr_to_node(ptr);
+ new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
+
+ if (child->nr_leaves_on_branch <= nr_free + 1) {
+ /* Fold the child node into this one */
+ pr_devel("[%d] fold node %lu/%d [nx %d]\n",
+ slot, child->nr_leaves_on_branch, nr_free + 1,
+ next_slot);
+
+ /* We would already have reaped an intervening shortcut
+ * on the way back up the tree.
+ */
+ BUG_ON(s);
+
+ new_n->slots[slot] = NULL;
+ nr_free++;
+ if (slot < next_slot)
+ next_slot = slot;
+ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+ struct assoc_array_ptr *p = child->slots[i];
+ if (!p)
+ continue;
+ BUG_ON(assoc_array_ptr_is_meta(p));
+ while (new_n->slots[next_slot])
+ next_slot++;
+ BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
+ new_n->slots[next_slot++] = p;
+ nr_free--;
+ }
+ kfree(child);
+ } else {
+ pr_devel("[%d] retain node %lu/%d [nx %d]\n",
+ slot, child->nr_leaves_on_branch, nr_free + 1,
+ next_slot);
+ }
+ }
+
+ pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
+
+ nr_leaves_on_tree = new_n->nr_leaves_on_branch;
+
+ /* Excise this node if it is singly occupied by a shortcut */
+ if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
+ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
+ if ((ptr = new_n->slots[slot]))
+ break;
+
+ if (assoc_array_ptr_is_meta(ptr) &&
+ assoc_array_ptr_is_shortcut(ptr)) {
+ pr_devel("excise node %p with 1 shortcut\n", new_n);
+ new_s = assoc_array_ptr_to_shortcut(ptr);
+ new_parent = new_n->back_pointer;
+ slot = new_n->parent_slot;
+ kfree(new_n);
+ if (!new_parent) {
+ new_s->back_pointer = NULL;
+ new_s->parent_slot = 0;
+ new_root = ptr;
+ goto gc_complete;
+ }
+
+ if (assoc_array_ptr_is_shortcut(new_parent)) {
+ /* We can discard any preceding shortcut also */
+ struct assoc_array_shortcut *s =
+ assoc_array_ptr_to_shortcut(new_parent);
+
+ pr_devel("excise preceding shortcut\n");
+
+ new_parent = new_s->back_pointer = s->back_pointer;
+ slot = new_s->parent_slot = s->parent_slot;
+ kfree(s);
+ if (!new_parent) {
+ new_s->back_pointer = NULL;
+ new_s->parent_slot = 0;
+ new_root = ptr;
+ goto gc_complete;
+ }
+ }
+
+ new_s->back_pointer = new_parent;
+ new_s->parent_slot = slot;
+ new_n = assoc_array_ptr_to_node(new_parent);
+ new_n->slots[slot] = ptr;
+ goto ascend_old_tree;
+ }
+ }
+
+ /* Excise any shortcuts we might encounter that point to nodes that
+ * only contain leaves.
+ */
+ ptr = new_n->back_pointer;
+ if (!ptr)
+ goto gc_complete;
+
+ if (assoc_array_ptr_is_shortcut(ptr)) {
+ new_s = assoc_array_ptr_to_shortcut(ptr);
+ new_parent = new_s->back_pointer;
+ slot = new_s->parent_slot;
+
+ if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
+ struct assoc_array_node *n;
+
+ pr_devel("excise shortcut\n");
+ new_n->back_pointer = new_parent;
+ new_n->parent_slot = slot;
+ kfree(new_s);
+ if (!new_parent) {
+ new_root = assoc_array_node_to_ptr(new_n);
+ goto gc_complete;
+ }
+
+ n = assoc_array_ptr_to_node(new_parent);
+ n->slots[slot] = assoc_array_node_to_ptr(new_n);
+ }
+ } else {
+ new_parent = ptr;
+ }
+ new_n = assoc_array_ptr_to_node(new_parent);
+
+ascend_old_tree:
+ ptr = node->back_pointer;
+ if (assoc_array_ptr_is_shortcut(ptr)) {
+ shortcut = assoc_array_ptr_to_shortcut(ptr);
+ slot = shortcut->parent_slot;
+ cursor = shortcut->back_pointer;
+ } else {
+ slot = node->parent_slot;
+ cursor = ptr;
+ }
+ BUG_ON(!ptr);
+ node = assoc_array_ptr_to_node(cursor);
+ slot++;
+ goto continue_node;
+
+gc_complete:
+ edit->set[0].to = new_root;
+ assoc_array_apply_edit(edit);
+ edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
+ return 0;
+
+enomem:
+ pr_devel("enomem\n");
+ assoc_array_destroy_subtree(new_root, edit->ops);
+ kfree(edit);
+ return -ENOMEM;
+}
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index 657979f71bef..bf076d281d40 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -121,3 +121,6 @@ void mpi_free(MPI a)
kfree(a);
}
EXPORT_SYMBOL_GPL(mpi_free);
+
+MODULE_DESCRIPTION("Multiprecision maths library");
+MODULE_LICENSE("GPL");
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7d57af21f49e..dee6cf4e6d34 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -476,40 +476,6 @@ static int vma_has_reserves(struct vm_area_struct *vma, long chg)
return 0;
}
-static void copy_gigantic_page(struct page *dst, struct page *src)
-{
- int i;
- struct hstate *h = page_hstate(src);
- struct page *dst_base = dst;
- struct page *src_base = src;
-
- for (i = 0; i < pages_per_huge_page(h); ) {
- cond_resched();
- copy_highpage(dst, src);
-
- i++;
- dst = mem_map_next(dst, dst_base, i);
- src = mem_map_next(src, src_base, i);
- }
-}
-
-void copy_huge_page(struct page *dst, struct page *src)
-{
- int i;
- struct hstate *h = page_hstate(src);
-
- if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
- copy_gigantic_page(dst, src);
- return;
- }
-
- might_sleep();
- for (i = 0; i < pages_per_huge_page(h); i++) {
- cond_resched();
- copy_highpage(dst + i, src + i);
- }
-}
-
static void enqueue_huge_page(struct hstate *h, struct page *page)
{
int nid = page_to_nid(page);
@@ -736,6 +702,23 @@ int PageHuge(struct page *page)
}
EXPORT_SYMBOL_GPL(PageHuge);
+/*
+ * PageHeadHuge() only returns true for hugetlbfs head page, but not for
+ * normal or transparent huge pages.
+ */
+int PageHeadHuge(struct page *page_head)
+{
+ compound_page_dtor *dtor;
+
+ if (!PageHead(page_head))
+ return 0;
+
+ dtor = get_compound_page_dtor(page_head);
+
+ return dtor == free_huge_page;
+}
+EXPORT_SYMBOL_GPL(PageHeadHuge);
+
pgoff_t __basepage_index(struct page *page)
{
struct page *page_head = compound_head(page);
diff --git a/mm/memory.c b/mm/memory.c
index 0409e8f43fa0..5d9025f3b3e1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4272,13 +4272,6 @@ void copy_user_huge_page(struct page *dst, struct page *src,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
#if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS
-static struct kmem_cache *page_ptl_cachep;
-void __init ptlock_cache_init(void)
-{
- page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0,
- SLAB_PANIC, NULL);
-}
-
bool ptlock_alloc(struct page *page)
{
spinlock_t *ptl;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c4403cdf3433..eca4a3129129 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2950,7 +2950,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
return;
}
- p += snprintf(p, maxlen, policy_modes[mode]);
+ p += snprintf(p, maxlen, "%s", policy_modes[mode]);
if (flags & MPOL_MODE_FLAGS) {
p += snprintf(p, buffer + maxlen - p, "=");
diff --git a/mm/migrate.c b/mm/migrate.c
index 316e720a2023..bb940045fe85 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -442,6 +442,54 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
}
/*
+ * Gigantic pages are so large that we do not guarantee that page++ pointer
+ * arithmetic will work across the entire page. We need something more
+ * specialized.
+ */
+static void __copy_gigantic_page(struct page *dst, struct page *src,
+ int nr_pages)
+{
+ int i;
+ struct page *dst_base = dst;
+ struct page *src_base = src;
+
+ for (i = 0; i < nr_pages; ) {
+ cond_resched();
+ copy_highpage(dst, src);
+
+ i++;
+ dst = mem_map_next(dst, dst_base, i);
+ src = mem_map_next(src, src_base, i);
+ }
+}
+
+static void copy_huge_page(struct page *dst, struct page *src)
+{
+ int i;
+ int nr_pages;
+
+ if (PageHuge(src)) {
+ /* hugetlbfs page */
+ struct hstate *h = page_hstate(src);
+ nr_pages = pages_per_huge_page(h);
+
+ if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
+ __copy_gigantic_page(dst, src, nr_pages);
+ return;
+ }
+ } else {
+ /* thp page */
+ BUG_ON(!PageTransHuge(src));
+ nr_pages = hpage_nr_pages(src);
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ cond_resched();
+ copy_highpage(dst + i, src + i);
+ }
+}
+
+/*
* Copy the page to its new location
*/
void migrate_page_copy(struct page *newpage, struct page *page)
diff --git a/mm/slab.c b/mm/slab.c
index 0c8967bb2018..eb043bf05f4c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -164,72 +164,6 @@
static bool pfmemalloc_active __read_mostly;
/*
- * kmem_bufctl_t:
- *
- * Bufctl's are used for linking objs within a slab
- * linked offsets.
- *
- * This implementation relies on "struct page" for locating the cache &
- * slab an object belongs to.
- * This allows the bufctl structure to be small (one int), but limits
- * the number of objects a slab (not a cache) can contain when off-slab
- * bufctls are used. The limit is the size of the largest general cache
- * that does not use off-slab slabs.
- * For 32bit archs with 4 kB pages, is this 56.
- * This is not serious, as it is only for large objects, when it is unwise
- * to have too many per slab.
- * Note: This limit can be raised by introducing a general cache whose size
- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
- */
-
-typedef unsigned int kmem_bufctl_t;
-#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
-#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2)
-#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)
-
-/*
- * struct slab_rcu
- *
- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
- * arrange for kmem_freepages to be called via RCU. This is useful if
- * we need to approach a kernel structure obliquely, from its address
- * obtained without the usual locking. We can lock the structure to
- * stabilize it and check it's still at the given address, only if we
- * can be sure that the memory has not been meanwhile reused for some
- * other kind of object (which our subsystem's lock might corrupt).
- *
- * rcu_read_lock before reading the address, then rcu_read_unlock after
- * taking the spinlock within the structure expected at that address.
- */
-struct slab_rcu {
- struct rcu_head head;
- struct kmem_cache *cachep;
- void *addr;
-};
-
-/*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-struct slab {
- union {
- struct {
- struct list_head list;
- unsigned long colouroff;
- void *s_mem; /* including colour offset */
- unsigned int inuse; /* num of objs active in slab */
- kmem_bufctl_t free;
- unsigned short nodeid;
- };
- struct slab_rcu __slab_cover_slab_rcu;
- };
-};
-
-/*
* struct array_cache
*
* Purpose:
@@ -456,18 +390,10 @@ static inline struct kmem_cache *virt_to_cache(const void *obj)
return page->slab_cache;
}
-static inline struct slab *virt_to_slab(const void *obj)
-{
- struct page *page = virt_to_head_page(obj);
-
- VM_BUG_ON(!PageSlab(page));
- return page->slab_page;
-}
-
-static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
+static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
unsigned int idx)
{
- return slab->s_mem + cache->size * idx;
+ return page->s_mem + cache->size * idx;
}
/*
@@ -477,9 +403,9 @@ static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
* reciprocal_divide(offset, cache->reciprocal_buffer_size)
*/
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
- const struct slab *slab, void *obj)
+ const struct page *page, void *obj)
{
- u32 offset = (obj - slab->s_mem);
+ u32 offset = (obj - page->s_mem);
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
@@ -641,7 +567,7 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
static size_t slab_mgmt_size(size_t nr_objs, size_t align)
{
- return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
+ return ALIGN(nr_objs * sizeof(unsigned int), align);
}
/*
@@ -660,8 +586,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
* on it. For the latter case, the memory allocated for a
* slab is used for:
*
- * - The struct slab
- * - One kmem_bufctl_t for each object
+ * - One unsigned int for each object
* - Padding to respect alignment of @align
* - @buffer_size bytes for each object
*
@@ -674,8 +599,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
mgmt_size = 0;
nr_objs = slab_size / buffer_size;
- if (nr_objs > SLAB_LIMIT)
- nr_objs = SLAB_LIMIT;
} else {
/*
* Ignore padding for the initial guess. The padding
@@ -685,8 +608,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
* into the memory allocation when taking the padding
* into account.
*/
- nr_objs = (slab_size - sizeof(struct slab)) /
- (buffer_size + sizeof(kmem_bufctl_t));
+ nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int));
/*
* This calculated number will be either the right
@@ -696,9 +618,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
> slab_size)
nr_objs--;
- if (nr_objs > SLAB_LIMIT)
- nr_objs = SLAB_LIMIT;
-
mgmt_size = slab_mgmt_size(nr_objs, align);
}
*num = nr_objs;
@@ -829,10 +748,8 @@ static struct array_cache *alloc_arraycache(int node, int entries,
return nc;
}
-static inline bool is_slab_pfmemalloc(struct slab *slabp)
+static inline bool is_slab_pfmemalloc(struct page *page)
{
- struct page *page = virt_to_page(slabp->s_mem);
-
return PageSlabPfmemalloc(page);
}
@@ -841,23 +758,23 @@ static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
struct array_cache *ac)
{
struct kmem_cache_node *n = cachep->node[numa_mem_id()];
- struct slab *slabp;
+ struct page *page;
unsigned long flags;
if (!pfmemalloc_active)
return;
spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(slabp, &n->slabs_full, list)
- if (is_slab_pfmemalloc(slabp))
+ list_for_each_entry(page, &n->slabs_full, lru)
+ if (is_slab_pfmemalloc(page))
goto out;
- list_for_each_entry(slabp, &n->slabs_partial, list)
- if (is_slab_pfmemalloc(slabp))
+ list_for_each_entry(page, &n->slabs_partial, lru)
+ if (is_slab_pfmemalloc(page))
goto out;
- list_for_each_entry(slabp, &n->slabs_free, list)
- if (is_slab_pfmemalloc(slabp))
+ list_for_each_entry(page, &n->slabs_free, lru)
+ if (is_slab_pfmemalloc(page))
goto out;
pfmemalloc_active = false;
@@ -897,8 +814,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
*/
n = cachep->node[numa_mem_id()];
if (!list_empty(&n->slabs_free) && force_refill) {
- struct slab *slabp = virt_to_slab(objp);
- ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem));
+ struct page *page = virt_to_head_page(objp);
+ ClearPageSlabPfmemalloc(page);
clear_obj_pfmemalloc(&objp);
recheck_pfmemalloc_active(cachep, ac);
return objp;
@@ -1099,8 +1016,7 @@ static void drain_alien_cache(struct kmem_cache *cachep,
static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
- struct slab *slabp = virt_to_slab(objp);
- int nodeid = slabp->nodeid;
+ int nodeid = page_to_nid(virt_to_page(objp));
struct kmem_cache_node *n;
struct array_cache *alien = NULL;
int node;
@@ -1111,7 +1027,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
* Make sure we are not freeing a object from another node to the array
* cache on this cpu.
*/
- if (likely(slabp->nodeid == node))
+ if (likely(nodeid == node))
return 0;
n = cachep->node[node];
@@ -1512,6 +1428,8 @@ void __init kmem_cache_init(void)
{
int i;
+ BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
+ sizeof(struct rcu_head));
kmem_cache = &kmem_cache_boot;
setup_node_pointer(kmem_cache);
@@ -1687,7 +1605,7 @@ static noinline void
slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
{
struct kmem_cache_node *n;
- struct slab *slabp;
+ struct page *page;
unsigned long flags;
int node;
@@ -1706,15 +1624,15 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
continue;
spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(slabp, &n->slabs_full, list) {
+ list_for_each_entry(page, &n->slabs_full, lru) {
active_objs += cachep->num;
active_slabs++;
}
- list_for_each_entry(slabp, &n->slabs_partial, list) {
- active_objs += slabp->inuse;
+ list_for_each_entry(page, &n->slabs_partial, lru) {
+ active_objs += page->active;
active_slabs++;
}
- list_for_each_entry(slabp, &n->slabs_free, list)
+ list_for_each_entry(page, &n->slabs_free, lru)
num_slabs++;
free_objects += n->free_objects;
@@ -1736,19 +1654,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
* did not request dmaable memory, we might get it, but that
* would be relatively rare and ignorable.
*/
-static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
+ int nodeid)
{
struct page *page;
int nr_pages;
- int i;
-
-#ifndef CONFIG_MMU
- /*
- * Nommu uses slab's for process anonymous memory allocations, and thus
- * requires __GFP_COMP to properly refcount higher order allocations
- */
- flags |= __GFP_COMP;
-#endif
flags |= cachep->allocflags;
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1772,12 +1682,9 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
else
add_zone_page_state(page_zone(page),
NR_SLAB_UNRECLAIMABLE, nr_pages);
- for (i = 0; i < nr_pages; i++) {
- __SetPageSlab(page + i);
-
- if (page->pfmemalloc)
- SetPageSlabPfmemalloc(page + i);
- }
+ __SetPageSlab(page);
+ if (page->pfmemalloc)
+ SetPageSlabPfmemalloc(page);
memcg_bind_pages(cachep, cachep->gfporder);
if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
@@ -1789,17 +1696,15 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
kmemcheck_mark_unallocated_pages(page, nr_pages);
}
- return page_address(page);
+ return page;
}
/*
* Interface to system's page release.
*/
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
{
- unsigned long i = (1 << cachep->gfporder);
- struct page *page = virt_to_page(addr);
- const unsigned long nr_freed = i;
+ const unsigned long nr_freed = (1 << cachep->gfporder);
kmemcheck_free_shadow(page, cachep->gfporder);
@@ -1809,27 +1714,28 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
else
sub_zone_page_state(page_zone(page),
NR_SLAB_UNRECLAIMABLE, nr_freed);
- while (i--) {
- BUG_ON(!PageSlab(page));
- __ClearPageSlabPfmemalloc(page);
- __ClearPageSlab(page);
- page++;
- }
+
+ BUG_ON(!PageSlab(page));
+ __ClearPageSlabPfmemalloc(page);
+ __ClearPageSlab(page);
+ page_mapcount_reset(page);
+ page->mapping = NULL;
memcg_release_pages(cachep, cachep->gfporder);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
- free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
+ __free_memcg_kmem_pages(page, cachep->gfporder);
}
static void kmem_rcu_free(struct rcu_head *head)
{
- struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
- struct kmem_cache *cachep = slab_rcu->cachep;
+ struct kmem_cache *cachep;
+ struct page *page;
- kmem_freepages(cachep, slab_rcu->addr);
- if (OFF_SLAB(cachep))
- kmem_cache_free(cachep->slabp_cache, slab_rcu);
+ page = container_of(head, struct page, rcu_head);
+ cachep = page->slab_cache;
+
+ kmem_freepages(cachep, page);
}
#if DEBUG
@@ -1978,19 +1884,19 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
/* Print some data about the neighboring objects, if they
* exist:
*/
- struct slab *slabp = virt_to_slab(objp);
+ struct page *page = virt_to_head_page(objp);
unsigned int objnr;
- objnr = obj_to_index(cachep, slabp, objp);
+ objnr = obj_to_index(cachep, page, objp);
if (objnr) {
- objp = index_to_obj(cachep, slabp, objnr - 1);
+ objp = index_to_obj(cachep, page, objnr - 1);
realobj = (char *)objp + obj_offset(cachep);
printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
realobj, size);
print_objinfo(cachep, objp, 2);
}
if (objnr + 1 < cachep->num) {
- objp = index_to_obj(cachep, slabp, objnr + 1);
+ objp = index_to_obj(cachep, page, objnr + 1);
realobj = (char *)objp + obj_offset(cachep);
printk(KERN_ERR "Next obj: start=%p, len=%d\n",
realobj, size);
@@ -2001,11 +1907,12 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
#endif
#if DEBUG
-static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy_debugcheck(struct kmem_cache *cachep,
+ struct page *page)
{
int i;
for (i = 0; i < cachep->num; i++) {
- void *objp = index_to_obj(cachep, slabp, i);
+ void *objp = index_to_obj(cachep, page, i);
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
@@ -2030,7 +1937,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
}
}
#else
-static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy_debugcheck(struct kmem_cache *cachep,
+ struct page *page)
{
}
#endif
@@ -2044,23 +1952,34 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
* Before calling the slab must have been unlinked from the cache. The
* cache-lock is not held/needed.
*/
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct page *page)
{
- void *addr = slabp->s_mem - slabp->colouroff;
+ void *freelist;
- slab_destroy_debugcheck(cachep, slabp);
+ freelist = page->freelist;
+ slab_destroy_debugcheck(cachep, page);
if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
- struct slab_rcu *slab_rcu;
+ struct rcu_head *head;
+
+ /*
+ * RCU free overloads the RCU head over the LRU.
+ * slab_page has been overloeaded over the LRU,
+ * however it is not used from now on so that
+ * we can use it safely.
+ */
+ head = (void *)&page->rcu_head;
+ call_rcu(head, kmem_rcu_free);
- slab_rcu = (struct slab_rcu *)slabp;
- slab_rcu->cachep = cachep;
- slab_rcu->addr = addr;
- call_rcu(&slab_rcu->head, kmem_rcu_free);
} else {
- kmem_freepages(cachep, addr);
- if (OFF_SLAB(cachep))
- kmem_cache_free(cachep->slabp_cache, slabp);
+ kmem_freepages(cachep, page);
}
+
+ /*
+ * From now on, we don't use freelist
+ * although actual page can be freed in rcu context
+ */
+ if (OFF_SLAB(cachep))
+ kmem_cache_free(cachep->freelist_cache, freelist);
}
/**
@@ -2097,8 +2016,8 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
* use off-slab slabs. Needed to avoid a possible
* looping condition in cache_grow().
*/
- offslab_limit = size - sizeof(struct slab);
- offslab_limit /= sizeof(kmem_bufctl_t);
+ offslab_limit = size;
+ offslab_limit /= sizeof(unsigned int);
if (num > offslab_limit)
break;
@@ -2220,7 +2139,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
int
__kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
{
- size_t left_over, slab_size, ralign;
+ size_t left_over, freelist_size, ralign;
gfp_t gfp;
int err;
size_t size = cachep->size;
@@ -2339,22 +2258,21 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
if (!cachep->num)
return -E2BIG;
- slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
- + sizeof(struct slab), cachep->align);
+ freelist_size =
+ ALIGN(cachep->num * sizeof(unsigned int), cachep->align);
/*
* If the slab has been placed off-slab, and we have enough space then
* move it on-slab. This is at the expense of any extra colouring.
*/
- if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
+ if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) {
flags &= ~CFLGS_OFF_SLAB;
- left_over -= slab_size;
+ left_over -= freelist_size;
}
if (flags & CFLGS_OFF_SLAB) {
/* really off slab. No need for manual alignment */
- slab_size =
- cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
+ freelist_size = cachep->num * sizeof(unsigned int);
#ifdef CONFIG_PAGE_POISONING
/* If we're going to use the generic kernel_map_pages()
@@ -2371,16 +2289,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
if (cachep->colour_off < cachep->align)
cachep->colour_off = cachep->align;
cachep->colour = left_over / cachep->colour_off;
- cachep->slab_size = slab_size;
+ cachep->freelist_size = freelist_size;
cachep->flags = flags;
- cachep->allocflags = 0;
+ cachep->allocflags = __GFP_COMP;
if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
cachep->allocflags |= GFP_DMA;
cachep->size = size;
cachep->reciprocal_buffer_size = reciprocal_value(size);
if (flags & CFLGS_OFF_SLAB) {
- cachep->slabp_cache = kmalloc_slab(slab_size, 0u);
+ cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
/*
* This is a possibility for one of the malloc_sizes caches.
* But since we go off slab only for object size greater than
@@ -2388,7 +2306,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
* this should not happen at all.
* But leave a BUG_ON for some lucky dude.
*/
- BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
+ BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache));
}
err = setup_cpu_cache(cachep, gfp);
@@ -2494,7 +2412,7 @@ static int drain_freelist(struct kmem_cache *cache,
{
struct list_head *p;
int nr_freed;
- struct slab *slabp;
+ struct page *page;
nr_freed = 0;
while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
@@ -2506,18 +2424,18 @@ static int drain_freelist(struct kmem_cache *cache,
goto out;
}
- slabp = list_entry(p, struct slab, list);
+ page = list_entry(p, struct page, lru);
#if DEBUG
- BUG_ON(slabp->inuse);
+ BUG_ON(page->active);
#endif
- list_del(&slabp->list);
+ list_del(&page->lru);
/*
* Safe to drop the lock. The slab is no longer linked
* to the cache.
*/
n->free_objects -= cache->num;
spin_unlock_irq(&n->list_lock);
- slab_destroy(cache, slabp);
+ slab_destroy(cache, page);
nr_freed++;
}
out:
@@ -2600,52 +2518,42 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
* descriptors in kmem_cache_create, we search through the malloc_sizes array.
* If we are creating a malloc_sizes cache here it would not be visible to
* kmem_find_general_cachep till the initialization is complete.
- * Hence we cannot have slabp_cache same as the original cache.
+ * Hence we cannot have freelist_cache same as the original cache.
*/
-static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
- int colour_off, gfp_t local_flags,
- int nodeid)
+static void *alloc_slabmgmt(struct kmem_cache *cachep,
+ struct page *page, int colour_off,
+ gfp_t local_flags, int nodeid)
{
- struct slab *slabp;
+ void *freelist;
+ void *addr = page_address(page);
if (OFF_SLAB(cachep)) {
/* Slab management obj is off-slab. */
- slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+ freelist = kmem_cache_alloc_node(cachep->freelist_cache,
local_flags, nodeid);
- /*
- * If the first object in the slab is leaked (it's allocated
- * but no one has a reference to it), we want to make sure
- * kmemleak does not treat the ->s_mem pointer as a reference
- * to the object. Otherwise we will not report the leak.
- */
- kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
- local_flags);
- if (!slabp)
+ if (!freelist)
return NULL;
} else {
- slabp = objp + colour_off;
- colour_off += cachep->slab_size;
+ freelist = addr + colour_off;
+ colour_off += cachep->freelist_size;
}
- slabp->inuse = 0;
- slabp->colouroff = colour_off;
- slabp->s_mem = objp + colour_off;
- slabp->nodeid = nodeid;
- slabp->free = 0;
- return slabp;
+ page->active = 0;
+ page->s_mem = addr + colour_off;
+ return freelist;
}
-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
+static inline unsigned int *slab_freelist(struct page *page)
{
- return (kmem_bufctl_t *) (slabp + 1);
+ return (unsigned int *)(page->freelist);
}
static void cache_init_objs(struct kmem_cache *cachep,
- struct slab *slabp)
+ struct page *page)
{
int i;
for (i = 0; i < cachep->num; i++) {
- void *objp = index_to_obj(cachep, slabp, i);
+ void *objp = index_to_obj(cachep, page, i);
#if DEBUG
/* need to poison the objs? */
if (cachep->flags & SLAB_POISON)
@@ -2681,9 +2589,8 @@ static void cache_init_objs(struct kmem_cache *cachep,
if (cachep->ctor)
cachep->ctor(objp);
#endif
- slab_bufctl(slabp)[i] = i + 1;
+ slab_freelist(page)[i] = i;
}
- slab_bufctl(slabp)[i - 1] = BUFCTL_END;
}
static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
@@ -2696,41 +2603,41 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
}
}
-static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
+static void *slab_get_obj(struct kmem_cache *cachep, struct page *page,
int nodeid)
{
- void *objp = index_to_obj(cachep, slabp, slabp->free);
- kmem_bufctl_t next;
+ void *objp;
- slabp->inuse++;
- next = slab_bufctl(slabp)[slabp->free];
+ objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]);
+ page->active++;
#if DEBUG
- slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
- WARN_ON(slabp->nodeid != nodeid);
+ WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
#endif
- slabp->free = next;
return objp;
}
-static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
+static void slab_put_obj(struct kmem_cache *cachep, struct page *page,
void *objp, int nodeid)
{
- unsigned int objnr = obj_to_index(cachep, slabp, objp);
-
+ unsigned int objnr = obj_to_index(cachep, page, objp);
#if DEBUG
+ unsigned int i;
+
/* Verify that the slab belongs to the intended node */
- WARN_ON(slabp->nodeid != nodeid);
+ WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
- if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
- printk(KERN_ERR "slab: double free detected in cache "
- "'%s', objp %p\n", cachep->name, objp);
- BUG();
+ /* Verify double free bug */
+ for (i = page->active; i < cachep->num; i++) {
+ if (slab_freelist(page)[i] == objnr) {
+ printk(KERN_ERR "slab: double free detected in cache "
+ "'%s', objp %p\n", cachep->name, objp);
+ BUG();
+ }
}
#endif
- slab_bufctl(slabp)[objnr] = slabp->free;
- slabp->free = objnr;
- slabp->inuse--;
+ page->active--;
+ slab_freelist(page)[page->active] = objnr;
}
/*
@@ -2738,23 +2645,11 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
* for the slab allocator to be able to lookup the cache and slab of a
* virtual address for kfree, ksize, and slab debugging.
*/
-static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
- void *addr)
+static void slab_map_pages(struct kmem_cache *cache, struct page *page,
+ void *freelist)
{
- int nr_pages;
- struct page *page;
-
- page = virt_to_page(addr);
-
- nr_pages = 1;
- if (likely(!PageCompound(page)))
- nr_pages <<= cache->gfporder;
-
- do {
- page->slab_cache = cache;
- page->slab_page = slab;
- page++;
- } while (--nr_pages);
+ page->slab_cache = cache;
+ page->freelist = freelist;
}
/*
@@ -2762,9 +2657,9 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
* kmem_cache_alloc() when there are no active objs left in a cache.
*/
static int cache_grow(struct kmem_cache *cachep,
- gfp_t flags, int nodeid, void *objp)
+ gfp_t flags, int nodeid, struct page *page)
{
- struct slab *slabp;
+ void *freelist;
size_t offset;
gfp_t local_flags;
struct kmem_cache_node *n;
@@ -2805,20 +2700,20 @@ static int cache_grow(struct kmem_cache *cachep,
* Get mem for the objs. Attempt to allocate a physical page from
* 'nodeid'.
*/
- if (!objp)
- objp = kmem_getpages(cachep, local_flags, nodeid);
- if (!objp)
+ if (!page)
+ page = kmem_getpages(cachep, local_flags, nodeid);
+ if (!page)
goto failed;
/* Get slab management. */
- slabp = alloc_slabmgmt(cachep, objp, offset,
+ freelist = alloc_slabmgmt(cachep, page, offset,
local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
- if (!slabp)
+ if (!freelist)
goto opps1;
- slab_map_pages(cachep, slabp, objp);
+ slab_map_pages(cachep, page, freelist);
- cache_init_objs(cachep, slabp);
+ cache_init_objs(cachep, page);
if (local_flags & __GFP_WAIT)
local_irq_disable();
@@ -2826,13 +2721,13 @@ static int cache_grow(struct kmem_cache *cachep,
spin_lock(&n->list_lock);
/* Make slab active. */
- list_add_tail(&slabp->list, &(n->slabs_free));
+ list_add_tail(&page->lru, &(n->slabs_free));
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num;
spin_unlock(&n->list_lock);
return 1;
opps1:
- kmem_freepages(cachep, objp);
+ kmem_freepages(cachep, page);
failed:
if (local_flags & __GFP_WAIT)
local_irq_disable();
@@ -2880,9 +2775,8 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
- struct page *page;
unsigned int objnr;
- struct slab *slabp;
+ struct page *page;
BUG_ON(virt_to_cache(objp) != cachep);
@@ -2890,8 +2784,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
kfree_debugcheck(objp);
page = virt_to_head_page(objp);
- slabp = page->slab_page;
-
if (cachep->flags & SLAB_RED_ZONE) {
verify_redzone_free(cachep, objp);
*dbg_redzone1(cachep, objp) = RED_INACTIVE;
@@ -2900,14 +2792,11 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = (void *)caller;
- objnr = obj_to_index(cachep, slabp, objp);
+ objnr = obj_to_index(cachep, page, objp);
BUG_ON(objnr >= cachep->num);
- BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
+ BUG_ON(objp != index_to_obj(cachep, page, objnr));
-#ifdef CONFIG_DEBUG_SLAB_LEAK
- slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
-#endif
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
@@ -2924,33 +2813,9 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
return objp;
}
-static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
-{
- kmem_bufctl_t i;
- int entries = 0;
-
- /* Check slab's freelist to see if this obj is there. */
- for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
- entries++;
- if (entries > cachep->num || i >= cachep->num)
- goto bad;
- }
- if (entries != cachep->num - slabp->inuse) {
-bad:
- printk(KERN_ERR "slab: Internal list corruption detected in "
- "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
- cachep->name, cachep->num, slabp, slabp->inuse,
- print_tainted());
- print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
- sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
- 1);
- BUG();
- }
-}
#else
#define kfree_debugcheck(x) do { } while(0)
#define cache_free_debugcheck(x,objp,z) (objp)
-#define check_slabp(x,y) do { } while(0)
#endif
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
@@ -2989,7 +2854,7 @@ retry:
while (batchcount > 0) {
struct list_head *entry;
- struct slab *slabp;
+ struct page *page;
/* Get slab alloc is to come from. */
entry = n->slabs_partial.next;
if (entry == &n->slabs_partial) {
@@ -2999,8 +2864,7 @@ retry:
goto must_grow;
}
- slabp = list_entry(entry, struct slab, list);
- check_slabp(cachep, slabp);
+ page = list_entry(entry, struct page, lru);
check_spinlock_acquired(cachep);
/*
@@ -3008,24 +2872,23 @@ retry:
* there must be at least one object available for
* allocation.
*/
- BUG_ON(slabp->inuse >= cachep->num);
+ BUG_ON(page->active >= cachep->num);
- while (slabp->inuse < cachep->num && batchcount--) {
+ while (page->active < cachep->num && batchcount--) {
STATS_INC_ALLOCED(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
- ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
+ ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
node));
}
- check_slabp(cachep, slabp);
/* move slabp to correct slabp list: */
- list_del(&slabp->list);
- if (slabp->free == BUFCTL_END)
- list_add(&slabp->list, &n->slabs_full);
+ list_del(&page->lru);
+ if (page->active == cachep->num)
+ list_add(&page->list, &n->slabs_full);
else
- list_add(&slabp->list, &n->slabs_partial);
+ list_add(&page->list, &n->slabs_partial);
}
must_grow:
@@ -3097,16 +2960,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
*dbg_redzone1(cachep, objp) = RED_ACTIVE;
*dbg_redzone2(cachep, objp) = RED_ACTIVE;
}
-#ifdef CONFIG_DEBUG_SLAB_LEAK
- {
- struct slab *slabp;
- unsigned objnr;
-
- slabp = virt_to_head_page(objp)->slab_page;
- objnr = (unsigned)(objp - slabp->s_mem) / cachep->size;
- slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
- }
-#endif
objp += obj_offset(cachep);
if (cachep->ctor && cachep->flags & SLAB_POISON)
cachep->ctor(objp);
@@ -3248,18 +3101,20 @@ retry:
* We may trigger various forms of reclaim on the allowed
* set and go into memory reserves if necessary.
*/
+ struct page *page;
+
if (local_flags & __GFP_WAIT)
local_irq_enable();
kmem_flagcheck(cache, flags);
- obj = kmem_getpages(cache, local_flags, numa_mem_id());
+ page = kmem_getpages(cache, local_flags, numa_mem_id());
if (local_flags & __GFP_WAIT)
local_irq_disable();
- if (obj) {
+ if (page) {
/*
* Insert into the appropriate per node queues
*/
- nid = page_to_nid(virt_to_page(obj));
- if (cache_grow(cache, flags, nid, obj)) {
+ nid = page_to_nid(page);
+ if (cache_grow(cache, flags, nid, page)) {
obj = ____cache_alloc_node(cache,
flags | GFP_THISNODE, nid);
if (!obj)
@@ -3288,7 +3143,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
int nodeid)
{
struct list_head *entry;
- struct slab *slabp;
+ struct page *page;
struct kmem_cache_node *n;
void *obj;
int x;
@@ -3308,26 +3163,24 @@ retry:
goto must_grow;
}
- slabp = list_entry(entry, struct slab, list);
+ page = list_entry(entry, struct page, lru);
check_spinlock_acquired_node(cachep, nodeid);
- check_slabp(cachep, slabp);
STATS_INC_NODEALLOCS(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
- BUG_ON(slabp->inuse == cachep->num);
+ BUG_ON(page->active == cachep->num);
- obj = slab_get_obj(cachep, slabp, nodeid);
- check_slabp(cachep, slabp);
+ obj = slab_get_obj(cachep, page, nodeid);
n->free_objects--;
/* move slabp to correct slabp list: */
- list_del(&slabp->list);
+ list_del(&page->lru);
- if (slabp->free == BUFCTL_END)
- list_add(&slabp->list, &n->slabs_full);
+ if (page->active == cachep->num)
+ list_add(&page->lru, &n->slabs_full);
else
- list_add(&slabp->list, &n->slabs_partial);
+ list_add(&page->lru, &n->slabs_partial);
spin_unlock(&n->list_lock);
goto done;
@@ -3477,23 +3330,21 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
for (i = 0; i < nr_objects; i++) {
void *objp;
- struct slab *slabp;
+ struct page *page;
clear_obj_pfmemalloc(&objpp[i]);
objp = objpp[i];
- slabp = virt_to_slab(objp);
+ page = virt_to_head_page(objp);
n = cachep->node[node];
- list_del(&slabp->list);
+ list_del(&page->lru);
check_spinlock_acquired_node(cachep, node);
- check_slabp(cachep, slabp);
- slab_put_obj(cachep, slabp, objp, node);
+ slab_put_obj(cachep, page, objp, node);
STATS_DEC_ACTIVE(cachep);
n->free_objects++;
- check_slabp(cachep, slabp);
/* fixup slab chains */
- if (slabp->inuse == 0) {
+ if (page->active == 0) {
if (n->free_objects > n->free_limit) {
n->free_objects -= cachep->num;
/* No need to drop any previously held
@@ -3502,16 +3353,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
* a different cache, refer to comments before
* alloc_slabmgmt.
*/
- slab_destroy(cachep, slabp);
+ slab_destroy(cachep, page);
} else {
- list_add(&slabp->list, &n->slabs_free);
+ list_add(&page->lru, &n->slabs_free);
}
} else {
/* Unconditionally move a slab to the end of the
* partial list on free - maximum time for the
* other objects to be freed, too.
*/
- list_add_tail(&slabp->list, &n->slabs_partial);
+ list_add_tail(&page->lru, &n->slabs_partial);
}
}
}
@@ -3551,10 +3402,10 @@ free_done:
p = n->slabs_free.next;
while (p != &(n->slabs_free)) {
- struct slab *slabp;
+ struct page *page;
- slabp = list_entry(p, struct slab, list);
- BUG_ON(slabp->inuse);
+ page = list_entry(p, struct page, lru);
+ BUG_ON(page->active);
i++;
p = p->next;
@@ -4158,7 +4009,7 @@ out:
#ifdef CONFIG_SLABINFO
void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
{
- struct slab *slabp;
+ struct page *page;
unsigned long active_objs;
unsigned long num_objs;
unsigned long active_slabs = 0;
@@ -4178,23 +4029,23 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
check_irq_on();
spin_lock_irq(&n->list_lock);
- list_for_each_entry(slabp, &n->slabs_full, list) {
- if (slabp->inuse != cachep->num && !error)
+ list_for_each_entry(page, &n->slabs_full, lru) {
+ if (page->active != cachep->num && !error)
error = "slabs_full accounting error";
active_objs += cachep->num;
active_slabs++;
}
- list_for_each_entry(slabp, &n->slabs_partial, list) {
- if (slabp->inuse == cachep->num && !error)
- error = "slabs_partial inuse accounting error";
- if (!slabp->inuse && !error)
- error = "slabs_partial/inuse accounting error";
- active_objs += slabp->inuse;
+ list_for_each_entry(page, &n->slabs_partial, lru) {
+ if (page->active == cachep->num && !error)
+ error = "slabs_partial accounting error";
+ if (!page->active && !error)
+ error = "slabs_partial accounting error";
+ active_objs += page->active;
active_slabs++;
}
- list_for_each_entry(slabp, &n->slabs_free, list) {
- if (slabp->inuse && !error)
- error = "slabs_free/inuse accounting error";
+ list_for_each_entry(page, &n->slabs_free, lru) {
+ if (page->active && !error)
+ error = "slabs_free accounting error";
num_slabs++;
}
free_objects += n->free_objects;
@@ -4346,15 +4197,27 @@ static inline int add_caller(unsigned long *n, unsigned long v)
return 1;
}
-static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
+static void handle_slab(unsigned long *n, struct kmem_cache *c,
+ struct page *page)
{
void *p;
- int i;
+ int i, j;
+
if (n[0] == n[1])
return;
- for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) {
- if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
+ for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
+ bool active = true;
+
+ for (j = page->active; j < c->num; j++) {
+ /* Skip freed item */
+ if (slab_freelist(page)[j] == i) {
+ active = false;
+ break;
+ }
+ }
+ if (!active)
continue;
+
if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
return;
}
@@ -4379,7 +4242,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)
static int leaks_show(struct seq_file *m, void *p)
{
struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
- struct slab *slabp;
+ struct page *page;
struct kmem_cache_node *n;
const char *name;
unsigned long *x = m->private;
@@ -4403,10 +4266,10 @@ static int leaks_show(struct seq_file *m, void *p)
check_irq_on();
spin_lock_irq(&n->list_lock);
- list_for_each_entry(slabp, &n->slabs_full, list)
- handle_slab(x, cachep, slabp);
- list_for_each_entry(slabp, &n->slabs_partial, list)
- handle_slab(x, cachep, slabp);
+ list_for_each_entry(page, &n->slabs_full, lru)
+ handle_slab(x, cachep, page);
+ list_for_each_entry(page, &n->slabs_partial, lru)
+ handle_slab(x, cachep, page);
spin_unlock_irq(&n->list_lock);
}
name = cachep->name;
diff --git a/mm/slub.c b/mm/slub.c
index 7e8bd8d828bc..545a170ebf9f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -155,7 +155,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
/*
* Maximum number of desirable partial slabs.
* The existence of more partial slabs makes kmem_cache_shrink
- * sort the partial list by the number of objects in the.
+ * sort the partial list by the number of objects in use.
*/
#define MAX_PARTIAL 10
@@ -933,6 +933,16 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
* Hooks for other subsystems that check memory allocations. In a typical
* production configuration these hooks all should produce no code at all.
*/
+static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
+{
+ kmemleak_alloc(ptr, size, 1, flags);
+}
+
+static inline void kfree_hook(const void *x)
+{
+ kmemleak_free(x);
+}
+
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
{
flags &= gfp_allowed_mask;
@@ -1217,8 +1227,8 @@ static unsigned long kmem_cache_flags(unsigned long object_size,
/*
* Enable debugging if selected on the kernel commandline.
*/
- if (slub_debug && (!slub_debug_slabs ||
- !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
+ if (slub_debug && (!slub_debug_slabs || (name &&
+ !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
flags |= slub_debug;
return flags;
@@ -1260,13 +1270,30 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
+static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
+{
+ kmemleak_alloc(ptr, size, 1, flags);
+}
+
+static inline void kfree_hook(const void *x)
+{
+ kmemleak_free(x);
+}
+
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
{ return 0; }
static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
- void *object) {}
+ void *object)
+{
+ kmemleak_alloc_recursive(object, s->object_size, 1, s->flags,
+ flags & gfp_allowed_mask);
+}
-static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
+static inline void slab_free_hook(struct kmem_cache *s, void *x)
+{
+ kmemleak_free_recursive(x, s->flags);
+}
#endif /* CONFIG_SLUB_DEBUG */
@@ -2829,8 +2856,8 @@ static struct kmem_cache *kmem_cache_node;
* slab on the node for this slabcache. There are no concurrent accesses
* possible.
*
- * Note that this function only works on the kmalloc_node_cache
- * when allocating for the kmalloc_node_cache. This is used for bootstrapping
+ * Note that this function only works on the kmem_cache_node
+ * when allocating for the kmem_cache_node. This is used for bootstrapping
* memory on a fresh node that has no slab structures yet.
*/
static void early_kmem_cache_node_alloc(int node)
@@ -3272,7 +3299,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
if (page)
ptr = page_address(page);
- kmemleak_alloc(ptr, size, 1, flags);
+ kmalloc_large_node_hook(ptr, size, flags);
return ptr;
}
@@ -3336,7 +3363,7 @@ void kfree(const void *x)
page = virt_to_head_page(x);
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
- kmemleak_free(x);
+ kfree_hook(x);
__free_memcg_kmem_pages(page, compound_order(page));
return;
}
diff --git a/mm/swap.c b/mm/swap.c
index 7a9f80d451f5..84b26aaabd03 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -82,19 +82,6 @@ static void __put_compound_page(struct page *page)
static void put_compound_page(struct page *page)
{
- /*
- * hugetlbfs pages cannot be split from under us. If this is a
- * hugetlbfs page, check refcount on head page and release the page if
- * the refcount becomes zero.
- */
- if (PageHuge(page)) {
- page = compound_head(page);
- if (put_page_testzero(page))
- __put_compound_page(page);
-
- return;
- }
-
if (unlikely(PageTail(page))) {
/* __split_huge_page_refcount can run under us */
struct page *page_head = compound_trans_head(page);
@@ -111,14 +98,31 @@ static void put_compound_page(struct page *page)
* still hot on arches that do not support
* this_cpu_cmpxchg_double().
*/
- if (PageSlab(page_head)) {
- if (PageTail(page)) {
+ if (PageSlab(page_head) || PageHeadHuge(page_head)) {
+ if (likely(PageTail(page))) {
+ /*
+ * __split_huge_page_refcount
+ * cannot race here.
+ */
+ VM_BUG_ON(!PageHead(page_head));
+ atomic_dec(&page->_mapcount);
if (put_page_testzero(page_head))
VM_BUG_ON(1);
-
- atomic_dec(&page->_mapcount);
- goto skip_lock_tail;
+ if (put_page_testzero(page_head))
+ __put_compound_page(page_head);
+ return;
} else
+ /*
+ * __split_huge_page_refcount
+ * run before us, "page" was a
+ * THP tail. The split
+ * page_head has been freed
+ * and reallocated as slab or
+ * hugetlbfs page of smaller
+ * order (only possible if
+ * reallocated as slab on
+ * x86).
+ */
goto skip_lock;
}
/*
@@ -132,8 +136,27 @@ static void put_compound_page(struct page *page)
/* __split_huge_page_refcount run before us */
compound_unlock_irqrestore(page_head, flags);
skip_lock:
- if (put_page_testzero(page_head))
- __put_single_page(page_head);
+ if (put_page_testzero(page_head)) {
+ /*
+ * The head page may have been
+ * freed and reallocated as a
+ * compound page of smaller
+ * order and then freed again.
+ * All we know is that it
+ * cannot have become: a THP
+ * page, a compound page of
+ * higher order, a tail page.
+ * That is because we still
+ * hold the refcount of the
+ * split THP tail and
+ * page_head was the THP head
+ * before the split.
+ */
+ if (PageHead(page_head))
+ __put_compound_page(page_head);
+ else
+ __put_single_page(page_head);
+ }
out_put_single:
if (put_page_testzero(page))
__put_single_page(page);
@@ -155,7 +178,6 @@ out_put_single:
VM_BUG_ON(atomic_read(&page->_count) != 0);
compound_unlock_irqrestore(page_head, flags);
-skip_lock_tail:
if (put_page_testzero(page_head)) {
if (PageHead(page_head))
__put_compound_page(page_head);
@@ -198,51 +220,52 @@ bool __get_page_tail(struct page *page)
* proper PT lock that already serializes against
* split_huge_page().
*/
+ unsigned long flags;
bool got = false;
- struct page *page_head;
-
- /*
- * If this is a hugetlbfs page it cannot be split under us. Simply
- * increment refcount for the head page.
- */
- if (PageHuge(page)) {
- page_head = compound_head(page);
- atomic_inc(&page_head->_count);
- got = true;
- } else {
- unsigned long flags;
+ struct page *page_head = compound_trans_head(page);
- page_head = compound_trans_head(page);
- if (likely(page != page_head &&
- get_page_unless_zero(page_head))) {
-
- /* Ref to put_compound_page() comment. */
- if (PageSlab(page_head)) {
- if (likely(PageTail(page))) {
- __get_page_tail_foll(page, false);
- return true;
- } else {
- put_page(page_head);
- return false;
- }
- }
-
- /*
- * page_head wasn't a dangling pointer but it
- * may not be a head page anymore by the time
- * we obtain the lock. That is ok as long as it
- * can't be freed from under us.
- */
- flags = compound_lock_irqsave(page_head);
- /* here __split_huge_page_refcount won't run anymore */
+ if (likely(page != page_head && get_page_unless_zero(page_head))) {
+ /* Ref to put_compound_page() comment. */
+ if (PageSlab(page_head) || PageHeadHuge(page_head)) {
if (likely(PageTail(page))) {
+ /*
+ * This is a hugetlbfs page or a slab
+ * page. __split_huge_page_refcount
+ * cannot race here.
+ */
+ VM_BUG_ON(!PageHead(page_head));
__get_page_tail_foll(page, false);
- got = true;
- }
- compound_unlock_irqrestore(page_head, flags);
- if (unlikely(!got))
+ return true;
+ } else {
+ /*
+ * __split_huge_page_refcount run
+ * before us, "page" was a THP
+ * tail. The split page_head has been
+ * freed and reallocated as slab or
+ * hugetlbfs page of smaller order
+ * (only possible if reallocated as
+ * slab on x86).
+ */
put_page(page_head);
+ return false;
+ }
+ }
+
+ /*
+ * page_head wasn't a dangling pointer but it
+ * may not be a head page anymore by the time
+ * we obtain the lock. That is ok as long as it
+ * can't be freed from under us.
+ */
+ flags = compound_lock_irqsave(page_head);
+ /* here __split_huge_page_refcount won't run anymore */
+ if (likely(PageTail(page))) {
+ __get_page_tail_foll(page, false);
+ got = true;
}
+ compound_unlock_irqrestore(page_head, flags);
+ if (unlikely(!got))
+ put_page(page_head);
}
return got;
}
diff --git a/net/Kconfig b/net/Kconfig
index 0715db64a5c3..d334678c0bd8 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -224,7 +224,7 @@ source "net/hsr/Kconfig"
config RPS
boolean
- depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+ depends on SMP && SYSFS
default y
config RFS_ACCEL
@@ -235,7 +235,7 @@ config RFS_ACCEL
config XPS
boolean
- depends on SMP && USE_GENERIC_SMP_HELPERS
+ depends on SMP
default y
config NETPRIO_CGROUP
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3dc0c6cf02a8..c4638e6f0238 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1425,7 +1425,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
do {
if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
last_issued, &done,
- &used) == DMA_SUCCESS) {
+ &used) == DMA_COMPLETE) {
/* Safe to free early-copied skbs now */
__skb_queue_purge(&sk->sk_async_wait_queue);
break;
@@ -1433,7 +1433,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
struct sk_buff *skb;
while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
(dma_async_is_complete(skb->dma_cookie, done,
- used) == DMA_SUCCESS)) {
+ used) == DMA_COMPLETE)) {
__skb_dequeue(&sk->sk_async_wait_queue);
kfree_skb(skb);
}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index d0d14a04dce1..bf04b30a788a 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -471,15 +471,6 @@ struct rpc_filelist {
umode_t mode;
};
-static int rpc_delete_dentry(const struct dentry *dentry)
-{
- return 1;
-}
-
-static const struct dentry_operations rpc_dentry_operations = {
- .d_delete = rpc_delete_dentry,
-};
-
static struct inode *
rpc_get_inode(struct super_block *sb, umode_t mode)
{
@@ -1266,7 +1257,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = RPCAUTH_GSSMAGIC;
sb->s_op = &s_ops;
- sb->s_d_op = &rpc_dentry_operations;
+ sb->s_d_op = &simple_dentry_operations;
sb->s_time_gran = 1;
inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c
index db0e5cd34c70..91c4117637ae 100644
--- a/scripts/asn1_compiler.c
+++ b/scripts/asn1_compiler.c
@@ -1353,6 +1353,8 @@ static void render_out_of_line_list(FILE *out)
render_opcode(out, "ASN1_OP_END_SET_OF%s,\n", act);
render_opcode(out, "_jump_target(%u),\n", entry);
break;
+ default:
+ break;
}
if (e->action)
render_opcode(out, "_action(ACT_%s),\n",
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 61090e0ff613..9c9810030377 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3289,6 +3289,7 @@ sub process {
}
}
if (!defined $suppress_whiletrailers{$linenr} &&
+ defined($stat) && defined($cond) &&
$line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) {
my ($s, $c) = ($stat, $cond);
diff --git a/security/Makefile b/security/Makefile
index c26c81e92571..a5918e01a4f7 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -16,7 +16,6 @@ obj-$(CONFIG_MMU) += min_addr.o
# Object file lists
obj-$(CONFIG_SECURITY) += security.o capability.o
obj-$(CONFIG_SECURITYFS) += inode.o
-# Must precede capability.o in order to stack properly.
obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o
obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o
obj-$(CONFIG_AUDIT) += lsm_audit.o
diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c
index 031d2d9dd695..89c78658031f 100644
--- a/security/apparmor/audit.c
+++ b/security/apparmor/audit.c
@@ -111,7 +111,6 @@ static const char *const aa_audit_type[] = {
static void audit_pre(struct audit_buffer *ab, void *ca)
{
struct common_audit_data *sa = ca;
- struct task_struct *tsk = sa->aad->tsk ? sa->aad->tsk : current;
if (aa_g_audit_header) {
audit_log_format(ab, "apparmor=");
@@ -132,11 +131,6 @@ static void audit_pre(struct audit_buffer *ab, void *ca)
if (sa->aad->profile) {
struct aa_profile *profile = sa->aad->profile;
- pid_t pid;
- rcu_read_lock();
- pid = rcu_dereference(tsk->real_parent)->pid;
- rcu_read_unlock();
- audit_log_format(ab, " parent=%d", pid);
if (profile->ns != root_ns) {
audit_log_format(ab, " namespace=");
audit_log_untrustedstring(ab, profile->ns->base.hname);
@@ -149,12 +143,6 @@ static void audit_pre(struct audit_buffer *ab, void *ca)
audit_log_format(ab, " name=");
audit_log_untrustedstring(ab, sa->aad->name);
}
-
- if (sa->aad->tsk) {
- audit_log_format(ab, " pid=%d comm=", tsk->pid);
- audit_log_untrustedstring(ab, tsk->comm);
- }
-
}
/**
@@ -212,7 +200,7 @@ int aa_audit(int type, struct aa_profile *profile, gfp_t gfp,
if (sa->aad->type == AUDIT_APPARMOR_KILL)
(void)send_sig_info(SIGKILL, NULL,
- sa->aad->tsk ? sa->aad->tsk : current);
+ sa->u.tsk ? sa->u.tsk : current);
if (sa->aad->type == AUDIT_APPARMOR_ALLOWED)
return complain_error(sa->aad->error);
diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c
index 84d1f5f53877..1101c6f64bb7 100644
--- a/security/apparmor/capability.c
+++ b/security/apparmor/capability.c
@@ -53,8 +53,7 @@ static void audit_cb(struct audit_buffer *ab, void *va)
/**
* audit_caps - audit a capability
- * @profile: profile confining task (NOT NULL)
- * @task: task capability test was performed against (NOT NULL)
+ * @profile: profile being tested for confinement (NOT NULL)
* @cap: capability tested
* @error: error code returned by test
*
@@ -63,8 +62,7 @@ static void audit_cb(struct audit_buffer *ab, void *va)
*
* Returns: 0 or sa->error on success, error code on failure
*/
-static int audit_caps(struct aa_profile *profile, struct task_struct *task,
- int cap, int error)
+static int audit_caps(struct aa_profile *profile, int cap, int error)
{
struct audit_cache *ent;
int type = AUDIT_APPARMOR_AUTO;
@@ -73,7 +71,6 @@ static int audit_caps(struct aa_profile *profile, struct task_struct *task,
sa.type = LSM_AUDIT_DATA_CAP;
sa.aad = &aad;
sa.u.cap = cap;
- sa.aad->tsk = task;
sa.aad->op = OP_CAPABLE;
sa.aad->error = error;
@@ -124,8 +121,7 @@ static int profile_capable(struct aa_profile *profile, int cap)
/**
* aa_capable - test permission to use capability
- * @task: task doing capability test against (NOT NULL)
- * @profile: profile confining @task (NOT NULL)
+ * @profile: profile being tested against (NOT NULL)
* @cap: capability to be tested
* @audit: whether an audit record should be generated
*
@@ -133,8 +129,7 @@ static int profile_capable(struct aa_profile *profile, int cap)
*
* Returns: 0 on success, or else an error code.
*/
-int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap,
- int audit)
+int aa_capable(struct aa_profile *profile, int cap, int audit)
{
int error = profile_capable(profile, cap);
@@ -144,5 +139,5 @@ int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap,
return error;
}
- return audit_caps(profile, task, cap, error);
+ return audit_caps(profile, cap, error);
}
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 26c607c971f5..452567d3a08e 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -50,23 +50,21 @@ void aa_free_domain_entries(struct aa_domain *domain)
/**
* may_change_ptraced_domain - check if can change profile on ptraced task
- * @task: task we want to change profile of (NOT NULL)
* @to_profile: profile to change to (NOT NULL)
*
- * Check if the task is ptraced and if so if the tracing task is allowed
+ * Check if current is ptraced and if so if the tracing task is allowed
* to trace the new domain
*
* Returns: %0 or error if change not allowed
*/
-static int may_change_ptraced_domain(struct task_struct *task,
- struct aa_profile *to_profile)
+static int may_change_ptraced_domain(struct aa_profile *to_profile)
{
struct task_struct *tracer;
struct aa_profile *tracerp = NULL;
int error = 0;
rcu_read_lock();
- tracer = ptrace_parent(task);
+ tracer = ptrace_parent(current);
if (tracer)
/* released below */
tracerp = aa_get_task_profile(tracer);
@@ -75,7 +73,7 @@ static int may_change_ptraced_domain(struct task_struct *task,
if (!tracer || unconfined(tracerp))
goto out;
- error = aa_may_ptrace(tracer, tracerp, to_profile, PTRACE_MODE_ATTACH);
+ error = aa_may_ptrace(tracerp, to_profile, PTRACE_MODE_ATTACH);
out:
rcu_read_unlock();
@@ -477,7 +475,7 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm)
}
if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
- error = may_change_ptraced_domain(current, new_profile);
+ error = may_change_ptraced_domain(new_profile);
if (error) {
aa_put_profile(new_profile);
goto audit;
@@ -690,7 +688,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
}
}
- error = may_change_ptraced_domain(current, hat);
+ error = may_change_ptraced_domain(hat);
if (error) {
info = "ptraced";
error = -EPERM;
@@ -829,7 +827,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
}
/* check if tracing task is allowed to trace target domain */
- error = may_change_ptraced_domain(current, target);
+ error = may_change_ptraced_domain(target);
if (error) {
info = "ptrace prevents transition";
goto audit;
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index 30e8d7687259..ba3dfd17f23f 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -109,7 +109,6 @@ struct apparmor_audit_data {
void *profile;
const char *name;
const char *info;
- struct task_struct *tsk;
union {
void *target;
struct {
diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h
index 2e7c9d6a2f3b..fc3fa381d850 100644
--- a/security/apparmor/include/capability.h
+++ b/security/apparmor/include/capability.h
@@ -4,7 +4,7 @@
* This file contains AppArmor capability mediation definitions.
*
* Copyright (C) 1998-2008 Novell/SUSE
- * Copyright 2009-2010 Canonical Ltd.
+ * Copyright 2009-2013 Canonical Ltd.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -38,8 +38,7 @@ struct aa_caps {
extern struct aa_fs_entry aa_fs_entry_caps[];
-int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap,
- int audit);
+int aa_capable(struct aa_profile *profile, int cap, int audit);
static inline void aa_free_cap_rules(struct aa_caps *caps)
{
diff --git a/security/apparmor/include/ipc.h b/security/apparmor/include/ipc.h
index aeda0fbc8b2f..288ca76e2fb1 100644
--- a/security/apparmor/include/ipc.h
+++ b/security/apparmor/include/ipc.h
@@ -19,8 +19,8 @@
struct aa_profile;
-int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer,
- struct aa_profile *tracee, unsigned int mode);
+int aa_may_ptrace(struct aa_profile *tracer, struct aa_profile *tracee,
+ unsigned int mode);
int aa_ptrace(struct task_struct *tracer, struct task_struct *tracee,
unsigned int mode);
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index c51d2266587e..777ac1c47253 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -54,15 +54,14 @@ static int aa_audit_ptrace(struct aa_profile *profile,
/**
* aa_may_ptrace - test if tracer task can trace the tracee
- * @tracer_task: task who will do the tracing (NOT NULL)
* @tracer: profile of the task doing the tracing (NOT NULL)
* @tracee: task to be traced
* @mode: whether PTRACE_MODE_READ || PTRACE_MODE_ATTACH
*
* Returns: %0 else error code if permission denied or error
*/
-int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer,
- struct aa_profile *tracee, unsigned int mode)
+int aa_may_ptrace(struct aa_profile *tracer, struct aa_profile *tracee,
+ unsigned int mode)
{
/* TODO: currently only based on capability, not extended ptrace
* rules,
@@ -72,7 +71,7 @@ int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer,
if (unconfined(tracer) || tracer == tracee)
return 0;
/* log this capability request */
- return aa_capable(tracer_task, tracer, CAP_SYS_PTRACE, 1);
+ return aa_capable(tracer, CAP_SYS_PTRACE, 1);
}
/**
@@ -101,7 +100,7 @@ int aa_ptrace(struct task_struct *tracer, struct task_struct *tracee,
if (!unconfined(tracer_p)) {
struct aa_profile *tracee_p = aa_get_task_profile(tracee);
- error = aa_may_ptrace(tracer, tracer_p, tracee_p, mode);
+ error = aa_may_ptrace(tracer_p, tracee_p, mode);
error = aa_audit_ptrace(tracer_p, tracee_p, error);
aa_put_profile(tracee_p);
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index fb99e18123b4..4257b7e2796b 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -145,7 +145,7 @@ static int apparmor_capable(const struct cred *cred, struct user_namespace *ns,
if (!error) {
profile = aa_cred_profile(cred);
if (!unconfined(profile))
- error = aa_capable(current, profile, cap, audit);
+ error = aa_capable(profile, cap, audit);
}
return error;
}
diff --git a/security/capability.c b/security/capability.c
index dbeb9bc27b24..8b4f24ae4338 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -777,9 +777,15 @@ static int cap_xfrm_policy_delete_security(struct xfrm_sec_ctx *ctx)
return 0;
}
-static int cap_xfrm_state_alloc_security(struct xfrm_state *x,
- struct xfrm_user_sec_ctx *sec_ctx,
- u32 secid)
+static int cap_xfrm_state_alloc(struct xfrm_state *x,
+ struct xfrm_user_sec_ctx *sec_ctx)
+{
+ return 0;
+}
+
+static int cap_xfrm_state_alloc_acquire(struct xfrm_state *x,
+ struct xfrm_sec_ctx *polsec,
+ u32 secid)
{
return 0;
}
@@ -1101,7 +1107,8 @@ void __init security_fixup_ops(struct security_operations *ops)
set_to_cap_if_null(ops, xfrm_policy_clone_security);
set_to_cap_if_null(ops, xfrm_policy_free_security);
set_to_cap_if_null(ops, xfrm_policy_delete_security);
- set_to_cap_if_null(ops, xfrm_state_alloc_security);
+ set_to_cap_if_null(ops, xfrm_state_alloc);
+ set_to_cap_if_null(ops, xfrm_state_alloc_acquire);
set_to_cap_if_null(ops, xfrm_state_free_security);
set_to_cap_if_null(ops, xfrm_state_delete_security);
set_to_cap_if_null(ops, xfrm_policy_lookup);
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 0b759e17a131..77ca965ab684 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -13,7 +13,9 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/err.h>
+#include <linux/sched.h>
#include <linux/rbtree.h>
+#include <linux/cred.h>
#include <linux/key-type.h>
#include <linux/digsig.h>
@@ -21,21 +23,29 @@
static struct key *keyring[INTEGRITY_KEYRING_MAX];
+#ifdef CONFIG_IMA_TRUSTED_KEYRING
+static const char *keyring_name[INTEGRITY_KEYRING_MAX] = {
+ ".evm",
+ ".module",
+ ".ima",
+};
+#else
static const char *keyring_name[INTEGRITY_KEYRING_MAX] = {
"_evm",
"_module",
"_ima",
};
+#endif
int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
- const char *digest, int digestlen)
+ const char *digest, int digestlen)
{
if (id >= INTEGRITY_KEYRING_MAX)
return -EINVAL;
if (!keyring[id]) {
keyring[id] =
- request_key(&key_type_keyring, keyring_name[id], NULL);
+ request_key(&key_type_keyring, keyring_name[id], NULL);
if (IS_ERR(keyring[id])) {
int err = PTR_ERR(keyring[id]);
pr_err("no %s keyring: %d\n", keyring_name[id], err);
@@ -44,9 +54,10 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
}
}
- switch (sig[0]) {
+ switch (sig[1]) {
case 1:
- return digsig_verify(keyring[id], sig, siglen,
+ /* v1 API expect signature without xattr type */
+ return digsig_verify(keyring[id], sig + 1, siglen - 1,
digest, digestlen);
case 2:
return asymmetric_verify(keyring[id], sig, siglen,
@@ -55,3 +66,21 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
return -EOPNOTSUPP;
}
+
+int integrity_init_keyring(const unsigned int id)
+{
+ const struct cred *cred = current_cred();
+ const struct user_struct *user = cred->user;
+
+ keyring[id] = keyring_alloc(keyring_name[id], KUIDT_INIT(0),
+ KGIDT_INIT(0), cred,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
+ KEY_ALLOC_NOT_IN_QUOTA, user->uid_keyring);
+ if (!IS_ERR(keyring[id]))
+ set_bit(KEY_FLAG_TRUSTED_ONLY, &keyring[id]->flags);
+ else
+ pr_info("Can't allocate %s keyring (%ld)\n",
+ keyring_name[id], PTR_ERR(keyring[id]));
+ return 0;
+}
diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
index b4754667659d..9eae4809006b 100644
--- a/security/integrity/digsig_asymmetric.c
+++ b/security/integrity/digsig_asymmetric.c
@@ -20,17 +20,6 @@
#include "integrity.h"
/*
- * signature format v2 - for using with asymmetric keys
- */
-struct signature_v2_hdr {
- uint8_t version; /* signature format version */
- uint8_t hash_algo; /* Digest algorithm [enum pkey_hash_algo] */
- uint32_t keyid; /* IMA key identifier - not X509/PGP specific*/
- uint16_t sig_size; /* signature size */
- uint8_t sig[0]; /* signature payload */
-} __packed;
-
-/*
* Request an asymmetric key.
*/
static struct key *request_asymmetric_key(struct key *keyring, uint32_t keyid)
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index af9b6852f4e1..336b3ddfe63f 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -123,7 +123,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
goto out;
}
- xattr_len = rc - 1;
+ xattr_len = rc;
/* check value type */
switch (xattr_data->type) {
@@ -143,7 +143,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
if (rc)
break;
rc = integrity_digsig_verify(INTEGRITY_KEYRING_EVM,
- xattr_data->digest, xattr_len,
+ (const char *)xattr_data, xattr_len,
calc.digest, sizeof(calc.digest));
if (!rc) {
/* we probably want to replace rsa with hmac here */
diff --git a/security/integrity/evm/evm_posix_acl.c b/security/integrity/evm/evm_posix_acl.c
index b1753e98bf9a..46408b9e62e8 100644
--- a/security/integrity/evm/evm_posix_acl.c
+++ b/security/integrity/evm/evm_posix_acl.c
@@ -11,8 +11,9 @@
#include <linux/module.h>
#include <linux/xattr.h>
+#include <linux/evm.h>
-int posix_xattr_acl(char *xattr)
+int posix_xattr_acl(const char *xattr)
{
int xattr_len = strlen(xattr);
diff --git a/security/integrity/iint.c b/security/integrity/iint.c
index 74522dbd10a6..c49d3f14cbec 100644
--- a/security/integrity/iint.c
+++ b/security/integrity/iint.c
@@ -70,6 +70,8 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode)
static void iint_free(struct integrity_iint_cache *iint)
{
+ kfree(iint->ima_hash);
+ iint->ima_hash = NULL;
iint->version = 0;
iint->flags = 0UL;
iint->ima_file_status = INTEGRITY_UNKNOWN;
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 39196abaff0d..dad8d4ca2437 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -9,6 +9,7 @@ config IMA
select CRYPTO_HMAC
select CRYPTO_MD5
select CRYPTO_SHA1
+ select CRYPTO_HASH_INFO
select TCG_TPM if HAS_IOMEM && !UML
select TCG_TIS if TCG_TPM && X86
select TCG_IBMVTPM if TCG_TPM && PPC64
@@ -45,6 +46,69 @@ config IMA_LSM_RULES
help
Disabling this option will disregard LSM based policy rules.
+choice
+ prompt "Default template"
+ default IMA_NG_TEMPLATE
+ depends on IMA
+ help
+ Select the default IMA measurement template.
+
+ The original 'ima' measurement list template contains a
+ hash, defined as 20 bytes, and a null terminated pathname,
+ limited to 255 characters. The 'ima-ng' measurement list
+ template permits both larger hash digests and longer
+ pathnames.
+
+ config IMA_TEMPLATE
+ bool "ima"
+ config IMA_NG_TEMPLATE
+ bool "ima-ng (default)"
+ config IMA_SIG_TEMPLATE
+ bool "ima-sig"
+endchoice
+
+config IMA_DEFAULT_TEMPLATE
+ string
+ depends on IMA
+ default "ima" if IMA_TEMPLATE
+ default "ima-ng" if IMA_NG_TEMPLATE
+ default "ima-sig" if IMA_SIG_TEMPLATE
+
+choice
+ prompt "Default integrity hash algorithm"
+ default IMA_DEFAULT_HASH_SHA1
+ depends on IMA
+ help
+ Select the default hash algorithm used for the measurement
+ list, integrity appraisal and audit log. The compiled default
+ hash algorithm can be overwritten using the kernel command
+ line 'ima_hash=' option.
+
+ config IMA_DEFAULT_HASH_SHA1
+ bool "SHA1 (default)"
+ depends on CRYPTO_SHA1
+
+ config IMA_DEFAULT_HASH_SHA256
+ bool "SHA256"
+ depends on CRYPTO_SHA256 && !IMA_TEMPLATE
+
+ config IMA_DEFAULT_HASH_SHA512
+ bool "SHA512"
+ depends on CRYPTO_SHA512 && !IMA_TEMPLATE
+
+ config IMA_DEFAULT_HASH_WP512
+ bool "WP512"
+ depends on CRYPTO_WP512 && !IMA_TEMPLATE
+endchoice
+
+config IMA_DEFAULT_HASH
+ string
+ depends on IMA
+ default "sha1" if IMA_DEFAULT_HASH_SHA1
+ default "sha256" if IMA_DEFAULT_HASH_SHA256
+ default "sha512" if IMA_DEFAULT_HASH_SHA512
+ default "wp512" if IMA_DEFAULT_HASH_WP512
+
config IMA_APPRAISE
bool "Appraise integrity measurements"
depends on IMA
@@ -59,3 +123,11 @@ config IMA_APPRAISE
For more information on integrity appraisal refer to:
<http://linux-ima.sourceforge.net>
If unsure, say N.
+
+config IMA_TRUSTED_KEYRING
+ bool "Require all keys on the _ima keyring be signed"
+ depends on IMA_APPRAISE && SYSTEM_TRUSTED_KEYRING
+ default y
+ help
+ This option requires that all keys added to the _ima
+ keyring be signed by a key on the system trusted keyring.
diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile
index 56dfee7cbf61..d79263d2fdbf 100644
--- a/security/integrity/ima/Makefile
+++ b/security/integrity/ima/Makefile
@@ -6,5 +6,5 @@
obj-$(CONFIG_IMA) += ima.o
ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \
- ima_policy.o
+ ima_policy.o ima_template.o ima_template_lib.o
ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index b3dd616560f7..bf03c6a16cc8 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -36,23 +36,48 @@ enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 };
#define IMA_HASH_BITS 9
#define IMA_MEASURE_HTABLE_SIZE (1 << IMA_HASH_BITS)
+#define IMA_TEMPLATE_FIELD_ID_MAX_LEN 16
+#define IMA_TEMPLATE_NUM_FIELDS_MAX 15
+
+#define IMA_TEMPLATE_IMA_NAME "ima"
+#define IMA_TEMPLATE_IMA_FMT "d|n"
+
/* set during initialization */
extern int ima_initialized;
extern int ima_used_chip;
-extern char *ima_hash;
+extern int ima_hash_algo;
extern int ima_appraise;
-/* IMA inode template definition */
-struct ima_template_data {
- u8 digest[IMA_DIGEST_SIZE]; /* sha1/md5 measurement hash */
- char file_name[IMA_EVENT_NAME_LEN_MAX + 1]; /* name + \0 */
+/* IMA template field data definition */
+struct ima_field_data {
+ u8 *data;
+ u32 len;
+};
+
+/* IMA template field definition */
+struct ima_template_field {
+ const char field_id[IMA_TEMPLATE_FIELD_ID_MAX_LEN];
+ int (*field_init) (struct integrity_iint_cache *iint, struct file *file,
+ const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value,
+ int xattr_len, struct ima_field_data *field_data);
+ void (*field_show) (struct seq_file *m, enum ima_show_type show,
+ struct ima_field_data *field_data);
+};
+
+/* IMA template descriptor definition */
+struct ima_template_desc {
+ char *name;
+ char *fmt;
+ int num_fields;
+ struct ima_template_field **fields;
};
struct ima_template_entry {
- u8 digest[IMA_DIGEST_SIZE]; /* sha1 or md5 measurement hash */
- const char *template_name;
- int template_len;
- struct ima_template_data template;
+ u8 digest[TPM_DIGEST_SIZE]; /* sha1 or md5 measurement hash */
+ struct ima_template_desc *template_desc; /* template descriptor */
+ u32 template_data_len;
+ struct ima_field_data template_data[0]; /* template related data */
};
struct ima_queue_entry {
@@ -69,13 +94,21 @@ int ima_fs_init(void);
void ima_fs_cleanup(void);
int ima_inode_alloc(struct inode *inode);
int ima_add_template_entry(struct ima_template_entry *entry, int violation,
- const char *op, struct inode *inode);
-int ima_calc_file_hash(struct file *file, char *digest);
-int ima_calc_buffer_hash(const void *data, int len, char *digest);
-int ima_calc_boot_aggregate(char *digest);
-void ima_add_violation(struct inode *inode, const unsigned char *filename,
+ const char *op, struct inode *inode,
+ const unsigned char *filename);
+int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash);
+int ima_calc_field_array_hash(struct ima_field_data *field_data, int num_fields,
+ struct ima_digest_data *hash);
+int __init ima_calc_boot_aggregate(struct ima_digest_data *hash);
+void ima_add_violation(struct file *file, const unsigned char *filename,
const char *op, const char *cause);
int ima_init_crypto(void);
+void ima_putc(struct seq_file *m, void *data, int datalen);
+void ima_print_digest(struct seq_file *m, u8 *digest, int size);
+struct ima_template_desc *ima_template_desc_current(void);
+int ima_init_template(void);
+
+int ima_init_template(void);
/*
* used to protect h_table and sha_table
@@ -98,14 +131,21 @@ static inline unsigned long ima_hash_key(u8 *digest)
int ima_get_action(struct inode *inode, int mask, int function);
int ima_must_measure(struct inode *inode, int mask, int function);
int ima_collect_measurement(struct integrity_iint_cache *iint,
- struct file *file);
+ struct file *file,
+ struct evm_ima_xattr_data **xattr_value,
+ int *xattr_len);
void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
- const unsigned char *filename);
+ const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value,
+ int xattr_len);
void ima_audit_measurement(struct integrity_iint_cache *iint,
const unsigned char *filename);
+int ima_alloc_init_template(struct integrity_iint_cache *iint,
+ struct file *file, const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value,
+ int xattr_len, struct ima_template_entry **entry);
int ima_store_template(struct ima_template_entry *entry, int violation,
- struct inode *inode);
-void ima_template_show(struct seq_file *m, void *e, enum ima_show_type show);
+ struct inode *inode, const unsigned char *filename);
const char *ima_d_path(struct path *path, char **pathbuf);
/* rbtree tree calls to lookup, insert, delete
@@ -131,17 +171,25 @@ void ima_delete_rules(void);
#ifdef CONFIG_IMA_APPRAISE
int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
- struct file *file, const unsigned char *filename);
+ struct file *file, const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value,
+ int xattr_len);
int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func);
void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
int func);
+void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len,
+ struct ima_digest_data *hash);
+int ima_read_xattr(struct dentry *dentry,
+ struct evm_ima_xattr_data **xattr_value);
#else
static inline int ima_appraise_measurement(int func,
struct integrity_iint_cache *iint,
struct file *file,
- const unsigned char *filename)
+ const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value,
+ int xattr_len)
{
return INTEGRITY_UNKNOWN;
}
@@ -162,6 +210,19 @@ static inline enum integrity_status ima_get_cache_status(struct integrity_iint_c
{
return INTEGRITY_UNKNOWN;
}
+
+static inline void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value,
+ int xattr_len,
+ struct ima_digest_data *hash)
+{
+}
+
+static inline int ima_read_xattr(struct dentry *dentry,
+ struct evm_ima_xattr_data **xattr_value)
+{
+ return 0;
+}
+
#endif
/* LSM based policy rules require audit */
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index 1c03e8f1e0e1..0e7540863fc2 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -18,9 +18,46 @@
#include <linux/fs.h>
#include <linux/xattr.h>
#include <linux/evm.h>
+#include <crypto/hash_info.h>
#include "ima.h"
-static const char *IMA_TEMPLATE_NAME = "ima";
+/*
+ * ima_alloc_init_template - create and initialize a new template entry
+ */
+int ima_alloc_init_template(struct integrity_iint_cache *iint,
+ struct file *file, const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value,
+ int xattr_len, struct ima_template_entry **entry)
+{
+ struct ima_template_desc *template_desc = ima_template_desc_current();
+ int i, result = 0;
+
+ *entry = kzalloc(sizeof(**entry) + template_desc->num_fields *
+ sizeof(struct ima_field_data), GFP_NOFS);
+ if (!*entry)
+ return -ENOMEM;
+
+ for (i = 0; i < template_desc->num_fields; i++) {
+ struct ima_template_field *field = template_desc->fields[i];
+ u32 len;
+
+ result = field->field_init(iint, file, filename,
+ xattr_value, xattr_len,
+ &((*entry)->template_data[i]));
+ if (result != 0)
+ goto out;
+
+ len = (*entry)->template_data[i].len;
+ (*entry)->template_data_len += sizeof(len);
+ (*entry)->template_data_len += len;
+ }
+ (*entry)->template_desc = template_desc;
+ return 0;
+out:
+ kfree(*entry);
+ *entry = NULL;
+ return result;
+}
/*
* ima_store_template - store ima template measurements
@@ -39,28 +76,34 @@ static const char *IMA_TEMPLATE_NAME = "ima";
* Returns 0 on success, error code otherwise
*/
int ima_store_template(struct ima_template_entry *entry,
- int violation, struct inode *inode)
+ int violation, struct inode *inode,
+ const unsigned char *filename)
{
const char *op = "add_template_measure";
const char *audit_cause = "hashing_error";
+ char *template_name = entry->template_desc->name;
int result;
-
- memset(entry->digest, 0, sizeof(entry->digest));
- entry->template_name = IMA_TEMPLATE_NAME;
- entry->template_len = sizeof(entry->template);
+ struct {
+ struct ima_digest_data hdr;
+ char digest[TPM_DIGEST_SIZE];
+ } hash;
if (!violation) {
- result = ima_calc_buffer_hash(&entry->template,
- entry->template_len,
- entry->digest);
+ int num_fields = entry->template_desc->num_fields;
+
+ /* this function uses default algo */
+ hash.hdr.algo = HASH_ALGO_SHA1;
+ result = ima_calc_field_array_hash(&entry->template_data[0],
+ num_fields, &hash.hdr);
if (result < 0) {
integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
- entry->template_name, op,
+ template_name, op,
audit_cause, result, 0);
return result;
}
+ memcpy(entry->digest, hash.hdr.digest, hash.hdr.length);
}
- result = ima_add_template_entry(entry, violation, op, inode);
+ result = ima_add_template_entry(entry, violation, op, inode, filename);
return result;
}
@@ -71,24 +114,24 @@ int ima_store_template(struct ima_template_entry *entry,
* By extending the PCR with 0xFF's instead of with zeroes, the PCR
* value is invalidated.
*/
-void ima_add_violation(struct inode *inode, const unsigned char *filename,
+void ima_add_violation(struct file *file, const unsigned char *filename,
const char *op, const char *cause)
{
struct ima_template_entry *entry;
+ struct inode *inode = file->f_dentry->d_inode;
int violation = 1;
int result;
/* can overflow, only indicator */
atomic_long_inc(&ima_htable.violations);
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry) {
+ result = ima_alloc_init_template(NULL, file, filename,
+ NULL, 0, &entry);
+ if (result < 0) {
result = -ENOMEM;
goto err_out;
}
- memset(&entry->template, 0, sizeof(entry->template));
- strncpy(entry->template.file_name, filename, IMA_EVENT_NAME_LEN_MAX);
- result = ima_store_template(entry, violation, inode);
+ result = ima_store_template(entry, violation, inode, filename);
if (result < 0)
kfree(entry);
err_out:
@@ -138,20 +181,42 @@ int ima_must_measure(struct inode *inode, int mask, int function)
* Return 0 on success, error code otherwise
*/
int ima_collect_measurement(struct integrity_iint_cache *iint,
- struct file *file)
+ struct file *file,
+ struct evm_ima_xattr_data **xattr_value,
+ int *xattr_len)
{
struct inode *inode = file_inode(file);
const char *filename = file->f_dentry->d_name.name;
int result = 0;
+ struct {
+ struct ima_digest_data hdr;
+ char digest[IMA_MAX_DIGEST_SIZE];
+ } hash;
+
+ if (xattr_value)
+ *xattr_len = ima_read_xattr(file->f_dentry, xattr_value);
if (!(iint->flags & IMA_COLLECTED)) {
u64 i_version = file_inode(file)->i_version;
- iint->ima_xattr.type = IMA_XATTR_DIGEST;
- result = ima_calc_file_hash(file, iint->ima_xattr.digest);
+ /* use default hash algorithm */
+ hash.hdr.algo = ima_hash_algo;
+
+ if (xattr_value)
+ ima_get_hash_algo(*xattr_value, *xattr_len, &hash.hdr);
+
+ result = ima_calc_file_hash(file, &hash.hdr);
if (!result) {
- iint->version = i_version;
- iint->flags |= IMA_COLLECTED;
+ int length = sizeof(hash.hdr) + hash.hdr.length;
+ void *tmpbuf = krealloc(iint->ima_hash, length,
+ GFP_NOFS);
+ if (tmpbuf) {
+ iint->ima_hash = tmpbuf;
+ memcpy(iint->ima_hash, &hash, length);
+ iint->version = i_version;
+ iint->flags |= IMA_COLLECTED;
+ } else
+ result = -ENOMEM;
}
}
if (result)
@@ -177,7 +242,9 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
* Must be called with iint->mutex held.
*/
void ima_store_measurement(struct integrity_iint_cache *iint,
- struct file *file, const unsigned char *filename)
+ struct file *file, const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value,
+ int xattr_len)
{
const char *op = "add_template_measure";
const char *audit_cause = "ENOMEM";
@@ -189,19 +256,15 @@ void ima_store_measurement(struct integrity_iint_cache *iint,
if (iint->flags & IMA_MEASURED)
return;
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry) {
+ result = ima_alloc_init_template(iint, file, filename,
+ xattr_value, xattr_len, &entry);
+ if (result < 0) {
integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename,
op, audit_cause, result, 0);
return;
}
- memset(&entry->template, 0, sizeof(entry->template));
- memcpy(entry->template.digest, iint->ima_xattr.digest, IMA_DIGEST_SIZE);
- strcpy(entry->template.file_name,
- (strlen(filename) > IMA_EVENT_NAME_LEN_MAX) ?
- file->f_dentry->d_name.name : filename);
- result = ima_store_template(entry, violation, inode);
+ result = ima_store_template(entry, violation, inode, filename);
if (!result || result == -EEXIST)
iint->flags |= IMA_MEASURED;
if (result < 0)
@@ -212,14 +275,16 @@ void ima_audit_measurement(struct integrity_iint_cache *iint,
const unsigned char *filename)
{
struct audit_buffer *ab;
- char hash[(IMA_DIGEST_SIZE * 2) + 1];
+ char hash[(iint->ima_hash->length * 2) + 1];
+ const char *algo_name = hash_algo_name[iint->ima_hash->algo];
+ char algo_hash[sizeof(hash) + strlen(algo_name) + 2];
int i;
if (iint->flags & IMA_AUDITED)
return;
- for (i = 0; i < IMA_DIGEST_SIZE; i++)
- hex_byte_pack(hash + (i * 2), iint->ima_xattr.digest[i]);
+ for (i = 0; i < iint->ima_hash->length; i++)
+ hex_byte_pack(hash + (i * 2), iint->ima_hash->digest[i]);
hash[i * 2] = '\0';
ab = audit_log_start(current->audit_context, GFP_KERNEL,
@@ -230,7 +295,8 @@ void ima_audit_measurement(struct integrity_iint_cache *iint,
audit_log_format(ab, "file=");
audit_log_untrustedstring(ab, filename);
audit_log_format(ab, " hash=");
- audit_log_untrustedstring(ab, hash);
+ snprintf(algo_hash, sizeof(algo_hash), "%s:%s", algo_name, hash);
+ audit_log_untrustedstring(ab, algo_hash);
audit_log_task_info(ab, current);
audit_log_end(ab);
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 2d4becab8918..46353ee517f6 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -15,6 +15,7 @@
#include <linux/magic.h>
#include <linux/ima.h>
#include <linux/evm.h>
+#include <crypto/hash_info.h>
#include "ima.h"
@@ -43,19 +44,31 @@ int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func)
}
static int ima_fix_xattr(struct dentry *dentry,
- struct integrity_iint_cache *iint)
+ struct integrity_iint_cache *iint)
{
- iint->ima_xattr.type = IMA_XATTR_DIGEST;
- return __vfs_setxattr_noperm(dentry, XATTR_NAME_IMA,
- (u8 *)&iint->ima_xattr,
- sizeof(iint->ima_xattr), 0);
+ int rc, offset;
+ u8 algo = iint->ima_hash->algo;
+
+ if (algo <= HASH_ALGO_SHA1) {
+ offset = 1;
+ iint->ima_hash->xattr.sha1.type = IMA_XATTR_DIGEST;
+ } else {
+ offset = 0;
+ iint->ima_hash->xattr.ng.type = IMA_XATTR_DIGEST_NG;
+ iint->ima_hash->xattr.ng.algo = algo;
+ }
+ rc = __vfs_setxattr_noperm(dentry, XATTR_NAME_IMA,
+ &iint->ima_hash->xattr.data[offset],
+ (sizeof(iint->ima_hash->xattr) - offset) +
+ iint->ima_hash->length, 0);
+ return rc;
}
/* Return specific func appraised cached result */
enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
int func)
{
- switch(func) {
+ switch (func) {
case MMAP_CHECK:
return iint->ima_mmap_status;
case BPRM_CHECK:
@@ -71,7 +84,7 @@ enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
static void ima_set_cache_status(struct integrity_iint_cache *iint,
int func, enum integrity_status status)
{
- switch(func) {
+ switch (func) {
case MMAP_CHECK:
iint->ima_mmap_status = status;
break;
@@ -90,7 +103,7 @@ static void ima_set_cache_status(struct integrity_iint_cache *iint,
static void ima_cache_flags(struct integrity_iint_cache *iint, int func)
{
- switch(func) {
+ switch (func) {
case MMAP_CHECK:
iint->flags |= (IMA_MMAP_APPRAISED | IMA_APPRAISED);
break;
@@ -107,6 +120,50 @@ static void ima_cache_flags(struct integrity_iint_cache *iint, int func)
}
}
+void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len,
+ struct ima_digest_data *hash)
+{
+ struct signature_v2_hdr *sig;
+
+ if (!xattr_value || xattr_len < 2)
+ return;
+
+ switch (xattr_value->type) {
+ case EVM_IMA_XATTR_DIGSIG:
+ sig = (typeof(sig))xattr_value;
+ if (sig->version != 2 || xattr_len <= sizeof(*sig))
+ return;
+ hash->algo = sig->hash_algo;
+ break;
+ case IMA_XATTR_DIGEST_NG:
+ hash->algo = xattr_value->digest[0];
+ break;
+ case IMA_XATTR_DIGEST:
+ /* this is for backward compatibility */
+ if (xattr_len == 21) {
+ unsigned int zero = 0;
+ if (!memcmp(&xattr_value->digest[16], &zero, 4))
+ hash->algo = HASH_ALGO_MD5;
+ else
+ hash->algo = HASH_ALGO_SHA1;
+ } else if (xattr_len == 17)
+ hash->algo = HASH_ALGO_MD5;
+ break;
+ }
+}
+
+int ima_read_xattr(struct dentry *dentry,
+ struct evm_ima_xattr_data **xattr_value)
+{
+ struct inode *inode = dentry->d_inode;
+
+ if (!inode->i_op->getxattr)
+ return 0;
+
+ return vfs_getxattr_alloc(dentry, XATTR_NAME_IMA, (char **)xattr_value,
+ 0, GFP_NOFS);
+}
+
/*
* ima_appraise_measurement - appraise file measurement
*
@@ -116,23 +173,22 @@ static void ima_cache_flags(struct integrity_iint_cache *iint, int func)
* Return 0 on success, error code otherwise
*/
int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
- struct file *file, const unsigned char *filename)
+ struct file *file, const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value,
+ int xattr_len)
{
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
- struct evm_ima_xattr_data *xattr_value = NULL;
enum integrity_status status = INTEGRITY_UNKNOWN;
const char *op = "appraise_data";
char *cause = "unknown";
- int rc;
+ int rc = xattr_len, hash_start = 0;
if (!ima_appraise)
return 0;
if (!inode->i_op->getxattr)
return INTEGRITY_UNKNOWN;
- rc = vfs_getxattr_alloc(dentry, XATTR_NAME_IMA, (char **)&xattr_value,
- 0, GFP_NOFS);
if (rc <= 0) {
if (rc && rc != -ENODATA)
goto out;
@@ -153,14 +209,25 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
goto out;
}
switch (xattr_value->type) {
+ case IMA_XATTR_DIGEST_NG:
+ /* first byte contains algorithm id */
+ hash_start = 1;
case IMA_XATTR_DIGEST:
if (iint->flags & IMA_DIGSIG_REQUIRED) {
cause = "IMA signature required";
status = INTEGRITY_FAIL;
break;
}
- rc = memcmp(xattr_value->digest, iint->ima_xattr.digest,
- IMA_DIGEST_SIZE);
+ if (xattr_len - sizeof(xattr_value->type) - hash_start >=
+ iint->ima_hash->length)
+ /* xattr length may be longer. md5 hash in previous
+ version occupied 20 bytes in xattr, instead of 16
+ */
+ rc = memcmp(&xattr_value->digest[hash_start],
+ iint->ima_hash->digest,
+ iint->ima_hash->length);
+ else
+ rc = -EINVAL;
if (rc) {
cause = "invalid-hash";
status = INTEGRITY_FAIL;
@@ -171,9 +238,9 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
case EVM_IMA_XATTR_DIGSIG:
iint->flags |= IMA_DIGSIG;
rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA,
- xattr_value->digest, rc - 1,
- iint->ima_xattr.digest,
- IMA_DIGEST_SIZE);
+ (const char *)xattr_value, rc,
+ iint->ima_hash->digest,
+ iint->ima_hash->length);
if (rc == -EOPNOTSUPP) {
status = INTEGRITY_UNKNOWN;
} else if (rc) {
@@ -203,7 +270,6 @@ out:
ima_cache_flags(iint, func);
}
ima_set_cache_status(iint, func, status);
- kfree(xattr_value);
return status;
}
@@ -219,7 +285,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
if (iint->flags & IMA_DIGSIG)
return;
- rc = ima_collect_measurement(iint, file);
+ rc = ima_collect_measurement(iint, file, NULL, NULL);
if (rc < 0)
return;
@@ -315,3 +381,14 @@ int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name)
}
return result;
}
+
+#ifdef CONFIG_IMA_TRUSTED_KEYRING
+static int __init init_ima_keyring(void)
+{
+ int ret;
+
+ ret = integrity_init_keyring(INTEGRITY_KEYRING_IMA);
+ return 0;
+}
+late_initcall(init_ima_keyring);
+#endif
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index a02e0791cf15..676e0292dfec 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -20,6 +20,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <crypto/hash.h>
+#include <crypto/hash_info.h>
#include "ima.h"
static struct crypto_shash *ima_shash_tfm;
@@ -28,31 +29,58 @@ int ima_init_crypto(void)
{
long rc;
- ima_shash_tfm = crypto_alloc_shash(ima_hash, 0, 0);
+ ima_shash_tfm = crypto_alloc_shash(hash_algo_name[ima_hash_algo], 0, 0);
if (IS_ERR(ima_shash_tfm)) {
rc = PTR_ERR(ima_shash_tfm);
- pr_err("Can not allocate %s (reason: %ld)\n", ima_hash, rc);
+ pr_err("Can not allocate %s (reason: %ld)\n",
+ hash_algo_name[ima_hash_algo], rc);
return rc;
}
return 0;
}
+static struct crypto_shash *ima_alloc_tfm(enum hash_algo algo)
+{
+ struct crypto_shash *tfm = ima_shash_tfm;
+ int rc;
+
+ if (algo != ima_hash_algo && algo < HASH_ALGO__LAST) {
+ tfm = crypto_alloc_shash(hash_algo_name[algo], 0, 0);
+ if (IS_ERR(tfm)) {
+ rc = PTR_ERR(tfm);
+ pr_err("Can not allocate %s (reason: %d)\n",
+ hash_algo_name[algo], rc);
+ }
+ }
+ return tfm;
+}
+
+static void ima_free_tfm(struct crypto_shash *tfm)
+{
+ if (tfm != ima_shash_tfm)
+ crypto_free_shash(tfm);
+}
+
/*
* Calculate the MD5/SHA1 file digest
*/
-int ima_calc_file_hash(struct file *file, char *digest)
+static int ima_calc_file_hash_tfm(struct file *file,
+ struct ima_digest_data *hash,
+ struct crypto_shash *tfm)
{
loff_t i_size, offset = 0;
char *rbuf;
int rc, read = 0;
struct {
struct shash_desc shash;
- char ctx[crypto_shash_descsize(ima_shash_tfm)];
+ char ctx[crypto_shash_descsize(tfm)];
} desc;
- desc.shash.tfm = ima_shash_tfm;
+ desc.shash.tfm = tfm;
desc.shash.flags = 0;
+ hash->length = crypto_shash_digestsize(tfm);
+
rc = crypto_shash_init(&desc.shash);
if (rc != 0)
return rc;
@@ -85,27 +113,83 @@ int ima_calc_file_hash(struct file *file, char *digest)
}
kfree(rbuf);
if (!rc)
- rc = crypto_shash_final(&desc.shash, digest);
+ rc = crypto_shash_final(&desc.shash, hash->digest);
if (read)
file->f_mode &= ~FMODE_READ;
out:
return rc;
}
+int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
+{
+ struct crypto_shash *tfm;
+ int rc;
+
+ tfm = ima_alloc_tfm(hash->algo);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ rc = ima_calc_file_hash_tfm(file, hash, tfm);
+
+ ima_free_tfm(tfm);
+
+ return rc;
+}
+
/*
- * Calculate the hash of a given buffer
+ * Calculate the hash of template data
*/
-int ima_calc_buffer_hash(const void *data, int len, char *digest)
+static int ima_calc_field_array_hash_tfm(struct ima_field_data *field_data,
+ int num_fields,
+ struct ima_digest_data *hash,
+ struct crypto_shash *tfm)
{
struct {
struct shash_desc shash;
- char ctx[crypto_shash_descsize(ima_shash_tfm)];
+ char ctx[crypto_shash_descsize(tfm)];
} desc;
+ int rc, i;
- desc.shash.tfm = ima_shash_tfm;
+ desc.shash.tfm = tfm;
desc.shash.flags = 0;
- return crypto_shash_digest(&desc.shash, data, len, digest);
+ hash->length = crypto_shash_digestsize(tfm);
+
+ rc = crypto_shash_init(&desc.shash);
+ if (rc != 0)
+ return rc;
+
+ for (i = 0; i < num_fields; i++) {
+ rc = crypto_shash_update(&desc.shash,
+ (const u8 *) &field_data[i].len,
+ sizeof(field_data[i].len));
+ rc = crypto_shash_update(&desc.shash, field_data[i].data,
+ field_data[i].len);
+ if (rc)
+ break;
+ }
+
+ if (!rc)
+ rc = crypto_shash_final(&desc.shash, hash->digest);
+
+ return rc;
+}
+
+int ima_calc_field_array_hash(struct ima_field_data *field_data, int num_fields,
+ struct ima_digest_data *hash)
+{
+ struct crypto_shash *tfm;
+ int rc;
+
+ tfm = ima_alloc_tfm(hash->algo);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ rc = ima_calc_field_array_hash_tfm(field_data, num_fields, hash, tfm);
+
+ ima_free_tfm(tfm);
+
+ return rc;
}
static void __init ima_pcrread(int idx, u8 *pcr)
@@ -120,16 +204,17 @@ static void __init ima_pcrread(int idx, u8 *pcr)
/*
* Calculate the boot aggregate hash
*/
-int __init ima_calc_boot_aggregate(char *digest)
+static int __init ima_calc_boot_aggregate_tfm(char *digest,
+ struct crypto_shash *tfm)
{
- u8 pcr_i[IMA_DIGEST_SIZE];
+ u8 pcr_i[TPM_DIGEST_SIZE];
int rc, i;
struct {
struct shash_desc shash;
- char ctx[crypto_shash_descsize(ima_shash_tfm)];
+ char ctx[crypto_shash_descsize(tfm)];
} desc;
- desc.shash.tfm = ima_shash_tfm;
+ desc.shash.tfm = tfm;
desc.shash.flags = 0;
rc = crypto_shash_init(&desc.shash);
@@ -140,9 +225,26 @@ int __init ima_calc_boot_aggregate(char *digest)
for (i = TPM_PCR0; i < TPM_PCR8; i++) {
ima_pcrread(i, pcr_i);
/* now accumulate with current aggregate */
- rc = crypto_shash_update(&desc.shash, pcr_i, IMA_DIGEST_SIZE);
+ rc = crypto_shash_update(&desc.shash, pcr_i, TPM_DIGEST_SIZE);
}
if (!rc)
crypto_shash_final(&desc.shash, digest);
return rc;
}
+
+int __init ima_calc_boot_aggregate(struct ima_digest_data *hash)
+{
+ struct crypto_shash *tfm;
+ int rc;
+
+ tfm = ima_alloc_tfm(hash->algo);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ hash->length = crypto_shash_digestsize(tfm);
+ rc = ima_calc_boot_aggregate_tfm(hash->digest, tfm);
+
+ ima_free_tfm(tfm);
+
+ return rc;
+}
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 38477c9c3415..d47a7c86a21d 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -88,8 +88,7 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
* against concurrent list-extension
*/
rcu_read_lock();
- qe = list_entry_rcu(qe->later.next,
- struct ima_queue_entry, later);
+ qe = list_entry_rcu(qe->later.next, struct ima_queue_entry, later);
rcu_read_unlock();
(*pos)++;
@@ -100,7 +99,7 @@ static void ima_measurements_stop(struct seq_file *m, void *v)
{
}
-static void ima_putc(struct seq_file *m, void *data, int datalen)
+void ima_putc(struct seq_file *m, void *data, int datalen)
{
while (datalen--)
seq_putc(m, *(char *)data++);
@@ -111,6 +110,7 @@ static void ima_putc(struct seq_file *m, void *data, int datalen)
* char[20]=template digest
* 32bit-le=template name size
* char[n]=template name
+ * [eventdata length]
* eventdata[n]=template specific data
*/
static int ima_measurements_show(struct seq_file *m, void *v)
@@ -120,6 +120,7 @@ static int ima_measurements_show(struct seq_file *m, void *v)
struct ima_template_entry *e;
int namelen;
u32 pcr = CONFIG_IMA_MEASURE_PCR_IDX;
+ int i;
/* get entry */
e = qe->entry;
@@ -134,18 +135,25 @@ static int ima_measurements_show(struct seq_file *m, void *v)
ima_putc(m, &pcr, sizeof pcr);
/* 2nd: template digest */
- ima_putc(m, e->digest, IMA_DIGEST_SIZE);
+ ima_putc(m, e->digest, TPM_DIGEST_SIZE);
/* 3rd: template name size */
- namelen = strlen(e->template_name);
+ namelen = strlen(e->template_desc->name);
ima_putc(m, &namelen, sizeof namelen);
/* 4th: template name */
- ima_putc(m, (void *)e->template_name, namelen);
+ ima_putc(m, e->template_desc->name, namelen);
+
+ /* 5th: template length (except for 'ima' template) */
+ if (strcmp(e->template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0)
+ ima_putc(m, &e->template_data_len,
+ sizeof(e->template_data_len));
- /* 5th: template specific data */
- ima_template_show(m, (struct ima_template_data *)&e->template,
- IMA_SHOW_BINARY);
+ /* 6th: template specific data */
+ for (i = 0; i < e->template_desc->num_fields; i++) {
+ e->template_desc->fields[i]->field_show(m, IMA_SHOW_BINARY,
+ &e->template_data[i]);
+ }
return 0;
}
@@ -168,41 +176,21 @@ static const struct file_operations ima_measurements_ops = {
.release = seq_release,
};
-static void ima_print_digest(struct seq_file *m, u8 *digest)
+void ima_print_digest(struct seq_file *m, u8 *digest, int size)
{
int i;
- for (i = 0; i < IMA_DIGEST_SIZE; i++)
+ for (i = 0; i < size; i++)
seq_printf(m, "%02x", *(digest + i));
}
-void ima_template_show(struct seq_file *m, void *e, enum ima_show_type show)
-{
- struct ima_template_data *entry = e;
- int namelen;
-
- switch (show) {
- case IMA_SHOW_ASCII:
- ima_print_digest(m, entry->digest);
- seq_printf(m, " %s\n", entry->file_name);
- break;
- case IMA_SHOW_BINARY:
- ima_putc(m, entry->digest, IMA_DIGEST_SIZE);
-
- namelen = strlen(entry->file_name);
- ima_putc(m, &namelen, sizeof namelen);
- ima_putc(m, entry->file_name, namelen);
- default:
- break;
- }
-}
-
/* print in ascii */
static int ima_ascii_measurements_show(struct seq_file *m, void *v)
{
/* the list never shrinks, so we don't need a lock here */
struct ima_queue_entry *qe = v;
struct ima_template_entry *e;
+ int i;
/* get entry */
e = qe->entry;
@@ -213,14 +201,21 @@ static int ima_ascii_measurements_show(struct seq_file *m, void *v)
seq_printf(m, "%2d ", CONFIG_IMA_MEASURE_PCR_IDX);
/* 2nd: SHA1 template hash */
- ima_print_digest(m, e->digest);
+ ima_print_digest(m, e->digest, TPM_DIGEST_SIZE);
/* 3th: template name */
- seq_printf(m, " %s ", e->template_name);
+ seq_printf(m, " %s", e->template_desc->name);
/* 4th: template specific data */
- ima_template_show(m, (struct ima_template_data *)&e->template,
- IMA_SHOW_ASCII);
+ for (i = 0; i < e->template_desc->num_fields; i++) {
+ seq_puts(m, " ");
+ if (e->template_data[i].len == 0)
+ continue;
+
+ e->template_desc->fields[i]->field_show(m, IMA_SHOW_ASCII,
+ &e->template_data[i]);
+ }
+ seq_puts(m, "\n");
return 0;
}
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 162ea723db3d..15f34bd40abe 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -18,6 +18,7 @@
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/err.h>
+#include <crypto/hash_info.h>
#include "ima.h"
/* name for boot aggregate entry */
@@ -42,28 +43,38 @@ int ima_used_chip;
static void __init ima_add_boot_aggregate(void)
{
struct ima_template_entry *entry;
+ struct integrity_iint_cache tmp_iint, *iint = &tmp_iint;
const char *op = "add_boot_aggregate";
const char *audit_cause = "ENOMEM";
int result = -ENOMEM;
- int violation = 1;
+ int violation = 0;
+ struct {
+ struct ima_digest_data hdr;
+ char digest[TPM_DIGEST_SIZE];
+ } hash;
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- goto err_out;
+ memset(iint, 0, sizeof(*iint));
+ memset(&hash, 0, sizeof(hash));
+ iint->ima_hash = &hash.hdr;
+ iint->ima_hash->algo = HASH_ALGO_SHA1;
+ iint->ima_hash->length = SHA1_DIGEST_SIZE;
- memset(&entry->template, 0, sizeof(entry->template));
- strncpy(entry->template.file_name, boot_aggregate_name,
- IMA_EVENT_NAME_LEN_MAX);
if (ima_used_chip) {
- violation = 0;
- result = ima_calc_boot_aggregate(entry->template.digest);
+ result = ima_calc_boot_aggregate(&hash.hdr);
if (result < 0) {
audit_cause = "hashing_error";
kfree(entry);
goto err_out;
}
}
- result = ima_store_template(entry, violation, NULL);
+
+ result = ima_alloc_init_template(iint, NULL, boot_aggregate_name,
+ NULL, 0, &entry);
+ if (result < 0)
+ return;
+
+ result = ima_store_template(entry, violation, NULL,
+ boot_aggregate_name);
if (result < 0)
kfree(entry);
return;
@@ -74,7 +85,7 @@ err_out:
int __init ima_init(void)
{
- u8 pcr_i[IMA_DIGEST_SIZE];
+ u8 pcr_i[TPM_DIGEST_SIZE];
int rc;
ima_used_chip = 0;
@@ -88,6 +99,10 @@ int __init ima_init(void)
rc = ima_init_crypto();
if (rc)
return rc;
+ rc = ima_init_template();
+ if (rc != 0)
+ return rc;
+
ima_add_boot_aggregate(); /* boot aggregate must be first entry */
ima_init_policy();
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index e9508d5bbfcf..149ee1119f87 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/xattr.h>
#include <linux/ima.h>
+#include <crypto/hash_info.h>
#include "ima.h"
@@ -35,11 +36,33 @@ int ima_appraise = IMA_APPRAISE_ENFORCE;
int ima_appraise;
#endif
-char *ima_hash = "sha1";
+int ima_hash_algo = HASH_ALGO_SHA1;
+static int hash_setup_done;
+
static int __init hash_setup(char *str)
{
- if (strncmp(str, "md5", 3) == 0)
- ima_hash = "md5";
+ struct ima_template_desc *template_desc = ima_template_desc_current();
+ int i;
+
+ if (hash_setup_done)
+ return 1;
+
+ if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) {
+ if (strncmp(str, "sha1", 4) == 0)
+ ima_hash_algo = HASH_ALGO_SHA1;
+ else if (strncmp(str, "md5", 3) == 0)
+ ima_hash_algo = HASH_ALGO_MD5;
+ goto out;
+ }
+
+ for (i = 0; i < HASH_ALGO__LAST; i++) {
+ if (strcmp(str, hash_algo_name[i]) == 0) {
+ ima_hash_algo = i;
+ break;
+ }
+ }
+out:
+ hash_setup_done = 1;
return 1;
}
__setup("ima_hash=", hash_setup);
@@ -92,10 +115,9 @@ out:
pathname = dentry->d_name.name;
if (send_tomtou)
- ima_add_violation(inode, pathname,
- "invalid_pcr", "ToMToU");
+ ima_add_violation(file, pathname, "invalid_pcr", "ToMToU");
if (send_writers)
- ima_add_violation(inode, pathname,
+ ima_add_violation(file, pathname,
"invalid_pcr", "open_writers");
kfree(pathbuf);
}
@@ -144,9 +166,12 @@ static int process_measurement(struct file *file, const char *filename,
{
struct inode *inode = file_inode(file);
struct integrity_iint_cache *iint;
+ struct ima_template_desc *template_desc = ima_template_desc_current();
char *pathbuf = NULL;
const char *pathname = NULL;
int rc = -ENOMEM, action, must_appraise, _func;
+ struct evm_ima_xattr_data *xattr_value = NULL, **xattr_ptr = NULL;
+ int xattr_len = 0;
if (!ima_initialized || !S_ISREG(inode->i_mode))
return 0;
@@ -185,7 +210,13 @@ static int process_measurement(struct file *file, const char *filename,
goto out_digsig;
}
- rc = ima_collect_measurement(iint, file);
+ if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) {
+ if (action & IMA_APPRAISE_SUBMASK)
+ xattr_ptr = &xattr_value;
+ } else
+ xattr_ptr = &xattr_value;
+
+ rc = ima_collect_measurement(iint, file, xattr_ptr, &xattr_len);
if (rc != 0)
goto out_digsig;
@@ -194,9 +225,11 @@ static int process_measurement(struct file *file, const char *filename,
pathname = (const char *)file->f_dentry->d_name.name;
if (action & IMA_MEASURE)
- ima_store_measurement(iint, file, pathname);
+ ima_store_measurement(iint, file, pathname,
+ xattr_value, xattr_len);
if (action & IMA_APPRAISE_SUBMASK)
- rc = ima_appraise_measurement(_func, iint, file, pathname);
+ rc = ima_appraise_measurement(_func, iint, file, pathname,
+ xattr_value, xattr_len);
if (action & IMA_AUDIT)
ima_audit_measurement(iint, pathname);
kfree(pathbuf);
@@ -205,6 +238,7 @@ out_digsig:
rc = -EACCES;
out:
mutex_unlock(&inode->i_mutex);
+ kfree(xattr_value);
if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE))
return -EACCES;
return 0;
@@ -244,9 +278,9 @@ int ima_file_mmap(struct file *file, unsigned long prot)
int ima_bprm_check(struct linux_binprm *bprm)
{
return process_measurement(bprm->file,
- (strcmp(bprm->filename, bprm->interp) == 0) ?
- bprm->filename : bprm->interp,
- MAY_EXEC, BPRM_CHECK);
+ (strcmp(bprm->filename, bprm->interp) == 0) ?
+ bprm->filename : bprm->interp,
+ MAY_EXEC, BPRM_CHECK);
}
/**
@@ -263,8 +297,8 @@ int ima_file_check(struct file *file, int mask)
{
ima_rdwr_violation_check(file);
return process_measurement(file, NULL,
- mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
- FILE_CHECK);
+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
+ FILE_CHECK);
}
EXPORT_SYMBOL_GPL(ima_file_check);
@@ -294,6 +328,7 @@ static int __init init_ima(void)
{
int error;
+ hash_setup(CONFIG_IMA_DEFAULT_HASH);
error = ima_init();
if (!error)
ima_initialized = 1;
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 399433ad614e..a9c3d3cd1990 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -73,7 +73,6 @@ static struct ima_rule_entry default_rules[] = {
{.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC},
{.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC},
{.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC},
- {.action = DONT_MEASURE,.fsmagic = RAMFS_MAGIC,.flags = IMA_FSMAGIC},
{.action = DONT_MEASURE,.fsmagic = DEVPTS_SUPER_MAGIC,.flags = IMA_FSMAGIC},
{.action = DONT_MEASURE,.fsmagic = BINFMTFS_MAGIC,.flags = IMA_FSMAGIC},
{.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,.flags = IMA_FSMAGIC},
diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
index ff63fe00c195..d85e99761f4f 100644
--- a/security/integrity/ima/ima_queue.c
+++ b/security/integrity/ima/ima_queue.c
@@ -50,7 +50,7 @@ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value)
key = ima_hash_key(digest_value);
rcu_read_lock();
hlist_for_each_entry_rcu(qe, &ima_htable.queue[key], hnext) {
- rc = memcmp(qe->entry->digest, digest_value, IMA_DIGEST_SIZE);
+ rc = memcmp(qe->entry->digest, digest_value, TPM_DIGEST_SIZE);
if (rc == 0) {
ret = qe;
break;
@@ -104,9 +104,10 @@ static int ima_pcr_extend(const u8 *hash)
* and extend the pcr.
*/
int ima_add_template_entry(struct ima_template_entry *entry, int violation,
- const char *op, struct inode *inode)
+ const char *op, struct inode *inode,
+ const unsigned char *filename)
{
- u8 digest[IMA_DIGEST_SIZE];
+ u8 digest[TPM_DIGEST_SIZE];
const char *audit_cause = "hash_added";
char tpm_audit_cause[AUDIT_CAUSE_LEN_MAX];
int audit_info = 1;
@@ -141,8 +142,7 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation,
}
out:
mutex_unlock(&ima_extend_list_mutex);
- integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
- entry->template.file_name,
+ integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename,
op, audit_cause, result, audit_info);
return result;
}
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
new file mode 100644
index 000000000000..4e5da990630b
--- /dev/null
+++ b/security/integrity/ima/ima_template.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2013 Politecnico di Torino, Italy
+ * TORSEC group -- http://security.polito.it
+ *
+ * Author: Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * File: ima_template.c
+ * Helpers to manage template descriptors.
+ */
+#include <crypto/hash_info.h>
+
+#include "ima.h"
+#include "ima_template_lib.h"
+
+static struct ima_template_desc defined_templates[] = {
+ {.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT},
+ {.name = "ima-ng",.fmt = "d-ng|n-ng"},
+ {.name = "ima-sig",.fmt = "d-ng|n-ng|sig"},
+};
+
+static struct ima_template_field supported_fields[] = {
+ {.field_id = "d",.field_init = ima_eventdigest_init,
+ .field_show = ima_show_template_digest},
+ {.field_id = "n",.field_init = ima_eventname_init,
+ .field_show = ima_show_template_string},
+ {.field_id = "d-ng",.field_init = ima_eventdigest_ng_init,
+ .field_show = ima_show_template_digest_ng},
+ {.field_id = "n-ng",.field_init = ima_eventname_ng_init,
+ .field_show = ima_show_template_string},
+ {.field_id = "sig",.field_init = ima_eventsig_init,
+ .field_show = ima_show_template_sig},
+};
+
+static struct ima_template_desc *ima_template;
+static struct ima_template_desc *lookup_template_desc(const char *name);
+
+static int __init ima_template_setup(char *str)
+{
+ struct ima_template_desc *template_desc;
+ int template_len = strlen(str);
+
+ /*
+ * Verify that a template with the supplied name exists.
+ * If not, use CONFIG_IMA_DEFAULT_TEMPLATE.
+ */
+ template_desc = lookup_template_desc(str);
+ if (!template_desc)
+ return 1;
+
+ /*
+ * Verify whether the current hash algorithm is supported
+ * by the 'ima' template.
+ */
+ if (template_len == 3 && strcmp(str, IMA_TEMPLATE_IMA_NAME) == 0 &&
+ ima_hash_algo != HASH_ALGO_SHA1 && ima_hash_algo != HASH_ALGO_MD5) {
+ pr_err("IMA: template does not support hash alg\n");
+ return 1;
+ }
+
+ ima_template = template_desc;
+ return 1;
+}
+__setup("ima_template=", ima_template_setup);
+
+static struct ima_template_desc *lookup_template_desc(const char *name)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(defined_templates); i++) {
+ if (strcmp(defined_templates[i].name, name) == 0)
+ return defined_templates + i;
+ }
+
+ return NULL;
+}
+
+static struct ima_template_field *lookup_template_field(const char *field_id)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(supported_fields); i++)
+ if (strncmp(supported_fields[i].field_id, field_id,
+ IMA_TEMPLATE_FIELD_ID_MAX_LEN) == 0)
+ return &supported_fields[i];
+ return NULL;
+}
+
+static int template_fmt_size(char *template_fmt)
+{
+ char c;
+ int template_fmt_len = strlen(template_fmt);
+ int i = 0, j = 0;
+
+ while (i < template_fmt_len) {
+ c = template_fmt[i];
+ if (c == '|')
+ j++;
+ i++;
+ }
+
+ return j + 1;
+}
+
+static int template_desc_init_fields(char *template_fmt,
+ struct ima_template_field ***fields,
+ int *num_fields)
+{
+ char *c, *template_fmt_ptr = template_fmt;
+ int template_num_fields = template_fmt_size(template_fmt);
+ int i, result = 0;
+
+ if (template_num_fields > IMA_TEMPLATE_NUM_FIELDS_MAX)
+ return -EINVAL;
+
+ *fields = kzalloc(template_num_fields * sizeof(*fields), GFP_KERNEL);
+ if (*fields == NULL) {
+ result = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; (c = strsep(&template_fmt_ptr, "|")) != NULL &&
+ i < template_num_fields; i++) {
+ struct ima_template_field *f = lookup_template_field(c);
+
+ if (!f) {
+ result = -ENOENT;
+ goto out;
+ }
+ (*fields)[i] = f;
+ }
+ *num_fields = i;
+ return 0;
+out:
+ kfree(*fields);
+ *fields = NULL;
+ return result;
+}
+
+static int init_defined_templates(void)
+{
+ int i = 0;
+ int result = 0;
+
+ /* Init defined templates. */
+ for (i = 0; i < ARRAY_SIZE(defined_templates); i++) {
+ struct ima_template_desc *template = &defined_templates[i];
+
+ result = template_desc_init_fields(template->fmt,
+ &(template->fields),
+ &(template->num_fields));
+ if (result < 0)
+ return result;
+ }
+ return result;
+}
+
+struct ima_template_desc *ima_template_desc_current(void)
+{
+ if (!ima_template)
+ ima_template =
+ lookup_template_desc(CONFIG_IMA_DEFAULT_TEMPLATE);
+ return ima_template;
+}
+
+int ima_init_template(void)
+{
+ int result;
+
+ result = init_defined_templates();
+ if (result < 0)
+ return result;
+
+ return 0;
+}
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
new file mode 100644
index 000000000000..6d66ad6ed265
--- /dev/null
+++ b/security/integrity/ima/ima_template_lib.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (C) 2013 Politecnico di Torino, Italy
+ * TORSEC group -- http://security.polito.it
+ *
+ * Author: Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * File: ima_template_lib.c
+ * Library of supported template fields.
+ */
+#include <crypto/hash_info.h>
+
+#include "ima_template_lib.h"
+
+static bool ima_template_hash_algo_allowed(u8 algo)
+{
+ if (algo == HASH_ALGO_SHA1 || algo == HASH_ALGO_MD5)
+ return true;
+
+ return false;
+}
+
+enum data_formats {
+ DATA_FMT_DIGEST = 0,
+ DATA_FMT_DIGEST_WITH_ALGO,
+ DATA_FMT_EVENT_NAME,
+ DATA_FMT_STRING,
+ DATA_FMT_HEX
+};
+
+static int ima_write_template_field_data(const void *data, const u32 datalen,
+ enum data_formats datafmt,
+ struct ima_field_data *field_data)
+{
+ u8 *buf, *buf_ptr;
+ u32 buflen;
+
+ switch (datafmt) {
+ case DATA_FMT_EVENT_NAME:
+ buflen = IMA_EVENT_NAME_LEN_MAX + 1;
+ break;
+ case DATA_FMT_STRING:
+ buflen = datalen + 1;
+ break;
+ default:
+ buflen = datalen;
+ }
+
+ buf = kzalloc(buflen, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ memcpy(buf, data, datalen);
+
+ /*
+ * Replace all space characters with underscore for event names and
+ * strings. This avoid that, during the parsing of a measurements list,
+ * filenames with spaces or that end with the suffix ' (deleted)' are
+ * split into multiple template fields (the space is the delimitator
+ * character for measurements lists in ASCII format).
+ */
+ if (datafmt == DATA_FMT_EVENT_NAME || datafmt == DATA_FMT_STRING) {
+ for (buf_ptr = buf; buf_ptr - buf < datalen; buf_ptr++)
+ if (*buf_ptr == ' ')
+ *buf_ptr = '_';
+ }
+
+ field_data->data = buf;
+ field_data->len = buflen;
+ return 0;
+}
+
+static void ima_show_template_data_ascii(struct seq_file *m,
+ enum ima_show_type show,
+ enum data_formats datafmt,
+ struct ima_field_data *field_data)
+{
+ u8 *buf_ptr = field_data->data, buflen = field_data->len;
+
+ switch (datafmt) {
+ case DATA_FMT_DIGEST_WITH_ALGO:
+ buf_ptr = strnchr(field_data->data, buflen, ':');
+ if (buf_ptr != field_data->data)
+ seq_printf(m, "%s", field_data->data);
+
+ /* skip ':' and '\0' */
+ buf_ptr += 2;
+ buflen -= buf_ptr - field_data->data;
+ case DATA_FMT_DIGEST:
+ case DATA_FMT_HEX:
+ if (!buflen)
+ break;
+ ima_print_digest(m, buf_ptr, buflen);
+ break;
+ case DATA_FMT_STRING:
+ seq_printf(m, "%s", buf_ptr);
+ break;
+ default:
+ break;
+ }
+}
+
+static void ima_show_template_data_binary(struct seq_file *m,
+ enum ima_show_type show,
+ enum data_formats datafmt,
+ struct ima_field_data *field_data)
+{
+ ima_putc(m, &field_data->len, sizeof(u32));
+ if (!field_data->len)
+ return;
+ ima_putc(m, field_data->data, field_data->len);
+}
+
+static void ima_show_template_field_data(struct seq_file *m,
+ enum ima_show_type show,
+ enum data_formats datafmt,
+ struct ima_field_data *field_data)
+{
+ switch (show) {
+ case IMA_SHOW_ASCII:
+ ima_show_template_data_ascii(m, show, datafmt, field_data);
+ break;
+ case IMA_SHOW_BINARY:
+ ima_show_template_data_binary(m, show, datafmt, field_data);
+ break;
+ default:
+ break;
+ }
+}
+
+void ima_show_template_digest(struct seq_file *m, enum ima_show_type show,
+ struct ima_field_data *field_data)
+{
+ ima_show_template_field_data(m, show, DATA_FMT_DIGEST, field_data);
+}
+
+void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show,
+ struct ima_field_data *field_data)
+{
+ ima_show_template_field_data(m, show, DATA_FMT_DIGEST_WITH_ALGO,
+ field_data);
+}
+
+void ima_show_template_string(struct seq_file *m, enum ima_show_type show,
+ struct ima_field_data *field_data)
+{
+ ima_show_template_field_data(m, show, DATA_FMT_STRING, field_data);
+}
+
+void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
+ struct ima_field_data *field_data)
+{
+ ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data);
+}
+
+static int ima_eventdigest_init_common(u8 *digest, u32 digestsize, u8 hash_algo,
+ struct ima_field_data *field_data,
+ bool size_limit)
+{
+ /*
+ * digest formats:
+ * - DATA_FMT_DIGEST: digest
+ * - DATA_FMT_DIGEST_WITH_ALGO: [<hash algo>] + ':' + '\0' + digest,
+ * where <hash algo> is provided if the hash algoritm is not
+ * SHA1 or MD5
+ */
+ u8 buffer[CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 };
+ enum data_formats fmt = DATA_FMT_DIGEST;
+ u32 offset = 0;
+
+ if (!size_limit) {
+ fmt = DATA_FMT_DIGEST_WITH_ALGO;
+ if (hash_algo < HASH_ALGO__LAST)
+ offset += snprintf(buffer, CRYPTO_MAX_ALG_NAME + 1,
+ "%s", hash_algo_name[hash_algo]);
+ buffer[offset] = ':';
+ offset += 2;
+ }
+
+ if (digest)
+ memcpy(buffer + offset, digest, digestsize);
+ else
+ /*
+ * If digest is NULL, the event being recorded is a violation.
+ * Make room for the digest by increasing the offset of
+ * IMA_DIGEST_SIZE.
+ */
+ offset += IMA_DIGEST_SIZE;
+
+ return ima_write_template_field_data(buffer, offset + digestsize,
+ fmt, field_data);
+}
+
+/*
+ * This function writes the digest of an event (with size limit).
+ */
+int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file,
+ const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value, int xattr_len,
+ struct ima_field_data *field_data)
+{
+ struct {
+ struct ima_digest_data hdr;
+ char digest[IMA_MAX_DIGEST_SIZE];
+ } hash;
+ u8 *cur_digest = NULL;
+ u32 cur_digestsize = 0;
+ struct inode *inode;
+ int result;
+
+ memset(&hash, 0, sizeof(hash));
+
+ if (!iint) /* recording a violation. */
+ goto out;
+
+ if (ima_template_hash_algo_allowed(iint->ima_hash->algo)) {
+ cur_digest = iint->ima_hash->digest;
+ cur_digestsize = iint->ima_hash->length;
+ goto out;
+ }
+
+ if (!file) /* missing info to re-calculate the digest */
+ return -EINVAL;
+
+ inode = file_inode(file);
+ hash.hdr.algo = ima_template_hash_algo_allowed(ima_hash_algo) ?
+ ima_hash_algo : HASH_ALGO_SHA1;
+ result = ima_calc_file_hash(file, &hash.hdr);
+ if (result) {
+ integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode,
+ filename, "collect_data",
+ "failed", result, 0);
+ return result;
+ }
+ cur_digest = hash.hdr.digest;
+ cur_digestsize = hash.hdr.length;
+out:
+ return ima_eventdigest_init_common(cur_digest, cur_digestsize, -1,
+ field_data, true);
+}
+
+/*
+ * This function writes the digest of an event (without size limit).
+ */
+int ima_eventdigest_ng_init(struct integrity_iint_cache *iint,
+ struct file *file, const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value,
+ int xattr_len, struct ima_field_data *field_data)
+{
+ u8 *cur_digest = NULL, hash_algo = HASH_ALGO__LAST;
+ u32 cur_digestsize = 0;
+
+ /* If iint is NULL, we are recording a violation. */
+ if (!iint)
+ goto out;
+
+ cur_digest = iint->ima_hash->digest;
+ cur_digestsize = iint->ima_hash->length;
+
+ hash_algo = iint->ima_hash->algo;
+out:
+ return ima_eventdigest_init_common(cur_digest, cur_digestsize,
+ hash_algo, field_data, false);
+}
+
+static int ima_eventname_init_common(struct integrity_iint_cache *iint,
+ struct file *file,
+ const unsigned char *filename,
+ struct ima_field_data *field_data,
+ bool size_limit)
+{
+ const char *cur_filename = NULL;
+ u32 cur_filename_len = 0;
+ enum data_formats fmt = size_limit ?
+ DATA_FMT_EVENT_NAME : DATA_FMT_STRING;
+
+ BUG_ON(filename == NULL && file == NULL);
+
+ if (filename) {
+ cur_filename = filename;
+ cur_filename_len = strlen(filename);
+
+ if (!size_limit || cur_filename_len <= IMA_EVENT_NAME_LEN_MAX)
+ goto out;
+ }
+
+ if (file) {
+ cur_filename = file->f_dentry->d_name.name;
+ cur_filename_len = strlen(cur_filename);
+ } else
+ /*
+ * Truncate filename if the latter is too long and
+ * the file descriptor is not available.
+ */
+ cur_filename_len = IMA_EVENT_NAME_LEN_MAX;
+out:
+ return ima_write_template_field_data(cur_filename, cur_filename_len,
+ fmt, field_data);
+}
+
+/*
+ * This function writes the name of an event (with size limit).
+ */
+int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file,
+ const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value, int xattr_len,
+ struct ima_field_data *field_data)
+{
+ return ima_eventname_init_common(iint, file, filename,
+ field_data, true);
+}
+
+/*
+ * This function writes the name of an event (without size limit).
+ */
+int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file,
+ const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value, int xattr_len,
+ struct ima_field_data *field_data)
+{
+ return ima_eventname_init_common(iint, file, filename,
+ field_data, false);
+}
+
+/*
+ * ima_eventsig_init - include the file signature as part of the template data
+ */
+int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file,
+ const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value, int xattr_len,
+ struct ima_field_data *field_data)
+{
+ enum data_formats fmt = DATA_FMT_HEX;
+ int rc = 0;
+
+ if ((!xattr_value) || (xattr_value->type != EVM_IMA_XATTR_DIGSIG))
+ goto out;
+
+ rc = ima_write_template_field_data(xattr_value, xattr_len, fmt,
+ field_data);
+out:
+ return rc;
+}
diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h
new file mode 100644
index 000000000000..63f6b52cb1c2
--- /dev/null
+++ b/security/integrity/ima/ima_template_lib.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2013 Politecnico di Torino, Italy
+ * TORSEC group -- http://security.polito.it
+ *
+ * Author: Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * File: ima_template_lib.h
+ * Header for the library of supported template fields.
+ */
+#ifndef __LINUX_IMA_TEMPLATE_LIB_H
+#define __LINUX_IMA_TEMPLATE_LIB_H
+
+#include <linux/seq_file.h>
+#include "ima.h"
+
+void ima_show_template_digest(struct seq_file *m, enum ima_show_type show,
+ struct ima_field_data *field_data);
+void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show,
+ struct ima_field_data *field_data);
+void ima_show_template_string(struct seq_file *m, enum ima_show_type show,
+ struct ima_field_data *field_data);
+void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
+ struct ima_field_data *field_data);
+int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file,
+ const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value, int xattr_len,
+ struct ima_field_data *field_data);
+int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file,
+ const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value, int xattr_len,
+ struct ima_field_data *field_data);
+int ima_eventdigest_ng_init(struct integrity_iint_cache *iint,
+ struct file *file, const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value,
+ int xattr_len, struct ima_field_data *field_data);
+int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file,
+ const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value, int xattr_len,
+ struct ima_field_data *field_data);
+int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file,
+ const unsigned char *filename,
+ struct evm_ima_xattr_data *xattr_value, int xattr_len,
+ struct ima_field_data *field_data);
+#endif /* __LINUX_IMA_TEMPLATE_LIB_H */
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index c42fb7a70dee..b9e7c133734a 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -54,25 +54,57 @@ enum evm_ima_xattr_type {
IMA_XATTR_DIGEST = 0x01,
EVM_XATTR_HMAC,
EVM_IMA_XATTR_DIGSIG,
+ IMA_XATTR_DIGEST_NG,
};
struct evm_ima_xattr_data {
u8 type;
u8 digest[SHA1_DIGEST_SIZE];
-} __attribute__((packed));
+} __packed;
+
+#define IMA_MAX_DIGEST_SIZE 64
+
+struct ima_digest_data {
+ u8 algo;
+ u8 length;
+ union {
+ struct {
+ u8 unused;
+ u8 type;
+ } sha1;
+ struct {
+ u8 type;
+ u8 algo;
+ } ng;
+ u8 data[2];
+ } xattr;
+ u8 digest[0];
+} __packed;
+
+/*
+ * signature format v2 - for using with asymmetric keys
+ */
+struct signature_v2_hdr {
+ uint8_t type; /* xattr type */
+ uint8_t version; /* signature format version */
+ uint8_t hash_algo; /* Digest algorithm [enum pkey_hash_algo] */
+ uint32_t keyid; /* IMA key identifier - not X509/PGP specific */
+ uint16_t sig_size; /* signature size */
+ uint8_t sig[0]; /* signature payload */
+} __packed;
/* integrity data associated with an inode */
struct integrity_iint_cache {
- struct rb_node rb_node; /* rooted in integrity_iint_tree */
+ struct rb_node rb_node; /* rooted in integrity_iint_tree */
struct inode *inode; /* back pointer to inode in question */
u64 version; /* track inode changes */
unsigned long flags;
- struct evm_ima_xattr_data ima_xattr;
enum integrity_status ima_file_status:4;
enum integrity_status ima_mmap_status:4;
enum integrity_status ima_bprm_status:4;
enum integrity_status ima_module_status:4;
enum integrity_status evm_status:4;
+ struct ima_digest_data *ima_hash;
};
/* rbtree tree calls to lookup, insert, delete
@@ -89,7 +121,7 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode);
#ifdef CONFIG_INTEGRITY_SIGNATURE
int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
- const char *digest, int digestlen);
+ const char *digest, int digestlen);
#else
@@ -105,12 +137,19 @@ static inline int integrity_digsig_verify(const unsigned int id,
#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
int asymmetric_verify(struct key *keyring, const char *sig,
int siglen, const char *data, int datalen);
+
+int integrity_init_keyring(const unsigned int id);
#else
static inline int asymmetric_verify(struct key *keyring, const char *sig,
int siglen, const char *data, int datalen)
{
return -EOPNOTSUPP;
}
+
+static int integrity_init_keyring(const unsigned int id)
+{
+ return 0;
+}
#endif
#ifdef CONFIG_INTEGRITY_AUDIT
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index a90d6d300dbd..a4f3f8c48d6e 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -4,6 +4,7 @@
config KEYS
bool "Enable access key retention support"
+ select ASSOCIATIVE_ARRAY
help
This option provides support for retaining authentication tokens and
access keys in the kernel.
@@ -19,6 +20,34 @@ config KEYS
If you are unsure as to whether this is required, answer N.
+config PERSISTENT_KEYRINGS
+ bool "Enable register of persistent per-UID keyrings"
+ depends on KEYS
+ help
+ This option provides a register of persistent per-UID keyrings,
+ primarily aimed at Kerberos key storage. The keyrings are persistent
+ in the sense that they stay around after all processes of that UID
+ have exited, not that they survive the machine being rebooted.
+
+ A particular keyring may be accessed by either the user whose keyring
+ it is or by a process with administrative privileges. The active
+ LSMs gets to rule on which admin-level processes get to access the
+ cache.
+
+ Keyrings are created and added into the register upon demand and get
+ removed if they expire (a default timeout is set upon creation).
+
+config BIG_KEYS
+ bool "Large payload keys"
+ depends on KEYS
+ depends on TMPFS
+ help
+ This option provides support for holding large keys within the kernel
+ (for example Kerberos ticket caches). The data may be stored out to
+ swapspace by tmpfs.
+
+ If you are unsure as to whether this is required, answer N.
+
config TRUSTED_KEYS
tristate "TRUSTED KEYS"
depends on KEYS && TCG_TPM
diff --git a/security/keys/Makefile b/security/keys/Makefile
index 504aaa008388..dfb3a7bededf 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -18,9 +18,11 @@ obj-y := \
obj-$(CONFIG_KEYS_COMPAT) += compat.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_SYSCTL) += sysctl.o
+obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
#
# Key types
#
+obj-$(CONFIG_BIG_KEYS) += big_key.o
obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted-keys/
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
new file mode 100644
index 000000000000..7f44c3207a9b
--- /dev/null
+++ b/security/keys/big_key.c
@@ -0,0 +1,207 @@
+/* Large capacity key type
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/file.h>
+#include <linux/shmem_fs.h>
+#include <linux/err.h>
+#include <keys/user-type.h>
+#include <keys/big_key-type.h>
+
+MODULE_LICENSE("GPL");
+
+/*
+ * If the data is under this limit, there's no point creating a shm file to
+ * hold it as the permanently resident metadata for the shmem fs will be at
+ * least as large as the data.
+ */
+#define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry))
+
+/*
+ * big_key defined keys take an arbitrary string as the description and an
+ * arbitrary blob of data as the payload
+ */
+struct key_type key_type_big_key = {
+ .name = "big_key",
+ .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
+ .instantiate = big_key_instantiate,
+ .match = user_match,
+ .revoke = big_key_revoke,
+ .destroy = big_key_destroy,
+ .describe = big_key_describe,
+ .read = big_key_read,
+};
+
+/*
+ * Instantiate a big key
+ */
+int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
+{
+ struct path *path = (struct path *)&key->payload.data2;
+ struct file *file;
+ ssize_t written;
+ size_t datalen = prep->datalen;
+ int ret;
+
+ ret = -EINVAL;
+ if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
+ goto error;
+
+ /* Set an arbitrary quota */
+ ret = key_payload_reserve(key, 16);
+ if (ret < 0)
+ goto error;
+
+ key->type_data.x[1] = datalen;
+
+ if (datalen > BIG_KEY_FILE_THRESHOLD) {
+ /* Create a shmem file to store the data in. This will permit the data
+ * to be swapped out if needed.
+ *
+ * TODO: Encrypt the stored data with a temporary key.
+ */
+ file = shmem_file_setup("", datalen, 0);
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ goto err_quota;
+ }
+
+ written = kernel_write(file, prep->data, prep->datalen, 0);
+ if (written != datalen) {
+ ret = written;
+ if (written >= 0)
+ ret = -ENOMEM;
+ goto err_fput;
+ }
+
+ /* Pin the mount and dentry to the key so that we can open it again
+ * later
+ */
+ *path = file->f_path;
+ path_get(path);
+ fput(file);
+ } else {
+ /* Just store the data in a buffer */
+ void *data = kmalloc(datalen, GFP_KERNEL);
+ if (!data) {
+ ret = -ENOMEM;
+ goto err_quota;
+ }
+
+ key->payload.data = memcpy(data, prep->data, prep->datalen);
+ }
+ return 0;
+
+err_fput:
+ fput(file);
+err_quota:
+ key_payload_reserve(key, 0);
+error:
+ return ret;
+}
+
+/*
+ * dispose of the links from a revoked keyring
+ * - called with the key sem write-locked
+ */
+void big_key_revoke(struct key *key)
+{
+ struct path *path = (struct path *)&key->payload.data2;
+
+ /* clear the quota */
+ key_payload_reserve(key, 0);
+ if (key_is_instantiated(key) && key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD)
+ vfs_truncate(path, 0);
+}
+
+/*
+ * dispose of the data dangling from the corpse of a big_key key
+ */
+void big_key_destroy(struct key *key)
+{
+ if (key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) {
+ struct path *path = (struct path *)&key->payload.data2;
+ path_put(path);
+ path->mnt = NULL;
+ path->dentry = NULL;
+ } else {
+ kfree(key->payload.data);
+ key->payload.data = NULL;
+ }
+}
+
+/*
+ * describe the big_key key
+ */
+void big_key_describe(const struct key *key, struct seq_file *m)
+{
+ unsigned long datalen = key->type_data.x[1];
+
+ seq_puts(m, key->description);
+
+ if (key_is_instantiated(key))
+ seq_printf(m, ": %lu [%s]",
+ datalen,
+ datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff");
+}
+
+/*
+ * read the key data
+ * - the key's semaphore is read-locked
+ */
+long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
+{
+ unsigned long datalen = key->type_data.x[1];
+ long ret;
+
+ if (!buffer || buflen < datalen)
+ return datalen;
+
+ if (datalen > BIG_KEY_FILE_THRESHOLD) {
+ struct path *path = (struct path *)&key->payload.data2;
+ struct file *file;
+ loff_t pos;
+
+ file = dentry_open(path, O_RDONLY, current_cred());
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ pos = 0;
+ ret = vfs_read(file, buffer, datalen, &pos);
+ fput(file);
+ if (ret >= 0 && ret != datalen)
+ ret = -EIO;
+ } else {
+ ret = datalen;
+ if (copy_to_user(buffer, key->payload.data, datalen) != 0)
+ ret = -EFAULT;
+ }
+
+ return ret;
+}
+
+/*
+ * Module stuff
+ */
+static int __init big_key_init(void)
+{
+ return register_key_type(&key_type_big_key);
+}
+
+static void __exit big_key_cleanup(void)
+{
+ unregister_key_type(&key_type_big_key);
+}
+
+module_init(big_key_init);
+module_exit(big_key_cleanup);
diff --git a/security/keys/compat.c b/security/keys/compat.c
index d65fa7fa29ba..bbd32c729dbb 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -138,6 +138,9 @@ asmlinkage long compat_sys_keyctl(u32 option,
case KEYCTL_INVALIDATE:
return keyctl_invalidate_key(arg2);
+ case KEYCTL_GET_PERSISTENT:
+ return keyctl_get_persistent(arg2, arg3);
+
default:
return -EOPNOTSUPP;
}
diff --git a/security/keys/gc.c b/security/keys/gc.c
index d67c97bb1025..d3222b6d7d59 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -131,50 +131,6 @@ void key_gc_keytype(struct key_type *ktype)
}
/*
- * Garbage collect pointers from a keyring.
- *
- * Not called with any locks held. The keyring's key struct will not be
- * deallocated under us as only our caller may deallocate it.
- */
-static void key_gc_keyring(struct key *keyring, time_t limit)
-{
- struct keyring_list *klist;
- int loop;
-
- kenter("%x", key_serial(keyring));
-
- if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
- (1 << KEY_FLAG_REVOKED)))
- goto dont_gc;
-
- /* scan the keyring looking for dead keys */
- rcu_read_lock();
- klist = rcu_dereference(keyring->payload.subscriptions);
- if (!klist)
- goto unlock_dont_gc;
-
- loop = klist->nkeys;
- smp_rmb();
- for (loop--; loop >= 0; loop--) {
- struct key *key = rcu_dereference(klist->keys[loop]);
- if (key_is_dead(key, limit))
- goto do_gc;
- }
-
-unlock_dont_gc:
- rcu_read_unlock();
-dont_gc:
- kleave(" [no gc]");
- return;
-
-do_gc:
- rcu_read_unlock();
-
- keyring_gc(keyring, limit);
- kleave(" [gc]");
-}
-
-/*
* Garbage collect a list of unreferenced, detached keys
*/
static noinline void key_gc_unused_keys(struct list_head *keys)
@@ -392,8 +348,7 @@ found_unreferenced_key:
*/
found_keyring:
spin_unlock(&key_serial_lock);
- kdebug("scan keyring %d", key->serial);
- key_gc_keyring(key, limit);
+ keyring_gc(key, limit);
goto maybe_resched;
/* We found a dead key that is still referenced. Reset its type and
diff --git a/security/keys/internal.h b/security/keys/internal.h
index d4f1468b9b50..80b2aac4f50c 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -89,42 +89,53 @@ extern struct key_type *key_type_lookup(const char *type);
extern void key_type_put(struct key_type *ktype);
extern int __key_link_begin(struct key *keyring,
- const struct key_type *type,
- const char *description,
- unsigned long *_prealloc);
+ const struct keyring_index_key *index_key,
+ struct assoc_array_edit **_edit);
extern int __key_link_check_live_key(struct key *keyring, struct key *key);
-extern void __key_link(struct key *keyring, struct key *key,
- unsigned long *_prealloc);
+extern void __key_link(struct key *key, struct assoc_array_edit **_edit);
extern void __key_link_end(struct key *keyring,
- struct key_type *type,
- unsigned long prealloc);
+ const struct keyring_index_key *index_key,
+ struct assoc_array_edit *edit);
-extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
- const struct key_type *type,
- const char *description,
- key_perm_t perm);
+extern key_ref_t find_key_to_update(key_ref_t keyring_ref,
+ const struct keyring_index_key *index_key);
extern struct key *keyring_search_instkey(struct key *keyring,
key_serial_t target_id);
+extern int iterate_over_keyring(const struct key *keyring,
+ int (*func)(const struct key *key, void *data),
+ void *data);
+
typedef int (*key_match_func_t)(const struct key *, const void *);
+struct keyring_search_context {
+ struct keyring_index_key index_key;
+ const struct cred *cred;
+ key_match_func_t match;
+ const void *match_data;
+ unsigned flags;
+#define KEYRING_SEARCH_LOOKUP_TYPE 0x0001 /* [as type->def_lookup_type] */
+#define KEYRING_SEARCH_NO_STATE_CHECK 0x0002 /* Skip state checks */
+#define KEYRING_SEARCH_DO_STATE_CHECK 0x0004 /* Override NO_STATE_CHECK */
+#define KEYRING_SEARCH_NO_UPDATE_TIME 0x0008 /* Don't update times */
+#define KEYRING_SEARCH_NO_CHECK_PERM 0x0010 /* Don't check permissions */
+#define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0020 /* Give an error on excessive depth */
+
+ int (*iterator)(const void *object, void *iterator_data);
+
+ /* Internal stuff */
+ int skipped_ret;
+ bool possessed;
+ key_ref_t result;
+ struct timespec now;
+};
+
extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
- const struct cred *cred,
- struct key_type *type,
- const void *description,
- key_match_func_t match,
- bool no_state_check);
-
-extern key_ref_t search_my_process_keyrings(struct key_type *type,
- const void *description,
- key_match_func_t match,
- bool no_state_check,
- const struct cred *cred);
-extern key_ref_t search_process_keyrings(struct key_type *type,
- const void *description,
- key_match_func_t match,
- const struct cred *cred);
+ struct keyring_search_context *ctx);
+
+extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
+extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
@@ -202,7 +213,7 @@ extern struct key *key_get_instantiation_authkey(key_serial_t target_id);
/*
* Determine whether a key is dead.
*/
-static inline bool key_is_dead(struct key *key, time_t limit)
+static inline bool key_is_dead(const struct key *key, time_t limit)
{
return
key->flags & ((1 << KEY_FLAG_DEAD) |
@@ -244,6 +255,15 @@ extern long keyctl_invalidate_key(key_serial_t);
extern long keyctl_instantiate_key_common(key_serial_t,
const struct iovec *,
unsigned, size_t, key_serial_t);
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+extern long keyctl_get_persistent(uid_t, key_serial_t);
+extern unsigned persistent_keyring_expiry;
+#else
+static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring)
+{
+ return -EOPNOTSUPP;
+}
+#endif
/*
* Debugging key validation
diff --git a/security/keys/key.c b/security/keys/key.c
index 8fb7c7bd4657..55d110f0aced 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -242,8 +242,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
}
}
- desclen = strlen(desc) + 1;
- quotalen = desclen + type->def_datalen;
+ desclen = strlen(desc);
+ quotalen = desclen + 1 + type->def_datalen;
/* get hold of the key tracking for this user */
user = key_user_lookup(uid);
@@ -277,7 +277,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
goto no_memory_2;
if (desc) {
- key->description = kmemdup(desc, desclen, GFP_KERNEL);
+ key->index_key.desc_len = desclen;
+ key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
if (!key->description)
goto no_memory_3;
}
@@ -285,7 +286,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
atomic_set(&key->usage, 1);
init_rwsem(&key->sem);
lockdep_set_class(&key->sem, &type->lock_class);
- key->type = type;
+ key->index_key.type = type;
key->user = user;
key->quotalen = quotalen;
key->datalen = type->def_datalen;
@@ -299,6 +300,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
if (!(flags & KEY_ALLOC_NOT_IN_QUOTA))
key->flags |= 1 << KEY_FLAG_IN_QUOTA;
+ if (flags & KEY_ALLOC_TRUSTED)
+ key->flags |= 1 << KEY_FLAG_TRUSTED;
memset(&key->type_data, 0, sizeof(key->type_data));
@@ -408,7 +411,7 @@ static int __key_instantiate_and_link(struct key *key,
struct key_preparsed_payload *prep,
struct key *keyring,
struct key *authkey,
- unsigned long *_prealloc)
+ struct assoc_array_edit **_edit)
{
int ret, awaken;
@@ -435,7 +438,7 @@ static int __key_instantiate_and_link(struct key *key,
/* and link it into the destination keyring */
if (keyring)
- __key_link(keyring, key, _prealloc);
+ __key_link(key, _edit);
/* disable the authorisation key */
if (authkey)
@@ -475,7 +478,7 @@ int key_instantiate_and_link(struct key *key,
struct key *authkey)
{
struct key_preparsed_payload prep;
- unsigned long prealloc;
+ struct assoc_array_edit *edit;
int ret;
memset(&prep, 0, sizeof(prep));
@@ -489,17 +492,15 @@ int key_instantiate_and_link(struct key *key,
}
if (keyring) {
- ret = __key_link_begin(keyring, key->type, key->description,
- &prealloc);
+ ret = __key_link_begin(keyring, &key->index_key, &edit);
if (ret < 0)
goto error_free_preparse;
}
- ret = __key_instantiate_and_link(key, &prep, keyring, authkey,
- &prealloc);
+ ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit);
if (keyring)
- __key_link_end(keyring, key->type, prealloc);
+ __key_link_end(keyring, &key->index_key, edit);
error_free_preparse:
if (key->type->preparse)
@@ -537,7 +538,7 @@ int key_reject_and_link(struct key *key,
struct key *keyring,
struct key *authkey)
{
- unsigned long prealloc;
+ struct assoc_array_edit *edit;
struct timespec now;
int ret, awaken, link_ret = 0;
@@ -548,8 +549,7 @@ int key_reject_and_link(struct key *key,
ret = -EBUSY;
if (keyring)
- link_ret = __key_link_begin(keyring, key->type,
- key->description, &prealloc);
+ link_ret = __key_link_begin(keyring, &key->index_key, &edit);
mutex_lock(&key_construction_mutex);
@@ -557,9 +557,10 @@ int key_reject_and_link(struct key *key,
if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) {
/* mark the key as being negatively instantiated */
atomic_inc(&key->user->nikeys);
+ key->type_data.reject_error = -error;
+ smp_wmb();
set_bit(KEY_FLAG_NEGATIVE, &key->flags);
set_bit(KEY_FLAG_INSTANTIATED, &key->flags);
- key->type_data.reject_error = -error;
now = current_kernel_time();
key->expiry = now.tv_sec + timeout;
key_schedule_gc(key->expiry + key_gc_delay);
@@ -571,7 +572,7 @@ int key_reject_and_link(struct key *key,
/* and link it into the destination keyring */
if (keyring && link_ret == 0)
- __key_link(keyring, key, &prealloc);
+ __key_link(key, &edit);
/* disable the authorisation key */
if (authkey)
@@ -581,7 +582,7 @@ int key_reject_and_link(struct key *key,
mutex_unlock(&key_construction_mutex);
if (keyring)
- __key_link_end(keyring, key->type, prealloc);
+ __key_link_end(keyring, &key->index_key, edit);
/* wake up anyone waiting for a key to be constructed */
if (awaken)
@@ -645,7 +646,7 @@ found:
/* this races with key_put(), but that doesn't matter since key_put()
* doesn't actually change the key
*/
- atomic_inc(&key->usage);
+ __key_get(key);
error:
spin_unlock(&key_serial_lock);
@@ -780,25 +781,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
key_perm_t perm,
unsigned long flags)
{
- unsigned long prealloc;
+ struct keyring_index_key index_key = {
+ .description = description,
+ };
struct key_preparsed_payload prep;
+ struct assoc_array_edit *edit;
const struct cred *cred = current_cred();
- struct key_type *ktype;
struct key *keyring, *key = NULL;
key_ref_t key_ref;
int ret;
/* look up the key type to see if it's one of the registered kernel
* types */
- ktype = key_type_lookup(type);
- if (IS_ERR(ktype)) {
+ index_key.type = key_type_lookup(type);
+ if (IS_ERR(index_key.type)) {
key_ref = ERR_PTR(-ENODEV);
goto error;
}
key_ref = ERR_PTR(-EINVAL);
- if (!ktype->match || !ktype->instantiate ||
- (!description && !ktype->preparse))
+ if (!index_key.type->match || !index_key.type->instantiate ||
+ (!index_key.description && !index_key.type->preparse))
goto error_put_type;
keyring = key_ref_to_ptr(keyring_ref);
@@ -812,21 +815,28 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
memset(&prep, 0, sizeof(prep));
prep.data = payload;
prep.datalen = plen;
- prep.quotalen = ktype->def_datalen;
- if (ktype->preparse) {
- ret = ktype->preparse(&prep);
+ prep.quotalen = index_key.type->def_datalen;
+ prep.trusted = flags & KEY_ALLOC_TRUSTED;
+ if (index_key.type->preparse) {
+ ret = index_key.type->preparse(&prep);
if (ret < 0) {
key_ref = ERR_PTR(ret);
goto error_put_type;
}
- if (!description)
- description = prep.description;
+ if (!index_key.description)
+ index_key.description = prep.description;
key_ref = ERR_PTR(-EINVAL);
- if (!description)
+ if (!index_key.description)
goto error_free_prep;
}
+ index_key.desc_len = strlen(index_key.description);
+
+ key_ref = ERR_PTR(-EPERM);
+ if (!prep.trusted && test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags))
+ goto error_free_prep;
+ flags |= prep.trusted ? KEY_ALLOC_TRUSTED : 0;
- ret = __key_link_begin(keyring, ktype, description, &prealloc);
+ ret = __key_link_begin(keyring, &index_key, &edit);
if (ret < 0) {
key_ref = ERR_PTR(ret);
goto error_free_prep;
@@ -844,10 +854,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
* key of the same type and description in the destination keyring and
* update that instead if possible
*/
- if (ktype->update) {
- key_ref = __keyring_search_one(keyring_ref, ktype, description,
- 0);
- if (!IS_ERR(key_ref))
+ if (index_key.type->update) {
+ key_ref = find_key_to_update(keyring_ref, &index_key);
+ if (key_ref)
goto found_matching_key;
}
@@ -856,23 +865,24 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
perm |= KEY_USR_VIEW;
- if (ktype->read)
+ if (index_key.type->read)
perm |= KEY_POS_READ;
- if (ktype == &key_type_keyring || ktype->update)
+ if (index_key.type == &key_type_keyring ||
+ index_key.type->update)
perm |= KEY_POS_WRITE;
}
/* allocate a new key */
- key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred,
- perm, flags);
+ key = key_alloc(index_key.type, index_key.description,
+ cred->fsuid, cred->fsgid, cred, perm, flags);
if (IS_ERR(key)) {
key_ref = ERR_CAST(key);
goto error_link_end;
}
/* instantiate it and link it into the target keyring */
- ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc);
+ ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &edit);
if (ret < 0) {
key_put(key);
key_ref = ERR_PTR(ret);
@@ -882,12 +892,12 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
error_link_end:
- __key_link_end(keyring, ktype, prealloc);
+ __key_link_end(keyring, &index_key, edit);
error_free_prep:
- if (ktype->preparse)
- ktype->free_preparse(&prep);
+ if (index_key.type->preparse)
+ index_key.type->free_preparse(&prep);
error_put_type:
- key_type_put(ktype);
+ key_type_put(index_key.type);
error:
return key_ref;
@@ -895,7 +905,7 @@ error:
/* we found a matching key, so we're going to try to update it
* - we can drop the locks first as we have the key pinned
*/
- __key_link_end(keyring, ktype, prealloc);
+ __key_link_end(keyring, &index_key, edit);
key_ref = __key_update(key_ref, &prep);
goto error_free_prep;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 33cfd27b4de2..cee72ce64222 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1667,6 +1667,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
case KEYCTL_INVALIDATE:
return keyctl_invalidate_key((key_serial_t) arg2);
+ case KEYCTL_GET_PERSISTENT:
+ return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3);
+
default:
return -EOPNOTSUPP;
}
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 6ece7f2e5707..69f0cb7bab7e 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -1,6 +1,6 @@
/* Keyring handling
*
- * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -17,25 +17,11 @@
#include <linux/seq_file.h>
#include <linux/err.h>
#include <keys/keyring-type.h>
+#include <keys/user-type.h>
+#include <linux/assoc_array_priv.h>
#include <linux/uaccess.h>
#include "internal.h"
-#define rcu_dereference_locked_keyring(keyring) \
- (rcu_dereference_protected( \
- (keyring)->payload.subscriptions, \
- rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
-
-#define rcu_deref_link_locked(klist, index, keyring) \
- (rcu_dereference_protected( \
- (klist)->keys[index], \
- rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
-
-#define MAX_KEYRING_LINKS \
- min_t(size_t, USHRT_MAX - 1, \
- ((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *)))
-
-#define KEY_LINK_FIXQUOTA 1UL
-
/*
* When plumbing the depths of the key tree, this sets a hard limit
* set on how deep we're willing to go.
@@ -47,6 +33,28 @@
*/
#define KEYRING_NAME_HASH_SIZE (1 << 5)
+/*
+ * We mark pointers we pass to the associative array with bit 1 set if
+ * they're keyrings and clear otherwise.
+ */
+#define KEYRING_PTR_SUBTYPE 0x2UL
+
+static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x)
+{
+ return (unsigned long)x & KEYRING_PTR_SUBTYPE;
+}
+static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x)
+{
+ void *object = assoc_array_ptr_to_leaf(x);
+ return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE);
+}
+static inline void *keyring_key_to_ptr(struct key *key)
+{
+ if (key->type == &key_type_keyring)
+ return (void *)((unsigned long)key | KEYRING_PTR_SUBTYPE);
+ return key;
+}
+
static struct list_head keyring_name_hash[KEYRING_NAME_HASH_SIZE];
static DEFINE_RWLOCK(keyring_name_lock);
@@ -67,7 +75,6 @@ static inline unsigned keyring_hash(const char *desc)
*/
static int keyring_instantiate(struct key *keyring,
struct key_preparsed_payload *prep);
-static int keyring_match(const struct key *keyring, const void *criterion);
static void keyring_revoke(struct key *keyring);
static void keyring_destroy(struct key *keyring);
static void keyring_describe(const struct key *keyring, struct seq_file *m);
@@ -76,9 +83,9 @@ static long keyring_read(const struct key *keyring,
struct key_type key_type_keyring = {
.name = "keyring",
- .def_datalen = sizeof(struct keyring_list),
+ .def_datalen = 0,
.instantiate = keyring_instantiate,
- .match = keyring_match,
+ .match = user_match,
.revoke = keyring_revoke,
.destroy = keyring_destroy,
.describe = keyring_describe,
@@ -127,6 +134,7 @@ static int keyring_instantiate(struct key *keyring,
ret = -EINVAL;
if (prep->datalen == 0) {
+ assoc_array_init(&keyring->keys);
/* make the keyring available by name if it has one */
keyring_publish_name(keyring);
ret = 0;
@@ -136,15 +144,226 @@ static int keyring_instantiate(struct key *keyring,
}
/*
- * Match keyrings on their name
+ * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit. Ideally we'd
+ * fold the carry back too, but that requires inline asm.
+ */
+static u64 mult_64x32_and_fold(u64 x, u32 y)
+{
+ u64 hi = (u64)(u32)(x >> 32) * y;
+ u64 lo = (u64)(u32)(x) * y;
+ return lo + ((u64)(u32)hi << 32) + (u32)(hi >> 32);
+}
+
+/*
+ * Hash a key type and description.
+ */
+static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key)
+{
+ const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP;
+ const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK;
+ const char *description = index_key->description;
+ unsigned long hash, type;
+ u32 piece;
+ u64 acc;
+ int n, desc_len = index_key->desc_len;
+
+ type = (unsigned long)index_key->type;
+
+ acc = mult_64x32_and_fold(type, desc_len + 13);
+ acc = mult_64x32_and_fold(acc, 9207);
+ for (;;) {
+ n = desc_len;
+ if (n <= 0)
+ break;
+ if (n > 4)
+ n = 4;
+ piece = 0;
+ memcpy(&piece, description, n);
+ description += n;
+ desc_len -= n;
+ acc = mult_64x32_and_fold(acc, piece);
+ acc = mult_64x32_and_fold(acc, 9207);
+ }
+
+ /* Fold the hash down to 32 bits if need be. */
+ hash = acc;
+ if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32)
+ hash ^= acc >> 32;
+
+ /* Squidge all the keyrings into a separate part of the tree to
+ * ordinary keys by making sure the lowest level segment in the hash is
+ * zero for keyrings and non-zero otherwise.
+ */
+ if (index_key->type != &key_type_keyring && (hash & level_mask) == 0)
+ return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1;
+ if (index_key->type == &key_type_keyring && (hash & level_mask) != 0)
+ return (hash + (hash << level_shift)) & ~level_mask;
+ return hash;
+}
+
+/*
+ * Build the next index key chunk.
+ *
+ * On 32-bit systems the index key is laid out as:
+ *
+ * 0 4 5 9...
+ * hash desclen typeptr desc[]
+ *
+ * On 64-bit systems:
+ *
+ * 0 8 9 17...
+ * hash desclen typeptr desc[]
+ *
+ * We return it one word-sized chunk at a time.
*/
-static int keyring_match(const struct key *keyring, const void *description)
+static unsigned long keyring_get_key_chunk(const void *data, int level)
+{
+ const struct keyring_index_key *index_key = data;
+ unsigned long chunk = 0;
+ long offset = 0;
+ int desc_len = index_key->desc_len, n = sizeof(chunk);
+
+ level /= ASSOC_ARRAY_KEY_CHUNK_SIZE;
+ switch (level) {
+ case 0:
+ return hash_key_type_and_desc(index_key);
+ case 1:
+ return ((unsigned long)index_key->type << 8) | desc_len;
+ case 2:
+ if (desc_len == 0)
+ return (u8)((unsigned long)index_key->type >>
+ (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
+ n--;
+ offset = 1;
+ default:
+ offset += sizeof(chunk) - 1;
+ offset += (level - 3) * sizeof(chunk);
+ if (offset >= desc_len)
+ return 0;
+ desc_len -= offset;
+ if (desc_len > n)
+ desc_len = n;
+ offset += desc_len;
+ do {
+ chunk <<= 8;
+ chunk |= ((u8*)index_key->description)[--offset];
+ } while (--desc_len > 0);
+
+ if (level == 2) {
+ chunk <<= 8;
+ chunk |= (u8)((unsigned long)index_key->type >>
+ (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
+ }
+ return chunk;
+ }
+}
+
+static unsigned long keyring_get_object_key_chunk(const void *object, int level)
+{
+ const struct key *key = keyring_ptr_to_key(object);
+ return keyring_get_key_chunk(&key->index_key, level);
+}
+
+static bool keyring_compare_object(const void *object, const void *data)
{
- return keyring->description &&
- strcmp(keyring->description, description) == 0;
+ const struct keyring_index_key *index_key = data;
+ const struct key *key = keyring_ptr_to_key(object);
+
+ return key->index_key.type == index_key->type &&
+ key->index_key.desc_len == index_key->desc_len &&
+ memcmp(key->index_key.description, index_key->description,
+ index_key->desc_len) == 0;
}
/*
+ * Compare the index keys of a pair of objects and determine the bit position
+ * at which they differ - if they differ.
+ */
+static int keyring_diff_objects(const void *_a, const void *_b)
+{
+ const struct key *key_a = keyring_ptr_to_key(_a);
+ const struct key *key_b = keyring_ptr_to_key(_b);
+ const struct keyring_index_key *a = &key_a->index_key;
+ const struct keyring_index_key *b = &key_b->index_key;
+ unsigned long seg_a, seg_b;
+ int level, i;
+
+ level = 0;
+ seg_a = hash_key_type_and_desc(a);
+ seg_b = hash_key_type_and_desc(b);
+ if ((seg_a ^ seg_b) != 0)
+ goto differ;
+
+ /* The number of bits contributed by the hash is controlled by a
+ * constant in the assoc_array headers. Everything else thereafter we
+ * can deal with as being machine word-size dependent.
+ */
+ level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8;
+ seg_a = a->desc_len;
+ seg_b = b->desc_len;
+ if ((seg_a ^ seg_b) != 0)
+ goto differ;
+
+ /* The next bit may not work on big endian */
+ level++;
+ seg_a = (unsigned long)a->type;
+ seg_b = (unsigned long)b->type;
+ if ((seg_a ^ seg_b) != 0)
+ goto differ;
+
+ level += sizeof(unsigned long);
+ if (a->desc_len == 0)
+ goto same;
+
+ i = 0;
+ if (((unsigned long)a->description | (unsigned long)b->description) &
+ (sizeof(unsigned long) - 1)) {
+ do {
+ seg_a = *(unsigned long *)(a->description + i);
+ seg_b = *(unsigned long *)(b->description + i);
+ if ((seg_a ^ seg_b) != 0)
+ goto differ_plus_i;
+ i += sizeof(unsigned long);
+ } while (i < (a->desc_len & (sizeof(unsigned long) - 1)));
+ }
+
+ for (; i < a->desc_len; i++) {
+ seg_a = *(unsigned char *)(a->description + i);
+ seg_b = *(unsigned char *)(b->description + i);
+ if ((seg_a ^ seg_b) != 0)
+ goto differ_plus_i;
+ }
+
+same:
+ return -1;
+
+differ_plus_i:
+ level += i;
+differ:
+ i = level * 8 + __ffs(seg_a ^ seg_b);
+ return i;
+}
+
+/*
+ * Free an object after stripping the keyring flag off of the pointer.
+ */
+static void keyring_free_object(void *object)
+{
+ key_put(keyring_ptr_to_key(object));
+}
+
+/*
+ * Operations for keyring management by the index-tree routines.
+ */
+static const struct assoc_array_ops keyring_assoc_array_ops = {
+ .get_key_chunk = keyring_get_key_chunk,
+ .get_object_key_chunk = keyring_get_object_key_chunk,
+ .compare_object = keyring_compare_object,
+ .diff_objects = keyring_diff_objects,
+ .free_object = keyring_free_object,
+};
+
+/*
* Clean up a keyring when it is destroyed. Unpublish its name if it had one
* and dispose of its data.
*
@@ -155,9 +374,6 @@ static int keyring_match(const struct key *keyring, const void *description)
*/
static void keyring_destroy(struct key *keyring)
{
- struct keyring_list *klist;
- int loop;
-
if (keyring->description) {
write_lock(&keyring_name_lock);
@@ -168,12 +384,7 @@ static void keyring_destroy(struct key *keyring)
write_unlock(&keyring_name_lock);
}
- klist = rcu_access_pointer(keyring->payload.subscriptions);
- if (klist) {
- for (loop = klist->nkeys - 1; loop >= 0; loop--)
- key_put(rcu_access_pointer(klist->keys[loop]));
- kfree(klist);
- }
+ assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops);
}
/*
@@ -181,76 +392,88 @@ static void keyring_destroy(struct key *keyring)
*/
static void keyring_describe(const struct key *keyring, struct seq_file *m)
{
- struct keyring_list *klist;
-
if (keyring->description)
seq_puts(m, keyring->description);
else
seq_puts(m, "[anon]");
if (key_is_instantiated(keyring)) {
- rcu_read_lock();
- klist = rcu_dereference(keyring->payload.subscriptions);
- if (klist)
- seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys);
+ if (keyring->keys.nr_leaves_on_tree != 0)
+ seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree);
else
seq_puts(m, ": empty");
- rcu_read_unlock();
}
}
+struct keyring_read_iterator_context {
+ size_t qty;
+ size_t count;
+ key_serial_t __user *buffer;
+};
+
+static int keyring_read_iterator(const void *object, void *data)
+{
+ struct keyring_read_iterator_context *ctx = data;
+ const struct key *key = keyring_ptr_to_key(object);
+ int ret;
+
+ kenter("{%s,%d},,{%zu/%zu}",
+ key->type->name, key->serial, ctx->count, ctx->qty);
+
+ if (ctx->count >= ctx->qty)
+ return 1;
+
+ ret = put_user(key->serial, ctx->buffer);
+ if (ret < 0)
+ return ret;
+ ctx->buffer++;
+ ctx->count += sizeof(key->serial);
+ return 0;
+}
+
/*
* Read a list of key IDs from the keyring's contents in binary form
*
- * The keyring's semaphore is read-locked by the caller.
+ * The keyring's semaphore is read-locked by the caller. This prevents someone
+ * from modifying it under us - which could cause us to read key IDs multiple
+ * times.
*/
static long keyring_read(const struct key *keyring,
char __user *buffer, size_t buflen)
{
- struct keyring_list *klist;
- struct key *key;
- size_t qty, tmp;
- int loop, ret;
+ struct keyring_read_iterator_context ctx;
+ unsigned long nr_keys;
+ int ret;
- ret = 0;
- klist = rcu_dereference_locked_keyring(keyring);
- if (klist) {
- /* calculate how much data we could return */
- qty = klist->nkeys * sizeof(key_serial_t);
-
- if (buffer && buflen > 0) {
- if (buflen > qty)
- buflen = qty;
-
- /* copy the IDs of the subscribed keys into the
- * buffer */
- ret = -EFAULT;
-
- for (loop = 0; loop < klist->nkeys; loop++) {
- key = rcu_deref_link_locked(klist, loop,
- keyring);
-
- tmp = sizeof(key_serial_t);
- if (tmp > buflen)
- tmp = buflen;
-
- if (copy_to_user(buffer,
- &key->serial,
- tmp) != 0)
- goto error;
-
- buflen -= tmp;
- if (buflen == 0)
- break;
- buffer += tmp;
- }
- }
+ kenter("{%d},,%zu", key_serial(keyring), buflen);
+
+ if (buflen & (sizeof(key_serial_t) - 1))
+ return -EINVAL;
+
+ nr_keys = keyring->keys.nr_leaves_on_tree;
+ if (nr_keys == 0)
+ return 0;
- ret = qty;
+ /* Calculate how much data we could return */
+ ctx.qty = nr_keys * sizeof(key_serial_t);
+
+ if (!buffer || !buflen)
+ return ctx.qty;
+
+ if (buflen > ctx.qty)
+ ctx.qty = buflen;
+
+ /* Copy the IDs of the subscribed keys into the buffer */
+ ctx.buffer = (key_serial_t __user *)buffer;
+ ctx.count = 0;
+ ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx);
+ if (ret < 0) {
+ kleave(" = %d [iterate]", ret);
+ return ret;
}
-error:
- return ret;
+ kleave(" = %zu [ok]", ctx.count);
+ return ctx.count;
}
/*
@@ -277,227 +500,361 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
}
EXPORT_SYMBOL(keyring_alloc);
-/**
- * keyring_search_aux - Search a keyring tree for a key matching some criteria
- * @keyring_ref: A pointer to the keyring with possession indicator.
- * @cred: The credentials to use for permissions checks.
- * @type: The type of key to search for.
- * @description: Parameter for @match.
- * @match: Function to rule on whether or not a key is the one required.
- * @no_state_check: Don't check if a matching key is bad
- *
- * Search the supplied keyring tree for a key that matches the criteria given.
- * The root keyring and any linked keyrings must grant Search permission to the
- * caller to be searchable and keys can only be found if they too grant Search
- * to the caller. The possession flag on the root keyring pointer controls use
- * of the possessor bits in permissions checking of the entire tree. In
- * addition, the LSM gets to forbid keyring searches and key matches.
- *
- * The search is performed as a breadth-then-depth search up to the prescribed
- * limit (KEYRING_SEARCH_MAX_DEPTH).
- *
- * Keys are matched to the type provided and are then filtered by the match
- * function, which is given the description to use in any way it sees fit. The
- * match function may use any attributes of a key that it wishes to to
- * determine the match. Normally the match function from the key type would be
- * used.
- *
- * RCU is used to prevent the keyring key lists from disappearing without the
- * need to take lots of locks.
- *
- * Returns a pointer to the found key and increments the key usage count if
- * successful; -EAGAIN if no matching keys were found, or if expired or revoked
- * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
- * specified keyring wasn't a keyring.
- *
- * In the case of a successful return, the possession attribute from
- * @keyring_ref is propagated to the returned key reference.
+/*
+ * Iteration function to consider each key found.
*/
-key_ref_t keyring_search_aux(key_ref_t keyring_ref,
- const struct cred *cred,
- struct key_type *type,
- const void *description,
- key_match_func_t match,
- bool no_state_check)
+static int keyring_search_iterator(const void *object, void *iterator_data)
{
- struct {
- /* Need a separate keylist pointer for RCU purposes */
- struct key *keyring;
- struct keyring_list *keylist;
- int kix;
- } stack[KEYRING_SEARCH_MAX_DEPTH];
-
- struct keyring_list *keylist;
- struct timespec now;
- unsigned long possessed, kflags;
- struct key *keyring, *key;
- key_ref_t key_ref;
- long err;
- int sp, nkeys, kix;
+ struct keyring_search_context *ctx = iterator_data;
+ const struct key *key = keyring_ptr_to_key(object);
+ unsigned long kflags = key->flags;
- keyring = key_ref_to_ptr(keyring_ref);
- possessed = is_key_possessed(keyring_ref);
- key_check(keyring);
+ kenter("{%d}", key->serial);
- /* top keyring must have search permission to begin the search */
- err = key_task_permission(keyring_ref, cred, KEY_SEARCH);
- if (err < 0) {
- key_ref = ERR_PTR(err);
- goto error;
+ /* ignore keys not of this type */
+ if (key->type != ctx->index_key.type) {
+ kleave(" = 0 [!type]");
+ return 0;
}
- key_ref = ERR_PTR(-ENOTDIR);
- if (keyring->type != &key_type_keyring)
- goto error;
+ /* skip invalidated, revoked and expired keys */
+ if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
+ if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED))) {
+ ctx->result = ERR_PTR(-EKEYREVOKED);
+ kleave(" = %d [invrev]", ctx->skipped_ret);
+ goto skipped;
+ }
- rcu_read_lock();
+ if (key->expiry && ctx->now.tv_sec >= key->expiry) {
+ ctx->result = ERR_PTR(-EKEYEXPIRED);
+ kleave(" = %d [expire]", ctx->skipped_ret);
+ goto skipped;
+ }
+ }
- now = current_kernel_time();
- err = -EAGAIN;
- sp = 0;
-
- /* firstly we should check to see if this top-level keyring is what we
- * are looking for */
- key_ref = ERR_PTR(-EAGAIN);
- kflags = keyring->flags;
- if (keyring->type == type && match(keyring, description)) {
- key = keyring;
- if (no_state_check)
- goto found;
+ /* keys that don't match */
+ if (!ctx->match(key, ctx->match_data)) {
+ kleave(" = 0 [!match]");
+ return 0;
+ }
- /* check it isn't negative and hasn't expired or been
- * revoked */
- if (kflags & (1 << KEY_FLAG_REVOKED))
- goto error_2;
- if (key->expiry && now.tv_sec >= key->expiry)
- goto error_2;
- key_ref = ERR_PTR(key->type_data.reject_error);
- if (kflags & (1 << KEY_FLAG_NEGATIVE))
- goto error_2;
- goto found;
+ /* key must have search permissions */
+ if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
+ key_task_permission(make_key_ref(key, ctx->possessed),
+ ctx->cred, KEY_SEARCH) < 0) {
+ ctx->result = ERR_PTR(-EACCES);
+ kleave(" = %d [!perm]", ctx->skipped_ret);
+ goto skipped;
}
- /* otherwise, the top keyring must not be revoked, expired, or
- * negatively instantiated if we are to search it */
- key_ref = ERR_PTR(-EAGAIN);
- if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
- (1 << KEY_FLAG_REVOKED) |
- (1 << KEY_FLAG_NEGATIVE)) ||
- (keyring->expiry && now.tv_sec >= keyring->expiry))
- goto error_2;
-
- /* start processing a new keyring */
-descend:
- kflags = keyring->flags;
- if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
- (1 << KEY_FLAG_REVOKED)))
- goto not_this_keyring;
+ if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
+ /* we set a different error code if we pass a negative key */
+ if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
+ smp_rmb();
+ ctx->result = ERR_PTR(key->type_data.reject_error);
+ kleave(" = %d [neg]", ctx->skipped_ret);
+ goto skipped;
+ }
+ }
- keylist = rcu_dereference(keyring->payload.subscriptions);
- if (!keylist)
- goto not_this_keyring;
+ /* Found */
+ ctx->result = make_key_ref(key, ctx->possessed);
+ kleave(" = 1 [found]");
+ return 1;
- /* iterate through the keys in this keyring first */
- nkeys = keylist->nkeys;
- smp_rmb();
- for (kix = 0; kix < nkeys; kix++) {
- key = rcu_dereference(keylist->keys[kix]);
- kflags = key->flags;
+skipped:
+ return ctx->skipped_ret;
+}
- /* ignore keys not of this type */
- if (key->type != type)
- continue;
+/*
+ * Search inside a keyring for a key. We can search by walking to it
+ * directly based on its index-key or we can iterate over the entire
+ * tree looking for it, based on the match function.
+ */
+static int search_keyring(struct key *keyring, struct keyring_search_context *ctx)
+{
+ if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) ==
+ KEYRING_SEARCH_LOOKUP_DIRECT) {
+ const void *object;
+
+ object = assoc_array_find(&keyring->keys,
+ &keyring_assoc_array_ops,
+ &ctx->index_key);
+ return object ? ctx->iterator(object, ctx) : 0;
+ }
+ return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx);
+}
- /* skip invalidated, revoked and expired keys */
- if (!no_state_check) {
- if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
- (1 << KEY_FLAG_REVOKED)))
- continue;
+/*
+ * Search a tree of keyrings that point to other keyrings up to the maximum
+ * depth.
+ */
+static bool search_nested_keyrings(struct key *keyring,
+ struct keyring_search_context *ctx)
+{
+ struct {
+ struct key *keyring;
+ struct assoc_array_node *node;
+ int slot;
+ } stack[KEYRING_SEARCH_MAX_DEPTH];
- if (key->expiry && now.tv_sec >= key->expiry)
- continue;
- }
+ struct assoc_array_shortcut *shortcut;
+ struct assoc_array_node *node;
+ struct assoc_array_ptr *ptr;
+ struct key *key;
+ int sp = 0, slot;
- /* keys that don't match */
- if (!match(key, description))
- continue;
+ kenter("{%d},{%s,%s}",
+ keyring->serial,
+ ctx->index_key.type->name,
+ ctx->index_key.description);
- /* key must have search permissions */
- if (key_task_permission(make_key_ref(key, possessed),
- cred, KEY_SEARCH) < 0)
- continue;
+ if (ctx->index_key.description)
+ ctx->index_key.desc_len = strlen(ctx->index_key.description);
- if (no_state_check)
+ /* Check to see if this top-level keyring is what we are looking for
+ * and whether it is valid or not.
+ */
+ if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE ||
+ keyring_compare_object(keyring, &ctx->index_key)) {
+ ctx->skipped_ret = 2;
+ ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK;
+ switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) {
+ case 1:
goto found;
-
- /* we set a different error code if we pass a negative key */
- if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
- err = key->type_data.reject_error;
- continue;
+ case 2:
+ return false;
+ default:
+ break;
}
+ }
+
+ ctx->skipped_ret = 0;
+ if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+ ctx->flags &= ~KEYRING_SEARCH_DO_STATE_CHECK;
+ /* Start processing a new keyring */
+descend_to_keyring:
+ kdebug("descend to %d", keyring->serial);
+ if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED)))
+ goto not_this_keyring;
+
+ /* Search through the keys in this keyring before its searching its
+ * subtrees.
+ */
+ if (search_keyring(keyring, ctx))
goto found;
- }
- /* search through the keyrings nested in this one */
- kix = 0;
-ascend:
- nkeys = keylist->nkeys;
- smp_rmb();
- for (; kix < nkeys; kix++) {
- key = rcu_dereference(keylist->keys[kix]);
- if (key->type != &key_type_keyring)
- continue;
+ /* Then manually iterate through the keyrings nested in this one.
+ *
+ * Start from the root node of the index tree. Because of the way the
+ * hash function has been set up, keyrings cluster on the leftmost
+ * branch of the root node (root slot 0) or in the root node itself.
+ * Non-keyrings avoid the leftmost branch of the root entirely (root
+ * slots 1-15).
+ */
+ ptr = ACCESS_ONCE(keyring->keys.root);
+ if (!ptr)
+ goto not_this_keyring;
- /* recursively search nested keyrings
- * - only search keyrings for which we have search permission
+ if (assoc_array_ptr_is_shortcut(ptr)) {
+ /* If the root is a shortcut, either the keyring only contains
+ * keyring pointers (everything clusters behind root slot 0) or
+ * doesn't contain any keyring pointers.
*/
- if (sp >= KEYRING_SEARCH_MAX_DEPTH)
+ shortcut = assoc_array_ptr_to_shortcut(ptr);
+ smp_read_barrier_depends();
+ if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0)
+ goto not_this_keyring;
+
+ ptr = ACCESS_ONCE(shortcut->next_node);
+ node = assoc_array_ptr_to_node(ptr);
+ goto begin_node;
+ }
+
+ node = assoc_array_ptr_to_node(ptr);
+ smp_read_barrier_depends();
+
+ ptr = node->slots[0];
+ if (!assoc_array_ptr_is_meta(ptr))
+ goto begin_node;
+
+descend_to_node:
+ /* Descend to a more distal node in this keyring's content tree and go
+ * through that.
+ */
+ kdebug("descend");
+ if (assoc_array_ptr_is_shortcut(ptr)) {
+ shortcut = assoc_array_ptr_to_shortcut(ptr);
+ smp_read_barrier_depends();
+ ptr = ACCESS_ONCE(shortcut->next_node);
+ BUG_ON(!assoc_array_ptr_is_node(ptr));
+ node = assoc_array_ptr_to_node(ptr);
+ }
+
+begin_node:
+ kdebug("begin_node");
+ smp_read_barrier_depends();
+ slot = 0;
+ascend_to_node:
+ /* Go through the slots in a node */
+ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+ ptr = ACCESS_ONCE(node->slots[slot]);
+
+ if (assoc_array_ptr_is_meta(ptr) && node->back_pointer)
+ goto descend_to_node;
+
+ if (!keyring_ptr_is_keyring(ptr))
continue;
- if (key_task_permission(make_key_ref(key, possessed),
- cred, KEY_SEARCH) < 0)
+ key = keyring_ptr_to_key(ptr);
+
+ if (sp >= KEYRING_SEARCH_MAX_DEPTH) {
+ if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) {
+ ctx->result = ERR_PTR(-ELOOP);
+ return false;
+ }
+ goto not_this_keyring;
+ }
+
+ /* Search a nested keyring */
+ if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
+ key_task_permission(make_key_ref(key, ctx->possessed),
+ ctx->cred, KEY_SEARCH) < 0)
continue;
/* stack the current position */
stack[sp].keyring = keyring;
- stack[sp].keylist = keylist;
- stack[sp].kix = kix;
+ stack[sp].node = node;
+ stack[sp].slot = slot;
sp++;
/* begin again with the new keyring */
keyring = key;
- goto descend;
+ goto descend_to_keyring;
}
- /* the keyring we're looking at was disqualified or didn't contain a
- * matching key */
+ /* We've dealt with all the slots in the current node, so now we need
+ * to ascend to the parent and continue processing there.
+ */
+ ptr = ACCESS_ONCE(node->back_pointer);
+ slot = node->parent_slot;
+
+ if (ptr && assoc_array_ptr_is_shortcut(ptr)) {
+ shortcut = assoc_array_ptr_to_shortcut(ptr);
+ smp_read_barrier_depends();
+ ptr = ACCESS_ONCE(shortcut->back_pointer);
+ slot = shortcut->parent_slot;
+ }
+ if (!ptr)
+ goto not_this_keyring;
+ node = assoc_array_ptr_to_node(ptr);
+ smp_read_barrier_depends();
+ slot++;
+
+ /* If we've ascended to the root (zero backpointer), we must have just
+ * finished processing the leftmost branch rather than the root slots -
+ * so there can't be any more keyrings for us to find.
+ */
+ if (node->back_pointer) {
+ kdebug("ascend %d", slot);
+ goto ascend_to_node;
+ }
+
+ /* The keyring we're looking at was disqualified or didn't contain a
+ * matching key.
+ */
not_this_keyring:
- if (sp > 0) {
- /* resume the processing of a keyring higher up in the tree */
- sp--;
- keyring = stack[sp].keyring;
- keylist = stack[sp].keylist;
- kix = stack[sp].kix + 1;
- goto ascend;
+ kdebug("not_this_keyring %d", sp);
+ if (sp <= 0) {
+ kleave(" = false");
+ return false;
}
- key_ref = ERR_PTR(err);
- goto error_2;
+ /* Resume the processing of a keyring higher up in the tree */
+ sp--;
+ keyring = stack[sp].keyring;
+ node = stack[sp].node;
+ slot = stack[sp].slot + 1;
+ kdebug("ascend to %d [%d]", keyring->serial, slot);
+ goto ascend_to_node;
- /* we found a viable match */
+ /* We found a viable match */
found:
- atomic_inc(&key->usage);
- key->last_used_at = now.tv_sec;
- keyring->last_used_at = now.tv_sec;
- while (sp > 0)
- stack[--sp].keyring->last_used_at = now.tv_sec;
+ key = key_ref_to_ptr(ctx->result);
key_check(key);
- key_ref = make_key_ref(key, possessed);
-error_2:
+ if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) {
+ key->last_used_at = ctx->now.tv_sec;
+ keyring->last_used_at = ctx->now.tv_sec;
+ while (sp > 0)
+ stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
+ }
+ kleave(" = true");
+ return true;
+}
+
+/**
+ * keyring_search_aux - Search a keyring tree for a key matching some criteria
+ * @keyring_ref: A pointer to the keyring with possession indicator.
+ * @ctx: The keyring search context.
+ *
+ * Search the supplied keyring tree for a key that matches the criteria given.
+ * The root keyring and any linked keyrings must grant Search permission to the
+ * caller to be searchable and keys can only be found if they too grant Search
+ * to the caller. The possession flag on the root keyring pointer controls use
+ * of the possessor bits in permissions checking of the entire tree. In
+ * addition, the LSM gets to forbid keyring searches and key matches.
+ *
+ * The search is performed as a breadth-then-depth search up to the prescribed
+ * limit (KEYRING_SEARCH_MAX_DEPTH).
+ *
+ * Keys are matched to the type provided and are then filtered by the match
+ * function, which is given the description to use in any way it sees fit. The
+ * match function may use any attributes of a key that it wishes to to
+ * determine the match. Normally the match function from the key type would be
+ * used.
+ *
+ * RCU can be used to prevent the keyring key lists from disappearing without
+ * the need to take lots of locks.
+ *
+ * Returns a pointer to the found key and increments the key usage count if
+ * successful; -EAGAIN if no matching keys were found, or if expired or revoked
+ * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
+ * specified keyring wasn't a keyring.
+ *
+ * In the case of a successful return, the possession attribute from
+ * @keyring_ref is propagated to the returned key reference.
+ */
+key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+ struct keyring_search_context *ctx)
+{
+ struct key *keyring;
+ long err;
+
+ ctx->iterator = keyring_search_iterator;
+ ctx->possessed = is_key_possessed(keyring_ref);
+ ctx->result = ERR_PTR(-EAGAIN);
+
+ keyring = key_ref_to_ptr(keyring_ref);
+ key_check(keyring);
+
+ if (keyring->type != &key_type_keyring)
+ return ERR_PTR(-ENOTDIR);
+
+ if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) {
+ err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
+
+ rcu_read_lock();
+ ctx->now = current_kernel_time();
+ if (search_nested_keyrings(keyring, ctx))
+ __key_get(key_ref_to_ptr(ctx->result));
rcu_read_unlock();
-error:
- return key_ref;
+ return ctx->result;
}
/**
@@ -507,77 +864,73 @@ error:
* @description: The name of the keyring we want to find.
*
* As keyring_search_aux() above, but using the current task's credentials and
- * type's default matching function.
+ * type's default matching function and preferred search method.
*/
key_ref_t keyring_search(key_ref_t keyring,
struct key_type *type,
const char *description)
{
- if (!type->match)
+ struct keyring_search_context ctx = {
+ .index_key.type = type,
+ .index_key.description = description,
+ .cred = current_cred(),
+ .match = type->match,
+ .match_data = description,
+ .flags = (type->def_lookup_type |
+ KEYRING_SEARCH_DO_STATE_CHECK),
+ };
+
+ if (!ctx.match)
return ERR_PTR(-ENOKEY);
- return keyring_search_aux(keyring, current->cred,
- type, description, type->match, false);
+ return keyring_search_aux(keyring, &ctx);
}
EXPORT_SYMBOL(keyring_search);
/*
- * Search the given keyring only (no recursion).
+ * Search the given keyring for a key that might be updated.
*
* The caller must guarantee that the keyring is a keyring and that the
- * permission is granted to search the keyring as no check is made here.
- *
- * RCU is used to make it unnecessary to lock the keyring key list here.
+ * permission is granted to modify the keyring as no check is made here. The
+ * caller must also hold a lock on the keyring semaphore.
*
* Returns a pointer to the found key with usage count incremented if
- * successful and returns -ENOKEY if not found. Revoked keys and keys not
- * providing the requested permission are skipped over.
+ * successful and returns NULL if not found. Revoked and invalidated keys are
+ * skipped over.
*
* If successful, the possession indicator is propagated from the keyring ref
* to the returned key reference.
*/
-key_ref_t __keyring_search_one(key_ref_t keyring_ref,
- const struct key_type *ktype,
- const char *description,
- key_perm_t perm)
+key_ref_t find_key_to_update(key_ref_t keyring_ref,
+ const struct keyring_index_key *index_key)
{
- struct keyring_list *klist;
- unsigned long possessed;
struct key *keyring, *key;
- int nkeys, loop;
+ const void *object;
keyring = key_ref_to_ptr(keyring_ref);
- possessed = is_key_possessed(keyring_ref);
- rcu_read_lock();
+ kenter("{%d},{%s,%s}",
+ keyring->serial, index_key->type->name, index_key->description);
- klist = rcu_dereference(keyring->payload.subscriptions);
- if (klist) {
- nkeys = klist->nkeys;
- smp_rmb();
- for (loop = 0; loop < nkeys ; loop++) {
- key = rcu_dereference(klist->keys[loop]);
- if (key->type == ktype &&
- (!key->type->match ||
- key->type->match(key, description)) &&
- key_permission(make_key_ref(key, possessed),
- perm) == 0 &&
- !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
- (1 << KEY_FLAG_REVOKED)))
- )
- goto found;
- }
- }
+ object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops,
+ index_key);
- rcu_read_unlock();
- return ERR_PTR(-ENOKEY);
+ if (object)
+ goto found;
+
+ kleave(" = NULL");
+ return NULL;
found:
- atomic_inc(&key->usage);
- keyring->last_used_at = key->last_used_at =
- current_kernel_time().tv_sec;
- rcu_read_unlock();
- return make_key_ref(key, possessed);
+ key = keyring_ptr_to_key(object);
+ if (key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED))) {
+ kleave(" = NULL [x]");
+ return NULL;
+ }
+ __key_get(key);
+ kleave(" = {%d}", key->serial);
+ return make_key_ref(key, is_key_possessed(keyring_ref));
}
/*
@@ -640,6 +993,19 @@ out:
return keyring;
}
+static int keyring_detect_cycle_iterator(const void *object,
+ void *iterator_data)
+{
+ struct keyring_search_context *ctx = iterator_data;
+ const struct key *key = keyring_ptr_to_key(object);
+
+ kenter("{%d}", key->serial);
+
+ BUG_ON(key != ctx->match_data);
+ ctx->result = ERR_PTR(-EDEADLK);
+ return 1;
+}
+
/*
* See if a cycle will will be created by inserting acyclic tree B in acyclic
* tree A at the topmost level (ie: as a direct child of A).
@@ -649,116 +1015,39 @@ out:
*/
static int keyring_detect_cycle(struct key *A, struct key *B)
{
- struct {
- struct keyring_list *keylist;
- int kix;
- } stack[KEYRING_SEARCH_MAX_DEPTH];
-
- struct keyring_list *keylist;
- struct key *subtree, *key;
- int sp, nkeys, kix, ret;
+ struct keyring_search_context ctx = {
+ .index_key = A->index_key,
+ .match_data = A,
+ .iterator = keyring_detect_cycle_iterator,
+ .flags = (KEYRING_SEARCH_LOOKUP_DIRECT |
+ KEYRING_SEARCH_NO_STATE_CHECK |
+ KEYRING_SEARCH_NO_UPDATE_TIME |
+ KEYRING_SEARCH_NO_CHECK_PERM |
+ KEYRING_SEARCH_DETECT_TOO_DEEP),
+ };
rcu_read_lock();
-
- ret = -EDEADLK;
- if (A == B)
- goto cycle_detected;
-
- subtree = B;
- sp = 0;
-
- /* start processing a new keyring */
-descend:
- if (test_bit(KEY_FLAG_REVOKED, &subtree->flags))
- goto not_this_keyring;
-
- keylist = rcu_dereference(subtree->payload.subscriptions);
- if (!keylist)
- goto not_this_keyring;
- kix = 0;
-
-ascend:
- /* iterate through the remaining keys in this keyring */
- nkeys = keylist->nkeys;
- smp_rmb();
- for (; kix < nkeys; kix++) {
- key = rcu_dereference(keylist->keys[kix]);
-
- if (key == A)
- goto cycle_detected;
-
- /* recursively check nested keyrings */
- if (key->type == &key_type_keyring) {
- if (sp >= KEYRING_SEARCH_MAX_DEPTH)
- goto too_deep;
-
- /* stack the current position */
- stack[sp].keylist = keylist;
- stack[sp].kix = kix;
- sp++;
-
- /* begin again with the new keyring */
- subtree = key;
- goto descend;
- }
- }
-
- /* the keyring we're looking at was disqualified or didn't contain a
- * matching key */
-not_this_keyring:
- if (sp > 0) {
- /* resume the checking of a keyring higher up in the tree */
- sp--;
- keylist = stack[sp].keylist;
- kix = stack[sp].kix + 1;
- goto ascend;
- }
-
- ret = 0; /* no cycles detected */
-
-error:
+ search_nested_keyrings(B, &ctx);
rcu_read_unlock();
- return ret;
-
-too_deep:
- ret = -ELOOP;
- goto error;
-
-cycle_detected:
- ret = -EDEADLK;
- goto error;
-}
-
-/*
- * Dispose of a keyring list after the RCU grace period, freeing the unlinked
- * key
- */
-static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
-{
- struct keyring_list *klist =
- container_of(rcu, struct keyring_list, rcu);
-
- if (klist->delkey != USHRT_MAX)
- key_put(rcu_access_pointer(klist->keys[klist->delkey]));
- kfree(klist);
+ return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result);
}
/*
* Preallocate memory so that a key can be linked into to a keyring.
*/
-int __key_link_begin(struct key *keyring, const struct key_type *type,
- const char *description, unsigned long *_prealloc)
+int __key_link_begin(struct key *keyring,
+ const struct keyring_index_key *index_key,
+ struct assoc_array_edit **_edit)
__acquires(&keyring->sem)
__acquires(&keyring_serialise_link_sem)
{
- struct keyring_list *klist, *nklist;
- unsigned long prealloc;
- unsigned max;
- time_t lowest_lru;
- size_t size;
- int loop, lru, ret;
+ struct assoc_array_edit *edit;
+ int ret;
+
+ kenter("%d,%s,%s,",
+ keyring->serial, index_key->type->name, index_key->description);
- kenter("%d,%s,%s,", key_serial(keyring), type->name, description);
+ BUG_ON(index_key->desc_len == 0);
if (keyring->type != &key_type_keyring)
return -ENOTDIR;
@@ -771,100 +1060,39 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
/* serialise link/link calls to prevent parallel calls causing a cycle
* when linking two keyring in opposite orders */
- if (type == &key_type_keyring)
+ if (index_key->type == &key_type_keyring)
down_write(&keyring_serialise_link_sem);
- klist = rcu_dereference_locked_keyring(keyring);
-
- /* see if there's a matching key we can displace */
- lru = -1;
- if (klist && klist->nkeys > 0) {
- lowest_lru = TIME_T_MAX;
- for (loop = klist->nkeys - 1; loop >= 0; loop--) {
- struct key *key = rcu_deref_link_locked(klist, loop,
- keyring);
- if (key->type == type &&
- strcmp(key->description, description) == 0) {
- /* Found a match - we'll replace the link with
- * one to the new key. We record the slot
- * position.
- */
- klist->delkey = loop;
- prealloc = 0;
- goto done;
- }
- if (key->last_used_at < lowest_lru) {
- lowest_lru = key->last_used_at;
- lru = loop;
- }
- }
- }
-
- /* If the keyring is full then do an LRU discard */
- if (klist &&
- klist->nkeys == klist->maxkeys &&
- klist->maxkeys >= MAX_KEYRING_LINKS) {
- kdebug("LRU discard %d\n", lru);
- klist->delkey = lru;
- prealloc = 0;
- goto done;
- }
-
- /* check that we aren't going to overrun the user's quota */
- ret = key_payload_reserve(keyring,
- keyring->datalen + KEYQUOTA_LINK_BYTES);
- if (ret < 0)
+ /* Create an edit script that will insert/replace the key in the
+ * keyring tree.
+ */
+ edit = assoc_array_insert(&keyring->keys,
+ &keyring_assoc_array_ops,
+ index_key,
+ NULL);
+ if (IS_ERR(edit)) {
+ ret = PTR_ERR(edit);
goto error_sem;
+ }
- if (klist && klist->nkeys < klist->maxkeys) {
- /* there's sufficient slack space to append directly */
- klist->delkey = klist->nkeys;
- prealloc = KEY_LINK_FIXQUOTA;
- } else {
- /* grow the key list */
- max = 4;
- if (klist) {
- max += klist->maxkeys;
- if (max > MAX_KEYRING_LINKS)
- max = MAX_KEYRING_LINKS;
- BUG_ON(max <= klist->maxkeys);
- }
-
- size = sizeof(*klist) + sizeof(struct key *) * max;
-
- ret = -ENOMEM;
- nklist = kmalloc(size, GFP_KERNEL);
- if (!nklist)
- goto error_quota;
-
- nklist->maxkeys = max;
- if (klist) {
- memcpy(nklist->keys, klist->keys,
- sizeof(struct key *) * klist->nkeys);
- nklist->delkey = klist->nkeys;
- nklist->nkeys = klist->nkeys + 1;
- klist->delkey = USHRT_MAX;
- } else {
- nklist->nkeys = 1;
- nklist->delkey = 0;
- }
-
- /* add the key into the new space */
- RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL);
- prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA;
+ /* If we're not replacing a link in-place then we're going to need some
+ * extra quota.
+ */
+ if (!edit->dead_leaf) {
+ ret = key_payload_reserve(keyring,
+ keyring->datalen + KEYQUOTA_LINK_BYTES);
+ if (ret < 0)
+ goto error_cancel;
}
-done:
- *_prealloc = prealloc;
+ *_edit = edit;
kleave(" = 0");
return 0;
-error_quota:
- /* undo the quota changes */
- key_payload_reserve(keyring,
- keyring->datalen - KEYQUOTA_LINK_BYTES);
+error_cancel:
+ assoc_array_cancel_edit(edit);
error_sem:
- if (type == &key_type_keyring)
+ if (index_key->type == &key_type_keyring)
up_write(&keyring_serialise_link_sem);
error_krsem:
up_write(&keyring->sem);
@@ -895,60 +1123,12 @@ int __key_link_check_live_key(struct key *keyring, struct key *key)
* holds at most one link to any given key of a particular type+description
* combination.
*/
-void __key_link(struct key *keyring, struct key *key,
- unsigned long *_prealloc)
+void __key_link(struct key *key, struct assoc_array_edit **_edit)
{
- struct keyring_list *klist, *nklist;
- struct key *discard;
-
- nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA);
- *_prealloc = 0;
-
- kenter("%d,%d,%p", keyring->serial, key->serial, nklist);
-
- klist = rcu_dereference_locked_keyring(keyring);
-
- atomic_inc(&key->usage);
- keyring->last_used_at = key->last_used_at =
- current_kernel_time().tv_sec;
-
- /* there's a matching key we can displace or an empty slot in a newly
- * allocated list we can fill */
- if (nklist) {
- kdebug("reissue %hu/%hu/%hu",
- nklist->delkey, nklist->nkeys, nklist->maxkeys);
-
- RCU_INIT_POINTER(nklist->keys[nklist->delkey], key);
-
- rcu_assign_pointer(keyring->payload.subscriptions, nklist);
-
- /* dispose of the old keyring list and, if there was one, the
- * displaced key */
- if (klist) {
- kdebug("dispose %hu/%hu/%hu",
- klist->delkey, klist->nkeys, klist->maxkeys);
- call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
- }
- } else if (klist->delkey < klist->nkeys) {
- kdebug("replace %hu/%hu/%hu",
- klist->delkey, klist->nkeys, klist->maxkeys);
-
- discard = rcu_dereference_protected(
- klist->keys[klist->delkey],
- rwsem_is_locked(&keyring->sem));
- rcu_assign_pointer(klist->keys[klist->delkey], key);
- /* The garbage collector will take care of RCU
- * synchronisation */
- key_put(discard);
- } else {
- /* there's sufficient slack space to append directly */
- kdebug("append %hu/%hu/%hu",
- klist->delkey, klist->nkeys, klist->maxkeys);
-
- RCU_INIT_POINTER(klist->keys[klist->delkey], key);
- smp_wmb();
- klist->nkeys++;
- }
+ __key_get(key);
+ assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key));
+ assoc_array_apply_edit(*_edit);
+ *_edit = NULL;
}
/*
@@ -956,24 +1136,22 @@ void __key_link(struct key *keyring, struct key *key,
*
* Must be called with __key_link_begin() having being called.
*/
-void __key_link_end(struct key *keyring, struct key_type *type,
- unsigned long prealloc)
+void __key_link_end(struct key *keyring,
+ const struct keyring_index_key *index_key,
+ struct assoc_array_edit *edit)
__releases(&keyring->sem)
__releases(&keyring_serialise_link_sem)
{
- BUG_ON(type == NULL);
- BUG_ON(type->name == NULL);
- kenter("%d,%s,%lx", keyring->serial, type->name, prealloc);
+ BUG_ON(index_key->type == NULL);
+ kenter("%d,%s,", keyring->serial, index_key->type->name);
- if (type == &key_type_keyring)
+ if (index_key->type == &key_type_keyring)
up_write(&keyring_serialise_link_sem);
- if (prealloc) {
- if (prealloc & KEY_LINK_FIXQUOTA)
- key_payload_reserve(keyring,
- keyring->datalen -
- KEYQUOTA_LINK_BYTES);
- kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA));
+ if (edit && !edit->dead_leaf) {
+ key_payload_reserve(keyring,
+ keyring->datalen - KEYQUOTA_LINK_BYTES);
+ assoc_array_cancel_edit(edit);
}
up_write(&keyring->sem);
}
@@ -1000,20 +1178,28 @@ void __key_link_end(struct key *keyring, struct key_type *type,
*/
int key_link(struct key *keyring, struct key *key)
{
- unsigned long prealloc;
+ struct assoc_array_edit *edit;
int ret;
+ kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage));
+
key_check(keyring);
key_check(key);
- ret = __key_link_begin(keyring, key->type, key->description, &prealloc);
+ if (test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags) &&
+ !test_bit(KEY_FLAG_TRUSTED, &key->flags))
+ return -EPERM;
+
+ ret = __key_link_begin(keyring, &key->index_key, &edit);
if (ret == 0) {
+ kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage));
ret = __key_link_check_live_key(keyring, key);
if (ret == 0)
- __key_link(keyring, key, &prealloc);
- __key_link_end(keyring, key->type, prealloc);
+ __key_link(key, &edit);
+ __key_link_end(keyring, &key->index_key, edit);
}
+ kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage));
return ret;
}
EXPORT_SYMBOL(key_link);
@@ -1037,90 +1223,37 @@ EXPORT_SYMBOL(key_link);
*/
int key_unlink(struct key *keyring, struct key *key)
{
- struct keyring_list *klist, *nklist;
- int loop, ret;
+ struct assoc_array_edit *edit;
+ int ret;
key_check(keyring);
key_check(key);
- ret = -ENOTDIR;
if (keyring->type != &key_type_keyring)
- goto error;
+ return -ENOTDIR;
down_write(&keyring->sem);
- klist = rcu_dereference_locked_keyring(keyring);
- if (klist) {
- /* search the keyring for the key */
- for (loop = 0; loop < klist->nkeys; loop++)
- if (rcu_access_pointer(klist->keys[loop]) == key)
- goto key_is_present;
+ edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops,
+ &key->index_key);
+ if (IS_ERR(edit)) {
+ ret = PTR_ERR(edit);
+ goto error;
}
-
- up_write(&keyring->sem);
ret = -ENOENT;
- goto error;
-
-key_is_present:
- /* we need to copy the key list for RCU purposes */
- nklist = kmalloc(sizeof(*klist) +
- sizeof(struct key *) * klist->maxkeys,
- GFP_KERNEL);
- if (!nklist)
- goto nomem;
- nklist->maxkeys = klist->maxkeys;
- nklist->nkeys = klist->nkeys - 1;
-
- if (loop > 0)
- memcpy(&nklist->keys[0],
- &klist->keys[0],
- loop * sizeof(struct key *));
-
- if (loop < nklist->nkeys)
- memcpy(&nklist->keys[loop],
- &klist->keys[loop + 1],
- (nklist->nkeys - loop) * sizeof(struct key *));
-
- /* adjust the user's quota */
- key_payload_reserve(keyring,
- keyring->datalen - KEYQUOTA_LINK_BYTES);
-
- rcu_assign_pointer(keyring->payload.subscriptions, nklist);
-
- up_write(&keyring->sem);
-
- /* schedule for later cleanup */
- klist->delkey = loop;
- call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
+ if (edit == NULL)
+ goto error;
+ assoc_array_apply_edit(edit);
+ key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES);
ret = 0;
error:
- return ret;
-nomem:
- ret = -ENOMEM;
up_write(&keyring->sem);
- goto error;
+ return ret;
}
EXPORT_SYMBOL(key_unlink);
-/*
- * Dispose of a keyring list after the RCU grace period, releasing the keys it
- * links to.
- */
-static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
-{
- struct keyring_list *klist;
- int loop;
-
- klist = container_of(rcu, struct keyring_list, rcu);
-
- for (loop = klist->nkeys - 1; loop >= 0; loop--)
- key_put(rcu_access_pointer(klist->keys[loop]));
-
- kfree(klist);
-}
-
/**
* keyring_clear - Clear a keyring
* @keyring: The keyring to clear.
@@ -1131,33 +1264,25 @@ static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
*/
int keyring_clear(struct key *keyring)
{
- struct keyring_list *klist;
+ struct assoc_array_edit *edit;
int ret;
- ret = -ENOTDIR;
- if (keyring->type == &key_type_keyring) {
- /* detach the pointer block with the locks held */
- down_write(&keyring->sem);
-
- klist = rcu_dereference_locked_keyring(keyring);
- if (klist) {
- /* adjust the quota */
- key_payload_reserve(keyring,
- sizeof(struct keyring_list));
-
- rcu_assign_pointer(keyring->payload.subscriptions,
- NULL);
- }
-
- up_write(&keyring->sem);
+ if (keyring->type != &key_type_keyring)
+ return -ENOTDIR;
- /* free the keys after the locks have been dropped */
- if (klist)
- call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+ down_write(&keyring->sem);
+ edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
+ if (IS_ERR(edit)) {
+ ret = PTR_ERR(edit);
+ } else {
+ if (edit)
+ assoc_array_apply_edit(edit);
+ key_payload_reserve(keyring, 0);
ret = 0;
}
+ up_write(&keyring->sem);
return ret;
}
EXPORT_SYMBOL(keyring_clear);
@@ -1169,111 +1294,68 @@ EXPORT_SYMBOL(keyring_clear);
*/
static void keyring_revoke(struct key *keyring)
{
- struct keyring_list *klist;
+ struct assoc_array_edit *edit;
+
+ edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
+ if (!IS_ERR(edit)) {
+ if (edit)
+ assoc_array_apply_edit(edit);
+ key_payload_reserve(keyring, 0);
+ }
+}
+
+static bool keyring_gc_select_iterator(void *object, void *iterator_data)
+{
+ struct key *key = keyring_ptr_to_key(object);
+ time_t *limit = iterator_data;
- klist = rcu_dereference_locked_keyring(keyring);
+ if (key_is_dead(key, *limit))
+ return false;
+ key_get(key);
+ return true;
+}
- /* adjust the quota */
- key_payload_reserve(keyring, 0);
+static int keyring_gc_check_iterator(const void *object, void *iterator_data)
+{
+ const struct key *key = keyring_ptr_to_key(object);
+ time_t *limit = iterator_data;
- if (klist) {
- rcu_assign_pointer(keyring->payload.subscriptions, NULL);
- call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
- }
+ key_check(key);
+ return key_is_dead(key, *limit);
}
/*
- * Collect garbage from the contents of a keyring, replacing the old list with
- * a new one with the pointers all shuffled down.
+ * Garbage collect pointers from a keyring.
*
- * Dead keys are classed as oned that are flagged as being dead or are revoked,
- * expired or negative keys that were revoked or expired before the specified
- * limit.
+ * Not called with any locks held. The keyring's key struct will not be
+ * deallocated under us as only our caller may deallocate it.
*/
void keyring_gc(struct key *keyring, time_t limit)
{
- struct keyring_list *klist, *new;
- struct key *key;
- int loop, keep, max;
-
- kenter("{%x,%s}", key_serial(keyring), keyring->description);
-
- down_write(&keyring->sem);
-
- klist = rcu_dereference_locked_keyring(keyring);
- if (!klist)
- goto no_klist;
-
- /* work out how many subscriptions we're keeping */
- keep = 0;
- for (loop = klist->nkeys - 1; loop >= 0; loop--)
- if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring),
- limit))
- keep++;
-
- if (keep == klist->nkeys)
- goto just_return;
-
- /* allocate a new keyring payload */
- max = roundup(keep, 4);
- new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
- GFP_KERNEL);
- if (!new)
- goto nomem;
- new->maxkeys = max;
- new->nkeys = 0;
- new->delkey = 0;
-
- /* install the live keys
- * - must take care as expired keys may be updated back to life
- */
- keep = 0;
- for (loop = klist->nkeys - 1; loop >= 0; loop--) {
- key = rcu_deref_link_locked(klist, loop, keyring);
- if (!key_is_dead(key, limit)) {
- if (keep >= max)
- goto discard_new;
- RCU_INIT_POINTER(new->keys[keep++], key_get(key));
- }
- }
- new->nkeys = keep;
-
- /* adjust the quota */
- key_payload_reserve(keyring,
- sizeof(struct keyring_list) +
- KEYQUOTA_LINK_BYTES * keep);
+ int result;
- if (keep == 0) {
- rcu_assign_pointer(keyring->payload.subscriptions, NULL);
- kfree(new);
- } else {
- rcu_assign_pointer(keyring->payload.subscriptions, new);
- }
+ kenter("%x{%s}", keyring->serial, keyring->description ?: "");
- up_write(&keyring->sem);
+ if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED)))
+ goto dont_gc;
- call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
- kleave(" [yes]");
- return;
-
-discard_new:
- new->nkeys = keep;
- keyring_clear_rcu_disposal(&new->rcu);
- up_write(&keyring->sem);
- kleave(" [discard]");
- return;
-
-just_return:
- up_write(&keyring->sem);
- kleave(" [no dead]");
- return;
+ /* scan the keyring looking for dead keys */
+ rcu_read_lock();
+ result = assoc_array_iterate(&keyring->keys,
+ keyring_gc_check_iterator, &limit);
+ rcu_read_unlock();
+ if (result == true)
+ goto do_gc;
-no_klist:
- up_write(&keyring->sem);
- kleave(" [no_klist]");
+dont_gc:
+ kleave(" [no gc]");
return;
-nomem:
+do_gc:
+ down_write(&keyring->sem);
+ assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops,
+ keyring_gc_select_iterator, &limit);
up_write(&keyring->sem);
- kleave(" [oom]");
+ kleave(" [gc]");
}
diff --git a/security/keys/persistent.c b/security/keys/persistent.c
new file mode 100644
index 000000000000..0ad3ee283781
--- /dev/null
+++ b/security/keys/persistent.c
@@ -0,0 +1,167 @@
+/* General persistent per-UID keyrings register
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/user_namespace.h>
+#include "internal.h"
+
+unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */
+
+/*
+ * Create the persistent keyring register for the current user namespace.
+ *
+ * Called with the namespace's sem locked for writing.
+ */
+static int key_create_persistent_register(struct user_namespace *ns)
+{
+ struct key *reg = keyring_alloc(".persistent_register",
+ KUIDT_INIT(0), KGIDT_INIT(0),
+ current_cred(),
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
+ KEY_ALLOC_NOT_IN_QUOTA, NULL);
+ if (IS_ERR(reg))
+ return PTR_ERR(reg);
+
+ ns->persistent_keyring_register = reg;
+ return 0;
+}
+
+/*
+ * Create the persistent keyring for the specified user.
+ *
+ * Called with the namespace's sem locked for writing.
+ */
+static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid,
+ struct keyring_index_key *index_key)
+{
+ struct key *persistent;
+ key_ref_t reg_ref, persistent_ref;
+
+ if (!ns->persistent_keyring_register) {
+ long err = key_create_persistent_register(ns);
+ if (err < 0)
+ return ERR_PTR(err);
+ } else {
+ reg_ref = make_key_ref(ns->persistent_keyring_register, true);
+ persistent_ref = find_key_to_update(reg_ref, index_key);
+ if (persistent_ref)
+ return persistent_ref;
+ }
+
+ persistent = keyring_alloc(index_key->description,
+ uid, INVALID_GID, current_cred(),
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
+ KEY_ALLOC_NOT_IN_QUOTA,
+ ns->persistent_keyring_register);
+ if (IS_ERR(persistent))
+ return ERR_CAST(persistent);
+
+ return make_key_ref(persistent, true);
+}
+
+/*
+ * Get the persistent keyring for a specific UID and link it to the nominated
+ * keyring.
+ */
+static long key_get_persistent(struct user_namespace *ns, kuid_t uid,
+ key_ref_t dest_ref)
+{
+ struct keyring_index_key index_key;
+ struct key *persistent;
+ key_ref_t reg_ref, persistent_ref;
+ char buf[32];
+ long ret;
+
+ /* Look in the register if it exists */
+ index_key.type = &key_type_keyring;
+ index_key.description = buf;
+ index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid));
+
+ if (ns->persistent_keyring_register) {
+ reg_ref = make_key_ref(ns->persistent_keyring_register, true);
+ down_read(&ns->persistent_keyring_register_sem);
+ persistent_ref = find_key_to_update(reg_ref, &index_key);
+ up_read(&ns->persistent_keyring_register_sem);
+
+ if (persistent_ref)
+ goto found;
+ }
+
+ /* It wasn't in the register, so we'll need to create it. We might
+ * also need to create the register.
+ */
+ down_write(&ns->persistent_keyring_register_sem);
+ persistent_ref = key_create_persistent(ns, uid, &index_key);
+ up_write(&ns->persistent_keyring_register_sem);
+ if (!IS_ERR(persistent_ref))
+ goto found;
+
+ return PTR_ERR(persistent_ref);
+
+found:
+ ret = key_task_permission(persistent_ref, current_cred(), KEY_LINK);
+ if (ret == 0) {
+ persistent = key_ref_to_ptr(persistent_ref);
+ ret = key_link(key_ref_to_ptr(dest_ref), persistent);
+ if (ret == 0) {
+ key_set_timeout(persistent, persistent_keyring_expiry);
+ ret = persistent->serial;
+ }
+ }
+
+ key_ref_put(persistent_ref);
+ return ret;
+}
+
+/*
+ * Get the persistent keyring for a specific UID and link it to the nominated
+ * keyring.
+ */
+long keyctl_get_persistent(uid_t _uid, key_serial_t destid)
+{
+ struct user_namespace *ns = current_user_ns();
+ key_ref_t dest_ref;
+ kuid_t uid;
+ long ret;
+
+ /* -1 indicates the current user */
+ if (_uid == (uid_t)-1) {
+ uid = current_uid();
+ } else {
+ uid = make_kuid(ns, _uid);
+ if (!uid_valid(uid))
+ return -EINVAL;
+
+ /* You can only see your own persistent cache if you're not
+ * sufficiently privileged.
+ */
+ if (!uid_eq(uid, current_uid()) &&
+ !uid_eq(uid, current_euid()) &&
+ !ns_capable(ns, CAP_SETUID))
+ return -EPERM;
+ }
+
+ /* There must be a destination keyring */
+ dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_WRITE);
+ if (IS_ERR(dest_ref))
+ return PTR_ERR(dest_ref);
+ if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) {
+ ret = -ENOTDIR;
+ goto out_put_dest;
+ }
+
+ ret = key_get_persistent(ns, uid, dest_ref);
+
+out_put_dest:
+ key_ref_put(dest_ref);
+ return ret;
+}
diff --git a/security/keys/proc.c b/security/keys/proc.c
index 217b6855e815..88e9a466940f 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -182,7 +182,6 @@ static void proc_keys_stop(struct seq_file *p, void *v)
static int proc_keys_show(struct seq_file *m, void *v)
{
- const struct cred *cred = current_cred();
struct rb_node *_p = v;
struct key *key = rb_entry(_p, struct key, serial_node);
struct timespec now;
@@ -191,15 +190,23 @@ static int proc_keys_show(struct seq_file *m, void *v)
char xbuf[12];
int rc;
+ struct keyring_search_context ctx = {
+ .index_key.type = key->type,
+ .index_key.description = key->description,
+ .cred = current_cred(),
+ .match = lookup_user_key_possessed,
+ .match_data = key,
+ .flags = (KEYRING_SEARCH_NO_STATE_CHECK |
+ KEYRING_SEARCH_LOOKUP_DIRECT),
+ };
+
key_ref = make_key_ref(key, 0);
/* determine if the key is possessed by this process (a test we can
* skip if the key does not indicate the possessor can view it
*/
if (key->perm & KEY_POS_VIEW) {
- skey_ref = search_my_process_keyrings(key->type, key,
- lookup_user_key_possessed,
- true, cred);
+ skey_ref = search_my_process_keyrings(&ctx);
if (!IS_ERR(skey_ref)) {
key_ref_put(skey_ref);
key_ref = make_key_ref(key, 1);
@@ -211,7 +218,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
* - the caller holds a spinlock, and thus the RCU read lock, making our
* access to __current_cred() safe
*/
- rc = key_task_permission(key_ref, cred, KEY_VIEW);
+ rc = key_task_permission(key_ref, ctx.cred, KEY_VIEW);
if (rc < 0)
return 0;
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 42defae1e161..0cf8a130a267 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -235,7 +235,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
if (IS_ERR(keyring))
return PTR_ERR(keyring);
} else {
- atomic_inc(&keyring->usage);
+ __key_get(keyring);
}
/* install the keyring */
@@ -319,11 +319,7 @@ void key_fsgid_changed(struct task_struct *tsk)
* In the case of a successful return, the possession attribute is set on the
* returned key reference.
*/
-key_ref_t search_my_process_keyrings(struct key_type *type,
- const void *description,
- key_match_func_t match,
- bool no_state_check,
- const struct cred *cred)
+key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
{
key_ref_t key_ref, ret, err;
@@ -339,10 +335,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
err = ERR_PTR(-EAGAIN);
/* search the thread keyring first */
- if (cred->thread_keyring) {
+ if (ctx->cred->thread_keyring) {
key_ref = keyring_search_aux(
- make_key_ref(cred->thread_keyring, 1),
- cred, type, description, match, no_state_check);
+ make_key_ref(ctx->cred->thread_keyring, 1), ctx);
if (!IS_ERR(key_ref))
goto found;
@@ -358,10 +353,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
}
/* search the process keyring second */
- if (cred->process_keyring) {
+ if (ctx->cred->process_keyring) {
key_ref = keyring_search_aux(
- make_key_ref(cred->process_keyring, 1),
- cred, type, description, match, no_state_check);
+ make_key_ref(ctx->cred->process_keyring, 1), ctx);
if (!IS_ERR(key_ref))
goto found;
@@ -379,11 +373,11 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
}
/* search the session keyring */
- if (cred->session_keyring) {
+ if (ctx->cred->session_keyring) {
rcu_read_lock();
key_ref = keyring_search_aux(
- make_key_ref(rcu_dereference(cred->session_keyring), 1),
- cred, type, description, match, no_state_check);
+ make_key_ref(rcu_dereference(ctx->cred->session_keyring), 1),
+ ctx);
rcu_read_unlock();
if (!IS_ERR(key_ref))
@@ -402,10 +396,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
}
}
/* or search the user-session keyring */
- else if (cred->user->session_keyring) {
+ else if (ctx->cred->user->session_keyring) {
key_ref = keyring_search_aux(
- make_key_ref(cred->user->session_keyring, 1),
- cred, type, description, match, no_state_check);
+ make_key_ref(ctx->cred->user->session_keyring, 1),
+ ctx);
if (!IS_ERR(key_ref))
goto found;
@@ -437,18 +431,14 @@ found:
*
* Return same as search_my_process_keyrings().
*/
-key_ref_t search_process_keyrings(struct key_type *type,
- const void *description,
- key_match_func_t match,
- const struct cred *cred)
+key_ref_t search_process_keyrings(struct keyring_search_context *ctx)
{
struct request_key_auth *rka;
key_ref_t key_ref, ret = ERR_PTR(-EACCES), err;
might_sleep();
- key_ref = search_my_process_keyrings(type, description, match,
- false, cred);
+ key_ref = search_my_process_keyrings(ctx);
if (!IS_ERR(key_ref))
goto found;
err = key_ref;
@@ -457,18 +447,21 @@ key_ref_t search_process_keyrings(struct key_type *type,
* search the keyrings of the process mentioned there
* - we don't permit access to request_key auth keys via this method
*/
- if (cred->request_key_auth &&
- cred == current_cred() &&
- type != &key_type_request_key_auth
+ if (ctx->cred->request_key_auth &&
+ ctx->cred == current_cred() &&
+ ctx->index_key.type != &key_type_request_key_auth
) {
+ const struct cred *cred = ctx->cred;
+
/* defend against the auth key being revoked */
down_read(&cred->request_key_auth->sem);
- if (key_validate(cred->request_key_auth) == 0) {
- rka = cred->request_key_auth->payload.data;
+ if (key_validate(ctx->cred->request_key_auth) == 0) {
+ rka = ctx->cred->request_key_auth->payload.data;
- key_ref = search_process_keyrings(type, description,
- match, rka->cred);
+ ctx->cred = rka->cred;
+ key_ref = search_process_keyrings(ctx);
+ ctx->cred = cred;
up_read(&cred->request_key_auth->sem);
@@ -522,19 +515,23 @@ int lookup_user_key_possessed(const struct key *key, const void *target)
key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
key_perm_t perm)
{
+ struct keyring_search_context ctx = {
+ .match = lookup_user_key_possessed,
+ .flags = (KEYRING_SEARCH_NO_STATE_CHECK |
+ KEYRING_SEARCH_LOOKUP_DIRECT),
+ };
struct request_key_auth *rka;
- const struct cred *cred;
struct key *key;
key_ref_t key_ref, skey_ref;
int ret;
try_again:
- cred = get_current_cred();
+ ctx.cred = get_current_cred();
key_ref = ERR_PTR(-ENOKEY);
switch (id) {
case KEY_SPEC_THREAD_KEYRING:
- if (!cred->thread_keyring) {
+ if (!ctx.cred->thread_keyring) {
if (!(lflags & KEY_LOOKUP_CREATE))
goto error;
@@ -546,13 +543,13 @@ try_again:
goto reget_creds;
}
- key = cred->thread_keyring;
- atomic_inc(&key->usage);
+ key = ctx.cred->thread_keyring;
+ __key_get(key);
key_ref = make_key_ref(key, 1);
break;
case KEY_SPEC_PROCESS_KEYRING:
- if (!cred->process_keyring) {
+ if (!ctx.cred->process_keyring) {
if (!(lflags & KEY_LOOKUP_CREATE))
goto error;
@@ -564,13 +561,13 @@ try_again:
goto reget_creds;
}
- key = cred->process_keyring;
- atomic_inc(&key->usage);
+ key = ctx.cred->process_keyring;
+ __key_get(key);
key_ref = make_key_ref(key, 1);
break;
case KEY_SPEC_SESSION_KEYRING:
- if (!cred->session_keyring) {
+ if (!ctx.cred->session_keyring) {
/* always install a session keyring upon access if one
* doesn't exist yet */
ret = install_user_keyrings();
@@ -580,13 +577,13 @@ try_again:
ret = join_session_keyring(NULL);
else
ret = install_session_keyring(
- cred->user->session_keyring);
+ ctx.cred->user->session_keyring);
if (ret < 0)
goto error;
goto reget_creds;
- } else if (cred->session_keyring ==
- cred->user->session_keyring &&
+ } else if (ctx.cred->session_keyring ==
+ ctx.cred->user->session_keyring &&
lflags & KEY_LOOKUP_CREATE) {
ret = join_session_keyring(NULL);
if (ret < 0)
@@ -595,33 +592,33 @@ try_again:
}
rcu_read_lock();
- key = rcu_dereference(cred->session_keyring);
- atomic_inc(&key->usage);
+ key = rcu_dereference(ctx.cred->session_keyring);
+ __key_get(key);
rcu_read_unlock();
key_ref = make_key_ref(key, 1);
break;
case KEY_SPEC_USER_KEYRING:
- if (!cred->user->uid_keyring) {
+ if (!ctx.cred->user->uid_keyring) {
ret = install_user_keyrings();
if (ret < 0)
goto error;
}
- key = cred->user->uid_keyring;
- atomic_inc(&key->usage);
+ key = ctx.cred->user->uid_keyring;
+ __key_get(key);
key_ref = make_key_ref(key, 1);
break;
case KEY_SPEC_USER_SESSION_KEYRING:
- if (!cred->user->session_keyring) {
+ if (!ctx.cred->user->session_keyring) {
ret = install_user_keyrings();
if (ret < 0)
goto error;
}
- key = cred->user->session_keyring;
- atomic_inc(&key->usage);
+ key = ctx.cred->user->session_keyring;
+ __key_get(key);
key_ref = make_key_ref(key, 1);
break;
@@ -631,29 +628,29 @@ try_again:
goto error;
case KEY_SPEC_REQKEY_AUTH_KEY:
- key = cred->request_key_auth;
+ key = ctx.cred->request_key_auth;
if (!key)
goto error;
- atomic_inc(&key->usage);
+ __key_get(key);
key_ref = make_key_ref(key, 1);
break;
case KEY_SPEC_REQUESTOR_KEYRING:
- if (!cred->request_key_auth)
+ if (!ctx.cred->request_key_auth)
goto error;
- down_read(&cred->request_key_auth->sem);
+ down_read(&ctx.cred->request_key_auth->sem);
if (test_bit(KEY_FLAG_REVOKED,
- &cred->request_key_auth->flags)) {
+ &ctx.cred->request_key_auth->flags)) {
key_ref = ERR_PTR(-EKEYREVOKED);
key = NULL;
} else {
- rka = cred->request_key_auth->payload.data;
+ rka = ctx.cred->request_key_auth->payload.data;
key = rka->dest_keyring;
- atomic_inc(&key->usage);
+ __key_get(key);
}
- up_read(&cred->request_key_auth->sem);
+ up_read(&ctx.cred->request_key_auth->sem);
if (!key)
goto error;
key_ref = make_key_ref(key, 1);
@@ -673,9 +670,13 @@ try_again:
key_ref = make_key_ref(key, 0);
/* check to see if we possess the key */
- skey_ref = search_process_keyrings(key->type, key,
- lookup_user_key_possessed,
- cred);
+ ctx.index_key.type = key->type;
+ ctx.index_key.description = key->description;
+ ctx.index_key.desc_len = strlen(key->description);
+ ctx.match_data = key;
+ kdebug("check possessed");
+ skey_ref = search_process_keyrings(&ctx);
+ kdebug("possessed=%p", skey_ref);
if (!IS_ERR(skey_ref)) {
key_put(key);
@@ -715,14 +716,14 @@ try_again:
goto invalid_key;
/* check the permissions */
- ret = key_task_permission(key_ref, cred, perm);
+ ret = key_task_permission(key_ref, ctx.cred, perm);
if (ret < 0)
goto invalid_key;
key->last_used_at = current_kernel_time().tv_sec;
error:
- put_cred(cred);
+ put_cred(ctx.cred);
return key_ref;
invalid_key:
@@ -733,7 +734,7 @@ invalid_key:
/* if we attempted to install a keyring, then it may have caused new
* creds to be installed */
reget_creds:
- put_cred(cred);
+ put_cred(ctx.cred);
goto try_again;
}
@@ -856,3 +857,13 @@ void key_change_session_keyring(struct callback_head *twork)
commit_creds(new);
}
+
+/*
+ * Make sure that root's user and user-session keyrings exist.
+ */
+static int __init init_root_keyring(void)
+{
+ return install_user_keyrings();
+}
+
+late_initcall(init_root_keyring);
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index c411f9bb156b..381411941cc1 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -345,33 +345,34 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
* May return a key that's already under construction instead if there was a
* race between two thread calling request_key().
*/
-static int construct_alloc_key(struct key_type *type,
- const char *description,
+static int construct_alloc_key(struct keyring_search_context *ctx,
struct key *dest_keyring,
unsigned long flags,
struct key_user *user,
struct key **_key)
{
- const struct cred *cred = current_cred();
- unsigned long prealloc;
+ struct assoc_array_edit *edit;
struct key *key;
key_perm_t perm;
key_ref_t key_ref;
int ret;
- kenter("%s,%s,,,", type->name, description);
+ kenter("%s,%s,,,",
+ ctx->index_key.type->name, ctx->index_key.description);
*_key = NULL;
mutex_lock(&user->cons_lock);
perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
perm |= KEY_USR_VIEW;
- if (type->read)
+ if (ctx->index_key.type->read)
perm |= KEY_POS_READ;
- if (type == &key_type_keyring || type->update)
+ if (ctx->index_key.type == &key_type_keyring ||
+ ctx->index_key.type->update)
perm |= KEY_POS_WRITE;
- key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred,
+ key = key_alloc(ctx->index_key.type, ctx->index_key.description,
+ ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred,
perm, flags);
if (IS_ERR(key))
goto alloc_failed;
@@ -379,8 +380,7 @@ static int construct_alloc_key(struct key_type *type,
set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
if (dest_keyring) {
- ret = __key_link_begin(dest_keyring, type, description,
- &prealloc);
+ ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit);
if (ret < 0)
goto link_prealloc_failed;
}
@@ -390,16 +390,16 @@ static int construct_alloc_key(struct key_type *type,
* waited for locks */
mutex_lock(&key_construction_mutex);
- key_ref = search_process_keyrings(type, description, type->match, cred);
+ key_ref = search_process_keyrings(ctx);
if (!IS_ERR(key_ref))
goto key_already_present;
if (dest_keyring)
- __key_link(dest_keyring, key, &prealloc);
+ __key_link(key, &edit);
mutex_unlock(&key_construction_mutex);
if (dest_keyring)
- __key_link_end(dest_keyring, type, prealloc);
+ __key_link_end(dest_keyring, &ctx->index_key, edit);
mutex_unlock(&user->cons_lock);
*_key = key;
kleave(" = 0 [%d]", key_serial(key));
@@ -414,8 +414,8 @@ key_already_present:
if (dest_keyring) {
ret = __key_link_check_live_key(dest_keyring, key);
if (ret == 0)
- __key_link(dest_keyring, key, &prealloc);
- __key_link_end(dest_keyring, type, prealloc);
+ __key_link(key, &edit);
+ __key_link_end(dest_keyring, &ctx->index_key, edit);
if (ret < 0)
goto link_check_failed;
}
@@ -444,8 +444,7 @@ alloc_failed:
/*
* Commence key construction.
*/
-static struct key *construct_key_and_link(struct key_type *type,
- const char *description,
+static struct key *construct_key_and_link(struct keyring_search_context *ctx,
const char *callout_info,
size_t callout_len,
void *aux,
@@ -464,8 +463,7 @@ static struct key *construct_key_and_link(struct key_type *type,
construct_get_dest_keyring(&dest_keyring);
- ret = construct_alloc_key(type, description, dest_keyring, flags, user,
- &key);
+ ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
key_user_put(user);
if (ret == 0) {
@@ -529,17 +527,24 @@ struct key *request_key_and_link(struct key_type *type,
struct key *dest_keyring,
unsigned long flags)
{
- const struct cred *cred = current_cred();
+ struct keyring_search_context ctx = {
+ .index_key.type = type,
+ .index_key.description = description,
+ .cred = current_cred(),
+ .match = type->match,
+ .match_data = description,
+ .flags = KEYRING_SEARCH_LOOKUP_DIRECT,
+ };
struct key *key;
key_ref_t key_ref;
int ret;
kenter("%s,%s,%p,%zu,%p,%p,%lx",
- type->name, description, callout_info, callout_len, aux,
- dest_keyring, flags);
+ ctx.index_key.type->name, ctx.index_key.description,
+ callout_info, callout_len, aux, dest_keyring, flags);
/* search all the process keyrings for a key */
- key_ref = search_process_keyrings(type, description, type->match, cred);
+ key_ref = search_process_keyrings(&ctx);
if (!IS_ERR(key_ref)) {
key = key_ref_to_ptr(key_ref);
@@ -562,9 +567,8 @@ struct key *request_key_and_link(struct key_type *type,
if (!callout_info)
goto error;
- key = construct_key_and_link(type, description, callout_info,
- callout_len, aux, dest_keyring,
- flags);
+ key = construct_key_and_link(&ctx, callout_info, callout_len,
+ aux, dest_keyring, flags);
}
error:
@@ -592,8 +596,10 @@ int wait_for_key_construction(struct key *key, bool intr)
intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
if (ret < 0)
return ret;
- if (test_bit(KEY_FLAG_NEGATIVE, &key->flags))
+ if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
+ smp_rmb();
return key->type_data.reject_error;
+ }
return key_validate(key);
}
EXPORT_SYMBOL(wait_for_key_construction);
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 85730d5a5a59..7495a93b4b90 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <asm/uaccess.h>
#include "internal.h"
+#include <keys/user-type.h>
static int request_key_auth_instantiate(struct key *,
struct key_preparsed_payload *);
@@ -222,32 +223,26 @@ error_alloc:
}
/*
- * See if an authorisation key is associated with a particular key.
- */
-static int key_get_instantiation_authkey_match(const struct key *key,
- const void *_id)
-{
- struct request_key_auth *rka = key->payload.data;
- key_serial_t id = (key_serial_t)(unsigned long) _id;
-
- return rka->target_key->serial == id;
-}
-
-/*
* Search the current process's keyrings for the authorisation key for
* instantiation of a key.
*/
struct key *key_get_instantiation_authkey(key_serial_t target_id)
{
- const struct cred *cred = current_cred();
+ char description[16];
+ struct keyring_search_context ctx = {
+ .index_key.type = &key_type_request_key_auth,
+ .index_key.description = description,
+ .cred = current_cred(),
+ .match = user_match,
+ .match_data = description,
+ .flags = KEYRING_SEARCH_LOOKUP_DIRECT,
+ };
struct key *authkey;
key_ref_t authkey_ref;
- authkey_ref = search_process_keyrings(
- &key_type_request_key_auth,
- (void *) (unsigned long) target_id,
- key_get_instantiation_authkey_match,
- cred);
+ sprintf(description, "%x", target_id);
+
+ authkey_ref = search_process_keyrings(&ctx);
if (IS_ERR(authkey_ref)) {
authkey = ERR_CAST(authkey_ref);
diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c
index ee32d181764a..8c0af08760c8 100644
--- a/security/keys/sysctl.c
+++ b/security/keys/sysctl.c
@@ -61,5 +61,16 @@ ctl_table key_sysctls[] = {
.extra1 = (void *) &zero,
.extra2 = (void *) &max,
},
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+ {
+ .procname = "persistent_keyring_expiry",
+ .data = &persistent_keyring_expiry,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *) &zero,
+ .extra2 = (void *) &max,
+ },
+#endif
{ }
};
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 55dc88939185..faa2caeb593f 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -25,14 +25,15 @@ static int logon_vet_description(const char *desc);
* arbitrary blob of data as the payload
*/
struct key_type key_type_user = {
- .name = "user",
- .instantiate = user_instantiate,
- .update = user_update,
- .match = user_match,
- .revoke = user_revoke,
- .destroy = user_destroy,
- .describe = user_describe,
- .read = user_read,
+ .name = "user",
+ .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
+ .instantiate = user_instantiate,
+ .update = user_update,
+ .match = user_match,
+ .revoke = user_revoke,
+ .destroy = user_destroy,
+ .describe = user_describe,
+ .read = user_read,
};
EXPORT_SYMBOL_GPL(key_type_user);
@@ -45,6 +46,7 @@ EXPORT_SYMBOL_GPL(key_type_user);
*/
struct key_type key_type_logon = {
.name = "logon",
+ .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
.instantiate = user_instantiate,
.update = user_update,
.match = user_match,
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 234bc2ab450c..9a62045e6282 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -397,7 +397,8 @@ void common_lsm_audit(struct common_audit_data *a,
if (a == NULL)
return;
/* we use GFP_ATOMIC so we won't sleep */
- ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC);
+ ab = audit_log_start(current->audit_context, GFP_ATOMIC | __GFP_NOWARN,
+ AUDIT_AVC);
if (ab == NULL)
return;
diff --git a/security/security.c b/security/security.c
index 4dc31f4f2700..15b6928592ef 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1340,22 +1340,17 @@ int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
return security_ops->xfrm_policy_delete_security(ctx);
}
-int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+int security_xfrm_state_alloc(struct xfrm_state *x,
+ struct xfrm_user_sec_ctx *sec_ctx)
{
- return security_ops->xfrm_state_alloc_security(x, sec_ctx, 0);
+ return security_ops->xfrm_state_alloc(x, sec_ctx);
}
EXPORT_SYMBOL(security_xfrm_state_alloc);
int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
struct xfrm_sec_ctx *polsec, u32 secid)
{
- if (!polsec)
- return 0;
- /*
- * We want the context to be taken from secid which is usually
- * from the sock.
- */
- return security_ops->xfrm_state_alloc_security(x, NULL, secid);
+ return security_ops->xfrm_state_alloc_acquire(x, polsec, secid);
}
int security_xfrm_state_delete(struct xfrm_state *x)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index c540795fb3f2..794c3ca49eac 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -95,7 +95,9 @@
#include "audit.h"
#include "avc_ss.h"
-#define NUM_SEL_MNT_OPTS 5
+#define SB_TYPE_FMT "%s%s%s"
+#define SB_SUBTYPE(sb) (sb->s_subtype && sb->s_subtype[0])
+#define SB_TYPE_ARGS(sb) sb->s_type->name, SB_SUBTYPE(sb) ? "." : "", SB_SUBTYPE(sb) ? sb->s_subtype : ""
extern struct security_operations *security_ops;
@@ -139,12 +141,28 @@ static struct kmem_cache *sel_inode_cache;
* This function checks the SECMARK reference counter to see if any SECMARK
* targets are currently configured, if the reference counter is greater than
* zero SECMARK is considered to be enabled. Returns true (1) if SECMARK is
- * enabled, false (0) if SECMARK is disabled.
+ * enabled, false (0) if SECMARK is disabled. If the always_check_network
+ * policy capability is enabled, SECMARK is always considered enabled.
*
*/
static int selinux_secmark_enabled(void)
{
- return (atomic_read(&selinux_secmark_refcount) > 0);
+ return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount));
+}
+
+/**
+ * selinux_peerlbl_enabled - Check to see if peer labeling is currently enabled
+ *
+ * Description:
+ * This function checks if NetLabel or labeled IPSEC is enabled. Returns true
+ * (1) if any are enabled or false (0) if neither are enabled. If the
+ * always_check_network policy capability is enabled, peer labeling
+ * is always considered enabled.
+ *
+ */
+static int selinux_peerlbl_enabled(void)
+{
+ return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled());
}
/*
@@ -309,8 +327,11 @@ enum {
Opt_defcontext = 3,
Opt_rootcontext = 4,
Opt_labelsupport = 5,
+ Opt_nextmntopt = 6,
};
+#define NUM_SEL_MNT_OPTS (Opt_nextmntopt - 1)
+
static const match_table_t tokens = {
{Opt_context, CONTEXT_STR "%s"},
{Opt_fscontext, FSCONTEXT_STR "%s"},
@@ -355,6 +376,29 @@ static int may_context_mount_inode_relabel(u32 sid,
return rc;
}
+static int selinux_is_sblabel_mnt(struct super_block *sb)
+{
+ struct superblock_security_struct *sbsec = sb->s_security;
+
+ if (sbsec->behavior == SECURITY_FS_USE_XATTR ||
+ sbsec->behavior == SECURITY_FS_USE_TRANS ||
+ sbsec->behavior == SECURITY_FS_USE_TASK)
+ return 1;
+
+ /* Special handling for sysfs. Is genfs but also has setxattr handler*/
+ if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
+ return 1;
+
+ /*
+ * Special handling for rootfs. Is genfs but supports
+ * setting SELinux context on in-core inodes.
+ */
+ if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0)
+ return 1;
+
+ return 0;
+}
+
static int sb_finish_set_opts(struct super_block *sb)
{
struct superblock_security_struct *sbsec = sb->s_security;
@@ -369,8 +413,8 @@ static int sb_finish_set_opts(struct super_block *sb)
the first boot of the SELinux kernel before we have
assigned xattr values to the filesystem. */
if (!root_inode->i_op->getxattr) {
- printk(KERN_WARNING "SELinux: (dev %s, type %s) has no "
- "xattr support\n", sb->s_id, sb->s_type->name);
+ printk(KERN_WARNING "SELinux: (dev %s, type "SB_TYPE_FMT") has no "
+ "xattr support\n", sb->s_id, SB_TYPE_ARGS(sb));
rc = -EOPNOTSUPP;
goto out;
}
@@ -378,35 +422,27 @@ static int sb_finish_set_opts(struct super_block *sb)
if (rc < 0 && rc != -ENODATA) {
if (rc == -EOPNOTSUPP)
printk(KERN_WARNING "SELinux: (dev %s, type "
- "%s) has no security xattr handler\n",
- sb->s_id, sb->s_type->name);
+ SB_TYPE_FMT") has no security xattr handler\n",
+ sb->s_id, SB_TYPE_ARGS(sb));
else
printk(KERN_WARNING "SELinux: (dev %s, type "
- "%s) getxattr errno %d\n", sb->s_id,
- sb->s_type->name, -rc);
+ SB_TYPE_FMT") getxattr errno %d\n", sb->s_id,
+ SB_TYPE_ARGS(sb), -rc);
goto out;
}
}
- sbsec->flags |= (SE_SBINITIALIZED | SE_SBLABELSUPP);
-
if (sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
- printk(KERN_ERR "SELinux: initialized (dev %s, type %s), unknown behavior\n",
- sb->s_id, sb->s_type->name);
+ printk(KERN_ERR "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), unknown behavior\n",
+ sb->s_id, SB_TYPE_ARGS(sb));
else
- printk(KERN_DEBUG "SELinux: initialized (dev %s, type %s), %s\n",
- sb->s_id, sb->s_type->name,
+ printk(KERN_DEBUG "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), %s\n",
+ sb->s_id, SB_TYPE_ARGS(sb),
labeling_behaviors[sbsec->behavior-1]);
- if (sbsec->behavior == SECURITY_FS_USE_GENFS ||
- sbsec->behavior == SECURITY_FS_USE_MNTPOINT ||
- sbsec->behavior == SECURITY_FS_USE_NONE ||
- sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
- sbsec->flags &= ~SE_SBLABELSUPP;
-
- /* Special handling for sysfs. Is genfs but also has setxattr handler*/
- if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
- sbsec->flags |= SE_SBLABELSUPP;
+ sbsec->flags |= SE_SBINITIALIZED;
+ if (selinux_is_sblabel_mnt(sb))
+ sbsec->flags |= SBLABEL_MNT;
/* Initialize the root inode. */
rc = inode_doinit_with_dentry(root_inode, root);
@@ -460,15 +496,18 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
if (!ss_initialized)
return -EINVAL;
+ /* make sure we always check enough bits to cover the mask */
+ BUILD_BUG_ON(SE_MNTMASK >= (1 << NUM_SEL_MNT_OPTS));
+
tmp = sbsec->flags & SE_MNTMASK;
/* count the number of mount options for this sb */
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < NUM_SEL_MNT_OPTS; i++) {
if (tmp & 0x01)
opts->num_mnt_opts++;
tmp >>= 1;
}
/* Check if the Label support flag is set */
- if (sbsec->flags & SE_SBLABELSUPP)
+ if (sbsec->flags & SBLABEL_MNT)
opts->num_mnt_opts++;
opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC);
@@ -515,9 +554,9 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
opts->mnt_opts[i] = context;
opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT;
}
- if (sbsec->flags & SE_SBLABELSUPP) {
+ if (sbsec->flags & SBLABEL_MNT) {
opts->mnt_opts[i] = NULL;
- opts->mnt_opts_flags[i++] = SE_SBLABELSUPP;
+ opts->mnt_opts_flags[i++] = SBLABEL_MNT;
}
BUG_ON(i != opts->num_mnt_opts);
@@ -561,7 +600,6 @@ static int selinux_set_mnt_opts(struct super_block *sb,
const struct cred *cred = current_cred();
int rc = 0, i;
struct superblock_security_struct *sbsec = sb->s_security;
- const char *name = sb->s_type->name;
struct inode *inode = sbsec->sb->s_root->d_inode;
struct inode_security_struct *root_isec = inode->i_security;
u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0;
@@ -614,14 +652,14 @@ static int selinux_set_mnt_opts(struct super_block *sb,
for (i = 0; i < num_opts; i++) {
u32 sid;
- if (flags[i] == SE_SBLABELSUPP)
+ if (flags[i] == SBLABEL_MNT)
continue;
rc = security_context_to_sid(mount_options[i],
strlen(mount_options[i]), &sid);
if (rc) {
printk(KERN_WARNING "SELinux: security_context_to_sid"
- "(%s) failed for (dev %s, type %s) errno=%d\n",
- mount_options[i], sb->s_id, name, rc);
+ "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n",
+ mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc);
goto out;
}
switch (flags[i]) {
@@ -685,9 +723,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
* Determine the labeling behavior to use for this
* filesystem type.
*/
- rc = security_fs_use((sbsec->flags & SE_SBPROC) ?
- "proc" : sb->s_type->name,
- &sbsec->behavior, &sbsec->sid);
+ rc = security_fs_use(sb);
if (rc) {
printk(KERN_WARNING
"%s: security_fs_use(%s) returned %d\n",
@@ -770,7 +806,8 @@ out:
out_double_mount:
rc = -EINVAL;
printk(KERN_WARNING "SELinux: mount invalid. Same superblock, different "
- "security settings for (dev %s, type %s)\n", sb->s_id, name);
+ "security settings for (dev %s, type "SB_TYPE_FMT")\n", sb->s_id,
+ SB_TYPE_ARGS(sb));
goto out;
}
@@ -1037,7 +1074,7 @@ static void selinux_write_opts(struct seq_file *m,
case DEFCONTEXT_MNT:
prefix = DEFCONTEXT_STR;
break;
- case SE_SBLABELSUPP:
+ case SBLABEL_MNT:
seq_putc(m, ',');
seq_puts(m, LABELSUPP_STR);
continue;
@@ -1649,7 +1686,7 @@ static int may_create(struct inode *dir,
if (rc)
return rc;
- if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
+ if (!newsid || !(sbsec->flags & SBLABEL_MNT)) {
rc = security_transition_sid(sid, dsec->sid, tclass,
&dentry->d_name, &newsid);
if (rc)
@@ -2437,14 +2474,14 @@ static int selinux_sb_remount(struct super_block *sb, void *data)
u32 sid;
size_t len;
- if (flags[i] == SE_SBLABELSUPP)
+ if (flags[i] == SBLABEL_MNT)
continue;
len = strlen(mount_options[i]);
rc = security_context_to_sid(mount_options[i], len, &sid);
if (rc) {
printk(KERN_WARNING "SELinux: security_context_to_sid"
- "(%s) failed for (dev %s, type %s) errno=%d\n",
- mount_options[i], sb->s_id, sb->s_type->name, rc);
+ "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n",
+ mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc);
goto out_free_opts;
}
rc = -EINVAL;
@@ -2482,8 +2519,8 @@ out_free_secdata:
return rc;
out_bad_option:
printk(KERN_WARNING "SELinux: unable to change security options "
- "during remount (dev %s, type=%s)\n", sb->s_id,
- sb->s_type->name);
+ "during remount (dev %s, type "SB_TYPE_FMT")\n", sb->s_id,
+ SB_TYPE_ARGS(sb));
goto out_free_opts;
}
@@ -2606,7 +2643,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
if ((sbsec->flags & SE_SBINITIALIZED) &&
(sbsec->behavior == SECURITY_FS_USE_MNTPOINT))
newsid = sbsec->mntpoint_sid;
- else if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
+ else if (!newsid || !(sbsec->flags & SBLABEL_MNT)) {
rc = security_transition_sid(sid, dsec->sid,
inode_mode_to_security_class(inode->i_mode),
qstr, &newsid);
@@ -2628,7 +2665,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
isec->initialized = 1;
}
- if (!ss_initialized || !(sbsec->flags & SE_SBLABELSUPP))
+ if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT))
return -EOPNOTSUPP;
if (name)
@@ -2830,7 +2867,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
return selinux_inode_setotherxattr(dentry, name);
sbsec = inode->i_sb->s_security;
- if (!(sbsec->flags & SE_SBLABELSUPP))
+ if (!(sbsec->flags & SBLABEL_MNT))
return -EOPNOTSUPP;
if (!inode_owner_or_capable(inode))
@@ -3791,8 +3828,12 @@ static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid)
u32 nlbl_sid;
u32 nlbl_type;
- selinux_skb_xfrm_sid(skb, &xfrm_sid);
- selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid);
+ err = selinux_skb_xfrm_sid(skb, &xfrm_sid);
+ if (unlikely(err))
+ return -EACCES;
+ err = selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid);
+ if (unlikely(err))
+ return -EACCES;
err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid);
if (unlikely(err)) {
@@ -4246,7 +4287,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
return selinux_sock_rcv_skb_compat(sk, skb, family);
secmark_active = selinux_secmark_enabled();
- peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
+ peerlbl_active = selinux_peerlbl_enabled();
if (!secmark_active && !peerlbl_active)
return 0;
@@ -4628,7 +4669,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
secmark_active = selinux_secmark_enabled();
netlbl_active = netlbl_enabled();
- peerlbl_active = netlbl_active || selinux_xfrm_enabled();
+ peerlbl_active = selinux_peerlbl_enabled();
if (!secmark_active && !peerlbl_active)
return NF_ACCEPT;
@@ -4780,7 +4821,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
return NF_ACCEPT;
#endif
secmark_active = selinux_secmark_enabled();
- peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
+ peerlbl_active = selinux_peerlbl_enabled();
if (!secmark_active && !peerlbl_active)
return NF_ACCEPT;
@@ -5784,7 +5825,8 @@ static struct security_operations selinux_ops = {
.xfrm_policy_clone_security = selinux_xfrm_policy_clone,
.xfrm_policy_free_security = selinux_xfrm_policy_free,
.xfrm_policy_delete_security = selinux_xfrm_policy_delete,
- .xfrm_state_alloc_security = selinux_xfrm_state_alloc,
+ .xfrm_state_alloc = selinux_xfrm_state_alloc,
+ .xfrm_state_alloc_acquire = selinux_xfrm_state_alloc_acquire,
.xfrm_state_free_security = selinux_xfrm_state_free,
.xfrm_state_delete_security = selinux_xfrm_state_delete,
.xfrm_policy_lookup = selinux_xfrm_policy_lookup,
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index aa47bcabb5f6..b1dfe1049450 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -58,8 +58,8 @@ struct superblock_security_struct {
u32 sid; /* SID of file system superblock */
u32 def_sid; /* default SID for labeling */
u32 mntpoint_sid; /* SECURITY_FS_USE_MNTPOINT context for files */
- unsigned int behavior; /* labeling behavior */
- unsigned char flags; /* which mount options were specified */
+ unsigned short behavior; /* labeling behavior */
+ unsigned short flags; /* which mount options were specified */
struct mutex lock;
struct list_head isec_head;
spinlock_t isec_lock;
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 8fd8e18ea340..fe341ae37004 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -45,14 +45,15 @@
/* Mask for just the mount related flags */
#define SE_MNTMASK 0x0f
/* Super block security struct flags for mount options */
+/* BE CAREFUL, these need to be the low order bits for selinux_get_mnt_opts */
#define CONTEXT_MNT 0x01
#define FSCONTEXT_MNT 0x02
#define ROOTCONTEXT_MNT 0x04
#define DEFCONTEXT_MNT 0x08
+#define SBLABEL_MNT 0x10
/* Non-mount related flags */
-#define SE_SBINITIALIZED 0x10
-#define SE_SBPROC 0x20
-#define SE_SBLABELSUPP 0x40
+#define SE_SBINITIALIZED 0x0100
+#define SE_SBPROC 0x0200
#define CONTEXT_STR "context="
#define FSCONTEXT_STR "fscontext="
@@ -68,12 +69,15 @@ extern int selinux_enabled;
enum {
POLICYDB_CAPABILITY_NETPEER,
POLICYDB_CAPABILITY_OPENPERM,
+ POLICYDB_CAPABILITY_REDHAT1,
+ POLICYDB_CAPABILITY_ALWAYSNETWORK,
__POLICYDB_CAPABILITY_MAX
};
#define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1)
extern int selinux_policycap_netpeer;
extern int selinux_policycap_openperm;
+extern int selinux_policycap_alwaysnetwork;
/*
* type_datum properties
@@ -172,8 +176,7 @@ int security_get_allow_unknown(void);
#define SECURITY_FS_USE_NATIVE 7 /* use native label support */
#define SECURITY_FS_USE_MAX 7 /* Highest SECURITY_FS_USE_XXX */
-int security_fs_use(const char *fstype, unsigned int *behavior,
- u32 *sid);
+int security_fs_use(struct super_block *sb);
int security_genfs_sid(const char *fstype, char *name, u16 sclass,
u32 *sid);
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 6713f04e30ba..0dec76c64cf5 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -10,29 +10,21 @@
#include <net/flow.h>
int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp,
- struct xfrm_user_sec_ctx *sec_ctx);
+ struct xfrm_user_sec_ctx *uctx);
int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
struct xfrm_sec_ctx **new_ctxp);
void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx);
int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx);
int selinux_xfrm_state_alloc(struct xfrm_state *x,
- struct xfrm_user_sec_ctx *sec_ctx, u32 secid);
+ struct xfrm_user_sec_ctx *uctx);
+int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
+ struct xfrm_sec_ctx *polsec, u32 secid);
void selinux_xfrm_state_free(struct xfrm_state *x);
int selinux_xfrm_state_delete(struct xfrm_state *x);
int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
- struct xfrm_policy *xp, const struct flowi *fl);
-
-/*
- * Extract the security blob from the sock (it's actually on the socket)
- */
-static inline struct inode_security_struct *get_sock_isec(struct sock *sk)
-{
- if (!sk->sk_socket)
- return NULL;
-
- return SOCK_INODE(sk->sk_socket)->i_security;
-}
+ struct xfrm_policy *xp,
+ const struct flowi *fl);
#ifdef CONFIG_SECURITY_NETWORK_XFRM
extern atomic_t selinux_xfrm_refcount;
@@ -42,10 +34,10 @@ static inline int selinux_xfrm_enabled(void)
return (atomic_read(&selinux_xfrm_refcount) > 0);
}
-int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
- struct common_audit_data *ad);
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
- struct common_audit_data *ad, u8 proto);
+int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+ struct common_audit_data *ad);
+int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+ struct common_audit_data *ad, u8 proto);
int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
static inline void selinux_xfrm_notify_policyload(void)
@@ -64,19 +56,21 @@ static inline int selinux_xfrm_enabled(void)
return 0;
}
-static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
- struct common_audit_data *ad)
+static inline int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+ struct common_audit_data *ad)
{
return 0;
}
-static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
- struct common_audit_data *ad, u8 proto)
+static inline int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+ struct common_audit_data *ad,
+ u8 proto)
{
return 0;
}
-static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
+static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid,
+ int ckall)
{
*sid = SECSID_NULL;
return 0;
@@ -87,10 +81,9 @@ static inline void selinux_xfrm_notify_policyload(void)
}
#endif
-static inline void selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid)
+static inline int selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid)
{
- int err = selinux_xfrm_decode_session(skb, sid, 0);
- BUG_ON(err);
+ return selinux_xfrm_decode_session(skb, sid, 0);
}
#endif /* _SELINUX_XFRM_H_ */
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index da4b8b233280..6235d052338b 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -442,8 +442,7 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
sksec->nlbl_state != NLBL_CONNLABELED)
return 0;
- local_bh_disable();
- bh_lock_sock_nested(sk);
+ lock_sock(sk);
/* connected sockets are allowed to disconnect when the address family
* is set to AF_UNSPEC, if that is what is happening we want to reset
@@ -464,7 +463,6 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
sksec->nlbl_state = NLBL_CONNLABELED;
socket_connect_return:
- bh_unlock_sock(sk);
- local_bh_enable();
+ release_sock(sk);
return rc;
}
diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
index c5454c0477c3..03a72c32afd7 100644
--- a/security/selinux/netnode.c
+++ b/security/selinux/netnode.c
@@ -166,6 +166,7 @@ static void sel_netnode_insert(struct sel_netnode *node)
break;
default:
BUG();
+ return;
}
/* we need to impose a limit on the growth of the hash table so check
@@ -225,6 +226,7 @@ static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid)
break;
default:
BUG();
+ ret = -EINVAL;
}
if (ret != 0)
goto out;
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 855e464e92ef..332ac8a80cf5 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -116,6 +116,8 @@ static struct nlmsg_perm nlmsg_audit_perms[] =
{ AUDIT_MAKE_EQUIV, NETLINK_AUDIT_SOCKET__NLMSG_WRITE },
{ AUDIT_TTY_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ },
{ AUDIT_TTY_SET, NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT },
+ { AUDIT_GET_FEATURE, NETLINK_AUDIT_SOCKET__NLMSG_READ },
+ { AUDIT_SET_FEATURE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE },
};
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index ff427733c290..5122affe06a8 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -44,7 +44,9 @@
/* Policy capability filenames */
static char *policycap_names[] = {
"network_peer_controls",
- "open_perms"
+ "open_perms",
+ "redhat1",
+ "always_check_network"
};
unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE;
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index 30f119b1d1ec..820313a04d49 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -213,7 +213,12 @@ netlbl_import_failure:
}
#endif /* CONFIG_NETLABEL */
-int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2)
+/*
+ * Check to see if all the bits set in e2 are also set in e1. Optionally,
+ * if last_e2bit is non-zero, the highest set bit in e2 cannot exceed
+ * last_e2bit.
+ */
+int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit)
{
struct ebitmap_node *n1, *n2;
int i;
@@ -223,14 +228,25 @@ int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2)
n1 = e1->node;
n2 = e2->node;
+
while (n1 && n2 && (n1->startbit <= n2->startbit)) {
if (n1->startbit < n2->startbit) {
n1 = n1->next;
continue;
}
- for (i = 0; i < EBITMAP_UNIT_NUMS; i++) {
+ for (i = EBITMAP_UNIT_NUMS - 1; (i >= 0) && !n2->maps[i]; )
+ i--; /* Skip trailing NULL map entries */
+ if (last_e2bit && (i >= 0)) {
+ u32 lastsetbit = n2->startbit + i * EBITMAP_UNIT_SIZE +
+ __fls(n2->maps[i]);
+ if (lastsetbit > last_e2bit)
+ return 0;
+ }
+
+ while (i >= 0) {
if ((n1->maps[i] & n2->maps[i]) != n2->maps[i])
return 0;
+ i--;
}
n1 = n1->next;
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h
index 922f8afa89dd..712c8a7b8e8b 100644
--- a/security/selinux/ss/ebitmap.h
+++ b/security/selinux/ss/ebitmap.h
@@ -16,7 +16,13 @@
#include <net/netlabel.h>
-#define EBITMAP_UNIT_NUMS ((32 - sizeof(void *) - sizeof(u32)) \
+#ifdef CONFIG_64BIT
+#define EBITMAP_NODE_SIZE 64
+#else
+#define EBITMAP_NODE_SIZE 32
+#endif
+
+#define EBITMAP_UNIT_NUMS ((EBITMAP_NODE_SIZE-sizeof(void *)-sizeof(u32))\
/ sizeof(unsigned long))
#define EBITMAP_UNIT_SIZE BITS_PER_LONG
#define EBITMAP_SIZE (EBITMAP_UNIT_NUMS * EBITMAP_UNIT_SIZE)
@@ -117,7 +123,7 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n,
int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2);
int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src);
-int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2);
+int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit);
int ebitmap_get_bit(struct ebitmap *e, unsigned long bit);
int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value);
void ebitmap_destroy(struct ebitmap *e);
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 40de8d3f208e..c85bc1ec040c 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -160,8 +160,6 @@ void mls_sid_to_context(struct context *context,
int mls_level_isvalid(struct policydb *p, struct mls_level *l)
{
struct level_datum *levdatum;
- struct ebitmap_node *node;
- int i;
if (!l->sens || l->sens > p->p_levels.nprim)
return 0;
@@ -170,19 +168,13 @@ int mls_level_isvalid(struct policydb *p, struct mls_level *l)
if (!levdatum)
return 0;
- ebitmap_for_each_positive_bit(&l->cat, node, i) {
- if (i > p->p_cats.nprim)
- return 0;
- if (!ebitmap_get_bit(&levdatum->level->cat, i)) {
- /*
- * Category may not be associated with
- * sensitivity.
- */
- return 0;
- }
- }
-
- return 1;
+ /*
+ * Return 1 iff all the bits set in l->cat are also be set in
+ * levdatum->level->cat and no bit in l->cat is larger than
+ * p->p_cats.nprim.
+ */
+ return ebitmap_contains(&levdatum->level->cat, &l->cat,
+ p->p_cats.nprim);
}
int mls_range_isvalid(struct policydb *p, struct mls_range *r)
diff --git a/security/selinux/ss/mls_types.h b/security/selinux/ss/mls_types.h
index 03bed52a8052..e93648774137 100644
--- a/security/selinux/ss/mls_types.h
+++ b/security/selinux/ss/mls_types.h
@@ -35,7 +35,7 @@ static inline int mls_level_eq(struct mls_level *l1, struct mls_level *l2)
static inline int mls_level_dom(struct mls_level *l1, struct mls_level *l2)
{
return ((l1->sens >= l2->sens) &&
- ebitmap_contains(&l1->cat, &l2->cat));
+ ebitmap_contains(&l1->cat, &l2->cat, 0));
}
#define mls_level_incomp(l1, l2) \
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index c8adde3aff8f..f6195ebde3c9 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -3203,9 +3203,8 @@ static int range_write_helper(void *key, void *data, void *ptr)
static int range_write(struct policydb *p, void *fp)
{
- size_t nel;
__le32 buf[1];
- int rc;
+ int rc, nel;
struct policy_data pd;
pd.p = p;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index b4feecc3fe01..ee470a0b5c27 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -72,6 +72,7 @@
int selinux_policycap_netpeer;
int selinux_policycap_openperm;
+int selinux_policycap_alwaysnetwork;
static DEFINE_RWLOCK(policy_rwlock);
@@ -1812,6 +1813,8 @@ static void security_load_policycaps(void)
POLICYDB_CAPABILITY_NETPEER);
selinux_policycap_openperm = ebitmap_get_bit(&policydb.policycaps,
POLICYDB_CAPABILITY_OPENPERM);
+ selinux_policycap_alwaysnetwork = ebitmap_get_bit(&policydb.policycaps,
+ POLICYDB_CAPABILITY_ALWAYSNETWORK);
}
static int security_preserve_bools(struct policydb *p);
@@ -2323,43 +2326,74 @@ out:
/**
* security_fs_use - Determine how to handle labeling for a filesystem.
- * @fstype: filesystem type
- * @behavior: labeling behavior
- * @sid: SID for filesystem (superblock)
+ * @sb: superblock in question
*/
-int security_fs_use(
- const char *fstype,
- unsigned int *behavior,
- u32 *sid)
+int security_fs_use(struct super_block *sb)
{
int rc = 0;
struct ocontext *c;
+ struct superblock_security_struct *sbsec = sb->s_security;
+ const char *fstype = sb->s_type->name;
+ const char *subtype = (sb->s_subtype && sb->s_subtype[0]) ? sb->s_subtype : NULL;
+ struct ocontext *base = NULL;
read_lock(&policy_rwlock);
- c = policydb.ocontexts[OCON_FSUSE];
- while (c) {
- if (strcmp(fstype, c->u.name) == 0)
+ for (c = policydb.ocontexts[OCON_FSUSE]; c; c = c->next) {
+ char *sub;
+ int baselen;
+
+ baselen = strlen(fstype);
+
+ /* if base does not match, this is not the one */
+ if (strncmp(fstype, c->u.name, baselen))
+ continue;
+
+ /* if there is no subtype, this is the one! */
+ if (!subtype)
+ break;
+
+ /* skip past the base in this entry */
+ sub = c->u.name + baselen;
+
+ /* entry is only a base. save it. keep looking for subtype */
+ if (sub[0] == '\0') {
+ base = c;
+ continue;
+ }
+
+ /* entry is not followed by a subtype, so it is not a match */
+ if (sub[0] != '.')
+ continue;
+
+ /* whew, we found a subtype of this fstype */
+ sub++; /* move past '.' */
+
+ /* exact match of fstype AND subtype */
+ if (!strcmp(subtype, sub))
break;
- c = c->next;
}
+ /* in case we had found an fstype match but no subtype match */
+ if (!c)
+ c = base;
+
if (c) {
- *behavior = c->v.behavior;
+ sbsec->behavior = c->v.behavior;
if (!c->sid[0]) {
rc = sidtab_context_to_sid(&sidtab, &c->context[0],
&c->sid[0]);
if (rc)
goto out;
}
- *sid = c->sid[0];
+ sbsec->sid = c->sid[0];
} else {
- rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, sid);
+ rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, &sbsec->sid);
if (rc) {
- *behavior = SECURITY_FS_USE_NONE;
+ sbsec->behavior = SECURITY_FS_USE_NONE;
rc = 0;
} else {
- *behavior = SECURITY_FS_USE_GENFS;
+ sbsec->behavior = SECURITY_FS_USE_GENFS;
}
}
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index d03081886214..a91d205ec0c6 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -56,7 +56,7 @@
atomic_t selinux_xfrm_refcount = ATOMIC_INIT(0);
/*
- * Returns true if an LSM/SELinux context
+ * Returns true if the context is an LSM/SELinux context.
*/
static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx)
{
@@ -66,7 +66,7 @@ static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx)
}
/*
- * Returns true if the xfrm contains a security blob for SELinux
+ * Returns true if the xfrm contains a security blob for SELinux.
*/
static inline int selinux_authorizable_xfrm(struct xfrm_state *x)
{
@@ -74,48 +74,111 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x)
}
/*
- * LSM hook implementation that authorizes that a flow can use
- * a xfrm policy rule.
+ * Allocates a xfrm_sec_state and populates it using the supplied security
+ * xfrm_user_sec_ctx context.
*/
-int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
+static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp,
+ struct xfrm_user_sec_ctx *uctx)
{
int rc;
- u32 sel_sid;
+ const struct task_security_struct *tsec = current_security();
+ struct xfrm_sec_ctx *ctx = NULL;
+ u32 str_len;
- /* Context sid is either set to label or ANY_ASSOC */
- if (ctx) {
- if (!selinux_authorizable_ctx(ctx))
- return -EINVAL;
-
- sel_sid = ctx->ctx_sid;
- } else
- /*
- * All flows should be treated as polmatch'ing an
- * otherwise applicable "non-labeled" policy. This
- * would prevent inadvertent "leaks".
- */
- return 0;
+ if (ctxp == NULL || uctx == NULL ||
+ uctx->ctx_doi != XFRM_SC_DOI_LSM ||
+ uctx->ctx_alg != XFRM_SC_ALG_SELINUX)
+ return -EINVAL;
- rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION,
- ASSOCIATION__POLMATCH,
- NULL);
+ str_len = uctx->ctx_len;
+ if (str_len >= PAGE_SIZE)
+ return -ENOMEM;
- if (rc == -EACCES)
- return -ESRCH;
+ ctx = kmalloc(sizeof(*ctx) + str_len + 1, GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->ctx_doi = XFRM_SC_DOI_LSM;
+ ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
+ ctx->ctx_len = str_len;
+ memcpy(ctx->ctx_str, &uctx[1], str_len);
+ ctx->ctx_str[str_len] = '\0';
+ rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid);
+ if (rc)
+ goto err;
+
+ rc = avc_has_perm(tsec->sid, ctx->ctx_sid,
+ SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, NULL);
+ if (rc)
+ goto err;
+
+ *ctxp = ctx;
+ atomic_inc(&selinux_xfrm_refcount);
+ return 0;
+
+err:
+ kfree(ctx);
return rc;
}
/*
+ * Free the xfrm_sec_ctx structure.
+ */
+static void selinux_xfrm_free(struct xfrm_sec_ctx *ctx)
+{
+ if (!ctx)
+ return;
+
+ atomic_dec(&selinux_xfrm_refcount);
+ kfree(ctx);
+}
+
+/*
+ * Authorize the deletion of a labeled SA or policy rule.
+ */
+static int selinux_xfrm_delete(struct xfrm_sec_ctx *ctx)
+{
+ const struct task_security_struct *tsec = current_security();
+
+ if (!ctx)
+ return 0;
+
+ return avc_has_perm(tsec->sid, ctx->ctx_sid,
+ SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
+ NULL);
+}
+
+/*
+ * LSM hook implementation that authorizes that a flow can use a xfrm policy
+ * rule.
+ */
+int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
+{
+ int rc;
+
+ /* All flows should be treated as polmatch'ing an otherwise applicable
+ * "non-labeled" policy. This would prevent inadvertent "leaks". */
+ if (!ctx)
+ return 0;
+
+ /* Context sid is either set to label or ANY_ASSOC */
+ if (!selinux_authorizable_ctx(ctx))
+ return -EINVAL;
+
+ rc = avc_has_perm(fl_secid, ctx->ctx_sid,
+ SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, NULL);
+ return (rc == -EACCES ? -ESRCH : rc);
+}
+
+/*
* LSM hook implementation that authorizes that a state matches
* the given policy, flow combo.
*/
-
-int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp,
- const struct flowi *fl)
+int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
+ struct xfrm_policy *xp,
+ const struct flowi *fl)
{
u32 state_sid;
- int rc;
if (!xp->security)
if (x->security)
@@ -138,187 +201,80 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *
if (fl->flowi_secid != state_sid)
return 0;
- rc = avc_has_perm(fl->flowi_secid, state_sid, SECCLASS_ASSOCIATION,
- ASSOCIATION__SENDTO,
- NULL)? 0:1;
-
- /*
- * We don't need a separate SA Vs. policy polmatch check
- * since the SA is now of the same label as the flow and
- * a flow Vs. policy polmatch check had already happened
- * in selinux_xfrm_policy_lookup() above.
- */
-
- return rc;
+ /* We don't need a separate SA Vs. policy polmatch check since the SA
+ * is now of the same label as the flow and a flow Vs. policy polmatch
+ * check had already happened in selinux_xfrm_policy_lookup() above. */
+ return (avc_has_perm(fl->flowi_secid, state_sid,
+ SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO,
+ NULL) ? 0 : 1);
}
/*
* LSM hook implementation that checks and/or returns the xfrm sid for the
* incoming packet.
*/
-
int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
{
+ u32 sid_session = SECSID_NULL;
struct sec_path *sp;
- *sid = SECSID_NULL;
-
if (skb == NULL)
- return 0;
+ goto out;
sp = skb->sp;
if (sp) {
- int i, sid_set = 0;
+ int i;
- for (i = sp->len-1; i >= 0; i--) {
+ for (i = sp->len - 1; i >= 0; i--) {
struct xfrm_state *x = sp->xvec[i];
if (selinux_authorizable_xfrm(x)) {
struct xfrm_sec_ctx *ctx = x->security;
- if (!sid_set) {
- *sid = ctx->ctx_sid;
- sid_set = 1;
-
+ if (sid_session == SECSID_NULL) {
+ sid_session = ctx->ctx_sid;
if (!ckall)
- break;
- } else if (*sid != ctx->ctx_sid)
+ goto out;
+ } else if (sid_session != ctx->ctx_sid) {
+ *sid = SECSID_NULL;
return -EINVAL;
+ }
}
}
}
- return 0;
-}
-
-/*
- * Security blob allocation for xfrm_policy and xfrm_state
- * CTX does not have a meaningful value on input
- */
-static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
- struct xfrm_user_sec_ctx *uctx, u32 sid)
-{
- int rc = 0;
- const struct task_security_struct *tsec = current_security();
- struct xfrm_sec_ctx *ctx = NULL;
- char *ctx_str = NULL;
- u32 str_len;
-
- BUG_ON(uctx && sid);
-
- if (!uctx)
- goto not_from_user;
-
- if (uctx->ctx_alg != XFRM_SC_ALG_SELINUX)
- return -EINVAL;
-
- str_len = uctx->ctx_len;
- if (str_len >= PAGE_SIZE)
- return -ENOMEM;
-
- *ctxp = ctx = kmalloc(sizeof(*ctx) +
- str_len + 1,
- GFP_KERNEL);
-
- if (!ctx)
- return -ENOMEM;
-
- ctx->ctx_doi = uctx->ctx_doi;
- ctx->ctx_len = str_len;
- ctx->ctx_alg = uctx->ctx_alg;
-
- memcpy(ctx->ctx_str,
- uctx+1,
- str_len);
- ctx->ctx_str[str_len] = 0;
- rc = security_context_to_sid(ctx->ctx_str,
- str_len,
- &ctx->ctx_sid);
-
- if (rc)
- goto out;
-
- /*
- * Does the subject have permission to set security context?
- */
- rc = avc_has_perm(tsec->sid, ctx->ctx_sid,
- SECCLASS_ASSOCIATION,
- ASSOCIATION__SETCONTEXT, NULL);
- if (rc)
- goto out;
-
- return rc;
-
-not_from_user:
- rc = security_sid_to_context(sid, &ctx_str, &str_len);
- if (rc)
- goto out;
-
- *ctxp = ctx = kmalloc(sizeof(*ctx) +
- str_len,
- GFP_ATOMIC);
-
- if (!ctx) {
- rc = -ENOMEM;
- goto out;
- }
-
- ctx->ctx_doi = XFRM_SC_DOI_LSM;
- ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
- ctx->ctx_sid = sid;
- ctx->ctx_len = str_len;
- memcpy(ctx->ctx_str,
- ctx_str,
- str_len);
-
- goto out2;
-
out:
- *ctxp = NULL;
- kfree(ctx);
-out2:
- kfree(ctx_str);
- return rc;
+ *sid = sid_session;
+ return 0;
}
/*
- * LSM hook implementation that allocs and transfers uctx spec to
- * xfrm_policy.
+ * LSM hook implementation that allocs and transfers uctx spec to xfrm_policy.
*/
int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp,
struct xfrm_user_sec_ctx *uctx)
{
- int err;
-
- BUG_ON(!uctx);
-
- err = selinux_xfrm_sec_ctx_alloc(ctxp, uctx, 0);
- if (err == 0)
- atomic_inc(&selinux_xfrm_refcount);
-
- return err;
+ return selinux_xfrm_alloc_user(ctxp, uctx);
}
-
/*
- * LSM hook implementation that copies security data structure from old to
- * new for policy cloning.
+ * LSM hook implementation that copies security data structure from old to new
+ * for policy cloning.
*/
int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
struct xfrm_sec_ctx **new_ctxp)
{
struct xfrm_sec_ctx *new_ctx;
- if (old_ctx) {
- new_ctx = kmalloc(sizeof(*old_ctx) + old_ctx->ctx_len,
- GFP_ATOMIC);
- if (!new_ctx)
- return -ENOMEM;
+ if (!old_ctx)
+ return 0;
+
+ new_ctx = kmemdup(old_ctx, sizeof(*old_ctx) + old_ctx->ctx_len,
+ GFP_ATOMIC);
+ if (!new_ctx)
+ return -ENOMEM;
+ atomic_inc(&selinux_xfrm_refcount);
+ *new_ctxp = new_ctx;
- memcpy(new_ctx, old_ctx, sizeof(*new_ctx));
- memcpy(new_ctx->ctx_str, old_ctx->ctx_str, new_ctx->ctx_len);
- atomic_inc(&selinux_xfrm_refcount);
- *new_ctxp = new_ctx;
- }
return 0;
}
@@ -327,8 +283,7 @@ int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
*/
void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx)
{
- atomic_dec(&selinux_xfrm_refcount);
- kfree(ctx);
+ selinux_xfrm_free(ctx);
}
/*
@@ -336,31 +291,55 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx)
*/
int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
{
- const struct task_security_struct *tsec = current_security();
-
- if (!ctx)
- return 0;
+ return selinux_xfrm_delete(ctx);
+}
- return avc_has_perm(tsec->sid, ctx->ctx_sid,
- SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
- NULL);
+/*
+ * LSM hook implementation that allocates a xfrm_sec_state, populates it using
+ * the supplied security context, and assigns it to the xfrm_state.
+ */
+int selinux_xfrm_state_alloc(struct xfrm_state *x,
+ struct xfrm_user_sec_ctx *uctx)
+{
+ return selinux_xfrm_alloc_user(&x->security, uctx);
}
/*
- * LSM hook implementation that allocs and transfers sec_ctx spec to
- * xfrm_state.
+ * LSM hook implementation that allocates a xfrm_sec_state and populates based
+ * on a secid.
*/
-int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx,
- u32 secid)
+int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
+ struct xfrm_sec_ctx *polsec, u32 secid)
{
- int err;
+ int rc;
+ struct xfrm_sec_ctx *ctx;
+ char *ctx_str = NULL;
+ int str_len;
+
+ if (!polsec)
+ return 0;
- BUG_ON(!x);
+ if (secid == 0)
+ return -EINVAL;
- err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, secid);
- if (err == 0)
- atomic_inc(&selinux_xfrm_refcount);
- return err;
+ rc = security_sid_to_context(secid, &ctx_str, &str_len);
+ if (rc)
+ return rc;
+
+ ctx = kmalloc(sizeof(*ctx) + str_len, GFP_ATOMIC);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->ctx_doi = XFRM_SC_DOI_LSM;
+ ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
+ ctx->ctx_sid = secid;
+ ctx->ctx_len = str_len;
+ memcpy(ctx->ctx_str, ctx_str, str_len);
+ kfree(ctx_str);
+
+ x->security = ctx;
+ atomic_inc(&selinux_xfrm_refcount);
+ return 0;
}
/*
@@ -368,24 +347,15 @@ int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uct
*/
void selinux_xfrm_state_free(struct xfrm_state *x)
{
- atomic_dec(&selinux_xfrm_refcount);
- kfree(x->security);
+ selinux_xfrm_free(x->security);
}
- /*
- * LSM hook implementation that authorizes deletion of labeled SAs.
- */
+/*
+ * LSM hook implementation that authorizes deletion of labeled SAs.
+ */
int selinux_xfrm_state_delete(struct xfrm_state *x)
{
- const struct task_security_struct *tsec = current_security();
- struct xfrm_sec_ctx *ctx = x->security;
-
- if (!ctx)
- return 0;
-
- return avc_has_perm(tsec->sid, ctx->ctx_sid,
- SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
- NULL);
+ return selinux_xfrm_delete(x->security);
}
/*
@@ -395,14 +365,12 @@ int selinux_xfrm_state_delete(struct xfrm_state *x)
* we need to check for unlabelled access since this may not have
* gone thru the IPSec process.
*/
-int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
- struct common_audit_data *ad)
+int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+ struct common_audit_data *ad)
{
- int i, rc = 0;
- struct sec_path *sp;
- u32 sel_sid = SECINITSID_UNLABELED;
-
- sp = skb->sp;
+ int i;
+ struct sec_path *sp = skb->sp;
+ u32 peer_sid = SECINITSID_UNLABELED;
if (sp) {
for (i = 0; i < sp->len; i++) {
@@ -410,23 +378,17 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
if (x && selinux_authorizable_xfrm(x)) {
struct xfrm_sec_ctx *ctx = x->security;
- sel_sid = ctx->ctx_sid;
+ peer_sid = ctx->ctx_sid;
break;
}
}
}
- /*
- * This check even when there's no association involved is
- * intended, according to Trent Jaeger, to make sure a
- * process can't engage in non-ipsec communication unless
- * explicitly allowed by policy.
- */
-
- rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION,
- ASSOCIATION__RECVFROM, ad);
-
- return rc;
+ /* This check even when there's no association involved is intended,
+ * according to Trent Jaeger, to make sure a process can't engage in
+ * non-IPsec communication unless explicitly allowed by policy. */
+ return avc_has_perm(sk_sid, peer_sid,
+ SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, ad);
}
/*
@@ -436,49 +398,38 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
* If we do have a authorizable security association, then it has already been
* checked in the selinux_xfrm_state_pol_flow_match hook above.
*/
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
- struct common_audit_data *ad, u8 proto)
+int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+ struct common_audit_data *ad, u8 proto)
{
struct dst_entry *dst;
- int rc = 0;
-
- dst = skb_dst(skb);
-
- if (dst) {
- struct dst_entry *dst_test;
-
- for (dst_test = dst; dst_test != NULL;
- dst_test = dst_test->child) {
- struct xfrm_state *x = dst_test->xfrm;
-
- if (x && selinux_authorizable_xfrm(x))
- goto out;
- }
- }
switch (proto) {
case IPPROTO_AH:
case IPPROTO_ESP:
case IPPROTO_COMP:
- /*
- * We should have already seen this packet once before
- * it underwent xfrm(s). No need to subject it to the
- * unlabeled check.
- */
- goto out;
+ /* We should have already seen this packet once before it
+ * underwent xfrm(s). No need to subject it to the unlabeled
+ * check. */
+ return 0;
default:
break;
}
- /*
- * This check even when there's no association involved is
- * intended, according to Trent Jaeger, to make sure a
- * process can't engage in non-ipsec communication unless
- * explicitly allowed by policy.
- */
+ dst = skb_dst(skb);
+ if (dst) {
+ struct dst_entry *iter;
- rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION,
- ASSOCIATION__SENDTO, ad);
-out:
- return rc;
+ for (iter = dst; iter != NULL; iter = iter->child) {
+ struct xfrm_state *x = iter->xfrm;
+
+ if (x && selinux_authorizable_xfrm(x))
+ return 0;
+ }
+ }
+
+ /* This check even when there's no association involved is intended,
+ * according to Trent Jaeger, to make sure a process can't engage in
+ * non-IPsec communication unless explicitly allowed by policy. */
+ return avc_has_perm(sk_sid, SECINITSID_UNLABELED,
+ SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, ad);
}
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 076b8e8a51ab..364cc64fce71 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -177,9 +177,13 @@ struct smk_port_label {
#define SMACK_CIPSO_MAXCATNUM 184 /* 23 * 8 */
/*
- * Flag for transmute access
+ * Flags for untraditional access modes.
+ * It shouldn't be necessary to avoid conflicts with definitions
+ * in fs.h, but do so anyway.
*/
-#define MAY_TRANSMUTE 64
+#define MAY_TRANSMUTE 0x00001000 /* Controls directory labeling */
+#define MAY_LOCK 0x00002000 /* Locks should be writes, but ... */
+
/*
* Just to make the common cases easier to deal with
*/
@@ -188,9 +192,9 @@ struct smk_port_label {
#define MAY_NOT 0
/*
- * Number of access types used by Smack (rwxat)
+ * Number of access types used by Smack (rwxatl)
*/
-#define SMK_NUM_ACCESS_TYPE 5
+#define SMK_NUM_ACCESS_TYPE 6
/* SMACK data */
struct smack_audit_data {
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index b3b59b1e93d6..14293cd9b1e5 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -84,6 +84,8 @@ int log_policy = SMACK_AUDIT_DENIED;
*
* Do the object check first because that is more
* likely to differ.
+ *
+ * Allowing write access implies allowing locking.
*/
int smk_access_entry(char *subject_label, char *object_label,
struct list_head *rule_list)
@@ -99,6 +101,11 @@ int smk_access_entry(char *subject_label, char *object_label,
}
}
+ /*
+ * MAY_WRITE implies MAY_LOCK.
+ */
+ if ((may & MAY_WRITE) == MAY_WRITE)
+ may |= MAY_LOCK;
return may;
}
@@ -245,6 +252,7 @@ out_audit:
static inline void smack_str_from_perm(char *string, int access)
{
int i = 0;
+
if (access & MAY_READ)
string[i++] = 'r';
if (access & MAY_WRITE)
@@ -255,6 +263,8 @@ static inline void smack_str_from_perm(char *string, int access)
string[i++] = 'a';
if (access & MAY_TRANSMUTE)
string[i++] = 't';
+ if (access & MAY_LOCK)
+ string[i++] = 'l';
string[i] = '\0';
}
/**
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 8825375cc031..b0be893ad44d 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -185,7 +185,7 @@ static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode)
smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
smk_ad_setfield_u_tsk(&ad, ctp);
- rc = smk_curacc(skp->smk_known, MAY_READWRITE, &ad);
+ rc = smk_curacc(skp->smk_known, mode, &ad);
return rc;
}
@@ -1146,7 +1146,7 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd,
* @file: the object
* @cmd: unused
*
- * Returns 0 if current has write access, error code otherwise
+ * Returns 0 if current has lock access, error code otherwise
*/
static int smack_file_lock(struct file *file, unsigned int cmd)
{
@@ -1154,7 +1154,7 @@ static int smack_file_lock(struct file *file, unsigned int cmd)
smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
smk_ad_setfield_u_fs_path(&ad, file->f_path);
- return smk_curacc(file->f_security, MAY_WRITE, &ad);
+ return smk_curacc(file->f_security, MAY_LOCK, &ad);
}
/**
@@ -1178,8 +1178,13 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
switch (cmd) {
case F_GETLK:
+ break;
case F_SETLK:
case F_SETLKW:
+ smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
+ smk_ad_setfield_u_fs_path(&ad, file->f_path);
+ rc = smk_curacc(file->f_security, MAY_LOCK, &ad);
+ break;
case F_SETOWN:
case F_SETSIG:
smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 80f4b4a45725..160aa08e3cd5 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -139,7 +139,7 @@ const char *smack_cipso_option = SMACK_CIPSO_OPTION;
* SMK_LOADLEN: Smack rule length
*/
#define SMK_OACCESS "rwxa"
-#define SMK_ACCESS "rwxat"
+#define SMK_ACCESS "rwxatl"
#define SMK_OACCESSLEN (sizeof(SMK_OACCESS) - 1)
#define SMK_ACCESSLEN (sizeof(SMK_ACCESS) - 1)
#define SMK_OLOADLEN (SMK_LABELLEN + SMK_LABELLEN + SMK_OACCESSLEN)
@@ -282,6 +282,10 @@ static int smk_perm_from_str(const char *string)
case 'T':
perm |= MAY_TRANSMUTE;
break;
+ case 'l':
+ case 'L':
+ perm |= MAY_LOCK;
+ break;
default:
return perm;
}
@@ -452,7 +456,7 @@ static ssize_t smk_write_rules_list(struct file *file, const char __user *buf,
/*
* Minor hack for backward compatibility
*/
- if (count != SMK_OLOADLEN && count != SMK_LOADLEN)
+ if (count < SMK_OLOADLEN || count > SMK_LOADLEN)
return -EINVAL;
} else {
if (count >= PAGE_SIZE) {
@@ -592,6 +596,8 @@ static void smk_rule_show(struct seq_file *s, struct smack_rule *srp, int max)
seq_putc(s, 'a');
if (srp->smk_access & MAY_TRANSMUTE)
seq_putc(s, 't');
+ if (srp->smk_access & MAY_LOCK)
+ seq_putc(s, 'l');
seq_putc(s, '\n');
}
diff --git a/sound/soc/davinci/davinci-pcm.c b/sound/soc/davinci/davinci-pcm.c
index fa64cd85204f..fb5d107f5603 100644
--- a/sound/soc/davinci/davinci-pcm.c
+++ b/sound/soc/davinci/davinci-pcm.c
@@ -238,7 +238,7 @@ static void davinci_pcm_dma_irq(unsigned link, u16 ch_status, void *data)
print_buf_info(prtd->ram_channel, "i ram_channel");
pr_debug("davinci_pcm: link=%d, status=0x%x\n", link, ch_status);
- if (unlikely(ch_status != DMA_COMPLETE))
+ if (unlikely(ch_status != EDMA_DMA_COMPLETE))
return;
if (snd_pcm_running(substream)) {
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index fe702076ca46..9d77f13c2d25 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2,7 +2,7 @@
* turbostat -- show CPU frequency and C-state residency
* on modern Intel turbo-capable processors.
*
- * Copyright (c) 2012 Intel Corporation.
+ * Copyright (c) 2013 Intel Corporation.
* Len Brown <len.brown@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
@@ -47,6 +47,8 @@ unsigned int skip_c1;
unsigned int do_nhm_cstates;
unsigned int do_snb_cstates;
unsigned int do_c8_c9_c10;
+unsigned int do_slm_cstates;
+unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int units = 1000000000; /* Ghz etc */
@@ -81,6 +83,8 @@ double rapl_joule_counter_range;
#define RAPL_DRAM (1 << 3)
#define RAPL_PKG_PERF_STATUS (1 << 4)
#define RAPL_DRAM_PERF_STATUS (1 << 5)
+#define RAPL_PKG_POWER_INFO (1 << 6)
+#define RAPL_CORE_POLICY (1 << 7)
#define TJMAX_DEFAULT 100
#define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -96,7 +100,7 @@ struct thread_data {
unsigned long long tsc;
unsigned long long aperf;
unsigned long long mperf;
- unsigned long long c1; /* derived */
+ unsigned long long c1;
unsigned long long extra_msr64;
unsigned long long extra_delta64;
unsigned long long extra_msr32;
@@ -266,7 +270,7 @@ void print_header(void)
outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64);
if (do_nhm_cstates)
outp += sprintf(outp, " %%c1");
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %%c3");
if (do_nhm_cstates)
outp += sprintf(outp, " %%c6");
@@ -280,9 +284,9 @@ void print_header(void)
if (do_snb_cstates)
outp += sprintf(outp, " %%pc2");
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %%pc3");
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %%pc6");
if (do_snb_cstates)
outp += sprintf(outp, " %%pc7");
@@ -480,7 +484,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
goto done;
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);
if (do_nhm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);
@@ -499,9 +503,9 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (do_snb_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
if (do_snb_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
@@ -648,17 +652,24 @@ delta_thread(struct thread_data *new, struct thread_data *old,
}
- /*
- * As counter collection is not atomic,
- * it is possible for mperf's non-halted cycles + idle states
- * to exceed TSC's all cycles: show c1 = 0% in that case.
- */
- if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
- old->c1 = 0;
- else {
- /* normal case, derive c1 */
- old->c1 = old->tsc - old->mperf - core_delta->c3
+ if (use_c1_residency_msr) {
+ /*
+ * Some models have a dedicated C1 residency MSR,
+ * which should be more accurate than the derivation below.
+ */
+ } else {
+ /*
+ * As counter collection is not atomic,
+ * it is possible for mperf's non-halted cycles + idle states
+ * to exceed TSC's all cycles: show c1 = 0% in that case.
+ */
+ if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
+ old->c1 = 0;
+ else {
+ /* normal case, derive c1 */
+ old->c1 = old->tsc - old->mperf - core_delta->c3
- core_delta->c6 - core_delta->c7;
+ }
}
if (old->mperf == 0) {
@@ -872,13 +883,21 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
return -5;
+ if (use_c1_residency_msr) {
+ if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
+ return -6;
+ }
+
/* collect core counters only for 1st thread in core */
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
return 0;
- if (do_nhm_cstates) {
+ if (do_nhm_cstates && !do_slm_cstates) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
+ }
+
+ if (do_nhm_cstates) {
if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
return -7;
}
@@ -898,7 +917,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
- if (do_nhm_cstates) {
+ if (do_nhm_cstates && !do_slm_cstates) {
if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
return -9;
if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
@@ -977,7 +996,7 @@ void print_verbose_header(void)
ratio, bclk, ratio * bclk);
get_msr(0, MSR_IA32_POWER_CTL, &msr);
- fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n",
+ fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
msr, msr & 0x2 ? "EN" : "DIS");
if (!do_ivt_turbo_ratio_limit)
@@ -1046,25 +1065,28 @@ print_nhm_turbo_ratio_limits:
switch(msr & 0x7) {
case 0:
- fprintf(stderr, "pc0");
+ fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0");
break;
case 1:
- fprintf(stderr, do_snb_cstates ? "pc2" : "pc0");
+ fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0");
break;
case 2:
- fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3");
+ fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3");
break;
case 3:
- fprintf(stderr, "pc6");
+ fprintf(stderr, do_slm_cstates ? "invalid" : "pc6");
break;
case 4:
- fprintf(stderr, "pc7");
+ fprintf(stderr, do_slm_cstates ? "pc4" : "pc7");
break;
case 5:
- fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid");
+ fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid");
+ break;
+ case 6:
+ fprintf(stderr, do_slm_cstates ? "pc6" : "invalid");
break;
case 7:
- fprintf(stderr, "unlimited");
+ fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited");
break;
default:
fprintf(stderr, "invalid");
@@ -1460,6 +1482,8 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
case 0x3F: /* HSW */
case 0x45: /* HSW */
case 0x46: /* HSW */
+ case 0x37: /* BYT */
+ case 0x4D: /* AVN */
return 1;
case 0x2E: /* Nehalem-EX Xeon - Beckton */
case 0x2F: /* Westmere-EX Xeon - Eagleton */
@@ -1532,14 +1556,33 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
+double get_tdp(model)
+{
+ unsigned long long msr;
+
+ if (do_rapl & RAPL_PKG_POWER_INFO)
+ if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
+ return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+
+ switch (model) {
+ case 0x37:
+ case 0x4D:
+ return 30.0;
+ default:
+ return 135.0;
+ }
+}
+
+
/*
* rapl_probe()
*
- * sets do_rapl
+ * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
*/
void rapl_probe(unsigned int family, unsigned int model)
{
unsigned long long msr;
+ unsigned int time_unit;
double tdp;
if (!genuine_intel)
@@ -1555,11 +1598,15 @@ void rapl_probe(unsigned int family, unsigned int model)
case 0x3F: /* HSW */
case 0x45: /* HSW */
case 0x46: /* HSW */
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
break;
case 0x2D:
case 0x3E:
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS;
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ break;
+ case 0x37: /* BYT */
+ case 0x4D: /* AVN */
+ do_rapl = RAPL_PKG | RAPL_CORES ;
break;
default:
return;
@@ -1570,19 +1617,22 @@ void rapl_probe(unsigned int family, unsigned int model)
return;
rapl_power_units = 1.0 / (1 << (msr & 0xF));
- rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
- rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
+ if (model == 0x37)
+ rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
+ else
+ rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
- /* get TDP to determine energy counter range */
- if (get_msr(0, MSR_PKG_POWER_INFO, &msr))
- return;
+ time_unit = msr >> 16 & 0xF;
+ if (time_unit == 0)
+ time_unit = 0xA;
- tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+ rapl_time_units = 1.0 / (1 << (time_unit));
- rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
+ tdp = get_tdp(model);
+ rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
if (verbose)
- fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range);
+ fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
return;
}
@@ -1668,7 +1718,6 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned long long msr;
int cpu;
- double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units;
if (!do_rapl)
return 0;
@@ -1686,23 +1735,13 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
return -1;
- local_rapl_power_units = 1.0 / (1 << (msr & 0xF));
- local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
- local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
-
- if (local_rapl_power_units != rapl_power_units)
- fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu);
- if (local_rapl_energy_units != rapl_energy_units)
- fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu);
- if (local_rapl_time_units != rapl_time_units)
- fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu);
-
if (verbose) {
fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
- local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units);
+ rapl_power_units, rapl_energy_units, rapl_time_units);
}
- if (do_rapl & RAPL_PKG) {
+ if (do_rapl & RAPL_PKG_POWER_INFO) {
+
if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
return -5;
@@ -1714,6 +1753,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
+ }
+ if (do_rapl & RAPL_PKG) {
+
if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
return -9;
@@ -1749,12 +1791,16 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
print_power_limit_msr(cpu, msr, "DRAM Limit");
}
- if (do_rapl & RAPL_CORES) {
+ if (do_rapl & RAPL_CORE_POLICY) {
if (verbose) {
if (get_msr(cpu, MSR_PP0_POLICY, &msr))
return -7;
fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
+ }
+ }
+ if (do_rapl & RAPL_CORES) {
+ if (verbose) {
if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
return -9;
@@ -1813,10 +1859,48 @@ int has_c8_c9_c10(unsigned int family, unsigned int model)
}
+int is_slm(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+ switch (model) {
+ case 0x37: /* BYT */
+ case 0x4D: /* AVN */
+ return 1;
+ }
+ return 0;
+}
+
+#define SLM_BCLK_FREQS 5
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
+
+double slm_bclk(void)
+{
+ unsigned long long msr = 3;
+ unsigned int i;
+ double freq;
+
+ if (get_msr(0, MSR_FSB_FREQ, &msr))
+ fprintf(stderr, "SLM BCLK: unknown\n");
+
+ i = msr & 0xf;
+ if (i >= SLM_BCLK_FREQS) {
+ fprintf(stderr, "SLM BCLK[%d] invalid\n", i);
+ msr = 3;
+ }
+ freq = slm_freq_table[i];
+
+ fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq);
+
+ return freq;
+}
+
double discover_bclk(unsigned int family, unsigned int model)
{
if (is_snb(family, model))
return 100.00;
+ else if (is_slm(family, model))
+ return slm_bclk();
else
return 133.33;
}
@@ -1873,7 +1957,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
cpu, msr, target_c_local);
- if (target_c_local < 85 || target_c_local > 120)
+ if (target_c_local < 85 || target_c_local > 127)
goto guess;
tcc_activation_temp = target_c_local;
@@ -1970,6 +2054,7 @@ void check_cpuid()
do_smi = do_nhm_cstates;
do_snb_cstates = is_snb(family, model);
do_c8_c9_c10 = has_c8_c9_c10(family, model);
+ do_slm_cstates = is_slm(family, model);
bclk = discover_bclk(family, model);
do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
@@ -2331,7 +2416,7 @@ int main(int argc, char **argv)
cmdline(argc, argv);
if (verbose)
- fprintf(stderr, "turbostat v3.4 April 17, 2013"
+ fprintf(stderr, "turbostat v3.5 April 26, 2013"
" - Len Brown <lenb@kernel.org>\n");
turbostat_init();
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 662f34c3287e..a0aa84b5941a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1615,8 +1615,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
- return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
- offset, len);
+ const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+
+ return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_clear_guest_page);