diff options
783 files changed, 26952 insertions, 7560 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd index 1cf2adf46b11..cd9213ccf3dc 100644 --- a/Documentation/ABI/testing/sysfs-bus-rbd +++ b/Documentation/ABI/testing/sysfs-bus-rbd @@ -70,6 +70,10 @@ snap_* A directory per each snapshot +parent + + Information identifying the pool, image, and snapshot id for + the parent image in a layered rbd image (format 2 only). Entries under /sys/bus/rbd/devices/<dev-id>/snap_<snap-name> ------------------------------------------------------------- diff --git a/Documentation/ABI/testing/sysfs-devices-node b/Documentation/ABI/testing/sysfs-devices-node deleted file mode 100644 index 453a210c3ceb..000000000000 --- a/Documentation/ABI/testing/sysfs-devices-node +++ /dev/null @@ -1,7 +0,0 @@ -What: /sys/devices/system/node/nodeX/compact -Date: February 2010 -Contact: Mel Gorman <mel@csn.ul.ie> -Description: - When this file is written to, all memory within that node - will be compacted. When it completes, memory will be freed - into blocks which have as many contiguous pages as possible diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt index a0b6250add79..4a4fb295ceef 100644 --- a/Documentation/DMA-API-HOWTO.txt +++ b/Documentation/DMA-API-HOWTO.txt @@ -468,11 +468,46 @@ To map a single region, you do: size_t size = buffer->len; dma_handle = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dma_handle)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } and to unmap it: dma_unmap_single(dev, dma_handle, size, direction); +You should call dma_mapping_error() as dma_map_single() could fail and return +error. Not all dma implementations support dma_mapping_error() interface. +However, it is a good practice to call dma_mapping_error() interface, which +will invoke the generic mapping error check interface. Doing so will ensure +that the mapping code will work correctly on all dma implementations without +any dependency on the specifics of the underlying implementation. Using the +returned address without checking for errors could result in failures ranging +from panics to silent data corruption. Couple of example of incorrect ways to +check for errors that make assumptions about the underlying dma implementation +are as follows and these are applicable to dma_map_page() as well. + +Incorrect example 1: + dma_addr_t dma_handle; + + dma_handle = dma_map_single(dev, addr, size, direction); + if ((dma_handle & 0xffff != 0) || (dma_handle >= 0x1000000)) { + goto map_error; + } + +Incorrect example 2: + dma_addr_t dma_handle; + + dma_handle = dma_map_single(dev, addr, size, direction); + if (dma_handle == DMA_ERROR_CODE) { + goto map_error; + } + You should call dma_unmap_single when the DMA activity is finished, e.g. from the interrupt which told you that the DMA transfer is done. @@ -489,6 +524,14 @@ Specifically: size_t size = buffer->len; dma_handle = dma_map_page(dev, page, offset, size, direction); + if (dma_mapping_error(dma_handle)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } ... @@ -496,6 +539,12 @@ Specifically: Here, "offset" means byte offset within the given page. +You should call dma_mapping_error() as dma_map_page() could fail and return +error as outlined under the dma_map_single() discussion. + +You should call dma_unmap_page when the DMA activity is finished, e.g. +from the interrupt which told you that the DMA transfer is done. + With scatterlists, you map a region gathered from several regions by: int i, count = dma_map_sg(dev, sglist, nents, direction); @@ -578,6 +627,14 @@ to use the dma_sync_*() interfaces. dma_addr_t mapping; mapping = dma_map_single(cp->dev, buffer, len, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_handle)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } cp->rx_buf = buffer; cp->rx_len = len; @@ -658,6 +715,75 @@ failure can be determined by: * delay and try again later or * reset driver. */ + goto map_error_handling; + } + +- unmap pages that are already mapped, when mapping error occurs in the middle + of a multiple page mapping attempt. These example are applicable to + dma_map_page() as well. + +Example 1: + dma_addr_t dma_handle1; + dma_addr_t dma_handle2; + + dma_handle1 = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dev, dma_handle1)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling1; + } + dma_handle2 = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dev, dma_handle2)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling2; + } + + ... + + map_error_handling2: + dma_unmap_single(dma_handle1); + map_error_handling1: + +Example 2: (if buffers are allocated a loop, unmap all mapped buffers when + mapping error is detected in the middle) + + dma_addr_t dma_addr; + dma_addr_t array[DMA_BUFFERS]; + int save_index = 0; + + for (i = 0; i < DMA_BUFFERS; i++) { + + ... + + dma_addr = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dev, dma_addr)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } + array[i].dma_addr = dma_addr; + save_index++; + } + + ... + + map_error_handling: + + for (i = 0; i < save_index; i++) { + + ... + + dma_unmap_single(array[i].dma_addr); } Networking drivers must call dev_kfree_skb to free the socket buffer diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 66bd97a95f10..78a6c569d204 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -678,3 +678,15 @@ out of dma_debug_entries. These entries are preallocated at boot. The number of preallocated entries is defined per architecture. If it is too low for you boot with 'dma_debug_entries=<your_desired_number>' to overwrite the architectural default. + +void debug_dmap_mapping_error(struct device *dev, dma_addr_t dma_addr); + +dma-debug interface debug_dma_mapping_error() to debug drivers that fail +to check dma mapping errors on addresses returned by dma_map_single() and +dma_map_page() interfaces. This interface clears a flag set by +debug_dma_map_page() to indicate that dma_mapping_error() has been called by +the driver. When driver does unmap, debug_dma_unmap() checks the flag and if +this flag is still set, prints warning message that includes call trace that +leads up to the unmap. This interface can be called from dma_mapping_error() +routines to enable dma mapping error check debugging. + diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt index 8cf24f6f0a99..7b53da5cb75b 100644 --- a/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt +++ b/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt @@ -13,7 +13,7 @@ Recommended properties: Example: -spi@7000d600 { +spi@7000c380 { compatible = "nvidia,tegra20-sflash"; reg = <0x7000c380 0x80>; interrupts = <0 39 0x04>; diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt index f5b1ad1a1ec3..eefe15e3d95e 100644 --- a/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt +++ b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt @@ -13,7 +13,7 @@ Recommended properties: Example: -slink@7000d600 { +spi@7000d600 { compatible = "nvidia,tegra20-slink"; reg = <0x7000d600 0x200>; interrupts = <0 82 0x04>; diff --git a/Documentation/devicetree/bindings/spi/spi_atmel.txt b/Documentation/devicetree/bindings/spi/spi_atmel.txt new file mode 100644 index 000000000000..07e04cdc0c9e --- /dev/null +++ b/Documentation/devicetree/bindings/spi/spi_atmel.txt @@ -0,0 +1,26 @@ +Atmel SPI device + +Required properties: +- compatible : should be "atmel,at91rm9200-spi". +- reg: Address and length of the register set for the device +- interrupts: Should contain spi interrupt +- cs-gpios: chipselects + +Example: + +spi1: spi@fffcc000 { + compatible = "atmel,at91rm9200-spi"; + reg = <0xfffcc000 0x4000>; + interrupts = <13 4 5>; + #address-cells = <1>; + #size-cells = <0>; + cs-gpios = <&pioB 3 0>; + status = "okay"; + + mmc-slot@0 { + compatible = "mmc-spi-slot"; + reg = <0>; + gpios = <&pioC 4 0>; /* CD */ + spi-max-frequency = <25000000>; + }; +}; diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 7b52ba7bf32a..8042050eb265 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -50,6 +50,8 @@ ext4.txt - info, mount options and specifications for the Ext4 filesystem. files.txt - info on file management in the Linux kernel. +f2fs.txt + - info and mount options for the F2FS filesystem. fuse.txt - info on the Filesystem in User SpacE including mount options. gfs2.txt diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index e540a24e5d06..f48e0c6b4c42 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -80,7 +80,6 @@ rename: yes (all) (see below) readlink: no follow_link: no put_link: no -truncate: yes (see below) setattr: yes permission: no (may not block if called in rcu-walk mode) get_acl: no @@ -96,11 +95,6 @@ atomic_open: yes Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. - ->truncate() is never called directly - it's a callback, not a -method. It's called by vmtruncate() - deprecated library function used by -->setattr(). Locking information above applies to that call (i.e. is -inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been -passed). See Documentation/filesystems/directory-locking for more detailed discussion of the locking scheme for directory operations. diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt index 382d52cdaf2d..d78bab9622c6 100644 --- a/Documentation/filesystems/caching/backend-api.txt +++ b/Documentation/filesystems/caching/backend-api.txt @@ -308,6 +308,18 @@ performed on the denizens of the cache. These are held in a structure of type: obtained by calling object->cookie->def->get_aux()/get_attr(). + (*) Invalidate data object [mandatory]: + + int (*invalidate_object)(struct fscache_operation *op) + + This is called to invalidate a data object (as pointed to by op->object). + All the data stored for this object should be discarded and an + attr_changed operation should be performed. The caller will follow up + with an object update operation. + + fscache_op_complete() must be called on op before returning. + + (*) Discard object [mandatory]: void (*drop_object)(struct fscache_object *object) @@ -419,7 +431,10 @@ performed on the denizens of the cache. These are held in a structure of type: If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS returned if possible or fscache_end_io() called with a suitable error - code.. + code. + + fscache_put_retrieval() should be called after a page or pages are dealt + with. This will complete the operation when all pages are dealt with. (*) Request pages be read from cache [mandatory]: @@ -526,6 +541,27 @@ FS-Cache provides some utilities that a cache backend may make use of: error value should be 0 if successful and an error otherwise. + (*) Record that one or more pages being retrieved or allocated have been dealt + with: + + void fscache_retrieval_complete(struct fscache_retrieval *op, + int n_pages); + + This is called to record the fact that one or more pages have been dealt + with and are no longer the concern of this operation. When the number of + pages remaining in the operation reaches 0, the operation will be + completed. + + + (*) Record operation completion: + + void fscache_op_complete(struct fscache_operation *op); + + This is called to record the completion of an operation. This deducts + this operation from the parent object's run state, potentially permitting + one or more pending operations to start running. + + (*) Set highest store limit: void fscache_set_store_limit(struct fscache_object *object, diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index 7cc6bf2871eb..97e6c0ecc5ef 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt @@ -35,8 +35,9 @@ This document contains the following sections: (12) Index and data file update (13) Miscellaneous cookie operations (14) Cookie unregistration - (15) Index and data file invalidation - (16) FS-Cache specific page flags. + (15) Index invalidation + (16) Data file invalidation + (17) FS-Cache specific page flags. ============================= @@ -767,13 +768,42 @@ the cookies for "child" indices, objects and pages have been relinquished first. -================================ -INDEX AND DATA FILE INVALIDATION -================================ +================== +INDEX INVALIDATION +================== + +There is no direct way to invalidate an index subtree. To do this, the caller +should relinquish and retire the cookie they have, and then acquire a new one. + + +====================== +DATA FILE INVALIDATION +====================== + +Sometimes it will be necessary to invalidate an object that contains data. +Typically this will be necessary when the server tells the netfs of a foreign +change - at which point the netfs has to throw away all the state it had for an +inode and reload from the server. + +To indicate that a cache object should be invalidated, the following function +can be called: + + void fscache_invalidate(struct fscache_cookie *cookie); + +This can be called with spinlocks held as it defers the work to a thread pool. +All extant storage, retrieval and attribute change ops at this point are +cancelled and discarded. Some future operations will be rejected until the +cache has had a chance to insert a barrier in the operations queue. After +that, operations will be queued again behind the invalidation operation. + +The invalidation operation will perform an attribute change operation and an +auxiliary data update operation as it is very likely these will have changed. + +Using the following function, the netfs can wait for the invalidation operation +to have reached a point at which it can start submitting ordinary operations +once again: -There is no direct way to invalidate an index subtree or a data file. To do -this, the caller should relinquish and retire the cookie they have, and then -acquire a new one. + void fscache_wait_on_invalidate(struct fscache_cookie *cookie); =========================== diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt index 58313348da87..100ff41127e4 100644 --- a/Documentation/filesystems/caching/object.txt +++ b/Documentation/filesystems/caching/object.txt @@ -216,7 +216,14 @@ servicing netfs requests: The normal running state. In this state, requests the netfs makes will be passed on to the cache. - (6) State FSCACHE_OBJECT_UPDATING. + (6) State FSCACHE_OBJECT_INVALIDATING. + + The object is undergoing invalidation. When the state comes here, it + discards all pending read, write and attribute change operations as it is + going to clear out the cache entirely and reinitialise it. It will then + continue to the FSCACHE_OBJECT_UPDATING state. + + (7) State FSCACHE_OBJECT_UPDATING. The state machine comes here to update the object in the cache from the netfs's records. This involves updating the auxiliary data that is used @@ -225,13 +232,13 @@ servicing netfs requests: And there are terminal states in which an object cleans itself up, deallocates memory and potentially deletes stuff from disk: - (7) State FSCACHE_OBJECT_LC_DYING. + (8) State FSCACHE_OBJECT_LC_DYING. The object comes here if it is dying because of a lookup or creation error. This would be due to a disk error or system error of some sort. Temporary data is cleaned up, and the parent is released. - (8) State FSCACHE_OBJECT_DYING. + (9) State FSCACHE_OBJECT_DYING. The object comes here if it is dying due to an error, because its parent cookie has been relinquished by the netfs or because the cache is being @@ -241,27 +248,27 @@ memory and potentially deletes stuff from disk: can destroy themselves. This object waits for all its children to go away before advancing to the next state. - (9) State FSCACHE_OBJECT_ABORT_INIT. +(10) State FSCACHE_OBJECT_ABORT_INIT. The object comes to this state if it was waiting on its parent in FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself so that the parent may proceed from the FSCACHE_OBJECT_DYING state. -(10) State FSCACHE_OBJECT_RELEASING. -(11) State FSCACHE_OBJECT_RECYCLING. +(11) State FSCACHE_OBJECT_RELEASING. +(12) State FSCACHE_OBJECT_RECYCLING. The object comes to one of these two states when dying once it is rid of all its children, if it is dying because the netfs relinquished its cookie. In the first state, the cached data is expected to persist, and in the second it will be deleted. -(12) State FSCACHE_OBJECT_WITHDRAWING. +(13) State FSCACHE_OBJECT_WITHDRAWING. The object transits to this state if the cache decides it wants to withdraw the object from service, perhaps to make space, but also due to error or just because the whole cache is being withdrawn. -(13) State FSCACHE_OBJECT_DEAD. +(14) State FSCACHE_OBJECT_DEAD. The object transits to this state when the in-memory object record is ready to be deleted. The object processor shouldn't ever see an object in diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt index b6b070c57cbf..bee2a5f93d60 100644 --- a/Documentation/filesystems/caching/operations.txt +++ b/Documentation/filesystems/caching/operations.txt @@ -174,7 +174,7 @@ Operations are used through the following procedure: necessary (the object might have died whilst the thread was waiting). When it has finished doing its processing, it should call - fscache_put_operation() on it. + fscache_op_complete() and fscache_put_operation() on it. (4) The operation holds an effective lock upon the object, preventing other exclusive ops conflicting until it is released. The operation can be diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt new file mode 100644 index 000000000000..8fbd8b46ee34 --- /dev/null +++ b/Documentation/filesystems/f2fs.txt @@ -0,0 +1,421 @@ +================================================================================ +WHAT IS Flash-Friendly File System (F2FS)? +================================================================================ + +NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have +been equipped on a variety systems ranging from mobile to server systems. Since +they are known to have different characteristics from the conventional rotating +disks, a file system, an upper layer to the storage device, should adapt to the +changes from the sketch in the design level. + +F2FS is a file system exploiting NAND flash memory-based storage devices, which +is based on Log-structured File System (LFS). The design has been focused on +addressing the fundamental issues in LFS, which are snowball effect of wandering +tree and high cleaning overhead. + +Since a NAND flash memory-based storage device shows different characteristic +according to its internal geometry or flash memory management scheme, namely FTL, +F2FS and its tools support various parameters not only for configuring on-disk +layout, but also for selecting allocation and cleaning algorithms. + +The file system formatting tool, "mkfs.f2fs", is available from the following +git tree: +>> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git + +For reporting bugs and sending patches, please use the following mailing list: +>> linux-f2fs-devel@lists.sourceforge.net + +================================================================================ +BACKGROUND AND DESIGN ISSUES +================================================================================ + +Log-structured File System (LFS) +-------------------------------- +"A log-structured file system writes all modifications to disk sequentially in +a log-like structure, thereby speeding up both file writing and crash recovery. +The log is the only structure on disk; it contains indexing information so that +files can be read back from the log efficiently. In order to maintain large free +areas on disk for fast writing, we divide the log into segments and use a +segment cleaner to compress the live information from heavily fragmented +segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and +implementation of a log-structured file system", ACM Trans. Computer Systems +10, 1, 26–52. + +Wandering Tree Problem +---------------------- +In LFS, when a file data is updated and written to the end of log, its direct +pointer block is updated due to the changed location. Then the indirect pointer +block is also updated due to the direct pointer block update. In this manner, +the upper index structures such as inode, inode map, and checkpoint block are +also updated recursively. This problem is called as wandering tree problem [1], +and in order to enhance the performance, it should eliminate or relax the update +propagation as much as possible. + +[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/ + +Cleaning Overhead +----------------- +Since LFS is based on out-of-place writes, it produces so many obsolete blocks +scattered across the whole storage. In order to serve new empty log space, it +needs to reclaim these obsolete blocks seamlessly to users. This job is called +as a cleaning process. + +The process consists of three operations as follows. +1. A victim segment is selected through referencing segment usage table. +2. It loads parent index structures of all the data in the victim identified by + segment summary blocks. +3. It checks the cross-reference between the data and its parent index structure. +4. It moves valid data selectively. + +This cleaning job may cause unexpected long delays, so the most important goal +is to hide the latencies to users. And also definitely, it should reduce the +amount of valid data to be moved, and move them quickly as well. + +================================================================================ +KEY FEATURES +================================================================================ + +Flash Awareness +--------------- +- Enlarge the random write area for better performance, but provide the high + spatial locality +- Align FS data structures to the operational units in FTL as best efforts + +Wandering Tree Problem +---------------------- +- Use a term, “node”, that represents inodes as well as various pointer blocks +- Introduce Node Address Table (NAT) containing the locations of all the “node” + blocks; this will cut off the update propagation. + +Cleaning Overhead +----------------- +- Support a background cleaning process +- Support greedy and cost-benefit algorithms for victim selection policies +- Support multi-head logs for static/dynamic hot and cold data separation +- Introduce adaptive logging for efficient block allocation + +================================================================================ +MOUNT OPTIONS +================================================================================ + +background_gc_off Turn off cleaning operations, namely garbage collection, + triggered in background when I/O subsystem is idle. +disable_roll_forward Disable the roll-forward recovery routine +discard Issue discard/TRIM commands when a segment is cleaned. +no_heap Disable heap-style segment allocation which finds free + segments for data from the beginning of main area, while + for node from the end of main area. +nouser_xattr Disable Extended User Attributes. Note: xattr is enabled + by default if CONFIG_F2FS_FS_XATTR is selected. +noacl Disable POSIX Access Control List. Note: acl is enabled + by default if CONFIG_F2FS_FS_POSIX_ACL is selected. +active_logs=%u Support configuring the number of active logs. In the + current design, f2fs supports only 2, 4, and 6 logs. + Default number is 6. +disable_ext_identify Disable the extension list configured by mkfs, so f2fs + does not aware of cold files such as media files. + +================================================================================ +DEBUGFS ENTRIES +================================================================================ + +/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as +f2fs. Each file shows the whole f2fs information. + +/sys/kernel/debug/f2fs/status includes: + - major file system information managed by f2fs currently + - average SIT information about whole segments + - current memory footprint consumed by f2fs. + +================================================================================ +USAGE +================================================================================ + +1. Download userland tools and compile them. + +2. Skip, if f2fs was compiled statically inside kernel. + Otherwise, insert the f2fs.ko module. + # insmod f2fs.ko + +3. Create a directory trying to mount + # mkdir /mnt/f2fs + +4. Format the block device, and then mount as f2fs + # mkfs.f2fs -l label /dev/block_device + # mount -t f2fs /dev/block_device /mnt/f2fs + +Format options +-------------- +-l [label] : Give a volume label, up to 256 unicode name. +-a [0 or 1] : Split start location of each area for heap-based allocation. + 1 is set by default, which performs this. +-o [int] : Set overprovision ratio in percent over volume size. + 5 is set by default. +-s [int] : Set the number of segments per section. + 1 is set by default. +-z [int] : Set the number of sections per zone. + 1 is set by default. +-e [str] : Set basic extension list. e.g. "mp3,gif,mov" + +================================================================================ +DESIGN +================================================================================ + +On-disk Layout +-------------- + +F2FS divides the whole volume into a number of segments, each of which is fixed +to 2MB in size. A section is composed of consecutive segments, and a zone +consists of a set of sections. By default, section and zone sizes are set to one +segment size identically, but users can easily modify the sizes by mkfs. + +F2FS splits the entire volume into six areas, and all the areas except superblock +consists of multiple segments as described below. + + align with the zone size <-| + |-> align with the segment size + _________________________________________________________________________ + | | | Node | Segment | Segment | | + | Superblock | Checkpoint | Address | Info. | Summary | Main | + | (SB) | (CP) | Table (NAT) | Table (SIT) | Area (SSA) | | + |____________|_____2______|______N______|______N______|______N_____|__N___| + . . + . . + . . + ._________________________________________. + |_Segment_|_..._|_Segment_|_..._|_Segment_| + . . + ._________._________ + |_section_|__...__|_ + . . + .________. + |__zone__| + +- Superblock (SB) + : It is located at the beginning of the partition, and there exist two copies + to avoid file system crash. It contains basic partition information and some + default parameters of f2fs. + +- Checkpoint (CP) + : It contains file system information, bitmaps for valid NAT/SIT sets, orphan + inode lists, and summary entries of current active segments. + +- Node Address Table (NAT) + : It is composed of a block address table for all the node blocks stored in + Main area. + +- Segment Information Table (SIT) + : It contains segment information such as valid block count and bitmap for the + validity of all the blocks. + +- Segment Summary Area (SSA) + : It contains summary entries which contains the owner information of all the + data and node blocks stored in Main area. + +- Main Area + : It contains file and directory data including their indices. + +In order to avoid misalignment between file system and flash-based storage, F2FS +aligns the start block address of CP with the segment size. Also, it aligns the +start block address of Main area with the zone size by reserving some segments +in SSA area. + +Reference the following survey for additional technical details. +https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey + +File System Metadata Structure +------------------------------ + +F2FS adopts the checkpointing scheme to maintain file system consistency. At +mount time, F2FS first tries to find the last valid checkpoint data by scanning +CP area. In order to reduce the scanning time, F2FS uses only two copies of CP. +One of them always indicates the last valid data, which is called as shadow copy +mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism. + +For file system consistency, each CP points to which NAT and SIT copies are +valid, as shown as below. + + +--------+----------+---------+ + | CP | NAT | SIT | + +--------+----------+---------+ + . . . . + . . . . + . . . . + +-------+-------+--------+--------+--------+--------+ + | CP #0 | CP #1 | NAT #0 | NAT #1 | SIT #0 | SIT #1 | + +-------+-------+--------+--------+--------+--------+ + | ^ ^ + | | | + `----------------------------------------' + +Index Structure +--------------- + +The key data structure to manage the data locations is a "node". Similar to +traditional file structures, F2FS has three types of node: inode, direct node, +indirect node. F2FS assigns 4KB to an inode block which contains 923 data block +indices, two direct node pointers, two indirect node pointers, and one double +indirect node pointer as described below. One direct node block contains 1018 +data blocks, and one indirect node block contains also 1018 node blocks. Thus, +one inode block (i.e., a file) covers: + + 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB. + + Inode block (4KB) + |- data (923) + |- direct node (2) + | `- data (1018) + |- indirect node (2) + | `- direct node (1018) + | `- data (1018) + `- double indirect node (1) + `- indirect node (1018) + `- direct node (1018) + `- data (1018) + +Note that, all the node blocks are mapped by NAT which means the location of +each node is translated by the NAT table. In the consideration of the wandering +tree problem, F2FS is able to cut off the propagation of node updates caused by +leaf data writes. + +Directory Structure +------------------- + +A directory entry occupies 11 bytes, which consists of the following attributes. + +- hash hash value of the file name +- ino inode number +- len the length of file name +- type file type such as directory, symlink, etc + +A dentry block consists of 214 dentry slots and file names. Therein a bitmap is +used to represent whether each dentry is valid or not. A dentry block occupies +4KB with the following composition. + + Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) + + dentries(11 * 214 bytes) + file name (8 * 214 bytes) + + [Bucket] + +--------------------------------+ + |dentry block 1 | dentry block 2 | + +--------------------------------+ + . . + . . + . [Dentry Block Structure: 4KB] . + +--------+----------+----------+------------+ + | bitmap | reserved | dentries | file names | + +--------+----------+----------+------------+ + [Dentry Block: 4KB] . . + . . + . . + +------+------+-----+------+ + | hash | ino | len | type | + +------+------+-----+------+ + [Dentry Structure: 11 bytes] + +F2FS implements multi-level hash tables for directory structure. Each level has +a hash table with dedicated number of hash buckets as shown below. Note that +"A(2B)" means a bucket includes 2 data blocks. + +---------------------- +A : bucket +B : block +N : MAX_DIR_HASH_DEPTH +---------------------- + +level #0 | A(2B) + | +level #1 | A(2B) - A(2B) + | +level #2 | A(2B) - A(2B) - A(2B) - A(2B) + . | . . . . +level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) + . | . . . . +level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) + +The number of blocks and buckets are determined by, + + ,- 2, if n < MAX_DIR_HASH_DEPTH / 2, + # of blocks in level #n = | + `- 4, Otherwise + + ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2, + # of buckets in level #n = | + `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise + +When F2FS finds a file name in a directory, at first a hash value of the file +name is calculated. Then, F2FS scans the hash table in level #0 to find the +dentry consisting of the file name and its inode number. If not found, F2FS +scans the next hash table in level #1. In this way, F2FS scans hash tables in +each levels incrementally from 1 to N. In each levels F2FS needs to scan only +one bucket determined by the following equation, which shows O(log(# of files)) +complexity. + + bucket number to scan in level #n = (hash value) % (# of buckets in level #n) + +In the case of file creation, F2FS finds empty consecutive slots that cover the +file name. F2FS searches the empty slots in the hash tables of whole levels from +1 to N in the same way as the lookup operation. + +The following figure shows an example of two cases holding children. + --------------> Dir <-------------- + | | + child child + + child - child [hole] - child + + child - child - child [hole] - [hole] - child + + Case 1: Case 2: + Number of children = 6, Number of children = 3, + File size = 7 File size = 7 + +Default Block Allocation +------------------------ + +At runtime, F2FS manages six active logs inside "Main" area: Hot/Warm/Cold node +and Hot/Warm/Cold data. + +- Hot node contains direct node blocks of directories. +- Warm node contains direct node blocks except hot node blocks. +- Cold node contains indirect node blocks +- Hot data contains dentry blocks +- Warm data contains data blocks except hot and cold data blocks +- Cold data contains multimedia data or migrated data blocks + +LFS has two schemes for free space management: threaded log and copy-and-compac- +tion. The copy-and-compaction scheme which is known as cleaning, is well-suited +for devices showing very good sequential write performance, since free segments +are served all the time for writing new data. However, it suffers from cleaning +overhead under high utilization. Contrarily, the threaded log scheme suffers +from random writes, but no cleaning process is needed. F2FS adopts a hybrid +scheme where the copy-and-compaction scheme is adopted by default, but the +policy is dynamically changed to the threaded log scheme according to the file +system status. + +In order to align F2FS with underlying flash-based storage, F2FS allocates a +segment in a unit of section. F2FS expects that the section size would be the +same as the unit size of garbage collection in FTL. Furthermore, with respect +to the mapping granularity in FTL, F2FS allocates each section of the active +logs from different zones as much as possible, since FTL can write the data in +the active logs into one allocation unit according to its mapping granularity. + +Cleaning process +---------------- + +F2FS does cleaning both on demand and in the background. On-demand cleaning is +triggered when there are not enough free segments to serve VFS calls. Background +cleaner is operated by a kernel thread, and triggers the cleaning job when the +system is idle. + +F2FS supports two victim selection policies: greedy and cost-benefit algorithms. +In the greedy algorithm, F2FS selects a victim segment having the smallest number +of valid blocks. In the cost-benefit algorithm, F2FS selects a victim segment +according to the segment age and the number of valid blocks in order to address +log block thrashing problem in the greedy algorithm. F2FS adopts the greedy +algorithm for on-demand cleaner, while background cleaner adopts cost-benefit +algorithm. + +In order to identify whether the data in the victim segment are valid or not, +F2FS manages a bitmap. Each bit represents the validity of a block, and the +bitmap is composed of a bit stream covering whole blocks in main area. diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt index 092fad92a3f0..01c2db769791 100644 --- a/Documentation/filesystems/nfs/nfs41-server.txt +++ b/Documentation/filesystems/nfs/nfs41-server.txt @@ -39,21 +39,10 @@ interoperability problems with future clients. Known issues: from a linux client are possible, but we aren't really conformant with the spec (for example, we don't use kerberos on the backchannel correctly). - - Incomplete backchannel support: incomplete backchannel gss - support and no support for BACKCHANNEL_CTL mean that - callbacks (hence delegations and layouts) may not be - available and clients confused by the incomplete - implementation may fail. - We do not support SSV, which provides security for shared client-server state (thus preventing unauthorized tampering with locks and opens, for example). It is mandatory for servers to support this, though no clients use it yet. - - Mandatory operations which we do not support, such as - DESTROY_CLIENTID, are not currently used by clients, but will be - (and the spec recommends their uses in common cases), and - clients should not be expected to know how to recover from the - case where they are not supported. This will eventually cause - interoperability failures. In addition, some limitations are inherited from the current NFSv4 implementation: @@ -89,7 +78,7 @@ Operations | | MNI | or OPT) | | +----------------------+------------+--------------+----------------+ | ACCESS | REQ | | Section 18.1 | -NS | BACKCHANNEL_CTL | REQ | | Section 18.33 | +I | BACKCHANNEL_CTL | REQ | | Section 18.33 | I | BIND_CONN_TO_SESSION | REQ | | Section 18.34 | | CLOSE | REQ | | Section 18.2 | | COMMIT | REQ | | Section 18.3 | @@ -99,7 +88,7 @@ NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 | | DELEGRETURN | OPT | FDELG, | Section 18.6 | | | | DDELG, pNFS | | | | | (REQ) | | -NS | DESTROY_CLIENTID | REQ | | Section 18.50 | +I | DESTROY_CLIENTID | REQ | | Section 18.50 | I | DESTROY_SESSION | REQ | | Section 18.37 | I | EXCHANGE_ID | REQ | | Section 18.35 | I | FREE_STATEID | REQ | | Section 18.38 | @@ -192,7 +181,6 @@ EXCHANGE_ID: CREATE_SESSION: * backchannel attributes are ignored -* backchannel security parameters are ignored SEQUENCE: * no support for dynamic slot table renegotiation (optional) @@ -202,7 +190,7 @@ Nonstandard compound limitations: ca_maxrequestsize request and a ca_maxresponsesize reply, so we may fail to live up to the promise we made in CREATE_SESSION fore channel negotiation. -* No more than one IO operation (read, write, readdir) allowed per - compound. +* No more than one read-like operation allowed per compound; encoding + replies that cross page boundaries (except for read data) not handled. See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 0742feebc6e2..0472c31c163b 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -281,7 +281,7 @@ ext2_write_failed and callers for an example. [mandatory] - ->truncate is going away. The whole truncate sequence needs to be + ->truncate is gone. The whole truncate sequence needs to be implemented in ->setattr, which is now mandatory for filesystems implementing on-disk size changes. Start with a copy of the old inode_setattr and vmtruncate, and the reorder the vmtruncate + foofs_vmtruncate sequence to diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 2ee133e030c3..e3869098163e 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -350,7 +350,6 @@ struct inode_operations { int (*readlink) (struct dentry *, char __user *,int); void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry *, struct nameidata *, void *); - void (*truncate) (struct inode *); int (*permission) (struct inode *, int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); @@ -431,16 +430,6 @@ otherwise noted. started might not be in the page cache at the end of the walk). - truncate: Deprecated. This will not be called if ->setsize is defined. - Called by the VFS to change the size of a file. The - i_size field of the inode is set to the desired size by the - VFS before this method is called. This method is called by - the truncate(2) system call and related functionality. - - Note: ->truncate and vmtruncate are deprecated. Do not add new - instances/calls of these. Filesystems should be converted to do their - truncate sequence via ->setattr(). - permission: called by the VFS to check for access rights on a POSIX-like filesystem. diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87 index 87850d86c559..8386aadc0a82 100644 --- a/Documentation/hwmon/it87 +++ b/Documentation/hwmon/it87 @@ -209,3 +209,13 @@ doesn't use CPU cycles. Trip points must be set properly before switching to automatic fan speed control mode. The driver will perform basic integrity checks before actually switching to automatic control mode. + + +Temperature offset attributes +----------------------------- + +The driver supports temp[1-3]_offset sysfs attributes to adjust the reported +temperature for thermal diodes or diode-connected thermal transistors. +If a temperature sensor is configured for thermistors, the attribute values +are ignored. If the thermal sensor type is Intel PECI, the temperature offset +must be programmed to the critical CPU temperature. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ddd84d627185..363e348bff9b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -446,12 +446,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. possible to determine what the correct size should be. This option provides an override for these situations. - capability.disable= - [SECURITY] Disable capabilities. This would normally - be used only if an alternative security model is to be - configured. Potentially dangerous and should only be - used if you are entirely sure of the consequences. - ccw_timeout_log [S390] See Documentation/s390/CommonIO for details. @@ -124,7 +124,7 @@ $(if $(KBUILD_OUTPUT),, \ PHONY += $(MAKECMDGOALS) sub-make $(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS)) _all: sub-make - $(Q)@: + @: sub-make: FORCE $(if $(KBUILD_VERBOSE:1=),@)$(MAKE) -C $(KBUILD_OUTPUT) \ @@ -1027,11 +1027,14 @@ clean: rm-dirs := $(CLEAN_DIRS) clean: rm-files := $(CLEAN_FILES) clean-dirs := $(addprefix _clean_, . $(vmlinux-alldirs) Documentation samples) -PHONY += $(clean-dirs) clean archclean +PHONY += $(clean-dirs) clean archclean vmlinuxclean $(clean-dirs): $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@) -clean: archclean +vmlinuxclean: + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/link-vmlinux.sh clean + +clean: archclean vmlinuxclean # mrproper - Delete all generated files, including .config # @@ -1258,7 +1261,6 @@ scripts: ; endif # KBUILD_EXTMOD clean: $(clean-dirs) - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/link-vmlinux.sh clean $(call cmd,rmdirs) $(call cmd,rmfiles) @find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \ diff --git a/arch/Kconfig b/arch/Kconfig index 8e9e3246b2b4..7f8f281f2585 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -291,12 +291,6 @@ config ARCH_WANT_OLD_COMPAT_IPC select ARCH_WANT_COMPAT_IPC_PARSE_VERSION bool -config GENERIC_KERNEL_THREAD - bool - -config GENERIC_KERNEL_EXECVE - bool - config HAVE_ARCH_SECCOMP_FILTER bool help @@ -362,6 +356,9 @@ config MODULES_USE_ELF_REL Modules only use ELF REL relocations. Modules with ELF RELA relocations will give an error. +config GENERIC_SIGALTSTACK + bool + # # ABI hall of shame # diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 5dd7f5db24d4..9d5904cc7712 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -20,10 +20,9 @@ config ALPHA select GENERIC_CMOS_UPDATE select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA + select GENERIC_SIGALTSTACK help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h index df9a6cd748d5..21128505ddbe 100644 --- a/arch/alpha/include/asm/ptrace.h +++ b/arch/alpha/include/asm/ptrace.h @@ -8,6 +8,7 @@ #define user_mode(regs) (((regs)->ps & 8) != 0) #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) +#define current_user_stack_pointer() rdusp() #define task_pt_regs(task) \ ((struct pt_regs *) (task_stack_page(task) + 2*PAGE_SIZE) - 1) diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index d6069ff3b1c8..b3396ee039b7 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -15,7 +15,6 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/alpha/include/uapi/asm/signal.h b/arch/alpha/include/uapi/asm/signal.h index 965bbfa59c65..dd4ca4bcbb4a 100644 --- a/arch/alpha/include/uapi/asm/signal.h +++ b/arch/alpha/include/uapi/asm/signal.h @@ -84,12 +84,6 @@ typedef unsigned long sigset_t; #define SA_ONESHOT SA_RESETHAND #define SA_NOMASK SA_NODEFER -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 4096 #define SIGSTKSZ 16384 diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 336393c9c11f..02d02c047f17 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -122,12 +122,6 @@ SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask) return sigsuspend(&blocked); } -asmlinkage int -sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss) -{ - return do_sigaltstack(uss, uoss, rdusp()); -} - /* * Do a signal return; undo the signal stack. */ @@ -418,9 +412,7 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(set->sig[0], &frame->uc.uc_osf_sigmask); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(oldsp), &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __save_altstack(&frame->uc.uc_stack, oldsp); err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], oldsp); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8c83d98424c7..f95ba14ae3d0 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -12,8 +12,6 @@ config ARM select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select GENERIC_PCI_IOMAP select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 49ca86e37b8d..fe4d9c3ad761 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -44,7 +44,7 @@ #else -#include <mach/debug-macro.S> +#include CONFIG_DEBUG_LL_INCLUDE .macro writeb, ch, rb senduart \ch, \rb diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 0f441740c22a..e44da40d984f 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -42,11 +42,10 @@ dtb-$(CONFIG_ARCH_DOVE) += dove-cm-a510.dtb \ dtb-$(CONFIG_ARCH_EXYNOS) += exynos4210-origen.dtb \ exynos4210-smdkv310.dtb \ exynos4210-trats.dtb \ - exynos5250-smdk5250.dtb \ - exynos5440-ssdk5440.dtb \ exynos4412-smdk4412.dtb \ exynos5250-smdk5250.dtb \ - exynos5250-snow.dtb + exynos5250-snow.dtb \ + exynos5440-ssdk5440.dtb dtb-$(CONFIG_ARCH_HIGHBANK) += highbank.dtb \ ecx-2000.dtb dtb-$(CONFIG_ARCH_INTEGRATOR) += integratorap.dtb \ @@ -107,6 +106,7 @@ dtb-$(CONFIG_ARCH_OMAP2PLUS) += omap2420-h4.dtb \ omap3-evm.dtb \ omap3-tobi.dtb \ omap4-panda.dtb \ + omap4-panda-a4.dtb \ omap4-panda-es.dtb \ omap4-var-som.dtb \ omap4-sdp.dtb \ @@ -131,8 +131,8 @@ dtb-$(CONFIG_ARCH_SPEAR3XX)+= spear300-evb.dtb \ spear320-evb.dtb \ spear320-hmi.dtb dtb-$(CONFIG_ARCH_SPEAR6XX)+= spear600-evb.dtb -dtb-$(CONFIG_ARCH_SUNXI) += sun4i-cubieboard.dtb \ - sun5i-olinuxino.dtb +dtb-$(CONFIG_ARCH_SUNXI) += sun4i-a10-cubieboard.dtb \ + sun5i-a13-olinuxino.dtb dtb-$(CONFIG_ARCH_TEGRA) += tegra20-harmony.dtb \ tegra20-medcom-wide.dtb \ tegra20-paz00.dtb \ diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index 8e6251f1f7a3..32ec62cf5385 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -368,14 +368,14 @@ compatible = "atmel,at91rm9200-ssc"; reg = <0xfff98000 0x4000>; interrupts = <16 4 5>; - status = "disable"; + status = "disabled"; }; ssc1: ssc@fff9c000 { compatible = "atmel,at91rm9200-ssc"; reg = <0xfff9c000 0x4000>; interrupts = <17 4 5>; - status = "disable"; + status = "disabled"; }; macb0: ethernet@fffbc000 { diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index fa1ae0c5479c..231858ffd850 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -425,14 +425,14 @@ compatible = "atmel,at91sam9g45-ssc"; reg = <0xfff9c000 0x4000>; interrupts = <16 4 5>; - status = "disable"; + status = "disabled"; }; ssc1: ssc@fffa0000 { compatible = "atmel,at91sam9g45-ssc"; reg = <0xfffa0000 0x4000>; interrupts = <17 4 5>; - status = "disable"; + status = "disabled"; }; adc0: adc@fffb0000 { diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 617ede541ca2..40ac3a4eb1ab 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -92,7 +92,7 @@ compatible = "atmel,at91sam9g45-ssc"; reg = <0xf0010000 0x4000>; interrupts = <28 4 5>; - status = "disable"; + status = "disabled"; }; tcb0: timer@f8008000 { diff --git a/arch/arm/boot/dts/imx27-3ds.dts b/arch/arm/boot/dts/imx27-3ds.dts index b01c0d745fc5..fa04c7b18bcb 100644 --- a/arch/arm/boot/dts/imx27-3ds.dts +++ b/arch/arm/boot/dts/imx27-3ds.dts @@ -21,17 +21,17 @@ }; soc { - aipi@10000000 { /* aipi */ - + aipi@10000000 { /* aipi1 */ uart1: serial@1000a000 { fsl,uart-has-rtscts; status = "okay"; }; + }; - fec@1002b000 { + aipi@10020000 { /* aipi2 */ + ethernet@1002b000 { status = "okay"; }; }; }; - }; diff --git a/arch/arm/boot/dts/imx27-phytec-phycore.dts b/arch/arm/boot/dts/imx27-phytec-phycore.dts index af50469e34b2..53b0ec0c228e 100644 --- a/arch/arm/boot/dts/imx27-phytec-phycore.dts +++ b/arch/arm/boot/dts/imx27-phytec-phycore.dts @@ -21,8 +21,7 @@ }; soc { - aipi@10000000 { /* aipi */ - + aipi@10000000 { /* aipi1 */ serial@1000a000 { fsl,uart-has-rtscts; status = "okay"; @@ -38,10 +37,6 @@ status = "okay"; }; - ethernet@1002b000 { - status = "okay"; - }; - i2c@1001d000 { clock-frequency = <400000>; status = "okay"; @@ -60,6 +55,12 @@ }; }; }; + + aipi@10020000 { /* aipi2 */ + ethernet@1002b000 { + status = "okay"; + }; + }; }; nor_flash@c0000000 { diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index b8d3905915ac..5a82cb5707a8 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -55,7 +55,7 @@ compatible = "fsl,aipi-bus", "simple-bus"; #address-cells = <1>; #size-cells = <1>; - reg = <0x10000000 0x10000000>; + reg = <0x10000000 0x20000>; ranges; wdog: wdog@10002000 { @@ -211,6 +211,15 @@ status = "disabled"; }; + }; + + aipi@10020000 { /* AIPI2 */ + compatible = "fsl,aipi-bus", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x10020000 0x20000>; + ranges; + fec: ethernet@1002b000 { compatible = "fsl,imx27-fec"; reg = <0x1002b000 0x4000>; diff --git a/arch/arm/boot/dts/omap2420-h4.dts b/arch/arm/boot/dts/omap2420-h4.dts index 77b84e17c477..9b0d07746cba 100644 --- a/arch/arm/boot/dts/omap2420-h4.dts +++ b/arch/arm/boot/dts/omap2420-h4.dts @@ -15,6 +15,6 @@ memory { device_type = "memory"; - reg = <0x80000000 0x84000000>; /* 64 MB */ + reg = <0x80000000 0x4000000>; /* 64 MB */ }; }; diff --git a/arch/arm/boot/dts/sun4i-cubieboard.dts b/arch/arm/boot/dts/sun4i-a10-cubieboard.dts index f4ca126ad994..5cab82540437 100644 --- a/arch/arm/boot/dts/sun4i-cubieboard.dts +++ b/arch/arm/boot/dts/sun4i-a10-cubieboard.dts @@ -11,11 +11,11 @@ */ /dts-v1/; -/include/ "sun4i.dtsi" +/include/ "sun4i-a10.dtsi" / { model = "Cubietech Cubieboard"; - compatible = "cubietech,cubieboard", "allwinner,sun4i"; + compatible = "cubietech,a10-cubieboard", "allwinner,sun4i-a10"; aliases { serial0 = &uart0; diff --git a/arch/arm/boot/dts/sun4i.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index e61fdd47bd01..e61fdd47bd01 100644 --- a/arch/arm/boot/dts/sun4i.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi diff --git a/arch/arm/boot/dts/sun5i-olinuxino.dts b/arch/arm/boot/dts/sun5i-a13-olinuxino.dts index d6ff889a5d87..498a091a4ea2 100644 --- a/arch/arm/boot/dts/sun5i-olinuxino.dts +++ b/arch/arm/boot/dts/sun5i-a13-olinuxino.dts @@ -12,11 +12,11 @@ */ /dts-v1/; -/include/ "sun5i.dtsi" +/include/ "sun5i-a13.dtsi" / { model = "Olimex A13-Olinuxino"; - compatible = "olimex,a13-olinuxino", "allwinner,sun5i"; + compatible = "olimex,a13-olinuxino", "allwinner,sun5i-a13"; chosen { bootargs = "earlyprintk console=ttyS0,115200"; diff --git a/arch/arm/boot/dts/sun5i.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index 59a2d265a98e..59a2d265a98e 100644 --- a/arch/arm/boot/dts/sun5i.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 67d06324e74a..5b579b951503 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -91,6 +91,7 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { + debug_dma_mapping_error(dev, dma_addr); return dma_addr == DMA_ERROR_CODE; } diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 7cd13cc62624..21a2700d2957 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -41,7 +41,6 @@ #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_SOCKETCALL #endif -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm/include/uapi/asm/signal.h b/arch/arm/include/uapi/asm/signal.h index 921c57fdc52e..33073bdcf091 100644 --- a/arch/arm/include/uapi/asm/signal.h +++ b/arch/arm/include/uapi/asm/signal.h @@ -87,13 +87,6 @@ typedef unsigned long sigset_t; #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 9a89bf4aefe1..3f6cbb2e3eda 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -733,7 +733,7 @@ void __init setup_arch(char **cmdline_p) setup_processor(); mdesc = setup_machine_fdt(__atags_pointer); if (!mdesc) - mdesc = setup_machine_tags(__atags_pointer, machine_arch_type); + mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type); machine_desc = mdesc; machine_name = mdesc->name; diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index df745188f5de..ab1017bd1667 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -109,10 +109,12 @@ static void set_segfault(struct pt_regs *regs, unsigned long addr) { siginfo_t info; + down_read(¤t->mm->mmap_sem); if (find_vma(current->mm, addr) == NULL) info.si_code = SEGV_MAPERR; else info.si_code = SEGV_ACCERR; + up_read(¤t->mm->mmap_sem); info.si_signo = SIGSEGV; info.si_errno = 0; diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b9f38e388b43..11c1785bf63e 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -140,6 +140,8 @@ SECTIONS } #endif + NOTES + _etext = .; /* End of text and rodata section */ #ifndef CONFIG_XIP_KERNEL @@ -295,8 +297,6 @@ SECTIONS } #endif - NOTES - BSS_SECTION(0, 0, 0) _end = .; diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 7211772edd9d..0299915575a8 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -41,6 +41,7 @@ #include <mach/cp_intc.h> #include <mach/da8xx.h> #include <mach/mux.h> +#include <mach/sram.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> diff --git a/arch/arm/mach-ep93xx/include/mach/uncompress.h b/arch/arm/mach-ep93xx/include/mach/uncompress.h index 16026c2b1c8c..d64274fc5760 100644 --- a/arch/arm/mach-ep93xx/include/mach/uncompress.h +++ b/arch/arm/mach-ep93xx/include/mach/uncompress.h @@ -47,13 +47,9 @@ static void __raw_writel(unsigned int value, unsigned int ptr) static inline void putc(int c) { - int i; - - for (i = 0; i < 1000; i++) { - /* Transmit fifo not full? */ - if (!(__raw_readb(PHYS_UART_FLAG) & UART_FLAG_TXFF)) - break; - } + /* Transmit fifo not full? */ + while (__raw_readb(PHYS_UART_FLAG) & UART_FLAG_TXFF) + ; __raw_writeb(c, PHYS_UART_DATA); } diff --git a/arch/arm/mach-exynos/clock-exynos4.c b/arch/arm/mach-exynos/clock-exynos4.c index efead60b9436..bbcb3dea0d40 100644 --- a/arch/arm/mach-exynos/clock-exynos4.c +++ b/arch/arm/mach-exynos/clock-exynos4.c @@ -529,7 +529,7 @@ static struct clk exynos4_init_clocks_off[] = { .enable = exynos4_clk_ip_fsys_ctrl, .ctrlbit = (1 << 8), }, { - .name = "dwmmc", + .name = "biu", .parent = &exynos4_clk_aclk_133.clk, .enable = exynos4_clk_ip_fsys_ctrl, .ctrlbit = (1 << 9), @@ -1134,7 +1134,7 @@ static struct clksrc_clk exynos4_clksrcs[] = { .reg_div = { .reg = EXYNOS4_CLKDIV_MFC, .shift = 0, .size = 4 }, }, { .clk = { - .name = "sclk_dwmmc", + .name = "ciu", .parent = &exynos4_clk_dout_mmc4.clk, .enable = exynos4_clksrc_mask_fsys_ctrl, .ctrlbit = (1 << 16), diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c index ddd4b72c6f9a..d6d0dc651089 100644 --- a/arch/arm/mach-exynos/common.c +++ b/arch/arm/mach-exynos/common.c @@ -679,7 +679,8 @@ void __init exynos5_init_irq(void) * Theses parameters should be NULL and 0 because EXYNOS4 * uses GIC instead of VIC. */ - s5p_init_irq(NULL, 0); + if (!of_machine_is_compatible("samsung,exynos5440")) + s5p_init_irq(NULL, 0); gic_arch_extn.irq_set_wake = s3c_irq_wake; } diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h index dac146df79ac..04744f9c120f 100644 --- a/arch/arm/mach-exynos/common.h +++ b/arch/arm/mach-exynos/common.h @@ -25,7 +25,7 @@ void exynos_init_late(void); #ifdef CONFIG_PM_GENERIC_DOMAINS int exynos_pm_late_initcall(void); #else -static int exynos_pm_late_initcall(void) { return 0; } +static inline int exynos_pm_late_initcall(void) { return 0; } #endif #ifdef CONFIG_ARCH_EXYNOS4 diff --git a/arch/arm/mach-exynos/dev-audio.c b/arch/arm/mach-exynos/dev-audio.c index a1cb42c39590..9d1a60951d7b 100644 --- a/arch/arm/mach-exynos/dev-audio.c +++ b/arch/arm/mach-exynos/dev-audio.c @@ -23,11 +23,6 @@ #include <mach/irqs.h> #include <mach/regs-audss.h> -static const char *rclksrc[] = { - [0] = "busclk", - [1] = "i2sclk", -}; - static int exynos4_cfg_i2s(struct platform_device *pdev) { /* configure GPIO for i2s port */ @@ -55,7 +50,6 @@ static struct s3c_audio_pdata i2sv5_pdata = { .i2s = { .quirks = QUIRK_PRI_6CHAN | QUIRK_SEC_DAI | QUIRK_NEED_RSTCLR, - .src_clk = rclksrc, .idma_addr = EXYNOS4_AUDSS_INT_MEM, }, }, @@ -78,17 +72,11 @@ struct platform_device exynos4_device_i2s0 = { }, }; -static const char *rclksrc_v3[] = { - [0] = "sclk_i2s", - [1] = "no_such_clock", -}; - static struct s3c_audio_pdata i2sv3_pdata = { .cfg_gpio = exynos4_cfg_i2s, .type = { .i2s = { .quirks = QUIRK_NO_MUXPSR, - .src_clk = rclksrc_v3, }, }, }; diff --git a/arch/arm/mach-exynos/mach-exynos5-dt.c b/arch/arm/mach-exynos/mach-exynos5-dt.c index f038c8cadca4..e99d3d8f2bcf 100644 --- a/arch/arm/mach-exynos/mach-exynos5-dt.c +++ b/arch/arm/mach-exynos/mach-exynos5-dt.c @@ -163,6 +163,7 @@ static char const *exynos5_dt_compat[] __initdata = { static void __init exynos5_reserve(void) { +#ifdef CONFIG_S5P_DEV_MFC struct s5p_mfc_dt_meminfo mfc_mem; /* Reserve memory for MFC only if it's available */ @@ -170,6 +171,7 @@ static void __init exynos5_reserve(void) if (of_scan_flat_dt(s5p_fdt_find_mfc_mem, &mfc_mem)) s5p_mfc_reserve_mem(mfc_mem.roff, mfc_mem.rsize, mfc_mem.loff, mfc_mem.lsize); +#endif } DT_MACHINE_START(EXYNOS5_DT, "SAMSUNG EXYNOS5 (Flattened Device Tree)") diff --git a/arch/arm/mach-exynos/mach-origen.c b/arch/arm/mach-exynos/mach-origen.c index e6f4191cd14c..5e34b9c16196 100644 --- a/arch/arm/mach-exynos/mach-origen.c +++ b/arch/arm/mach-exynos/mach-origen.c @@ -621,7 +621,7 @@ static struct pwm_lookup origen_pwm_lookup[] = { PWM_LOOKUP("s3c24xx-pwm.0", 0, "pwm-backlight.0", NULL), }; -#ifdef CONFIG_DRM_EXYNOS +#ifdef CONFIG_DRM_EXYNOS_FIMD static struct exynos_drm_fimd_pdata drm_fimd_pdata = { .panel = { .timing = { @@ -793,7 +793,7 @@ static void __init origen_machine_init(void) s5p_i2c_hdmiphy_set_platdata(NULL); s5p_hdmi_set_platdata(&hdmiphy_info, NULL, 0); -#ifdef CONFIG_DRM_EXYNOS +#ifdef CONFIG_DRM_EXYNOS_FIMD s5p_device_fimd0.dev.platform_data = &drm_fimd_pdata; exynos4_fimd0_gpio_setup_24bpp(); #else diff --git a/arch/arm/mach-exynos/mach-smdk4x12.c b/arch/arm/mach-exynos/mach-smdk4x12.c index a1555a73c7af..ae6da40c2aa9 100644 --- a/arch/arm/mach-exynos/mach-smdk4x12.c +++ b/arch/arm/mach-exynos/mach-smdk4x12.c @@ -246,7 +246,7 @@ static struct samsung_keypad_platdata smdk4x12_keypad_data __initdata = { .cols = 8, }; -#ifdef CONFIG_DRM_EXYNOS +#ifdef CONFIG_DRM_EXYNOS_FIMD static struct exynos_drm_fimd_pdata drm_fimd_pdata = { .panel = { .timing = { @@ -360,7 +360,7 @@ static void __init smdk4x12_machine_init(void) s3c_hsotg_set_platdata(&smdk4x12_hsotg_pdata); -#ifdef CONFIG_DRM_EXYNOS +#ifdef CONFIG_DRM_EXYNOS_FIMD s5p_device_fimd0.dev.platform_data = &drm_fimd_pdata; exynos4_fimd0_gpio_setup_24bpp(); #else diff --git a/arch/arm/mach-exynos/mach-smdkv310.c b/arch/arm/mach-exynos/mach-smdkv310.c index b7384241fb03..35548e3c097d 100644 --- a/arch/arm/mach-exynos/mach-smdkv310.c +++ b/arch/arm/mach-exynos/mach-smdkv310.c @@ -159,7 +159,7 @@ static struct platform_device smdkv310_lcd_lte480wv = { .dev.platform_data = &smdkv310_lcd_lte480wv_data, }; -#ifdef CONFIG_DRM_EXYNOS +#ifdef CONFIG_DRM_EXYNOS_FIMD static struct exynos_drm_fimd_pdata drm_fimd_pdata = { .panel = { .timing = { @@ -402,7 +402,7 @@ static void __init smdkv310_machine_init(void) samsung_bl_set(&smdkv310_bl_gpio_info, &smdkv310_bl_data); pwm_add_table(smdkv310_pwm_lookup, ARRAY_SIZE(smdkv310_pwm_lookup)); -#ifdef CONFIG_DRM_EXYNOS +#ifdef CONFIG_DRM_EXYNOS_FIMD s5p_device_fimd0.dev.platform_data = &drm_fimd_pdata; exynos4_fimd0_gpio_setup_24bpp(); #else diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 4ca8ff14a5bf..c5c840e947b8 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -198,7 +198,7 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) { int i; - if (!soc_is_exynos5250()) + if (!(soc_is_exynos5250() || soc_is_exynos5440())) scu_enable(scu_base_addr()); /* diff --git a/arch/arm/mach-imx/clk-imx51-imx53.c b/arch/arm/mach-imx/clk-imx51-imx53.c index e8c0473c7568..579023f59dc1 100644 --- a/arch/arm/mach-imx/clk-imx51-imx53.c +++ b/arch/arm/mach-imx/clk-imx51-imx53.c @@ -319,6 +319,7 @@ int __init mx51_clocks_init(unsigned long rate_ckil, unsigned long rate_osc, unsigned long rate_ckih1, unsigned long rate_ckih2) { int i; + u32 val; struct device_node *np; clk[pll1_sw] = imx_clk_pllv2("pll1_sw", "osc", MX51_DPLL1_BASE); @@ -390,6 +391,21 @@ int __init mx51_clocks_init(unsigned long rate_ckil, unsigned long rate_osc, imx_print_silicon_rev("i.MX51", mx51_revision()); clk_disable_unprepare(clk[iim_gate]); + /* + * Reference Manual says: Functionality of CCDR[18] and CLPCR[23] is no + * longer supported. Set to one for better power saving. + * + * The effect of not setting these bits is that MIPI clocks can't be + * enabled without the IPU clock being enabled aswell. + */ + val = readl(MXC_CCM_CCDR); + val |= 1 << 18; + writel(val, MXC_CCM_CCDR); + + val = readl(MXC_CCM_CLPCR); + val |= 1 << 23; + writel(val, MXC_CCM_CLPCR); + return 0; } diff --git a/arch/arm/plat-mxc/devices/platform-mx2-emma.c b/arch/arm/mach-imx/devices/platform-mx2-emma.c index 508404ddd4ea..11bd01d402f2 100644 --- a/arch/arm/plat-mxc/devices/platform-mx2-emma.c +++ b/arch/arm/mach-imx/devices/platform-mx2-emma.c @@ -6,8 +6,8 @@ * the terms of the GNU General Public License version 2 as published by the * Free Software Foundation. */ -#include <mach/hardware.h> -#include <mach/devices-common.h> +#include "../hardware.h" +#include "devices-common.h" #define imx_mx2_emmaprp_data_entry_single(soc) \ { \ diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index be0f62bf9037..41b581fd0213 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -26,6 +26,8 @@ config SOC_HAS_OMAP2_SDRC config SOC_HAS_REALTIME_COUNTER bool "Real time free running counter" + depends on SOC_OMAP5 + default y config ARCH_OMAP2 bool "TI OMAP2" @@ -79,7 +81,6 @@ config SOC_OMAP5 select ARM_GIC select CPU_V7 select HAVE_SMP - select SOC_HAS_REALTIME_COUNTER select COMMON_CLK comment "OMAP Core Type" diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c index 7b201546834d..bb73afc9ac17 100644 --- a/arch/arm/mach-omap2/board-3430sdp.c +++ b/arch/arm/mach-omap2/board-3430sdp.c @@ -157,6 +157,7 @@ static struct omap_dss_device sdp3430_lcd_device = { static struct tfp410_platform_data dvi_panel = { .power_down_gpio = -1, + .i2c_bus_num = -1, }; static struct omap_dss_device sdp3430_dvi_device = { diff --git a/arch/arm/mach-omap2/board-am3517evm.c b/arch/arm/mach-omap2/board-am3517evm.c index 4be58fd071f6..f81a303b87ff 100644 --- a/arch/arm/mach-omap2/board-am3517evm.c +++ b/arch/arm/mach-omap2/board-am3517evm.c @@ -208,6 +208,7 @@ static struct omap_dss_device am3517_evm_tv_device = { static struct tfp410_platform_data dvi_panel = { .power_down_gpio = -1, + .i2c_bus_num = -1, }; static struct omap_dss_device am3517_evm_dvi_device = { diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c index c8e37dc00892..b3102c2f4a3c 100644 --- a/arch/arm/mach-omap2/board-cm-t35.c +++ b/arch/arm/mach-omap2/board-cm-t35.c @@ -241,6 +241,7 @@ static struct omap_dss_device cm_t35_lcd_device = { static struct tfp410_platform_data dvi_panel = { .power_down_gpio = CM_T35_DVI_EN_GPIO, + .i2c_bus_num = -1, }; static struct omap_dss_device cm_t35_dvi_device = { diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c index 7667eb749522..12865af25d3a 100644 --- a/arch/arm/mach-omap2/board-devkit8000.c +++ b/arch/arm/mach-omap2/board-devkit8000.c @@ -141,6 +141,7 @@ static struct omap_dss_device devkit8000_lcd_device = { static struct tfp410_platform_data dvi_panel = { .power_down_gpio = -1, + .i2c_bus_num = 1, }; static struct omap_dss_device devkit8000_dvi_device = { diff --git a/arch/arm/mach-omap2/board-h4.c b/arch/arm/mach-omap2/board-h4.c index 9a3878ec2256..3be1311f9e33 100644 --- a/arch/arm/mach-omap2/board-h4.c +++ b/arch/arm/mach-omap2/board-h4.c @@ -27,14 +27,12 @@ #include <linux/io.h> #include <linux/input/matrix_keypad.h> #include <linux/mfd/menelaus.h> +#include <linux/omap-dma.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> -#include <linux/omap-dma.h> -#include <plat/debug-devices.h> - #include <video/omapdss.h> #include <video/omap-panel-generic-dpi.h> @@ -42,11 +40,9 @@ #include "mux.h" #include "control.h" #include "gpmc.h" +#include "gpmc-smc91x.h" #define H4_FLASH_CS 0 -#define H4_SMC91X_CS 1 - -#define H4_ETHR_GPIO_IRQ 92 #if defined(CONFIG_KEYBOARD_MATRIX) || defined(CONFIG_KEYBOARD_MATRIX_MODULE) static const uint32_t board_matrix_keys[] = { @@ -250,71 +246,31 @@ static u32 is_gpmc_muxed(void) return 0; } -static inline void __init h4_init_debug(void) -{ - int eth_cs; - unsigned long cs_mem_base; - unsigned int muxed, rate; - struct clk *gpmc_fck; - - eth_cs = H4_SMC91X_CS; +#if defined(CONFIG_SMC91X) || defined(CONFIG_SMC91x_MODULE) - gpmc_fck = clk_get(NULL, "gpmc_fck"); /* Always on ENABLE_ON_INIT */ - if (IS_ERR(gpmc_fck)) { - WARN_ON(1); - return; - } - - clk_prepare_enable(gpmc_fck); - rate = clk_get_rate(gpmc_fck); - clk_disable_unprepare(gpmc_fck); - clk_put(gpmc_fck); +static struct omap_smc91x_platform_data board_smc91x_data = { + .cs = 1, + .gpio_irq = 92, + .flags = GPMC_TIMINGS_SMC91C96 | IORESOURCE_IRQ_LOWLEVEL, +}; +static void __init board_smc91x_init(void) +{ if (is_gpmc_muxed()) - muxed = 0x200; - else - muxed = 0; - - /* Make sure CS1 timings are correct */ - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG1, - 0x00011000 | muxed); - - if (rate >= 160000000) { - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG2, 0x001f1f01); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG3, 0x00080803); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG4, 0x1c0b1c0a); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG5, 0x041f1F1F); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG6, 0x000004C4); - } else if (rate >= 130000000) { - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG2, 0x001f1f00); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG3, 0x00080802); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG4, 0x1C091C09); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG5, 0x041f1F1F); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG6, 0x000004C4); - } else {/* rate = 100000000 */ - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG2, 0x001f1f00); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG3, 0x00080802); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG4, 0x1C091C09); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG5, 0x031A1F1F); - gpmc_cs_write_reg(eth_cs, GPMC_CS_CONFIG6, 0x000003C2); - } - - if (gpmc_cs_request(eth_cs, SZ_16M, &cs_mem_base) < 0) { - printk(KERN_ERR "Failed to request GPMC mem for smc91x\n"); - goto out; - } + board_smc91x_data.flags |= GPMC_MUX_ADD_DATA; - udelay(100); + omap_mux_init_gpio(board_smc91x_data.gpio_irq, OMAP_PIN_INPUT); + gpmc_smc91x_init(&board_smc91x_data); +} - omap_mux_init_gpio(92, 0); - if (debug_card_init(cs_mem_base, H4_ETHR_GPIO_IRQ) < 0) - gpmc_cs_free(eth_cs); +#else -out: - clk_disable_unprepare(gpmc_fck); - clk_put(gpmc_fck); +static inline void board_smc91x_init(void) +{ } +#endif + static void __init h4_init_flash(void) { unsigned long base; @@ -371,6 +327,7 @@ static void __init omap_h4_init(void) omap_serial_init(); omap_sdrc_init(NULL, NULL); h4_init_flash(); + board_smc91x_init(); omap_display_init(&h4_dss_data); } diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c index 54647d6286b4..3985f35aee06 100644 --- a/arch/arm/mach-omap2/board-omap3evm.c +++ b/arch/arm/mach-omap2/board-omap3evm.c @@ -240,6 +240,7 @@ static struct omap_dss_device omap3_evm_tv_device = { static struct tfp410_platform_data dvi_panel = { .power_down_gpio = OMAP3EVM_DVI_PANEL_EN_GPIO, + .i2c_bus_num = -1, }; static struct omap_dss_device omap3_evm_dvi_device = { diff --git a/arch/arm/mach-omap2/board-omap3stalker.c b/arch/arm/mach-omap2/board-omap3stalker.c index d8638b3b4f94..53a6cbcf9747 100644 --- a/arch/arm/mach-omap2/board-omap3stalker.c +++ b/arch/arm/mach-omap2/board-omap3stalker.c @@ -118,6 +118,7 @@ static struct omap_dss_device omap3_stalker_tv_device = { static struct tfp410_platform_data dvi_panel = { .power_down_gpio = DSS_ENABLE_GPIO, + .i2c_bus_num = -1, }; static struct omap_dss_device omap3_stalker_dvi_device = { diff --git a/arch/arm/mach-omap2/cclock44xx_data.c b/arch/arm/mach-omap2/cclock44xx_data.c index aa56c3e5bb34..5789a5e25563 100644 --- a/arch/arm/mach-omap2/cclock44xx_data.c +++ b/arch/arm/mach-omap2/cclock44xx_data.c @@ -40,6 +40,14 @@ #define OMAP4430_MODULEMODE_HWCTRL_SHIFT 0 #define OMAP4430_MODULEMODE_SWCTRL_SHIFT 1 +/* + * OMAP4 ABE DPLL default frequency. In OMAP4460 TRM version V, section + * "3.6.3.2.3 CM1_ABE Clock Generator" states that the "DPLL_ABE_X2_CLK + * must be set to 196.608 MHz" and hence, the DPLL locked frequency is + * half of this value. + */ +#define OMAP4_DPLL_ABE_DEFFREQ 98304000 + /* Root clocks */ DEFINE_CLK_FIXED_RATE(extalt_clkin_ck, CLK_IS_ROOT, 59000000, 0x0); @@ -124,6 +132,8 @@ static struct dpll_data dpll_abe_dd = { .enable_mask = OMAP4430_DPLL_EN_MASK, .autoidle_mask = OMAP4430_AUTO_DPLL_MODE_MASK, .idlest_mask = OMAP4430_ST_DPLL_CLK_MASK, + .m4xen_mask = OMAP4430_DPLL_REGM4XEN_MASK, + .lpmode_mask = OMAP4430_DPLL_LPMODE_EN_MASK, .max_multiplier = 2047, .max_divider = 128, .min_divider = 1, @@ -233,7 +243,7 @@ static struct dpll_data dpll_core_dd = { static const char *dpll_core_ck_parents[] = { - "sys_clkin_ck", + "sys_clkin_ck", "core_hsd_byp_clk_mux_ck" }; static struct clk dpll_core_ck; @@ -286,9 +296,9 @@ DEFINE_CLK_DIVIDER(div_core_ck, "dpll_core_m5x2_ck", &dpll_core_m5x2_ck, 0x0, OMAP4430_CM_CLKSEL_CORE, OMAP4430_CLKSEL_CORE_SHIFT, OMAP4430_CLKSEL_CORE_WIDTH, 0x0, NULL); -DEFINE_CLK_OMAP_HSDIVIDER(div_iva_hs_clk, "dpll_core_m5x2_ck", - &dpll_core_m5x2_ck, 0x0, OMAP4430_CM_BYPCLK_DPLL_IVA, - OMAP4430_CLKSEL_0_1_MASK); +DEFINE_CLK_DIVIDER(div_iva_hs_clk, "dpll_core_m5x2_ck", &dpll_core_m5x2_ck, + 0x0, OMAP4430_CM_BYPCLK_DPLL_IVA, OMAP4430_CLKSEL_0_1_SHIFT, + OMAP4430_CLKSEL_0_1_WIDTH, CLK_DIVIDER_POWER_OF_TWO, NULL); DEFINE_CLK_DIVIDER(div_mpu_hs_clk, "dpll_core_m5x2_ck", &dpll_core_m5x2_ck, 0x0, OMAP4430_CM_BYPCLK_DPLL_MPU, OMAP4430_CLKSEL_0_1_SHIFT, @@ -363,8 +373,21 @@ static struct dpll_data dpll_iva_dd = { .min_divider = 1, }; +static const char *dpll_iva_ck_parents[] = { + "sys_clkin_ck", "iva_hsd_byp_clk_mux_ck" +}; + static struct clk dpll_iva_ck; +static const struct clk_ops dpll_ck_ops = { + .enable = &omap3_noncore_dpll_enable, + .disable = &omap3_noncore_dpll_disable, + .recalc_rate = &omap3_dpll_recalc, + .round_rate = &omap2_dpll_round_rate, + .set_rate = &omap3_noncore_dpll_set_rate, + .get_parent = &omap2_init_dpll_parent, +}; + static struct clk_hw_omap dpll_iva_ck_hw = { .hw = { .clk = &dpll_iva_ck, @@ -373,7 +396,7 @@ static struct clk_hw_omap dpll_iva_ck_hw = { .ops = &clkhwops_omap3_dpll, }; -DEFINE_STRUCT_CLK(dpll_iva_ck, dpll_core_ck_parents, dpll_abe_ck_ops); +DEFINE_STRUCT_CLK(dpll_iva_ck, dpll_iva_ck_parents, dpll_ck_ops); static const char *dpll_iva_x2_ck_parents[] = { "dpll_iva_ck", @@ -416,6 +439,10 @@ static struct dpll_data dpll_mpu_dd = { .min_divider = 1, }; +static const char *dpll_mpu_ck_parents[] = { + "sys_clkin_ck", "div_mpu_hs_clk" +}; + static struct clk dpll_mpu_ck; static struct clk_hw_omap dpll_mpu_ck_hw = { @@ -426,7 +453,7 @@ static struct clk_hw_omap dpll_mpu_ck_hw = { .ops = &clkhwops_omap3_dpll, }; -DEFINE_STRUCT_CLK(dpll_mpu_ck, dpll_core_ck_parents, dpll_abe_ck_ops); +DEFINE_STRUCT_CLK(dpll_mpu_ck, dpll_mpu_ck_parents, dpll_ck_ops); DEFINE_CLK_FIXED_FACTOR(mpu_periphclk, "dpll_mpu_ck", &dpll_mpu_ck, 0x0, 1, 2); @@ -464,6 +491,9 @@ static struct dpll_data dpll_per_dd = { .min_divider = 1, }; +static const char *dpll_per_ck_parents[] = { + "sys_clkin_ck", "per_hsd_byp_clk_mux_ck" +}; static struct clk dpll_per_ck; @@ -475,7 +505,7 @@ static struct clk_hw_omap dpll_per_ck_hw = { .ops = &clkhwops_omap3_dpll, }; -DEFINE_STRUCT_CLK(dpll_per_ck, dpll_core_ck_parents, dpll_abe_ck_ops); +DEFINE_STRUCT_CLK(dpll_per_ck, dpll_per_ck_parents, dpll_ck_ops); DEFINE_CLK_DIVIDER(dpll_per_m2_ck, "dpll_per_ck", &dpll_per_ck, 0x0, OMAP4430_CM_DIV_M2_DPLL_PER, OMAP4430_DPLL_CLKOUT_DIV_SHIFT, @@ -559,6 +589,10 @@ static struct dpll_data dpll_usb_dd = { .min_divider = 1, }; +static const char *dpll_usb_ck_parents[] = { + "sys_clkin_ck", "usb_hs_clk_div_ck" +}; + static struct clk dpll_usb_ck; static struct clk_hw_omap dpll_usb_ck_hw = { @@ -569,7 +603,7 @@ static struct clk_hw_omap dpll_usb_ck_hw = { .ops = &clkhwops_omap3_dpll, }; -DEFINE_STRUCT_CLK(dpll_usb_ck, dpll_core_ck_parents, dpll_abe_ck_ops); +DEFINE_STRUCT_CLK(dpll_usb_ck, dpll_usb_ck_parents, dpll_ck_ops); static const char *dpll_usb_clkdcoldo_ck_parents[] = { "dpll_usb_ck", @@ -696,9 +730,13 @@ DEFINE_CLK_DIVIDER(syc_clk_div_ck, "sys_clkin_ck", &sys_clkin_ck, 0x0, OMAP4430_CM_ABE_DSS_SYS_CLKSEL, OMAP4430_CLKSEL_0_0_SHIFT, OMAP4430_CLKSEL_0_0_WIDTH, 0x0, NULL); +static const char *dbgclk_mux_ck_parents[] = { + "sys_clkin_ck" +}; + static struct clk dbgclk_mux_ck; DEFINE_STRUCT_CLK_HW_OMAP(dbgclk_mux_ck, NULL); -DEFINE_STRUCT_CLK(dbgclk_mux_ck, dpll_core_ck_parents, +DEFINE_STRUCT_CLK(dbgclk_mux_ck, dbgclk_mux_ck_parents, dpll_usb_clkdcoldo_ck_ops); /* Leaf clocks controlled by modules */ @@ -1935,10 +1973,10 @@ static struct omap_clk omap44xx_clks[] = { CLK("4803e000.timer", "timer_sys_ck", &sys_clkin_ck, CK_443X), CLK("48086000.timer", "timer_sys_ck", &sys_clkin_ck, CK_443X), CLK("48088000.timer", "timer_sys_ck", &sys_clkin_ck, CK_443X), - CLK("49038000.timer", "timer_sys_ck", &syc_clk_div_ck, CK_443X), - CLK("4903a000.timer", "timer_sys_ck", &syc_clk_div_ck, CK_443X), - CLK("4903c000.timer", "timer_sys_ck", &syc_clk_div_ck, CK_443X), - CLK("4903e000.timer", "timer_sys_ck", &syc_clk_div_ck, CK_443X), + CLK("40138000.timer", "timer_sys_ck", &syc_clk_div_ck, CK_443X), + CLK("4013a000.timer", "timer_sys_ck", &syc_clk_div_ck, CK_443X), + CLK("4013c000.timer", "timer_sys_ck", &syc_clk_div_ck, CK_443X), + CLK("4013e000.timer", "timer_sys_ck", &syc_clk_div_ck, CK_443X), CLK(NULL, "cpufreq_ck", &dpll_mpu_ck, CK_443X), }; @@ -1955,6 +1993,7 @@ int __init omap4xxx_clk_init(void) { u32 cpu_clkflg; struct omap_clk *c; + int rc; if (cpu_is_omap443x()) { cpu_mask = RATE_IN_4430; @@ -1983,5 +2022,18 @@ int __init omap4xxx_clk_init(void) omap2_clk_enable_init_clocks(enable_init_clks, ARRAY_SIZE(enable_init_clks)); + /* + * On OMAP4460 the ABE DPLL fails to turn on if in idle low-power + * state when turning the ABE clock domain. Workaround this by + * locking the ABE DPLL on boot. + */ + if (cpu_is_omap446x()) { + rc = clk_set_parent(&abe_dpll_refclk_mux_ck, &sys_32k_ck); + if (!rc) + rc = clk_set_rate(&dpll_abe_ck, OMAP4_DPLL_ABE_DEFFREQ); + if (rc) + pr_err("%s: failed to configure ABE DPLL!\n", __func__); + } + return 0; } diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index 9917f793c3b6..b40204837bd7 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -195,6 +195,10 @@ struct clksel { * @enable_mask: mask of the DPLL mode bitfield in @control_reg * @last_rounded_rate: cache of the last rate result of omap2_dpll_round_rate() * @last_rounded_m: cache of the last M result of omap2_dpll_round_rate() + * @last_rounded_m4xen: cache of the last M4X result of + * omap4_dpll_regm4xen_round_rate() + * @last_rounded_lpmode: cache of the last lpmode result of + * omap4_dpll_lpmode_recalc() * @max_multiplier: maximum valid non-bypass multiplier value (actual) * @last_rounded_n: cache of the last N result of omap2_dpll_round_rate() * @min_divider: minimum valid non-bypass divider value (actual) @@ -205,6 +209,8 @@ struct clksel { * @autoidle_mask: mask of the DPLL autoidle mode bitfield in @autoidle_reg * @freqsel_mask: mask of the DPLL jitter correction bitfield in @control_reg * @idlest_mask: mask of the DPLL idle status bitfield in @idlest_reg + * @lpmode_mask: mask of the DPLL low-power mode bitfield in @control_reg + * @m4xen_mask: mask of the DPLL M4X multiplier bitfield in @control_reg * @auto_recal_bit: bitshift of the driftguard enable bit in @control_reg * @recal_en_bit: bitshift of the PRM_IRQENABLE_* bit for recalibration IRQs * @recal_st_bit: bitshift of the PRM_IRQSTATUS_* bit for recalibration IRQs @@ -233,6 +239,8 @@ struct dpll_data { u32 enable_mask; unsigned long last_rounded_rate; u16 last_rounded_m; + u8 last_rounded_m4xen; + u8 last_rounded_lpmode; u16 max_multiplier; u8 last_rounded_n; u8 min_divider; @@ -245,6 +253,8 @@ struct dpll_data { u32 idlest_mask; u32 dco_mask; u32 sddiv_mask; + u32 lpmode_mask; + u32 m4xen_mask; u8 auto_recal_bit; u8 recal_en_bit; u8 recal_st_bit; diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c index 384873580b23..7faf82d4e85c 100644 --- a/arch/arm/mach-omap2/clockdomain.c +++ b/arch/arm/mach-omap2/clockdomain.c @@ -998,7 +998,8 @@ int clkdm_clk_disable(struct clockdomain *clkdm, struct clk *clk) spin_lock_irqsave(&clkdm->lock, flags); /* corner case: disabling unused clocks */ - if (__clk_get_enable_count(clk) == 0) + if ((__clk_get_enable_count(clk) == 0) && + (atomic_read(&clkdm->usecount) == 0)) goto ccd_exit; if (atomic_read(&clkdm->usecount) == 0) { diff --git a/arch/arm/mach-omap2/common.c b/arch/arm/mach-omap2/common.c index 5c2fd4863b2b..2dabb9ecb986 100644 --- a/arch/arm/mach-omap2/common.c +++ b/arch/arm/mach-omap2/common.c @@ -16,8 +16,6 @@ #include <linux/init.h> #include <linux/platform_data/dsp-omap.h> -#include <plat/vram.h> - #include "common.h" #include "omap-secure.h" @@ -32,7 +30,6 @@ int __weak omap_secure_ram_reserve_memblock(void) void __init omap_reserve(void) { - omap_vram_reserve_sdram_memblock(); omap_dsp_reserve_sdram_memblock(); omap_secure_ram_reserve_memblock(); omap_barrier_reserve_memblock(); diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index bca7a8885703..22590dbe8f14 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -40,6 +40,8 @@ struct omap3_idle_statedata { u32 core_state; }; +static struct powerdomain *mpu_pd, *core_pd, *per_pd, *cam_pd; + static struct omap3_idle_statedata omap3_idle_data[] = { { .mpu_state = PWRDM_POWER_ON, @@ -71,7 +73,7 @@ static struct omap3_idle_statedata omap3_idle_data[] = { }, }; -static struct powerdomain *mpu_pd, *core_pd, *per_pd, *cam_pd; +/* Private functions */ static int __omap3_enter_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, @@ -260,11 +262,11 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev, return ret; } -DEFINE_PER_CPU(struct cpuidle_device, omap3_idle_dev); +static DEFINE_PER_CPU(struct cpuidle_device, omap3_idle_dev); -struct cpuidle_driver omap3_idle_driver = { - .name = "omap3_idle", - .owner = THIS_MODULE, +static struct cpuidle_driver omap3_idle_driver = { + .name = "omap3_idle", + .owner = THIS_MODULE, .states = { { .enter = omap3_enter_idle_bm, @@ -327,6 +329,8 @@ struct cpuidle_driver omap3_idle_driver = { .safe_state_index = 0, }; +/* Public functions */ + /** * omap3_idle_init - Init routine for OMAP3 idle * diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index 288bee6cbb76..d639aef0deda 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -54,6 +54,8 @@ static struct clockdomain *cpu_clkdm[NR_CPUS]; static atomic_t abort_barrier; static bool cpu_done[NR_CPUS]; +/* Private functions */ + /** * omap4_enter_idle_coupled_[simple/coupled] - OMAP4 cpuidle entry functions * @dev: cpuidle device @@ -161,9 +163,19 @@ fail: return index; } -DEFINE_PER_CPU(struct cpuidle_device, omap4_idle_dev); +/* + * For each cpu, setup the broadcast timer because local timers + * stops for the states above C1. + */ +static void omap_setup_broadcast_timer(void *arg) +{ + int cpu = smp_processor_id(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu); +} + +static DEFINE_PER_CPU(struct cpuidle_device, omap4_idle_dev); -struct cpuidle_driver omap4_idle_driver = { +static struct cpuidle_driver omap4_idle_driver = { .name = "omap4_idle", .owner = THIS_MODULE, .en_core_tk_irqen = 1, @@ -178,7 +190,7 @@ struct cpuidle_driver omap4_idle_driver = { .desc = "MPUSS ON" }, { - /* C2 - CPU0 OFF + CPU1 OFF + MPU CSWR */ + /* C2 - CPU0 OFF + CPU1 OFF + MPU CSWR */ .exit_latency = 328 + 440, .target_residency = 960, .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_COUPLED, @@ -200,15 +212,7 @@ struct cpuidle_driver omap4_idle_driver = { .safe_state_index = 0, }; -/* - * For each cpu, setup the broadcast timer because local timers - * stops for the states above C1. - */ -static void omap_setup_broadcast_timer(void *arg) -{ - int cpu = smp_processor_id(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu); -} +/* Public functions */ /** * omap4_idle_init - Init routine for OMAP4 idle diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c index 4abb8b5e9bc0..5e304d0719a2 100644 --- a/arch/arm/mach-omap2/devices.c +++ b/arch/arm/mach-omap2/devices.c @@ -226,7 +226,7 @@ static struct platform_device omap3isp_device = { }; static struct omap_iommu_arch_data omap3_isp_iommu = { - .name = "isp", + .name = "mmu_isp", }; int omap3_init_camera(struct isp_platform_data *pdata) diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index fafb28c0dcbc..2bb18838cba9 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ b/arch/arm/mach-omap2/dpll3xxx.c @@ -291,16 +291,13 @@ static void _lookup_sddiv(struct clk_hw_omap *clk, u8 *sd_div, u16 m, u8 n) /* * _omap3_noncore_dpll_program - set non-core DPLL M,N values directly - * @clk: struct clk * of DPLL to set - * @m: DPLL multiplier to set - * @n: DPLL divider to set - * @freqsel: FREQSEL value to set + * @clk: struct clk * of DPLL to set + * @freqsel: FREQSEL value to set * - * Program the DPLL with the supplied M, N values, and wait for the DPLL to - * lock.. Returns -EINVAL upon error, or 0 upon success. + * Program the DPLL with the last M, N values calculated, and wait for + * the DPLL to lock. Returns -EINVAL upon error, or 0 upon success. */ -static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 m, u8 n, - u16 freqsel) +static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel) { struct dpll_data *dd = clk->dpll_data; u8 dco, sd_div; @@ -323,23 +320,45 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 m, u8 n, /* Set DPLL multiplier, divider */ v = __raw_readl(dd->mult_div1_reg); v &= ~(dd->mult_mask | dd->div1_mask); - v |= m << __ffs(dd->mult_mask); - v |= (n - 1) << __ffs(dd->div1_mask); + v |= dd->last_rounded_m << __ffs(dd->mult_mask); + v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask); /* Configure dco and sd_div for dplls that have these fields */ if (dd->dco_mask) { - _lookup_dco(clk, &dco, m, n); + _lookup_dco(clk, &dco, dd->last_rounded_m, dd->last_rounded_n); v &= ~(dd->dco_mask); v |= dco << __ffs(dd->dco_mask); } if (dd->sddiv_mask) { - _lookup_sddiv(clk, &sd_div, m, n); + _lookup_sddiv(clk, &sd_div, dd->last_rounded_m, + dd->last_rounded_n); v &= ~(dd->sddiv_mask); v |= sd_div << __ffs(dd->sddiv_mask); } __raw_writel(v, dd->mult_div1_reg); + /* Set 4X multiplier and low-power mode */ + if (dd->m4xen_mask || dd->lpmode_mask) { + v = __raw_readl(dd->control_reg); + + if (dd->m4xen_mask) { + if (dd->last_rounded_m4xen) + v |= dd->m4xen_mask; + else + v &= ~dd->m4xen_mask; + } + + if (dd->lpmode_mask) { + if (dd->last_rounded_lpmode) + v |= dd->lpmode_mask; + else + v &= ~dd->lpmode_mask; + } + + __raw_writel(v, dd->control_reg); + } + /* We let the clock framework set the other output dividers later */ /* REVISIT: Set ramp-up delay? */ @@ -492,8 +511,7 @@ int omap3_noncore_dpll_set_rate(struct clk_hw *hw, unsigned long rate, pr_debug("%s: %s: set rate: locking rate to %lu.\n", __func__, __clk_get_name(hw->clk), rate); - ret = omap3_noncore_dpll_program(clk, dd->last_rounded_m, - dd->last_rounded_n, freqsel); + ret = omap3_noncore_dpll_program(clk, freqsel); if (!ret) new_parent = dd->clk_ref; } diff --git a/arch/arm/mach-omap2/dpll44xx.c b/arch/arm/mach-omap2/dpll44xx.c index d3326c474fdc..d28b0f726715 100644 --- a/arch/arm/mach-omap2/dpll44xx.c +++ b/arch/arm/mach-omap2/dpll44xx.c @@ -20,6 +20,15 @@ #include "clock44xx.h" #include "cm-regbits-44xx.h" +/* + * Maximum DPLL input frequency (FINT) and output frequency (FOUT) that + * can supported when using the DPLL low-power mode. Frequencies are + * defined in OMAP4430/60 Public TRM section 3.6.3.3.2 "Enable Control, + * Status, and Low-Power Operation Mode". + */ +#define OMAP4_DPLL_LP_FINT_MAX 1000000 +#define OMAP4_DPLL_LP_FOUT_MAX 100000000 + /* Supported only on OMAP4 */ int omap4_dpllmx_gatectrl_read(struct clk_hw_omap *clk) { @@ -82,6 +91,31 @@ const struct clk_hw_omap_ops clkhwops_omap4_dpllmx = { }; /** + * omap4_dpll_lpmode_recalc - compute DPLL low-power setting + * @dd: pointer to the dpll data structure + * + * Calculates if low-power mode can be enabled based upon the last + * multiplier and divider values calculated. If low-power mode can be + * enabled, then the bit to enable low-power mode is stored in the + * last_rounded_lpmode variable. This implementation is based upon the + * criteria for enabling low-power mode as described in the OMAP4430/60 + * Public TRM section 3.6.3.3.2 "Enable Control, Status, and Low-Power + * Operation Mode". + */ +static void omap4_dpll_lpmode_recalc(struct dpll_data *dd) +{ + long fint, fout; + + fint = __clk_get_rate(dd->clk_ref) / (dd->last_rounded_n + 1); + fout = fint * dd->last_rounded_m; + + if ((fint < OMAP4_DPLL_LP_FINT_MAX) && (fout < OMAP4_DPLL_LP_FOUT_MAX)) + dd->last_rounded_lpmode = 1; + else + dd->last_rounded_lpmode = 0; +} + +/** * omap4_dpll_regm4xen_recalc - compute DPLL rate, considering REGM4XEN bit * @clk: struct clk * of the DPLL to compute the rate for * @@ -130,7 +164,6 @@ long omap4_dpll_regm4xen_round_rate(struct clk_hw *hw, unsigned long *parent_rate) { struct clk_hw_omap *clk = to_clk_hw_omap(hw); - u32 v; struct dpll_data *dd; long r; @@ -139,18 +172,31 @@ long omap4_dpll_regm4xen_round_rate(struct clk_hw *hw, dd = clk->dpll_data; - /* regm4xen adds a multiplier of 4 to DPLL calculations */ - v = __raw_readl(dd->control_reg) & OMAP4430_DPLL_REGM4XEN_MASK; - - if (v) - target_rate = target_rate / OMAP4430_REGM4XEN_MULT; + dd->last_rounded_m4xen = 0; + /* + * First try to compute the DPLL configuration for + * target rate without using the 4X multiplier. + */ r = omap2_dpll_round_rate(hw, target_rate, NULL); + if (r != ~0) + goto out; + + /* + * If we did not find a valid DPLL configuration, try again, but + * this time see if using the 4X multiplier can help. Enabling the + * 4X multiplier is equivalent to dividing the target rate by 4. + */ + r = omap2_dpll_round_rate(hw, target_rate / OMAP4430_REGM4XEN_MULT, + NULL); if (r == ~0) return r; - if (v) - clk->dpll_data->last_rounded_rate *= OMAP4430_REGM4XEN_MULT; + dd->last_rounded_rate *= OMAP4430_REGM4XEN_MULT; + dd->last_rounded_m4xen = 1; + +out: + omap4_dpll_lpmode_recalc(dd); - return clk->dpll_data->last_rounded_rate; + return dd->last_rounded_rate; } diff --git a/arch/arm/mach-omap2/i2c.c b/arch/arm/mach-omap2/i2c.c index df6d6acbc9ed..b9074dde3b9c 100644 --- a/arch/arm/mach-omap2/i2c.c +++ b/arch/arm/mach-omap2/i2c.c @@ -22,6 +22,7 @@ #include "soc.h" #include "omap_hwmod.h" #include "omap_device.h" +#include "omap-pm.h" #include "prm.h" #include "common.h" diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c index 26126343d6ac..6a217c98db54 100644 --- a/arch/arm/mach-omap2/mux.c +++ b/arch/arm/mach-omap2/mux.c @@ -135,10 +135,7 @@ static int __init _omap_mux_init_gpio(struct omap_mux_partition *partition, old_mode = omap_mux_read(partition, gpio_mux->reg_offset); mux_mode = val & ~(OMAP_MUX_NR_MODES - 1); - if (partition->flags & OMAP_MUX_GPIO_IN_MODE3) - mux_mode |= OMAP_MUX_MODE3; - else - mux_mode |= OMAP_MUX_MODE4; + mux_mode |= partition->gpio; pr_debug("%s: Setting signal %s.gpio%i 0x%04x -> 0x%04x\n", __func__, gpio_mux->muxnames[0], gpio, old_mode, mux_mode); omap_mux_write(partition, mux_mode, gpio_mux->reg_offset); @@ -800,7 +797,7 @@ int __init omap_mux_late_init(void) struct omap_mux *m = &e->mux; u16 mode = omap_mux_read(partition, m->reg_offset); - if (OMAP_MODE_GPIO(mode)) + if (OMAP_MODE_GPIO(partition, mode)) continue; #ifndef CONFIG_DEBUG_FS @@ -1065,7 +1062,7 @@ static void __init omap_mux_init_list(struct omap_mux_partition *partition, } #else /* Skip pins that are not muxed as GPIO by bootloader */ - if (!OMAP_MODE_GPIO(omap_mux_read(partition, + if (!OMAP_MODE_GPIO(partition, omap_mux_read(partition, superset->reg_offset))) { superset++; continue; @@ -1132,6 +1129,7 @@ int __init omap_mux_init(const char *name, u32 flags, partition->name = name; partition->flags = flags; + partition->gpio = flags & OMAP_MUX_MODE7; partition->size = mux_size; partition->phys = mux_pbase; partition->base = ioremap(mux_pbase, mux_size); diff --git a/arch/arm/mach-omap2/mux.h b/arch/arm/mach-omap2/mux.h index 76f9b3c2f586..fdb22f14021f 100644 --- a/arch/arm/mach-omap2/mux.h +++ b/arch/arm/mach-omap2/mux.h @@ -58,7 +58,8 @@ #define OMAP_PIN_OFF_INPUT_PULLDOWN (OMAP_OFF_EN | OMAP_OFF_PULL_EN) #define OMAP_PIN_OFF_WAKEUPENABLE OMAP_WAKEUP_EN -#define OMAP_MODE_GPIO(x) (((x) & OMAP_MUX_MODE7) == OMAP_MUX_MODE4) +#define OMAP_MODE_GPIO(partition, x) (((x) & OMAP_MUX_MODE7) == \ + partition->gpio) #define OMAP_MODE_UART(x) (((x) & OMAP_MUX_MODE7) == OMAP_MUX_MODE0) /* Flags for omapX_mux_init */ @@ -79,13 +80,20 @@ /* * omap_mux_init flags definition: * + * OMAP_GPIO_MUX_MODE, bits 0-2: gpio muxing mode, same like pad control + * register which includes values from 0-7. * OMAP_MUX_REG_8BIT: Ensure that access to padconf is done in 8 bits. * The default value is 16 bits. - * OMAP_MUX_GPIO_IN_MODE3: The GPIO is selected in mode3. - * The default is mode4. */ -#define OMAP_MUX_REG_8BIT (1 << 0) -#define OMAP_MUX_GPIO_IN_MODE3 (1 << 1) +#define OMAP_MUX_GPIO_IN_MODE0 OMAP_MUX_MODE0 +#define OMAP_MUX_GPIO_IN_MODE1 OMAP_MUX_MODE1 +#define OMAP_MUX_GPIO_IN_MODE2 OMAP_MUX_MODE2 +#define OMAP_MUX_GPIO_IN_MODE3 OMAP_MUX_MODE3 +#define OMAP_MUX_GPIO_IN_MODE4 OMAP_MUX_MODE4 +#define OMAP_MUX_GPIO_IN_MODE5 OMAP_MUX_MODE5 +#define OMAP_MUX_GPIO_IN_MODE6 OMAP_MUX_MODE6 +#define OMAP_MUX_GPIO_IN_MODE7 OMAP_MUX_MODE7 +#define OMAP_MUX_REG_8BIT (1 << 3) /** * struct omap_board_data - board specific device data @@ -105,6 +113,7 @@ struct omap_board_data { * struct mux_partition - contain partition related information * @name: name of the current partition * @flags: flags specific to this partition + * @gpio: gpio mux mode * @phys: physical address * @size: partition size * @base: virtual address after ioremap @@ -114,6 +123,7 @@ struct omap_board_data { struct omap_mux_partition { const char *name; u32 flags; + u32 gpio; u32 phys; u32 size; void __iomem *base; diff --git a/arch/arm/mach-omap2/mux34xx.c b/arch/arm/mach-omap2/mux34xx.c index c47140bbbec4..c53609f46294 100644 --- a/arch/arm/mach-omap2/mux34xx.c +++ b/arch/arm/mach-omap2/mux34xx.c @@ -2053,7 +2053,7 @@ int __init omap3_mux_init(struct omap_board_mux *board_subset, int flags) return -EINVAL; } - return omap_mux_init("core", 0, + return omap_mux_init("core", OMAP_MUX_GPIO_IN_MODE4, OMAP3_CONTROL_PADCONF_MUX_PBASE, OMAP3_CONTROL_PADCONF_MUX_SIZE, omap3_muxmodes, package_subset, board_subset, diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c index a6a4ff8744b7..6da4f7ae9d7f 100644 --- a/arch/arm/mach-omap2/omap-iommu.c +++ b/arch/arm/mach-omap2/omap-iommu.c @@ -12,153 +12,60 @@ #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/err.h> +#include <linux/slab.h> #include <linux/platform_data/iommu-omap.h> +#include "omap_hwmod.h" +#include "omap_device.h" -#include "soc.h" -#include "common.h" - -struct iommu_device { - resource_size_t base; - int irq; - struct iommu_platform_data pdata; - struct resource res[2]; -}; -static struct iommu_device *devices; -static int num_iommu_devices; - -#ifdef CONFIG_ARCH_OMAP3 -static struct iommu_device omap3_devices[] = { - { - .base = 0x480bd400, - .irq = 24 + OMAP_INTC_START, - .pdata = { - .name = "isp", - .nr_tlb_entries = 8, - .clk_name = "cam_ick", - .da_start = 0x0, - .da_end = 0xFFFFF000, - }, - }, -#if defined(CONFIG_OMAP_IOMMU_IVA2) - { - .base = 0x5d000000, - .irq = 28 + OMAP_INTC_START, - .pdata = { - .name = "iva2", - .nr_tlb_entries = 32, - .clk_name = "iva2_ck", - .da_start = 0x11000000, - .da_end = 0xFFFFF000, - }, - }, -#endif -}; -#define NR_OMAP3_IOMMU_DEVICES ARRAY_SIZE(omap3_devices) -static struct platform_device *omap3_iommu_pdev[NR_OMAP3_IOMMU_DEVICES]; -#else -#define omap3_devices NULL -#define NR_OMAP3_IOMMU_DEVICES 0 -#define omap3_iommu_pdev NULL -#endif - -#ifdef CONFIG_ARCH_OMAP4 -static struct iommu_device omap4_devices[] = { - { - .base = OMAP4_MMU1_BASE, - .irq = 100 + OMAP44XX_IRQ_GIC_START, - .pdata = { - .name = "ducati", - .nr_tlb_entries = 32, - .clk_name = "ipu_fck", - .da_start = 0x0, - .da_end = 0xFFFFF000, - }, - }, - { - .base = OMAP4_MMU2_BASE, - .irq = 28 + OMAP44XX_IRQ_GIC_START, - .pdata = { - .name = "tesla", - .nr_tlb_entries = 32, - .clk_name = "dsp_fck", - .da_start = 0x0, - .da_end = 0xFFFFF000, - }, - }, -}; -#define NR_OMAP4_IOMMU_DEVICES ARRAY_SIZE(omap4_devices) -static struct platform_device *omap4_iommu_pdev[NR_OMAP4_IOMMU_DEVICES]; -#else -#define omap4_devices NULL -#define NR_OMAP4_IOMMU_DEVICES 0 -#define omap4_iommu_pdev NULL -#endif - -static struct platform_device **omap_iommu_pdev; - -static int __init omap_iommu_init(void) +static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused) { - int i, err; - struct resource res[] = { - { .flags = IORESOURCE_MEM }, - { .flags = IORESOURCE_IRQ }, - }; + struct platform_device *pdev; + struct iommu_platform_data *pdata; + struct omap_mmu_dev_attr *a = (struct omap_mmu_dev_attr *)oh->dev_attr; + static int i; + + pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + pdata->name = oh->name; + pdata->nr_tlb_entries = a->nr_tlb_entries; + pdata->da_start = a->da_start; + pdata->da_end = a->da_end; + + if (oh->rst_lines_cnt == 1) { + pdata->reset_name = oh->rst_lines->name; + pdata->assert_reset = omap_device_assert_hardreset; + pdata->deassert_reset = omap_device_deassert_hardreset; + } - if (cpu_is_omap34xx()) { - devices = omap3_devices; - omap_iommu_pdev = omap3_iommu_pdev; - num_iommu_devices = NR_OMAP3_IOMMU_DEVICES; - } else if (cpu_is_omap44xx()) { - devices = omap4_devices; - omap_iommu_pdev = omap4_iommu_pdev; - num_iommu_devices = NR_OMAP4_IOMMU_DEVICES; - } else - return -ENODEV; + pdev = omap_device_build("omap-iommu", i, oh, pdata, sizeof(*pdata), + NULL, 0, 0); - for (i = 0; i < num_iommu_devices; i++) { - struct platform_device *pdev; - const struct iommu_device *d = &devices[i]; + kfree(pdata); - pdev = platform_device_alloc("omap-iommu", i); - if (!pdev) { - err = -ENOMEM; - goto err_out; - } + if (IS_ERR(pdev)) { + pr_err("%s: device build err: %ld\n", __func__, PTR_ERR(pdev)); + return PTR_ERR(pdev); + } - res[0].start = d->base; - res[0].end = d->base + MMU_REG_SIZE - 1; - res[1].start = res[1].end = d->irq; + i++; - err = platform_device_add_resources(pdev, res, - ARRAY_SIZE(res)); - if (err) - goto err_out; - err = platform_device_add_data(pdev, &d->pdata, - sizeof(d->pdata)); - if (err) - goto err_out; - err = platform_device_add(pdev); - if (err) - goto err_out; - omap_iommu_pdev[i] = pdev; - } return 0; +} -err_out: - while (i--) - platform_device_put(omap_iommu_pdev[i]); - return err; +static int __init omap_iommu_init(void) +{ + return omap_hwmod_for_each_by_class("mmu", omap_iommu_dev_init, NULL); } /* must be ready before omap3isp is probed */ subsys_initcall(omap_iommu_init); static void __exit omap_iommu_exit(void) { - int i; - - for (i = 0; i < num_iommu_devices; i++) - platform_device_unregister(omap_iommu_pdev[i]); + /* Do nothing */ } module_exit(omap_iommu_exit); diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c index 272b0178dba6..f9fab942d5ba 100644 --- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c @@ -653,7 +653,7 @@ static struct omap_hwmod omap44xx_dsp_hwmod = { .mpu_irqs = omap44xx_dsp_irqs, .rst_lines = omap44xx_dsp_resets, .rst_lines_cnt = ARRAY_SIZE(omap44xx_dsp_resets), - .main_clk = "dsp_fck", + .main_clk = "dpll_iva_m4x2_ck", .prcm = { .omap4 = { .clkctrl_offs = OMAP4_CM_TESLA_TESLA_CLKCTRL_OFFSET, @@ -1679,7 +1679,7 @@ static struct omap_hwmod omap44xx_ipu_hwmod = { .mpu_irqs = omap44xx_ipu_irqs, .rst_lines = omap44xx_ipu_resets, .rst_lines_cnt = ARRAY_SIZE(omap44xx_ipu_resets), - .main_clk = "ipu_fck", + .main_clk = "ducati_clk_mux_ck", .prcm = { .omap4 = { .clkctrl_offs = OMAP4_CM_DUCATI_DUCATI_CLKCTRL_OFFSET, diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index 93d102535c85..04fdbc4c499b 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -27,8 +27,7 @@ #include <linux/pm_runtime.h> #include <linux/console.h> #include <linux/omap-dma.h> - -#include <plat/omap-serial.h> +#include <linux/platform_data/serial-omap.h> #include "common.h" #include "omap_hwmod.h" diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 7016637b531c..691aa674665a 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -175,7 +175,7 @@ static struct device_node * __init omap_get_timer_dt(struct of_device_id *match, continue; } - prom_add_property(np, &device_disabled); + of_add_property(np, &device_disabled); return np; } @@ -190,7 +190,7 @@ static struct device_node * __init omap_get_timer_dt(struct of_device_id *match, * kernel registering these devices remove them dynamically from the device * tree on boot. */ -void __init omap_dmtimer_init(void) +static void __init omap_dmtimer_init(void) { struct device_node *np; @@ -210,7 +210,7 @@ void __init omap_dmtimer_init(void) * * Get the timer errata flags that are specific to the OMAP device being used. */ -u32 __init omap_dm_timer_get_errata(void) +static u32 __init omap_dm_timer_get_errata(void) { if (cpu_is_omap24xx()) return 0; @@ -392,7 +392,7 @@ static struct of_device_id omap_counter_match[] __initdata = { }; /* Setup free-running counter for clocksource */ -static int __init omap2_sync32k_clocksource_init(void) +static int __init __maybe_unused omap2_sync32k_clocksource_init(void) { int ret; struct device_node *np = NULL; diff --git a/arch/arm/mach-omap2/usb-host.c b/arch/arm/mach-omap2/usb-host.c index d1dbe125b34f..2e44e8a22884 100644 --- a/arch/arm/mach-omap2/usb-host.c +++ b/arch/arm/mach-omap2/usb-host.c @@ -508,6 +508,10 @@ void __init usbhs_init(const struct usbhs_omap_board_data *pdata) if (cpu_is_omap34xx()) { setup_ehci_io_mux(pdata->port_mode); setup_ohci_io_mux(pdata->port_mode); + + if (omap_rev() <= OMAP3430_REV_ES2_1) + usbhs_data.single_ulpi_bypass = true; + } else if (cpu_is_omap44xx()) { setup_4430ehci_io_mux(pdata->port_mode); setup_4430ohci_io_mux(pdata->port_mode); diff --git a/arch/arm/mach-realview/include/mach/board-eb.h b/arch/arm/mach-realview/include/mach/board-eb.h index 124bce6b4d7b..a301e61a5554 100644 --- a/arch/arm/mach-realview/include/mach/board-eb.h +++ b/arch/arm/mach-realview/include/mach/board-eb.h @@ -47,7 +47,7 @@ #define REALVIEW_EB_USB_BASE 0x4F000000 /* USB */ #ifdef CONFIG_REALVIEW_EB_ARM11MP_REVB -#define REALVIEW_EB11MP_PRIV_MEM_BASE 0x1F000000 +#define REALVIEW_EB11MP_PRIV_MEM_BASE 0x10100000 #define REALVIEW_EB11MP_L220_BASE 0x10102000 /* L220 registers */ #define REALVIEW_EB11MP_SYS_PLD_CTRL1 0xD8 /* Register offset for MPCore sysctl */ #else diff --git a/arch/arm/mach-s3c64xx/clock.c b/arch/arm/mach-s3c64xx/clock.c index 1a6f85777449..803711e283b2 100644 --- a/arch/arm/mach-s3c64xx/clock.c +++ b/arch/arm/mach-s3c64xx/clock.c @@ -149,25 +149,6 @@ static struct clk init_clocks_off[] = { .enable = s3c64xx_pclk_ctrl, .ctrlbit = S3C6410_CLKCON_PCLK_I2C1, }, { - .name = "iis", - .devname = "samsung-i2s.0", - .parent = &clk_p, - .enable = s3c64xx_pclk_ctrl, - .ctrlbit = S3C_CLKCON_PCLK_IIS0, - }, { - .name = "iis", - .devname = "samsung-i2s.1", - .parent = &clk_p, - .enable = s3c64xx_pclk_ctrl, - .ctrlbit = S3C_CLKCON_PCLK_IIS1, - }, { -#ifdef CONFIG_CPU_S3C6410 - .name = "iis", - .parent = &clk_p, - .enable = s3c64xx_pclk_ctrl, - .ctrlbit = S3C6410_CLKCON_PCLK_IIS2, - }, { -#endif .name = "keypad", .parent = &clk_p, .enable = s3c64xx_pclk_ctrl, @@ -337,6 +318,32 @@ static struct clk clk_48m_spi1 = { .ctrlbit = S3C_CLKCON_SCLK_SPI1_48, }; +static struct clk clk_i2s0 = { + .name = "iis", + .devname = "samsung-i2s.0", + .parent = &clk_p, + .enable = s3c64xx_pclk_ctrl, + .ctrlbit = S3C_CLKCON_PCLK_IIS0, +}; + +static struct clk clk_i2s1 = { + .name = "iis", + .devname = "samsung-i2s.1", + .parent = &clk_p, + .enable = s3c64xx_pclk_ctrl, + .ctrlbit = S3C_CLKCON_PCLK_IIS1, +}; + +#ifdef CONFIG_CPU_S3C6410 +static struct clk clk_i2s2 = { + .name = "iis", + .devname = "samsung-i2s.2", + .parent = &clk_p, + .enable = s3c64xx_pclk_ctrl, + .ctrlbit = S3C6410_CLKCON_PCLK_IIS2, +}; +#endif + static struct clk init_clocks[] = { { .name = "lcd", @@ -660,6 +667,7 @@ static struct clksrc_sources clkset_audio1 = { .nr_sources = ARRAY_SIZE(clkset_audio1_list), }; +#ifdef CONFIG_CPU_S3C6410 static struct clk *clkset_audio2_list[] = { [0] = &clk_mout_epll.clk, [1] = &clk_dout_mpll, @@ -672,6 +680,7 @@ static struct clksrc_sources clkset_audio2 = { .sources = clkset_audio2_list, .nr_sources = ARRAY_SIZE(clkset_audio2_list), }; +#endif static struct clksrc_clk clksrcs[] = { { @@ -685,36 +694,6 @@ static struct clksrc_clk clksrcs[] = { .sources = &clkset_uhost, }, { .clk = { - .name = "audio-bus", - .devname = "samsung-i2s.0", - .ctrlbit = S3C_CLKCON_SCLK_AUDIO0, - .enable = s3c64xx_sclk_ctrl, - }, - .reg_src = { .reg = S3C_CLK_SRC, .shift = 7, .size = 3 }, - .reg_div = { .reg = S3C_CLK_DIV2, .shift = 8, .size = 4 }, - .sources = &clkset_audio0, - }, { - .clk = { - .name = "audio-bus", - .devname = "samsung-i2s.1", - .ctrlbit = S3C_CLKCON_SCLK_AUDIO1, - .enable = s3c64xx_sclk_ctrl, - }, - .reg_src = { .reg = S3C_CLK_SRC, .shift = 10, .size = 3 }, - .reg_div = { .reg = S3C_CLK_DIV2, .shift = 12, .size = 4 }, - .sources = &clkset_audio1, - }, { - .clk = { - .name = "audio-bus", - .devname = "samsung-i2s.2", - .ctrlbit = S3C6410_CLKCON_SCLK_AUDIO2, - .enable = s3c64xx_sclk_ctrl, - }, - .reg_src = { .reg = S3C6410_CLK_SRC2, .shift = 0, .size = 3 }, - .reg_div = { .reg = S3C_CLK_DIV2, .shift = 24, .size = 4 }, - .sources = &clkset_audio2, - }, { - .clk = { .name = "irda-bus", .ctrlbit = S3C_CLKCON_SCLK_IRDA, .enable = s3c64xx_sclk_ctrl, @@ -805,6 +784,43 @@ static struct clksrc_clk clk_sclk_spi1 = { .sources = &clkset_spi_mmc, }; +static struct clksrc_clk clk_audio_bus0 = { + .clk = { + .name = "audio-bus", + .devname = "samsung-i2s.0", + .ctrlbit = S3C_CLKCON_SCLK_AUDIO0, + .enable = s3c64xx_sclk_ctrl, + }, + .reg_src = { .reg = S3C_CLK_SRC, .shift = 7, .size = 3 }, + .reg_div = { .reg = S3C_CLK_DIV2, .shift = 8, .size = 4 }, + .sources = &clkset_audio0, +}; + +static struct clksrc_clk clk_audio_bus1 = { + .clk = { + .name = "audio-bus", + .devname = "samsung-i2s.1", + .ctrlbit = S3C_CLKCON_SCLK_AUDIO1, + .enable = s3c64xx_sclk_ctrl, + }, + .reg_src = { .reg = S3C_CLK_SRC, .shift = 10, .size = 3 }, + .reg_div = { .reg = S3C_CLK_DIV2, .shift = 12, .size = 4 }, + .sources = &clkset_audio1, +}; + +#ifdef CONFIG_CPU_S3C6410 +static struct clksrc_clk clk_audio_bus2 = { + .clk = { + .name = "audio-bus", + .devname = "samsung-i2s.2", + .ctrlbit = S3C6410_CLKCON_SCLK_AUDIO2, + .enable = s3c64xx_sclk_ctrl, + }, + .reg_src = { .reg = S3C6410_CLK_SRC2, .shift = 0, .size = 3 }, + .reg_div = { .reg = S3C_CLK_DIV2, .shift = 24, .size = 4 }, + .sources = &clkset_audio2, +}; +#endif /* Clock initialisation code */ static struct clksrc_clk *init_parents[] = { @@ -820,6 +836,8 @@ static struct clksrc_clk *clksrc_cdev[] = { &clk_sclk_mmc2, &clk_sclk_spi0, &clk_sclk_spi1, + &clk_audio_bus0, + &clk_audio_bus1, }; static struct clk *clk_cdev[] = { @@ -828,6 +846,8 @@ static struct clk *clk_cdev[] = { &clk_hsmmc2, &clk_48m_spi0, &clk_48m_spi1, + &clk_i2s0, + &clk_i2s1, }; static struct clk_lookup s3c64xx_clk_lookup[] = { @@ -844,6 +864,14 @@ static struct clk_lookup s3c64xx_clk_lookup[] = { CLKDEV_INIT("s3c6410-spi.0", "spi_busclk2", &clk_48m_spi0), CLKDEV_INIT("s3c6410-spi.1", "spi_busclk1", &clk_sclk_spi1.clk), CLKDEV_INIT("s3c6410-spi.1", "spi_busclk2", &clk_48m_spi1), + CLKDEV_INIT("samsung-i2s.0", "i2s_opclk0", &clk_i2s0), + CLKDEV_INIT("samsung-i2s.0", "i2s_opclk1", &clk_audio_bus0.clk), + CLKDEV_INIT("samsung-i2s.1", "i2s_opclk0", &clk_i2s1), + CLKDEV_INIT("samsung-i2s.1", "i2s_opclk1", &clk_audio_bus1.clk), +#ifdef CONFIG_CPU_S3C6410 + CLKDEV_INIT("samsung-i2s.2", "i2s_opclk0", &clk_i2s2), + CLKDEV_INIT("samsung-i2s.2", "i2s_opclk1", &clk_audio_bus2.clk), +#endif }; #define GET_DIV(clk, field) ((((clk) & field##_MASK) >> field##_SHIFT) + 1) diff --git a/arch/arm/mach-s3c64xx/dev-audio.c b/arch/arm/mach-s3c64xx/dev-audio.c index 35f3e07eaccc..e367e87bbc29 100644 --- a/arch/arm/mach-s3c64xx/dev-audio.c +++ b/arch/arm/mach-s3c64xx/dev-audio.c @@ -23,11 +23,6 @@ #include <linux/platform_data/asoc-s3c.h> #include <plat/gpio-cfg.h> -static const char *rclksrc[] = { - [0] = "iis", - [1] = "audio-bus", -}; - static int s3c64xx_i2s_cfg_gpio(struct platform_device *pdev) { unsigned int base; @@ -64,11 +59,6 @@ static struct resource s3c64xx_iis0_resource[] = { static struct s3c_audio_pdata i2sv3_pdata = { .cfg_gpio = s3c64xx_i2s_cfg_gpio, - .type = { - .i2s = { - .src_clk = rclksrc, - }, - }, }; struct platform_device s3c64xx_device_iis0 = { @@ -110,7 +100,6 @@ static struct s3c_audio_pdata i2sv4_pdata = { .type = { .i2s = { .quirks = QUIRK_PRI_6CHAN, - .src_clk = rclksrc, }, }, }; diff --git a/arch/arm/mach-s5p64x0/clock-s5p6440.c b/arch/arm/mach-s5p64x0/clock-s5p6440.c index 000445596ec4..5112371079d0 100644 --- a/arch/arm/mach-s5p64x0/clock-s5p6440.c +++ b/arch/arm/mach-s5p64x0/clock-s5p6440.c @@ -243,12 +243,6 @@ static struct clk init_clocks_off[] = { .enable = s5p64x0_pclk_ctrl, .ctrlbit = (1 << 25), }, { - .name = "iis", - .devname = "samsung-i2s.0", - .parent = &clk_pclk_low.clk, - .enable = s5p64x0_pclk_ctrl, - .ctrlbit = (1 << 26), - }, { .name = "dsim", .parent = &clk_pclk_low.clk, .enable = s5p64x0_pclk_ctrl, @@ -405,15 +399,6 @@ static struct clksrc_clk clksrcs[] = { .sources = &clkset_group1, .reg_src = { .reg = S5P64X0_CLK_SRC1, .shift = 8, .size = 2 }, .reg_div = { .reg = S5P64X0_CLK_DIV3, .shift = 4, .size = 4 }, - }, { - .clk = { - .name = "sclk_audio2", - .ctrlbit = (1 << 11), - .enable = s5p64x0_sclk_ctrl, - }, - .sources = &clkset_audio, - .reg_src = { .reg = S5P64X0_CLK_SRC1, .shift = 0, .size = 3 }, - .reg_div = { .reg = S5P64X0_CLK_DIV2, .shift = 24, .size = 4 }, }, }; @@ -464,6 +449,26 @@ static struct clksrc_clk clk_sclk_uclk = { .reg_div = { .reg = S5P64X0_CLK_DIV2, .shift = 16, .size = 4 }, }; +static struct clk clk_i2s0 = { + .name = "iis", + .devname = "samsung-i2s.0", + .parent = &clk_pclk_low.clk, + .enable = s5p64x0_pclk_ctrl, + .ctrlbit = (1 << 26), +}; + +static struct clksrc_clk clk_audio_bus2 = { + .clk = { + .name = "sclk_audio2", + .devname = "samsung-i2s.0", + .ctrlbit = (1 << 11), + .enable = s5p64x0_sclk_ctrl, + }, + .sources = &clkset_audio, + .reg_src = { .reg = S5P64X0_CLK_SRC1, .shift = 0, .size = 3 }, + .reg_div = { .reg = S5P64X0_CLK_DIV2, .shift = 24, .size = 4 }, +}; + static struct clksrc_clk clk_sclk_spi0 = { .clk = { .name = "sclk_spi", @@ -506,13 +511,18 @@ static struct clk dummy_apb_pclk = { .id = -1, }; +static struct clk *clk_cdev[] = { + &clk_i2s0, +}; + static struct clksrc_clk *clksrc_cdev[] = { &clk_sclk_uclk, &clk_sclk_spi0, &clk_sclk_spi1, &clk_sclk_mmc0, &clk_sclk_mmc1, - &clk_sclk_mmc2 + &clk_sclk_mmc2, + &clk_audio_bus2, }; static struct clk_lookup s5p6440_clk_lookup[] = { @@ -524,6 +534,8 @@ static struct clk_lookup s5p6440_clk_lookup[] = { CLKDEV_INIT("s3c-sdhci.0", "mmc_busclk.2", &clk_sclk_mmc0.clk), CLKDEV_INIT("s3c-sdhci.1", "mmc_busclk.2", &clk_sclk_mmc1.clk), CLKDEV_INIT("s3c-sdhci.2", "mmc_busclk.2", &clk_sclk_mmc2.clk), + CLKDEV_INIT("samsung-i2s.0", "i2s_opclk0", &clk_i2s0), + CLKDEV_INIT("samsung-i2s.0", "i2s_opclk1", &clk_audio_bus2.clk), }; void __init_or_cpufreq s5p6440_setup_clocks(void) @@ -596,12 +608,17 @@ static struct clk *clks[] __initdata = { void __init s5p6440_register_clocks(void) { int ptr; + unsigned int cnt; s3c24xx_register_clocks(clks, ARRAY_SIZE(clks)); for (ptr = 0; ptr < ARRAY_SIZE(sysclks); ptr++) s3c_register_clksrc(sysclks[ptr], 1); + s3c24xx_register_clocks(clk_cdev, ARRAY_SIZE(clk_cdev)); + for (cnt = 0; cnt < ARRAY_SIZE(clk_cdev); cnt++) + s3c_disable_clocks(clk_cdev[cnt], 1); + s3c_register_clksrc(clksrcs, ARRAY_SIZE(clksrcs)); s3c_register_clocks(init_clocks, ARRAY_SIZE(init_clocks)); for (ptr = 0; ptr < ARRAY_SIZE(clksrc_cdev); ptr++) diff --git a/arch/arm/mach-s5p64x0/clock-s5p6450.c b/arch/arm/mach-s5p64x0/clock-s5p6450.c index f3e0ef3d27c9..154dea702d70 100644 --- a/arch/arm/mach-s5p64x0/clock-s5p6450.c +++ b/arch/arm/mach-s5p64x0/clock-s5p6450.c @@ -247,24 +247,6 @@ static struct clk init_clocks_off[] = { .enable = s5p64x0_pclk_ctrl, .ctrlbit = (1 << 22), }, { - .name = "iis", - .devname = "samsung-i2s.0", - .parent = &clk_pclk_low.clk, - .enable = s5p64x0_pclk_ctrl, - .ctrlbit = (1 << 26), - }, { - .name = "iis", - .devname = "samsung-i2s.1", - .parent = &clk_pclk_low.clk, - .enable = s5p64x0_pclk_ctrl, - .ctrlbit = (1 << 15), - }, { - .name = "iis", - .devname = "samsung-i2s.2", - .parent = &clk_pclk_low.clk, - .enable = s5p64x0_pclk_ctrl, - .ctrlbit = (1 << 16), - }, { .name = "i2c", .devname = "s3c2440-i2c.1", .parent = &clk_pclk_low.clk, @@ -402,6 +384,7 @@ static struct clksrc_sources clkset_sclk_audio0 = { static struct clksrc_clk clk_sclk_audio0 = { .clk = { .name = "audio-bus", + .devname = "samsung-i2s.0", .enable = s5p64x0_sclk_ctrl, .ctrlbit = (1 << 8), .parent = &clk_dout_epll.clk, @@ -549,6 +532,36 @@ static struct clksrc_clk clk_sclk_spi1 = { .reg_div = { .reg = S5P64X0_CLK_DIV2, .shift = 4, .size = 4 }, }; +static struct clk clk_i2s0 = { + .name = "iis", + .devname = "samsung-i2s.0", + .parent = &clk_pclk_low.clk, + .enable = s5p64x0_pclk_ctrl, + .ctrlbit = (1 << 26), +}; + +static struct clk clk_i2s1 = { + .name = "iis", + .devname = "samsung-i2s.1", + .parent = &clk_pclk_low.clk, + .enable = s5p64x0_pclk_ctrl, + .ctrlbit = (1 << 15), +}; + +static struct clk clk_i2s2 = { + .name = "iis", + .devname = "samsung-i2s.2", + .parent = &clk_pclk_low.clk, + .enable = s5p64x0_pclk_ctrl, + .ctrlbit = (1 << 16), +}; + +static struct clk *clk_cdev[] = { + &clk_i2s0, + &clk_i2s1, + &clk_i2s2, +}; + static struct clksrc_clk *clksrc_cdev[] = { &clk_sclk_uclk, &clk_sclk_spi0, @@ -556,6 +569,7 @@ static struct clksrc_clk *clksrc_cdev[] = { &clk_sclk_mmc0, &clk_sclk_mmc1, &clk_sclk_mmc2, + &clk_sclk_audio0, }; static struct clk_lookup s5p6450_clk_lookup[] = { @@ -567,6 +581,10 @@ static struct clk_lookup s5p6450_clk_lookup[] = { CLKDEV_INIT("s3c-sdhci.0", "mmc_busclk.2", &clk_sclk_mmc0.clk), CLKDEV_INIT("s3c-sdhci.1", "mmc_busclk.2", &clk_sclk_mmc1.clk), CLKDEV_INIT("s3c-sdhci.2", "mmc_busclk.2", &clk_sclk_mmc2.clk), + CLKDEV_INIT("samsung-i2s.0", "i2s_opclk0", &clk_i2s0), + CLKDEV_INIT("samsung-i2s.0", "i2s_opclk1", &clk_sclk_audio0.clk), + CLKDEV_INIT("samsung-i2s.1", "i2s_opclk0", &clk_i2s1), + CLKDEV_INIT("samsung-i2s.2", "i2s_opclk0", &clk_i2s2), }; /* Clock initialization code */ @@ -584,7 +602,6 @@ static struct clksrc_clk *sysclks[] = { &clk_pclk, &clk_hclk_low, &clk_pclk_low, - &clk_sclk_audio0, }; static struct clk dummy_apb_pclk = { @@ -661,10 +678,16 @@ void __init_or_cpufreq s5p6450_setup_clocks(void) void __init s5p6450_register_clocks(void) { int ptr; + unsigned int cnt; for (ptr = 0; ptr < ARRAY_SIZE(sysclks); ptr++) s3c_register_clksrc(sysclks[ptr], 1); + + s3c24xx_register_clocks(clk_cdev, ARRAY_SIZE(clk_cdev)); + for (cnt = 0; cnt < ARRAY_SIZE(clk_cdev); cnt++) + s3c_disable_clocks(clk_cdev[cnt], 1); + s3c_register_clksrc(clksrcs, ARRAY_SIZE(clksrcs)); s3c_register_clocks(init_clocks, ARRAY_SIZE(init_clocks)); for (ptr = 0; ptr < ARRAY_SIZE(clksrc_cdev); ptr++) diff --git a/arch/arm/mach-s5p64x0/dev-audio.c b/arch/arm/mach-s5p64x0/dev-audio.c index a0d6edfd23a0..723d4773c323 100644 --- a/arch/arm/mach-s5p64x0/dev-audio.c +++ b/arch/arm/mach-s5p64x0/dev-audio.c @@ -19,11 +19,6 @@ #include <mach/dma.h> #include <mach/irqs.h> -static const char *rclksrc[] = { - [0] = "iis", - [1] = "sclk_audio2", -}; - static int s5p6440_cfg_i2s(struct platform_device *pdev) { switch (pdev->id) { @@ -45,7 +40,6 @@ static struct s3c_audio_pdata s5p6440_i2s_pdata = { .type = { .i2s = { .quirks = QUIRK_PRI_6CHAN, - .src_clk = rclksrc, }, }, }; @@ -93,7 +87,6 @@ static struct s3c_audio_pdata s5p6450_i2s0_pdata = { .type = { .i2s = { .quirks = QUIRK_PRI_6CHAN, - .src_clk = rclksrc, }, }, }; @@ -110,11 +103,6 @@ struct platform_device s5p6450_device_iis0 = { static struct s3c_audio_pdata s5p6450_i2s_pdata = { .cfg_gpio = s5p6450_cfg_i2s, - .type = { - .i2s = { - .src_clk = rclksrc, - }, - }, }; static struct resource s5p6450_i2s1_resource[] = { diff --git a/arch/arm/mach-s5pc100/clock.c b/arch/arm/mach-s5pc100/clock.c index 926219791f0d..a206dc35eff1 100644 --- a/arch/arm/mach-s5pc100/clock.c +++ b/arch/arm/mach-s5pc100/clock.c @@ -606,24 +606,6 @@ static struct clk init_clocks_off[] = { .enable = s5pc100_d1_4_ctrl, .ctrlbit = (1 << 13), }, { - .name = "iis", - .devname = "samsung-i2s.0", - .parent = &clk_div_pclkd1.clk, - .enable = s5pc100_d1_5_ctrl, - .ctrlbit = (1 << 0), - }, { - .name = "iis", - .devname = "samsung-i2s.1", - .parent = &clk_div_pclkd1.clk, - .enable = s5pc100_d1_5_ctrl, - .ctrlbit = (1 << 1), - }, { - .name = "iis", - .devname = "samsung-i2s.2", - .parent = &clk_div_pclkd1.clk, - .enable = s5pc100_d1_5_ctrl, - .ctrlbit = (1 << 2), - }, { .name = "ac97", .parent = &clk_div_pclkd1.clk, .enable = s5pc100_d1_5_ctrl, @@ -724,6 +706,30 @@ static struct clk clk_48m_spi2 = { .ctrlbit = (1 << 9), }; +static struct clk clk_i2s0 = { + .name = "iis", + .devname = "samsung-i2s.0", + .parent = &clk_div_pclkd1.clk, + .enable = s5pc100_d1_5_ctrl, + .ctrlbit = (1 << 0), +}; + +static struct clk clk_i2s1 = { + .name = "iis", + .devname = "samsung-i2s.1", + .parent = &clk_div_pclkd1.clk, + .enable = s5pc100_d1_5_ctrl, + .ctrlbit = (1 << 1), +}; + +static struct clk clk_i2s2 = { + .name = "iis", + .devname = "samsung-i2s.2", + .parent = &clk_div_pclkd1.clk, + .enable = s5pc100_d1_5_ctrl, + .ctrlbit = (1 << 2), +}; + static struct clk clk_vclk54m = { .name = "vclk_54m", .rate = 54000000, @@ -1154,6 +1160,9 @@ static struct clk *clk_cdev[] = { &clk_48m_spi0, &clk_48m_spi1, &clk_48m_spi2, + &clk_i2s0, + &clk_i2s1, + &clk_i2s2, }; static struct clksrc_clk *clksrc_cdev[] = { @@ -1321,6 +1330,9 @@ static struct clk_lookup s5pc100_clk_lookup[] = { CLKDEV_INIT("s5pc100-spi.1", "spi_busclk2", &clk_sclk_spi1.clk), CLKDEV_INIT("s5pc100-spi.2", "spi_busclk1", &clk_48m_spi2), CLKDEV_INIT("s5pc100-spi.2", "spi_busclk2", &clk_sclk_spi2.clk), + CLKDEV_INIT("samsung-i2s.0", "i2s_opclk0", &clk_i2s0), + CLKDEV_INIT("samsung-i2s.1", "i2s_opclk0", &clk_i2s1), + CLKDEV_INIT("samsung-i2s.2", "i2s_opclk0", &clk_i2s2), }; void __init s5pc100_register_clocks(void) diff --git a/arch/arm/mach-s5pc100/dev-audio.c b/arch/arm/mach-s5pc100/dev-audio.c index 1cc252cef268..46f488b09391 100644 --- a/arch/arm/mach-s5pc100/dev-audio.c +++ b/arch/arm/mach-s5pc100/dev-audio.c @@ -39,18 +39,12 @@ static int s5pc100_cfg_i2s(struct platform_device *pdev) return 0; } -static const char *rclksrc_v5[] = { - [0] = "iis", - [1] = "i2sclkd2", -}; - static struct s3c_audio_pdata i2sv5_pdata = { .cfg_gpio = s5pc100_cfg_i2s, .type = { .i2s = { .quirks = QUIRK_PRI_6CHAN | QUIRK_SEC_DAI | QUIRK_NEED_RSTCLR, - .src_clk = rclksrc_v5, }, }, }; @@ -72,18 +66,8 @@ struct platform_device s5pc100_device_iis0 = { }, }; -static const char *rclksrc_v3[] = { - [0] = "iis", - [1] = "sclk_audio", -}; - static struct s3c_audio_pdata i2sv3_pdata = { .cfg_gpio = s5pc100_cfg_i2s, - .type = { - .i2s = { - .src_clk = rclksrc_v3, - }, - }, }; static struct resource s5pc100_iis1_resource[] = { diff --git a/arch/arm/mach-s5pv210/dev-audio.c b/arch/arm/mach-s5pv210/dev-audio.c index 0a5480bbcbd5..addfb165c13d 100644 --- a/arch/arm/mach-s5pv210/dev-audio.c +++ b/arch/arm/mach-s5pv210/dev-audio.c @@ -20,11 +20,6 @@ #include <mach/irqs.h> #include <mach/regs-audss.h> -static const char *rclksrc[] = { - [0] = "busclk", - [1] = "i2sclk", -}; - static int s5pv210_cfg_i2s(struct platform_device *pdev) { /* configure GPIO for i2s port */ @@ -52,7 +47,6 @@ static struct s3c_audio_pdata i2sv5_pdata = { .i2s = { .quirks = QUIRK_PRI_6CHAN | QUIRK_SEC_DAI | QUIRK_NEED_RSTCLR, - .src_clk = rclksrc, .idma_addr = S5PV210_AUDSS_INT_MEM, }, }, @@ -75,18 +69,8 @@ struct platform_device s5pv210_device_iis0 = { }, }; -static const char *rclksrc_v3[] = { - [0] = "iis", - [1] = "audio-bus", -}; - static struct s3c_audio_pdata i2sv3_pdata = { .cfg_gpio = s5pv210_cfg_i2s, - .type = { - .i2s = { - .src_clk = rclksrc_v3, - }, - }, }; static struct resource s5pv210_iis1_resource[] = { diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index 0816562725f6..d54cfc54b9fe 100644 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -104,7 +104,7 @@ static __initdata struct tegra_clk_init_table tegra20_clk_init_table[] = { static __initdata struct tegra_clk_init_table tegra30_clk_init_table[] = { /* name parent rate enabled */ { "clk_m", NULL, 0, true }, - { "pll_p", "clk_m", 408000000, true }, + { "pll_p", "pll_ref", 408000000, true }, { "pll_p_out1", "pll_p", 9600000, true }, { "pll_p_out4", "pll_p", 102000000, true }, { "sclk", "pll_p_out4", 102000000, true }, diff --git a/arch/arm/mach-tegra/tegra30_clocks.c b/arch/arm/mach-tegra/tegra30_clocks.c index efc000e32e1c..d7147779f8ea 100644 --- a/arch/arm/mach-tegra/tegra30_clocks.c +++ b/arch/arm/mach-tegra/tegra30_clocks.c @@ -2045,9 +2045,7 @@ struct clk_ops tegra30_periph_clk_ops = { static int tegra30_dsib_clk_set_parent(struct clk_hw *hw, u8 index) { struct clk *d = clk_get_sys(NULL, "pll_d"); - /* The DSIB parent selection bit is in PLLD base - register - can not do direct r-m-w, must be - protected by PLLD lock */ + /* The DSIB parent selection bit is in PLLD base register */ tegra_clk_cfg_ex( d, TEGRA_CLK_PLLD_MIPI_MUX_SEL, index); diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c index 8b204ae69002..4ce77cdc31cc 100644 --- a/arch/arm/mach-u300/core.c +++ b/arch/arm/mach-u300/core.c @@ -27,7 +27,6 @@ #include <linux/mtd/nand.h> #include <linux/mtd/fsmc.h> #include <linux/pinctrl/machine.h> -#include <linux/pinctrl/consumer.h> #include <linux/pinctrl/pinconf-generic.h> #include <linux/dma-mapping.h> #include <linux/platform_data/clk-u300.h> @@ -1553,39 +1552,6 @@ static struct pinctrl_map __initdata u300_pinmux_map[] = { pin_highz_conf), }; -struct u300_mux_hog { - struct device *dev; - struct pinctrl *p; -}; - -static struct u300_mux_hog u300_mux_hogs[] = { - { - .dev = &uart0_device.dev, - }, - { - .dev = &mmcsd_device.dev, - }, -}; - -static int __init u300_pinctrl_fetch(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(u300_mux_hogs); i++) { - struct pinctrl *p; - - p = pinctrl_get_select_default(u300_mux_hogs[i].dev); - if (IS_ERR(p)) { - pr_err("u300: could not get pinmux hog for dev %s\n", - dev_name(u300_mux_hogs[i].dev)); - continue; - } - u300_mux_hogs[i].p = p; - } - return 0; -} -subsys_initcall(u300_pinctrl_fetch); - /* * Notice that AMBA devices are initialized before platform devices. * diff --git a/arch/arm/mach-ux500/devices-db8500.h b/arch/arm/mach-ux500/devices-db8500.h index 4b24c9992654..a5e05f6e256f 100644 --- a/arch/arm/mach-ux500/devices-db8500.h +++ b/arch/arm/mach-ux500/devices-db8500.h @@ -8,6 +8,7 @@ #ifndef __DEVICES_DB8500_H #define __DEVICES_DB8500_H +#include <linux/platform_data/usb-musb-ux500.h> #include <mach/irqs.h> #include "devices-common.h" diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index cd956647c21a..7539ec275065 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -44,8 +44,10 @@ ENDPROC(v7_flush_icache_all) ENTRY(v7_flush_dcache_louis) dmb @ ensure ordering with previous memory accesses mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr - ands r3, r0, #0xe00000 @ extract LoUIS from clidr - mov r3, r3, lsr #20 @ r3 = LoUIS * 2 + ALT_SMP(ands r3, r0, #(7 << 21)) @ extract LoUIS from clidr + ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr + ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2 + ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2 moveq pc, lr @ return if level == 0 mov r10, #0 @ r10 (starting level) = 0 b flush_levels @ start flushing cache levels diff --git a/arch/arm/plat-omap/Makefile b/arch/arm/plat-omap/Makefile index 8d885848600a..9d9aa2f55129 100644 --- a/arch/arm/plat-omap/Makefile +++ b/arch/arm/plat-omap/Makefile @@ -11,7 +11,6 @@ obj- := # omap_device support (OMAP2+ only at the moment) obj-$(CONFIG_OMAP_DM_TIMER) += dmtimer.o -obj-$(CONFIG_OMAP_DEBUG_DEVICES) += debug-devices.o obj-$(CONFIG_OMAP_DEBUG_LEDS) += debug-leds.o i2c-omap-$(CONFIG_I2C_OMAP) := i2c.o obj-y += $(i2c-omap-m) $(i2c-omap-y) diff --git a/arch/arm/plat-omap/debug-devices.c b/arch/arm/plat-omap/debug-devices.c deleted file mode 100644 index a609e2161817..000000000000 --- a/arch/arm/plat-omap/debug-devices.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * linux/arch/arm/plat-omap/debug-devices.c - * - * Copyright (C) 2005 Nokia Corporation - * Modified from mach-omap2/board-h4.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/gpio.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/platform_device.h> -#include <linux/io.h> -#include <linux/smc91x.h> - -#include <plat/debug-devices.h> - -/* Many OMAP development platforms reuse the same "debug board"; these - * platforms include H2, H3, H4, and Perseus2. - */ - -static struct smc91x_platdata smc91x_info = { - .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, - .leda = RPC_LED_100_10, - .ledb = RPC_LED_TX_RX, -}; - -static struct resource smc91x_resources[] = { - [0] = { - .flags = IORESOURCE_MEM, - }, - [1] = { - .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE, - }, -}; - -static struct platform_device smc91x_device = { - .name = "smc91x", - .id = -1, - .dev = { - .platform_data = &smc91x_info, - }, - .num_resources = ARRAY_SIZE(smc91x_resources), - .resource = smc91x_resources, -}; - -static struct resource led_resources[] = { - [0] = { - .flags = IORESOURCE_MEM, - }, -}; - -static struct platform_device led_device = { - .name = "omap_dbg_led", - .id = -1, - .num_resources = ARRAY_SIZE(led_resources), - .resource = led_resources, -}; - -static struct platform_device *debug_devices[] __initdata = { - &smc91x_device, - &led_device, - /* ps2 kbd + mouse ports */ - /* 4 extra uarts */ - /* 6 input dip switches */ - /* 8 output pins */ -}; - -int __init debug_card_init(u32 addr, unsigned gpio) -{ - int status; - - smc91x_resources[0].start = addr + 0x300; - smc91x_resources[0].end = addr + 0x30f; - - smc91x_resources[1].start = gpio_to_irq(gpio); - smc91x_resources[1].end = gpio_to_irq(gpio); - - status = gpio_request(gpio, "SMC91x irq"); - if (status < 0) { - printk(KERN_ERR "GPIO%d unavailable for smc91x IRQ\n", gpio); - return status; - } - gpio_direction_input(gpio); - - led_resources[0].start = addr; - led_resources[0].end = addr + SZ_4K - 1; - - return platform_add_devices(debug_devices, ARRAY_SIZE(debug_devices)); -} diff --git a/arch/arm/plat-omap/include/plat/debug-devices.h b/arch/arm/plat-omap/include/plat/debug-devices.h deleted file mode 100644 index 8fc4287222dd..000000000000 --- a/arch/arm/plat-omap/include/plat/debug-devices.h +++ /dev/null @@ -1,2 +0,0 @@ -/* for TI reference platforms sharing the same debug card */ -extern int debug_card_init(u32 addr, unsigned gpio); diff --git a/arch/arm/plat-samsung/include/plat/gpio-core.h b/arch/arm/plat-samsung/include/plat/gpio-core.h index dfd8b7af8c7a..f7a3ea2c498a 100644 --- a/arch/arm/plat-samsung/include/plat/gpio-core.h +++ b/arch/arm/plat-samsung/include/plat/gpio-core.h @@ -11,6 +11,9 @@ * published by the Free Software Foundation. */ +#ifndef __PLAT_SAMSUNG_GPIO_CORE_H +#define __PLAT_SAMSUNG_GPIO_CORE_H + #define GPIOCON_OFF (0x00) #define GPIODAT_OFF (0x04) @@ -124,3 +127,5 @@ extern struct samsung_gpio_pm samsung_gpio_pm_4bit; /* locking wrappers to deal with multiple access to the same gpio bank */ #define samsung_gpio_lock(_oc, _fl) spin_lock_irqsave(&(_oc)->lock, _fl) #define samsung_gpio_unlock(_oc, _fl) spin_unlock_irqrestore(&(_oc)->lock, _fl) + +#endif /* __PLAT_SAMSUNG_GPIO_CORE_H */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f9ccff915918..9c829b008261 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -8,8 +8,6 @@ config ARM64 select GENERIC_IOMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW - select GENERIC_KERNEL_EXECVE - select GENERIC_KERNEL_THREAD select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 37e610dc084e..d9ec40217a27 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -209,10 +209,11 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } +#define compat_user_stack_pointer() (current_pt_regs()->compat_sp) + static inline void __user *arch_compat_alloc_user_space(long len) { - struct pt_regs *regs = task_pt_regs(current); - return (void __user *)regs->compat_sp - len; + return (void __user *)compat_user_stack_pointer() - len; } struct compat_ipc64_perm { diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 538f4b44db5d..994776894198 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -50,6 +50,7 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) static inline int dma_mapping_error(struct device *dev, dma_addr_t dev_addr) { struct dma_map_ops *ops = get_dma_ops(dev); + debug_dma_mapping_error(dev, dev_addr); return ops->mapping_error(dev, dev_addr); } diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 76fb7dd3350a..744087fb521c 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -28,6 +28,5 @@ #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #endif -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE #include <uapi/asm/unistd.h> diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index e40c9bd79143..2ae6591b3a55 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -17,8 +17,6 @@ config AVR32 select GENERIC_CLOCKEVENTS select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE help AVR32 is a high-performance 32-bit RISC microprocessor core, designed for cost-sensitive embedded applications, with particular diff --git a/arch/avr32/include/asm/ptrace.h b/arch/avr32/include/asm/ptrace.h index 8d3c412fc65f..630e4f9bf5f0 100644 --- a/arch/avr32/include/asm/ptrace.h +++ b/arch/avr32/include/asm/ptrace.h @@ -21,6 +21,7 @@ #define user_mode(regs) (((regs)->sr & MODE_MASK) == MODE_USER) #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) +#define user_stack_pointer(regs) ((regs)->sp) static __inline__ int valid_user_regs(struct pt_regs *regs) { diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h index f05a9804e8e2..0bdf6371574e 100644 --- a/arch/avr32/include/asm/unistd.h +++ b/arch/avr32/include/asm/unistd.h @@ -39,7 +39,6 @@ #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/avr32/include/uapi/asm/signal.h b/arch/avr32/include/uapi/asm/signal.h index eb46f61adb7d..1b77a93eff50 100644 --- a/arch/avr32/include/uapi/asm/signal.h +++ b/arch/avr32/include/uapi/asm/signal.h @@ -89,12 +89,6 @@ typedef unsigned long sigset_t; #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index ab9ff4075f4d..b6f3ad5441c5 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -45,8 +45,6 @@ config BLACKFIN select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE config GENERIC_CSUM def_bool y diff --git a/arch/blackfin/include/asm/ptrace.h b/arch/blackfin/include/asm/ptrace.h index 14ea93388c05..c00491594b46 100644 --- a/arch/blackfin/include/asm/ptrace.h +++ b/arch/blackfin/include/asm/ptrace.h @@ -17,6 +17,7 @@ #define arch_has_single_step() (1) /* common code demands this function */ #define ptrace_disable(child) user_disable_single_step(child) +#define current_user_stack_pointer() rdusp() extern int is_user_addr_valid(struct task_struct *child, unsigned long start, unsigned long len); diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index 17eb748e9c54..e943cb130048 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -20,7 +20,6 @@ #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_VFORK /* diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index 66eab3703c75..f6a3648f5ec3 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -17,8 +17,6 @@ config C6X select OF select OF_EARLY_FLATTREE select GENERIC_CLOCKEVENTS - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select MODULES_USE_ELF_RELA config MMU diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index eae7b5963e86..4258b088aa93 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -25,6 +25,7 @@ generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h generic-y += mman.h +generic-y += mmu.h generic-y += mmu_context.h generic-y += msgbuf.h generic-y += param.h diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h index 03579fd99dba..3c694065030f 100644 --- a/arch/c6x/include/asm/dma-mapping.h +++ b/arch/c6x/include/asm/dma-mapping.h @@ -32,6 +32,7 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask) */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { + debug_dma_mapping_error(dev, dma_addr); return dma_addr == ~0; } diff --git a/arch/c6x/include/asm/mmu.h b/arch/c6x/include/asm/mmu.h deleted file mode 100644 index 4467e770a1ce..000000000000 --- a/arch/c6x/include/asm/mmu.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Port on Texas Instruments TMS320C6x architecture - * - * Copyright (C) 2004, 2009, 2010 Texas Instruments Incorporated - * Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef _ASM_C6X_MMU_H -#define _ASM_C6X_MMU_H - -typedef struct { - unsigned long end_brk; -#ifdef CONFIG_BINFMT_ELF_FDPIC - unsigned long exec_fdpic_loadmap; - unsigned long interp_fdpic_loadmap; -#endif -} mm_context_t; - -#endif /* _ASM_C6X_MMU_H */ diff --git a/arch/c6x/include/uapi/asm/unistd.h b/arch/c6x/include/uapi/asm/unistd.h index f3987a8703d9..e7d09a614d10 100644 --- a/arch/c6x/include/uapi/asm/unistd.h +++ b/arch/c6x/include/uapi/asm/unistd.h @@ -14,7 +14,6 @@ * more details. */ -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE /* Use the standard ABI for syscalls. */ diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 0cac6a49f230..c59a01dd9c0c 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -49,8 +49,6 @@ config CRIS select GENERIC_SMP_IDLE_THREAD if ETRAX_ARCH_V32 select GENERIC_CMOS_UPDATE select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select CLONE_BACKWARDS2 config HZ diff --git a/arch/cris/include/arch-v10/arch/Kbuild b/arch/cris/include/arch-v10/arch/Kbuild index 7a192e1290b1..1f0fc7a66f5f 100644 --- a/arch/cris/include/arch-v10/arch/Kbuild +++ b/arch/cris/include/arch-v10/arch/Kbuild @@ -1,4 +1 @@ -header-y += user.h -header-y += svinto.h -header-y += sv_addr_ag.h -header-y += sv_addr.agh +# CRISv10 arch diff --git a/arch/cris/include/arch-v32/arch/Kbuild b/arch/cris/include/arch-v32/arch/Kbuild index 35f2fc4f993e..2fd65c7e15c9 100644 --- a/arch/cris/include/arch-v32/arch/Kbuild +++ b/arch/cris/include/arch-v32/arch/Kbuild @@ -1,2 +1 @@ -header-y += user.h -header-y += cryptocop.h +# CRISv32 arch diff --git a/arch/cris/include/arch-v32/arch/cryptocop.h b/arch/cris/include/arch-v32/arch/cryptocop.h index e1cd83dfabb5..716e434e9269 100644 --- a/arch/cris/include/arch-v32/arch/cryptocop.h +++ b/arch/cris/include/arch-v32/arch/cryptocop.h @@ -2,124 +2,12 @@ * The device /dev/cryptocop is accessible using this driver using * CRYPTOCOP_MAJOR (254) and minor number 0. */ - #ifndef CRYPTOCOP_H #define CRYPTOCOP_H -#include <linux/uio.h> - - -#define CRYPTOCOP_SESSION_ID_NONE (0) - -typedef unsigned long long int cryptocop_session_id; - -/* cryptocop ioctls */ -#define ETRAXCRYPTOCOP_IOCTYPE (250) - -#define CRYPTOCOP_IO_CREATE_SESSION _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 1, struct strcop_session_op) -#define CRYPTOCOP_IO_CLOSE_SESSION _IOW(ETRAXCRYPTOCOP_IOCTYPE, 2, struct strcop_session_op) -#define CRYPTOCOP_IO_PROCESS_OP _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 3, struct strcop_crypto_op) -#define CRYPTOCOP_IO_MAXNR (3) - -typedef enum { - cryptocop_cipher_des = 0, - cryptocop_cipher_3des = 1, - cryptocop_cipher_aes = 2, - cryptocop_cipher_m2m = 3, /* mem2mem is essentially a NULL cipher with blocklength=1 */ - cryptocop_cipher_none -} cryptocop_cipher_type; - -typedef enum { - cryptocop_digest_sha1 = 0, - cryptocop_digest_md5 = 1, - cryptocop_digest_none -} cryptocop_digest_type; - -typedef enum { - cryptocop_csum_le = 0, - cryptocop_csum_be = 1, - cryptocop_csum_none -} cryptocop_csum_type; - -typedef enum { - cryptocop_cipher_mode_ecb = 0, - cryptocop_cipher_mode_cbc, - cryptocop_cipher_mode_none -} cryptocop_cipher_mode; - -typedef enum { - cryptocop_3des_eee = 0, - cryptocop_3des_eed = 1, - cryptocop_3des_ede = 2, - cryptocop_3des_edd = 3, - cryptocop_3des_dee = 4, - cryptocop_3des_ded = 5, - cryptocop_3des_dde = 6, - cryptocop_3des_ddd = 7 -} cryptocop_3des_mode; - -/* Usermode accessible (ioctl) operations. */ -struct strcop_session_op{ - cryptocop_session_id ses_id; - - cryptocop_cipher_type cipher; /* AES, DES, 3DES, m2m, none */ - - cryptocop_cipher_mode cmode; /* ECB, CBC, none */ - cryptocop_3des_mode des3_mode; - - cryptocop_digest_type digest; /* MD5, SHA1, none */ - - cryptocop_csum_type csum; /* BE, LE, none */ - - unsigned char *key; - size_t keylen; -}; - -#define CRYPTOCOP_CSUM_LENGTH (2) -#define CRYPTOCOP_MAX_DIGEST_LENGTH (20) /* SHA-1 20, MD5 16 */ -#define CRYPTOCOP_MAX_IV_LENGTH (16) /* (3)DES==8, AES == 16 */ -#define CRYPTOCOP_MAX_KEY_LENGTH (32) - -struct strcop_crypto_op{ - cryptocop_session_id ses_id; - - /* Indata. */ - unsigned char *indata; - size_t inlen; /* Total indata length. */ - - /* Cipher configuration. */ - unsigned char do_cipher:1; - unsigned char decrypt:1; /* 1 == decrypt, 0 == encrypt */ - unsigned char cipher_explicit:1; - size_t cipher_start; - size_t cipher_len; - /* cipher_iv is used if do_cipher and cipher_explicit and the cipher - mode is CBC. The length is controlled by the type of cipher, - e.g. DES/3DES 8 octets and AES 16 octets. */ - unsigned char cipher_iv[CRYPTOCOP_MAX_IV_LENGTH]; - /* Outdata. */ - unsigned char *cipher_outdata; - size_t cipher_outlen; - - /* digest configuration. */ - unsigned char do_digest:1; - size_t digest_start; - size_t digest_len; - /* Outdata. The actual length is determined by the type of the digest. */ - unsigned char digest[CRYPTOCOP_MAX_DIGEST_LENGTH]; - - /* Checksum configuration. */ - unsigned char do_csum:1; - size_t csum_start; - size_t csum_len; - /* Outdata. */ - unsigned char csum[CRYPTOCOP_CSUM_LENGTH]; -}; +#include <uapi/arch-v32/arch/cryptocop.h> - -#ifdef __KERNEL__ - /********** The API to use from inside the kernel. ************/ #include <arch/hwregs/dma.h> @@ -267,6 +155,4 @@ int cryptocop_job_queue_insert_crypto(struct cryptocop_operation *operation); int cryptocop_job_queue_insert_user_job(struct cryptocop_operation *operation); -#endif /* __KERNEL__ */ - #endif /* CRYPTOCOP_H */ diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h index f171a6600fbc..f13275522f4d 100644 --- a/arch/cris/include/arch-v32/arch/spinlock.h +++ b/arch/cris/include/arch-v32/arch/spinlock.h @@ -118,7 +118,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) ret = 1; } arch_spin_unlock(&rw->slock); - return 1; + return ret; } #define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock) diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 15a122c3767c..f1e79edc9dd2 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -1,12 +1,7 @@ -include include/asm-generic/Kbuild.asm header-y += arch-v10/ header-y += arch-v32/ -header-y += ethernet.h -header-y += etraxgpio.h -header-y += rs485.h -header-y += sync_serial.h generic-y += clkdev.h generic-y += exec.h diff --git a/arch/cris/include/asm/ptrace.h b/arch/cris/include/asm/ptrace.h index 6618893bfe8e..9e788d04a4ef 100644 --- a/arch/cris/include/asm/ptrace.h +++ b/arch/cris/include/asm/ptrace.h @@ -1,16 +1,14 @@ #ifndef _CRIS_PTRACE_H #define _CRIS_PTRACE_H -#include <arch/ptrace.h> +#include <uapi/asm/ptrace.h> -#ifdef __KERNEL__ /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ #define PTRACE_GETREGS 12 #define PTRACE_SETREGS 13 #define profile_pc(regs) instruction_pointer(regs) - -#endif /* __KERNEL__ */ +#define current_user_stack_pointer() rdusp() #endif /* _CRIS_PTRACE_H */ diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h index 72dbbf59dfae..c0cb1fd4644c 100644 --- a/arch/cris/include/asm/signal.h +++ b/arch/cris/include/asm/signal.h @@ -1,12 +1,8 @@ #ifndef _ASM_CRIS_SIGNAL_H #define _ASM_CRIS_SIGNAL_H -#include <linux/types.h> +#include <uapi/asm/signal.h> -/* Avoid too many header ordering problems. */ -struct siginfo; - -#ifdef __KERNEL__ /* Most things should be clean enough to redefine this at will, if care is taken to make libc match. */ @@ -20,95 +16,6 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; -#else -/* Here we must cater to libcs that poke about in kernel headers. */ - -#define NSIG 32 -typedef unsigned long sigset_t; - -#endif /* __KERNEL__ */ - -#define SIGHUP 1 -#define SIGINT 2 -#define SIGQUIT 3 -#define SIGILL 4 -#define SIGTRAP 5 -#define SIGABRT 6 -#define SIGIOT 6 -#define SIGBUS 7 -#define SIGFPE 8 -#define SIGKILL 9 -#define SIGUSR1 10 -#define SIGSEGV 11 -#define SIGUSR2 12 -#define SIGPIPE 13 -#define SIGALRM 14 -#define SIGTERM 15 -#define SIGSTKFLT 16 -#define SIGCHLD 17 -#define SIGCONT 18 -#define SIGSTOP 19 -#define SIGTSTP 20 -#define SIGTTIN 21 -#define SIGTTOU 22 -#define SIGURG 23 -#define SIGXCPU 24 -#define SIGXFSZ 25 -#define SIGVTALRM 26 -#define SIGPROF 27 -#define SIGWINCH 28 -#define SIGIO 29 -#define SIGPOLL SIGIO -/* -#define SIGLOST 29 -*/ -#define SIGPWR 30 -#define SIGSYS 31 -#define SIGUNUSED 31 - -/* These should not be considered constants from userland. */ -#define SIGRTMIN 32 -#define SIGRTMAX _NSIG - -/* - * SA_FLAGS values: - * - * SA_ONSTACK indicates that a registered stack_t will be used. - * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. - * SA_RESETHAND clears the handler when the signal is delivered. - * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. - * SA_NODEFER prevents the current signal from being masked in the handler. - * - * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single - * Unix names RESETHAND and NODEFER respectively. - */ - -#define SA_NOCLDSTOP 0x00000001u -#define SA_NOCLDWAIT 0x00000002u -#define SA_SIGINFO 0x00000004u -#define SA_ONSTACK 0x08000000u -#define SA_RESTART 0x10000000u -#define SA_NODEFER 0x40000000u -#define SA_RESETHAND 0x80000000u - -#define SA_NOMASK SA_NODEFER -#define SA_ONESHOT SA_RESETHAND - -#define SA_RESTORER 0x04000000 - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - -#define MINSIGSTKSZ 2048 -#define SIGSTKSZ 8192 - -#include <asm-generic/signal-defs.h> - -#ifdef __KERNEL__ struct old_sigaction { __sighandler_t sa_handler; old_sigset_t sa_mask; @@ -126,32 +33,6 @@ struct sigaction { struct k_sigaction { struct sigaction sa; }; -#else -/* Here we must cater to libcs that poke about in kernel headers. */ - -struct sigaction { - union { - __sighandler_t _sa_handler; - void (*_sa_sigaction)(int, struct siginfo *, void *); - } _u; - sigset_t sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -}; - -#define sa_handler _u._sa_handler -#define sa_sigaction _u._sa_sigaction - -#endif /* __KERNEL__ */ - -typedef struct sigaltstack { - void *ss_sp; - int ss_flags; - size_t ss_size; -} stack_t; - -#ifdef __KERNEL__ #include <asm/sigcontext.h> -#endif /* __KERNEL__ */ #endif diff --git a/arch/cris/include/asm/swab.h b/arch/cris/include/asm/swab.h index 80668e88419c..991b6ace1ba9 100644 --- a/arch/cris/include/asm/swab.h +++ b/arch/cris/include/asm/swab.h @@ -1,8 +1,7 @@ #ifndef _CRIS_SWAB_H #define _CRIS_SWAB_H -#ifdef __KERNEL__ #include <arch/swab.h> -#endif /* __KERNEL__ */ +#include <uapi/asm/swab.h> #endif /* _CRIS_SWAB_H */ diff --git a/arch/cris/include/asm/termios.h b/arch/cris/include/asm/termios.h index 1265109f4ce3..1991cd9e4083 100644 --- a/arch/cris/include/asm/termios.h +++ b/arch/cris/include/asm/termios.h @@ -1,47 +1,8 @@ #ifndef _CRIS_TERMIOS_H #define _CRIS_TERMIOS_H -#include <asm/termbits.h> -#include <asm/ioctls.h> -#include <asm/rs485.h> -#include <linux/serial.h> +#include <uapi/asm/termios.h> -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; - -#define NCC 8 -struct termio { - unsigned short c_iflag; /* input mode flags */ - unsigned short c_oflag; /* output mode flags */ - unsigned short c_cflag; /* control mode flags */ - unsigned short c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[NCC]; /* control characters */ -}; - -/* modem lines */ -#define TIOCM_LE 0x001 -#define TIOCM_DTR 0x002 -#define TIOCM_RTS 0x004 -#define TIOCM_ST 0x008 -#define TIOCM_SR 0x010 -#define TIOCM_CTS 0x020 -#define TIOCM_CAR 0x040 -#define TIOCM_RNG 0x080 -#define TIOCM_DSR 0x100 -#define TIOCM_CD TIOCM_CAR -#define TIOCM_RI TIOCM_RNG -#define TIOCM_OUT1 0x2000 -#define TIOCM_OUT2 0x4000 -#define TIOCM_LOOP 0x8000 - -/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ - -#ifdef __KERNEL__ /* intr=^C quit=^\ erase=del kill=^U eof=^D vtime=\0 vmin=\1 sxtc=\0 @@ -87,6 +48,4 @@ struct termio { #define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) #define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) -#endif /* __KERNEL__ */ - #endif /* _CRIS_TERMIOS_H */ diff --git a/arch/cris/include/asm/types.h b/arch/cris/include/asm/types.h index adaf82780bb4..a3cac7757c7f 100644 --- a/arch/cris/include/asm/types.h +++ b/arch/cris/include/asm/types.h @@ -1,15 +1,12 @@ #ifndef _ETRAX_TYPES_H #define _ETRAX_TYPES_H -#include <asm-generic/int-ll64.h> +#include <uapi/asm/types.h> /* * These aren't exported outside the kernel to avoid name space clashes */ -#ifdef __KERNEL__ #define BITS_PER_LONG 32 -#endif /* __KERNEL__ */ - #endif diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h index f27b542e0ebc..6d062bdf92d4 100644 --- a/arch/cris/include/asm/unistd.h +++ b/arch/cris/include/asm/unistd.h @@ -1,347 +1,8 @@ #ifndef _ASM_CRIS_UNISTD_H_ #define _ASM_CRIS_UNISTD_H_ -/* - * This file contains the system call numbers, and stub macros for libc. - */ - -#define __NR_restart_syscall 0 -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_waitpid 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_time 13 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lchown 16 -#define __NR_break 17 -#define __NR_oldstat 18 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_oldfstat 28 -#define __NR_pause 29 -#define __NR_utime 30 -#define __NR_stty 31 -#define __NR_gtty 32 -#define __NR_access 33 -#define __NR_nice 34 -#define __NR_ftime 35 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_prof 44 -#define __NR_brk 45 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_signal 48 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_acct 51 -#define __NR_umount2 52 -#define __NR_lock 53 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_mpx 56 -#define __NR_setpgid 57 -#define __NR_ulimit 58 -#define __NR_oldolduname 59 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sgetmask 68 -#define __NR_ssetmask 69 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrlimit 76 -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_select 82 -#define __NR_symlink 83 -#define __NR_oldlstat 84 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir 89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_fchown 95 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_profil 98 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_ioperm 101 -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_olduname 109 -#define __NR_iopl 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_vm86 113 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define __NR_modify_ldt 123 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_sigprocmask 126 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 -#define __NR_personality 136 -#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR_getdents 141 -#define __NR__newselect 142 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define __NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -#define __NR_setresuid 164 -#define __NR_getresuid 165 - -#define __NR_query_module 167 -#define __NR_poll 168 -#define __NR_nfsservctl 169 -#define __NR_setresgid 170 -#define __NR_getresgid 171 -#define __NR_prctl 172 -#define __NR_rt_sigreturn 173 -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 -#define __NR_rt_sigpending 176 -#define __NR_rt_sigtimedwait 177 -#define __NR_rt_sigqueueinfo 178 -#define __NR_rt_sigsuspend 179 -#define __NR_pread64 180 -#define __NR_pwrite64 181 -#define __NR_chown 182 -#define __NR_getcwd 183 -#define __NR_capget 184 -#define __NR_capset 185 -#define __NR_sigaltstack 186 -#define __NR_sendfile 187 -#define __NR_getpmsg 188 /* some people actually want streams */ -#define __NR_putpmsg 189 /* some people actually want streams */ -#define __NR_vfork 190 -#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ -#define __NR_mmap2 192 -#define __NR_truncate64 193 -#define __NR_ftruncate64 194 -#define __NR_stat64 195 -#define __NR_lstat64 196 -#define __NR_fstat64 197 -#define __NR_lchown32 198 -#define __NR_getuid32 199 -#define __NR_getgid32 200 -#define __NR_geteuid32 201 -#define __NR_getegid32 202 -#define __NR_setreuid32 203 -#define __NR_setregid32 204 -#define __NR_getgroups32 205 -#define __NR_setgroups32 206 -#define __NR_fchown32 207 -#define __NR_setresuid32 208 -#define __NR_getresuid32 209 -#define __NR_setresgid32 210 -#define __NR_getresgid32 211 -#define __NR_chown32 212 -#define __NR_setuid32 213 -#define __NR_setgid32 214 -#define __NR_setfsuid32 215 -#define __NR_setfsgid32 216 -#define __NR_pivot_root 217 -#define __NR_mincore 218 -#define __NR_madvise 219 -#define __NR_getdents64 220 -#define __NR_fcntl64 221 -/* 223 is unused */ -#define __NR_gettid 224 -#define __NR_readahead 225 -#define __NR_setxattr 226 -#define __NR_lsetxattr 227 -#define __NR_fsetxattr 228 -#define __NR_getxattr 229 -#define __NR_lgetxattr 230 -#define __NR_fgetxattr 231 -#define __NR_listxattr 232 -#define __NR_llistxattr 233 -#define __NR_flistxattr 234 -#define __NR_removexattr 235 -#define __NR_lremovexattr 236 -#define __NR_fremovexattr 237 -#define __NR_tkill 238 -#define __NR_sendfile64 239 -#define __NR_futex 240 -#define __NR_sched_setaffinity 241 -#define __NR_sched_getaffinity 242 -#define __NR_set_thread_area 243 -#define __NR_get_thread_area 244 -#define __NR_io_setup 245 -#define __NR_io_destroy 246 -#define __NR_io_getevents 247 -#define __NR_io_submit 248 -#define __NR_io_cancel 249 -#define __NR_fadvise64 250 -/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ -#define __NR_exit_group 252 -#define __NR_lookup_dcookie 253 -#define __NR_epoll_create 254 -#define __NR_epoll_ctl 255 -#define __NR_epoll_wait 256 -#define __NR_remap_file_pages 257 -#define __NR_set_tid_address 258 -#define __NR_timer_create 259 -#define __NR_timer_settime (__NR_timer_create+1) -#define __NR_timer_gettime (__NR_timer_create+2) -#define __NR_timer_getoverrun (__NR_timer_create+3) -#define __NR_timer_delete (__NR_timer_create+4) -#define __NR_clock_settime (__NR_timer_create+5) -#define __NR_clock_gettime (__NR_timer_create+6) -#define __NR_clock_getres (__NR_timer_create+7) -#define __NR_clock_nanosleep (__NR_timer_create+8) -#define __NR_statfs64 268 -#define __NR_fstatfs64 269 -#define __NR_tgkill 270 -#define __NR_utimes 271 -#define __NR_fadvise64_64 272 -#define __NR_vserver 273 -#define __NR_mbind 274 -#define __NR_get_mempolicy 275 -#define __NR_set_mempolicy 276 -#define __NR_mq_open 277 -#define __NR_mq_unlink (__NR_mq_open+1) -#define __NR_mq_timedsend (__NR_mq_open+2) -#define __NR_mq_timedreceive (__NR_mq_open+3) -#define __NR_mq_notify (__NR_mq_open+4) -#define __NR_mq_getsetattr (__NR_mq_open+5) -#define __NR_kexec_load 283 -#define __NR_waitid 284 -/* #define __NR_sys_setaltroot 285 */ -#define __NR_add_key 286 -#define __NR_request_key 287 -#define __NR_keyctl 288 -#define __NR_ioprio_set 289 -#define __NR_ioprio_get 290 -#define __NR_inotify_init 291 -#define __NR_inotify_add_watch 292 -#define __NR_inotify_rm_watch 293 -#define __NR_migrate_pages 294 -#define __NR_openat 295 -#define __NR_mkdirat 296 -#define __NR_mknodat 297 -#define __NR_fchownat 298 -#define __NR_futimesat 299 -#define __NR_fstatat64 300 -#define __NR_unlinkat 301 -#define __NR_renameat 302 -#define __NR_linkat 303 -#define __NR_symlinkat 304 -#define __NR_readlinkat 305 -#define __NR_fchmodat 306 -#define __NR_faccessat 307 -#define __NR_pselect6 308 -#define __NR_ppoll 309 -#define __NR_unshare 310 -#define __NR_set_robust_list 311 -#define __NR_get_robust_list 312 -#define __NR_splice 313 -#define __NR_sync_file_range 314 -#define __NR_tee 315 -#define __NR_vmsplice 316 -#define __NR_move_pages 317 -#define __NR_getcpu 318 -#define __NR_epoll_pwait 319 -#define __NR_utimensat 320 -#define __NR_signalfd 321 -#define __NR_timerfd_create 322 -#define __NR_eventfd 323 -#define __NR_fallocate 324 -#define __NR_timerfd_settime 325 -#define __NR_timerfd_gettime 326 -#define __NR_signalfd4 327 -#define __NR_eventfd2 328 -#define __NR_epoll_create1 329 -#define __NR_dup3 330 -#define __NR_pipe2 331 -#define __NR_inotify_init1 332 -#define __NR_preadv 333 -#define __NR_pwritev 334 -#define __NR_setns 335 +#include <uapi/asm/unistd.h> -#ifdef __KERNEL__ #define NR_syscalls 336 @@ -371,7 +32,6 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE @@ -384,5 +44,4 @@ */ #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") -#endif /* __KERNEL__ */ #endif /* _ASM_CRIS_UNISTD_H_ */ diff --git a/arch/cris/include/uapi/arch-v10/arch/Kbuild b/arch/cris/include/uapi/arch-v10/arch/Kbuild index aafaa5aa54d4..9048c87a782b 100644 --- a/arch/cris/include/uapi/arch-v10/arch/Kbuild +++ b/arch/cris/include/uapi/arch-v10/arch/Kbuild @@ -1 +1,5 @@ # UAPI Header export list +header-y += sv_addr.agh +header-y += sv_addr_ag.h +header-y += svinto.h +header-y += user.h diff --git a/arch/cris/include/arch-v10/arch/sv_addr.agh b/arch/cris/include/uapi/arch-v10/arch/sv_addr.agh index 6ac3a7bc9760..6ac3a7bc9760 100644 --- a/arch/cris/include/arch-v10/arch/sv_addr.agh +++ b/arch/cris/include/uapi/arch-v10/arch/sv_addr.agh diff --git a/arch/cris/include/arch-v10/arch/sv_addr_ag.h b/arch/cris/include/uapi/arch-v10/arch/sv_addr_ag.h index 5517f04153a4..5517f04153a4 100644 --- a/arch/cris/include/arch-v10/arch/sv_addr_ag.h +++ b/arch/cris/include/uapi/arch-v10/arch/sv_addr_ag.h diff --git a/arch/cris/include/arch-v10/arch/svinto.h b/arch/cris/include/uapi/arch-v10/arch/svinto.h index da5c15272652..da5c15272652 100644 --- a/arch/cris/include/arch-v10/arch/svinto.h +++ b/arch/cris/include/uapi/arch-v10/arch/svinto.h diff --git a/arch/cris/include/arch-v10/arch/user.h b/arch/cris/include/uapi/arch-v10/arch/user.h index 9303ea77c915..9303ea77c915 100644 --- a/arch/cris/include/arch-v10/arch/user.h +++ b/arch/cris/include/uapi/arch-v10/arch/user.h diff --git a/arch/cris/include/uapi/arch-v32/arch/Kbuild b/arch/cris/include/uapi/arch-v32/arch/Kbuild index aafaa5aa54d4..59efffd16b61 100644 --- a/arch/cris/include/uapi/arch-v32/arch/Kbuild +++ b/arch/cris/include/uapi/arch-v32/arch/Kbuild @@ -1 +1,3 @@ # UAPI Header export list +header-y += cryptocop.h +header-y += user.h diff --git a/arch/cris/include/uapi/arch-v32/arch/cryptocop.h b/arch/cris/include/uapi/arch-v32/arch/cryptocop.h new file mode 100644 index 000000000000..694fd13ce1cf --- /dev/null +++ b/arch/cris/include/uapi/arch-v32/arch/cryptocop.h @@ -0,0 +1,122 @@ +/* + * The device /dev/cryptocop is accessible using this driver using + * CRYPTOCOP_MAJOR (254) and minor number 0. + */ + +#ifndef _UAPICRYPTOCOP_H +#define _UAPICRYPTOCOP_H + +#include <linux/uio.h> + + +#define CRYPTOCOP_SESSION_ID_NONE (0) + +typedef unsigned long long int cryptocop_session_id; + +/* cryptocop ioctls */ +#define ETRAXCRYPTOCOP_IOCTYPE (250) + +#define CRYPTOCOP_IO_CREATE_SESSION _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 1, struct strcop_session_op) +#define CRYPTOCOP_IO_CLOSE_SESSION _IOW(ETRAXCRYPTOCOP_IOCTYPE, 2, struct strcop_session_op) +#define CRYPTOCOP_IO_PROCESS_OP _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 3, struct strcop_crypto_op) +#define CRYPTOCOP_IO_MAXNR (3) + +typedef enum { + cryptocop_cipher_des = 0, + cryptocop_cipher_3des = 1, + cryptocop_cipher_aes = 2, + cryptocop_cipher_m2m = 3, /* mem2mem is essentially a NULL cipher with blocklength=1 */ + cryptocop_cipher_none +} cryptocop_cipher_type; + +typedef enum { + cryptocop_digest_sha1 = 0, + cryptocop_digest_md5 = 1, + cryptocop_digest_none +} cryptocop_digest_type; + +typedef enum { + cryptocop_csum_le = 0, + cryptocop_csum_be = 1, + cryptocop_csum_none +} cryptocop_csum_type; + +typedef enum { + cryptocop_cipher_mode_ecb = 0, + cryptocop_cipher_mode_cbc, + cryptocop_cipher_mode_none +} cryptocop_cipher_mode; + +typedef enum { + cryptocop_3des_eee = 0, + cryptocop_3des_eed = 1, + cryptocop_3des_ede = 2, + cryptocop_3des_edd = 3, + cryptocop_3des_dee = 4, + cryptocop_3des_ded = 5, + cryptocop_3des_dde = 6, + cryptocop_3des_ddd = 7 +} cryptocop_3des_mode; + +/* Usermode accessible (ioctl) operations. */ +struct strcop_session_op{ + cryptocop_session_id ses_id; + + cryptocop_cipher_type cipher; /* AES, DES, 3DES, m2m, none */ + + cryptocop_cipher_mode cmode; /* ECB, CBC, none */ + cryptocop_3des_mode des3_mode; + + cryptocop_digest_type digest; /* MD5, SHA1, none */ + + cryptocop_csum_type csum; /* BE, LE, none */ + + unsigned char *key; + size_t keylen; +}; + +#define CRYPTOCOP_CSUM_LENGTH (2) +#define CRYPTOCOP_MAX_DIGEST_LENGTH (20) /* SHA-1 20, MD5 16 */ +#define CRYPTOCOP_MAX_IV_LENGTH (16) /* (3)DES==8, AES == 16 */ +#define CRYPTOCOP_MAX_KEY_LENGTH (32) + +struct strcop_crypto_op{ + cryptocop_session_id ses_id; + + /* Indata. */ + unsigned char *indata; + size_t inlen; /* Total indata length. */ + + /* Cipher configuration. */ + unsigned char do_cipher:1; + unsigned char decrypt:1; /* 1 == decrypt, 0 == encrypt */ + unsigned char cipher_explicit:1; + size_t cipher_start; + size_t cipher_len; + /* cipher_iv is used if do_cipher and cipher_explicit and the cipher + mode is CBC. The length is controlled by the type of cipher, + e.g. DES/3DES 8 octets and AES 16 octets. */ + unsigned char cipher_iv[CRYPTOCOP_MAX_IV_LENGTH]; + /* Outdata. */ + unsigned char *cipher_outdata; + size_t cipher_outlen; + + /* digest configuration. */ + unsigned char do_digest:1; + size_t digest_start; + size_t digest_len; + /* Outdata. The actual length is determined by the type of the digest. */ + unsigned char digest[CRYPTOCOP_MAX_DIGEST_LENGTH]; + + /* Checksum configuration. */ + unsigned char do_csum:1; + size_t csum_start; + size_t csum_len; + /* Outdata. */ + unsigned char csum[CRYPTOCOP_CSUM_LENGTH]; +}; + + + + +#endif /* _UAPICRYPTOCOP_H */ diff --git a/arch/cris/include/arch-v32/arch/user.h b/arch/cris/include/uapi/arch-v32/arch/user.h index 03fa1f3c3c00..03fa1f3c3c00 100644 --- a/arch/cris/include/arch-v32/arch/user.h +++ b/arch/cris/include/uapi/arch-v32/arch/user.h diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild index f50236ae9ca3..7d47b366ad82 100644 --- a/arch/cris/include/uapi/asm/Kbuild +++ b/arch/cris/include/uapi/asm/Kbuild @@ -3,3 +3,37 @@ include include/uapi/asm-generic/Kbuild.asm header-y += arch-v10/ header-y += arch-v32/ +header-y += auxvec.h +header-y += bitsperlong.h +header-y += byteorder.h +header-y += errno.h +header-y += ethernet.h +header-y += etraxgpio.h +header-y += fcntl.h +header-y += ioctl.h +header-y += ioctls.h +header-y += ipcbuf.h +header-y += mman.h +header-y += msgbuf.h +header-y += param.h +header-y += poll.h +header-y += posix_types.h +header-y += ptrace.h +header-y += resource.h +header-y += rs485.h +header-y += sembuf.h +header-y += setup.h +header-y += shmbuf.h +header-y += sigcontext.h +header-y += siginfo.h +header-y += signal.h +header-y += socket.h +header-y += sockios.h +header-y += stat.h +header-y += statfs.h +header-y += swab.h +header-y += sync_serial.h +header-y += termbits.h +header-y += termios.h +header-y += types.h +header-y += unistd.h diff --git a/arch/cris/include/asm/auxvec.h b/arch/cris/include/uapi/asm/auxvec.h index cb30b01bf19f..cb30b01bf19f 100644 --- a/arch/cris/include/asm/auxvec.h +++ b/arch/cris/include/uapi/asm/auxvec.h diff --git a/arch/cris/include/asm/bitsperlong.h b/arch/cris/include/uapi/asm/bitsperlong.h index 6dc0bb0c13b2..6dc0bb0c13b2 100644 --- a/arch/cris/include/asm/bitsperlong.h +++ b/arch/cris/include/uapi/asm/bitsperlong.h diff --git a/arch/cris/include/asm/byteorder.h b/arch/cris/include/uapi/asm/byteorder.h index bcd189798e26..bcd189798e26 100644 --- a/arch/cris/include/asm/byteorder.h +++ b/arch/cris/include/uapi/asm/byteorder.h diff --git a/arch/cris/include/asm/errno.h b/arch/cris/include/uapi/asm/errno.h index 2bf5eb5fa773..2bf5eb5fa773 100644 --- a/arch/cris/include/asm/errno.h +++ b/arch/cris/include/uapi/asm/errno.h diff --git a/arch/cris/include/asm/ethernet.h b/arch/cris/include/uapi/asm/ethernet.h index 4d58652c3a49..4d58652c3a49 100644 --- a/arch/cris/include/asm/ethernet.h +++ b/arch/cris/include/uapi/asm/ethernet.h diff --git a/arch/cris/include/asm/etraxgpio.h b/arch/cris/include/uapi/asm/etraxgpio.h index 461c089db765..461c089db765 100644 --- a/arch/cris/include/asm/etraxgpio.h +++ b/arch/cris/include/uapi/asm/etraxgpio.h diff --git a/arch/cris/include/asm/fcntl.h b/arch/cris/include/uapi/asm/fcntl.h index 46ab12db5739..46ab12db5739 100644 --- a/arch/cris/include/asm/fcntl.h +++ b/arch/cris/include/uapi/asm/fcntl.h diff --git a/arch/cris/include/asm/ioctl.h b/arch/cris/include/uapi/asm/ioctl.h index b279fe06dfe5..b279fe06dfe5 100644 --- a/arch/cris/include/asm/ioctl.h +++ b/arch/cris/include/uapi/asm/ioctl.h diff --git a/arch/cris/include/asm/ioctls.h b/arch/cris/include/uapi/asm/ioctls.h index 488fbb3f5e84..488fbb3f5e84 100644 --- a/arch/cris/include/asm/ioctls.h +++ b/arch/cris/include/uapi/asm/ioctls.h diff --git a/arch/cris/include/asm/ipcbuf.h b/arch/cris/include/uapi/asm/ipcbuf.h index 84c7e51cb6d0..84c7e51cb6d0 100644 --- a/arch/cris/include/asm/ipcbuf.h +++ b/arch/cris/include/uapi/asm/ipcbuf.h diff --git a/arch/cris/include/asm/mman.h b/arch/cris/include/uapi/asm/mman.h index 8eebf89f5ab1..8eebf89f5ab1 100644 --- a/arch/cris/include/asm/mman.h +++ b/arch/cris/include/uapi/asm/mman.h diff --git a/arch/cris/include/asm/msgbuf.h b/arch/cris/include/uapi/asm/msgbuf.h index ada63df1d574..ada63df1d574 100644 --- a/arch/cris/include/asm/msgbuf.h +++ b/arch/cris/include/uapi/asm/msgbuf.h diff --git a/arch/cris/include/asm/param.h b/arch/cris/include/uapi/asm/param.h index 484fcf8667c0..484fcf8667c0 100644 --- a/arch/cris/include/asm/param.h +++ b/arch/cris/include/uapi/asm/param.h diff --git a/arch/cris/include/asm/poll.h b/arch/cris/include/uapi/asm/poll.h index c98509d3149e..c98509d3149e 100644 --- a/arch/cris/include/asm/poll.h +++ b/arch/cris/include/uapi/asm/poll.h diff --git a/arch/cris/include/asm/posix_types.h b/arch/cris/include/uapi/asm/posix_types.h index ce4e51793151..ce4e51793151 100644 --- a/arch/cris/include/asm/posix_types.h +++ b/arch/cris/include/uapi/asm/posix_types.h diff --git a/arch/cris/include/uapi/asm/ptrace.h b/arch/cris/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..c689c9bbbe50 --- /dev/null +++ b/arch/cris/include/uapi/asm/ptrace.h @@ -0,0 +1 @@ +#include <arch/ptrace.h> diff --git a/arch/cris/include/asm/resource.h b/arch/cris/include/uapi/asm/resource.h index b5d29448de4e..b5d29448de4e 100644 --- a/arch/cris/include/asm/resource.h +++ b/arch/cris/include/uapi/asm/resource.h diff --git a/arch/cris/include/asm/rs485.h b/arch/cris/include/uapi/asm/rs485.h index ad40f9fbcb8a..ad40f9fbcb8a 100644 --- a/arch/cris/include/asm/rs485.h +++ b/arch/cris/include/uapi/asm/rs485.h diff --git a/arch/cris/include/asm/sembuf.h b/arch/cris/include/uapi/asm/sembuf.h index 7fed9843796d..7fed9843796d 100644 --- a/arch/cris/include/asm/sembuf.h +++ b/arch/cris/include/uapi/asm/sembuf.h diff --git a/arch/cris/include/asm/setup.h b/arch/cris/include/uapi/asm/setup.h index b90728652d1a..b90728652d1a 100644 --- a/arch/cris/include/asm/setup.h +++ b/arch/cris/include/uapi/asm/setup.h diff --git a/arch/cris/include/asm/shmbuf.h b/arch/cris/include/uapi/asm/shmbuf.h index 3239e3f000e8..3239e3f000e8 100644 --- a/arch/cris/include/asm/shmbuf.h +++ b/arch/cris/include/uapi/asm/shmbuf.h diff --git a/arch/cris/include/asm/sigcontext.h b/arch/cris/include/uapi/asm/sigcontext.h index a1d634e120df..a1d634e120df 100644 --- a/arch/cris/include/asm/sigcontext.h +++ b/arch/cris/include/uapi/asm/sigcontext.h diff --git a/arch/cris/include/asm/siginfo.h b/arch/cris/include/uapi/asm/siginfo.h index c1cd6d16928b..c1cd6d16928b 100644 --- a/arch/cris/include/asm/siginfo.h +++ b/arch/cris/include/uapi/asm/siginfo.h diff --git a/arch/cris/include/uapi/asm/signal.h b/arch/cris/include/uapi/asm/signal.h new file mode 100644 index 000000000000..ce42fa7c32ad --- /dev/null +++ b/arch/cris/include/uapi/asm/signal.h @@ -0,0 +1,116 @@ +#ifndef _UAPI_ASM_CRIS_SIGNAL_H +#define _UAPI_ASM_CRIS_SIGNAL_H + +#include <linux/types.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ + +#define SA_NOCLDSTOP 0x00000001u +#define SA_NOCLDWAIT 0x00000002u +#define SA_SIGINFO 0x00000004u +#define SA_ONSTACK 0x08000000u +#define SA_RESTART 0x10000000u +#define SA_NODEFER 0x40000000u +#define SA_RESETHAND 0x80000000u + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +#define SA_RESTORER 0x04000000 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#include <asm-generic/signal-defs.h> + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + + +#endif /* _UAPI_ASM_CRIS_SIGNAL_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index b681b043f6c8..b681b043f6c8 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h diff --git a/arch/cris/include/asm/sockios.h b/arch/cris/include/uapi/asm/sockios.h index cfe7bfecf599..cfe7bfecf599 100644 --- a/arch/cris/include/asm/sockios.h +++ b/arch/cris/include/uapi/asm/sockios.h diff --git a/arch/cris/include/asm/stat.h b/arch/cris/include/uapi/asm/stat.h index 9e558cc3c43b..9e558cc3c43b 100644 --- a/arch/cris/include/asm/stat.h +++ b/arch/cris/include/uapi/asm/stat.h diff --git a/arch/cris/include/asm/statfs.h b/arch/cris/include/uapi/asm/statfs.h index fdaf921844bc..fdaf921844bc 100644 --- a/arch/cris/include/asm/statfs.h +++ b/arch/cris/include/uapi/asm/statfs.h diff --git a/arch/cris/include/uapi/asm/swab.h b/arch/cris/include/uapi/asm/swab.h new file mode 100644 index 000000000000..4adf1e9f0b09 --- /dev/null +++ b/arch/cris/include/uapi/asm/swab.h @@ -0,0 +1,3 @@ +/* + * CRIS byte swapping. + */ diff --git a/arch/cris/include/asm/sync_serial.h b/arch/cris/include/uapi/asm/sync_serial.h index 7f827fea30e7..7f827fea30e7 100644 --- a/arch/cris/include/asm/sync_serial.h +++ b/arch/cris/include/uapi/asm/sync_serial.h diff --git a/arch/cris/include/asm/termbits.h b/arch/cris/include/uapi/asm/termbits.h index 1c43bc874ccf..1c43bc874ccf 100644 --- a/arch/cris/include/asm/termbits.h +++ b/arch/cris/include/uapi/asm/termbits.h diff --git a/arch/cris/include/uapi/asm/termios.h b/arch/cris/include/uapi/asm/termios.h new file mode 100644 index 000000000000..0a0386a55027 --- /dev/null +++ b/arch/cris/include/uapi/asm/termios.h @@ -0,0 +1,45 @@ +#ifndef _UAPI_CRIS_TERMIOS_H +#define _UAPI_CRIS_TERMIOS_H + +#include <asm/termbits.h> +#include <asm/ioctls.h> +#include <asm/rs485.h> +#include <linux/serial.h> + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + + +#endif /* _UAPI_CRIS_TERMIOS_H */ diff --git a/arch/cris/include/uapi/asm/types.h b/arch/cris/include/uapi/asm/types.h new file mode 100644 index 000000000000..9ec9d4c5ac4d --- /dev/null +++ b/arch/cris/include/uapi/asm/types.h @@ -0,0 +1 @@ +#include <asm-generic/int-ll64.h> diff --git a/arch/cris/include/uapi/asm/unistd.h b/arch/cris/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..48842896f6c2 --- /dev/null +++ b/arch/cris/include/uapi/asm/unistd.h @@ -0,0 +1,344 @@ +#ifndef _UAPI_ASM_CRIS_UNISTD_H_ +#define _UAPI_ASM_CRIS_UNISTD_H_ + +/* + * This file contains the system call numbers, and stub macros for libc. + */ + +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 + +#define __NR_query_module 167 +#define __NR_poll 168 +#define __NR_nfsservctl 169 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread64 180 +#define __NR_pwrite64 181 +#define __NR_chown 182 +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +#define __NR_getpmsg 188 /* some people actually want streams */ +#define __NR_putpmsg 189 /* some people actually want streams */ +#define __NR_vfork 190 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_getdents64 220 +#define __NR_fcntl64 221 +/* 223 is unused */ +#define __NR_gettid 224 +#define __NR_readahead 225 +#define __NR_setxattr 226 +#define __NR_lsetxattr 227 +#define __NR_fsetxattr 228 +#define __NR_getxattr 229 +#define __NR_lgetxattr 230 +#define __NR_fgetxattr 231 +#define __NR_listxattr 232 +#define __NR_llistxattr 233 +#define __NR_flistxattr 234 +#define __NR_removexattr 235 +#define __NR_lremovexattr 236 +#define __NR_fremovexattr 237 +#define __NR_tkill 238 +#define __NR_sendfile64 239 +#define __NR_futex 240 +#define __NR_sched_setaffinity 241 +#define __NR_sched_getaffinity 242 +#define __NR_set_thread_area 243 +#define __NR_get_thread_area 244 +#define __NR_io_setup 245 +#define __NR_io_destroy 246 +#define __NR_io_getevents 247 +#define __NR_io_submit 248 +#define __NR_io_cancel 249 +#define __NR_fadvise64 250 +/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ +#define __NR_exit_group 252 +#define __NR_lookup_dcookie 253 +#define __NR_epoll_create 254 +#define __NR_epoll_ctl 255 +#define __NR_epoll_wait 256 +#define __NR_remap_file_pages 257 +#define __NR_set_tid_address 258 +#define __NR_timer_create 259 +#define __NR_timer_settime (__NR_timer_create+1) +#define __NR_timer_gettime (__NR_timer_create+2) +#define __NR_timer_getoverrun (__NR_timer_create+3) +#define __NR_timer_delete (__NR_timer_create+4) +#define __NR_clock_settime (__NR_timer_create+5) +#define __NR_clock_gettime (__NR_timer_create+6) +#define __NR_clock_getres (__NR_timer_create+7) +#define __NR_clock_nanosleep (__NR_timer_create+8) +#define __NR_statfs64 268 +#define __NR_fstatfs64 269 +#define __NR_tgkill 270 +#define __NR_utimes 271 +#define __NR_fadvise64_64 272 +#define __NR_vserver 273 +#define __NR_mbind 274 +#define __NR_get_mempolicy 275 +#define __NR_set_mempolicy 276 +#define __NR_mq_open 277 +#define __NR_mq_unlink (__NR_mq_open+1) +#define __NR_mq_timedsend (__NR_mq_open+2) +#define __NR_mq_timedreceive (__NR_mq_open+3) +#define __NR_mq_notify (__NR_mq_open+4) +#define __NR_mq_getsetattr (__NR_mq_open+5) +#define __NR_kexec_load 283 +#define __NR_waitid 284 +/* #define __NR_sys_setaltroot 285 */ +#define __NR_add_key 286 +#define __NR_request_key 287 +#define __NR_keyctl 288 +#define __NR_ioprio_set 289 +#define __NR_ioprio_get 290 +#define __NR_inotify_init 291 +#define __NR_inotify_add_watch 292 +#define __NR_inotify_rm_watch 293 +#define __NR_migrate_pages 294 +#define __NR_openat 295 +#define __NR_mkdirat 296 +#define __NR_mknodat 297 +#define __NR_fchownat 298 +#define __NR_futimesat 299 +#define __NR_fstatat64 300 +#define __NR_unlinkat 301 +#define __NR_renameat 302 +#define __NR_linkat 303 +#define __NR_symlinkat 304 +#define __NR_readlinkat 305 +#define __NR_fchmodat 306 +#define __NR_faccessat 307 +#define __NR_pselect6 308 +#define __NR_ppoll 309 +#define __NR_unshare 310 +#define __NR_set_robust_list 311 +#define __NR_get_robust_list 312 +#define __NR_splice 313 +#define __NR_sync_file_range 314 +#define __NR_tee 315 +#define __NR_vmsplice 316 +#define __NR_move_pages 317 +#define __NR_getcpu 318 +#define __NR_epoll_pwait 319 +#define __NR_utimensat 320 +#define __NR_signalfd 321 +#define __NR_timerfd_create 322 +#define __NR_eventfd 323 +#define __NR_fallocate 324 +#define __NR_timerfd_settime 325 +#define __NR_timerfd_gettime 326 +#define __NR_signalfd4 327 +#define __NR_eventfd2 328 +#define __NR_epoll_create1 329 +#define __NR_dup3 330 +#define __NR_pipe2 331 +#define __NR_inotify_init1 332 +#define __NR_preadv 333 +#define __NR_pwritev 334 +#define __NR_setns 335 + +#endif /* _UAPI_ASM_CRIS_UNISTD_H_ */ diff --git a/arch/cris/kernel/asm-offsets.c b/arch/cris/kernel/asm-offsets.c index dd7b8e983221..a5fd88d816a6 100644 --- a/arch/cris/kernel/asm-offsets.c +++ b/arch/cris/kernel/asm-offsets.c @@ -1,3 +1,4 @@ +#include <linux/kbuild.h> #include <linux/sched.h> #include <asm/thread_info.h> @@ -7,11 +8,6 @@ * and format the required data. */ -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - #if !defined(CONFIG_ETRAX_ARCH_V10) && !defined(CONFIG_ETRAX_ARCH_V32) #error One of ARCH v10 and ARCH v32 must be true! #endif diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index df2eb4bd9fa2..9d262645f667 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -12,8 +12,6 @@ config FRV select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_CPU_DEVICES select ARCH_WANT_IPC_PARSE_VERSION - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE config ZONE_DMA bool diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h index 1807d8ea8cb5..d685da17f5fb 100644 --- a/arch/frv/include/asm/unistd.h +++ b/arch/frv/include/asm/unistd.h @@ -29,7 +29,6 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 0ae445087607..2d2efb653ee0 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -9,8 +9,6 @@ config H8300 select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE config SYMBOL_PREFIX string diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 4bc8ae73e08a..995eb47e01bb 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -1,6 +1,6 @@ -include include/asm-generic/Kbuild.asm generic-y += clkdev.h generic-y += exec.h +generic-y += mmu.h generic-y += module.h generic-y += trace_clock.h diff --git a/arch/h8300/include/asm/mmu.h b/arch/h8300/include/asm/mmu.h deleted file mode 100644 index 31309969df70..000000000000 --- a/arch/h8300/include/asm/mmu.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __MMU_H -#define __MMU_H - -/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */ - -typedef struct { - unsigned long end_brk; -} mm_context_t; - -#endif diff --git a/arch/h8300/include/asm/param.h b/arch/h8300/include/asm/param.h index 1c72fb8080ff..c3909e7ff178 100644 --- a/arch/h8300/include/asm/param.h +++ b/arch/h8300/include/asm/param.h @@ -1,20 +1,9 @@ #ifndef _H8300_PARAM_H #define _H8300_PARAM_H -#ifdef __KERNEL__ +#include <uapi/asm/param.h> + #define HZ CONFIG_HZ #define USER_HZ HZ #define CLOCKS_PER_SEC (USER_HZ) -#else -#define HZ 100 -#endif - -#define EXEC_PAGESIZE 4096 - -#ifndef NOGROUP -#define NOGROUP (-1) -#endif - -#define MAXHOSTNAMELEN 64 /* max length of hostname */ - #endif /* _H8300_PARAM_H */ diff --git a/arch/h8300/include/asm/ptrace.h b/arch/h8300/include/asm/ptrace.h index 7468589a128b..c1826b95c5ca 100644 --- a/arch/h8300/include/asm/ptrace.h +++ b/arch/h8300/include/asm/ptrace.h @@ -1,46 +1,11 @@ #ifndef _H8300_PTRACE_H #define _H8300_PTRACE_H -#ifndef __ASSEMBLY__ - -#define PT_ER1 0 -#define PT_ER2 1 -#define PT_ER3 2 -#define PT_ER4 3 -#define PT_ER5 4 -#define PT_ER6 5 -#define PT_ER0 6 -#define PT_ORIG_ER0 7 -#define PT_CCR 8 -#define PT_PC 9 -#define PT_USP 10 -#define PT_EXR 12 - -/* this struct defines the way the registers are stored on the - stack during a system call. */ +#include <uapi/asm/ptrace.h> -struct pt_regs { - long retpc; - long er4; - long er5; - long er6; - long er3; - long er2; - long er1; - long orig_er0; - unsigned short ccr; - long er0; - long vector; +#ifndef __ASSEMBLY__ #if defined(CONFIG_CPU_H8S) - unsigned short exr; #endif - unsigned long pc; -} __attribute__((aligned(2),packed)); - -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 - -#ifdef __KERNEL__ #ifndef PS_S #define PS_S (0x10) #endif @@ -63,6 +28,6 @@ struct pt_regs { #define current_pt_regs() ((struct pt_regs *) \ (THREAD_SIZE + (unsigned long)current_thread_info()) - 1) #define signal_pt_regs() ((struct pt_regs *)current->thread.esp0) -#endif /* __KERNEL__ */ +#define current_user_stack_pointer() rdusp() #endif /* __ASSEMBLY__ */ #endif /* _H8300_PTRACE_H */ diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h index c43c0a7d2c2e..66c81c67e55d 100644 --- a/arch/h8300/include/asm/signal.h +++ b/arch/h8300/include/asm/signal.h @@ -1,12 +1,8 @@ #ifndef _H8300_SIGNAL_H #define _H8300_SIGNAL_H -#include <linux/types.h> +#include <uapi/asm/signal.h> -/* Avoid too many header ordering problems. */ -struct siginfo; - -#ifdef __KERNEL__ /* Most things should be clean enough to redefine this at will, if care is taken to make libc match. */ @@ -20,94 +16,6 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; -#else -/* Here we must cater to libcs that poke about in kernel headers. */ - -#define NSIG 32 -typedef unsigned long sigset_t; - -#endif /* __KERNEL__ */ - -#define SIGHUP 1 -#define SIGINT 2 -#define SIGQUIT 3 -#define SIGILL 4 -#define SIGTRAP 5 -#define SIGABRT 6 -#define SIGIOT 6 -#define SIGBUS 7 -#define SIGFPE 8 -#define SIGKILL 9 -#define SIGUSR1 10 -#define SIGSEGV 11 -#define SIGUSR2 12 -#define SIGPIPE 13 -#define SIGALRM 14 -#define SIGTERM 15 -#define SIGSTKFLT 16 -#define SIGCHLD 17 -#define SIGCONT 18 -#define SIGSTOP 19 -#define SIGTSTP 20 -#define SIGTTIN 21 -#define SIGTTOU 22 -#define SIGURG 23 -#define SIGXCPU 24 -#define SIGXFSZ 25 -#define SIGVTALRM 26 -#define SIGPROF 27 -#define SIGWINCH 28 -#define SIGIO 29 -#define SIGPOLL SIGIO -/* -#define SIGLOST 29 -*/ -#define SIGPWR 30 -#define SIGSYS 31 -#define SIGUNUSED 31 - -/* These should not be considered constants from userland. */ -#define SIGRTMIN 32 -#define SIGRTMAX _NSIG - -/* - * SA_FLAGS values: - * - * SA_ONSTACK indicates that a registered stack_t will be used. - * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. - * SA_RESETHAND clears the handler when the signal is delivered. - * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. - * SA_NODEFER prevents the current signal from being masked in the handler. - * - * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single - * Unix names RESETHAND and NODEFER respectively. - */ -#define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ -#define SA_SIGINFO 0x00000004 -#define SA_ONSTACK 0x08000000 -#define SA_RESTART 0x10000000 -#define SA_NODEFER 0x40000000 -#define SA_RESETHAND 0x80000000 - -#define SA_NOMASK SA_NODEFER -#define SA_ONESHOT SA_RESETHAND - -#define SA_RESTORER 0x04000000 - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - -#define MINSIGSTKSZ 2048 -#define SIGSTKSZ 8192 - -#include <asm-generic/signal-defs.h> - -#ifdef __KERNEL__ struct old_sigaction { __sighandler_t sa_handler; old_sigset_t sa_mask; @@ -125,35 +33,8 @@ struct sigaction { struct k_sigaction { struct sigaction sa; }; -#else -/* Here we must cater to libcs that poke about in kernel headers. */ - -struct sigaction { - union { - __sighandler_t _sa_handler; - void (*_sa_sigaction)(int, struct siginfo *, void *); - } _u; - sigset_t sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -}; - -#define sa_handler _u._sa_handler -#define sa_sigaction _u._sa_sigaction - -#endif /* __KERNEL__ */ - -typedef struct sigaltstack { - void *ss_sp; - int ss_flags; - size_t ss_size; -} stack_t; - -#ifdef __KERNEL__ #include <asm/sigcontext.h> #undef __HAVE_ARCH_SIG_BITOPS -#endif /* __KERNEL__ */ - #endif /* _H8300_SIGNAL_H */ diff --git a/arch/h8300/include/asm/termios.h b/arch/h8300/include/asm/termios.h index 70eea64b4213..93a63df56247 100644 --- a/arch/h8300/include/asm/termios.h +++ b/arch/h8300/include/asm/termios.h @@ -1,27 +1,8 @@ #ifndef _H8300_TERMIOS_H #define _H8300_TERMIOS_H -#include <asm/termbits.h> -#include <asm/ioctls.h> - -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; +#include <uapi/asm/termios.h> -#define NCC 8 -struct termio { - unsigned short c_iflag; /* input mode flags */ - unsigned short c_oflag; /* output mode flags */ - unsigned short c_cflag; /* control mode flags */ - unsigned short c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[NCC]; /* control characters */ -}; - -#ifdef __KERNEL__ /* intr=^C quit=^| erase=del kill=^U eof=^D vtime=\0 vmin=\1 sxtc=\0 start=^Q stop=^S susp=^Z eol=\0 @@ -29,27 +10,6 @@ struct termio { eol2=\0 */ #define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" -#endif - -/* modem lines */ -#define TIOCM_LE 0x001 -#define TIOCM_DTR 0x002 -#define TIOCM_RTS 0x004 -#define TIOCM_ST 0x008 -#define TIOCM_SR 0x010 -#define TIOCM_CTS 0x020 -#define TIOCM_CAR 0x040 -#define TIOCM_RNG 0x080 -#define TIOCM_DSR 0x100 -#define TIOCM_CD TIOCM_CAR -#define TIOCM_RI TIOCM_RNG -#define TIOCM_OUT1 0x2000 -#define TIOCM_OUT2 0x4000 -#define TIOCM_LOOP 0x8000 - -/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ - -#ifdef __KERNEL__ /* * Translate a "termio" structure into a "termios". Ugh. @@ -87,6 +47,4 @@ struct termio { #define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) #define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) -#endif /* __KERNEL__ */ - #endif /* _H8300_TERMIOS_H */ diff --git a/arch/h8300/include/asm/types.h b/arch/h8300/include/asm/types.h index 07257d9487d8..c012707f6037 100644 --- a/arch/h8300/include/asm/types.h +++ b/arch/h8300/include/asm/types.h @@ -1,12 +1,9 @@ #ifndef _H8300_TYPES_H #define _H8300_TYPES_H -#include <asm-generic/int-ll64.h> +#include <uapi/asm/types.h> -#ifdef __KERNEL__ #define BITS_PER_LONG 32 -#endif /* __KERNEL__ */ - #endif /* _H8300_TYPES_H */ diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h index c2c2f5c7d6bf..aa38105959fb 100644 --- a/arch/h8300/include/asm/unistd.h +++ b/arch/h8300/include/asm/unistd.h @@ -1,333 +1,8 @@ #ifndef _ASM_H8300_UNISTD_H_ #define _ASM_H8300_UNISTD_H_ -/* - * This file contains the system call numbers. - */ - -#define __NR_restart_syscall 0 -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_waitpid 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_time 13 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lchown 16 -#define __NR_break 17 -#define __NR_oldstat 18 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_oldfstat 28 -#define __NR_pause 29 -#define __NR_utime 30 -#define __NR_stty 31 -#define __NR_gtty 32 -#define __NR_access 33 -#define __NR_nice 34 -#define __NR_ftime 35 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_prof 44 -#define __NR_brk 45 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_signal 48 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_acct 51 -#define __NR_umount2 52 -#define __NR_lock 53 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_mpx 56 -#define __NR_setpgid 57 -#define __NR_ulimit 58 -#define __NR_oldolduname 59 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sgetmask 68 -#define __NR_ssetmask 69 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrlimit 76 -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_select 82 -#define __NR_symlink 83 -#define __NR_oldlstat 84 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir 89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_fchown 95 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_profil 98 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_ioperm 101 -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_olduname 109 -#define __NR_iopl 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_vm86old 113 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define __NR_modify_ldt 123 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_sigprocmask 126 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 -#define __NR_personality 136 -#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR_getdents 141 -#define __NR__newselect 142 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define __NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -#define __NR_setresuid 164 -#define __NR_getresuid 165 -#define __NR_vm86 166 -#define __NR_query_module 167 -#define __NR_poll 168 -#define __NR_nfsservctl 169 -#define __NR_setresgid 170 -#define __NR_getresgid 171 -#define __NR_prctl 172 -#define __NR_rt_sigreturn 173 -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 -#define __NR_rt_sigpending 176 -#define __NR_rt_sigtimedwait 177 -#define __NR_rt_sigqueueinfo 178 -#define __NR_rt_sigsuspend 179 -#define __NR_pread64 180 -#define __NR_pwrite64 181 -#define __NR_chown 182 -#define __NR_getcwd 183 -#define __NR_capget 184 -#define __NR_capset 185 -#define __NR_sigaltstack 186 -#define __NR_sendfile 187 -#define __NR_getpmsg 188 /* some people actually want streams */ -#define __NR_putpmsg 189 /* some people actually want streams */ -#define __NR_vfork 190 -#define __NR_ugetrlimit 191 -#define __NR_mmap2 192 -#define __NR_truncate64 193 -#define __NR_ftruncate64 194 -#define __NR_stat64 195 -#define __NR_lstat64 196 -#define __NR_fstat64 197 -#define __NR_lchown32 198 -#define __NR_getuid32 199 -#define __NR_getgid32 200 -#define __NR_geteuid32 201 -#define __NR_getegid32 202 -#define __NR_setreuid32 203 -#define __NR_setregid32 204 -#define __NR_getgroups32 205 -#define __NR_setgroups32 206 -#define __NR_fchown32 207 -#define __NR_setresuid32 208 -#define __NR_getresuid32 209 -#define __NR_setresgid32 210 -#define __NR_getresgid32 211 -#define __NR_chown32 212 -#define __NR_setuid32 213 -#define __NR_setgid32 214 -#define __NR_setfsuid32 215 -#define __NR_setfsgid32 216 -#define __NR_pivot_root 217 -#define __NR_mincore 218 -#define __NR_madvise 219 -#define __NR_madvise1 219 -#define __NR_getdents64 220 -#define __NR_fcntl64 221 -/* 223 is unused */ -#define __NR_gettid 224 -#define __NR_readahead 225 -#define __NR_setxattr 226 -#define __NR_lsetxattr 227 -#define __NR_fsetxattr 228 -#define __NR_getxattr 229 -#define __NR_lgetxattr 230 -#define __NR_fgetxattr 231 -#define __NR_listxattr 232 -#define __NR_llistxattr 233 -#define __NR_flistxattr 234 -#define __NR_removexattr 235 -#define __NR_lremovexattr 236 -#define __NR_fremovexattr 237 -#define __NR_tkill 238 -#define __NR_sendfile64 239 -#define __NR_futex 240 -#define __NR_sched_setaffinity 241 -#define __NR_sched_getaffinity 242 -#define __NR_set_thread_area 243 -#define __NR_get_thread_area 244 -#define __NR_io_setup 245 -#define __NR_io_destroy 246 -#define __NR_io_getevents 247 -#define __NR_io_submit 248 -#define __NR_io_cancel 249 -#define __NR_fadvise64 250 -/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ -#define __NR_exit_group 252 -#define __NR_lookup_dcookie 253 -#define __NR_epoll_create 254 -#define __NR_epoll_ctl 255 -#define __NR_epoll_wait 256 -#define __NR_remap_file_pages 257 -#define __NR_set_tid_address 258 -#define __NR_timer_create 259 -#define __NR_timer_settime (__NR_timer_create+1) -#define __NR_timer_gettime (__NR_timer_create+2) -#define __NR_timer_getoverrun (__NR_timer_create+3) -#define __NR_timer_delete (__NR_timer_create+4) -#define __NR_clock_settime (__NR_timer_create+5) -#define __NR_clock_gettime (__NR_timer_create+6) -#define __NR_clock_getres (__NR_timer_create+7) -#define __NR_clock_nanosleep (__NR_timer_create+8) -#define __NR_statfs64 268 -#define __NR_fstatfs64 269 -#define __NR_tgkill 270 -#define __NR_utimes 271 -#define __NR_fadvise64_64 272 -#define __NR_vserver 273 -#define __NR_mbind 274 -#define __NR_get_mempolicy 275 -#define __NR_set_mempolicy 276 -#define __NR_mq_open 277 -#define __NR_mq_unlink (__NR_mq_open+1) -#define __NR_mq_timedsend (__NR_mq_open+2) -#define __NR_mq_timedreceive (__NR_mq_open+3) -#define __NR_mq_notify (__NR_mq_open+4) -#define __NR_mq_getsetattr (__NR_mq_open+5) -#define __NR_kexec_load 283 -#define __NR_waitid 284 -/* #define __NR_sys_setaltroot 285 */ -#define __NR_add_key 286 -#define __NR_request_key 287 -#define __NR_keyctl 288 -#define __NR_ioprio_set 289 -#define __NR_ioprio_get 290 -#define __NR_inotify_init 291 -#define __NR_inotify_add_watch 292 -#define __NR_inotify_rm_watch 293 -#define __NR_migrate_pages 294 -#define __NR_openat 295 -#define __NR_mkdirat 296 -#define __NR_mknodat 297 -#define __NR_fchownat 298 -#define __NR_futimesat 299 -#define __NR_fstatat64 300 -#define __NR_unlinkat 301 -#define __NR_renameat 302 -#define __NR_linkat 303 -#define __NR_symlinkat 304 -#define __NR_readlinkat 305 -#define __NR_fchmodat 306 -#define __NR_faccessat 307 -#define __NR_pselect6 308 -#define __NR_ppoll 309 -#define __NR_unshare 310 -#define __NR_set_robust_list 311 -#define __NR_get_robust_list 312 -#define __NR_splice 313 -#define __NR_sync_file_range 314 -#define __NR_tee 315 -#define __NR_vmsplice 316 -#define __NR_move_pages 317 -#define __NR_getcpu 318 -#define __NR_epoll_pwait 319 -#define __NR_setns 320 +#include <uapi/asm/unistd.h> -#ifdef __KERNEL__ #define NR_syscalls 321 @@ -356,7 +31,6 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE @@ -368,5 +42,4 @@ asm (".weak\t_" #name "\n" \ ".set\t_" #name ",_sys_ni_syscall"); -#endif /* __KERNEL__ */ #endif /* _ASM_H8300_UNISTD_H_ */ diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild index baebb3da1d44..040178cdb3eb 100644 --- a/arch/h8300/include/uapi/asm/Kbuild +++ b/arch/h8300/include/uapi/asm/Kbuild @@ -1,3 +1,34 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +header-y += auxvec.h +header-y += bitsperlong.h +header-y += byteorder.h +header-y += errno.h +header-y += fcntl.h +header-y += ioctl.h +header-y += ioctls.h +header-y += ipcbuf.h +header-y += kvm_para.h +header-y += mman.h +header-y += msgbuf.h +header-y += param.h +header-y += poll.h +header-y += posix_types.h +header-y += ptrace.h +header-y += resource.h +header-y += sembuf.h +header-y += setup.h +header-y += shmbuf.h +header-y += sigcontext.h +header-y += siginfo.h +header-y += signal.h +header-y += socket.h +header-y += sockios.h +header-y += stat.h +header-y += statfs.h +header-y += swab.h +header-y += termbits.h +header-y += termios.h +header-y += types.h +header-y += unistd.h diff --git a/arch/h8300/include/asm/auxvec.h b/arch/h8300/include/uapi/asm/auxvec.h index 1d36fe38b088..1d36fe38b088 100644 --- a/arch/h8300/include/asm/auxvec.h +++ b/arch/h8300/include/uapi/asm/auxvec.h diff --git a/arch/h8300/include/asm/bitsperlong.h b/arch/h8300/include/uapi/asm/bitsperlong.h index 6dc0bb0c13b2..6dc0bb0c13b2 100644 --- a/arch/h8300/include/asm/bitsperlong.h +++ b/arch/h8300/include/uapi/asm/bitsperlong.h diff --git a/arch/h8300/include/asm/byteorder.h b/arch/h8300/include/uapi/asm/byteorder.h index 13539da99efd..13539da99efd 100644 --- a/arch/h8300/include/asm/byteorder.h +++ b/arch/h8300/include/uapi/asm/byteorder.h diff --git a/arch/h8300/include/asm/errno.h b/arch/h8300/include/uapi/asm/errno.h index 0c2f5641fdcc..0c2f5641fdcc 100644 --- a/arch/h8300/include/asm/errno.h +++ b/arch/h8300/include/uapi/asm/errno.h diff --git a/arch/h8300/include/asm/fcntl.h b/arch/h8300/include/uapi/asm/fcntl.h index 1952cb2e3b06..1952cb2e3b06 100644 --- a/arch/h8300/include/asm/fcntl.h +++ b/arch/h8300/include/uapi/asm/fcntl.h diff --git a/arch/h8300/include/asm/ioctl.h b/arch/h8300/include/uapi/asm/ioctl.h index b279fe06dfe5..b279fe06dfe5 100644 --- a/arch/h8300/include/asm/ioctl.h +++ b/arch/h8300/include/uapi/asm/ioctl.h diff --git a/arch/h8300/include/asm/ioctls.h b/arch/h8300/include/uapi/asm/ioctls.h index 30eaed2facdb..30eaed2facdb 100644 --- a/arch/h8300/include/asm/ioctls.h +++ b/arch/h8300/include/uapi/asm/ioctls.h diff --git a/arch/h8300/include/asm/ipcbuf.h b/arch/h8300/include/uapi/asm/ipcbuf.h index 84c7e51cb6d0..84c7e51cb6d0 100644 --- a/arch/h8300/include/asm/ipcbuf.h +++ b/arch/h8300/include/uapi/asm/ipcbuf.h diff --git a/arch/h8300/include/asm/kvm_para.h b/arch/h8300/include/uapi/asm/kvm_para.h index 14fab8f0b957..14fab8f0b957 100644 --- a/arch/h8300/include/asm/kvm_para.h +++ b/arch/h8300/include/uapi/asm/kvm_para.h diff --git a/arch/h8300/include/asm/mman.h b/arch/h8300/include/uapi/asm/mman.h index 8eebf89f5ab1..8eebf89f5ab1 100644 --- a/arch/h8300/include/asm/mman.h +++ b/arch/h8300/include/uapi/asm/mman.h diff --git a/arch/h8300/include/asm/msgbuf.h b/arch/h8300/include/uapi/asm/msgbuf.h index 6b148cd09aa5..6b148cd09aa5 100644 --- a/arch/h8300/include/asm/msgbuf.h +++ b/arch/h8300/include/uapi/asm/msgbuf.h diff --git a/arch/h8300/include/uapi/asm/param.h b/arch/h8300/include/uapi/asm/param.h new file mode 100644 index 000000000000..3dd18ae15f03 --- /dev/null +++ b/arch/h8300/include/uapi/asm/param.h @@ -0,0 +1,16 @@ +#ifndef _UAPI_H8300_PARAM_H +#define _UAPI_H8300_PARAM_H + +#ifndef __KERNEL__ +#define HZ 100 +#endif + +#define EXEC_PAGESIZE 4096 + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#endif /* _UAPI_H8300_PARAM_H */ diff --git a/arch/h8300/include/asm/poll.h b/arch/h8300/include/uapi/asm/poll.h index f61540c22d94..f61540c22d94 100644 --- a/arch/h8300/include/asm/poll.h +++ b/arch/h8300/include/uapi/asm/poll.h diff --git a/arch/h8300/include/asm/posix_types.h b/arch/h8300/include/uapi/asm/posix_types.h index 91e62ba4c7b0..91e62ba4c7b0 100644 --- a/arch/h8300/include/asm/posix_types.h +++ b/arch/h8300/include/uapi/asm/posix_types.h diff --git a/arch/h8300/include/uapi/asm/ptrace.h b/arch/h8300/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..ef39ec5977b6 --- /dev/null +++ b/arch/h8300/include/uapi/asm/ptrace.h @@ -0,0 +1,44 @@ +#ifndef _UAPI_H8300_PTRACE_H +#define _UAPI_H8300_PTRACE_H + +#ifndef __ASSEMBLY__ + +#define PT_ER1 0 +#define PT_ER2 1 +#define PT_ER3 2 +#define PT_ER4 3 +#define PT_ER5 4 +#define PT_ER6 5 +#define PT_ER0 6 +#define PT_ORIG_ER0 7 +#define PT_CCR 8 +#define PT_PC 9 +#define PT_USP 10 +#define PT_EXR 12 + +/* this struct defines the way the registers are stored on the + stack during a system call. */ + +struct pt_regs { + long retpc; + long er4; + long er5; + long er6; + long er3; + long er2; + long er1; + long orig_er0; + unsigned short ccr; + long er0; + long vector; +#if defined(CONFIG_CPU_H8S) + unsigned short exr; +#endif + unsigned long pc; +} __attribute__((aligned(2),packed)); + +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 + +#endif /* __ASSEMBLY__ */ +#endif /* _UAPI_H8300_PTRACE_H */ diff --git a/arch/h8300/include/asm/resource.h b/arch/h8300/include/uapi/asm/resource.h index 46c5f4391607..46c5f4391607 100644 --- a/arch/h8300/include/asm/resource.h +++ b/arch/h8300/include/uapi/asm/resource.h diff --git a/arch/h8300/include/asm/sembuf.h b/arch/h8300/include/uapi/asm/sembuf.h index e04a3ec0cb92..e04a3ec0cb92 100644 --- a/arch/h8300/include/asm/sembuf.h +++ b/arch/h8300/include/uapi/asm/sembuf.h diff --git a/arch/h8300/include/asm/setup.h b/arch/h8300/include/uapi/asm/setup.h index e2c600e96733..e2c600e96733 100644 --- a/arch/h8300/include/asm/setup.h +++ b/arch/h8300/include/uapi/asm/setup.h diff --git a/arch/h8300/include/asm/shmbuf.h b/arch/h8300/include/uapi/asm/shmbuf.h index 64e77993a7a9..64e77993a7a9 100644 --- a/arch/h8300/include/asm/shmbuf.h +++ b/arch/h8300/include/uapi/asm/shmbuf.h diff --git a/arch/h8300/include/asm/sigcontext.h b/arch/h8300/include/uapi/asm/sigcontext.h index e4b81505f8f8..e4b81505f8f8 100644 --- a/arch/h8300/include/asm/sigcontext.h +++ b/arch/h8300/include/uapi/asm/sigcontext.h diff --git a/arch/h8300/include/asm/siginfo.h b/arch/h8300/include/uapi/asm/siginfo.h index bc8fbea931a5..bc8fbea931a5 100644 --- a/arch/h8300/include/asm/siginfo.h +++ b/arch/h8300/include/uapi/asm/siginfo.h diff --git a/arch/h8300/include/uapi/asm/signal.h b/arch/h8300/include/uapi/asm/signal.h new file mode 100644 index 000000000000..af3a6c37fee6 --- /dev/null +++ b/arch/h8300/include/uapi/asm/signal.h @@ -0,0 +1,115 @@ +#ifndef _UAPI_H8300_SIGNAL_H +#define _UAPI_H8300_SIGNAL_H + +#include <linux/types.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_SIGINFO 0x00000004 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +#define SA_RESTORER 0x04000000 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#include <asm-generic/signal-defs.h> + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + + +#endif /* _UAPI_H8300_SIGNAL_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h index 90a2e573c7e6..90a2e573c7e6 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/uapi/asm/socket.h diff --git a/arch/h8300/include/asm/sockios.h b/arch/h8300/include/uapi/asm/sockios.h index e9c7ec810c23..e9c7ec810c23 100644 --- a/arch/h8300/include/asm/sockios.h +++ b/arch/h8300/include/uapi/asm/sockios.h diff --git a/arch/h8300/include/asm/stat.h b/arch/h8300/include/uapi/asm/stat.h index 62c3cc24dfe6..62c3cc24dfe6 100644 --- a/arch/h8300/include/asm/stat.h +++ b/arch/h8300/include/uapi/asm/stat.h diff --git a/arch/h8300/include/asm/statfs.h b/arch/h8300/include/uapi/asm/statfs.h index b96efa712aac..b96efa712aac 100644 --- a/arch/h8300/include/asm/statfs.h +++ b/arch/h8300/include/uapi/asm/statfs.h diff --git a/arch/h8300/include/asm/swab.h b/arch/h8300/include/uapi/asm/swab.h index 39abbf52807d..39abbf52807d 100644 --- a/arch/h8300/include/asm/swab.h +++ b/arch/h8300/include/uapi/asm/swab.h diff --git a/arch/h8300/include/asm/termbits.h b/arch/h8300/include/uapi/asm/termbits.h index 3287a6244d74..3287a6244d74 100644 --- a/arch/h8300/include/asm/termbits.h +++ b/arch/h8300/include/uapi/asm/termbits.h diff --git a/arch/h8300/include/uapi/asm/termios.h b/arch/h8300/include/uapi/asm/termios.h new file mode 100644 index 000000000000..5a67d7e38843 --- /dev/null +++ b/arch/h8300/include/uapi/asm/termios.h @@ -0,0 +1,44 @@ +#ifndef _UAPI_H8300_TERMIOS_H +#define _UAPI_H8300_TERMIOS_H + +#include <asm/termbits.h> +#include <asm/ioctls.h> + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + + +#endif /* _UAPI_H8300_TERMIOS_H */ diff --git a/arch/h8300/include/uapi/asm/types.h b/arch/h8300/include/uapi/asm/types.h new file mode 100644 index 000000000000..9ec9d4c5ac4d --- /dev/null +++ b/arch/h8300/include/uapi/asm/types.h @@ -0,0 +1 @@ +#include <asm-generic/int-ll64.h> diff --git a/arch/h8300/include/uapi/asm/unistd.h b/arch/h8300/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..8cb5d429f840 --- /dev/null +++ b/arch/h8300/include/uapi/asm/unistd.h @@ -0,0 +1,330 @@ +#ifndef _UAPI_ASM_H8300_UNISTD_H_ +#define _UAPI_ASM_H8300_UNISTD_H_ + +/* + * This file contains the system call numbers. + */ + +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86old 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_vm86 166 +#define __NR_query_module 167 +#define __NR_poll 168 +#define __NR_nfsservctl 169 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread64 180 +#define __NR_pwrite64 181 +#define __NR_chown 182 +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +#define __NR_getpmsg 188 /* some people actually want streams */ +#define __NR_putpmsg 189 /* some people actually want streams */ +#define __NR_vfork 190 +#define __NR_ugetrlimit 191 +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_madvise1 219 +#define __NR_getdents64 220 +#define __NR_fcntl64 221 +/* 223 is unused */ +#define __NR_gettid 224 +#define __NR_readahead 225 +#define __NR_setxattr 226 +#define __NR_lsetxattr 227 +#define __NR_fsetxattr 228 +#define __NR_getxattr 229 +#define __NR_lgetxattr 230 +#define __NR_fgetxattr 231 +#define __NR_listxattr 232 +#define __NR_llistxattr 233 +#define __NR_flistxattr 234 +#define __NR_removexattr 235 +#define __NR_lremovexattr 236 +#define __NR_fremovexattr 237 +#define __NR_tkill 238 +#define __NR_sendfile64 239 +#define __NR_futex 240 +#define __NR_sched_setaffinity 241 +#define __NR_sched_getaffinity 242 +#define __NR_set_thread_area 243 +#define __NR_get_thread_area 244 +#define __NR_io_setup 245 +#define __NR_io_destroy 246 +#define __NR_io_getevents 247 +#define __NR_io_submit 248 +#define __NR_io_cancel 249 +#define __NR_fadvise64 250 +/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ +#define __NR_exit_group 252 +#define __NR_lookup_dcookie 253 +#define __NR_epoll_create 254 +#define __NR_epoll_ctl 255 +#define __NR_epoll_wait 256 +#define __NR_remap_file_pages 257 +#define __NR_set_tid_address 258 +#define __NR_timer_create 259 +#define __NR_timer_settime (__NR_timer_create+1) +#define __NR_timer_gettime (__NR_timer_create+2) +#define __NR_timer_getoverrun (__NR_timer_create+3) +#define __NR_timer_delete (__NR_timer_create+4) +#define __NR_clock_settime (__NR_timer_create+5) +#define __NR_clock_gettime (__NR_timer_create+6) +#define __NR_clock_getres (__NR_timer_create+7) +#define __NR_clock_nanosleep (__NR_timer_create+8) +#define __NR_statfs64 268 +#define __NR_fstatfs64 269 +#define __NR_tgkill 270 +#define __NR_utimes 271 +#define __NR_fadvise64_64 272 +#define __NR_vserver 273 +#define __NR_mbind 274 +#define __NR_get_mempolicy 275 +#define __NR_set_mempolicy 276 +#define __NR_mq_open 277 +#define __NR_mq_unlink (__NR_mq_open+1) +#define __NR_mq_timedsend (__NR_mq_open+2) +#define __NR_mq_timedreceive (__NR_mq_open+3) +#define __NR_mq_notify (__NR_mq_open+4) +#define __NR_mq_getsetattr (__NR_mq_open+5) +#define __NR_kexec_load 283 +#define __NR_waitid 284 +/* #define __NR_sys_setaltroot 285 */ +#define __NR_add_key 286 +#define __NR_request_key 287 +#define __NR_keyctl 288 +#define __NR_ioprio_set 289 +#define __NR_ioprio_get 290 +#define __NR_inotify_init 291 +#define __NR_inotify_add_watch 292 +#define __NR_inotify_rm_watch 293 +#define __NR_migrate_pages 294 +#define __NR_openat 295 +#define __NR_mkdirat 296 +#define __NR_mknodat 297 +#define __NR_fchownat 298 +#define __NR_futimesat 299 +#define __NR_fstatat64 300 +#define __NR_unlinkat 301 +#define __NR_renameat 302 +#define __NR_linkat 303 +#define __NR_symlinkat 304 +#define __NR_readlinkat 305 +#define __NR_fchmodat 306 +#define __NR_faccessat 307 +#define __NR_pselect6 308 +#define __NR_ppoll 309 +#define __NR_unshare 310 +#define __NR_set_robust_list 311 +#define __NR_get_robust_list 312 +#define __NR_splice 313 +#define __NR_sync_file_range 314 +#define __NR_tee 315 +#define __NR_vmsplice 316 +#define __NR_move_pages 317 +#define __NR_getcpu 318 +#define __NR_epoll_pwait 319 +#define __NR_setns 320 + +#endif /* _UAPI_ASM_H8300_UNISTD_H_ */ diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index e418803b6c8e..0744f7d7b1fd 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -31,8 +31,6 @@ config HEXAGON select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE ---help--- Qualcomm Hexagon is a processor architecture designed for high performance and low power across a wide variety of applications. diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h index 2af81533bd0f..4a87cc47075c 100644 --- a/arch/hexagon/include/uapi/asm/unistd.h +++ b/arch/hexagon/include/uapi/asm/unistd.h @@ -27,7 +27,6 @@ */ #define sys_mmap2 sys_mmap_pgoff -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE #include <asm-generic/unistd.h> diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 670600468128..3279646120e3 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -42,8 +42,6 @@ config IA64 select GENERIC_TIME_VSYSCALL_OLD select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 4f5e8148440d..cf3ab7e784b5 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -58,6 +58,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr) { struct dma_map_ops *ops = platform_dma_get_ops(dev); + debug_dma_mapping_error(dev, daddr); return ops->mapping_error(dev, daddr); } diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h index b0e973649cb9..845143990a1d 100644 --- a/arch/ia64/include/asm/ptrace.h +++ b/arch/ia64/include/asm/ptrace.h @@ -78,6 +78,11 @@ static inline long regs_return_value(struct pt_regs *regs) unsigned long __ip = instruction_pointer(regs); \ (__ip & ~3UL) + ((__ip & 3UL) << 2); \ }) +/* + * Why not default? Because user_stack_pointer() on ia64 gives register + * stack backing store instead... + */ +#define current_user_stack_pointer() (current_pt_regs()->r12) /* given a pointer to a task_struct, return the user's pt_regs */ # define task_pt_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1) diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 1574bca86138..8b3ff2f5b861 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -29,7 +29,6 @@ #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER) diff --git a/arch/ia64/include/uapi/asm/signal.h b/arch/ia64/include/uapi/asm/signal.h index e531c424434c..c0ea2855e96b 100644 --- a/arch/ia64/include/uapi/asm/signal.h +++ b/arch/ia64/include/uapi/asm/signal.h @@ -79,12 +79,6 @@ #define SA_RESTORER 0x04000000 /* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - -/* * The minimum stack size needs to be fairly large because we want to * be sure that an app compiled for today's CPUs will continue to run * on all future CPU models. The CPU model matters because the signal diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 5183f43a2cf7..f807721e19a5 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -15,8 +15,6 @@ config M32R select GENERIC_ATOMIC64 select ARCH_USES_GETTIMEOFFSET select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE config SBUS bool diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 4bc8ae73e08a..bebdc36ebb0a 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -1,4 +1,3 @@ -include include/asm-generic/Kbuild.asm generic-y += clkdev.h generic-y += exec.h diff --git a/arch/m32r/include/asm/ptrace.h b/arch/m32r/include/asm/ptrace.h index c4432f1fb2cf..fa58ccfff865 100644 --- a/arch/m32r/include/asm/ptrace.h +++ b/arch/m32r/include/asm/ptrace.h @@ -1,6 +1,3 @@ -#ifndef _ASM_M32R_PTRACE_H -#define _ASM_M32R_PTRACE_H - /* * linux/include/asm-m32r/ptrace.h * @@ -11,111 +8,12 @@ * M32R version: * Copyright (C) 2001-2002, 2004 Hirokazu Takata <takata at linux-m32r.org> */ +#ifndef _ASM_M32R_PTRACE_H +#define _ASM_M32R_PTRACE_H -/* 0 - 13 are integer registers (general purpose registers). */ -#define PT_R4 0 -#define PT_R5 1 -#define PT_R6 2 -#define PT_REGS 3 -#define PT_R0 4 -#define PT_R1 5 -#define PT_R2 6 -#define PT_R3 7 -#define PT_R7 8 -#define PT_R8 9 -#define PT_R9 10 -#define PT_R10 11 -#define PT_R11 12 -#define PT_R12 13 -#define PT_SYSCNR 14 -#define PT_R13 PT_FP -#define PT_R14 PT_LR -#define PT_R15 PT_SP - -/* processor status and miscellaneous context registers. */ -#define PT_ACC0H 15 -#define PT_ACC0L 16 -#define PT_ACC1H 17 /* ISA_DSP_LEVEL2 only */ -#define PT_ACC1L 18 /* ISA_DSP_LEVEL2 only */ -#define PT_PSW 19 -#define PT_BPC 20 -#define PT_BBPSW 21 -#define PT_BBPC 22 -#define PT_SPU 23 -#define PT_FP 24 -#define PT_LR 25 -#define PT_SPI 26 -#define PT_ORIGR0 27 - -/* virtual pt_reg entry for gdb */ -#define PT_PC 30 -#define PT_CBR 31 -#define PT_EVB 32 - - -/* Control registers. */ -#define SPR_CR0 PT_PSW -#define SPR_CR1 PT_CBR /* read only */ -#define SPR_CR2 PT_SPI -#define SPR_CR3 PT_SPU -#define SPR_CR4 -#define SPR_CR5 PT_EVB /* part of M32R/E, M32R/I core only */ -#define SPR_CR6 PT_BPC -#define SPR_CR7 -#define SPR_CR8 PT_BBPSW -#define SPR_CR9 -#define SPR_CR10 -#define SPR_CR11 -#define SPR_CR12 -#define SPR_CR13 PT_WR -#define SPR_CR14 PT_BBPC -#define SPR_CR15 - -/* this struct defines the way the registers are stored on the - stack during a system call. */ -struct pt_regs { - /* Saved main processor registers. */ - unsigned long r4; - unsigned long r5; - unsigned long r6; - struct pt_regs *pt_regs; - unsigned long r0; - unsigned long r1; - unsigned long r2; - unsigned long r3; - unsigned long r7; - unsigned long r8; - unsigned long r9; - unsigned long r10; - unsigned long r11; - unsigned long r12; - long syscall_nr; - - /* Saved main processor status and miscellaneous context registers. */ - unsigned long acc0h; - unsigned long acc0l; - unsigned long acc1h; /* ISA_DSP_LEVEL2 only */ - unsigned long acc1l; /* ISA_DSP_LEVEL2 only */ - unsigned long psw; - unsigned long bpc; /* saved PC for TRAP syscalls */ - unsigned long bbpsw; - unsigned long bbpc; - unsigned long spu; /* saved user stack */ - unsigned long fp; - unsigned long lr; /* saved PC for JL syscalls */ - unsigned long spi; /* saved kernel stack */ - unsigned long orig_r0; -}; - -/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 - -#define PTRACE_OLDSETOPTIONS 21 - -#ifdef __KERNEL__ #include <asm/m32r.h> /* M32R_PSW_BSM, M32R_PSW_BPM */ +#include <uapi/asm/ptrace.h> #define arch_has_single_step() (1) @@ -134,6 +32,7 @@ extern void init_debug_traps(struct task_struct *); #define instruction_pointer(regs) ((regs)->bpc) #define profile_pc(regs) instruction_pointer(regs) +#define user_stack_pointer(regs) ((regs)->spu) extern void withdraw_debug_trap(struct pt_regs *regs); @@ -142,6 +41,4 @@ extern void withdraw_debug_trap(struct pt_regs *regs); #define current_pt_regs() ((struct pt_regs *) \ ((unsigned long)current_thread_info() + THREAD_SIZE) - 1) -#endif /* __KERNEL */ - #endif /* _ASM_M32R_PTRACE_H */ diff --git a/arch/m32r/include/asm/setup.h b/arch/m32r/include/asm/setup.h index c637ab992394..bbe59a9ce8c6 100644 --- a/arch/m32r/include/asm/setup.h +++ b/arch/m32r/include/asm/setup.h @@ -1,13 +1,8 @@ #ifndef _ASM_M32R_SETUP_H #define _ASM_M32R_SETUP_H -/* - * This is set up by the setup-routine at boot-time - */ +#include <uapi/asm/setup.h> -#define COMMAND_LINE_SIZE 512 - -#ifdef __KERNEL__ #define PARAM ((unsigned char *)empty_zero_page) @@ -33,6 +28,4 @@ extern unsigned long memory_start; extern unsigned long memory_end; -#endif /* __KERNEL__ */ - #endif /* _ASM_M32R_SETUP_H */ diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h index e4d2e2ad5f1e..a5ba4a217fb9 100644 --- a/arch/m32r/include/asm/signal.h +++ b/arch/m32r/include/asm/signal.h @@ -1,14 +1,8 @@ #ifndef _ASM_M32R_SIGNAL_H #define _ASM_M32R_SIGNAL_H -#include <linux/types.h> -#include <linux/time.h> -#include <linux/compiler.h> +#include <uapi/asm/signal.h> -/* Avoid too many header ordering problems. */ -struct siginfo; - -#ifdef __KERNEL__ /* Most things should be clean enough to redefine this at will, if care is taken to make libc match. */ @@ -22,94 +16,6 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; -#else -/* Here we must cater to libcs that poke about in kernel headers. */ - -#define NSIG 32 -typedef unsigned long sigset_t; - -#endif /* __KERNEL__ */ - -#define SIGHUP 1 -#define SIGINT 2 -#define SIGQUIT 3 -#define SIGILL 4 -#define SIGTRAP 5 -#define SIGABRT 6 -#define SIGIOT 6 -#define SIGBUS 7 -#define SIGFPE 8 -#define SIGKILL 9 -#define SIGUSR1 10 -#define SIGSEGV 11 -#define SIGUSR2 12 -#define SIGPIPE 13 -#define SIGALRM 14 -#define SIGTERM 15 -#define SIGSTKFLT 16 -#define SIGCHLD 17 -#define SIGCONT 18 -#define SIGSTOP 19 -#define SIGTSTP 20 -#define SIGTTIN 21 -#define SIGTTOU 22 -#define SIGURG 23 -#define SIGXCPU 24 -#define SIGXFSZ 25 -#define SIGVTALRM 26 -#define SIGPROF 27 -#define SIGWINCH 28 -#define SIGIO 29 -#define SIGPOLL SIGIO -/* -#define SIGLOST 29 -*/ -#define SIGPWR 30 -#define SIGSYS 31 -#define SIGUNUSED 31 - -/* These should not be considered constants from userland. */ -#define SIGRTMIN 32 -#define SIGRTMAX _NSIG - -/* - * SA_FLAGS values: - * - * SA_ONSTACK indicates that a registered stack_t will be used. - * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. - * SA_RESETHAND clears the handler when the signal is delivered. - * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. - * SA_NODEFER prevents the current signal from being masked in the handler. - * - * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single - * Unix names RESETHAND and NODEFER respectively. - */ -#define SA_NOCLDSTOP 0x00000001u -#define SA_NOCLDWAIT 0x00000002u -#define SA_SIGINFO 0x00000004u -#define SA_ONSTACK 0x08000000u -#define SA_RESTART 0x10000000u -#define SA_NODEFER 0x40000000u -#define SA_RESETHAND 0x80000000u - -#define SA_NOMASK SA_NODEFER -#define SA_ONESHOT SA_RESETHAND - -#define SA_RESTORER 0x04000000 - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - -#define MINSIGSTKSZ 2048 -#define SIGSTKSZ 8192 - -#include <asm-generic/signal-defs.h> - -#ifdef __KERNEL__ struct sigaction { __sighandler_t sa_handler; unsigned long sa_flags; @@ -120,35 +26,8 @@ struct sigaction { struct k_sigaction { struct sigaction sa; }; -#else -/* Here we must cater to libcs that poke about in kernel headers. */ - -struct sigaction { - union { - __sighandler_t _sa_handler; - void (*_sa_sigaction)(int, struct siginfo *, void *); - } _u; - sigset_t sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -}; - -#define sa_handler _u._sa_handler -#define sa_sigaction _u._sa_sigaction - -#endif /* __KERNEL__ */ - -typedef struct sigaltstack { - void __user *ss_sp; - int ss_flags; - size_t ss_size; -} stack_t; - -#ifdef __KERNEL__ #include <asm/sigcontext.h> #undef __HAVE_ARCH_SIG_BITOPS -#endif /* __KERNEL__ */ - #endif /* _ASM_M32R_SIGNAL_H */ diff --git a/arch/m32r/include/asm/termios.h b/arch/m32r/include/asm/termios.h index 93ce79fd342a..680898f0b3d6 100644 --- a/arch/m32r/include/asm/termios.h +++ b/arch/m32r/include/asm/termios.h @@ -1,46 +1,8 @@ #ifndef _M32R_TERMIOS_H #define _M32R_TERMIOS_H -#include <asm/termbits.h> -#include <asm/ioctls.h> - -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; - -#define NCC 8 -struct termio { - unsigned short c_iflag; /* input mode flags */ - unsigned short c_oflag; /* output mode flags */ - unsigned short c_cflag; /* control mode flags */ - unsigned short c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[NCC]; /* control characters */ -}; - -/* modem lines */ -#define TIOCM_LE 0x001 -#define TIOCM_DTR 0x002 -#define TIOCM_RTS 0x004 -#define TIOCM_ST 0x008 -#define TIOCM_SR 0x010 -#define TIOCM_CTS 0x020 -#define TIOCM_CAR 0x040 -#define TIOCM_RNG 0x080 -#define TIOCM_DSR 0x100 -#define TIOCM_CD TIOCM_CAR -#define TIOCM_RI TIOCM_RNG -#define TIOCM_OUT1 0x2000 -#define TIOCM_OUT2 0x4000 -#define TIOCM_LOOP 0x8000 - -/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ - -#ifdef __KERNEL__ #include <linux/module.h> +#include <uapi/asm/termios.h> /* intr=^C quit=^\ erase=del kill=^U eof=^D vtime=\0 vmin=\1 sxtc=\0 @@ -86,6 +48,4 @@ struct termio { #define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) #define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) -#endif /* __KERNEL__ */ - #endif /* _M32R_TERMIOS_H */ diff --git a/arch/m32r/include/asm/types.h b/arch/m32r/include/asm/types.h index bb2eeadecf99..04a44c6ee34d 100644 --- a/arch/m32r/include/asm/types.h +++ b/arch/m32r/include/asm/types.h @@ -1,15 +1,12 @@ #ifndef _ASM_M32R_TYPES_H #define _ASM_M32R_TYPES_H -#include <asm-generic/int-ll64.h> +#include <uapi/asm/types.h> /* * These aren't exported outside the kernel to avoid name space clashes */ -#ifdef __KERNEL__ #define BITS_PER_LONG 32 -#endif /* __KERNEL__ */ - #endif /* _ASM_M32R_TYPES_H */ diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h index d9e7351af2a4..79b063caec85 100644 --- a/arch/m32r/include/asm/unistd.h +++ b/arch/m32r/include/asm/unistd.h @@ -1,338 +1,8 @@ #ifndef _ASM_M32R_UNISTD_H #define _ASM_M32R_UNISTD_H -/* - * This file contains the system call numbers. - */ - -#define __NR_restart_syscall 0 -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_waitpid 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_time 13 -#define __NR_mknod 14 -#define __NR_chmod 15 -/* 16 is unused */ -/* 17 is unused */ -/* 18 is unused */ -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 -/* 23 is unused */ -/* 24 is unused */ -#define __NR_stime 25 -#define __NR_ptrace 26 -#define __NR_alarm 27 -/* 28 is unused */ -#define __NR_pause 29 -#define __NR_utime 30 -/* 31 is unused */ -#define __NR_cachectl 32 /* old #define __NR_gtty 32*/ -#define __NR_access 33 -/* 34 is unused */ -/* 35 is unused */ -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -/* 44 is unused */ -#define __NR_brk 45 -/* 46 is unused */ -/* 47 is unused (getgid16) */ -/* 48 is unused */ -/* 49 is unused */ -/* 50 is unused */ -#define __NR_acct 51 -#define __NR_umount2 52 -/* 53 is unused */ -#define __NR_ioctl 54 -/* 55 is unused (fcntl) */ -/* 56 is unused */ -#define __NR_setpgid 57 -/* 58 is unused */ -/* 59 is unused */ -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -/* 67 is unused */ -/* 68 is unused*/ -/* 69 is unused*/ -/* 70 is unused */ -/* 71 is unused */ -/* 72 is unused */ -/* 73 is unused */ -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -/* 76 is unused (old getrlimit) */ -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -/* 80 is unused */ -/* 81 is unused */ -/* 82 is unused */ -#define __NR_symlink 83 -/* 84 is unused */ -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -/* 89 is unused */ -/* 90 is unused */ -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -/* 95 is unused */ -#define __NR_getpriority 96 -#define __NR_setpriority 97 -/* 98 is unused */ -#define __NR_statfs 99 -#define __NR_fstatfs 100 -/* 101 is unused */ -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -/* 109 is unused */ -/* 110 is unused */ -#define __NR_vhangup 111 -/* 112 is unused */ -/* 113 is unused */ -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -/* 119 is unused */ -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -/* 123 is unused */ -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -/* 126 is unused */ -/* 127 is unused */ -#define __NR_init_module 128 -#define __NR_delete_module 129 -/* 130 is unused */ -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 -#define __NR_personality 136 -/* 137 is unused */ -/* 138 is unused */ -/* 139 is unused */ -#define __NR__llseek 140 -#define __NR_getdents 141 -#define __NR__newselect 142 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define __NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -/* 164 is unused */ -/* 165 is unused */ -#define __NR_tas 166 -/* 167 is unused */ -#define __NR_poll 168 -#define __NR_nfsservctl 169 -/* 170 is unused */ -/* 171 is unused */ -#define __NR_prctl 172 -#define __NR_rt_sigreturn 173 -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 -#define __NR_rt_sigpending 176 -#define __NR_rt_sigtimedwait 177 -#define __NR_rt_sigqueueinfo 178 -#define __NR_rt_sigsuspend 179 -#define __NR_pread64 180 -#define __NR_pwrite64 181 -/* 182 is unused */ -#define __NR_getcwd 183 -#define __NR_capget 184 -#define __NR_capset 185 -#define __NR_sigaltstack 186 -#define __NR_sendfile 187 -/* 188 is unused */ -/* 189 is unused */ -#define __NR_vfork 190 -#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ -#define __NR_mmap2 192 -#define __NR_truncate64 193 -#define __NR_ftruncate64 194 -#define __NR_stat64 195 -#define __NR_lstat64 196 -#define __NR_fstat64 197 -#define __NR_lchown32 198 -#define __NR_getuid32 199 -#define __NR_getgid32 200 -#define __NR_geteuid32 201 -#define __NR_getegid32 202 -#define __NR_setreuid32 203 -#define __NR_setregid32 204 -#define __NR_getgroups32 205 -#define __NR_setgroups32 206 -#define __NR_fchown32 207 -#define __NR_setresuid32 208 -#define __NR_getresuid32 209 -#define __NR_setresgid32 210 -#define __NR_getresgid32 211 -#define __NR_chown32 212 -#define __NR_setuid32 213 -#define __NR_setgid32 214 -#define __NR_setfsuid32 215 -#define __NR_setfsgid32 216 -#define __NR_pivot_root 217 -#define __NR_mincore 218 -#define __NR_madvise 219 -#define __NR_getdents64 220 -#define __NR_fcntl64 221 -/* 222 is unused */ -/* 223 is unused */ -#define __NR_gettid 224 -#define __NR_readahead 225 -#define __NR_setxattr 226 -#define __NR_lsetxattr 227 -#define __NR_fsetxattr 228 -#define __NR_getxattr 229 -#define __NR_lgetxattr 230 -#define __NR_fgetxattr 231 -#define __NR_listxattr 232 -#define __NR_llistxattr 233 -#define __NR_flistxattr 234 -#define __NR_removexattr 235 -#define __NR_lremovexattr 236 -#define __NR_fremovexattr 237 -#define __NR_tkill 238 -#define __NR_sendfile64 239 -#define __NR_futex 240 -#define __NR_sched_setaffinity 241 -#define __NR_sched_getaffinity 242 -#define __NR_set_thread_area 243 -#define __NR_get_thread_area 244 -#define __NR_io_setup 245 -#define __NR_io_destroy 246 -#define __NR_io_getevents 247 -#define __NR_io_submit 248 -#define __NR_io_cancel 249 -#define __NR_fadvise64 250 -/* 251 is unused */ -#define __NR_exit_group 252 -#define __NR_lookup_dcookie 253 -#define __NR_epoll_create 254 -#define __NR_epoll_ctl 255 -#define __NR_epoll_wait 256 -#define __NR_remap_file_pages 257 -#define __NR_set_tid_address 258 -#define __NR_timer_create 259 -#define __NR_timer_settime (__NR_timer_create+1) -#define __NR_timer_gettime (__NR_timer_create+2) -#define __NR_timer_getoverrun (__NR_timer_create+3) -#define __NR_timer_delete (__NR_timer_create+4) -#define __NR_clock_settime (__NR_timer_create+5) -#define __NR_clock_gettime (__NR_timer_create+6) -#define __NR_clock_getres (__NR_timer_create+7) -#define __NR_clock_nanosleep (__NR_timer_create+8) -#define __NR_statfs64 268 -#define __NR_fstatfs64 269 -#define __NR_tgkill 270 -#define __NR_utimes 271 -#define __NR_fadvise64_64 272 -#define __NR_vserver 273 -#define __NR_mbind 274 -#define __NR_get_mempolicy 275 -#define __NR_set_mempolicy 276 -#define __NR_mq_open 277 -#define __NR_mq_unlink (__NR_mq_open+1) -#define __NR_mq_timedsend (__NR_mq_open+2) -#define __NR_mq_timedreceive (__NR_mq_open+3) -#define __NR_mq_notify (__NR_mq_open+4) -#define __NR_mq_getsetattr (__NR_mq_open+5) -#define __NR_kexec_load 283 -#define __NR_waitid 284 -/* 285 is unused */ -#define __NR_add_key 286 -#define __NR_request_key 287 -#define __NR_keyctl 288 -#define __NR_ioprio_set 289 -#define __NR_ioprio_get 290 -#define __NR_inotify_init 291 -#define __NR_inotify_add_watch 292 -#define __NR_inotify_rm_watch 293 -#define __NR_migrate_pages 294 -#define __NR_openat 295 -#define __NR_mkdirat 296 -#define __NR_mknodat 297 -#define __NR_fchownat 298 -#define __NR_futimesat 299 -#define __NR_fstatat64 300 -#define __NR_unlinkat 301 -#define __NR_renameat 302 -#define __NR_linkat 303 -#define __NR_symlinkat 304 -#define __NR_readlinkat 305 -#define __NR_fchmodat 306 -#define __NR_faccessat 307 -#define __NR_pselect6 308 -#define __NR_ppoll 309 -#define __NR_unshare 310 -#define __NR_set_robust_list 311 -#define __NR_get_robust_list 312 -#define __NR_splice 313 -#define __NR_sync_file_range 314 -#define __NR_tee 315 -#define __NR_vmsplice 316 -#define __NR_move_pages 317 -#define __NR_getcpu 318 -#define __NR_epoll_pwait 319 -#define __NR_utimensat 320 -#define __NR_signalfd 321 -/* #define __NR_timerfd 322 removed */ -#define __NR_eventfd 323 -#define __NR_fallocate 324 -#define __NR_setns 325 +#include <uapi/asm/unistd.h> -#ifdef __KERNEL__ #define NR_syscalls 326 @@ -352,7 +22,6 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK @@ -391,5 +60,4 @@ #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") #endif -#endif /* __KERNEL__ */ #endif /* _ASM_M32R_UNISTD_H */ diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild index baebb3da1d44..43937a61d6cf 100644 --- a/arch/m32r/include/uapi/asm/Kbuild +++ b/arch/m32r/include/uapi/asm/Kbuild @@ -1,3 +1,33 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +header-y += auxvec.h +header-y += bitsperlong.h +header-y += byteorder.h +header-y += errno.h +header-y += fcntl.h +header-y += ioctl.h +header-y += ioctls.h +header-y += ipcbuf.h +header-y += mman.h +header-y += msgbuf.h +header-y += param.h +header-y += poll.h +header-y += posix_types.h +header-y += ptrace.h +header-y += resource.h +header-y += sembuf.h +header-y += setup.h +header-y += shmbuf.h +header-y += sigcontext.h +header-y += siginfo.h +header-y += signal.h +header-y += socket.h +header-y += sockios.h +header-y += stat.h +header-y += statfs.h +header-y += swab.h +header-y += termbits.h +header-y += termios.h +header-y += types.h +header-y += unistd.h diff --git a/arch/m32r/include/asm/auxvec.h b/arch/m32r/include/uapi/asm/auxvec.h index f76dcc860fae..f76dcc860fae 100644 --- a/arch/m32r/include/asm/auxvec.h +++ b/arch/m32r/include/uapi/asm/auxvec.h diff --git a/arch/m32r/include/asm/bitsperlong.h b/arch/m32r/include/uapi/asm/bitsperlong.h index 6dc0bb0c13b2..6dc0bb0c13b2 100644 --- a/arch/m32r/include/asm/bitsperlong.h +++ b/arch/m32r/include/uapi/asm/bitsperlong.h diff --git a/arch/m32r/include/asm/byteorder.h b/arch/m32r/include/uapi/asm/byteorder.h index 21855d8b028b..21855d8b028b 100644 --- a/arch/m32r/include/asm/byteorder.h +++ b/arch/m32r/include/uapi/asm/byteorder.h diff --git a/arch/m32r/include/asm/errno.h b/arch/m32r/include/uapi/asm/errno.h index 777149262aad..777149262aad 100644 --- a/arch/m32r/include/asm/errno.h +++ b/arch/m32r/include/uapi/asm/errno.h diff --git a/arch/m32r/include/asm/fcntl.h b/arch/m32r/include/uapi/asm/fcntl.h index 46ab12db5739..46ab12db5739 100644 --- a/arch/m32r/include/asm/fcntl.h +++ b/arch/m32r/include/uapi/asm/fcntl.h diff --git a/arch/m32r/include/asm/ioctl.h b/arch/m32r/include/uapi/asm/ioctl.h index b279fe06dfe5..b279fe06dfe5 100644 --- a/arch/m32r/include/asm/ioctl.h +++ b/arch/m32r/include/uapi/asm/ioctl.h diff --git a/arch/m32r/include/asm/ioctls.h b/arch/m32r/include/uapi/asm/ioctls.h index 349bf87bfbd0..349bf87bfbd0 100644 --- a/arch/m32r/include/asm/ioctls.h +++ b/arch/m32r/include/uapi/asm/ioctls.h diff --git a/arch/m32r/include/asm/ipcbuf.h b/arch/m32r/include/uapi/asm/ipcbuf.h index 84c7e51cb6d0..84c7e51cb6d0 100644 --- a/arch/m32r/include/asm/ipcbuf.h +++ b/arch/m32r/include/uapi/asm/ipcbuf.h diff --git a/arch/m32r/include/asm/mman.h b/arch/m32r/include/uapi/asm/mman.h index 8eebf89f5ab1..8eebf89f5ab1 100644 --- a/arch/m32r/include/asm/mman.h +++ b/arch/m32r/include/uapi/asm/mman.h diff --git a/arch/m32r/include/asm/msgbuf.h b/arch/m32r/include/uapi/asm/msgbuf.h index 0d5a877b813e..0d5a877b813e 100644 --- a/arch/m32r/include/asm/msgbuf.h +++ b/arch/m32r/include/uapi/asm/msgbuf.h diff --git a/arch/m32r/include/asm/param.h b/arch/m32r/include/uapi/asm/param.h index fa207bdf96e7..fa207bdf96e7 100644 --- a/arch/m32r/include/asm/param.h +++ b/arch/m32r/include/uapi/asm/param.h diff --git a/arch/m32r/include/asm/poll.h b/arch/m32r/include/uapi/asm/poll.h index c98509d3149e..c98509d3149e 100644 --- a/arch/m32r/include/asm/poll.h +++ b/arch/m32r/include/uapi/asm/poll.h diff --git a/arch/m32r/include/asm/posix_types.h b/arch/m32r/include/uapi/asm/posix_types.h index 236de26a409b..236de26a409b 100644 --- a/arch/m32r/include/asm/posix_types.h +++ b/arch/m32r/include/uapi/asm/posix_types.h diff --git a/arch/m32r/include/uapi/asm/ptrace.h b/arch/m32r/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..f6930a822517 --- /dev/null +++ b/arch/m32r/include/uapi/asm/ptrace.h @@ -0,0 +1,117 @@ +/* + * linux/include/asm-m32r/ptrace.h + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * M32R version: + * Copyright (C) 2001-2002, 2004 Hirokazu Takata <takata at linux-m32r.org> + */ +#ifndef _UAPI_ASM_M32R_PTRACE_H +#define _UAPI_ASM_M32R_PTRACE_H + + +/* 0 - 13 are integer registers (general purpose registers). */ +#define PT_R4 0 +#define PT_R5 1 +#define PT_R6 2 +#define PT_REGS 3 +#define PT_R0 4 +#define PT_R1 5 +#define PT_R2 6 +#define PT_R3 7 +#define PT_R7 8 +#define PT_R8 9 +#define PT_R9 10 +#define PT_R10 11 +#define PT_R11 12 +#define PT_R12 13 +#define PT_SYSCNR 14 +#define PT_R13 PT_FP +#define PT_R14 PT_LR +#define PT_R15 PT_SP + +/* processor status and miscellaneous context registers. */ +#define PT_ACC0H 15 +#define PT_ACC0L 16 +#define PT_ACC1H 17 /* ISA_DSP_LEVEL2 only */ +#define PT_ACC1L 18 /* ISA_DSP_LEVEL2 only */ +#define PT_PSW 19 +#define PT_BPC 20 +#define PT_BBPSW 21 +#define PT_BBPC 22 +#define PT_SPU 23 +#define PT_FP 24 +#define PT_LR 25 +#define PT_SPI 26 +#define PT_ORIGR0 27 + +/* virtual pt_reg entry for gdb */ +#define PT_PC 30 +#define PT_CBR 31 +#define PT_EVB 32 + + +/* Control registers. */ +#define SPR_CR0 PT_PSW +#define SPR_CR1 PT_CBR /* read only */ +#define SPR_CR2 PT_SPI +#define SPR_CR3 PT_SPU +#define SPR_CR4 +#define SPR_CR5 PT_EVB /* part of M32R/E, M32R/I core only */ +#define SPR_CR6 PT_BPC +#define SPR_CR7 +#define SPR_CR8 PT_BBPSW +#define SPR_CR9 +#define SPR_CR10 +#define SPR_CR11 +#define SPR_CR12 +#define SPR_CR13 PT_WR +#define SPR_CR14 PT_BBPC +#define SPR_CR15 + +/* this struct defines the way the registers are stored on the + stack during a system call. */ +struct pt_regs { + /* Saved main processor registers. */ + unsigned long r4; + unsigned long r5; + unsigned long r6; + struct pt_regs *pt_regs; + unsigned long r0; + unsigned long r1; + unsigned long r2; + unsigned long r3; + unsigned long r7; + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + long syscall_nr; + + /* Saved main processor status and miscellaneous context registers. */ + unsigned long acc0h; + unsigned long acc0l; + unsigned long acc1h; /* ISA_DSP_LEVEL2 only */ + unsigned long acc1l; /* ISA_DSP_LEVEL2 only */ + unsigned long psw; + unsigned long bpc; /* saved PC for TRAP syscalls */ + unsigned long bbpsw; + unsigned long bbpc; + unsigned long spu; /* saved user stack */ + unsigned long fp; + unsigned long lr; /* saved PC for JL syscalls */ + unsigned long spi; /* saved kernel stack */ + unsigned long orig_r0; +}; + +/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 + +#define PTRACE_OLDSETOPTIONS 21 + + +#endif /* _UAPI_ASM_M32R_PTRACE_H */ diff --git a/arch/m32r/include/asm/resource.h b/arch/m32r/include/uapi/asm/resource.h index b1ce766e37a0..b1ce766e37a0 100644 --- a/arch/m32r/include/asm/resource.h +++ b/arch/m32r/include/uapi/asm/resource.h diff --git a/arch/m32r/include/asm/sembuf.h b/arch/m32r/include/uapi/asm/sembuf.h index c9873d6890e2..c9873d6890e2 100644 --- a/arch/m32r/include/asm/sembuf.h +++ b/arch/m32r/include/uapi/asm/sembuf.h diff --git a/arch/m32r/include/uapi/asm/setup.h b/arch/m32r/include/uapi/asm/setup.h new file mode 100644 index 000000000000..96961a42e5f4 --- /dev/null +++ b/arch/m32r/include/uapi/asm/setup.h @@ -0,0 +1,11 @@ +#ifndef _UAPI_ASM_M32R_SETUP_H +#define _UAPI_ASM_M32R_SETUP_H + +/* + * This is set up by the setup-routine at boot-time + */ + +#define COMMAND_LINE_SIZE 512 + + +#endif /* _UAPI_ASM_M32R_SETUP_H */ diff --git a/arch/m32r/include/asm/shmbuf.h b/arch/m32r/include/uapi/asm/shmbuf.h index b0cdf0aa7d65..b0cdf0aa7d65 100644 --- a/arch/m32r/include/asm/shmbuf.h +++ b/arch/m32r/include/uapi/asm/shmbuf.h diff --git a/arch/m32r/include/asm/sigcontext.h b/arch/m32r/include/uapi/asm/sigcontext.h index da4a9c36d09b..da4a9c36d09b 100644 --- a/arch/m32r/include/asm/sigcontext.h +++ b/arch/m32r/include/uapi/asm/sigcontext.h diff --git a/arch/m32r/include/asm/siginfo.h b/arch/m32r/include/uapi/asm/siginfo.h index 7d9cd9ebfd0e..7d9cd9ebfd0e 100644 --- a/arch/m32r/include/asm/siginfo.h +++ b/arch/m32r/include/uapi/asm/siginfo.h diff --git a/arch/m32r/include/uapi/asm/signal.h b/arch/m32r/include/uapi/asm/signal.h new file mode 100644 index 000000000000..54acacb1f1f7 --- /dev/null +++ b/arch/m32r/include/uapi/asm/signal.h @@ -0,0 +1,117 @@ +#ifndef _UAPI_ASM_M32R_SIGNAL_H +#define _UAPI_ASM_M32R_SIGNAL_H + +#include <linux/types.h> +#include <linux/time.h> +#include <linux/compiler.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ +#define SA_NOCLDSTOP 0x00000001u +#define SA_NOCLDWAIT 0x00000002u +#define SA_SIGINFO 0x00000004u +#define SA_ONSTACK 0x08000000u +#define SA_RESTART 0x10000000u +#define SA_NODEFER 0x40000000u +#define SA_RESETHAND 0x80000000u + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +#define SA_RESTORER 0x04000000 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#include <asm-generic/signal-defs.h> + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + + +#endif /* _UAPI_ASM_M32R_SIGNAL_H */ diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 5e7088a26726..5e7088a26726 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h diff --git a/arch/m32r/include/asm/sockios.h b/arch/m32r/include/uapi/asm/sockios.h index 6c1fb9b43bdb..6c1fb9b43bdb 100644 --- a/arch/m32r/include/asm/sockios.h +++ b/arch/m32r/include/uapi/asm/sockios.h diff --git a/arch/m32r/include/asm/stat.h b/arch/m32r/include/uapi/asm/stat.h index da4518f82d6d..da4518f82d6d 100644 --- a/arch/m32r/include/asm/stat.h +++ b/arch/m32r/include/uapi/asm/stat.h diff --git a/arch/m32r/include/asm/statfs.h b/arch/m32r/include/uapi/asm/statfs.h index 6eb4c6007e6b..6eb4c6007e6b 100644 --- a/arch/m32r/include/asm/statfs.h +++ b/arch/m32r/include/uapi/asm/statfs.h diff --git a/arch/m32r/include/asm/swab.h b/arch/m32r/include/uapi/asm/swab.h index 54dab001d6d1..54dab001d6d1 100644 --- a/arch/m32r/include/asm/swab.h +++ b/arch/m32r/include/uapi/asm/swab.h diff --git a/arch/m32r/include/asm/termbits.h b/arch/m32r/include/uapi/asm/termbits.h index 957a3c688549..957a3c688549 100644 --- a/arch/m32r/include/asm/termbits.h +++ b/arch/m32r/include/uapi/asm/termbits.h diff --git a/arch/m32r/include/uapi/asm/termios.h b/arch/m32r/include/uapi/asm/termios.h new file mode 100644 index 000000000000..07ad27b8f7db --- /dev/null +++ b/arch/m32r/include/uapi/asm/termios.h @@ -0,0 +1,43 @@ +#ifndef _UAPI_M32R_TERMIOS_H +#define _UAPI_M32R_TERMIOS_H + +#include <asm/termbits.h> +#include <asm/ioctls.h> + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + + +#endif /* _UAPI_M32R_TERMIOS_H */ diff --git a/arch/m32r/include/uapi/asm/types.h b/arch/m32r/include/uapi/asm/types.h new file mode 100644 index 000000000000..9ec9d4c5ac4d --- /dev/null +++ b/arch/m32r/include/uapi/asm/types.h @@ -0,0 +1 @@ +#include <asm-generic/int-ll64.h> diff --git a/arch/m32r/include/uapi/asm/unistd.h b/arch/m32r/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..5a54f2ae3b51 --- /dev/null +++ b/arch/m32r/include/uapi/asm/unistd.h @@ -0,0 +1,335 @@ +#ifndef _UAPI_ASM_M32R_UNISTD_H +#define _UAPI_ASM_M32R_UNISTD_H + +/* + * This file contains the system call numbers. + */ + +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +/* 16 is unused */ +/* 17 is unused */ +/* 18 is unused */ +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +/* 23 is unused */ +/* 24 is unused */ +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +/* 28 is unused */ +#define __NR_pause 29 +#define __NR_utime 30 +/* 31 is unused */ +#define __NR_cachectl 32 /* old #define __NR_gtty 32*/ +#define __NR_access 33 +/* 34 is unused */ +/* 35 is unused */ +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +/* 44 is unused */ +#define __NR_brk 45 +/* 46 is unused */ +/* 47 is unused (getgid16) */ +/* 48 is unused */ +/* 49 is unused */ +/* 50 is unused */ +#define __NR_acct 51 +#define __NR_umount2 52 +/* 53 is unused */ +#define __NR_ioctl 54 +/* 55 is unused (fcntl) */ +/* 56 is unused */ +#define __NR_setpgid 57 +/* 58 is unused */ +/* 59 is unused */ +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +/* 67 is unused */ +/* 68 is unused*/ +/* 69 is unused*/ +/* 70 is unused */ +/* 71 is unused */ +/* 72 is unused */ +/* 73 is unused */ +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +/* 76 is unused (old getrlimit) */ +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +/* 80 is unused */ +/* 81 is unused */ +/* 82 is unused */ +#define __NR_symlink 83 +/* 84 is unused */ +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +/* 89 is unused */ +/* 90 is unused */ +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +/* 95 is unused */ +#define __NR_getpriority 96 +#define __NR_setpriority 97 +/* 98 is unused */ +#define __NR_statfs 99 +#define __NR_fstatfs 100 +/* 101 is unused */ +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +/* 109 is unused */ +/* 110 is unused */ +#define __NR_vhangup 111 +/* 112 is unused */ +/* 113 is unused */ +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +/* 119 is unused */ +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +/* 123 is unused */ +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +/* 126 is unused */ +/* 127 is unused */ +#define __NR_init_module 128 +#define __NR_delete_module 129 +/* 130 is unused */ +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +/* 137 is unused */ +/* 138 is unused */ +/* 139 is unused */ +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +/* 164 is unused */ +/* 165 is unused */ +#define __NR_tas 166 +/* 167 is unused */ +#define __NR_poll 168 +#define __NR_nfsservctl 169 +/* 170 is unused */ +/* 171 is unused */ +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread64 180 +#define __NR_pwrite64 181 +/* 182 is unused */ +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +/* 188 is unused */ +/* 189 is unused */ +#define __NR_vfork 190 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_getdents64 220 +#define __NR_fcntl64 221 +/* 222 is unused */ +/* 223 is unused */ +#define __NR_gettid 224 +#define __NR_readahead 225 +#define __NR_setxattr 226 +#define __NR_lsetxattr 227 +#define __NR_fsetxattr 228 +#define __NR_getxattr 229 +#define __NR_lgetxattr 230 +#define __NR_fgetxattr 231 +#define __NR_listxattr 232 +#define __NR_llistxattr 233 +#define __NR_flistxattr 234 +#define __NR_removexattr 235 +#define __NR_lremovexattr 236 +#define __NR_fremovexattr 237 +#define __NR_tkill 238 +#define __NR_sendfile64 239 +#define __NR_futex 240 +#define __NR_sched_setaffinity 241 +#define __NR_sched_getaffinity 242 +#define __NR_set_thread_area 243 +#define __NR_get_thread_area 244 +#define __NR_io_setup 245 +#define __NR_io_destroy 246 +#define __NR_io_getevents 247 +#define __NR_io_submit 248 +#define __NR_io_cancel 249 +#define __NR_fadvise64 250 +/* 251 is unused */ +#define __NR_exit_group 252 +#define __NR_lookup_dcookie 253 +#define __NR_epoll_create 254 +#define __NR_epoll_ctl 255 +#define __NR_epoll_wait 256 +#define __NR_remap_file_pages 257 +#define __NR_set_tid_address 258 +#define __NR_timer_create 259 +#define __NR_timer_settime (__NR_timer_create+1) +#define __NR_timer_gettime (__NR_timer_create+2) +#define __NR_timer_getoverrun (__NR_timer_create+3) +#define __NR_timer_delete (__NR_timer_create+4) +#define __NR_clock_settime (__NR_timer_create+5) +#define __NR_clock_gettime (__NR_timer_create+6) +#define __NR_clock_getres (__NR_timer_create+7) +#define __NR_clock_nanosleep (__NR_timer_create+8) +#define __NR_statfs64 268 +#define __NR_fstatfs64 269 +#define __NR_tgkill 270 +#define __NR_utimes 271 +#define __NR_fadvise64_64 272 +#define __NR_vserver 273 +#define __NR_mbind 274 +#define __NR_get_mempolicy 275 +#define __NR_set_mempolicy 276 +#define __NR_mq_open 277 +#define __NR_mq_unlink (__NR_mq_open+1) +#define __NR_mq_timedsend (__NR_mq_open+2) +#define __NR_mq_timedreceive (__NR_mq_open+3) +#define __NR_mq_notify (__NR_mq_open+4) +#define __NR_mq_getsetattr (__NR_mq_open+5) +#define __NR_kexec_load 283 +#define __NR_waitid 284 +/* 285 is unused */ +#define __NR_add_key 286 +#define __NR_request_key 287 +#define __NR_keyctl 288 +#define __NR_ioprio_set 289 +#define __NR_ioprio_get 290 +#define __NR_inotify_init 291 +#define __NR_inotify_add_watch 292 +#define __NR_inotify_rm_watch 293 +#define __NR_migrate_pages 294 +#define __NR_openat 295 +#define __NR_mkdirat 296 +#define __NR_mknodat 297 +#define __NR_fchownat 298 +#define __NR_futimesat 299 +#define __NR_fstatat64 300 +#define __NR_unlinkat 301 +#define __NR_renameat 302 +#define __NR_linkat 303 +#define __NR_symlinkat 304 +#define __NR_readlinkat 305 +#define __NR_fchmodat 306 +#define __NR_faccessat 307 +#define __NR_pselect6 308 +#define __NR_ppoll 309 +#define __NR_unshare 310 +#define __NR_set_robust_list 311 +#define __NR_get_robust_list 312 +#define __NR_splice 313 +#define __NR_sync_file_range 314 +#define __NR_tee 315 +#define __NR_vmsplice 316 +#define __NR_move_pages 317 +#define __NR_getcpu 318 +#define __NR_epoll_pwait 319 +#define __NR_utimensat 320 +#define __NR_signalfd 321 +/* #define __NR_timerfd 322 removed */ +#define __NR_eventfd 323 +#define __NR_fallocate 324 +#define __NR_setns 325 + +#endif /* _UAPI_ASM_M32R_UNISTD_H */ diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 953a7ba5d050..6710084e072a 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -15,8 +15,6 @@ config M68K select FPU if MMU select ARCH_WANT_IPC_PARSE_VERSION select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_REL select MODULES_USE_ELF_RELA diff --git a/arch/m68k/include/asm/ptrace.h b/arch/m68k/include/asm/ptrace.h index 0f717045bdde..a45cb6894ad3 100644 --- a/arch/m68k/include/asm/ptrace.h +++ b/arch/m68k/include/asm/ptrace.h @@ -15,6 +15,7 @@ #define profile_pc(regs) instruction_pointer(regs) #define current_pt_regs() \ (struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1 +#define current_user_stack_pointer() rdusp() #define arch_has_single_step() (1) diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index a021d67cdd72..847994ce6804 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -31,7 +31,6 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK diff --git a/arch/m68k/include/uapi/asm/signal.h b/arch/m68k/include/uapi/asm/signal.h index 2b450f311bd9..cba6f858bb46 100644 --- a/arch/m68k/include/uapi/asm/signal.h +++ b/arch/m68k/include/uapi/asm/signal.h @@ -80,12 +80,6 @@ typedef unsigned long sigset_t; #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 4bcf89148f3c..ba3b7c8c04b8 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -26,8 +26,6 @@ config MICROBLAZE select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select CLONE_BACKWARDS config SWAP diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h index 01d228286cb0..46460f1c49c4 100644 --- a/arch/microblaze/include/asm/dma-mapping.h +++ b/arch/microblaze/include/asm/dma-mapping.h @@ -114,6 +114,8 @@ static inline void __dma_sync(unsigned long paddr, static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *ops = get_dma_ops(dev); + + debug_dma_mapping_error(dev, dma_addr); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); diff --git a/arch/microblaze/include/asm/ptrace.h b/arch/microblaze/include/asm/ptrace.h index 3732bcf186fd..5b18ec124e51 100644 --- a/arch/microblaze/include/asm/ptrace.h +++ b/arch/microblaze/include/asm/ptrace.h @@ -16,6 +16,7 @@ #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) +#define user_stack_pointer(regs) ((regs)->r1) static inline long regs_return_value(struct pt_regs *regs) { diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index 99e23937a31a..a5f06ac97113 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -35,7 +35,6 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE #define __ARCH_WANT_SYS_VFORK #ifdef CONFIG_MMU diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d971d1586f1c..b7dc39c6c849 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -41,8 +41,6 @@ config MIPS select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_REL select MODULES_USE_ELF_RELA if 64BIT - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE menu "Machine selection" diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index be39a12901c6..006b43e38a9c 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -40,6 +40,8 @@ static inline int dma_supported(struct device *dev, u64 mask) static inline int dma_mapping_error(struct device *dev, u64 mask) { struct dma_map_ops *ops = get_dma_ops(dev); + + debug_dma_mapping_error(dev, mask); return ops->mapping_error(dev, mask); } diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index cec5e125f7e4..a3186f2bb8a0 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -49,6 +49,7 @@ static inline long regs_return_value(struct pt_regs *regs) #define instruction_pointer(regs) ((regs)->cp0_epc) #define profile_pc(regs) instruction_pointer(regs) +#define user_stack_pointer(r) ((r)->regs[29]) extern asmlinkage void syscall_trace_enter(struct pt_regs *regs); extern asmlinkage void syscall_trace_leave(struct pt_regs *regs); diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index b306e2081cad..9e47cc11aa26 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -20,7 +20,6 @@ #define __ARCH_OMIT_COMPAT_SYS_GETDENTS64 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE diff --git a/arch/mips/include/uapi/asm/signal.h b/arch/mips/include/uapi/asm/signal.h index 3f1237c6c80e..770732cb8d03 100644 --- a/arch/mips/include/uapi/asm/signal.h +++ b/arch/mips/include/uapi/asm/signal.h @@ -86,12 +86,6 @@ typedef unsigned long old_sigset_t; /* at least 32 bits */ #define SA_RESTORER 0x04000000 /* Only for o32 */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 72471744a912..aa03f2e13385 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -8,8 +8,6 @@ config MN10300 select HAVE_ARCH_KGDB select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER select GENERIC_CLOCKEVENTS - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select MODULES_USE_ELF_RELA config AM33_2 diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index cabf8ba73b27..e6d2ed4ba68f 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -43,7 +43,6 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/mn10300/include/uapi/asm/signal.h b/arch/mn10300/include/uapi/asm/signal.h index 08dcd6a85618..f423a08d7eeb 100644 --- a/arch/mn10300/include/uapi/asm/signal.h +++ b/arch/mn10300/include/uapi/asm/signal.h @@ -92,12 +92,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index ec37e185d20d..0ac66f67521f 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -22,8 +22,6 @@ config OPENRISC select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE config MMU def_bool y diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h index 07f5299d6c28..7c691399da3f 100644 --- a/arch/openrisc/include/asm/io.h +++ b/arch/openrisc/include/asm/io.h @@ -30,6 +30,7 @@ #define PIO_MASK 0 #include <asm-generic/io.h> +#include <asm/pgtable.h> extern void __iomem *__ioremap(phys_addr_t offset, unsigned long size, pgprot_t prot); diff --git a/arch/openrisc/include/uapi/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h index 5082b8066325..ce40b71df006 100644 --- a/arch/openrisc/include/uapi/asm/unistd.h +++ b/arch/openrisc/include/uapi/asm/unistd.h @@ -20,7 +20,6 @@ #define sys_mmap2 sys_mmap_pgoff -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index e688a2be30f6..b77feffbadea 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -22,8 +22,6 @@ config PARISC select GENERIC_STRNCPY_FROM_USER select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select CLONE_BACKWARDS help diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index 1efef41659c9..3043194547cd 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -163,7 +163,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h index b1ddaa243376..a2fa297196bc 100644 --- a/arch/parisc/include/uapi/asm/signal.h +++ b/arch/parisc/include/uapi/asm/signal.h @@ -71,12 +71,6 @@ #define SA_RESTORER 0x04000000 /* obsolete -- ignored */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 951a517a1a0f..17903f1f356b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -141,10 +141,8 @@ config PPC select GENERIC_CLOCKEVENTS select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER - select GENERIC_KERNEL_THREAD select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_EXECVE select CLONE_BACKWARDS config EARLY_PRINTK diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 78160874809a..e27e9ad6818e 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -172,6 +172,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *dma_ops = get_dma_ops(dev); + debug_dma_mapping_error(dev, dma_addr); if (dma_ops->mapping_error) return dma_ops->mapping_error(dev, dma_addr); diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 29365e15ed7c..1d4864a40e35 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -56,7 +56,6 @@ #define __ARCH_WANT_COMPAT_SYS_SENDFILE #define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #endif -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/powerpc/include/uapi/asm/signal.h b/arch/powerpc/include/uapi/asm/signal.h index 48fa8d3f2f9a..e079fb39d5bc 100644 --- a/arch/powerpc/include/uapi/asm/signal.h +++ b/arch/powerpc/include/uapi/asm/signal.h @@ -85,12 +85,6 @@ typedef struct { #define SA_RESTORER 0x04000000U -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index 5b7d8ffbf890..baee994fe810 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -66,7 +66,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags, struct dentry *dentry; int ret; - dentry = user_path_create(AT_FDCWD, pathname, &path, 1); + dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY); ret = PTR_ERR(dentry); if (!IS_ERR(dentry)) { ret = spufs_create(&path, dentry, flags, mode, neighbor); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 32425af9d68d..b5ea38c25647 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -137,8 +137,6 @@ config S390 select GENERIC_CLOCKEVENTS select KTIME_SCALAR if 32BIT select HAVE_ARCH_SECCOMP_FILTER - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select CLONE_BACKWARDS2 diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 18cd6b592650..f8c6df6cd1f0 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -7,6 +7,9 @@ #include <linux/sched.h> #include <linux/thread_info.h> +#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64)) +#define __SC_DELOUSE(t,v) (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)) + #define PSW32_MASK_PER 0x40000000UL #define PSW32_MASK_DAT 0x04000000UL #define PSW32_MASK_IO 0x02000000UL diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 086bb8eaf6ab..636530872516 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -53,7 +53,6 @@ # define __ARCH_WANT_COMPAT_SYS_TIME # define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND # endif -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h index 8c6a49e392ee..2f43cfbf5f1a 100644 --- a/arch/s390/include/uapi/asm/signal.h +++ b/arch/s390/include/uapi/asm/signal.h @@ -90,12 +90,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/score/Kconfig b/arch/score/Kconfig index 45893390c7dd..3b1482e7afac 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -13,8 +13,6 @@ config SCORE select GENERIC_CLOCKEVENTS select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_REL - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select CLONE_BACKWARDS choice diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 16e41fe1a419..cebaff8069a1 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -1,4 +1,3 @@ -include include/asm-generic/Kbuild.asm header-y += diff --git a/arch/score/include/asm/ptrace.h b/arch/score/include/asm/ptrace.h index e89dc9b1ef49..abc279d96b73 100644 --- a/arch/score/include/asm/ptrace.h +++ b/arch/score/include/asm/ptrace.h @@ -1,78 +1,8 @@ #ifndef _ASM_SCORE_PTRACE_H #define _ASM_SCORE_PTRACE_H -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 +#include <uapi/asm/ptrace.h> -#define PC 32 -#define CONDITION 33 -#define ECR 34 -#define EMA 35 -#define CEH 36 -#define CEL 37 -#define COUNTER 38 -#define LDCR 39 -#define STCR 40 -#define PSR 41 - -#define SINGLESTEP16_INSN 0x7006 -#define SINGLESTEP32_INSN 0x840C8000 -#define BREAKPOINT16_INSN 0x7002 /* work on SPG300 */ -#define BREAKPOINT32_INSN 0x84048000 /* work on SPG300 */ - -/* Define instruction mask */ -#define INSN32_MASK 0x80008000 - -#define J32 0x88008000 /* 1_00010_0000000000_1_000000000000000 */ -#define J32M 0xFC008000 /* 1_11111_0000000000_1_000000000000000 */ - -#define B32 0x90008000 /* 1_00100_0000000000_1_000000000000000 */ -#define B32M 0xFC008000 -#define BL32 0x90008001 /* 1_00100_0000000000_1_000000000000001 */ -#define BL32M B32 -#define BR32 0x80008008 /* 1_00000_0000000000_1_00000000_000100_0 */ -#define BR32M 0xFFE0807E -#define BRL32 0x80008009 /* 1_00000_0000000000_1_00000000_000100_1 */ -#define BRL32M BR32M - -#define B32_SET (J32 | B32 | BL32 | BR32 | BRL32) - -#define J16 0x3000 /* 0_011_....... */ -#define J16M 0xF000 -#define B16 0x4000 /* 0_100_....... */ -#define B16M 0xF000 -#define BR16 0x0004 /* 0_000.......0100 */ -#define BR16M 0xF00F -#define B16_SET (J16 | B16 | BR16) - - -/* - * This struct defines the way the registers are stored on the stack during a - * system call/exception. As usual the registers k0/k1 aren't being saved. - */ -struct pt_regs { - unsigned long pad0[6]; /* stack arguments */ - unsigned long orig_r4; - unsigned long orig_r7; - long is_syscall; - - unsigned long regs[32]; - - unsigned long cel; - unsigned long ceh; - - unsigned long sr0; /* cnt */ - unsigned long sr1; /* lcr */ - unsigned long sr2; /* scr */ - - unsigned long cp0_epc; - unsigned long cp0_ema; - unsigned long cp0_psr; - unsigned long cp0_ecr; - unsigned long cp0_condition; -}; - -#ifdef __KERNEL__ struct task_struct; @@ -83,6 +13,7 @@ struct task_struct; #define instruction_pointer(regs) ((unsigned long)(regs)->cp0_epc) #define profile_pc(regs) instruction_pointer(regs) +#define user_stack_pointer(r) ((unsigned long)(r)->regs[0]) extern void do_syscall_trace(struct pt_regs *regs, int entryexit); extern int read_tsk_long(struct task_struct *, unsigned long, unsigned long *); @@ -91,6 +22,4 @@ extern int read_tsk_short(struct task_struct *, unsigned long, #define arch_has_single_step() (1) -#endif /* __KERNEL__ */ - #endif /* _ASM_SCORE_PTRACE_H */ diff --git a/arch/score/include/asm/setup.h b/arch/score/include/asm/setup.h index 3cb944dc68dc..1f3aa7262fa3 100644 --- a/arch/score/include/asm/setup.h +++ b/arch/score/include/asm/setup.h @@ -1,11 +1,8 @@ #ifndef _ASM_SCORE_SETUP_H #define _ASM_SCORE_SETUP_H -#define COMMAND_LINE_SIZE 256 -#define MEMORY_START 0 -#define MEMORY_SIZE 0x2000000 +#include <uapi/asm/setup.h> -#ifdef __KERNEL__ extern void pagetable_init(void); extern void pgd_init(unsigned long page); @@ -36,6 +33,4 @@ extern void debug_exception_vector(void); extern void general_exception_vector(void); extern void interrupt_exception_vector(void); -#endif /* __KERNEL__ */ - #endif /* _ASM_SCORE_SETUP_H */ diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild index baebb3da1d44..040178cdb3eb 100644 --- a/arch/score/include/uapi/asm/Kbuild +++ b/arch/score/include/uapi/asm/Kbuild @@ -1,3 +1,34 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +header-y += auxvec.h +header-y += bitsperlong.h +header-y += byteorder.h +header-y += errno.h +header-y += fcntl.h +header-y += ioctl.h +header-y += ioctls.h +header-y += ipcbuf.h +header-y += kvm_para.h +header-y += mman.h +header-y += msgbuf.h +header-y += param.h +header-y += poll.h +header-y += posix_types.h +header-y += ptrace.h +header-y += resource.h +header-y += sembuf.h +header-y += setup.h +header-y += shmbuf.h +header-y += sigcontext.h +header-y += siginfo.h +header-y += signal.h +header-y += socket.h +header-y += sockios.h +header-y += stat.h +header-y += statfs.h +header-y += swab.h +header-y += termbits.h +header-y += termios.h +header-y += types.h +header-y += unistd.h diff --git a/arch/score/include/asm/auxvec.h b/arch/score/include/uapi/asm/auxvec.h index f69151565aee..f69151565aee 100644 --- a/arch/score/include/asm/auxvec.h +++ b/arch/score/include/uapi/asm/auxvec.h diff --git a/arch/score/include/asm/bitsperlong.h b/arch/score/include/uapi/asm/bitsperlong.h index 86ff337aa459..86ff337aa459 100644 --- a/arch/score/include/asm/bitsperlong.h +++ b/arch/score/include/uapi/asm/bitsperlong.h diff --git a/arch/score/include/asm/byteorder.h b/arch/score/include/uapi/asm/byteorder.h index 88cbebc79212..88cbebc79212 100644 --- a/arch/score/include/asm/byteorder.h +++ b/arch/score/include/uapi/asm/byteorder.h diff --git a/arch/score/include/asm/errno.h b/arch/score/include/uapi/asm/errno.h index 29ff39d5ab47..29ff39d5ab47 100644 --- a/arch/score/include/asm/errno.h +++ b/arch/score/include/uapi/asm/errno.h diff --git a/arch/score/include/asm/fcntl.h b/arch/score/include/uapi/asm/fcntl.h index 03968a3103a4..03968a3103a4 100644 --- a/arch/score/include/asm/fcntl.h +++ b/arch/score/include/uapi/asm/fcntl.h diff --git a/arch/score/include/asm/ioctl.h b/arch/score/include/uapi/asm/ioctl.h index a351d2194bfd..a351d2194bfd 100644 --- a/arch/score/include/asm/ioctl.h +++ b/arch/score/include/uapi/asm/ioctl.h diff --git a/arch/score/include/asm/ioctls.h b/arch/score/include/uapi/asm/ioctls.h index ed01d2b9aeab..ed01d2b9aeab 100644 --- a/arch/score/include/asm/ioctls.h +++ b/arch/score/include/uapi/asm/ioctls.h diff --git a/arch/score/include/asm/ipcbuf.h b/arch/score/include/uapi/asm/ipcbuf.h index e082ceff1818..e082ceff1818 100644 --- a/arch/score/include/asm/ipcbuf.h +++ b/arch/score/include/uapi/asm/ipcbuf.h diff --git a/arch/score/include/asm/kvm_para.h b/arch/score/include/uapi/asm/kvm_para.h index 14fab8f0b957..14fab8f0b957 100644 --- a/arch/score/include/asm/kvm_para.h +++ b/arch/score/include/uapi/asm/kvm_para.h diff --git a/arch/score/include/asm/mman.h b/arch/score/include/uapi/asm/mman.h index 84d85ddfed8d..84d85ddfed8d 100644 --- a/arch/score/include/asm/mman.h +++ b/arch/score/include/uapi/asm/mman.h diff --git a/arch/score/include/asm/msgbuf.h b/arch/score/include/uapi/asm/msgbuf.h index 7506721e29fa..7506721e29fa 100644 --- a/arch/score/include/asm/msgbuf.h +++ b/arch/score/include/uapi/asm/msgbuf.h diff --git a/arch/score/include/asm/param.h b/arch/score/include/uapi/asm/param.h index 916b8690b6aa..916b8690b6aa 100644 --- a/arch/score/include/asm/param.h +++ b/arch/score/include/uapi/asm/param.h diff --git a/arch/score/include/asm/poll.h b/arch/score/include/uapi/asm/poll.h index 18532db02861..18532db02861 100644 --- a/arch/score/include/asm/poll.h +++ b/arch/score/include/uapi/asm/poll.h diff --git a/arch/score/include/asm/posix_types.h b/arch/score/include/uapi/asm/posix_types.h index b88acf80048a..b88acf80048a 100644 --- a/arch/score/include/asm/posix_types.h +++ b/arch/score/include/uapi/asm/posix_types.h diff --git a/arch/score/include/uapi/asm/ptrace.h b/arch/score/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..f59771a3f127 --- /dev/null +++ b/arch/score/include/uapi/asm/ptrace.h @@ -0,0 +1,76 @@ +#ifndef _UAPI_ASM_SCORE_PTRACE_H +#define _UAPI_ASM_SCORE_PTRACE_H + +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 + +#define PC 32 +#define CONDITION 33 +#define ECR 34 +#define EMA 35 +#define CEH 36 +#define CEL 37 +#define COUNTER 38 +#define LDCR 39 +#define STCR 40 +#define PSR 41 + +#define SINGLESTEP16_INSN 0x7006 +#define SINGLESTEP32_INSN 0x840C8000 +#define BREAKPOINT16_INSN 0x7002 /* work on SPG300 */ +#define BREAKPOINT32_INSN 0x84048000 /* work on SPG300 */ + +/* Define instruction mask */ +#define INSN32_MASK 0x80008000 + +#define J32 0x88008000 /* 1_00010_0000000000_1_000000000000000 */ +#define J32M 0xFC008000 /* 1_11111_0000000000_1_000000000000000 */ + +#define B32 0x90008000 /* 1_00100_0000000000_1_000000000000000 */ +#define B32M 0xFC008000 +#define BL32 0x90008001 /* 1_00100_0000000000_1_000000000000001 */ +#define BL32M B32 +#define BR32 0x80008008 /* 1_00000_0000000000_1_00000000_000100_0 */ +#define BR32M 0xFFE0807E +#define BRL32 0x80008009 /* 1_00000_0000000000_1_00000000_000100_1 */ +#define BRL32M BR32M + +#define B32_SET (J32 | B32 | BL32 | BR32 | BRL32) + +#define J16 0x3000 /* 0_011_....... */ +#define J16M 0xF000 +#define B16 0x4000 /* 0_100_....... */ +#define B16M 0xF000 +#define BR16 0x0004 /* 0_000.......0100 */ +#define BR16M 0xF00F +#define B16_SET (J16 | B16 | BR16) + + +/* + * This struct defines the way the registers are stored on the stack during a + * system call/exception. As usual the registers k0/k1 aren't being saved. + */ +struct pt_regs { + unsigned long pad0[6]; /* stack arguments */ + unsigned long orig_r4; + unsigned long orig_r7; + long is_syscall; + + unsigned long regs[32]; + + unsigned long cel; + unsigned long ceh; + + unsigned long sr0; /* cnt */ + unsigned long sr1; /* lcr */ + unsigned long sr2; /* scr */ + + unsigned long cp0_epc; + unsigned long cp0_ema; + unsigned long cp0_psr; + unsigned long cp0_ecr; + unsigned long cp0_condition; +}; + + +#endif /* _UAPI_ASM_SCORE_PTRACE_H */ diff --git a/arch/score/include/asm/resource.h b/arch/score/include/uapi/asm/resource.h index 9ce22bc7b475..9ce22bc7b475 100644 --- a/arch/score/include/asm/resource.h +++ b/arch/score/include/uapi/asm/resource.h diff --git a/arch/score/include/asm/sembuf.h b/arch/score/include/uapi/asm/sembuf.h index dae5e835ce9e..dae5e835ce9e 100644 --- a/arch/score/include/asm/sembuf.h +++ b/arch/score/include/uapi/asm/sembuf.h diff --git a/arch/score/include/uapi/asm/setup.h b/arch/score/include/uapi/asm/setup.h new file mode 100644 index 000000000000..ab9dbdb59bba --- /dev/null +++ b/arch/score/include/uapi/asm/setup.h @@ -0,0 +1,9 @@ +#ifndef _UAPI_ASM_SCORE_SETUP_H +#define _UAPI_ASM_SCORE_SETUP_H + +#define COMMAND_LINE_SIZE 256 +#define MEMORY_START 0 +#define MEMORY_SIZE 0x2000000 + + +#endif /* _UAPI_ASM_SCORE_SETUP_H */ diff --git a/arch/score/include/asm/shmbuf.h b/arch/score/include/uapi/asm/shmbuf.h index c85b2429ba21..c85b2429ba21 100644 --- a/arch/score/include/asm/shmbuf.h +++ b/arch/score/include/uapi/asm/shmbuf.h diff --git a/arch/score/include/asm/sigcontext.h b/arch/score/include/uapi/asm/sigcontext.h index 5ffda39ddb90..5ffda39ddb90 100644 --- a/arch/score/include/asm/sigcontext.h +++ b/arch/score/include/uapi/asm/sigcontext.h diff --git a/arch/score/include/asm/siginfo.h b/arch/score/include/uapi/asm/siginfo.h index 87ca35607a28..87ca35607a28 100644 --- a/arch/score/include/asm/siginfo.h +++ b/arch/score/include/uapi/asm/siginfo.h diff --git a/arch/score/include/asm/signal.h b/arch/score/include/uapi/asm/signal.h index 2605bc06b64f..2605bc06b64f 100644 --- a/arch/score/include/asm/signal.h +++ b/arch/score/include/uapi/asm/signal.h diff --git a/arch/score/include/asm/socket.h b/arch/score/include/uapi/asm/socket.h index 612a70e385ba..612a70e385ba 100644 --- a/arch/score/include/asm/socket.h +++ b/arch/score/include/uapi/asm/socket.h diff --git a/arch/score/include/asm/sockios.h b/arch/score/include/uapi/asm/sockios.h index ba8256480189..ba8256480189 100644 --- a/arch/score/include/asm/sockios.h +++ b/arch/score/include/uapi/asm/sockios.h diff --git a/arch/score/include/asm/stat.h b/arch/score/include/uapi/asm/stat.h index 5037055500a2..5037055500a2 100644 --- a/arch/score/include/asm/stat.h +++ b/arch/score/include/uapi/asm/stat.h diff --git a/arch/score/include/asm/statfs.h b/arch/score/include/uapi/asm/statfs.h index 36e41004e996..36e41004e996 100644 --- a/arch/score/include/asm/statfs.h +++ b/arch/score/include/uapi/asm/statfs.h diff --git a/arch/score/include/asm/swab.h b/arch/score/include/uapi/asm/swab.h index fadc3cc6d8a2..fadc3cc6d8a2 100644 --- a/arch/score/include/asm/swab.h +++ b/arch/score/include/uapi/asm/swab.h diff --git a/arch/score/include/asm/termbits.h b/arch/score/include/uapi/asm/termbits.h index 9a95c1412437..9a95c1412437 100644 --- a/arch/score/include/asm/termbits.h +++ b/arch/score/include/uapi/asm/termbits.h diff --git a/arch/score/include/asm/termios.h b/arch/score/include/uapi/asm/termios.h index 40984e811ad6..40984e811ad6 100644 --- a/arch/score/include/asm/termios.h +++ b/arch/score/include/uapi/asm/termios.h diff --git a/arch/score/include/asm/types.h b/arch/score/include/uapi/asm/types.h index 2140032778ee..2140032778ee 100644 --- a/arch/score/include/asm/types.h +++ b/arch/score/include/uapi/asm/types.h diff --git a/arch/score/include/asm/unistd.h b/arch/score/include/uapi/asm/unistd.h index 56001c93095a..9cb4260a5f3e 100644 --- a/arch/score/include/asm/unistd.h +++ b/arch/score/include/uapi/asm/unistd.h @@ -4,7 +4,6 @@ #define __ARCH_WANT_SYSCALL_NO_FLAGS #define __ARCH_WANT_SYSCALL_OFF_T #define __ARCH_WANT_SYSCALL_DEPRECATED -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 8451317eed58..babc2b826c5c 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -40,8 +40,6 @@ config SUPERH select GENERIC_STRNLEN_USER select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index 8bd965e00a15..b437f2c780b8 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h @@ -46,6 +46,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *ops = get_dma_ops(dev); + debug_dma_mapping_error(dev, dma_addr); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h index 43d3f26b2eab..012004ed3330 100644 --- a/arch/sh/include/asm/unistd.h +++ b/arch/sh/include/asm/unistd.h @@ -28,7 +28,6 @@ # define __ARCH_WANT_SYS_SIGPENDING # define __ARCH_WANT_SYS_SIGPROCMASK # define __ARCH_WANT_SYS_RT_SIGACTION -# define __ARCH_WANT_SYS_EXECVE # define __ARCH_WANT_SYS_FORK # define __ARCH_WANT_SYS_VFORK # define __ARCH_WANT_SYS_CLONE diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 0c7d365fa402..9f2edb5c5551 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -41,8 +41,6 @@ config SPARC select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE config SPARC32 def_bool !64BIT diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S index 23f6cbb910d3..1cda8aa7cb85 100644 --- a/arch/sparc/crypto/aes_asm.S +++ b/arch/sparc/crypto/aes_asm.S @@ -1024,7 +1024,11 @@ ENTRY(aes_sparc64_ecb_encrypt_256) add %o2, 0x20, %o2 brlz,pt %o3, 11f nop -10: ldx [%o1 + 0x00], %g3 +10: ldd [%o0 + 0xd0], %f56 + ldd [%o0 + 0xd8], %f58 + ldd [%o0 + 0xe0], %f60 + ldd [%o0 + 0xe8], %f62 + ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 xor %g1, %g3, %g3 xor %g2, %g7, %g7 @@ -1128,9 +1132,9 @@ ENTRY(aes_sparc64_ecb_decrypt_256) /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ ldx [%o0 - 0x10], %g1 subcc %o3, 0x10, %o3 + ldx [%o0 - 0x08], %g2 be 10f - ldx [%o0 - 0x08], %g2 - sub %o0, 0xf0, %o0 + sub %o0, 0xf0, %o0 1: ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 ldx [%o1 + 0x10], %o4 @@ -1154,7 +1158,11 @@ ENTRY(aes_sparc64_ecb_decrypt_256) add %o2, 0x20, %o2 brlz,pt %o3, 11f nop -10: ldx [%o1 + 0x00], %g3 +10: ldd [%o0 + 0x18], %f56 + ldd [%o0 + 0x10], %f58 + ldd [%o0 + 0x08], %f60 + ldd [%o0 + 0x00], %f62 + ldx [%o1 + 0x00], %g3 ldx [%o1 + 0x08], %g7 xor %g1, %g3, %g3 xor %g2, %g7, %g7 @@ -1511,11 +1519,11 @@ ENTRY(aes_sparc64_ctr_crypt_256) add %o2, 0x20, %o2 brlz,pt %o3, 11f nop - ldd [%o0 + 0xd0], %f56 +10: ldd [%o0 + 0xd0], %f56 ldd [%o0 + 0xd8], %f58 ldd [%o0 + 0xe0], %f60 ldd [%o0 + 0xe8], %f62 -10: xor %g1, %g3, %o5 + xor %g1, %g3, %o5 MOVXTOD_O5_F0 xor %g2, %g7, %o5 MOVXTOD_O5_F2 diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 3965d1d36dfa..503e6d96ad4e 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -222,6 +222,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; ctx->ops->load_encrypt_keys(&ctx->key[0]); while ((nbytes = walk.nbytes)) { @@ -251,6 +252,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; ctx->ops->load_decrypt_keys(&ctx->key[0]); key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; @@ -280,6 +282,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; ctx->ops->load_encrypt_keys(&ctx->key[0]); while ((nbytes = walk.nbytes)) { @@ -309,6 +312,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; ctx->ops->load_decrypt_keys(&ctx->key[0]); key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)]; @@ -329,6 +333,22 @@ static int cbc_decrypt(struct blkcipher_desc *desc, return err; } +static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx, + struct blkcipher_walk *walk) +{ + u8 *ctrblk = walk->iv; + u64 keystream[AES_BLOCK_SIZE / sizeof(u64)]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + ctx->ops->ecb_encrypt(&ctx->key[0], (const u64 *)ctrblk, + keystream, AES_BLOCK_SIZE); + crypto_xor((u8 *) keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + crypto_inc(ctrblk, AES_BLOCK_SIZE); +} + static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -338,10 +358,11 @@ static int ctr_crypt(struct blkcipher_desc *desc, int err; blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; ctx->ops->load_encrypt_keys(&ctx->key[0]); - while ((nbytes = walk.nbytes)) { + while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { unsigned int block_len = nbytes & AES_BLOCK_MASK; if (likely(block_len)) { @@ -353,6 +374,10 @@ static int ctr_crypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } + if (walk.nbytes) { + ctr_crypt_final(ctx, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } fprs_write(0); return err; } @@ -418,7 +443,7 @@ static struct crypto_alg algs[] = { { .cra_driver_name = "ctr-aes-sparc64", .cra_priority = SPARC_CR_OPCODE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, + .cra_blocksize = 1, .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx), .cra_alignmask = 7, .cra_type = &crypto_blkcipher_type, diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 62c89af3fd3f..888f6260b4ec 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -98,6 +98,7 @@ static int __ecb_crypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; if (encrypt) key = &ctx->encrypt_key[0]; @@ -160,6 +161,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; key = &ctx->encrypt_key[0]; camellia_sparc64_load_keys(key, ctx->key_len); @@ -198,6 +200,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; key = &ctx->decrypt_key[0]; camellia_sparc64_load_keys(key, ctx->key_len); diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S index 30b6e90b28b2..b5c8fc269b5f 100644 --- a/arch/sparc/crypto/des_asm.S +++ b/arch/sparc/crypto/des_asm.S @@ -376,6 +376,7 @@ ENTRY(des3_ede_sparc64_ecb_crypt) 1: ldd [%o1 + 0x00], %f60 DES3_LOOP_BODY(60) std %f60, [%o2 + 0x00] + add %o1, 0x08, %o1 subcc %o3, 0x08, %o3 bne,pt %icc, 1b add %o2, 0x08, %o2 diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index 41524cebcc49..3065bc61f9d3 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -100,6 +100,7 @@ static int __ecb_crypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; if (encrypt) des_sparc64_load_keys(&ctx->encrypt_expkey[0]); @@ -147,6 +148,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; des_sparc64_load_keys(&ctx->encrypt_expkey[0]); while ((nbytes = walk.nbytes)) { @@ -177,6 +179,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; des_sparc64_load_keys(&ctx->decrypt_expkey[0]); while ((nbytes = walk.nbytes)) { @@ -266,6 +269,7 @@ static int __ecb3_crypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; if (encrypt) K = &ctx->encrypt_expkey[0]; @@ -317,6 +321,7 @@ static int cbc3_encrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; K = &ctx->encrypt_expkey[0]; des3_ede_sparc64_load_keys(K); @@ -352,6 +357,7 @@ static int cbc3_decrypt(struct blkcipher_desc *desc, blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; K = &ctx->decrypt_expkey[0]; des3_ede_sparc64_load_keys(K); diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 8493fd3c7ba5..05fe53f5346e 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -59,6 +59,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { + debug_dma_mapping_error(dev, dma_addr); return (dma_addr == DMA_ERROR_CODE); } diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 8c5eed6d267f..9661e9bc7bb6 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -61,14 +61,20 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - ptep_set_wrprotect(mm, addr, ptep); + pte_t old_pte = *ptep; + set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); } static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + int changed = !pte_same(*ptep, pte); + if (changed) { + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + flush_tlb_page(vma, addr); + } + return changed; } static inline pte_t huge_ptep_get(pte_t *ptep) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 95515f1e7cef..7870be0f5adc 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -617,6 +617,12 @@ static inline unsigned long pte_present(pte_t pte) return val; } +#define pte_accessible pte_accessible +static inline unsigned long pte_accessible(pte_t a) +{ + return pte_val(a) & _PAGE_VALID; +} + static inline unsigned long pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; @@ -802,7 +808,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U * and SUN4V pte layout, so this inline test is fine. */ - if (likely(mm != &init_mm) && (pte_val(orig) & _PAGE_VALID)) + if (likely(mm != &init_mm) && pte_accessible(orig)) tlb_batch_add(mm, addr, ptep, orig, fullmm); } diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index 497386a7ed28..87ce24c5eb95 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -47,7 +47,6 @@ #define __ARCH_WANT_COMPAT_SYS_SENDFILE #define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #endif -#define __ARCH_WANT_SYS_EXECVE /* * "Conditional" syscalls diff --git a/arch/sparc/include/uapi/asm/signal.h b/arch/sparc/include/uapi/asm/signal.h index 1a041892538f..c4ffd6c97106 100644 --- a/arch/sparc/include/uapi/asm/signal.h +++ b/arch/sparc/include/uapi/asm/signal.h @@ -147,12 +147,6 @@ struct sigstack { #define SIG_UNBLOCK 0x02 /* for unblocking signals */ #define SIG_SETMASK 0x04 /* for setting the signal mask */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 4096 #define SIGSTKSZ 16384 diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index ea7f61e8bc9e..875d008828b8 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -21,8 +21,6 @@ config TILE select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 4b6247d1a315..f2ff191376b4 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -72,6 +72,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { + debug_dma_mapping_error(dev, dma_addr); return get_dma_ops(dev)->mapping_error(dev, dma_addr); } diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h index 5ce052e16b7b..2e83fc1b9467 100644 --- a/arch/tile/include/asm/ptrace.h +++ b/arch/tile/include/asm/ptrace.h @@ -35,6 +35,7 @@ typedef unsigned long pt_reg_t; #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) +#define user_stack_pointer(regs) ((regs)->sp) /* Does the process account for user or for system time? */ #define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL) diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h index fe841e7d4963..6ac21034f69a 100644 --- a/arch/tile/include/asm/unistd.h +++ b/arch/tile/include/asm/unistd.h @@ -17,6 +17,5 @@ #define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #endif #define __ARCH_WANT_SYS_NEWFSTATAT -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE #include <uapi/asm/unistd.h> diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index db18eb6124e1..48ccf718e290 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -132,8 +132,3 @@ long sys_sigsuspend(int history0, int history1, old_sigset_t mask) siginitset(&blocked, mask); return sigsuspend(&blocked); } - -long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss) -{ - return do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs)); -} diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index c4fbb21e802b..60651df5f952 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -16,8 +16,6 @@ config UNICORE32 select ARCH_WANT_FRAME_POINTERS select GENERIC_IOMAP select MODULES_USE_ELF_REL - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE help UniCore-32 is 32-bit Instruction Set Architecture, including a series of low-power-consumption RISC chip diff --git a/arch/unicore32/include/asm/ptrace.h b/arch/unicore32/include/asm/ptrace.h index 726749dab52f..9df53d991c78 100644 --- a/arch/unicore32/include/asm/ptrace.h +++ b/arch/unicore32/include/asm/ptrace.h @@ -54,6 +54,7 @@ static inline int valid_user_regs(struct pt_regs *regs) } #define instruction_pointer(regs) ((regs)->UCreg_pc) +#define user_stack_pointer(regs) ((regs)->UCreg_sp) #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/unicore32/include/uapi/asm/unistd.h b/arch/unicore32/include/uapi/asm/unistd.h index 00cf5e286fca..d4cc4559d848 100644 --- a/arch/unicore32/include/uapi/asm/unistd.h +++ b/arch/unicore32/include/uapi/asm/unistd.h @@ -12,5 +12,4 @@ /* Use the standard ABI for syscalls. */ #include <asm-generic/unistd.h> -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 97f8c5ad8c2d..79795af59810 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -110,11 +110,10 @@ config X86 select GENERIC_STRNLEN_USER select HAVE_CONTEXT_TRACKING if X86_64 select HAVE_IRQ_TIME_ACCOUNTING - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select MODULES_USE_ELF_REL if X86_32 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 + select GENERIC_SIGALTSTACK config INSTRUCTION_DECODER def_bool y diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index efc6a958b71d..a1daf4a65009 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -136,52 +136,6 @@ asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask) return sigsuspend(&blocked); } -asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, - stack_ia32_t __user *uoss_ptr, - struct pt_regs *regs) -{ - stack_t uss, uoss; - int ret, err = 0; - mm_segment_t seg; - - if (uss_ptr) { - u32 ptr; - - memset(&uss, 0, sizeof(stack_t)); - if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t))) - return -EFAULT; - - get_user_try { - get_user_ex(ptr, &uss_ptr->ss_sp); - get_user_ex(uss.ss_flags, &uss_ptr->ss_flags); - get_user_ex(uss.ss_size, &uss_ptr->ss_size); - } get_user_catch(err); - - if (err) - return -EFAULT; - uss.ss_sp = compat_ptr(ptr); - } - seg = get_fs(); - set_fs(KERNEL_DS); - ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL), - (stack_t __force __user *) &uoss, regs->sp); - set_fs(seg); - if (ret >= 0 && uoss_ptr) { - if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) - return -EFAULT; - - put_user_try { - put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp); - put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags); - put_user_ex(uoss.ss_size, &uoss_ptr->ss_size); - } put_user_catch(err); - - if (err) - ret = -EFAULT; - } - return ret; -} - /* * Do a signal return; undo the signal stack. */ @@ -292,7 +246,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) struct rt_sigframe_ia32 __user *frame; sigset_t set; unsigned int ax; - struct pt_regs tregs; frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); @@ -306,8 +259,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; - tregs = *regs; - if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT) + if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; return ax; @@ -515,10 +467,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - put_user_ex(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 32e6f05ddaaa..102ff7cb3e41 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -464,7 +464,6 @@ GLOBAL(\label) PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi - PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx PTREGSCALL stub32_execve, compat_sys_execve, %rcx PTREGSCALL stub32_fork, sys_fork, %rdi PTREGSCALL stub32_vfork, sys_vfork, %rdi diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index f7b4c7903e7e..808dae63eeea 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -47,6 +47,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *ops = get_dma_ops(dev); + debug_dma_mapping_error(dev, dma_addr); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index e6232773ce49..4c6da2e4bb1d 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -29,16 +29,10 @@ struct old_sigaction32 { unsigned int sa_restorer; /* Another 32 bit pointer */ }; -typedef struct sigaltstack_ia32 { - unsigned int ss_sp; - int ss_flags; - unsigned int ss_size; -} stack_ia32_t; - struct ucontext_ia32 { unsigned int uc_flags; unsigned int uc_link; - stack_ia32_t uc_stack; + compat_stack_t uc_stack; struct sigcontext_ia32 uc_mcontext; compat_sigset_t uc_sigmask; /* mask last for extensibility */ }; @@ -46,7 +40,7 @@ struct ucontext_ia32 { struct ucontext_x32 { unsigned int uc_flags; unsigned int uc_link; - stack_ia32_t uc_stack; + compat_stack_t uc_stack; unsigned int uc__pad0; /* needed for alignment */ struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */ compat_sigset_t uc_sigmask; /* mask last for extensibility */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 03ca442d8f0d..942a08623a1a 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -133,6 +133,13 @@ static inline bool user_64bit_mode(struct pt_regs *regs) return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; #endif } + +#define current_user_stack_pointer() this_cpu_read(old_rsp) +/* ia32 vs. x32 difference */ +#define compat_user_stack_pointer() \ + (test_thread_flag(TIF_IA32) \ + ? current_pt_regs()->sp \ + : this_cpu_read(old_rsp)) #endif #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index c76fae4d90be..31f61f96e0fb 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -69,8 +69,6 @@ asmlinkage long sys32_fallocate(int, int, unsigned, /* ia32/ia32_signal.c */ asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); -asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *, - stack_ia32_t __user *, struct pt_regs *); asmlinkage long sys32_sigreturn(struct pt_regs *); asmlinkage long sys32_rt_sigreturn(struct pt_regs *); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 2f8374718aa3..58b7e3eac0ae 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -25,9 +25,6 @@ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); /* kernel/signal.c */ long sys_rt_sigreturn(struct pt_regs *); -long sys_sigaltstack(const stack_t __user *, stack_t __user *, - struct pt_regs *); - /* kernel/tls.c */ asmlinkage int sys_set_thread_area(struct user_desc __user *); diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 1003e69a40d9..a0790e07ba65 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -48,7 +48,6 @@ # define __ARCH_WANT_SYS_TIME # define __ARCH_WANT_SYS_UTIME # define __ARCH_WANT_SYS_WAITPID -# define __ARCH_WANT_SYS_EXECVE # define __ARCH_WANT_SYS_FORK # define __ARCH_WANT_SYS_VFORK # define __ARCH_WANT_SYS_CLONE diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h index 0818f9a8e889..aa7d6ae39e0e 100644 --- a/arch/x86/include/uapi/asm/signal.h +++ b/arch/x86/include/uapi/asm/signal.h @@ -87,12 +87,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c763116c5359..ff84d5469d77 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -739,7 +739,6 @@ ENTRY(ptregs_##name) ; \ ENDPROC(ptregs_##name) PTREGSCALL1(iopl) -PTREGSCALL2(sigaltstack) PTREGSCALL0(sigreturn) PTREGSCALL0(rt_sigreturn) PTREGSCALL2(vm86) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 70641aff0c25..07a7a04529bc 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -864,7 +864,6 @@ END(stub_\func) FORK_LIKE clone FORK_LIKE fork FORK_LIKE vfork - PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx PTREGSCALL stub_iopl, sys_iopl, %rsi ENTRY(ptregscall_common) @@ -913,8 +912,6 @@ ENTRY(stub_rt_sigreturn) END(stub_rt_sigreturn) #ifdef CONFIG_X86_X32_ABI - PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx - ENTRY(stub_x32_rt_sigreturn) CFI_STARTPROC addq $8, %rsp diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index fbbb604313a2..d6bf1f34a6e9 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -364,10 +364,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - put_user_ex(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __save_altstack(&frame->uc.uc_stack, regs->sp); /* Set up to return from userspace. */ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); @@ -414,7 +411,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct rt_sigframe __user *frame; void __user *fp = NULL; int err = 0; - struct task_struct *me = current; frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp); @@ -433,10 +429,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - put_user_ex(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __save_altstack(&frame->uc.uc_stack, regs->sp); /* Set up to return from userspace. If provided, use a stub already in userspace. */ @@ -503,10 +496,7 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, else put_user_ex(0, &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); - put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - put_user_ex(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); put_user_ex(0, &frame->uc.uc__pad0); if (ka->sa.sa_flags & SA_RESTORER) { @@ -603,13 +593,6 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, } #endif /* CONFIG_X86_32 */ -long -sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, - struct pt_regs *regs) -{ - return do_sigaltstack(uss, uoss, regs->sp); -} - /* * Do a signal return; undo the signal stack. */ @@ -659,7 +642,7 @@ long sys_rt_sigreturn(struct pt_regs *regs) if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; - if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) + if (restore_altstack(&frame->uc.uc_stack)) goto badframe; return ax; @@ -865,7 +848,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) struct rt_sigframe_x32 __user *frame; sigset_t set; unsigned long ax; - struct pt_regs tregs; frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); @@ -879,8 +861,7 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; - tregs = *regs; - if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT) + if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; return ax; diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 05f404f53f59..28e3fa9056ea 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -192,7 +192,7 @@ 183 i386 getcwd sys_getcwd 184 i386 capget sys_capget 185 i386 capset sys_capset -186 i386 sigaltstack ptregs_sigaltstack stub32_sigaltstack +186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack 187 i386 sendfile sys_sendfile sys32_sendfile 188 i386 getpmsg 189 i386 putpmsg diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 7c58c84b7bc8..dc97328bd90a 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -137,7 +137,7 @@ 128 64 rt_sigtimedwait sys_rt_sigtimedwait 129 64 rt_sigqueueinfo sys_rt_sigqueueinfo 130 common rt_sigsuspend sys_rt_sigsuspend -131 64 sigaltstack stub_sigaltstack +131 64 sigaltstack sys_sigaltstack 132 common utime sys_utime 133 common mknod sys_mknod 134 64 uselib @@ -338,7 +338,7 @@ 522 x32 rt_sigpending sys32_rt_sigpending 523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait 524 x32 rt_sigqueueinfo sys32_rt_sigqueueinfo -525 x32 sigaltstack stub_x32_sigaltstack +525 x32 sigaltstack compat_sys_sigaltstack 526 x32 timer_create compat_sys_timer_create 527 x32 mq_notify compat_sys_mq_notify 528 x32 kexec_load compat_sys_kexec_load diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 983997041963..53c90fd412d1 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -13,8 +13,7 @@ endmenu config UML_X86 def_bool y select GENERIC_FIND_FIRST_BIT - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE + select GENERIC_SIGALTSTACK config 64BIT bool "64-bit kernel" if SUBARCH = "x86" diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h index 755133258c45..54f8102ccde5 100644 --- a/arch/x86/um/asm/ptrace.h +++ b/arch/x86/um/asm/ptrace.h @@ -86,4 +86,5 @@ extern long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr); #endif +#define user_stack_pointer(regs) PT_REGS_SP(regs) #endif /* __UM_X86_PTRACE_H */ diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index bdaa08cfbcf4..71cef48ea5cd 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -342,9 +342,7 @@ static int copy_ucontext_to_user(struct ucontext __user *uc, { int err = 0; - err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp); - err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags); - err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size); + err |= __save_altstack(&uc->uc_stack, sp); err |= copy_sc_to_user(&uc->uc_mcontext, fp, ¤t->thread.regs, 0); err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set)); return err; @@ -529,10 +527,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(PT_REGS_SP(regs)), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs)); err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]); err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate); diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 812e98c098e4..a0c3b0d1a122 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -27,7 +27,6 @@ #define ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old #define ptregs_vm86 sys_vm86 -#define ptregs_sigaltstack sys_sigaltstack #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; #include <asm/syscalls_32.h> diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index 170bd926a69c..f2f0723070ca 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c @@ -31,7 +31,6 @@ #define stub_fork sys_fork #define stub_vfork sys_vfork #define stub_execve sys_execve -#define stub_sigaltstack sys_sigaltstack #define stub_rt_sigreturn sys_rt_sigreturn #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 73d34e77c39c..5aab1acabf1c 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -13,8 +13,6 @@ config XTENSA select GENERIC_CPU_DEVICES select MODULES_USE_ELF_RELA select GENERIC_PCI_IOMAP - select GENERIC_KERNEL_THREAD - select GENERIC_KERNEL_EXECVE select ARCH_WANT_OPTIONAL_GPIOLIB select CLONE_BACKWARDS select IRQ_DOMAIN diff --git a/arch/xtensa/include/asm/mmu.h b/arch/xtensa/include/asm/mmu.h index 04890d6e2335..8554b2c8b17a 100644 --- a/arch/xtensa/include/asm/mmu.h +++ b/arch/xtensa/include/asm/mmu.h @@ -12,7 +12,7 @@ #define _XTENSA_MMU_H #ifndef CONFIG_MMU -#include <asm/nommu.h> +#include <asm-generic/mmu.h> #else /* Default "unsigned long" context */ diff --git a/arch/xtensa/include/asm/nommu.h b/arch/xtensa/include/asm/nommu.h deleted file mode 100644 index dce2c438c5ba..000000000000 --- a/arch/xtensa/include/asm/nommu.h +++ /dev/null @@ -1,3 +0,0 @@ -typedef struct { - unsigned long end_brk; -} mm_context_t; diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h index 58bf6fd3f913..682b1deac1f2 100644 --- a/arch/xtensa/include/asm/ptrace.h +++ b/arch/xtensa/include/asm/ptrace.h @@ -63,6 +63,8 @@ struct pt_regs { # define profile_pc(regs) instruction_pointer(regs) # endif +#define user_stack_pointer(regs) ((regs)->areg[1]) + #else /* __ASSEMBLY__ */ # include <asm/asm-offsets.h> diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index e002dbcc88b6..eb63ea87815c 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -1,7 +1,6 @@ #ifndef _XTENSA_UNISTD_H #define _XTENSA_UNISTD_H -#define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE #include <uapi/asm/unistd.h> diff --git a/arch/xtensa/include/uapi/asm/signal.h b/arch/xtensa/include/uapi/asm/signal.h index b88ce96f2af9..dacf716dd3e0 100644 --- a/arch/xtensa/include/uapi/asm/signal.h +++ b/arch/xtensa/include/uapi/asm/signal.h @@ -97,12 +97,6 @@ typedef struct { #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index bd5de08ad6fd..0576a7dd32a5 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -157,6 +157,7 @@ int tegra_ahb_enable_smmu(struct device_node *dn) EXPORT_SYMBOL(tegra_ahb_enable_smmu); #endif +#ifdef CONFIG_PM_SLEEP static int tegra_ahb_suspend(struct device *dev) { int i; @@ -176,6 +177,7 @@ static int tegra_ahb_resume(struct device *dev) gizmo_writel(ahb, ahb->ctx[i], tegra_ahb_gizmo[i]); return 0; } +#endif static UNIVERSAL_DEV_PM_OPS(tegra_ahb_pm, tegra_ahb_suspend, diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index c909b7b7d5f1..d70abe77f737 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -42,7 +42,8 @@ #include <linux/swab.h> #include <linux/slab.h> -#define VERSION "0.07" +#define VERSION "1.04" +#define DRIVER_VERSION 0x01 #define PTAG "solos-pci" #define CONFIG_RAM_SIZE 128 @@ -56,16 +57,21 @@ #define FLASH_BUSY 0x60 #define FPGA_MODE 0x5C #define FLASH_MODE 0x58 +#define GPIO_STATUS 0x54 +#define DRIVER_VER 0x50 #define TX_DMA_ADDR(port) (0x40 + (4 * (port))) #define RX_DMA_ADDR(port) (0x30 + (4 * (port))) #define DATA_RAM_SIZE 32768 #define BUF_SIZE 2048 #define OLD_BUF_SIZE 4096 /* For FPGA versions <= 2*/ -#define FPGA_PAGE 528 /* FPGA flash page size*/ -#define SOLOS_PAGE 512 /* Solos flash page size*/ -#define FPGA_BLOCK (FPGA_PAGE * 8) /* FPGA flash block size*/ -#define SOLOS_BLOCK (SOLOS_PAGE * 8) /* Solos flash block size*/ +/* Old boards use ATMEL AD45DB161D flash */ +#define ATMEL_FPGA_PAGE 528 /* FPGA flash page size*/ +#define ATMEL_SOLOS_PAGE 512 /* Solos flash page size*/ +#define ATMEL_FPGA_BLOCK (ATMEL_FPGA_PAGE * 8) /* FPGA block size*/ +#define ATMEL_SOLOS_BLOCK (ATMEL_SOLOS_PAGE * 8) /* Solos block size*/ +/* Current boards use M25P/M25PE SPI flash */ +#define SPI_FLASH_BLOCK (256 * 64) #define RX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2) #define TX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2 + (card->buffer_size)) @@ -122,11 +128,14 @@ struct solos_card { struct sk_buff_head cli_queue[4]; struct sk_buff *tx_skb[4]; struct sk_buff *rx_skb[4]; + unsigned char *dma_bounce; wait_queue_head_t param_wq; wait_queue_head_t fw_wq; int using_dma; + int dma_alignment; int fpga_version; int buffer_size; + int atmel_flash; }; @@ -451,7 +460,6 @@ static ssize_t console_show(struct device *dev, struct device_attribute *attr, len = skb->len; memcpy(buf, skb->data, len); - dev_dbg(&card->dev->dev, "len: %d\n", len); kfree_skb(skb); return len; @@ -498,6 +506,78 @@ static ssize_t console_store(struct device *dev, struct device_attribute *attr, return err?:count; } +struct geos_gpio_attr { + struct device_attribute attr; + int offset; +}; + +#define SOLOS_GPIO_ATTR(_name, _mode, _show, _store, _offset) \ + struct geos_gpio_attr gpio_attr_##_name = { \ + .attr = __ATTR(_name, _mode, _show, _store), \ + .offset = _offset } + +static ssize_t geos_gpio_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + struct geos_gpio_attr *gattr = container_of(attr, struct geos_gpio_attr, attr); + struct solos_card *card = pci_get_drvdata(pdev); + uint32_t data32; + + if (count != 1 && (count != 2 || buf[1] != '\n')) + return -EINVAL; + + spin_lock_irq(&card->param_queue_lock); + data32 = ioread32(card->config_regs + GPIO_STATUS); + if (buf[0] == '1') { + data32 |= 1 << gattr->offset; + iowrite32(data32, card->config_regs + GPIO_STATUS); + } else if (buf[0] == '0') { + data32 &= ~(1 << gattr->offset); + iowrite32(data32, card->config_regs + GPIO_STATUS); + } else { + count = -EINVAL; + } + spin_lock_irq(&card->param_queue_lock); + return count; +} + +static ssize_t geos_gpio_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + struct geos_gpio_attr *gattr = container_of(attr, struct geos_gpio_attr, attr); + struct solos_card *card = pci_get_drvdata(pdev); + uint32_t data32; + + data32 = ioread32(card->config_regs + GPIO_STATUS); + data32 = (data32 >> gattr->offset) & 1; + + return sprintf(buf, "%d\n", data32); +} + +static ssize_t hardware_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + struct geos_gpio_attr *gattr = container_of(attr, struct geos_gpio_attr, attr); + struct solos_card *card = pci_get_drvdata(pdev); + uint32_t data32; + + data32 = ioread32(card->config_regs + GPIO_STATUS); + switch (gattr->offset) { + case 0: + /* HardwareVersion */ + data32 = data32 & 0x1F; + break; + case 1: + /* HardwareVariant */ + data32 = (data32 >> 5) & 0x0F; + break; + } + return sprintf(buf, "%d\n", data32); +} + static DEVICE_ATTR(console, 0644, console_show, console_store); @@ -506,6 +586,14 @@ static DEVICE_ATTR(console, 0644, console_show, console_store); #include "solos-attrlist.c" +static SOLOS_GPIO_ATTR(GPIO1, 0644, geos_gpio_show, geos_gpio_store, 9); +static SOLOS_GPIO_ATTR(GPIO2, 0644, geos_gpio_show, geos_gpio_store, 10); +static SOLOS_GPIO_ATTR(GPIO3, 0644, geos_gpio_show, geos_gpio_store, 11); +static SOLOS_GPIO_ATTR(GPIO4, 0644, geos_gpio_show, geos_gpio_store, 12); +static SOLOS_GPIO_ATTR(GPIO5, 0644, geos_gpio_show, geos_gpio_store, 13); +static SOLOS_GPIO_ATTR(PushButton, 0444, geos_gpio_show, NULL, 14); +static SOLOS_GPIO_ATTR(HardwareVersion, 0444, hardware_show, NULL, 0); +static SOLOS_GPIO_ATTR(HardwareVariant, 0444, hardware_show, NULL, 1); #undef SOLOS_ATTR_RO #undef SOLOS_ATTR_RW @@ -522,6 +610,23 @@ static struct attribute_group solos_attr_group = { .name = "parameters", }; +static struct attribute *gpio_attrs[] = { + &gpio_attr_GPIO1.attr.attr, + &gpio_attr_GPIO2.attr.attr, + &gpio_attr_GPIO3.attr.attr, + &gpio_attr_GPIO4.attr.attr, + &gpio_attr_GPIO5.attr.attr, + &gpio_attr_PushButton.attr.attr, + &gpio_attr_HardwareVersion.attr.attr, + &gpio_attr_HardwareVariant.attr.attr, + NULL +}; + +static struct attribute_group gpio_attr_group = { + .attrs = gpio_attrs, + .name = "gpio", +}; + static int flash_upgrade(struct solos_card *card, int chip) { const struct firmware *fw; @@ -533,16 +638,25 @@ static int flash_upgrade(struct solos_card *card, int chip) switch (chip) { case 0: fw_name = "solos-FPGA.bin"; - blocksize = FPGA_BLOCK; + if (card->atmel_flash) + blocksize = ATMEL_FPGA_BLOCK; + else + blocksize = SPI_FLASH_BLOCK; break; case 1: fw_name = "solos-Firmware.bin"; - blocksize = SOLOS_BLOCK; + if (card->atmel_flash) + blocksize = ATMEL_SOLOS_BLOCK; + else + blocksize = SPI_FLASH_BLOCK; break; case 2: if (card->fpga_version > LEGACY_BUFFERS){ fw_name = "solos-db-FPGA.bin"; - blocksize = FPGA_BLOCK; + if (card->atmel_flash) + blocksize = ATMEL_FPGA_BLOCK; + else + blocksize = SPI_FLASH_BLOCK; } else { dev_info(&card->dev->dev, "FPGA version doesn't support" " daughter board upgrades\n"); @@ -552,7 +666,10 @@ static int flash_upgrade(struct solos_card *card, int chip) case 3: if (card->fpga_version > LEGACY_BUFFERS){ fw_name = "solos-Firmware.bin"; - blocksize = SOLOS_BLOCK; + if (card->atmel_flash) + blocksize = ATMEL_SOLOS_BLOCK; + else + blocksize = SPI_FLASH_BLOCK; } else { dev_info(&card->dev->dev, "FPGA version doesn't support" " daughter board upgrades\n"); @@ -568,6 +685,9 @@ static int flash_upgrade(struct solos_card *card, int chip) dev_info(&card->dev->dev, "Flash upgrade starting\n"); + /* New FPGAs require driver version before permitting flash upgrades */ + iowrite32(DRIVER_VERSION, card->config_regs + DRIVER_VER); + numblocks = fw->size / blocksize; dev_info(&card->dev->dev, "Firmware size: %zd\n", fw->size); dev_info(&card->dev->dev, "Number of blocks: %d\n", numblocks); @@ -597,9 +717,13 @@ static int flash_upgrade(struct solos_card *card, int chip) /* dev_info(&card->dev->dev, "Set FPGA Flash mode to Block Write\n"); */ iowrite32(((chip * 2) + 1), card->config_regs + FLASH_MODE); - /* Copy block to buffer, swapping each 16 bits */ + /* Copy block to buffer, swapping each 16 bits for Atmel flash */ for(i = 0; i < blocksize; i += 4) { - uint32_t word = swahb32p((uint32_t *)(fw->data + offset + i)); + uint32_t word; + if (card->atmel_flash) + word = swahb32p((uint32_t *)(fw->data + offset + i)); + else + word = *(uint32_t *)(fw->data + offset + i); if(card->fpga_version > LEGACY_BUFFERS) iowrite32(word, FLASH_BUF + i); else @@ -961,7 +1085,12 @@ static uint32_t fpga_tx(struct solos_card *card) tx_started |= 1 << port; oldskb = skb; /* We're done with this skb already */ } else if (skb && card->using_dma) { - SKB_CB(skb)->dma_addr = pci_map_single(card->dev, skb->data, + unsigned char *data = skb->data; + if ((unsigned long)data & card->dma_alignment) { + data = card->dma_bounce + (BUF_SIZE * port); + memcpy(data, skb->data, skb->len); + } + SKB_CB(skb)->dma_addr = pci_map_single(card->dev, data, skb->len, PCI_DMA_TODEVICE); card->tx_skb[port] = skb; iowrite32(SKB_CB(skb)->dma_addr, @@ -1133,18 +1262,33 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) db_fpga_upgrade = db_firmware_upgrade = 0; } + /* Stopped using Atmel flash after 0.03-38 */ + if (fpga_ver < 39) + card->atmel_flash = 1; + else + card->atmel_flash = 0; + + data32 = ioread32(card->config_regs + PORTS); + card->nr_ports = (data32 & 0x000000FF); + if (card->fpga_version >= DMA_SUPPORTED) { pci_set_master(dev); card->using_dma = 1; + if (1) { /* All known FPGA versions so far */ + card->dma_alignment = 3; + card->dma_bounce = kmalloc(card->nr_ports * BUF_SIZE, GFP_KERNEL); + if (!card->dma_bounce) { + dev_warn(&card->dev->dev, "Failed to allocate DMA bounce buffers\n"); + /* Fallback to MMIO doesn't work */ + goto out_unmap_both; + } + } } else { card->using_dma = 0; /* Set RX empty flag for all ports */ iowrite32(0xF0, card->config_regs + FLAGS_ADDR); } - data32 = ioread32(card->config_regs + PORTS); - card->nr_ports = (data32 & 0x000000FF); - pci_set_drvdata(dev, card); tasklet_init(&card->tlet, solos_bh, (unsigned long)card); @@ -1179,6 +1323,10 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) if (err) goto out_free_irq; + if (card->fpga_version >= DMA_SUPPORTED && + sysfs_create_group(&card->dev->dev.kobj, &gpio_attr_group)) + dev_err(&card->dev->dev, "Could not register parameter group for GPIOs\n"); + return 0; out_free_irq: @@ -1187,6 +1335,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) tasklet_kill(&card->tlet); out_unmap_both: + kfree(card->dma_bounce); pci_set_drvdata(dev, NULL); pci_iounmap(dev, card->buffers); out_unmap_config: @@ -1289,11 +1438,16 @@ static void fpga_remove(struct pci_dev *dev) iowrite32(1, card->config_regs + FPGA_MODE); (void)ioread32(card->config_regs + FPGA_MODE); + if (card->fpga_version >= DMA_SUPPORTED) + sysfs_remove_group(&card->dev->dev.kobj, &gpio_attr_group); + atm_remove(card); free_irq(dev->irq, card); tasklet_kill(&card->tlet); + kfree(card->dma_bounce); + /* Release device from reset */ iowrite32(0, card->config_regs + FPGA_MODE); (void)ioread32(card->config_regs + FPGA_MODE); diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 147d1a4dd269..17cf7cad601e 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -148,7 +148,7 @@ static int dev_mkdir(const char *name, umode_t mode) struct path path; int err; - dentry = kern_path_create(AT_FDCWD, name, &path, 1); + dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); if (IS_ERR(dentry)) return PTR_ERR(dentry); diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index 460e22dee36d..a3f79c495a41 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -298,6 +298,8 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach, struct sg_table *sg_table, enum dma_data_direction direction) { + might_sleep(); + if (WARN_ON(!attach || !attach->dmabuf || !sg_table)) return; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bb3d9be3b1b4..89576a0b3f2e 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -61,15 +61,29 @@ #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ -#define RBD_MAX_SNAP_NAME_LEN 32 +#define RBD_SNAP_DEV_NAME_PREFIX "snap_" +#define RBD_MAX_SNAP_NAME_LEN \ + (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) + #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ #define RBD_MAX_OPT_LEN 1024 #define RBD_SNAP_HEAD_NAME "-" +/* This allows a single page to hold an image name sent by OSD */ +#define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1) #define RBD_IMAGE_ID_LEN_MAX 64 + #define RBD_OBJ_PREFIX_LEN_MAX 64 +/* Feature bits */ + +#define RBD_FEATURE_LAYERING 1 + +/* Features supported by this (client software) implementation. */ + +#define RBD_FEATURES_ALL (0) + /* * An RBD device name will be "rbd#", where the "rbd" comes from * RBD_DRV_NAME above, and # is a unique integer identifier. @@ -101,6 +115,27 @@ struct rbd_image_header { u64 obj_version; }; +/* + * An rbd image specification. + * + * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely + * identify an image. + */ +struct rbd_spec { + u64 pool_id; + char *pool_name; + + char *image_id; + size_t image_id_len; + char *image_name; + size_t image_name_len; + + u64 snap_id; + char *snap_name; + + struct kref kref; +}; + struct rbd_options { bool read_only; }; @@ -155,11 +190,8 @@ struct rbd_snap { }; struct rbd_mapping { - char *snap_name; - u64 snap_id; u64 size; u64 features; - bool snap_exists; bool read_only; }; @@ -173,7 +205,6 @@ struct rbd_device { struct gendisk *disk; /* blkdev's gendisk and rq */ u32 image_format; /* Either 1 or 2 */ - struct rbd_options rbd_opts; struct rbd_client *rbd_client; char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ @@ -181,17 +212,17 @@ struct rbd_device { spinlock_t lock; /* queue lock */ struct rbd_image_header header; - char *image_id; - size_t image_id_len; - char *image_name; - size_t image_name_len; + bool exists; + struct rbd_spec *spec; + char *header_name; - char *pool_name; - int pool_id; struct ceph_osd_event *watch_event; struct ceph_osd_request *watch_request; + struct rbd_spec *parent_spec; + u64 parent_overlap; + /* protects updating the header */ struct rw_semaphore header_rwsem; @@ -204,6 +235,7 @@ struct rbd_device { /* sysfs related */ struct device dev; + unsigned long open_count; }; static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ @@ -218,7 +250,7 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev); static int rbd_dev_snaps_register(struct rbd_device *rbd_dev); static void rbd_dev_release(struct device *dev); -static void __rbd_remove_snap_dev(struct rbd_snap *snap); +static void rbd_remove_snap_dev(struct rbd_snap *snap); static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count); @@ -258,17 +290,8 @@ static struct device rbd_root_dev = { # define rbd_assert(expr) ((void) 0) #endif /* !RBD_DEBUG */ -static struct device *rbd_get_dev(struct rbd_device *rbd_dev) -{ - return get_device(&rbd_dev->dev); -} - -static void rbd_put_dev(struct rbd_device *rbd_dev) -{ - put_device(&rbd_dev->dev); -} - -static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver); +static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver); +static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); static int rbd_open(struct block_device *bdev, fmode_t mode) { @@ -277,8 +300,11 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) return -EROFS; - rbd_get_dev(rbd_dev); + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + (void) get_device(&rbd_dev->dev); set_device_ro(bdev, rbd_dev->mapping.read_only); + rbd_dev->open_count++; + mutex_unlock(&ctl_mutex); return 0; } @@ -287,7 +313,11 @@ static int rbd_release(struct gendisk *disk, fmode_t mode) { struct rbd_device *rbd_dev = disk->private_data; - rbd_put_dev(rbd_dev); + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + rbd_assert(rbd_dev->open_count > 0); + rbd_dev->open_count--; + put_device(&rbd_dev->dev); + mutex_unlock(&ctl_mutex); return 0; } @@ -388,7 +418,7 @@ enum { static match_table_t rbd_opts_tokens = { /* int args above */ /* string args above */ - {Opt_read_only, "mapping.read_only"}, + {Opt_read_only, "read_only"}, {Opt_read_only, "ro"}, /* Alternate spelling */ {Opt_read_write, "read_write"}, {Opt_read_write, "rw"}, /* Alternate spelling */ @@ -441,33 +471,17 @@ static int parse_rbd_opts_token(char *c, void *private) * Get a ceph client with specific addr and configuration, if one does * not exist create it. */ -static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr, - size_t mon_addr_len, char *options) +static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) { - struct rbd_options *rbd_opts = &rbd_dev->rbd_opts; - struct ceph_options *ceph_opts; struct rbd_client *rbdc; - rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; - - ceph_opts = ceph_parse_options(options, mon_addr, - mon_addr + mon_addr_len, - parse_rbd_opts_token, rbd_opts); - if (IS_ERR(ceph_opts)) - return PTR_ERR(ceph_opts); - rbdc = rbd_client_find(ceph_opts); - if (rbdc) { - /* using an existing client */ + if (rbdc) /* using an existing client */ ceph_destroy_options(ceph_opts); - } else { + else rbdc = rbd_client_create(ceph_opts); - if (IS_ERR(rbdc)) - return PTR_ERR(rbdc); - } - rbd_dev->rbd_client = rbdc; - return 0; + return rbdc; } /* @@ -492,10 +506,10 @@ static void rbd_client_release(struct kref *kref) * Drop reference to ceph client node. If it's not referenced anymore, release * it. */ -static void rbd_put_client(struct rbd_device *rbd_dev) +static void rbd_put_client(struct rbd_client *rbdc) { - kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); - rbd_dev->rbd_client = NULL; + if (rbdc) + kref_put(&rbdc->kref, rbd_client_release); } /* @@ -524,6 +538,16 @@ static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) return false; + /* The bio layer requires at least sector-sized I/O */ + + if (ondisk->options.order < SECTOR_SHIFT) + return false; + + /* If we use u64 in a few spots we may be able to loosen this */ + + if (ondisk->options.order > 8 * sizeof (int) - 1) + return false; + /* * The size of a snapshot header has to fit in a size_t, and * that limits the number of snapshots. @@ -635,6 +659,20 @@ out_err: return -ENOMEM; } +static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) +{ + struct rbd_snap *snap; + + if (snap_id == CEPH_NOSNAP) + return RBD_SNAP_HEAD_NAME; + + list_for_each_entry(snap, &rbd_dev->snaps, node) + if (snap_id == snap->id) + return snap->name; + + return NULL; +} + static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) { @@ -642,7 +680,7 @@ static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) list_for_each_entry(snap, &rbd_dev->snaps, node) { if (!strcmp(snap_name, snap->name)) { - rbd_dev->mapping.snap_id = snap->id; + rbd_dev->spec->snap_id = snap->id; rbd_dev->mapping.size = snap->size; rbd_dev->mapping.features = snap->features; @@ -653,26 +691,23 @@ static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) return -ENOENT; } -static int rbd_dev_set_mapping(struct rbd_device *rbd_dev, char *snap_name) +static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) { int ret; - if (!memcmp(snap_name, RBD_SNAP_HEAD_NAME, + if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME, sizeof (RBD_SNAP_HEAD_NAME))) { - rbd_dev->mapping.snap_id = CEPH_NOSNAP; + rbd_dev->spec->snap_id = CEPH_NOSNAP; rbd_dev->mapping.size = rbd_dev->header.image_size; rbd_dev->mapping.features = rbd_dev->header.features; - rbd_dev->mapping.snap_exists = false; - rbd_dev->mapping.read_only = rbd_dev->rbd_opts.read_only; ret = 0; } else { - ret = snap_by_name(rbd_dev, snap_name); + ret = snap_by_name(rbd_dev, rbd_dev->spec->snap_name); if (ret < 0) goto done; - rbd_dev->mapping.snap_exists = true; rbd_dev->mapping.read_only = true; } - rbd_dev->mapping.snap_name = snap_name; + rbd_dev->exists = true; done: return ret; } @@ -695,13 +730,13 @@ static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) u64 segment; int ret; - name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); + name = kmalloc(MAX_OBJ_NAME_SIZE + 1, GFP_NOIO); if (!name) return NULL; segment = offset >> rbd_dev->header.obj_order; - ret = snprintf(name, RBD_MAX_SEG_NAME_LEN, "%s.%012llx", + ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, "%s.%012llx", rbd_dev->header.object_prefix, segment); - if (ret < 0 || ret >= RBD_MAX_SEG_NAME_LEN) { + if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) { pr_err("error formatting segment name for #%llu (%d)\n", segment, ret); kfree(name); @@ -800,77 +835,144 @@ static void zero_bio_chain(struct bio *chain, int start_ofs) } /* - * bio_chain_clone - clone a chain of bios up to a certain length. - * might return a bio_pair that will need to be released. + * Clone a portion of a bio, starting at the given byte offset + * and continuing for the number of bytes indicated. */ -static struct bio *bio_chain_clone(struct bio **old, struct bio **next, - struct bio_pair **bp, - int len, gfp_t gfpmask) -{ - struct bio *old_chain = *old; - struct bio *new_chain = NULL; - struct bio *tail; - int total = 0; - - if (*bp) { - bio_pair_release(*bp); - *bp = NULL; - } +static struct bio *bio_clone_range(struct bio *bio_src, + unsigned int offset, + unsigned int len, + gfp_t gfpmask) +{ + struct bio_vec *bv; + unsigned int resid; + unsigned short idx; + unsigned int voff; + unsigned short end_idx; + unsigned short vcnt; + struct bio *bio; - while (old_chain && (total < len)) { - struct bio *tmp; + /* Handle the easy case for the caller */ - tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); - if (!tmp) - goto err_out; - gfpmask &= ~__GFP_WAIT; /* can't wait after the first */ + if (!offset && len == bio_src->bi_size) + return bio_clone(bio_src, gfpmask); - if (total + old_chain->bi_size > len) { - struct bio_pair *bp; + if (WARN_ON_ONCE(!len)) + return NULL; + if (WARN_ON_ONCE(len > bio_src->bi_size)) + return NULL; + if (WARN_ON_ONCE(offset > bio_src->bi_size - len)) + return NULL; - /* - * this split can only happen with a single paged bio, - * split_bio will BUG_ON if this is not the case - */ - dout("bio_chain_clone split! total=%d remaining=%d" - "bi_size=%u\n", - total, len - total, old_chain->bi_size); + /* Find first affected segment... */ - /* split the bio. We'll release it either in the next - call, or it will have to be released outside */ - bp = bio_split(old_chain, (len - total) / SECTOR_SIZE); - if (!bp) - goto err_out; + resid = offset; + __bio_for_each_segment(bv, bio_src, idx, 0) { + if (resid < bv->bv_len) + break; + resid -= bv->bv_len; + } + voff = resid; - __bio_clone(tmp, &bp->bio1); + /* ...and the last affected segment */ - *next = &bp->bio2; - } else { - __bio_clone(tmp, old_chain); - *next = old_chain->bi_next; - } + resid += len; + __bio_for_each_segment(bv, bio_src, end_idx, idx) { + if (resid <= bv->bv_len) + break; + resid -= bv->bv_len; + } + vcnt = end_idx - idx + 1; + + /* Build the clone */ - tmp->bi_bdev = NULL; - tmp->bi_next = NULL; - if (new_chain) - tail->bi_next = tmp; - else - new_chain = tmp; - tail = tmp; - old_chain = old_chain->bi_next; + bio = bio_alloc(gfpmask, (unsigned int) vcnt); + if (!bio) + return NULL; /* ENOMEM */ - total += tmp->bi_size; + bio->bi_bdev = bio_src->bi_bdev; + bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT); + bio->bi_rw = bio_src->bi_rw; + bio->bi_flags |= 1 << BIO_CLONED; + + /* + * Copy over our part of the bio_vec, then update the first + * and last (or only) entries. + */ + memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx], + vcnt * sizeof (struct bio_vec)); + bio->bi_io_vec[0].bv_offset += voff; + if (vcnt > 1) { + bio->bi_io_vec[0].bv_len -= voff; + bio->bi_io_vec[vcnt - 1].bv_len = resid; + } else { + bio->bi_io_vec[0].bv_len = len; } - rbd_assert(total == len); + bio->bi_vcnt = vcnt; + bio->bi_size = len; + bio->bi_idx = 0; + + return bio; +} + +/* + * Clone a portion of a bio chain, starting at the given byte offset + * into the first bio in the source chain and continuing for the + * number of bytes indicated. The result is another bio chain of + * exactly the given length, or a null pointer on error. + * + * The bio_src and offset parameters are both in-out. On entry they + * refer to the first source bio and the offset into that bio where + * the start of data to be cloned is located. + * + * On return, bio_src is updated to refer to the bio in the source + * chain that contains first un-cloned byte, and *offset will + * contain the offset of that byte within that bio. + */ +static struct bio *bio_chain_clone_range(struct bio **bio_src, + unsigned int *offset, + unsigned int len, + gfp_t gfpmask) +{ + struct bio *bi = *bio_src; + unsigned int off = *offset; + struct bio *chain = NULL; + struct bio **end; + + /* Build up a chain of clone bios up to the limit */ + + if (!bi || off >= bi->bi_size || !len) + return NULL; /* Nothing to clone */ - *old = old_chain; + end = &chain; + while (len) { + unsigned int bi_size; + struct bio *bio; + + if (!bi) + goto out_err; /* EINVAL; ran out of bio's */ + bi_size = min_t(unsigned int, bi->bi_size - off, len); + bio = bio_clone_range(bi, off, bi_size, gfpmask); + if (!bio) + goto out_err; /* ENOMEM */ + + *end = bio; + end = &bio->bi_next; + + off += bi_size; + if (off == bi->bi_size) { + bi = bi->bi_next; + off = 0; + } + len -= bi_size; + } + *bio_src = bi; + *offset = off; - return new_chain; + return chain; +out_err: + bio_chain_put(chain); -err_out: - dout("bio_chain_clone with err\n"); - bio_chain_put(new_chain); return NULL; } @@ -988,8 +1090,9 @@ static int rbd_do_request(struct request *rq, req_data->coll_index = coll_index; } - dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name, - (unsigned long long) ofs, (unsigned long long) len); + dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", + object_name, (unsigned long long) ofs, + (unsigned long long) len, coll, coll_index); osdc = &rbd_dev->rbd_client->client->osdc; req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, @@ -1019,7 +1122,7 @@ static int rbd_do_request(struct request *rq, layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); layout->fl_stripe_count = cpu_to_le32(1); layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id); + layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id); ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, req, ops); rbd_assert(ret == 0); @@ -1154,8 +1257,6 @@ done: static int rbd_do_op(struct request *rq, struct rbd_device *rbd_dev, struct ceph_snap_context *snapc, - u64 snapid, - int opcode, int flags, u64 ofs, u64 len, struct bio *bio, struct rbd_req_coll *coll, @@ -1167,6 +1268,9 @@ static int rbd_do_op(struct request *rq, int ret; struct ceph_osd_req_op *ops; u32 payload_len; + int opcode; + int flags; + u64 snapid; seg_name = rbd_segment_name(rbd_dev, ofs); if (!seg_name) @@ -1174,7 +1278,18 @@ static int rbd_do_op(struct request *rq, seg_len = rbd_segment_length(rbd_dev, ofs, len); seg_ofs = rbd_segment_offset(rbd_dev, ofs); - payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); + if (rq_data_dir(rq) == WRITE) { + opcode = CEPH_OSD_OP_WRITE; + flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; + snapid = CEPH_NOSNAP; + payload_len = seg_len; + } else { + opcode = CEPH_OSD_OP_READ; + flags = CEPH_OSD_FLAG_READ; + snapc = NULL; + snapid = rbd_dev->spec->snap_id; + payload_len = 0; + } ret = -ENOMEM; ops = rbd_create_rw_ops(1, opcode, payload_len); @@ -1202,41 +1317,6 @@ done: } /* - * Request async osd write - */ -static int rbd_req_write(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 ofs, u64 len, - struct bio *bio, - struct rbd_req_coll *coll, - int coll_index) -{ - return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, - CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ofs, len, bio, coll, coll_index); -} - -/* - * Request async osd read - */ -static int rbd_req_read(struct request *rq, - struct rbd_device *rbd_dev, - u64 snapid, - u64 ofs, u64 len, - struct bio *bio, - struct rbd_req_coll *coll, - int coll_index) -{ - return rbd_do_op(rq, rbd_dev, NULL, - snapid, - CEPH_OSD_OP_READ, - CEPH_OSD_FLAG_READ, - ofs, len, bio, coll, coll_index); -} - -/* * Request sync osd read */ static int rbd_req_sync_read(struct rbd_device *rbd_dev, @@ -1304,7 +1384,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", rbd_dev->header_name, (unsigned long long) notify_id, (unsigned int) opcode); - rc = rbd_refresh_header(rbd_dev, &hver); + rc = rbd_dev_refresh(rbd_dev, &hver); if (rc) pr_warning(RBD_DRV_NAME "%d got notification but failed to " " update snaps: %d\n", rbd_dev->major, rc); @@ -1460,18 +1540,16 @@ static void rbd_rq_fn(struct request_queue *q) { struct rbd_device *rbd_dev = q->queuedata; struct request *rq; - struct bio_pair *bp = NULL; while ((rq = blk_fetch_request(q))) { struct bio *bio; - struct bio *rq_bio, *next_bio = NULL; bool do_write; unsigned int size; - u64 op_size = 0; u64 ofs; int num_segs, cur_seg = 0; struct rbd_req_coll *coll; struct ceph_snap_context *snapc; + unsigned int bio_offset; dout("fetched request\n"); @@ -1483,10 +1561,6 @@ static void rbd_rq_fn(struct request_queue *q) /* deduce our operation (read, write) */ do_write = (rq_data_dir(rq) == WRITE); - - size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * SECTOR_SIZE; - rq_bio = rq->bio; if (do_write && rbd_dev->mapping.read_only) { __blk_end_request_all(rq, -EROFS); continue; @@ -1496,8 +1570,8 @@ static void rbd_rq_fn(struct request_queue *q) down_read(&rbd_dev->header_rwsem); - if (rbd_dev->mapping.snap_id != CEPH_NOSNAP && - !rbd_dev->mapping.snap_exists) { + if (!rbd_dev->exists) { + rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); up_read(&rbd_dev->header_rwsem); dout("request for non-existent snapshot"); spin_lock_irq(q->queue_lock); @@ -1509,6 +1583,10 @@ static void rbd_rq_fn(struct request_queue *q) up_read(&rbd_dev->header_rwsem); + size = blk_rq_bytes(rq); + ofs = blk_rq_pos(rq) * SECTOR_SIZE; + bio = rq->bio; + dout("%s 0x%x bytes at 0x%llx\n", do_write ? "write" : "read", size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); @@ -1528,45 +1606,37 @@ static void rbd_rq_fn(struct request_queue *q) continue; } + bio_offset = 0; do { - /* a bio clone to be passed down to OSD req */ + u64 limit = rbd_segment_length(rbd_dev, ofs, size); + unsigned int chain_size; + struct bio *bio_chain; + + BUG_ON(limit > (u64) UINT_MAX); + chain_size = (unsigned int) limit; dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); - op_size = rbd_segment_length(rbd_dev, ofs, size); + kref_get(&coll->kref); - bio = bio_chain_clone(&rq_bio, &next_bio, &bp, - op_size, GFP_ATOMIC); - if (!bio) { - rbd_coll_end_req_index(rq, coll, cur_seg, - -ENOMEM, op_size); - goto next_seg; - } + /* Pass a cloned bio chain via an osd request */ - /* init OSD command: write or read */ - if (do_write) - rbd_req_write(rq, rbd_dev, - snapc, - ofs, - op_size, bio, - coll, cur_seg); + bio_chain = bio_chain_clone_range(&bio, + &bio_offset, chain_size, + GFP_ATOMIC); + if (bio_chain) + (void) rbd_do_op(rq, rbd_dev, snapc, + ofs, chain_size, + bio_chain, coll, cur_seg); else - rbd_req_read(rq, rbd_dev, - rbd_dev->mapping.snap_id, - ofs, - op_size, bio, - coll, cur_seg); - -next_seg: - size -= op_size; - ofs += op_size; + rbd_coll_end_req_index(rq, coll, cur_seg, + -ENOMEM, chain_size); + size -= chain_size; + ofs += chain_size; cur_seg++; - rq_bio = next_bio; } while (size > 0); kref_put(&coll->kref, rbd_coll_release); - if (bp) - bio_pair_release(bp); spin_lock_irq(q->queue_lock); ceph_put_snap_context(snapc); @@ -1576,28 +1646,47 @@ next_seg: /* * a queue callback. Makes sure that we don't create a bio that spans across * multiple osd objects. One exception would be with a single page bios, - * which we handle later at bio_chain_clone + * which we handle later at bio_chain_clone_range() */ static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, struct bio_vec *bvec) { struct rbd_device *rbd_dev = q->queuedata; - unsigned int chunk_sectors; - sector_t sector; - unsigned int bio_sectors; - int max; + sector_t sector_offset; + sector_t sectors_per_obj; + sector_t obj_sector_offset; + int ret; - chunk_sectors = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); - sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev); - bio_sectors = bmd->bi_size >> SECTOR_SHIFT; + /* + * Find how far into its rbd object the partition-relative + * bio start sector is to offset relative to the enclosing + * device. + */ + sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector; + sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); + obj_sector_offset = sector_offset & (sectors_per_obj - 1); + + /* + * Compute the number of bytes from that offset to the end + * of the object. Account for what's already used by the bio. + */ + ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT; + if (ret > bmd->bi_size) + ret -= bmd->bi_size; + else + ret = 0; - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) - + bio_sectors)) << SECTOR_SHIFT; - if (max < 0) - max = 0; /* bio_add cannot handle a negative return */ - if (max <= bvec->bv_len && bio_sectors == 0) - return bvec->bv_len; - return max; + /* + * Don't send back more than was asked for. And if the bio + * was empty, let the whole thing through because: "Note + * that a block device *must* allow a single page to be + * added to an empty bio." + */ + rbd_assert(bvec->bv_len <= PAGE_SIZE); + if (ret > (int) bvec->bv_len || !bmd->bi_size) + ret = (int) bvec->bv_len; + + return ret; } static void rbd_free_disk(struct rbd_device *rbd_dev) @@ -1663,13 +1752,13 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) ret = -ENXIO; pr_warning("short header read for image %s" " (want %zd got %d)\n", - rbd_dev->image_name, size, ret); + rbd_dev->spec->image_name, size, ret); goto out_err; } if (!rbd_dev_ondisk_valid(ondisk)) { ret = -ENXIO; pr_warning("invalid header for image %s\n", - rbd_dev->image_name); + rbd_dev->spec->image_name); goto out_err; } @@ -1707,19 +1796,32 @@ static int rbd_read_header(struct rbd_device *rbd_dev, return ret; } -static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) +static void rbd_remove_all_snaps(struct rbd_device *rbd_dev) { struct rbd_snap *snap; struct rbd_snap *next; list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) - __rbd_remove_snap_dev(snap); + rbd_remove_snap_dev(snap); +} + +static void rbd_update_mapping_size(struct rbd_device *rbd_dev) +{ + sector_t size; + + if (rbd_dev->spec->snap_id != CEPH_NOSNAP) + return; + + size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE; + dout("setting size to %llu sectors", (unsigned long long) size); + rbd_dev->mapping.size = (u64) size; + set_capacity(rbd_dev->disk, size); } /* * only read the first part of the ondisk header, without the snaps info */ -static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) +static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver) { int ret; struct rbd_image_header h; @@ -1730,17 +1832,9 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) down_write(&rbd_dev->header_rwsem); - /* resized? */ - if (rbd_dev->mapping.snap_id == CEPH_NOSNAP) { - sector_t size = (sector_t) h.image_size / SECTOR_SIZE; - - if (size != (sector_t) rbd_dev->mapping.size) { - dout("setting size to %llu sectors", - (unsigned long long) size); - rbd_dev->mapping.size = (u64) size; - set_capacity(rbd_dev->disk, size); - } - } + /* Update image size, and check for resize of mapped image */ + rbd_dev->header.image_size = h.image_size; + rbd_update_mapping_size(rbd_dev); /* rbd_dev->header.object_prefix shouldn't change */ kfree(rbd_dev->header.snap_sizes); @@ -1768,12 +1862,16 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) return ret; } -static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) +static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver) { int ret; + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - ret = __rbd_refresh_header(rbd_dev, hver); + if (rbd_dev->image_format == 1) + ret = rbd_dev_v1_refresh(rbd_dev, hver); + else + ret = rbd_dev_v2_refresh(rbd_dev, hver); mutex_unlock(&ctl_mutex); return ret; @@ -1885,7 +1983,7 @@ static ssize_t rbd_pool_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->pool_name); + return sprintf(buf, "%s\n", rbd_dev->spec->pool_name); } static ssize_t rbd_pool_id_show(struct device *dev, @@ -1893,7 +1991,8 @@ static ssize_t rbd_pool_id_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%d\n", rbd_dev->pool_id); + return sprintf(buf, "%llu\n", + (unsigned long long) rbd_dev->spec->pool_id); } static ssize_t rbd_name_show(struct device *dev, @@ -1901,7 +2000,10 @@ static ssize_t rbd_name_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->image_name); + if (rbd_dev->spec->image_name) + return sprintf(buf, "%s\n", rbd_dev->spec->image_name); + + return sprintf(buf, "(unknown)\n"); } static ssize_t rbd_image_id_show(struct device *dev, @@ -1909,7 +2011,7 @@ static ssize_t rbd_image_id_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->image_id); + return sprintf(buf, "%s\n", rbd_dev->spec->image_id); } /* @@ -1922,7 +2024,50 @@ static ssize_t rbd_snap_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%s\n", rbd_dev->mapping.snap_name); + return sprintf(buf, "%s\n", rbd_dev->spec->snap_name); +} + +/* + * For an rbd v2 image, shows the pool id, image id, and snapshot id + * for the parent image. If there is no parent, simply shows + * "(no parent image)". + */ +static ssize_t rbd_parent_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); + struct rbd_spec *spec = rbd_dev->parent_spec; + int count; + char *bufp = buf; + + if (!spec) + return sprintf(buf, "(no parent image)\n"); + + count = sprintf(bufp, "pool_id %llu\npool_name %s\n", + (unsigned long long) spec->pool_id, spec->pool_name); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id, + spec->image_name ? spec->image_name : "(unknown)"); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n", + (unsigned long long) spec->snap_id, spec->snap_name); + if (count < 0) + return count; + bufp += count; + + count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap); + if (count < 0) + return count; + bufp += count; + + return (ssize_t) (bufp - buf); } static ssize_t rbd_image_refresh(struct device *dev, @@ -1933,7 +2078,7 @@ static ssize_t rbd_image_refresh(struct device *dev, struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); int ret; - ret = rbd_refresh_header(rbd_dev, NULL); + ret = rbd_dev_refresh(rbd_dev, NULL); return ret < 0 ? ret : size; } @@ -1948,6 +2093,7 @@ static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL); static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); +static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL); static struct attribute *rbd_attrs[] = { &dev_attr_size.attr, @@ -1959,6 +2105,7 @@ static struct attribute *rbd_attrs[] = { &dev_attr_name.attr, &dev_attr_image_id.attr, &dev_attr_current_snap.attr, + &dev_attr_parent.attr, &dev_attr_refresh.attr, NULL }; @@ -2047,6 +2194,74 @@ static struct device_type rbd_snap_device_type = { .release = rbd_snap_dev_release, }; +static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec) +{ + kref_get(&spec->kref); + + return spec; +} + +static void rbd_spec_free(struct kref *kref); +static void rbd_spec_put(struct rbd_spec *spec) +{ + if (spec) + kref_put(&spec->kref, rbd_spec_free); +} + +static struct rbd_spec *rbd_spec_alloc(void) +{ + struct rbd_spec *spec; + + spec = kzalloc(sizeof (*spec), GFP_KERNEL); + if (!spec) + return NULL; + kref_init(&spec->kref); + + rbd_spec_put(rbd_spec_get(spec)); /* TEMPORARY */ + + return spec; +} + +static void rbd_spec_free(struct kref *kref) +{ + struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref); + + kfree(spec->pool_name); + kfree(spec->image_id); + kfree(spec->image_name); + kfree(spec->snap_name); + kfree(spec); +} + +struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, + struct rbd_spec *spec) +{ + struct rbd_device *rbd_dev; + + rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL); + if (!rbd_dev) + return NULL; + + spin_lock_init(&rbd_dev->lock); + INIT_LIST_HEAD(&rbd_dev->node); + INIT_LIST_HEAD(&rbd_dev->snaps); + init_rwsem(&rbd_dev->header_rwsem); + + rbd_dev->spec = spec; + rbd_dev->rbd_client = rbdc; + + return rbd_dev; +} + +static void rbd_dev_destroy(struct rbd_device *rbd_dev) +{ + rbd_spec_put(rbd_dev->parent_spec); + kfree(rbd_dev->header_name); + rbd_put_client(rbd_dev->rbd_client); + rbd_spec_put(rbd_dev->spec); + kfree(rbd_dev); +} + static bool rbd_snap_registered(struct rbd_snap *snap) { bool ret = snap->dev.type == &rbd_snap_device_type; @@ -2057,7 +2272,7 @@ static bool rbd_snap_registered(struct rbd_snap *snap) return ret; } -static void __rbd_remove_snap_dev(struct rbd_snap *snap) +static void rbd_remove_snap_dev(struct rbd_snap *snap) { list_del(&snap->node); if (device_is_registered(&snap->dev)) @@ -2073,7 +2288,7 @@ static int rbd_register_snap_dev(struct rbd_snap *snap, dev->type = &rbd_snap_device_type; dev->parent = parent; dev->release = rbd_snap_dev_release; - dev_set_name(dev, "snap_%s", snap->name); + dev_set_name(dev, "%s%s", RBD_SNAP_DEV_NAME_PREFIX, snap->name); dout("%s: registering device for snapshot %s\n", __func__, snap->name); ret = device_register(dev); @@ -2189,6 +2404,7 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); if (ret < 0) goto out; + ret = 0; /* rbd_req_sync_exec() can return positive */ p = reply_buf; rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, @@ -2216,6 +2432,7 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, __le64 features; __le64 incompat; } features_buf = { 0 }; + u64 incompat; int ret; ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, @@ -2226,6 +2443,11 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); if (ret < 0) return ret; + + incompat = le64_to_cpu(features_buf.incompat); + if (incompat & ~RBD_FEATURES_ALL) + return -ENXIO; + *snap_features = le64_to_cpu(features_buf.features); dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", @@ -2242,6 +2464,183 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev) &rbd_dev->header.features); } +static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) +{ + struct rbd_spec *parent_spec; + size_t size; + void *reply_buf = NULL; + __le64 snapid; + void *p; + void *end; + char *image_id; + u64 overlap; + size_t len = 0; + int ret; + + parent_spec = rbd_spec_alloc(); + if (!parent_spec) + return -ENOMEM; + + size = sizeof (__le64) + /* pool_id */ + sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX + /* image_id */ + sizeof (__le64) + /* snap_id */ + sizeof (__le64); /* overlap */ + reply_buf = kmalloc(size, GFP_KERNEL); + if (!reply_buf) { + ret = -ENOMEM; + goto out_err; + } + + snapid = cpu_to_le64(CEPH_NOSNAP); + ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + "rbd", "get_parent", + (char *) &snapid, sizeof (snapid), + (char *) reply_buf, size, + CEPH_OSD_FLAG_READ, NULL); + dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + if (ret < 0) + goto out_err; + + ret = -ERANGE; + p = reply_buf; + end = (char *) reply_buf + size; + ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err); + if (parent_spec->pool_id == CEPH_NOPOOL) + goto out; /* No parent? No problem. */ + + image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + if (IS_ERR(image_id)) { + ret = PTR_ERR(image_id); + goto out_err; + } + parent_spec->image_id = image_id; + parent_spec->image_id_len = len; + ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); + ceph_decode_64_safe(&p, end, overlap, out_err); + + rbd_dev->parent_overlap = overlap; + rbd_dev->parent_spec = parent_spec; + parent_spec = NULL; /* rbd_dev now owns this */ +out: + ret = 0; +out_err: + kfree(reply_buf); + rbd_spec_put(parent_spec); + + return ret; +} + +static char *rbd_dev_image_name(struct rbd_device *rbd_dev) +{ + size_t image_id_size; + char *image_id; + void *p; + void *end; + size_t size; + void *reply_buf = NULL; + size_t len = 0; + char *image_name = NULL; + int ret; + + rbd_assert(!rbd_dev->spec->image_name); + + image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len; + image_id = kmalloc(image_id_size, GFP_KERNEL); + if (!image_id) + return NULL; + + p = image_id; + end = (char *) image_id + image_id_size; + ceph_encode_string(&p, end, rbd_dev->spec->image_id, + (u32) rbd_dev->spec->image_id_len); + + size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; + reply_buf = kmalloc(size, GFP_KERNEL); + if (!reply_buf) + goto out; + + ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY, + "rbd", "dir_get_name", + image_id, image_id_size, + (char *) reply_buf, size, + CEPH_OSD_FLAG_READ, NULL); + if (ret < 0) + goto out; + p = reply_buf; + end = (char *) reply_buf + size; + image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + if (IS_ERR(image_name)) + image_name = NULL; + else + dout("%s: name is %s len is %zd\n", __func__, image_name, len); +out: + kfree(reply_buf); + kfree(image_id); + + return image_name; +} + +/* + * When a parent image gets probed, we only have the pool, image, + * and snapshot ids but not the names of any of them. This call + * is made later to fill in those names. It has to be done after + * rbd_dev_snaps_update() has completed because some of the + * information (in particular, snapshot name) is not available + * until then. + */ +static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) +{ + struct ceph_osd_client *osdc; + const char *name; + void *reply_buf = NULL; + int ret; + + if (rbd_dev->spec->pool_name) + return 0; /* Already have the names */ + + /* Look up the pool name */ + + osdc = &rbd_dev->rbd_client->client->osdc; + name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); + if (!name) + return -EIO; /* pool id too large (>= 2^31) */ + + rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); + if (!rbd_dev->spec->pool_name) + return -ENOMEM; + + /* Fetch the image name; tolerate failure here */ + + name = rbd_dev_image_name(rbd_dev); + if (name) { + rbd_dev->spec->image_name_len = strlen(name); + rbd_dev->spec->image_name = (char *) name; + } else { + pr_warning(RBD_DRV_NAME "%d " + "unable to get image name for image id %s\n", + rbd_dev->major, rbd_dev->spec->image_id); + } + + /* Look up the snapshot name. */ + + name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); + if (!name) { + ret = -EIO; + goto out_err; + } + rbd_dev->spec->snap_name = kstrdup(name, GFP_KERNEL); + if(!rbd_dev->spec->snap_name) + goto out_err; + + return 0; +out_err: + kfree(reply_buf); + kfree(rbd_dev->spec->pool_name); + rbd_dev->spec->pool_name = NULL; + + return ret; +} + static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) { size_t size; @@ -2328,7 +2727,6 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) int ret; void *p; void *end; - size_t snap_name_len; char *snap_name; size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN; @@ -2348,9 +2746,7 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) p = reply_buf; end = (char *) reply_buf + size; - snap_name_len = 0; - snap_name = ceph_extract_encoded_string(&p, end, &snap_name_len, - GFP_KERNEL); + snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); if (IS_ERR(snap_name)) { ret = PTR_ERR(snap_name); goto out; @@ -2397,6 +2793,41 @@ static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which, return ERR_PTR(-EINVAL); } +static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver) +{ + int ret; + __u8 obj_order; + + down_write(&rbd_dev->header_rwsem); + + /* Grab old order first, to see if it changes */ + + obj_order = rbd_dev->header.obj_order, + ret = rbd_dev_v2_image_size(rbd_dev); + if (ret) + goto out; + if (rbd_dev->header.obj_order != obj_order) { + ret = -EIO; + goto out; + } + rbd_update_mapping_size(rbd_dev); + + ret = rbd_dev_v2_snap_context(rbd_dev, hver); + dout("rbd_dev_v2_snap_context returned %d\n", ret); + if (ret) + goto out; + ret = rbd_dev_snaps_update(rbd_dev); + dout("rbd_dev_snaps_update returned %d\n", ret); + if (ret) + goto out; + ret = rbd_dev_snaps_register(rbd_dev); + dout("rbd_dev_snaps_register returned %d\n", ret); +out: + up_write(&rbd_dev->header_rwsem); + + return ret; +} + /* * Scan the rbd device's current snapshot list and compare it to the * newly-received snapshot context. Remove any existing snapshots @@ -2436,12 +2867,12 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) /* Existing snapshot not in the new snap context */ - if (rbd_dev->mapping.snap_id == snap->id) - rbd_dev->mapping.snap_exists = false; - __rbd_remove_snap_dev(snap); + if (rbd_dev->spec->snap_id == snap->id) + rbd_dev->exists = false; + rbd_remove_snap_dev(snap); dout("%ssnap id %llu has been removed\n", - rbd_dev->mapping.snap_id == snap->id ? - "mapped " : "", + rbd_dev->spec->snap_id == snap->id ? + "mapped " : "", (unsigned long long) snap->id); /* Done with this list entry; advance */ @@ -2559,7 +2990,7 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev) do { ret = rbd_req_sync_watch(rbd_dev); if (ret == -ERANGE) { - rc = rbd_refresh_header(rbd_dev, NULL); + rc = rbd_dev_refresh(rbd_dev, NULL); if (rc < 0) return rc; } @@ -2621,8 +3052,8 @@ static void rbd_dev_id_put(struct rbd_device *rbd_dev) struct rbd_device *rbd_dev; rbd_dev = list_entry(tmp, struct rbd_device, node); - if (rbd_id > max_id) - max_id = rbd_id; + if (rbd_dev->dev_id > max_id) + max_id = rbd_dev->dev_id; } spin_unlock(&rbd_dev_list_lock); @@ -2722,73 +3153,140 @@ static inline char *dup_token(const char **buf, size_t *lenp) } /* - * This fills in the pool_name, image_name, image_name_len, rbd_dev, - * rbd_md_name, and name fields of the given rbd_dev, based on the - * list of monitor addresses and other options provided via - * /sys/bus/rbd/add. Returns a pointer to a dynamically-allocated - * copy of the snapshot name to map if successful, or a - * pointer-coded error otherwise. + * Parse the options provided for an "rbd add" (i.e., rbd image + * mapping) request. These arrive via a write to /sys/bus/rbd/add, + * and the data written is passed here via a NUL-terminated buffer. + * Returns 0 if successful or an error code otherwise. + * + * The information extracted from these options is recorded in + * the other parameters which return dynamically-allocated + * structures: + * ceph_opts + * The address of a pointer that will refer to a ceph options + * structure. Caller must release the returned pointer using + * ceph_destroy_options() when it is no longer needed. + * rbd_opts + * Address of an rbd options pointer. Fully initialized by + * this function; caller must release with kfree(). + * spec + * Address of an rbd image specification pointer. Fully + * initialized by this function based on parsed options. + * Caller must release with rbd_spec_put(). * - * Note: rbd_dev is assumed to have been initially zero-filled. + * The options passed take this form: + * <mon_addrs> <options> <pool_name> <image_name> [<snap_id>] + * where: + * <mon_addrs> + * A comma-separated list of one or more monitor addresses. + * A monitor address is an ip address, optionally followed + * by a port number (separated by a colon). + * I.e.: ip1[:port1][,ip2[:port2]...] + * <options> + * A comma-separated list of ceph and/or rbd options. + * <pool_name> + * The name of the rados pool containing the rbd image. + * <image_name> + * The name of the image in that pool to map. + * <snap_id> + * An optional snapshot id. If provided, the mapping will + * present data from the image at the time that snapshot was + * created. The image head is used if no snapshot id is + * provided. Snapshot mappings are always read-only. */ -static char *rbd_add_parse_args(struct rbd_device *rbd_dev, - const char *buf, - const char **mon_addrs, - size_t *mon_addrs_size, - char *options, - size_t options_size) +static int rbd_add_parse_args(const char *buf, + struct ceph_options **ceph_opts, + struct rbd_options **opts, + struct rbd_spec **rbd_spec) { size_t len; - char *err_ptr = ERR_PTR(-EINVAL); - char *snap_name; + char *options; + const char *mon_addrs; + size_t mon_addrs_size; + struct rbd_spec *spec = NULL; + struct rbd_options *rbd_opts = NULL; + struct ceph_options *copts; + int ret; /* The first four tokens are required */ len = next_token(&buf); if (!len) - return err_ptr; - *mon_addrs_size = len + 1; - *mon_addrs = buf; - + return -EINVAL; /* Missing monitor address(es) */ + mon_addrs = buf; + mon_addrs_size = len + 1; buf += len; - len = copy_token(&buf, options, options_size); - if (!len || len >= options_size) - return err_ptr; + ret = -EINVAL; + options = dup_token(&buf, NULL); + if (!options) + return -ENOMEM; + if (!*options) + goto out_err; /* Missing options */ - err_ptr = ERR_PTR(-ENOMEM); - rbd_dev->pool_name = dup_token(&buf, NULL); - if (!rbd_dev->pool_name) - goto out_err; + spec = rbd_spec_alloc(); + if (!spec) + goto out_mem; - rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len); - if (!rbd_dev->image_name) - goto out_err; + spec->pool_name = dup_token(&buf, NULL); + if (!spec->pool_name) + goto out_mem; + if (!*spec->pool_name) + goto out_err; /* Missing pool name */ - /* Snapshot name is optional */ + spec->image_name = dup_token(&buf, &spec->image_name_len); + if (!spec->image_name) + goto out_mem; + if (!*spec->image_name) + goto out_err; /* Missing image name */ + + /* + * Snapshot name is optional; default is to use "-" + * (indicating the head/no snapshot). + */ len = next_token(&buf); if (!len) { buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */ len = sizeof (RBD_SNAP_HEAD_NAME) - 1; - } - snap_name = kmalloc(len + 1, GFP_KERNEL); - if (!snap_name) + } else if (len > RBD_MAX_SNAP_NAME_LEN) { + ret = -ENAMETOOLONG; goto out_err; - memcpy(snap_name, buf, len); - *(snap_name + len) = '\0'; + } + spec->snap_name = kmalloc(len + 1, GFP_KERNEL); + if (!spec->snap_name) + goto out_mem; + memcpy(spec->snap_name, buf, len); + *(spec->snap_name + len) = '\0'; -dout(" SNAP_NAME is <%s>, len is %zd\n", snap_name, len); + /* Initialize all rbd options to the defaults */ - return snap_name; + rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL); + if (!rbd_opts) + goto out_mem; + + rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; + + copts = ceph_parse_options(options, mon_addrs, + mon_addrs + mon_addrs_size - 1, + parse_rbd_opts_token, rbd_opts); + if (IS_ERR(copts)) { + ret = PTR_ERR(copts); + goto out_err; + } + kfree(options); + *ceph_opts = copts; + *opts = rbd_opts; + *rbd_spec = spec; + + return 0; +out_mem: + ret = -ENOMEM; out_err: - kfree(rbd_dev->image_name); - rbd_dev->image_name = NULL; - rbd_dev->image_name_len = 0; - kfree(rbd_dev->pool_name); - rbd_dev->pool_name = NULL; + kfree(rbd_opts); + rbd_spec_put(spec); + kfree(options); - return err_ptr; + return ret; } /* @@ -2814,14 +3312,22 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) void *p; /* + * When probing a parent image, the image id is already + * known (and the image name likely is not). There's no + * need to fetch the image id again in this case. + */ + if (rbd_dev->spec->image_id) + return 0; + + /* * First, see if the format 2 image id file exists, and if * so, get the image's persistent id from it. */ - size = sizeof (RBD_ID_PREFIX) + rbd_dev->image_name_len; + size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len; object_name = kmalloc(size, GFP_NOIO); if (!object_name) return -ENOMEM; - sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->image_name); + sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name); dout("rbd id object name is %s\n", object_name); /* Response will be an encoded string, which includes a length */ @@ -2841,17 +3347,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); if (ret < 0) goto out; + ret = 0; /* rbd_req_sync_exec() can return positive */ p = response; - rbd_dev->image_id = ceph_extract_encoded_string(&p, + rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, p + RBD_IMAGE_ID_LEN_MAX, - &rbd_dev->image_id_len, + &rbd_dev->spec->image_id_len, GFP_NOIO); - if (IS_ERR(rbd_dev->image_id)) { - ret = PTR_ERR(rbd_dev->image_id); - rbd_dev->image_id = NULL; + if (IS_ERR(rbd_dev->spec->image_id)) { + ret = PTR_ERR(rbd_dev->spec->image_id); + rbd_dev->spec->image_id = NULL; } else { - dout("image_id is %s\n", rbd_dev->image_id); + dout("image_id is %s\n", rbd_dev->spec->image_id); } out: kfree(response); @@ -2867,26 +3374,33 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) /* Version 1 images have no id; empty string is used */ - rbd_dev->image_id = kstrdup("", GFP_KERNEL); - if (!rbd_dev->image_id) + rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); + if (!rbd_dev->spec->image_id) return -ENOMEM; - rbd_dev->image_id_len = 0; + rbd_dev->spec->image_id_len = 0; /* Record the header object name for this rbd image. */ - size = rbd_dev->image_name_len + sizeof (RBD_SUFFIX); + size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) { ret = -ENOMEM; goto out_err; } - sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX); + sprintf(rbd_dev->header_name, "%s%s", + rbd_dev->spec->image_name, RBD_SUFFIX); /* Populate rbd image metadata */ ret = rbd_read_header(rbd_dev, &rbd_dev->header); if (ret < 0) goto out_err; + + /* Version 1 images have no parent (no layering) */ + + rbd_dev->parent_spec = NULL; + rbd_dev->parent_overlap = 0; + rbd_dev->image_format = 1; dout("discovered version 1 image, header name is %s\n", @@ -2897,8 +3411,8 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) out_err: kfree(rbd_dev->header_name); rbd_dev->header_name = NULL; - kfree(rbd_dev->image_id); - rbd_dev->image_id = NULL; + kfree(rbd_dev->spec->image_id); + rbd_dev->spec->image_id = NULL; return ret; } @@ -2913,12 +3427,12 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) * Image id was filled in by the caller. Record the header * object name for this rbd image. */ - size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->image_id_len; + size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len; rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) return -ENOMEM; sprintf(rbd_dev->header_name, "%s%s", - RBD_HEADER_PREFIX, rbd_dev->image_id); + RBD_HEADER_PREFIX, rbd_dev->spec->image_id); /* Get the size and object order for the image */ @@ -2932,12 +3446,20 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) if (ret < 0) goto out_err; - /* Get the features for the image */ + /* Get the and check features for the image */ ret = rbd_dev_v2_features(rbd_dev); if (ret < 0) goto out_err; + /* If the image supports layering, get the parent info */ + + if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { + ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret < 0) + goto out_err; + } + /* crypto and compression type aren't (yet) supported for v2 images */ rbd_dev->header.crypt_type = 0; @@ -2955,8 +3477,11 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) dout("discovered version 2 image, header name is %s\n", rbd_dev->header_name); - return -ENOTSUPP; + return 0; out_err: + rbd_dev->parent_overlap = 0; + rbd_spec_put(rbd_dev->parent_spec); + rbd_dev->parent_spec = NULL; kfree(rbd_dev->header_name); rbd_dev->header_name = NULL; kfree(rbd_dev->header.object_prefix); @@ -2965,91 +3490,22 @@ out_err: return ret; } -/* - * Probe for the existence of the header object for the given rbd - * device. For format 2 images this includes determining the image - * id. - */ -static int rbd_dev_probe(struct rbd_device *rbd_dev) +static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) { int ret; - /* - * Get the id from the image id object. If it's not a - * format 2 image, we'll get ENOENT back, and we'll assume - * it's a format 1 image. - */ - ret = rbd_dev_image_id(rbd_dev); - if (ret) - ret = rbd_dev_v1_probe(rbd_dev); - else - ret = rbd_dev_v2_probe(rbd_dev); + /* no need to lock here, as rbd_dev is not registered yet */ + ret = rbd_dev_snaps_update(rbd_dev); if (ret) - dout("probe failed, returning %d\n", ret); - - return ret; -} - -static ssize_t rbd_add(struct bus_type *bus, - const char *buf, - size_t count) -{ - char *options; - struct rbd_device *rbd_dev = NULL; - const char *mon_addrs = NULL; - size_t mon_addrs_size = 0; - struct ceph_osd_client *osdc; - int rc = -ENOMEM; - char *snap_name; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - - options = kmalloc(count, GFP_KERNEL); - if (!options) - goto err_out_mem; - rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); - if (!rbd_dev) - goto err_out_mem; - - /* static rbd_device initialization */ - spin_lock_init(&rbd_dev->lock); - INIT_LIST_HEAD(&rbd_dev->node); - INIT_LIST_HEAD(&rbd_dev->snaps); - init_rwsem(&rbd_dev->header_rwsem); - - /* parse add command */ - snap_name = rbd_add_parse_args(rbd_dev, buf, - &mon_addrs, &mon_addrs_size, options, count); - if (IS_ERR(snap_name)) { - rc = PTR_ERR(snap_name); - goto err_out_mem; - } - - rc = rbd_get_client(rbd_dev, mon_addrs, mon_addrs_size - 1, options); - if (rc < 0) - goto err_out_args; - - /* pick the pool */ - osdc = &rbd_dev->rbd_client->client->osdc; - rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); - if (rc < 0) - goto err_out_client; - rbd_dev->pool_id = rc; - - rc = rbd_dev_probe(rbd_dev); - if (rc < 0) - goto err_out_client; - rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + return ret; - /* no need to lock here, as rbd_dev is not registered yet */ - rc = rbd_dev_snaps_update(rbd_dev); - if (rc) - goto err_out_header; + ret = rbd_dev_probe_update_spec(rbd_dev); + if (ret) + goto err_out_snaps; - rc = rbd_dev_set_mapping(rbd_dev, snap_name); - if (rc) - goto err_out_header; + ret = rbd_dev_set_mapping(rbd_dev); + if (ret) + goto err_out_snaps; /* generate unique id: find highest unique id, add one */ rbd_dev_id_get(rbd_dev); @@ -3061,34 +3517,33 @@ static ssize_t rbd_add(struct bus_type *bus, /* Get our block major device number. */ - rc = register_blkdev(0, rbd_dev->name); - if (rc < 0) + ret = register_blkdev(0, rbd_dev->name); + if (ret < 0) goto err_out_id; - rbd_dev->major = rc; + rbd_dev->major = ret; /* Set up the blkdev mapping. */ - rc = rbd_init_disk(rbd_dev); - if (rc) + ret = rbd_init_disk(rbd_dev); + if (ret) goto err_out_blkdev; - rc = rbd_bus_add_dev(rbd_dev); - if (rc) + ret = rbd_bus_add_dev(rbd_dev); + if (ret) goto err_out_disk; /* * At this point cleanup in the event of an error is the job * of the sysfs code (initiated by rbd_bus_del_dev()). */ - down_write(&rbd_dev->header_rwsem); - rc = rbd_dev_snaps_register(rbd_dev); + ret = rbd_dev_snaps_register(rbd_dev); up_write(&rbd_dev->header_rwsem); - if (rc) + if (ret) goto err_out_bus; - rc = rbd_init_watch_dev(rbd_dev); - if (rc) + ret = rbd_init_watch_dev(rbd_dev); + if (ret) goto err_out_bus; /* Everything's ready. Announce the disk to the world. */ @@ -3098,37 +3553,119 @@ static ssize_t rbd_add(struct bus_type *bus, pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name, (unsigned long long) rbd_dev->mapping.size); - return count; - + return ret; err_out_bus: /* this will also clean up rest of rbd_dev stuff */ rbd_bus_del_dev(rbd_dev); - kfree(options); - return rc; + return ret; err_out_disk: rbd_free_disk(rbd_dev); err_out_blkdev: unregister_blkdev(rbd_dev->major, rbd_dev->name); err_out_id: rbd_dev_id_put(rbd_dev); -err_out_header: - rbd_header_free(&rbd_dev->header); +err_out_snaps: + rbd_remove_all_snaps(rbd_dev); + + return ret; +} + +/* + * Probe for the existence of the header object for the given rbd + * device. For format 2 images this includes determining the image + * id. + */ +static int rbd_dev_probe(struct rbd_device *rbd_dev) +{ + int ret; + + /* + * Get the id from the image id object. If it's not a + * format 2 image, we'll get ENOENT back, and we'll assume + * it's a format 1 image. + */ + ret = rbd_dev_image_id(rbd_dev); + if (ret) + ret = rbd_dev_v1_probe(rbd_dev); + else + ret = rbd_dev_v2_probe(rbd_dev); + if (ret) { + dout("probe failed, returning %d\n", ret); + + return ret; + } + + ret = rbd_dev_probe_finish(rbd_dev); + if (ret) + rbd_header_free(&rbd_dev->header); + + return ret; +} + +static ssize_t rbd_add(struct bus_type *bus, + const char *buf, + size_t count) +{ + struct rbd_device *rbd_dev = NULL; + struct ceph_options *ceph_opts = NULL; + struct rbd_options *rbd_opts = NULL; + struct rbd_spec *spec = NULL; + struct rbd_client *rbdc; + struct ceph_osd_client *osdc; + int rc = -ENOMEM; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + /* parse add command */ + rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); + if (rc < 0) + goto err_out_module; + + rbdc = rbd_get_client(ceph_opts); + if (IS_ERR(rbdc)) { + rc = PTR_ERR(rbdc); + goto err_out_args; + } + ceph_opts = NULL; /* rbd_dev client now owns this */ + + /* pick the pool */ + osdc = &rbdc->client->osdc; + rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name); + if (rc < 0) + goto err_out_client; + spec->pool_id = (u64) rc; + + rbd_dev = rbd_dev_create(rbdc, spec); + if (!rbd_dev) + goto err_out_client; + rbdc = NULL; /* rbd_dev now owns this */ + spec = NULL; /* rbd_dev now owns this */ + + rbd_dev->mapping.read_only = rbd_opts->read_only; + kfree(rbd_opts); + rbd_opts = NULL; /* done with this */ + + rc = rbd_dev_probe(rbd_dev); + if (rc < 0) + goto err_out_rbd_dev; + + return count; +err_out_rbd_dev: + rbd_dev_destroy(rbd_dev); err_out_client: - kfree(rbd_dev->header_name); - rbd_put_client(rbd_dev); - kfree(rbd_dev->image_id); + rbd_put_client(rbdc); err_out_args: - kfree(rbd_dev->mapping.snap_name); - kfree(rbd_dev->image_name); - kfree(rbd_dev->pool_name); -err_out_mem: - kfree(rbd_dev); - kfree(options); + if (ceph_opts) + ceph_destroy_options(ceph_opts); + kfree(rbd_opts); + rbd_spec_put(spec); +err_out_module: + module_put(THIS_MODULE); dout("Error adding device %s\n", buf); - module_put(THIS_MODULE); return (ssize_t) rc; } @@ -3163,7 +3700,6 @@ static void rbd_dev_release(struct device *dev) if (rbd_dev->watch_event) rbd_req_sync_unwatch(rbd_dev); - rbd_put_client(rbd_dev); /* clean up and free blkdev */ rbd_free_disk(rbd_dev); @@ -3173,13 +3709,9 @@ static void rbd_dev_release(struct device *dev) rbd_header_free(&rbd_dev->header); /* done with the id, and with the rbd_dev */ - kfree(rbd_dev->mapping.snap_name); - kfree(rbd_dev->image_id); - kfree(rbd_dev->header_name); - kfree(rbd_dev->pool_name); - kfree(rbd_dev->image_name); rbd_dev_id_put(rbd_dev); - kfree(rbd_dev); + rbd_assert(rbd_dev->rbd_client != NULL); + rbd_dev_destroy(rbd_dev); /* release module ref */ module_put(THIS_MODULE); @@ -3211,7 +3743,12 @@ static ssize_t rbd_remove(struct bus_type *bus, goto done; } - __rbd_remove_all_snaps(rbd_dev); + if (rbd_dev->open_count) { + ret = -EBUSY; + goto done; + } + + rbd_remove_all_snaps(rbd_dev); rbd_bus_del_dev(rbd_dev); done: diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h index cbe77fa105ba..49d77cbcf8bd 100644 --- a/drivers/block/rbd_types.h +++ b/drivers/block/rbd_types.h @@ -46,8 +46,6 @@ #define RBD_MIN_OBJ_ORDER 16 #define RBD_MAX_OBJ_ORDER 30 -#define RBD_MAX_SEG_NAME_LEN 128 - #define RBD_COMP_NONE 0 #define RBD_CRYPT_NONE 0 diff --git a/drivers/char/random.c b/drivers/char/random.c index b86eae9b77df..85e81ec1451e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -399,7 +399,6 @@ static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); static struct fasync_struct *fasync; -#if 0 static bool debug; module_param(debug, bool, 0644); #define DEBUG_ENT(fmt, arg...) do { \ @@ -410,9 +409,6 @@ module_param(debug, bool, 0644); blocking_pool.entropy_count,\ nonblocking_pool.entropy_count,\ ## arg); } while (0) -#else -#define DEBUG_ENT(fmt, arg...) do {} while (0) -#endif /********************************************************************** * @@ -437,6 +433,7 @@ struct entropy_store { int entropy_count; int entropy_total; unsigned int initialized:1; + bool last_data_init; __u8 last_data[EXTRACT_SIZE]; }; @@ -829,7 +826,7 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) bytes = min_t(int, bytes, sizeof(tmp)); DEBUG_ENT("going to reseed %s with %d bits " - "(%d of %d requested)\n", + "(%zu of %d requested)\n", r->name, bytes * 8, nbytes * 8, r->entropy_count); bytes = extract_entropy(r->pull, tmp, bytes, @@ -860,7 +857,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, spin_lock_irqsave(&r->lock, flags); BUG_ON(r->entropy_count > r->poolinfo->POOLBITS); - DEBUG_ENT("trying to extract %d bits from %s\n", + DEBUG_ENT("trying to extract %zu bits from %s\n", nbytes * 8, r->name); /* Can we pull enough? */ @@ -882,7 +879,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, } } - DEBUG_ENT("debiting %d entropy credits from %s%s\n", + DEBUG_ENT("debiting %zu entropy credits from %s%s\n", nbytes * 8, r->name, r->limit ? "" : " (unlimited)"); spin_unlock_irqrestore(&r->lock, flags); @@ -957,6 +954,10 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; + /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */ + if (fips_enabled && !r->last_data_init) + nbytes += EXTRACT_SIZE; + trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_); xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, min, reserved); @@ -967,6 +968,17 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, if (fips_enabled) { unsigned long flags; + + /* prime last_data value if need be, per fips 140-2 */ + if (!r->last_data_init) { + spin_lock_irqsave(&r->lock, flags); + memcpy(r->last_data, tmp, EXTRACT_SIZE); + r->last_data_init = true; + nbytes -= EXTRACT_SIZE; + spin_unlock_irqrestore(&r->lock, flags); + extract_buf(r, tmp); + } + spin_lock_irqsave(&r->lock, flags); if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) panic("Hardware RNG duplicated output!\n"); @@ -1086,6 +1098,7 @@ static void init_std_data(struct entropy_store *r) r->entropy_count = 0; r->entropy_total = 0; + r->last_data_init = false; mix_pool_bytes(r, &now, sizeof(now), NULL); for (i = r->poolinfo->POOLBYTES; i > 0; i -= sizeof(rv)) { if (!arch_get_random_long(&rv)) @@ -1142,11 +1155,16 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) if (n > SEC_XFER_SIZE) n = SEC_XFER_SIZE; - DEBUG_ENT("reading %d bits\n", n*8); + DEBUG_ENT("reading %zu bits\n", n*8); n = extract_entropy_user(&blocking_pool, buf, n); - DEBUG_ENT("read got %d bits (%d still needed)\n", + if (n < 0) { + retval = n; + break; + } + + DEBUG_ENT("read got %zd bits (%zd still needed)\n", n*8, (nbytes-n)*8); if (n == 0) { @@ -1171,10 +1189,6 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) continue; } - if (n < 0) { - retval = n; - break; - } count += n; buf += n; nbytes -= n; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 90493d4ead1f..c594cb16c37b 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -37,8 +37,12 @@ #include <linux/wait.h> #include <linux/workqueue.h> #include <linux/module.h> +#include <linux/dma-mapping.h> +#include <linux/kconfig.h> #include "../tty/hvc/hvc_console.h" +#define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC) + /* * This is a global struct for storing common data for all the devices * this driver handles. @@ -111,6 +115,21 @@ struct port_buffer { size_t len; /* offset in the buf from which to consume data */ size_t offset; + + /* DMA address of buffer */ + dma_addr_t dma; + + /* Device we got DMA memory from */ + struct device *dev; + + /* List of pending dma buffers to free */ + struct list_head list; + + /* If sgpages == 0 then buf is used */ + unsigned int sgpages; + + /* sg is used if spages > 0. sg must be the last in is struct */ + struct scatterlist sg[0]; }; /* @@ -325,6 +344,11 @@ static bool is_console_port(struct port *port) return false; } +static bool is_rproc_serial(const struct virtio_device *vdev) +{ + return is_rproc_enabled && vdev->id.device == VIRTIO_ID_RPROC_SERIAL; +} + static inline bool use_multiport(struct ports_device *portdev) { /* @@ -336,20 +360,110 @@ static inline bool use_multiport(struct ports_device *portdev) return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT); } -static void free_buf(struct port_buffer *buf) +static DEFINE_SPINLOCK(dma_bufs_lock); +static LIST_HEAD(pending_free_dma_bufs); + +static void free_buf(struct port_buffer *buf, bool can_sleep) { - kfree(buf->buf); + unsigned int i; + + for (i = 0; i < buf->sgpages; i++) { + struct page *page = sg_page(&buf->sg[i]); + if (!page) + break; + put_page(page); + } + + if (!buf->dev) { + kfree(buf->buf); + } else if (is_rproc_enabled) { + unsigned long flags; + + /* dma_free_coherent requires interrupts to be enabled. */ + if (!can_sleep) { + /* queue up dma-buffers to be freed later */ + spin_lock_irqsave(&dma_bufs_lock, flags); + list_add_tail(&buf->list, &pending_free_dma_bufs); + spin_unlock_irqrestore(&dma_bufs_lock, flags); + return; + } + dma_free_coherent(buf->dev, buf->size, buf->buf, buf->dma); + + /* Release device refcnt and allow it to be freed */ + put_device(buf->dev); + } + kfree(buf); } -static struct port_buffer *alloc_buf(size_t buf_size) +static void reclaim_dma_bufs(void) +{ + unsigned long flags; + struct port_buffer *buf, *tmp; + LIST_HEAD(tmp_list); + + if (list_empty(&pending_free_dma_bufs)) + return; + + /* Create a copy of the pending_free_dma_bufs while holding the lock */ + spin_lock_irqsave(&dma_bufs_lock, flags); + list_cut_position(&tmp_list, &pending_free_dma_bufs, + pending_free_dma_bufs.prev); + spin_unlock_irqrestore(&dma_bufs_lock, flags); + + /* Release the dma buffers, without irqs enabled */ + list_for_each_entry_safe(buf, tmp, &tmp_list, list) { + list_del(&buf->list); + free_buf(buf, true); + } +} + +static struct port_buffer *alloc_buf(struct virtqueue *vq, size_t buf_size, + int pages) { struct port_buffer *buf; - buf = kmalloc(sizeof(*buf), GFP_KERNEL); + reclaim_dma_bufs(); + + /* + * Allocate buffer and the sg list. The sg list array is allocated + * directly after the port_buffer struct. + */ + buf = kmalloc(sizeof(*buf) + sizeof(struct scatterlist) * pages, + GFP_KERNEL); if (!buf) goto fail; - buf->buf = kzalloc(buf_size, GFP_KERNEL); + + buf->sgpages = pages; + if (pages > 0) { + buf->dev = NULL; + buf->buf = NULL; + return buf; + } + + if (is_rproc_serial(vq->vdev)) { + /* + * Allocate DMA memory from ancestor. When a virtio + * device is created by remoteproc, the DMA memory is + * associated with the grandparent device: + * vdev => rproc => platform-dev. + * The code here would have been less quirky if + * DMA_MEMORY_INCLUDES_CHILDREN had been supported + * in dma-coherent.c + */ + if (!vq->vdev->dev.parent || !vq->vdev->dev.parent->parent) + goto free_buf; + buf->dev = vq->vdev->dev.parent->parent; + + /* Increase device refcnt to avoid freeing it */ + get_device(buf->dev); + buf->buf = dma_alloc_coherent(buf->dev, buf_size, &buf->dma, + GFP_KERNEL); + } else { + buf->dev = NULL; + buf->buf = kmalloc(buf_size, GFP_KERNEL); + } + if (!buf->buf) goto free_buf; buf->len = 0; @@ -396,6 +510,8 @@ static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf) ret = virtqueue_add_buf(vq, sg, 0, 1, buf, GFP_ATOMIC); virtqueue_kick(vq); + if (!ret) + ret = vq->num_free; return ret; } @@ -416,7 +532,7 @@ static void discard_port_data(struct port *port) port->stats.bytes_discarded += buf->len - buf->offset; if (add_inbuf(port->in_vq, buf) < 0) { err++; - free_buf(buf); + free_buf(buf, false); } port->inbuf = NULL; buf = get_inbuf(port); @@ -459,7 +575,7 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, vq = portdev->c_ovq; sg_init_one(sg, &cpkt, sizeof(cpkt)); - if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) >= 0) { + if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) == 0) { virtqueue_kick(vq); while (!virtqueue_get_buf(vq, &len)) cpu_relax(); @@ -476,55 +592,29 @@ static ssize_t send_control_msg(struct port *port, unsigned int event, return 0; } -struct buffer_token { - union { - void *buf; - struct scatterlist *sg; - } u; - /* If sgpages == 0 then buf is used, else sg is used */ - unsigned int sgpages; -}; - -static void reclaim_sg_pages(struct scatterlist *sg, unsigned int nrpages) -{ - int i; - struct page *page; - - for (i = 0; i < nrpages; i++) { - page = sg_page(&sg[i]); - if (!page) - break; - put_page(page); - } - kfree(sg); -} /* Callers must take the port->outvq_lock */ static void reclaim_consumed_buffers(struct port *port) { - struct buffer_token *tok; + struct port_buffer *buf; unsigned int len; if (!port->portdev) { /* Device has been unplugged. vqs are already gone. */ return; } - while ((tok = virtqueue_get_buf(port->out_vq, &len))) { - if (tok->sgpages) - reclaim_sg_pages(tok->u.sg, tok->sgpages); - else - kfree(tok->u.buf); - kfree(tok); + while ((buf = virtqueue_get_buf(port->out_vq, &len))) { + free_buf(buf, false); port->outvq_full = false; } } static ssize_t __send_to_port(struct port *port, struct scatterlist *sg, int nents, size_t in_count, - struct buffer_token *tok, bool nonblock) + void *data, bool nonblock) { struct virtqueue *out_vq; - ssize_t ret; + int err; unsigned long flags; unsigned int len; @@ -534,17 +624,17 @@ static ssize_t __send_to_port(struct port *port, struct scatterlist *sg, reclaim_consumed_buffers(port); - ret = virtqueue_add_buf(out_vq, sg, nents, 0, tok, GFP_ATOMIC); + err = virtqueue_add_buf(out_vq, sg, nents, 0, data, GFP_ATOMIC); /* Tell Host to go! */ virtqueue_kick(out_vq); - if (ret < 0) { + if (err) { in_count = 0; goto done; } - if (ret == 0) + if (out_vq->num_free == 0) port->outvq_full = true; if (nonblock) @@ -572,37 +662,6 @@ done: return in_count; } -static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, - bool nonblock) -{ - struct scatterlist sg[1]; - struct buffer_token *tok; - - tok = kmalloc(sizeof(*tok), GFP_ATOMIC); - if (!tok) - return -ENOMEM; - tok->sgpages = 0; - tok->u.buf = in_buf; - - sg_init_one(sg, in_buf, in_count); - - return __send_to_port(port, sg, 1, in_count, tok, nonblock); -} - -static ssize_t send_pages(struct port *port, struct scatterlist *sg, int nents, - size_t in_count, bool nonblock) -{ - struct buffer_token *tok; - - tok = kmalloc(sizeof(*tok), GFP_ATOMIC); - if (!tok) - return -ENOMEM; - tok->sgpages = nents; - tok->u.sg = sg; - - return __send_to_port(port, sg, nents, in_count, tok, nonblock); -} - /* * Give out the data that's requested from the buffer that we have * queued up. @@ -748,9 +807,10 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *offp) { struct port *port; - char *buf; + struct port_buffer *buf; ssize_t ret; bool nonblock; + struct scatterlist sg[1]; /* Userspace could be out to fool us */ if (!count) @@ -766,11 +826,11 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, count = min((size_t)(32 * 1024), count); - buf = kmalloc(count, GFP_KERNEL); + buf = alloc_buf(port->out_vq, count, 0); if (!buf) return -ENOMEM; - ret = copy_from_user(buf, ubuf, count); + ret = copy_from_user(buf->buf, ubuf, count); if (ret) { ret = -EFAULT; goto free_buf; @@ -784,13 +844,14 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, * through to the host. */ nonblock = true; - ret = send_buf(port, buf, count, nonblock); + sg_init_one(sg, buf->buf, count); + ret = __send_to_port(port, sg, 1, count, buf, nonblock); if (nonblock && ret > 0) goto out; free_buf: - kfree(buf); + free_buf(buf, true); out: return ret; } @@ -856,6 +917,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, struct port *port = filp->private_data; struct sg_list sgl; ssize_t ret; + struct port_buffer *buf; struct splice_desc sd = { .total_len = len, .flags = flags, @@ -863,22 +925,34 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, .u.data = &sgl, }; + /* + * Rproc_serial does not yet support splice. To support splice + * pipe_to_sg() must allocate dma-buffers and copy content from + * regular pages to dma pages. And alloc_buf and free_buf must + * support allocating and freeing such a list of dma-buffers. + */ + if (is_rproc_serial(port->out_vq->vdev)) + return -EINVAL; + ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); if (ret < 0) return ret; + buf = alloc_buf(port->out_vq, 0, pipe->nrbufs); + if (!buf) + return -ENOMEM; + sgl.n = 0; sgl.len = 0; sgl.size = pipe->nrbufs; - sgl.sg = kmalloc(sizeof(struct scatterlist) * sgl.size, GFP_KERNEL); - if (unlikely(!sgl.sg)) - return -ENOMEM; - + sgl.sg = buf->sg; sg_init_table(sgl.sg, sgl.size); ret = __splice_from_pipe(pipe, &sd, pipe_to_sg); if (likely(ret > 0)) - ret = send_pages(port, sgl.sg, sgl.n, sgl.len, true); + ret = __send_to_port(port, buf->sg, sgl.n, sgl.len, buf, true); + if (unlikely(ret <= 0)) + free_buf(buf, true); return ret; } @@ -927,6 +1001,7 @@ static int port_fops_release(struct inode *inode, struct file *filp) reclaim_consumed_buffers(port); spin_unlock_irq(&port->outvq_lock); + reclaim_dma_bufs(); /* * Locks aren't necessary here as a port can't be opened after * unplug, and if a port isn't unplugged, a kref would already @@ -1031,6 +1106,7 @@ static const struct file_operations port_fops = { static int put_chars(u32 vtermno, const char *buf, int count) { struct port *port; + struct scatterlist sg[1]; if (unlikely(early_put_chars)) return early_put_chars(vtermno, buf, count); @@ -1039,7 +1115,8 @@ static int put_chars(u32 vtermno, const char *buf, int count) if (!port) return -EPIPE; - return send_buf(port, (void *)buf, count, false); + sg_init_one(sg, buf, count); + return __send_to_port(port, sg, 1, count, (void *)buf, false); } /* @@ -1076,7 +1153,10 @@ static void resize_console(struct port *port) return; vdev = port->portdev->vdev; - if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) + + /* Don't test F_SIZE at all if we're rproc: not a valid feature! */ + if (!is_rproc_serial(vdev) && + virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) hvc_resize(port->cons.hvc, port->cons.ws); } @@ -1260,7 +1340,7 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) nr_added_bufs = 0; do { - buf = alloc_buf(PAGE_SIZE); + buf = alloc_buf(vq, PAGE_SIZE, 0); if (!buf) break; @@ -1268,7 +1348,7 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) ret = add_inbuf(vq, buf); if (ret < 0) { spin_unlock_irq(lock); - free_buf(buf); + free_buf(buf, true); break; } nr_added_bufs++; @@ -1356,10 +1436,18 @@ static int add_port(struct ports_device *portdev, u32 id) goto free_device; } - /* - * If we're not using multiport support, this has to be a console port - */ - if (!use_multiport(port->portdev)) { + if (is_rproc_serial(port->portdev->vdev)) + /* + * For rproc_serial assume remote processor is connected. + * rproc_serial does not want the console port, only + * the generic port implementation. + */ + port->host_connected = true; + else if (!use_multiport(port->portdev)) { + /* + * If we're not using multiport support, + * this has to be a console port. + */ err = init_port_console(port); if (err) goto free_inbufs; @@ -1392,7 +1480,7 @@ static int add_port(struct ports_device *portdev, u32 id) free_inbufs: while ((buf = virtqueue_detach_unused_buf(port->in_vq))) - free_buf(buf); + free_buf(buf, true); free_device: device_destroy(pdrvdata.class, port->dev->devt); free_cdev: @@ -1434,7 +1522,11 @@ static void remove_port_data(struct port *port) /* Remove buffers we queued up for the Host to send us data in. */ while ((buf = virtqueue_detach_unused_buf(port->in_vq))) - free_buf(buf); + free_buf(buf, true); + + /* Free pending buffers from the out-queue. */ + while ((buf = virtqueue_detach_unused_buf(port->out_vq))) + free_buf(buf, true); } /* @@ -1636,7 +1728,7 @@ static void control_work_handler(struct work_struct *work) if (add_inbuf(portdev->c_ivq, buf) < 0) { dev_warn(&portdev->vdev->dev, "Error adding buffer to queue\n"); - free_buf(buf); + free_buf(buf, false); } } spin_unlock(&portdev->cvq_lock); @@ -1832,10 +1924,10 @@ static void remove_controlq_data(struct ports_device *portdev) return; while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) - free_buf(buf); + free_buf(buf, true); while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) - free_buf(buf); + free_buf(buf, true); } /* @@ -1882,11 +1974,15 @@ static int virtcons_probe(struct virtio_device *vdev) multiport = false; portdev->config.max_nr_ports = 1; - if (virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT, - offsetof(struct virtio_console_config, - max_nr_ports), - &portdev->config.max_nr_ports) == 0) + + /* Don't test MULTIPORT at all if we're rproc: not a valid feature! */ + if (!is_rproc_serial(vdev) && + virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT, + offsetof(struct virtio_console_config, + max_nr_ports), + &portdev->config.max_nr_ports) == 0) { multiport = true; + } err = init_vqs(portdev); if (err < 0) { @@ -1996,6 +2092,16 @@ static unsigned int features[] = { VIRTIO_CONSOLE_F_MULTIPORT, }; +static struct virtio_device_id rproc_serial_id_table[] = { +#if IS_ENABLED(CONFIG_REMOTEPROC) + { VIRTIO_ID_RPROC_SERIAL, VIRTIO_DEV_ANY_ID }, +#endif + { 0 }, +}; + +static unsigned int rproc_serial_features[] = { +}; + #ifdef CONFIG_PM static int virtcons_freeze(struct virtio_device *vdev) { @@ -2080,6 +2186,20 @@ static struct virtio_driver virtio_console = { #endif }; +/* + * virtio_rproc_serial refers to __devinit function which causes + * section mismatch warnings. So use __refdata to silence warnings. + */ +static struct virtio_driver __refdata virtio_rproc_serial = { + .feature_table = rproc_serial_features, + .feature_table_size = ARRAY_SIZE(rproc_serial_features), + .driver.name = "virtio_rproc_serial", + .driver.owner = THIS_MODULE, + .id_table = rproc_serial_id_table, + .probe = virtcons_probe, + .remove = virtcons_remove, +}; + static int __init init(void) { int err; @@ -2104,7 +2224,15 @@ static int __init init(void) pr_err("Error %d registering virtio driver\n", err); goto free; } + err = register_virtio_driver(&virtio_rproc_serial); + if (err < 0) { + pr_err("Error %d registering virtio rproc serial driver\n", + err); + goto unregister; + } return 0; +unregister: + unregister_virtio_driver(&virtio_console); free: if (pdrvdata.debugfs_dir) debugfs_remove_recursive(pdrvdata.debugfs_dir); @@ -2114,7 +2242,10 @@ free: static void __exit fini(void) { + reclaim_dma_bufs(); + unregister_virtio_driver(&virtio_console); + unregister_virtio_driver(&virtio_rproc_serial); class_destroy(pdrvdata.class); if (pdrvdata.debugfs_dir) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index b298158cb922..fd3ae6290d71 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -16,6 +16,7 @@ */ static char dmi_empty_string[] = " "; +static u16 __initdata dmi_ver; /* * Catch too early calls to dmi_check_system(): */ @@ -118,12 +119,12 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *, return 0; } -static int __init dmi_checksum(const u8 *buf) +static int __init dmi_checksum(const u8 *buf, u8 len) { u8 sum = 0; int a; - for (a = 0; a < 15; a++) + for (a = 0; a < len; a++) sum += buf[a]; return sum == 0; @@ -161,8 +162,10 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde return; for (i = 0; i < 16 && (is_ff || is_00); i++) { - if(d[i] != 0x00) is_ff = 0; - if(d[i] != 0xFF) is_00 = 0; + if (d[i] != 0x00) + is_00 = 0; + if (d[i] != 0xFF) + is_ff = 0; } if (is_ff || is_00) @@ -172,7 +175,15 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde if (!s) return; - sprintf(s, "%pUB", d); + /* + * As of version 2.6 of the SMBIOS specification, the first 3 fields of + * the UUID are supposed to be little-endian encoded. The specification + * says that this is the defacto standard. + */ + if (dmi_ver >= 0x0206) + sprintf(s, "%pUL", d); + else + sprintf(s, "%pUB", d); dmi_ident[slot] = s; } @@ -404,29 +415,57 @@ static int __init dmi_present(const char __iomem *p) u8 buf[15]; memcpy_fromio(buf, p, 15); - if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { + if (dmi_checksum(buf, 15)) { dmi_num = (buf[13] << 8) | buf[12]; dmi_len = (buf[7] << 8) | buf[6]; dmi_base = (buf[11] << 24) | (buf[10] << 16) | (buf[9] << 8) | buf[8]; - /* - * DMI version 0.0 means that the real version is taken from - * the SMBIOS version, which we don't know at this point. - */ - if (buf[14] != 0) - printk(KERN_INFO "DMI %d.%d present.\n", - buf[14] >> 4, buf[14] & 0xF); - else - printk(KERN_INFO "DMI present.\n"); if (dmi_walk_early(dmi_decode) == 0) { + if (dmi_ver) + pr_info("SMBIOS %d.%d present.\n", + dmi_ver >> 8, dmi_ver & 0xFF); + else { + dmi_ver = (buf[14] & 0xF0) << 4 | + (buf[14] & 0x0F); + pr_info("Legacy DMI %d.%d present.\n", + dmi_ver >> 8, dmi_ver & 0xFF); + } dmi_dump_ids(); return 0; } } + dmi_ver = 0; return 1; } +static int __init smbios_present(const char __iomem *p) +{ + u8 buf[32]; + int offset = 0; + + memcpy_fromio(buf, p, 32); + if ((buf[5] < 32) && dmi_checksum(buf, buf[5])) { + dmi_ver = (buf[6] << 8) + buf[7]; + + /* Some BIOS report weird SMBIOS version, fix that up */ + switch (dmi_ver) { + case 0x021F: + case 0x0221: + pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", + dmi_ver & 0xFF, 3); + dmi_ver = 0x0203; + break; + case 0x0233: + pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", 51, 6); + dmi_ver = 0x0206; + break; + } + offset = 16; + } + return dmi_present(buf + offset); +} + void __init dmi_scan_machine(void) { char __iomem *p, *q; @@ -444,7 +483,7 @@ void __init dmi_scan_machine(void) if (p == NULL) goto error; - rc = dmi_present(p + 0x10); /* offset of _DMI_ string */ + rc = smbios_present(p); dmi_iounmap(p, 32); if (!rc) { dmi_available = 1; @@ -462,7 +501,12 @@ void __init dmi_scan_machine(void) goto error; for (q = p; q < p + 0x10000; q += 16) { - rc = dmi_present(q); + if (memcmp(q, "_SM_", 4) == 0 && q - p <= 0xFFE0) + rc = smbios_present(q); + else if (memcmp(q, "_DMI_", 5) == 0) + rc = dmi_present(q); + else + continue; if (!rc) { dmi_available = 1; dmi_iounmap(p, 0x10000); diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 8ae1f5b19669..682de754d63f 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -172,6 +172,7 @@ config GPIO_MSM_V2 config GPIO_MVEBU def_bool y depends on PLAT_ORION + depends on OF select GPIO_GENERIC select GENERIC_IRQ_CHIP diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c index 6cc87ac8e019..6f2306db8591 100644 --- a/drivers/gpio/gpio-ich.c +++ b/drivers/gpio/gpio-ich.c @@ -390,6 +390,7 @@ static int ichx_gpio_probe(struct platform_device *pdev) return -ENODEV; } + spin_lock_init(&ichx_priv.lock); res_base = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_GPIO); ichx_priv.use_gpio = ich_info->use_gpio; err = ichx_gpio_request_regions(res_base, pdev->name, diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index d767b534c4af..7d9bd94be8d2 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -41,7 +41,6 @@ #include <linux/io.h> #include <linux/of_irq.h> #include <linux/of_device.h> -#include <linux/platform_device.h> #include <linux/pinctrl/consumer.h> /* @@ -469,19 +468,6 @@ static void mvebu_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) } } -static struct platform_device_id mvebu_gpio_ids[] = { - { - .name = "orion-gpio", - }, { - .name = "mv78200-gpio", - }, { - .name = "armadaxp-gpio", - }, { - /* sentinel */ - }, -}; -MODULE_DEVICE_TABLE(platform, mvebu_gpio_ids); - static struct of_device_id mvebu_gpio_of_match[] = { { .compatible = "marvell,orion-gpio", @@ -555,9 +541,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) mvchip->chip.base = id * MVEBU_MAX_GPIO_PER_BANK; mvchip->chip.ngpio = ngpios; mvchip->chip.can_sleep = 0; -#ifdef CONFIG_OF mvchip->chip.of_node = np; -#endif spin_lock_init(&mvchip->lock); mvchip->membase = devm_request_and_ioremap(&pdev->dev, res); @@ -698,7 +682,6 @@ static struct platform_driver mvebu_gpio_driver = { .of_match_table = mvebu_gpio_of_match, }, .probe = mvebu_gpio_probe, - .id_table = mvebu_gpio_ids, }; static int __init mvebu_gpio_init(void) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a9151337d5b9..33d20be87db5 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -579,7 +579,7 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, * at this point the buffer should be dead, so * no new sync objects can be attached. */ - sync_obj = driver->sync_obj_ref(&bo->sync_obj); + sync_obj = driver->sync_obj_ref(bo->sync_obj); spin_unlock(&bdev->fence_lock); atomic_set(&bo->reserved, 0); diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c index 9f26400713f0..89cfd64b3373 100644 --- a/drivers/hwmon/hwmon-vid.c +++ b/drivers/hwmon/hwmon-vid.c @@ -115,6 +115,12 @@ int vid_from_reg(int val, u8 vrm) return (val < 32) ? 1550 - 25 * val : 775 - (25 * (val - 31)) / 2; + case 26: /* AMD family 10h to 15h, serial VID */ + val &= 0x7f; + if (val >= 0x7c) + return 0; + return DIV_ROUND_CLOSEST(15500 - 125 * val, 10); + case 91: /* VRM 9.1 */ case 90: /* VRM 9.0 */ val &= 0x1f; @@ -195,6 +201,10 @@ static struct vrm_model vrm_models[] = { {X86_VENDOR_AMD, 0xF, 0x40, 0x7F, ANY, 24}, /* NPT family 0Fh */ {X86_VENDOR_AMD, 0xF, 0x80, ANY, ANY, 25}, /* future fam. 0Fh */ {X86_VENDOR_AMD, 0x10, 0x0, ANY, ANY, 25}, /* NPT family 10h */ + {X86_VENDOR_AMD, 0x11, 0x0, ANY, ANY, 26}, /* family 11h */ + {X86_VENDOR_AMD, 0x12, 0x0, ANY, ANY, 26}, /* family 12h */ + {X86_VENDOR_AMD, 0x14, 0x0, ANY, ANY, 26}, /* family 14h */ + {X86_VENDOR_AMD, 0x15, 0x0, ANY, ANY, 26}, /* family 15h */ {X86_VENDOR_INTEL, 0x6, 0x0, 0x6, ANY, 82}, /* Pentium Pro, * Pentium II, Xeon, diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index c3c471ca202f..646314f7c839 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -84,19 +84,21 @@ static void __init hwmon_pci_quirks(void) /* Open access to 0x295-0x296 on MSI MS-7031 */ sb = pci_get_device(PCI_VENDOR_ID_ATI, 0x436c, NULL); - if (sb && - (sb->subsystem_vendor == 0x1462 && /* MSI */ - sb->subsystem_device == 0x0031)) { /* MS-7031 */ - - pci_read_config_byte(sb, 0x48, &enable); - pci_read_config_word(sb, 0x64, &base); - - if (base == 0 && !(enable & BIT(2))) { - dev_info(&sb->dev, - "Opening wide generic port at 0x295\n"); - pci_write_config_word(sb, 0x64, 0x295); - pci_write_config_byte(sb, 0x48, enable | BIT(2)); + if (sb) { + if (sb->subsystem_vendor == 0x1462 && /* MSI */ + sb->subsystem_device == 0x0031) { /* MS-7031 */ + pci_read_config_byte(sb, 0x48, &enable); + pci_read_config_word(sb, 0x64, &base); + + if (base == 0 && !(enable & BIT(2))) { + dev_info(&sb->dev, + "Opening wide generic port at 0x295\n"); + pci_write_config_word(sb, 0x64, 0x295); + pci_write_config_byte(sb, 0x48, + enable | BIT(2)); + } } + pci_dev_put(sb); } #endif } diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index d32aa354cbdf..117d66fcded6 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -203,6 +203,8 @@ static const u8 IT87_REG_FAN[] = { 0x0d, 0x0e, 0x0f, 0x80, 0x82 }; static const u8 IT87_REG_FAN_MIN[] = { 0x10, 0x11, 0x12, 0x84, 0x86 }; static const u8 IT87_REG_FANX[] = { 0x18, 0x19, 0x1a, 0x81, 0x83 }; static const u8 IT87_REG_FANX_MIN[] = { 0x1b, 0x1c, 0x1d, 0x85, 0x87 }; +static const u8 IT87_REG_TEMP_OFFSET[] = { 0x56, 0x57, 0x59 }; + #define IT87_REG_FAN_MAIN_CTRL 0x13 #define IT87_REG_FAN_CTL 0x14 #define IT87_REG_PWM(nr) (0x15 + (nr)) @@ -226,6 +228,83 @@ static const u8 IT87_REG_FANX_MIN[] = { 0x1b, 0x1c, 0x1d, 0x85, 0x87 }; #define IT87_REG_AUTO_TEMP(nr, i) (0x60 + (nr) * 8 + (i)) #define IT87_REG_AUTO_PWM(nr, i) (0x65 + (nr) * 8 + (i)) +struct it87_devices { + const char *name; + u16 features; + u8 peci_mask; + u8 old_peci_mask; +}; + +#define FEAT_12MV_ADC (1 << 0) +#define FEAT_NEWER_AUTOPWM (1 << 1) +#define FEAT_OLD_AUTOPWM (1 << 2) +#define FEAT_16BIT_FANS (1 << 3) +#define FEAT_TEMP_OFFSET (1 << 4) +#define FEAT_TEMP_PECI (1 << 5) +#define FEAT_TEMP_OLD_PECI (1 << 6) + +static const struct it87_devices it87_devices[] = { + [it87] = { + .name = "it87", + .features = FEAT_OLD_AUTOPWM, /* may need to overwrite */ + }, + [it8712] = { + .name = "it8712", + .features = FEAT_OLD_AUTOPWM, /* may need to overwrite */ + }, + [it8716] = { + .name = "it8716", + .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET, + }, + [it8718] = { + .name = "it8718", + .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET + | FEAT_TEMP_OLD_PECI, + .old_peci_mask = 0x4, + }, + [it8720] = { + .name = "it8720", + .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET + | FEAT_TEMP_OLD_PECI, + .old_peci_mask = 0x4, + }, + [it8721] = { + .name = "it8721", + .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS + | FEAT_TEMP_OFFSET | FEAT_TEMP_OLD_PECI | FEAT_TEMP_PECI, + .peci_mask = 0x05, + .old_peci_mask = 0x02, /* Actually reports PCH */ + }, + [it8728] = { + .name = "it8728", + .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS + | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI, + .peci_mask = 0x07, + }, + [it8782] = { + .name = "it8782", + .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET + | FEAT_TEMP_OLD_PECI, + .old_peci_mask = 0x4, + }, + [it8783] = { + .name = "it8783", + .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET + | FEAT_TEMP_OLD_PECI, + .old_peci_mask = 0x4, + }, +}; + +#define has_16bit_fans(data) ((data)->features & FEAT_16BIT_FANS) +#define has_12mv_adc(data) ((data)->features & FEAT_12MV_ADC) +#define has_newer_autopwm(data) ((data)->features & FEAT_NEWER_AUTOPWM) +#define has_old_autopwm(data) ((data)->features & FEAT_OLD_AUTOPWM) +#define has_temp_offset(data) ((data)->features & FEAT_TEMP_OFFSET) +#define has_temp_peci(data, nr) (((data)->features & FEAT_TEMP_PECI) && \ + ((data)->peci_mask & (1 << nr))) +#define has_temp_old_peci(data, nr) \ + (((data)->features & FEAT_TEMP_OLD_PECI) && \ + ((data)->old_peci_mask & (1 << nr))) struct it87_sio_data { enum chips type; @@ -249,7 +328,9 @@ struct it87_sio_data { struct it87_data { struct device *hwmon_dev; enum chips type; - u8 revision; + u16 features; + u8 peci_mask; + u8 old_peci_mask; unsigned short addr; const char *name; @@ -258,17 +339,13 @@ struct it87_data { unsigned long last_updated; /* In jiffies */ u16 in_scaled; /* Internal voltage sensors are scaled */ - u8 in[9]; /* Register value */ - u8 in_max[8]; /* Register value */ - u8 in_min[8]; /* Register value */ + u8 in[9][3]; /* [nr][0]=in, [1]=min, [2]=max */ u8 has_fan; /* Bitfield, fans enabled */ - u16 fan[5]; /* Register values, possibly combined */ - u16 fan_min[5]; /* Register values, possibly combined */ + u16 fan[5][2]; /* Register values, [nr][0]=fan, [1]=min */ u8 has_temp; /* Bitfield, temp sensors enabled */ - s8 temp[3]; /* Register value */ - s8 temp_high[3]; /* Register value */ - s8 temp_low[3]; /* Register value */ - u8 sensor; /* Register value */ + s8 temp[3][4]; /* [nr][0]=temp, [1]=min, [2]=max, [3]=offset */ + u8 sensor; /* Register value (IT87_REG_TEMP_ENABLE) */ + u8 extra; /* Register value (IT87_REG_TEMP_EXTRA) */ u8 fan_div[3]; /* Register encoding, shifted right */ u8 vid; /* Register encoding, combined */ u8 vrm; @@ -296,26 +373,6 @@ struct it87_data { s8 auto_temp[3][5]; /* [nr][0] is point1_temp_hyst */ }; -static inline int has_12mv_adc(const struct it87_data *data) -{ - /* - * IT8721F and later have a 12 mV ADC, also with internal scaling - * on selected inputs. - */ - return data->type == it8721 - || data->type == it8728; -} - -static inline int has_newer_autopwm(const struct it87_data *data) -{ - /* - * IT8721F and later have separate registers for the temperature - * mapping and the manual duty cycle. - */ - return data->type == it8721 - || data->type == it8728; -} - static int adc_lsb(const struct it87_data *data, int nr) { int lsb = has_12mv_adc(data) ? 12 : 16; @@ -398,35 +455,6 @@ static const unsigned int pwm_freq[8] = { 750000 / 128, }; -static inline int has_16bit_fans(const struct it87_data *data) -{ - /* - * IT8705F Datasheet 0.4.1, 3h == Version G. - * IT8712F Datasheet 0.9.1, section 8.3.5 indicates 8h == Version J. - * These are the first revisions with 16-bit tachometer support. - */ - return (data->type == it87 && data->revision >= 0x03) - || (data->type == it8712 && data->revision >= 0x08) - || data->type == it8716 - || data->type == it8718 - || data->type == it8720 - || data->type == it8721 - || data->type == it8728 - || data->type == it8782 - || data->type == it8783; -} - -static inline int has_old_autopwm(const struct it87_data *data) -{ - /* - * The old automatic fan speed control interface is implemented - * by IT8705F chips up to revision F and IT8712F chips up to - * revision G. - */ - return (data->type == it87 && data->revision < 0x03) - || (data->type == it8712 && data->revision < 0x08); -} - static int it87_probe(struct platform_device *pdev); static int it87_remove(struct platform_device *pdev); @@ -447,59 +475,22 @@ static struct platform_driver it87_driver = { }; static ssize_t show_in(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in[nr])); -} - -static ssize_t show_in_min(struct device *dev, struct device_attribute *attr, - char *buf) + char *buf) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in_min[nr])); + return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in[nr][index])); } -static ssize_t show_in_max(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in_max[nr])); -} - -static ssize_t set_in_min(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - - struct it87_data *data = dev_get_drvdata(dev); - unsigned long val; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - mutex_lock(&data->update_lock); - data->in_min[nr] = in_to_reg(data, nr, val); - it87_write_value(data, IT87_REG_VIN_MIN(nr), - data->in_min[nr]); - mutex_unlock(&data->update_lock); - return count; -} -static ssize_t set_in_max(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t set_in(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; struct it87_data *data = dev_get_drvdata(dev); unsigned long val; @@ -508,140 +499,167 @@ static ssize_t set_in_max(struct device *dev, struct device_attribute *attr, return -EINVAL; mutex_lock(&data->update_lock); - data->in_max[nr] = in_to_reg(data, nr, val); - it87_write_value(data, IT87_REG_VIN_MAX(nr), - data->in_max[nr]); + data->in[nr][index] = in_to_reg(data, nr, val); + it87_write_value(data, + index == 1 ? IT87_REG_VIN_MIN(nr) + : IT87_REG_VIN_MAX(nr), + data->in[nr][index]); mutex_unlock(&data->update_lock); return count; } -#define show_in_offset(offset) \ -static SENSOR_DEVICE_ATTR(in##offset##_input, S_IRUGO, \ - show_in, NULL, offset); - -#define limit_in_offset(offset) \ -static SENSOR_DEVICE_ATTR(in##offset##_min, S_IRUGO | S_IWUSR, \ - show_in_min, set_in_min, offset); \ -static SENSOR_DEVICE_ATTR(in##offset##_max, S_IRUGO | S_IWUSR, \ - show_in_max, set_in_max, offset); - -show_in_offset(0); -limit_in_offset(0); -show_in_offset(1); -limit_in_offset(1); -show_in_offset(2); -limit_in_offset(2); -show_in_offset(3); -limit_in_offset(3); -show_in_offset(4); -limit_in_offset(4); -show_in_offset(5); -limit_in_offset(5); -show_in_offset(6); -limit_in_offset(6); -show_in_offset(7); -limit_in_offset(7); -show_in_offset(8); +static SENSOR_DEVICE_ATTR_2(in0_input, S_IRUGO, show_in, NULL, 0, 0); +static SENSOR_DEVICE_ATTR_2(in0_min, S_IRUGO | S_IWUSR, show_in, set_in, + 0, 1); +static SENSOR_DEVICE_ATTR_2(in0_max, S_IRUGO | S_IWUSR, show_in, set_in, + 0, 2); + +static SENSOR_DEVICE_ATTR_2(in1_input, S_IRUGO, show_in, NULL, 1, 0); +static SENSOR_DEVICE_ATTR_2(in1_min, S_IRUGO | S_IWUSR, show_in, set_in, + 1, 1); +static SENSOR_DEVICE_ATTR_2(in1_max, S_IRUGO | S_IWUSR, show_in, set_in, + 1, 2); + +static SENSOR_DEVICE_ATTR_2(in2_input, S_IRUGO, show_in, NULL, 2, 0); +static SENSOR_DEVICE_ATTR_2(in2_min, S_IRUGO | S_IWUSR, show_in, set_in, + 2, 1); +static SENSOR_DEVICE_ATTR_2(in2_max, S_IRUGO | S_IWUSR, show_in, set_in, + 2, 2); + +static SENSOR_DEVICE_ATTR_2(in3_input, S_IRUGO, show_in, NULL, 3, 0); +static SENSOR_DEVICE_ATTR_2(in3_min, S_IRUGO | S_IWUSR, show_in, set_in, + 3, 1); +static SENSOR_DEVICE_ATTR_2(in3_max, S_IRUGO | S_IWUSR, show_in, set_in, + 3, 2); + +static SENSOR_DEVICE_ATTR_2(in4_input, S_IRUGO, show_in, NULL, 4, 0); +static SENSOR_DEVICE_ATTR_2(in4_min, S_IRUGO | S_IWUSR, show_in, set_in, + 4, 1); +static SENSOR_DEVICE_ATTR_2(in4_max, S_IRUGO | S_IWUSR, show_in, set_in, + 4, 2); + +static SENSOR_DEVICE_ATTR_2(in5_input, S_IRUGO, show_in, NULL, 5, 0); +static SENSOR_DEVICE_ATTR_2(in5_min, S_IRUGO | S_IWUSR, show_in, set_in, + 5, 1); +static SENSOR_DEVICE_ATTR_2(in5_max, S_IRUGO | S_IWUSR, show_in, set_in, + 5, 2); + +static SENSOR_DEVICE_ATTR_2(in6_input, S_IRUGO, show_in, NULL, 6, 0); +static SENSOR_DEVICE_ATTR_2(in6_min, S_IRUGO | S_IWUSR, show_in, set_in, + 6, 1); +static SENSOR_DEVICE_ATTR_2(in6_max, S_IRUGO | S_IWUSR, show_in, set_in, + 6, 2); + +static SENSOR_DEVICE_ATTR_2(in7_input, S_IRUGO, show_in, NULL, 7, 0); +static SENSOR_DEVICE_ATTR_2(in7_min, S_IRUGO | S_IWUSR, show_in, set_in, + 7, 1); +static SENSOR_DEVICE_ATTR_2(in7_max, S_IRUGO | S_IWUSR, show_in, set_in, + 7, 2); + +static SENSOR_DEVICE_ATTR_2(in8_input, S_IRUGO, show_in, NULL, 8, 0); /* 3 temperatures */ static ssize_t show_temp(struct device *dev, struct device_attribute *attr, - char *buf) + char *buf) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp[nr])); -} -static ssize_t show_temp_max(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_high[nr])); + return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp[nr][index])); } -static ssize_t show_temp_min(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_low[nr])); -} -static ssize_t set_temp_max(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t set_temp(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; struct it87_data *data = dev_get_drvdata(dev); long val; + u8 reg, regval; if (kstrtol(buf, 10, &val) < 0) return -EINVAL; mutex_lock(&data->update_lock); - data->temp_high[nr] = TEMP_TO_REG(val); - it87_write_value(data, IT87_REG_TEMP_HIGH(nr), data->temp_high[nr]); - mutex_unlock(&data->update_lock); - return count; -} -static ssize_t set_temp_min(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = dev_get_drvdata(dev); - long val; - - if (kstrtol(buf, 10, &val) < 0) - return -EINVAL; + switch (index) { + default: + case 1: + reg = IT87_REG_TEMP_LOW(nr); + break; + case 2: + reg = IT87_REG_TEMP_HIGH(nr); + break; + case 3: + regval = it87_read_value(data, IT87_REG_BEEP_ENABLE); + if (!(regval & 0x80)) { + regval |= 0x80; + it87_write_value(data, IT87_REG_BEEP_ENABLE, regval); + } + data->valid = 0; + reg = IT87_REG_TEMP_OFFSET[nr]; + break; + } - mutex_lock(&data->update_lock); - data->temp_low[nr] = TEMP_TO_REG(val); - it87_write_value(data, IT87_REG_TEMP_LOW(nr), data->temp_low[nr]); + data->temp[nr][index] = TEMP_TO_REG(val); + it87_write_value(data, reg, data->temp[nr][index]); mutex_unlock(&data->update_lock); return count; } -#define show_temp_offset(offset) \ -static SENSOR_DEVICE_ATTR(temp##offset##_input, S_IRUGO, \ - show_temp, NULL, offset - 1); \ -static SENSOR_DEVICE_ATTR(temp##offset##_max, S_IRUGO | S_IWUSR, \ - show_temp_max, set_temp_max, offset - 1); \ -static SENSOR_DEVICE_ATTR(temp##offset##_min, S_IRUGO | S_IWUSR, \ - show_temp_min, set_temp_min, offset - 1); - -show_temp_offset(1); -show_temp_offset(2); -show_temp_offset(3); - -static ssize_t show_sensor(struct device *dev, struct device_attribute *attr, - char *buf) + +static SENSOR_DEVICE_ATTR_2(temp1_input, S_IRUGO, show_temp, NULL, 0, 0); +static SENSOR_DEVICE_ATTR_2(temp1_min, S_IRUGO | S_IWUSR, show_temp, set_temp, + 0, 1); +static SENSOR_DEVICE_ATTR_2(temp1_max, S_IRUGO | S_IWUSR, show_temp, set_temp, + 0, 2); +static SENSOR_DEVICE_ATTR_2(temp1_offset, S_IRUGO | S_IWUSR, show_temp, + set_temp, 0, 3); +static SENSOR_DEVICE_ATTR_2(temp2_input, S_IRUGO, show_temp, NULL, 1, 0); +static SENSOR_DEVICE_ATTR_2(temp2_min, S_IRUGO | S_IWUSR, show_temp, set_temp, + 1, 1); +static SENSOR_DEVICE_ATTR_2(temp2_max, S_IRUGO | S_IWUSR, show_temp, set_temp, + 1, 2); +static SENSOR_DEVICE_ATTR_2(temp2_offset, S_IRUGO | S_IWUSR, show_temp, + set_temp, 1, 3); +static SENSOR_DEVICE_ATTR_2(temp3_input, S_IRUGO, show_temp, NULL, 2, 0); +static SENSOR_DEVICE_ATTR_2(temp3_min, S_IRUGO | S_IWUSR, show_temp, set_temp, + 2, 1); +static SENSOR_DEVICE_ATTR_2(temp3_max, S_IRUGO | S_IWUSR, show_temp, set_temp, + 2, 2); +static SENSOR_DEVICE_ATTR_2(temp3_offset, S_IRUGO | S_IWUSR, show_temp, + set_temp, 2, 3); + +static ssize_t show_temp_type(struct device *dev, struct device_attribute *attr, + char *buf) { struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); int nr = sensor_attr->index; struct it87_data *data = it87_update_device(dev); u8 reg = data->sensor; /* In case value is updated while used */ + u8 extra = data->extra; + if ((has_temp_peci(data, nr) && (reg >> 6 == nr + 1)) + || (has_temp_old_peci(data, nr) && (extra & 0x80))) + return sprintf(buf, "6\n"); /* Intel PECI */ if (reg & (1 << nr)) return sprintf(buf, "3\n"); /* thermal diode */ if (reg & (8 << nr)) return sprintf(buf, "4\n"); /* thermistor */ return sprintf(buf, "0\n"); /* disabled */ } -static ssize_t set_sensor(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) + +static ssize_t set_temp_type(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); int nr = sensor_attr->index; struct it87_data *data = dev_get_drvdata(dev); long val; - u8 reg; + u8 reg, extra; if (kstrtol(buf, 10, &val) < 0) return -EINVAL; @@ -649,33 +667,45 @@ static ssize_t set_sensor(struct device *dev, struct device_attribute *attr, reg = it87_read_value(data, IT87_REG_TEMP_ENABLE); reg &= ~(1 << nr); reg &= ~(8 << nr); + if (has_temp_peci(data, nr) && (reg >> 6 == nr + 1 || val == 6)) + reg &= 0x3f; + extra = it87_read_value(data, IT87_REG_TEMP_EXTRA); + if (has_temp_old_peci(data, nr) && ((extra & 0x80) || val == 6)) + extra &= 0x7f; if (val == 2) { /* backwards compatibility */ - dev_warn(dev, "Sensor type 2 is deprecated, please use 4 " - "instead\n"); + dev_warn(dev, + "Sensor type 2 is deprecated, please use 4 instead\n"); val = 4; } - /* 3 = thermal diode; 4 = thermistor; 0 = disabled */ + /* 3 = thermal diode; 4 = thermistor; 6 = Intel PECI; 0 = disabled */ if (val == 3) reg |= 1 << nr; else if (val == 4) reg |= 8 << nr; + else if (has_temp_peci(data, nr) && val == 6) + reg |= (nr + 1) << 6; + else if (has_temp_old_peci(data, nr) && val == 6) + extra |= 0x80; else if (val != 0) return -EINVAL; mutex_lock(&data->update_lock); data->sensor = reg; + data->extra = extra; it87_write_value(data, IT87_REG_TEMP_ENABLE, data->sensor); + if (has_temp_old_peci(data, nr)) + it87_write_value(data, IT87_REG_TEMP_EXTRA, data->extra); data->valid = 0; /* Force cache refresh */ mutex_unlock(&data->update_lock); return count; } -#define show_sensor_offset(offset) \ -static SENSOR_DEVICE_ATTR(temp##offset##_type, S_IRUGO | S_IWUSR, \ - show_sensor, set_sensor, offset - 1); -show_sensor_offset(1); -show_sensor_offset(2); -show_sensor_offset(3); +static SENSOR_DEVICE_ATTR(temp1_type, S_IRUGO | S_IWUSR, show_temp_type, + set_temp_type, 0); +static SENSOR_DEVICE_ATTR(temp2_type, S_IRUGO | S_IWUSR, show_temp_type, + set_temp_type, 1); +static SENSOR_DEVICE_ATTR(temp3_type, S_IRUGO | S_IWUSR, show_temp_type, + set_temp_type, 2); /* 3 Fans */ @@ -692,25 +722,21 @@ static int pwm_mode(const struct it87_data *data, int nr) } static ssize_t show_fan(struct device *dev, struct device_attribute *attr, - char *buf) + char *buf) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; + int speed; struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", FAN_FROM_REG(data->fan[nr], - DIV_FROM_REG(data->fan_div[nr]))); -} -static ssize_t show_fan_min(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", FAN_FROM_REG(data->fan_min[nr], - DIV_FROM_REG(data->fan_div[nr]))); + speed = has_16bit_fans(data) ? + FAN16_FROM_REG(data->fan[nr][index]) : + FAN_FROM_REG(data->fan[nr][index], + DIV_FROM_REG(data->fan_div[nr])); + return sprintf(buf, "%d\n", speed); } + static ssize_t show_fan_div(struct device *dev, struct device_attribute *attr, char *buf) { @@ -747,11 +773,13 @@ static ssize_t show_pwm_freq(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%u\n", pwm_freq[index]); } -static ssize_t set_fan_min(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) + +static ssize_t set_fan(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int nr = sattr->nr; + int index = sattr->index; struct it87_data *data = dev_get_drvdata(dev); long val; @@ -761,24 +789,36 @@ static ssize_t set_fan_min(struct device *dev, struct device_attribute *attr, return -EINVAL; mutex_lock(&data->update_lock); - reg = it87_read_value(data, IT87_REG_FAN_DIV); - switch (nr) { - case 0: - data->fan_div[nr] = reg & 0x07; - break; - case 1: - data->fan_div[nr] = (reg >> 3) & 0x07; - break; - case 2: - data->fan_div[nr] = (reg & 0x40) ? 3 : 1; - break; + + if (has_16bit_fans(data)) { + data->fan[nr][index] = FAN16_TO_REG(val); + it87_write_value(data, IT87_REG_FAN_MIN[nr], + data->fan[nr][index] & 0xff); + it87_write_value(data, IT87_REG_FANX_MIN[nr], + data->fan[nr][index] >> 8); + } else { + reg = it87_read_value(data, IT87_REG_FAN_DIV); + switch (nr) { + case 0: + data->fan_div[nr] = reg & 0x07; + break; + case 1: + data->fan_div[nr] = (reg >> 3) & 0x07; + break; + case 2: + data->fan_div[nr] = (reg & 0x40) ? 3 : 1; + break; + } + data->fan[nr][index] = + FAN_TO_REG(val, DIV_FROM_REG(data->fan_div[nr])); + it87_write_value(data, IT87_REG_FAN_MIN[nr], + data->fan[nr][index]); } - data->fan_min[nr] = FAN_TO_REG(val, DIV_FROM_REG(data->fan_div[nr])); - it87_write_value(data, IT87_REG_FAN_MIN[nr], data->fan_min[nr]); mutex_unlock(&data->update_lock); return count; } + static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -797,7 +837,7 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, old = it87_read_value(data, IT87_REG_FAN_DIV); /* Save fan min limit */ - min = FAN_FROM_REG(data->fan_min[nr], DIV_FROM_REG(data->fan_div[nr])); + min = FAN_FROM_REG(data->fan[nr][1], DIV_FROM_REG(data->fan_div[nr])); switch (nr) { case 0: @@ -818,8 +858,8 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, it87_write_value(data, IT87_REG_FAN_DIV, val); /* Restore fan min limit */ - data->fan_min[nr] = FAN_TO_REG(min, DIV_FROM_REG(data->fan_div[nr])); - it87_write_value(data, IT87_REG_FAN_MIN[nr], data->fan_min[nr]); + data->fan[nr][1] = FAN_TO_REG(min, DIV_FROM_REG(data->fan_div[nr])); + it87_write_value(data, IT87_REG_FAN_MIN[nr], data->fan[nr][1]); mutex_unlock(&data->update_lock); return count; @@ -843,8 +883,8 @@ static int check_trip_points(struct device *dev, int nr) } if (err) { - dev_err(dev, "Inconsistent trip points, not switching to " - "automatic mode\n"); + dev_err(dev, + "Inconsistent trip points, not switching to automatic mode\n"); dev_err(dev, "Adjust the trip points and try again\n"); } return err; @@ -1092,118 +1132,106 @@ static ssize_t set_auto_temp(struct device *dev, return count; } -#define show_fan_offset(offset) \ -static SENSOR_DEVICE_ATTR(fan##offset##_input, S_IRUGO, \ - show_fan, NULL, offset - 1); \ -static SENSOR_DEVICE_ATTR(fan##offset##_min, S_IRUGO | S_IWUSR, \ - show_fan_min, set_fan_min, offset - 1); \ -static SENSOR_DEVICE_ATTR(fan##offset##_div, S_IRUGO | S_IWUSR, \ - show_fan_div, set_fan_div, offset - 1); - -show_fan_offset(1); -show_fan_offset(2); -show_fan_offset(3); - -#define show_pwm_offset(offset) \ -static SENSOR_DEVICE_ATTR(pwm##offset##_enable, S_IRUGO | S_IWUSR, \ - show_pwm_enable, set_pwm_enable, offset - 1); \ -static SENSOR_DEVICE_ATTR(pwm##offset, S_IRUGO | S_IWUSR, \ - show_pwm, set_pwm, offset - 1); \ -static DEVICE_ATTR(pwm##offset##_freq, \ - (offset == 1 ? S_IRUGO | S_IWUSR : S_IRUGO), \ - show_pwm_freq, (offset == 1 ? set_pwm_freq : NULL)); \ -static SENSOR_DEVICE_ATTR(pwm##offset##_auto_channels_temp, \ - S_IRUGO | S_IWUSR, show_pwm_temp_map, set_pwm_temp_map, \ - offset - 1); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point1_pwm, \ - S_IRUGO | S_IWUSR, show_auto_pwm, set_auto_pwm, \ - offset - 1, 0); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point2_pwm, \ - S_IRUGO | S_IWUSR, show_auto_pwm, set_auto_pwm, \ - offset - 1, 1); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point3_pwm, \ - S_IRUGO | S_IWUSR, show_auto_pwm, set_auto_pwm, \ - offset - 1, 2); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point4_pwm, \ - S_IRUGO, show_auto_pwm, NULL, offset - 1, 3); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point1_temp, \ - S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp, \ - offset - 1, 1); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point1_temp_hyst, \ - S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp, \ - offset - 1, 0); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point2_temp, \ - S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp, \ - offset - 1, 2); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point3_temp, \ - S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp, \ - offset - 1, 3); \ -static SENSOR_DEVICE_ATTR_2(pwm##offset##_auto_point4_temp, \ - S_IRUGO | S_IWUSR, show_auto_temp, set_auto_temp, \ - offset - 1, 4); - -show_pwm_offset(1); -show_pwm_offset(2); -show_pwm_offset(3); - -/* A different set of callbacks for 16-bit fans */ -static ssize_t show_fan16(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", FAN16_FROM_REG(data->fan[nr])); -} - -static ssize_t show_fan16_min(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = it87_update_device(dev); - return sprintf(buf, "%d\n", FAN16_FROM_REG(data->fan_min[nr])); -} - -static ssize_t set_fan16_min(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); - int nr = sensor_attr->index; - struct it87_data *data = dev_get_drvdata(dev); - long val; - - if (kstrtol(buf, 10, &val) < 0) - return -EINVAL; - - mutex_lock(&data->update_lock); - data->fan_min[nr] = FAN16_TO_REG(val); - it87_write_value(data, IT87_REG_FAN_MIN[nr], - data->fan_min[nr] & 0xff); - it87_write_value(data, IT87_REG_FANX_MIN[nr], - data->fan_min[nr] >> 8); - mutex_unlock(&data->update_lock); - return count; -} - -/* - * We want to use the same sysfs file names as 8-bit fans, but we need - * different variable names, so we have to use SENSOR_ATTR instead of - * SENSOR_DEVICE_ATTR. - */ -#define show_fan16_offset(offset) \ -static struct sensor_device_attribute sensor_dev_attr_fan##offset##_input16 \ - = SENSOR_ATTR(fan##offset##_input, S_IRUGO, \ - show_fan16, NULL, offset - 1); \ -static struct sensor_device_attribute sensor_dev_attr_fan##offset##_min16 \ - = SENSOR_ATTR(fan##offset##_min, S_IRUGO | S_IWUSR, \ - show_fan16_min, set_fan16_min, offset - 1) - -show_fan16_offset(1); -show_fan16_offset(2); -show_fan16_offset(3); -show_fan16_offset(4); -show_fan16_offset(5); +static SENSOR_DEVICE_ATTR_2(fan1_input, S_IRUGO, show_fan, NULL, 0, 0); +static SENSOR_DEVICE_ATTR_2(fan1_min, S_IRUGO | S_IWUSR, show_fan, set_fan, + 0, 1); +static SENSOR_DEVICE_ATTR(fan1_div, S_IRUGO | S_IWUSR, show_fan_div, + set_fan_div, 0); + +static SENSOR_DEVICE_ATTR_2(fan2_input, S_IRUGO, show_fan, NULL, 1, 0); +static SENSOR_DEVICE_ATTR_2(fan2_min, S_IRUGO | S_IWUSR, show_fan, set_fan, + 1, 1); +static SENSOR_DEVICE_ATTR(fan2_div, S_IRUGO | S_IWUSR, show_fan_div, + set_fan_div, 1); + +static SENSOR_DEVICE_ATTR_2(fan3_input, S_IRUGO, show_fan, NULL, 2, 0); +static SENSOR_DEVICE_ATTR_2(fan3_min, S_IRUGO | S_IWUSR, show_fan, set_fan, + 2, 1); +static SENSOR_DEVICE_ATTR(fan3_div, S_IRUGO | S_IWUSR, show_fan_div, + set_fan_div, 2); + +static SENSOR_DEVICE_ATTR_2(fan4_input, S_IRUGO, show_fan, NULL, 3, 0); +static SENSOR_DEVICE_ATTR_2(fan4_min, S_IRUGO | S_IWUSR, show_fan, set_fan, + 3, 1); + +static SENSOR_DEVICE_ATTR_2(fan5_input, S_IRUGO, show_fan, NULL, 4, 0); +static SENSOR_DEVICE_ATTR_2(fan5_min, S_IRUGO | S_IWUSR, show_fan, set_fan, + 4, 1); + +static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, + show_pwm_enable, set_pwm_enable, 0); +static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 0); +static DEVICE_ATTR(pwm1_freq, S_IRUGO | S_IWUSR, show_pwm_freq, set_pwm_freq); +static SENSOR_DEVICE_ATTR(pwm1_auto_channels_temp, S_IRUGO | S_IWUSR, + show_pwm_temp_map, set_pwm_temp_map, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 0, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point2_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 0, 1); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point3_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 0, 2); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point4_pwm, S_IRUGO, + show_auto_pwm, NULL, 0, 3); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 0, 1); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_temp_hyst, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 0, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point2_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 0, 2); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point3_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 0, 3); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point4_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 0, 4); + +static SENSOR_DEVICE_ATTR(pwm2_enable, S_IRUGO | S_IWUSR, + show_pwm_enable, set_pwm_enable, 1); +static SENSOR_DEVICE_ATTR(pwm2, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 1); +static DEVICE_ATTR(pwm2_freq, S_IRUGO, show_pwm_freq, NULL); +static SENSOR_DEVICE_ATTR(pwm2_auto_channels_temp, S_IRUGO | S_IWUSR, + show_pwm_temp_map, set_pwm_temp_map, 1); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 1, 0); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point2_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 1, 1); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point3_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 1, 2); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point4_pwm, S_IRUGO, + show_auto_pwm, NULL, 1, 3); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 1, 1); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_temp_hyst, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 1, 0); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point2_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 1, 2); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point3_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 1, 3); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point4_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 1, 4); + +static SENSOR_DEVICE_ATTR(pwm3_enable, S_IRUGO | S_IWUSR, + show_pwm_enable, set_pwm_enable, 2); +static SENSOR_DEVICE_ATTR(pwm3, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 2); +static DEVICE_ATTR(pwm3_freq, S_IRUGO, show_pwm_freq, NULL); +static SENSOR_DEVICE_ATTR(pwm3_auto_channels_temp, S_IRUGO | S_IWUSR, + show_pwm_temp_map, set_pwm_temp_map, 2); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 2, 0); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point2_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 2, 1); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point3_pwm, S_IRUGO | S_IWUSR, + show_auto_pwm, set_auto_pwm, 2, 2); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point4_pwm, S_IRUGO, + show_auto_pwm, NULL, 2, 3); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 2, 1); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_temp_hyst, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 2, 0); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point2_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 2, 2); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point3_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 2, 3); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point4_temp, S_IRUGO | S_IWUSR, + show_auto_temp, set_auto_temp, 2, 4); /* Alarms */ static ssize_t show_alarms(struct device *dev, struct device_attribute *attr, @@ -1471,6 +1499,12 @@ static const struct attribute_group it87_group_temp[3] = { { .attrs = it87_attributes_temp[2] }, }; +static struct attribute *it87_attributes_temp_offset[] = { + &sensor_dev_attr_temp1_offset.dev_attr.attr, + &sensor_dev_attr_temp2_offset.dev_attr.attr, + &sensor_dev_attr_temp3_offset.dev_attr.attr, +}; + static struct attribute *it87_attributes[] = { &dev_attr_alarms.attr, &sensor_dev_attr_intrusion0_alarm.dev_attr.attr, @@ -1500,73 +1534,47 @@ static struct attribute *it87_attributes_temp_beep[] = { &sensor_dev_attr_temp3_beep.dev_attr.attr, }; -static struct attribute *it87_attributes_fan16[5][3+1] = { { - &sensor_dev_attr_fan1_input16.dev_attr.attr, - &sensor_dev_attr_fan1_min16.dev_attr.attr, +static struct attribute *it87_attributes_fan[5][3+1] = { { + &sensor_dev_attr_fan1_input.dev_attr.attr, + &sensor_dev_attr_fan1_min.dev_attr.attr, &sensor_dev_attr_fan1_alarm.dev_attr.attr, NULL }, { - &sensor_dev_attr_fan2_input16.dev_attr.attr, - &sensor_dev_attr_fan2_min16.dev_attr.attr, + &sensor_dev_attr_fan2_input.dev_attr.attr, + &sensor_dev_attr_fan2_min.dev_attr.attr, &sensor_dev_attr_fan2_alarm.dev_attr.attr, NULL }, { - &sensor_dev_attr_fan3_input16.dev_attr.attr, - &sensor_dev_attr_fan3_min16.dev_attr.attr, + &sensor_dev_attr_fan3_input.dev_attr.attr, + &sensor_dev_attr_fan3_min.dev_attr.attr, &sensor_dev_attr_fan3_alarm.dev_attr.attr, NULL }, { - &sensor_dev_attr_fan4_input16.dev_attr.attr, - &sensor_dev_attr_fan4_min16.dev_attr.attr, + &sensor_dev_attr_fan4_input.dev_attr.attr, + &sensor_dev_attr_fan4_min.dev_attr.attr, &sensor_dev_attr_fan4_alarm.dev_attr.attr, NULL }, { - &sensor_dev_attr_fan5_input16.dev_attr.attr, - &sensor_dev_attr_fan5_min16.dev_attr.attr, + &sensor_dev_attr_fan5_input.dev_attr.attr, + &sensor_dev_attr_fan5_min.dev_attr.attr, &sensor_dev_attr_fan5_alarm.dev_attr.attr, NULL } }; -static const struct attribute_group it87_group_fan16[5] = { - { .attrs = it87_attributes_fan16[0] }, - { .attrs = it87_attributes_fan16[1] }, - { .attrs = it87_attributes_fan16[2] }, - { .attrs = it87_attributes_fan16[3] }, - { .attrs = it87_attributes_fan16[4] }, +static const struct attribute_group it87_group_fan[5] = { + { .attrs = it87_attributes_fan[0] }, + { .attrs = it87_attributes_fan[1] }, + { .attrs = it87_attributes_fan[2] }, + { .attrs = it87_attributes_fan[3] }, + { .attrs = it87_attributes_fan[4] }, }; -static struct attribute *it87_attributes_fan[3][4+1] = { { - &sensor_dev_attr_fan1_input.dev_attr.attr, - &sensor_dev_attr_fan1_min.dev_attr.attr, +static const struct attribute *it87_attributes_fan_div[] = { &sensor_dev_attr_fan1_div.dev_attr.attr, - &sensor_dev_attr_fan1_alarm.dev_attr.attr, - NULL -}, { - &sensor_dev_attr_fan2_input.dev_attr.attr, - &sensor_dev_attr_fan2_min.dev_attr.attr, &sensor_dev_attr_fan2_div.dev_attr.attr, - &sensor_dev_attr_fan2_alarm.dev_attr.attr, - NULL -}, { - &sensor_dev_attr_fan3_input.dev_attr.attr, - &sensor_dev_attr_fan3_min.dev_attr.attr, &sensor_dev_attr_fan3_div.dev_attr.attr, - &sensor_dev_attr_fan3_alarm.dev_attr.attr, - NULL -} }; - -static const struct attribute_group it87_group_fan[3] = { - { .attrs = it87_attributes_fan[0] }, - { .attrs = it87_attributes_fan[1] }, - { .attrs = it87_attributes_fan[2] }, }; -static const struct attribute_group * -it87_get_fan_group(const struct it87_data *data) -{ - return has_16bit_fans(data) ? it87_group_fan16 : it87_group_fan; -} - static struct attribute *it87_attributes_pwm[3][4+1] = { { &sensor_dev_attr_pwm1_enable.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, @@ -1925,7 +1933,6 @@ static void it87_remove_files(struct device *dev) { struct it87_data *data = platform_get_drvdata(pdev); struct it87_sio_data *sio_data = dev->platform_data; - const struct attribute_group *fan_group = it87_get_fan_group(data); int i; sysfs_remove_group(&dev->kobj, &it87_group); @@ -1941,6 +1948,9 @@ static void it87_remove_files(struct device *dev) if (!(data->has_temp & (1 << i))) continue; sysfs_remove_group(&dev->kobj, &it87_group_temp[i]); + if (has_temp_offset(data)) + sysfs_remove_file(&dev->kobj, + it87_attributes_temp_offset[i]); if (sio_data->beep_pin) sysfs_remove_file(&dev->kobj, it87_attributes_temp_beep[i]); @@ -1948,10 +1958,13 @@ static void it87_remove_files(struct device *dev) for (i = 0; i < 5; i++) { if (!(data->has_fan & (1 << i))) continue; - sysfs_remove_group(&dev->kobj, &fan_group[i]); + sysfs_remove_group(&dev->kobj, &it87_group_fan[i]); if (sio_data->beep_pin) sysfs_remove_file(&dev->kobj, it87_attributes_fan_beep[i]); + if (i < 3 && !has_16bit_fans(data)) + sysfs_remove_file(&dev->kobj, + it87_attributes_fan_div[i]); } for (i = 0; i < 3; i++) { if (sio_data->skip_pwm & (1 << 0)) @@ -1972,21 +1985,9 @@ static int it87_probe(struct platform_device *pdev) struct resource *res; struct device *dev = &pdev->dev; struct it87_sio_data *sio_data = dev->platform_data; - const struct attribute_group *fan_group; int err = 0, i; int enable_pwm_interface; int fan_beep_need_rw; - static const char * const names[] = { - "it87", - "it8712", - "it8716", - "it8718", - "it8720", - "it8721", - "it8728", - "it8782", - "it8783", - }; res = platform_get_resource(pdev, IORESOURCE_IO, 0); if (!devm_request_region(&pdev->dev, res->start, IT87_EC_EXTENT, @@ -2003,8 +2004,31 @@ static int it87_probe(struct platform_device *pdev) data->addr = res->start; data->type = sio_data->type; - data->revision = sio_data->revision; - data->name = names[sio_data->type]; + data->features = it87_devices[sio_data->type].features; + data->peci_mask = it87_devices[sio_data->type].peci_mask; + data->old_peci_mask = it87_devices[sio_data->type].old_peci_mask; + data->name = it87_devices[sio_data->type].name; + /* + * IT8705F Datasheet 0.4.1, 3h == Version G. + * IT8712F Datasheet 0.9.1, section 8.3.5 indicates 8h == Version J. + * These are the first revisions with 16-bit tachometer support. + */ + switch (data->type) { + case it87: + if (sio_data->revision >= 0x03) { + data->features &= ~FEAT_OLD_AUTOPWM; + data->features |= FEAT_16BIT_FANS; + } + break; + case it8712: + if (sio_data->revision >= 0x08) { + data->features &= ~FEAT_OLD_AUTOPWM; + data->features |= FEAT_16BIT_FANS; + } + break; + default: + break; + } /* Now, we do the remaining detection. */ if ((it87_read_value(data, IT87_REG_CONFIG) & 0x80) @@ -2068,6 +2092,12 @@ static int it87_probe(struct platform_device *pdev) err = sysfs_create_group(&dev->kobj, &it87_group_temp[i]); if (err) goto error; + if (has_temp_offset(data)) { + err = sysfs_create_file(&dev->kobj, + it87_attributes_temp_offset[i]); + if (err) + goto error; + } if (sio_data->beep_pin) { err = sysfs_create_file(&dev->kobj, it87_attributes_temp_beep[i]); @@ -2077,15 +2107,21 @@ static int it87_probe(struct platform_device *pdev) } /* Do not create fan files for disabled fans */ - fan_group = it87_get_fan_group(data); fan_beep_need_rw = 1; for (i = 0; i < 5; i++) { if (!(data->has_fan & (1 << i))) continue; - err = sysfs_create_group(&dev->kobj, &fan_group[i]); + err = sysfs_create_group(&dev->kobj, &it87_group_fan[i]); if (err) goto error; + if (i < 3 && !has_16bit_fans(data)) { + err = sysfs_create_file(&dev->kobj, + it87_attributes_fan_div[i]); + if (err) + goto error; + } + if (sio_data->beep_pin) { err = sysfs_create_file(&dev->kobj, it87_attributes_fan_beep[i]); @@ -2221,8 +2257,8 @@ static int it87_check_pwm(struct device *dev) * PWM interface). */ if (!((pwm[0] | pwm[1] | pwm[2]) & 0x80)) { - dev_info(dev, "Reconfiguring PWM to " - "active high polarity\n"); + dev_info(dev, + "Reconfiguring PWM to active high polarity\n"); it87_write_value(data, IT87_REG_FAN_CTL, tmp | 0x87); for (i = 0; i < 3; i++) @@ -2232,16 +2268,16 @@ static int it87_check_pwm(struct device *dev) return 1; } - dev_info(dev, "PWM configuration is " - "too broken to be fixed\n"); + dev_info(dev, + "PWM configuration is too broken to be fixed\n"); } - dev_info(dev, "Detected broken BIOS " - "defaults, disabling PWM interface\n"); + dev_info(dev, + "Detected broken BIOS defaults, disabling PWM interface\n"); return 0; } else if (fix_pwm_polarity) { - dev_info(dev, "PWM configuration looks " - "sane, won't touch\n"); + dev_info(dev, + "PWM configuration looks sane, won't touch\n"); } return 1; @@ -2389,42 +2425,46 @@ static struct it87_data *it87_update_device(struct device *dev) it87_read_value(data, IT87_REG_CONFIG) | 0x40); } for (i = 0; i <= 7; i++) { - data->in[i] = + data->in[i][0] = it87_read_value(data, IT87_REG_VIN(i)); - data->in_min[i] = + data->in[i][1] = it87_read_value(data, IT87_REG_VIN_MIN(i)); - data->in_max[i] = + data->in[i][2] = it87_read_value(data, IT87_REG_VIN_MAX(i)); } /* in8 (battery) has no limit registers */ - data->in[8] = it87_read_value(data, IT87_REG_VIN(8)); + data->in[8][0] = it87_read_value(data, IT87_REG_VIN(8)); for (i = 0; i < 5; i++) { /* Skip disabled fans */ if (!(data->has_fan & (1 << i))) continue; - data->fan_min[i] = + data->fan[i][1] = it87_read_value(data, IT87_REG_FAN_MIN[i]); - data->fan[i] = it87_read_value(data, + data->fan[i][0] = it87_read_value(data, IT87_REG_FAN[i]); /* Add high byte if in 16-bit mode */ if (has_16bit_fans(data)) { - data->fan[i] |= it87_read_value(data, + data->fan[i][0] |= it87_read_value(data, IT87_REG_FANX[i]) << 8; - data->fan_min[i] |= it87_read_value(data, + data->fan[i][1] |= it87_read_value(data, IT87_REG_FANX_MIN[i]) << 8; } } for (i = 0; i < 3; i++) { if (!(data->has_temp & (1 << i))) continue; - data->temp[i] = + data->temp[i][0] = it87_read_value(data, IT87_REG_TEMP(i)); - data->temp_high[i] = - it87_read_value(data, IT87_REG_TEMP_HIGH(i)); - data->temp_low[i] = + data->temp[i][1] = it87_read_value(data, IT87_REG_TEMP_LOW(i)); + data->temp[i][2] = + it87_read_value(data, IT87_REG_TEMP_HIGH(i)); + if (has_temp_offset(data)) + data->temp[i][3] = + it87_read_value(data, + IT87_REG_TEMP_OFFSET[i]); } /* Newer chips don't have clock dividers */ @@ -2448,6 +2488,7 @@ static struct it87_data *it87_update_device(struct device *dev) it87_update_pwm_ctrl(data, i); data->sensor = it87_read_value(data, IT87_REG_TEMP_ENABLE); + data->extra = it87_read_value(data, IT87_REG_TEMP_EXTRA); /* * The IT8705F does not have VID capability. * The IT8718F and later don't use IT87_REG_VID for the @@ -2549,8 +2590,7 @@ static void __exit sm_it87_exit(void) } -MODULE_AUTHOR("Chris Gauthron, " - "Jean Delvare <khali@linux-fr.org>"); +MODULE_AUTHOR("Chris Gauthron, Jean Delvare <khali@linux-fr.org>"); MODULE_DESCRIPTION("IT8705F/IT871xF/IT872xF hardware monitoring driver"); module_param(update_vbat, bool, 0); MODULE_PARM_DESC(update_vbat, "Update vbat if set else return powerup value"); diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index 55ac41c05561..0e8ffd6059a0 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -1,7 +1,7 @@ /* * w83627ehf - Driver for the hardware monitoring functionality of * the Winbond W83627EHF Super-I/O chip - * Copyright (C) 2005-2011 Jean Delvare <khali@linux-fr.org> + * Copyright (C) 2005-2012 Jean Delvare <khali@linux-fr.org> * Copyright (C) 2006 Yuan Mu (Winbond), * Rudolf Marek <r.marek@assembler.cz> * David Hubbard <david.c.hubbard@gmail.com> @@ -502,6 +502,13 @@ struct w83627ehf_data { u16 have_temp_offset; u8 in6_skip:1; u8 temp3_val_only:1; + +#ifdef CONFIG_PM + /* Remember extra register values over suspend/resume */ + u8 vbat; + u8 fandiv1; + u8 fandiv2; +#endif }; struct w83627ehf_sio_data { @@ -898,6 +905,8 @@ static struct w83627ehf_data *w83627ehf_update_device(struct device *dev) data->temp_max_hyst[i] = w83627ehf_read_temp(data, data->reg_temp_hyst[i]); + if (i > 2) + continue; if (data->have_temp_offset & (1 << i)) data->temp_offset[i] = w83627ehf_read_value(data, @@ -2608,10 +2617,98 @@ static int w83627ehf_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int w83627ehf_suspend(struct device *dev) +{ + struct w83627ehf_data *data = w83627ehf_update_device(dev); + struct w83627ehf_sio_data *sio_data = dev->platform_data; + + mutex_lock(&data->update_lock); + data->vbat = w83627ehf_read_value(data, W83627EHF_REG_VBAT); + if (sio_data->kind == nct6775) { + data->fandiv1 = w83627ehf_read_value(data, NCT6775_REG_FANDIV1); + data->fandiv2 = w83627ehf_read_value(data, NCT6775_REG_FANDIV2); + } + mutex_unlock(&data->update_lock); + + return 0; +} + +static int w83627ehf_resume(struct device *dev) +{ + struct w83627ehf_data *data = dev_get_drvdata(dev); + struct w83627ehf_sio_data *sio_data = dev->platform_data; + int i; + + mutex_lock(&data->update_lock); + data->bank = 0xff; /* Force initial bank selection */ + + /* Restore limits */ + for (i = 0; i < data->in_num; i++) { + if ((i == 6) && data->in6_skip) + continue; + + w83627ehf_write_value(data, W83627EHF_REG_IN_MIN(i), + data->in_min[i]); + w83627ehf_write_value(data, W83627EHF_REG_IN_MAX(i), + data->in_max[i]); + } + + for (i = 0; i < 5; i++) { + if (!(data->has_fan_min & (1 << i))) + continue; + + w83627ehf_write_value(data, data->REG_FAN_MIN[i], + data->fan_min[i]); + } + + for (i = 0; i < NUM_REG_TEMP; i++) { + if (!(data->have_temp & (1 << i))) + continue; + + if (data->reg_temp_over[i]) + w83627ehf_write_temp(data, data->reg_temp_over[i], + data->temp_max[i]); + if (data->reg_temp_hyst[i]) + w83627ehf_write_temp(data, data->reg_temp_hyst[i], + data->temp_max_hyst[i]); + if (i > 2) + continue; + if (data->have_temp_offset & (1 << i)) + w83627ehf_write_value(data, + W83627EHF_REG_TEMP_OFFSET[i], + data->temp_offset[i]); + } + + /* Restore other settings */ + w83627ehf_write_value(data, W83627EHF_REG_VBAT, data->vbat); + if (sio_data->kind == nct6775) { + w83627ehf_write_value(data, NCT6775_REG_FANDIV1, data->fandiv1); + w83627ehf_write_value(data, NCT6775_REG_FANDIV2, data->fandiv2); + } + + /* Force re-reading all values */ + data->valid = 0; + mutex_unlock(&data->update_lock); + + return 0; +} + +static const struct dev_pm_ops w83627ehf_dev_pm_ops = { + .suspend = w83627ehf_suspend, + .resume = w83627ehf_resume, +}; + +#define W83627EHF_DEV_PM_OPS (&w83627ehf_dev_pm_ops) +#else +#define W83627EHF_DEV_PM_OPS NULL +#endif /* CONFIG_PM */ + static struct platform_driver w83627ehf_driver = { .driver = { .owner = THIS_MODULE, .name = DRVNAME, + .pm = W83627EHF_DEV_PM_OPS, }, .probe = w83627ehf_probe, .remove = w83627ehf_remove, diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c index 7f68b8309d10..81f486520cea 100644 --- a/drivers/hwmon/w83627hf.c +++ b/drivers/hwmon/w83627hf.c @@ -5,7 +5,7 @@ * Philip Edelbrock <phil@netroedge.com>, * and Mark Studebaker <mdsxyz123@yahoo.com> * Ported to 2.6 by Bernhard C. Schrenk <clemy@clemy.org> - * Copyright (c) 2007 Jean Delvare <khali@linux-fr.org> + * Copyright (c) 2007 - 1012 Jean Delvare <khali@linux-fr.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -389,6 +389,12 @@ struct w83627hf_data { */ u8 vrm; u8 vrm_ovt; /* Register value, 627THF/637HF/687THF only */ + +#ifdef CONFIG_PM + /* Remember extra register values over suspend/resume */ + u8 scfg1; + u8 scfg2; +#endif }; @@ -401,10 +407,77 @@ static void w83627hf_update_fan_div(struct w83627hf_data *data); static struct w83627hf_data *w83627hf_update_device(struct device *dev); static void w83627hf_init_device(struct platform_device *pdev); +#ifdef CONFIG_PM +static int w83627hf_suspend(struct device *dev) +{ + struct w83627hf_data *data = w83627hf_update_device(dev); + + mutex_lock(&data->update_lock); + data->scfg1 = w83627hf_read_value(data, W83781D_REG_SCFG1); + data->scfg2 = w83627hf_read_value(data, W83781D_REG_SCFG2); + mutex_unlock(&data->update_lock); + + return 0; +} + +static int w83627hf_resume(struct device *dev) +{ + struct w83627hf_data *data = dev_get_drvdata(dev); + int i, num_temps = (data->type == w83697hf) ? 2 : 3; + + /* Restore limits */ + mutex_lock(&data->update_lock); + for (i = 0; i <= 8; i++) { + /* skip missing sensors */ + if (((data->type == w83697hf) && (i == 1)) || + ((data->type != w83627hf && data->type != w83697hf) + && (i == 5 || i == 6))) + continue; + w83627hf_write_value(data, W83781D_REG_IN_MAX(i), + data->in_max[i]); + w83627hf_write_value(data, W83781D_REG_IN_MIN(i), + data->in_min[i]); + } + for (i = 0; i <= 2; i++) + w83627hf_write_value(data, W83627HF_REG_FAN_MIN(i), + data->fan_min[i]); + for (i = 0; i < num_temps; i++) { + w83627hf_write_value(data, w83627hf_reg_temp_over[i], + data->temp_max[i]); + w83627hf_write_value(data, w83627hf_reg_temp_hyst[i], + data->temp_max_hyst[i]); + } + + /* Fixup BIOS bugs */ + if (data->type == w83627thf || data->type == w83637hf || + data->type == w83687thf) + w83627hf_write_value(data, W83627THF_REG_VRM_OVT_CFG, + data->vrm_ovt); + w83627hf_write_value(data, W83781D_REG_SCFG1, data->scfg1); + w83627hf_write_value(data, W83781D_REG_SCFG2, data->scfg2); + + /* Force re-reading all values */ + data->valid = 0; + mutex_unlock(&data->update_lock); + + return 0; +} + +static const struct dev_pm_ops w83627hf_dev_pm_ops = { + .suspend = w83627hf_suspend, + .resume = w83627hf_resume, +}; + +#define W83627HF_DEV_PM_OPS (&w83627hf_dev_pm_ops) +#else +#define W83627HF_DEV_PM_OPS NULL +#endif /* CONFIG_PM */ + static struct platform_driver w83627hf_driver = { .driver = { .owner = THIS_MODULE, .name = DRVNAME, + .pm = W83627HF_DEV_PM_OPS, }, .probe = w83627hf_probe, .remove = w83627hf_remove, @@ -1659,8 +1732,10 @@ static void w83627hf_init_device(struct platform_device *pdev) /* Minimize conflicts with other winbond i2c-only clients... */ /* disable i2c subclients... how to disable main i2c client?? */ /* force i2c address to relatively uncommon address */ - w83627hf_write_value(data, W83781D_REG_I2C_SUBADDR, 0x89); - w83627hf_write_value(data, W83781D_REG_I2C_ADDR, force_i2c); + if (type == w83627hf) { + w83627hf_write_value(data, W83781D_REG_I2C_SUBADDR, 0x89); + w83627hf_write_value(data, W83781D_REG_I2C_ADDR, force_i2c); + } /* Read VID only once */ if (type == w83627hf || type == w83637hf) { diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 5de86968379d..c13745cde7fa 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -38,10 +38,12 @@ #include <linux/inetdevice.h> #include <linux/ip.h> #include <linux/tcp.h> +#include <linux/if_vlan.h> #include <net/neighbour.h> #include <net/netevent.h> #include <net/route.h> +#include <net/tcp.h> #include "iw_cxgb4.h" @@ -61,6 +63,14 @@ static char *states[] = { NULL, }; +static int nocong; +module_param(nocong, int, 0644); +MODULE_PARM_DESC(nocong, "Turn of congestion control (default=0)"); + +static int enable_ecn; +module_param(enable_ecn, int, 0644); +MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)"); + static int dack_mode = 1; module_param(dack_mode, int, 0644); MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); @@ -265,6 +275,7 @@ void _c4iw_free_ep(struct kref *kref) cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); } kfree(ep); } @@ -441,6 +452,50 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } +#define VLAN_NONE 0xfff +#define FILTER_SEL_VLAN_NONE 0xffff +#define FILTER_SEL_WIDTH_P_FC (3+1) /* port uses 3 bits, FCoE one bit */ +#define FILTER_SEL_WIDTH_VIN_P_FC \ + (6 + 7 + FILTER_SEL_WIDTH_P_FC) /* 6 bits are unused, VF uses 7 bits*/ +#define FILTER_SEL_WIDTH_TAG_P_FC \ + (3 + FILTER_SEL_WIDTH_VIN_P_FC) /* PF uses 3 bits */ +#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC) + +static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst, + struct l2t_entry *l2t) +{ + unsigned int ntuple = 0; + u32 viid; + + switch (dev->rdev.lldi.filt_mode) { + + /* default filter mode */ + case HW_TPL_FR_MT_PR_IV_P_FC: + if (l2t->vlan == VLAN_NONE) + ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC; + else { + ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC; + ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC; + } + ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << + FILTER_SEL_WIDTH_VLD_TAG_P_FC; + break; + case HW_TPL_FR_MT_PR_OV_P_FC: { + viid = cxgb4_port_viid(l2t->neigh->dev); + + ntuple |= FW_VIID_VIN_GET(viid) << FILTER_SEL_WIDTH_P_FC; + ntuple |= FW_VIID_PFN_GET(viid) << FILTER_SEL_WIDTH_VIN_P_FC; + ntuple |= FW_VIID_VIVLD_GET(viid) << FILTER_SEL_WIDTH_TAG_P_FC; + ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << + FILTER_SEL_WIDTH_VLD_TAG_P_FC; + break; + } + default: + break; + } + return ntuple; +} + static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req; @@ -463,7 +518,8 @@ static int send_connect(struct c4iw_ep *ep) cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - opt0 = KEEP_ALIVE(1) | + opt0 = (nocong ? NO_CONG(1) : 0) | + KEEP_ALIVE(1) | DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | @@ -474,6 +530,7 @@ static int send_connect(struct c4iw_ep *ep) ULP_MODE(ULP_MODE_TCPDDP) | RCV_BUFSIZ(rcv_win>>10); opt2 = RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); if (enable_tcp_timestamps) opt2 |= TSTAMPS_EN(1); @@ -492,8 +549,9 @@ static int send_connect(struct c4iw_ep *ep) req->local_ip = ep->com.local_addr.sin_addr.s_addr; req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; req->opt0 = cpu_to_be64(opt0); - req->params = 0; + req->params = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, ep->l2t)); req->opt2 = cpu_to_be32(opt2); + set_bit(ACT_OPEN_REQ, &ep->com.history); return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -770,6 +828,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) /* setup the hwtid for this connection */ ep->hwtid = tid; cxgb4_insert_tid(t, ep, tid); + insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); @@ -777,7 +836,9 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_emss(ep, ntohs(req->tcp_opt)); /* dealloc the atid */ + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); cxgb4_free_atid(t, atid); + set_bit(ACT_ESTAB, &ep->com.history); /* start MPA negotiation */ send_flowc(ep, NULL); @@ -803,6 +864,7 @@ static void close_complete_upcall(struct c4iw_ep *ep) ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; + set_bit(CLOSE_UPCALL, &ep->com.history); } } @@ -811,6 +873,7 @@ static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); close_complete_upcall(ep); state_set(&ep->com, ABORTING); + set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); } @@ -825,6 +888,7 @@ static void peer_close_upcall(struct c4iw_ep *ep) PDBG("peer close delivered ep %p cm_id %p tid %u\n", ep, ep->com.cm_id, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(DISCONN_UPCALL, &ep->com.history); } } @@ -843,6 +907,7 @@ static void peer_abort_upcall(struct c4iw_ep *ep) ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; + set_bit(ABORT_UPCALL, &ep->com.history); } } @@ -875,6 +940,7 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status); + set_bit(CONN_RPL_UPCALL, &ep->com.history); ep->com.cm_id->event_handler(ep->com.cm_id, &event); if (status < 0) { @@ -915,6 +981,7 @@ static void connect_request_upcall(struct c4iw_ep *ep) ep->parent_ep->com.cm_id, &event); } + set_bit(CONNREQ_UPCALL, &ep->com.history); c4iw_put_ep(&ep->parent_ep->com); ep->parent_ep = NULL; } @@ -931,6 +998,7 @@ static void established_upcall(struct c4iw_ep *ep) if (ep->com.cm_id) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(ESTAB_UPCALL, &ep->com.history); } } @@ -1316,6 +1384,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int dlen = ntohs(hdr->len); unsigned int tid = GET_TID(hdr); struct tid_info *t = dev->rdev.lldi.tids; + __u8 status = hdr->status; ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); @@ -1338,9 +1407,9 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) case MPA_REP_SENT: break; default: - printk(KERN_ERR MOD "%s Unexpected streaming data." - " ep %p state %d tid %u\n", - __func__, ep, state_read(&ep->com), ep->hwtid); + pr_err("%s Unexpected streaming data." \ + " ep %p state %d tid %u status %d\n", + __func__, ep, state_read(&ep->com), ep->hwtid, status); /* * The ep will timeout and inform the ULP of the failure. @@ -1383,6 +1452,63 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) +{ + struct sk_buff *skb; + struct fw_ofld_connection_wr *req; + unsigned int mtu_idx; + int wscale; + + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR)); + req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, + ep->l2t)); + req->le.lport = ep->com.local_addr.sin_port; + req->le.pport = ep->com.remote_addr.sin_port; + req->le.u.ipv4.lip = ep->com.local_addr.sin_addr.s_addr; + req->le.u.ipv4.pip = ep->com.remote_addr.sin_addr.s_addr; + req->tcb.t_state_to_astid = + htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) | + V_FW_OFLD_CONNECTION_WR_ASTID(atid)); + req->tcb.cplrxdataack_cplpassacceptrpl = + htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); + req->tcb.tx_max = jiffies; + req->tcb.rcv_adv = htons(1); + cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + wscale = compute_wscale(rcv_win); + req->tcb.opt0 = TCAM_BYPASS(1) | + (nocong ? NO_CONG(1) : 0) | + KEEP_ALIVE(1) | + DELACK(1) | + WND_SCALE(wscale) | + MSS_IDX(mtu_idx) | + L2T_IDX(ep->l2t->idx) | + TX_CHAN(ep->tx_chan) | + SMAC_SEL(ep->smac_idx) | + DSCP(ep->tos) | + ULP_MODE(ULP_MODE_TCPDDP) | + RCV_BUFSIZ(rcv_win >> 10); + req->tcb.opt2 = PACE(1) | + TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | + RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + if (enable_tcp_timestamps) + req->tcb.opt2 |= TSTAMPS_EN(1); + if (enable_tcp_sack) + req->tcb.opt2 |= SACK_EN(1); + if (wscale && enable_tcp_window_scaling) + req->tcb.opt2 |= WND_SCALE_EN(1); + req->tcb.opt0 = cpu_to_be64(req->tcb.opt0); + req->tcb.opt2 = cpu_to_be32(req->tcb.opt2); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); + set_bit(ACT_OFLD_CONN, &ep->com.history); + c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + /* * Return whether a failed active open has allocated a TID */ @@ -1392,6 +1518,111 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_ARP_MISS; } +#define ACT_OPEN_RETRY_COUNT 2 + +static int c4iw_reconnect(struct c4iw_ep *ep) +{ + int err = 0; + struct rtable *rt; + struct port_info *pi; + struct net_device *pdev; + int step; + struct neighbour *neigh; + + PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); + init_timer(&ep->timer); + + /* + * Allocate an active TID to initiate a TCP connection. + */ + ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + pr_err("%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); + + /* find a route */ + rt = find_route(ep->com.dev, + ep->com.cm_id->local_addr.sin_addr.s_addr, + ep->com.cm_id->remote_addr.sin_addr.s_addr, + ep->com.cm_id->local_addr.sin_port, + ep->com.cm_id->remote_addr.sin_port, 0); + if (!rt) { + pr_err("%s - cannot find route.\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + ep->dst = &rt->dst; + + neigh = dst_neigh_lookup(ep->dst, + &ep->com.cm_id->remote_addr.sin_addr.s_addr); + /* get a l2t entry */ + if (neigh->dev->flags & IFF_LOOPBACK) { + PDBG("%s LOOPBACK\n", __func__); + pdev = ip_dev_find(&init_net, + ep->com.cm_id->remote_addr.sin_addr.s_addr); + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + ep->mtu = pdev->mtu; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + dev_put(pdev); + } else { + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, neigh->dev, 0); + pi = (struct port_info *)netdev_priv(neigh->dev); + ep->mtu = dst_mtu(ep->dst); + ep->tx_chan = cxgb4_port_chan(neigh->dev); + ep->smac_idx = (cxgb4_port_viid(neigh->dev) & + 0x7F) << 1; + } + + step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan; + ep->txq_idx = pi->port_id * step; + ep->ctrlq_idx = pi->port_id; + step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan; + ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[pi->port_id * step]; + + if (!ep->l2t) { + pr_err("%s - cannot alloc l2e.\n", __func__); + err = -ENOMEM; + goto fail4; + } + + PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = 0; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail2: + /* + * remember to send notification to upper layer. + * We are in here so the upper layer is not aware that this is + * re-connect attempt and so, upper layer is still waiting for + * response of 1st connect request. + */ + connect_reply_upcall(ep, -ECONNRESET); + c4iw_put_ep(&ep->com); +out: + return err; +} + static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; @@ -1412,6 +1643,8 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } + set_bit(ACT_OPEN_RPL, &ep->com.history); + /* * Log interesting failures. */ @@ -1419,6 +1652,29 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) case CPL_ERR_CONN_RESET: case CPL_ERR_CONN_TIMEDOUT: break; + case CPL_ERR_TCAM_FULL: + if (dev->rdev.lldi.enable_fw_ofld_conn) { + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.tcam_full++; + mutex_unlock(&dev->rdev.stats.lock); + send_fw_act_open_req(ep, + GET_TID_TID(GET_AOPEN_ATID( + ntohl(rpl->atid_status)))); + return 0; + } + break; + case CPL_ERR_CONN_EXIST: + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + set_bit(ACT_RETRY_INUSE, &ep->com.history); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, + atid); + cxgb4_free_atid(t, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_reconnect(ep); + return 0; + } + break; default: printk(KERN_INFO MOD "Active open failure - " "atid %u status %u errno %d %pI4:%u->%pI4:%u\n", @@ -1436,6 +1692,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) if (status && act_open_has_tid(status)) cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl)); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); cxgb4_free_atid(t, atid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -1452,13 +1709,14 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_listen_ep *ep = lookup_stid(t, stid); if (!ep) { - printk(KERN_ERR MOD "stid %d lookup failure!\n", stid); - return 0; + PDBG("%s stid %d lookup failure!\n", __func__, stid); + goto out; } PDBG("%s ep %p status %d error %d\n", __func__, ep, rpl->status, status2errno(rpl->status)); c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); +out: return 0; } @@ -1510,14 +1768,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, skb_get(skb); cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - opt0 = KEEP_ALIVE(1) | + opt0 = (nocong ? NO_CONG(1) : 0) | + KEEP_ALIVE(1) | DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | L2T_IDX(ep->l2t->idx) | TX_CHAN(ep->tx_chan) | SMAC_SEL(ep->smac_idx) | - DSCP(ep->tos) | + DSCP(ep->tos >> 2) | ULP_MODE(ULP_MODE_TCPDDP) | RCV_BUFSIZ(rcv_win>>10); opt2 = RX_CHANNEL(0) | @@ -1529,6 +1788,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, opt2 |= SACK_EN(1); if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN(1); + if (enable_ecn) { + const struct tcphdr *tcph; + u32 hlen = ntohl(req->hdr_len); + + tcph = (const void *)(req + 1) + G_ETH_HDR_LEN(hlen) + + G_IP_HDR_LEN(hlen); + if (tcph->ece && tcph->cwr) + opt2 |= CCTRL_ECN(1); + } rpl = cplhdr(skb); INIT_TP_WR(rpl, ep->hwtid); @@ -1645,22 +1913,30 @@ out: static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { - struct c4iw_ep *child_ep, *parent_ep; + struct c4iw_ep *child_ep = NULL, *parent_ep; struct cpl_pass_accept_req *req = cplhdr(skb); unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid)); struct tid_info *t = dev->rdev.lldi.tids; unsigned int hwtid = GET_TID(req); struct dst_entry *dst; struct rtable *rt; - __be32 local_ip, peer_ip; + __be32 local_ip, peer_ip = 0; __be16 local_port, peer_port; int err; + u16 peer_mss = ntohs(req->tcpopt.mss); parent_ep = lookup_stid(t, stid); - PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); - + if (!parent_ep) { + PDBG("%s connect request on invalid stid %d\n", __func__, stid); + goto reject; + } get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port); + PDBG("%s parent ep %p hwtid %u laddr 0x%x raddr 0x%x lport %d " \ + "rport %d peer_mss %d\n", __func__, parent_ep, hwtid, + ntohl(local_ip), ntohl(peer_ip), ntohs(local_port), + ntohs(peer_port), peer_mss); + if (state_read(&parent_ep->com) != LISTEN) { printk(KERN_ERR "%s - listening ep not in LISTEN\n", __func__); @@ -1694,6 +1970,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } + if (peer_mss && child_ep->mtu > (peer_mss + 40)) + child_ep->mtu = peer_mss + 40; + state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; @@ -1715,6 +1994,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) init_timer(&child_ep->timer); cxgb4_insert_tid(t, child_ep, hwtid); accept_cr(child_ep, peer_ip, skb, req); + set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); goto out; reject: reject_cr(dev, hwtid, peer_ip, skb); @@ -1734,12 +2014,17 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid, + ntohs(req->tcp_opt)); + set_emss(ep, ntohs(req->tcp_opt)); + insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); dst_confirm(ep->dst); state_set(&ep->com, MPA_REQ_WAIT); start_ep_timer(ep); send_flowc(ep, skb); + set_bit(PASS_ESTAB, &ep->com.history); return 0; } @@ -1759,6 +2044,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); dst_confirm(ep->dst); + set_bit(PEER_CLOSE, &ep->com.history); mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: @@ -1838,74 +2124,6 @@ static int is_neg_adv_abort(unsigned int status) status == CPL_ERR_PERSIST_NEG_ADVICE; } -static int c4iw_reconnect(struct c4iw_ep *ep) -{ - struct rtable *rt; - int err = 0; - - PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); - init_timer(&ep->timer); - - /* - * Allocate an active TID to initiate a TCP connection. - */ - ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); - if (ep->atid == -1) { - printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); - err = -ENOMEM; - goto fail2; - } - - /* find a route */ - rt = find_route(ep->com.dev, - ep->com.cm_id->local_addr.sin_addr.s_addr, - ep->com.cm_id->remote_addr.sin_addr.s_addr, - ep->com.cm_id->local_addr.sin_port, - ep->com.cm_id->remote_addr.sin_port, 0); - if (!rt) { - printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); - err = -EHOSTUNREACH; - goto fail3; - } - ep->dst = &rt->dst; - - err = import_ep(ep, ep->com.cm_id->remote_addr.sin_addr.s_addr, - ep->dst, ep->com.dev, false); - if (err) { - printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - goto fail4; - } - - PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", - __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, - ep->l2t->idx); - - state_set(&ep->com, CONNECTING); - ep->tos = 0; - - /* send connect request to rnic */ - err = send_connect(ep); - if (!err) - goto out; - - cxgb4_l2t_release(ep->l2t); -fail4: - dst_release(ep->dst); -fail3: - cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); -fail2: - /* - * remember to send notification to upper layer. - * We are in here so the upper layer is not aware that this is - * re-connect attempt and so, upper layer is still waiting for - * response of 1st connect request. - */ - connect_reply_upcall(ep, -ECONNRESET); - c4iw_put_ep(&ep->com); -out: - return err; -} - static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); @@ -1926,6 +2144,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) } PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); + set_bit(PEER_ABORT, &ep->com.history); /* * Wake up any threads in rdma_init() or rdma_fini(). @@ -2140,6 +2359,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) c4iw_put_ep(&ep->com); return -ECONNRESET; } + set_bit(ULP_REJECT, &ep->com.history); BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); @@ -2169,6 +2389,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); BUG_ON(!qp); + set_bit(ULP_ACCEPT, &ep->com.history); if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { abort_connection(ep, NULL, GFP_KERNEL); @@ -2292,6 +2513,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOMEM; goto fail2; } + insert_handle(dev, &dev->atid_idr, ep, ep->atid); PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__, ntohl(cm_id->local_addr.sin_addr.s_addr), @@ -2337,6 +2559,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) fail4: dst_release(ep->dst); fail3: + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); fail2: cm_id->rem_ref(cm_id); @@ -2351,7 +2574,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_listen_ep *ep; - might_sleep(); ep = alloc_ep(sizeof(*ep), GFP_KERNEL); @@ -2370,30 +2592,54 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) /* * Allocate a server TID. */ - ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + if (dev->rdev.lldi.enable_fw_ofld_conn) + ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, PF_INET, ep); + else + ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + if (ep->stid == -1) { printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); err = -ENOMEM; goto fail2; } - + insert_handle(dev, &dev->stid_idr, ep, ep->stid); state_set(&ep->com, LISTEN); - c4iw_init_wr_wait(&ep->com.wr_wait); - err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid, - ep->com.local_addr.sin_addr.s_addr, - ep->com.local_addr.sin_port, - ep->com.dev->rdev.lldi.rxq_ids[0]); - if (err) - goto fail3; - - /* wait for pass_open_rpl */ - err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, - __func__); + if (dev->rdev.lldi.enable_fw_ofld_conn) { + do { + err = cxgb4_create_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.local_addr.sin_addr.s_addr, + ep->com.local_addr.sin_port, + 0, + ep->com.dev->rdev.lldi.rxq_ids[0], + 0, + 0); + if (err == -EBUSY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(100)); + } + } while (err == -EBUSY); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], + ep->stid, ep->com.local_addr.sin_addr.s_addr, + ep->com.local_addr.sin_port, + 0, + ep->com.dev->rdev.lldi.rxq_ids[0]); + if (!err) + err = c4iw_wait_for_reply(&ep->com.dev->rdev, + &ep->com.wr_wait, + 0, 0, __func__); + } if (!err) { cm_id->provider_data = ep; goto out; } -fail3: + pr_err("%s cxgb4_create_server/filter failed err %d " \ + "stid %d laddr %08x lport %d\n", \ + __func__, err, ep->stid, + ntohl(ep->com.local_addr.sin_addr.s_addr), + ntohs(ep->com.local_addr.sin_port)); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); fail2: cm_id->rem_ref(cm_id); @@ -2412,12 +2658,19 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) might_sleep(); state_set(&ep->com, DEAD); - c4iw_init_wr_wait(&ep->com.wr_wait); - err = listen_stop(ep); - if (err) - goto done; - err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, - __func__); + if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn) { + err = cxgb4_remove_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.dev->rdev.lldi.rxq_ids[0], 0); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = listen_stop(ep); + if (err) + goto done; + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, + 0, 0, __func__); + } + remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); done: cm_id->rem_ref(cm_id); @@ -2481,10 +2734,13 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (close) { if (abrupt) { + set_bit(EP_DISC_ABORT, &ep->com.history); close_complete_upcall(ep); ret = send_abort(ep, NULL, gfp); - } else + } else { + set_bit(EP_DISC_CLOSE, &ep->com.history); ret = send_halfclose(ep, gfp); + } if (ret) fatal = 1; } @@ -2494,10 +2750,323 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) return ret; } -static int async_event(struct c4iw_dev *dev, struct sk_buff *skb) +static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct c4iw_ep *ep; + int atid = be32_to_cpu(req->tid); + + ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, req->tid); + if (!ep) + return; + + switch (req->retval) { + case FW_ENOMEM: + set_bit(ACT_RETRY_NOMEM, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + case FW_EADDRINUSE: + set_bit(ACT_RETRY_INUSE, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + break; + default: + pr_info("%s unexpected ofld conn wr retval %d\n", + __func__, req->retval); + break; + } + pr_err("active ofld_connect_wr failure %d atid %d\n", + req->retval, atid); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.act_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + connect_reply_upcall(ep, status2errno(req->retval)); + state_set(&ep->com, DEAD); + remove_handle(dev, &dev->atid_idr, atid); + cxgb4_free_atid(dev->rdev.lldi.tids, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); +} + +static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct sk_buff *rpl_skb; + struct cpl_pass_accept_req *cpl; + int ret; + + rpl_skb = (struct sk_buff *)cpu_to_be64(req->cookie); + BUG_ON(!rpl_skb); + if (req->retval) { + PDBG("%s passive open failure %d\n", __func__, req->retval); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.pas_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + kfree_skb(rpl_skb); + } else { + cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, + htonl(req->tid))); + ret = pass_accept_req(dev, rpl_skb); + if (!ret) + kfree_skb(rpl_skb); + } + return; +} + +static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_fw6_msg *rpl = cplhdr(skb); - c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + struct cpl_fw6_msg_ofld_connection_wr_rpl *req; + + switch (rpl->type) { + case FW6_TYPE_CQE: + c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + break; + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: + req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data; + switch (req->t_state) { + case TCP_SYN_SENT: + active_ofld_conn_reply(dev, skb, req); + break; + case TCP_SYN_RECV: + passive_ofld_conn_reply(dev, skb, req); + break; + default: + pr_err("%s unexpected ofld conn wr state %d\n", + __func__, req->t_state); + break; + } + break; + } + return 0; +} + +static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) +{ + u32 l2info; + u16 vlantag, len, hdr_len; + u8 intf; + struct cpl_rx_pkt *cpl = cplhdr(skb); + struct cpl_pass_accept_req *req; + struct tcp_options_received tmp_opt; + + /* Store values from cpl_rx_pkt in temporary location. */ + vlantag = cpl->vlan; + len = cpl->len; + l2info = cpl->l2info; + hdr_len = cpl->hdr_len; + intf = cpl->iff; + + __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); + + /* + * We need to parse the TCP options from SYN packet. + * to generate cpl_pass_accept_req. + */ + memset(&tmp_opt, 0, sizeof(tmp_opt)); + tcp_clear_options(&tmp_opt); + tcp_parse_options(skb, &tmp_opt, 0, 0, NULL); + + req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->l2info = cpu_to_be16(V_SYN_INTF(intf) | + V_SYN_MAC_IDX(G_RX_MACIDX(htonl(l2info))) | + F_SYN_XACT_MATCH); + req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN(htonl(l2info))) | + V_TCP_HDR_LEN(G_RX_TCPHDR_LEN(htons(hdr_len))) | + V_IP_HDR_LEN(G_RX_IPHDR_LEN(htons(hdr_len))) | + V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(htonl(l2info)))); + req->vlan = vlantag; + req->len = len; + req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) | + PASS_OPEN_TOS(tos)); + req->tcpopt.mss = htons(tmp_opt.mss_clamp); + if (tmp_opt.wscale_ok) + req->tcpopt.wsf = tmp_opt.snd_wscale; + req->tcpopt.tstamp = tmp_opt.saw_tstamp; + if (tmp_opt.sack_ok) + req->tcpopt.sack = 1; + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0)); + return; +} + +static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, + __be32 laddr, __be16 lport, + __be32 raddr, __be16 rport, + u32 rcv_isn, u32 filter, u16 window, + u32 rss_qid, u8 port_id) +{ + struct sk_buff *req_skb; + struct fw_ofld_connection_wr *req; + struct cpl_pass_accept_req *cpl = cplhdr(skb); + + req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); + req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1)); + req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL); + req->le.filter = filter; + req->le.lport = lport; + req->le.pport = rport; + req->le.u.ipv4.lip = laddr; + req->le.u.ipv4.pip = raddr; + req->tcb.rcv_nxt = htonl(rcv_isn + 1); + req->tcb.rcv_adv = htons(window); + req->tcb.t_state_to_astid = + htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_RECV) | + V_FW_OFLD_CONNECTION_WR_RCV_SCALE(cpl->tcpopt.wsf) | + V_FW_OFLD_CONNECTION_WR_ASTID( + GET_PASS_OPEN_TID(ntohl(cpl->tos_stid)))); + + /* + * We store the qid in opt2 which will be used by the firmware + * to send us the wr response. + */ + req->tcb.opt2 = htonl(V_RSS_QUEUE(rss_qid)); + + /* + * We initialize the MSS index in TCB to 0xF. + * So that when driver sends cpl_pass_accept_rpl + * TCB picks up the correct value. If this was 0 + * TP will ignore any value > 0 for MSS index. + */ + req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF)); + req->cookie = cpu_to_be64((u64)skb); + + set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); + cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); +} + +/* + * Handler for CPL_RX_PKT message. Need to handle cpl_rx_pkt + * messages when a filter is being used instead of server to + * redirect a syn packet. When packets hit filter they are redirected + * to the offload queue and driver tries to establish the connection + * using firmware work request. + */ +static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) +{ + int stid; + unsigned int filter; + struct ethhdr *eh = NULL; + struct vlan_ethhdr *vlan_eh = NULL; + struct iphdr *iph; + struct tcphdr *tcph; + struct rss_header *rss = (void *)skb->data; + struct cpl_rx_pkt *cpl = (void *)skb->data; + struct cpl_pass_accept_req *req = (void *)(rss + 1); + struct l2t_entry *e; + struct dst_entry *dst; + struct rtable *rt; + struct c4iw_ep *lep; + u16 window; + struct port_info *pi; + struct net_device *pdev; + u16 rss_qid; + int step; + u32 tx_chan; + struct neighbour *neigh; + + /* Drop all non-SYN packets */ + if (!(cpl->l2info & cpu_to_be32(F_RXF_SYN))) + goto reject; + + /* + * Drop all packets which did not hit the filter. + * Unlikely to happen. + */ + if (!(rss->filter_hit && rss->filter_tid)) + goto reject; + + /* + * Calculate the server tid from filter hit index from cpl_rx_pkt. + */ + stid = cpu_to_be32(rss->hash_val) - dev->rdev.lldi.tids->sftid_base + + dev->rdev.lldi.tids->nstids; + + lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); + if (!lep) { + PDBG("%s connect request on invalid stid %d\n", __func__, stid); + goto reject; + } + + if (G_RX_ETHHDR_LEN(ntohl(cpl->l2info)) == ETH_HLEN) { + eh = (struct ethhdr *)(req + 1); + iph = (struct iphdr *)(eh + 1); + } else { + vlan_eh = (struct vlan_ethhdr *)(req + 1); + iph = (struct iphdr *)(vlan_eh + 1); + skb->vlan_tci = ntohs(cpl->vlan); + } + + if (iph->version != 0x4) + goto reject; + + tcph = (struct tcphdr *)(iph + 1); + skb_set_network_header(skb, (void *)iph - (void *)rss); + skb_set_transport_header(skb, (void *)tcph - (void *)rss); + skb_get(skb); + + PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__, + ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), + ntohs(tcph->source), iph->tos); + + rt = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, + iph->tos); + if (!rt) { + pr_err("%s - failed to find dst entry!\n", + __func__); + goto reject; + } + dst = &rt->dst; + neigh = dst_neigh_lookup_skb(dst, skb); + + if (neigh->dev->flags & IFF_LOOPBACK) { + pdev = ip_dev_find(&init_net, iph->daddr); + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + tx_chan = cxgb4_port_chan(pdev); + dev_put(pdev); + } else { + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + neigh->dev, 0); + pi = (struct port_info *)netdev_priv(neigh->dev); + tx_chan = cxgb4_port_chan(neigh->dev); + } + if (!e) { + pr_err("%s - failed to allocate l2t entry!\n", + __func__); + goto free_dst; + } + + step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; + rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; + window = htons(tcph->window); + + /* Calcuate filter portion for LE region. */ + filter = cpu_to_be32(select_ntuple(dev, dst, e)); + + /* + * Synthesize the cpl_pass_accept_req. We have everything except the + * TID. Once firmware sends a reply with TID we update the TID field + * in cpl and pass it through the regular cpl_pass_accept_req path. + */ + build_cpl_pass_accept_req(skb, stid, iph->tos); + send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr, + tcph->source, ntohl(tcph->seq), filter, window, + rss_qid, pi->port_id); + cxgb4_l2t_release(e); +free_dst: + dst_release(dst); +reject: return 0; } @@ -2520,7 +3089,8 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_CLOSE_CON_RPL] = close_con_rpl, [CPL_RDMA_TERMINATE] = terminate, [CPL_FW4_ACK] = fw4_ack, - [CPL_FW6_MSG] = async_event + [CPL_FW6_MSG] = deferred_fw6_msg, + [CPL_RX_PKT] = rx_pkt }; static void process_timeout(struct c4iw_ep *ep) @@ -2531,6 +3101,7 @@ static void process_timeout(struct c4iw_ep *ep) mutex_lock(&ep->com.mutex); PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, ep->com.state); + set_bit(TIMEDOUT, &ep->com.history); switch (ep->com.state) { case MPA_REQ_SENT: __state_set(&ep->com, ABORTING); @@ -2651,7 +3222,7 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s type %u\n", __func__, rpl->type); switch (rpl->type) { - case 1: + case FW6_TYPE_WR_RPL: ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); @@ -2659,7 +3230,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_wake_up(wr_waitp, ret ? -ret : 0); kfree_skb(skb); break; - case 2: + case FW6_TYPE_CQE: + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: sched(dev, skb); break; default: @@ -2722,7 +3294,8 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { [CPL_RDMA_TERMINATE] = sched, [CPL_FW4_ACK] = sched, [CPL_SET_TCB_RPL] = set_tcb_rpl, - [CPL_FW6_MSG] = fw6_msg + [CPL_FW6_MSG] = fw6_msg, + [CPL_RX_PKT] = sched }; int __init c4iw_cm_init(void) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index cb4ecd783700..ba11c76c0b5a 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -279,6 +279,11 @@ static int stats_show(struct seq_file *seq, void *v) seq_printf(seq, " DB State: %s Transitions %llu\n", db_state_str[dev->db_state], dev->rdev.stats.db_state_transitions); + seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full); + seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.act_ofld_conn_fails); + seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.pas_ofld_conn_fails); return 0; } @@ -309,6 +314,9 @@ static ssize_t stats_clear(struct file *file, const char __user *buf, dev->rdev.stats.db_empty = 0; dev->rdev.stats.db_drop = 0; dev->rdev.stats.db_state_transitions = 0; + dev->rdev.stats.tcam_full = 0; + dev->rdev.stats.act_ofld_conn_fails = 0; + dev->rdev.stats.pas_ofld_conn_fails = 0; mutex_unlock(&dev->rdev.stats.lock); return count; } @@ -322,6 +330,113 @@ static const struct file_operations stats_debugfs_fops = { .write = stats_clear, }; +static int dump_ep(int id, void *p, void *data) +{ + struct c4iw_ep *ep = p; + struct c4iw_debugfs_data *epd = data; + int space; + int cc; + + space = epd->bufsize - epd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p qp %p state %d flags 0x%lx history 0x%lx " + "hwtid %d atid %d %pI4:%d <-> %pI4:%d\n", + ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, + ep->com.flags, ep->com.history, ep->hwtid, ep->atid, + &ep->com.local_addr.sin_addr.s_addr, + ntohs(ep->com.local_addr.sin_port), + &ep->com.remote_addr.sin_addr.s_addr, + ntohs(ep->com.remote_addr.sin_port)); + if (cc < space) + epd->pos += cc; + return 0; +} + +static int dump_listen_ep(int id, void *p, void *data) +{ + struct c4iw_listen_ep *ep = p; + struct c4iw_debugfs_data *epd = data; + int space; + int cc; + + space = epd->bufsize - epd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p state %d flags 0x%lx stid %d backlog %d " + "%pI4:%d\n", ep, ep->com.cm_id, (int)ep->com.state, + ep->com.flags, ep->stid, ep->backlog, + &ep->com.local_addr.sin_addr.s_addr, + ntohs(ep->com.local_addr.sin_port)); + if (cc < space) + epd->pos += cc; + return 0; +} + +static int ep_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *epd = file->private_data; + if (!epd) { + pr_info("%s null qpd?\n", __func__); + return 0; + } + vfree(epd->buf); + kfree(epd); + return 0; +} + +static int ep_open(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *epd; + int ret = 0; + int count = 1; + + epd = kmalloc(sizeof(*epd), GFP_KERNEL); + if (!epd) { + ret = -ENOMEM; + goto out; + } + epd->devp = inode->i_private; + epd->pos = 0; + + spin_lock_irq(&epd->devp->lock); + idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count); + idr_for_each(&epd->devp->atid_idr, count_idrs, &count); + idr_for_each(&epd->devp->stid_idr, count_idrs, &count); + spin_unlock_irq(&epd->devp->lock); + + epd->bufsize = count * 160; + epd->buf = vmalloc(epd->bufsize); + if (!epd->buf) { + ret = -ENOMEM; + goto err1; + } + + spin_lock_irq(&epd->devp->lock); + idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd); + idr_for_each(&epd->devp->atid_idr, dump_ep, epd); + idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd); + spin_unlock_irq(&epd->devp->lock); + + file->private_data = epd; + goto out; +err1: + kfree(epd); +out: + return ret; +} + +static const struct file_operations ep_debugfs_fops = { + .owner = THIS_MODULE, + .open = ep_open, + .release = ep_release, + .read = debugfs_read, +}; + static int setup_debugfs(struct c4iw_dev *devp) { struct dentry *de; @@ -344,6 +459,11 @@ static int setup_debugfs(struct c4iw_dev *devp) if (de && de->d_inode) de->d_inode->i_size = 4096; + de = debugfs_create_file("eps", S_IWUSR, devp->debugfs_root, + (void *)devp, &ep_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; + return 0; } @@ -475,6 +595,9 @@ static void c4iw_dealloc(struct uld_ctx *ctx) idr_destroy(&ctx->dev->cqidr); idr_destroy(&ctx->dev->qpidr); idr_destroy(&ctx->dev->mmidr); + idr_destroy(&ctx->dev->hwtid_idr); + idr_destroy(&ctx->dev->stid_idr); + idr_destroy(&ctx->dev->atid_idr); iounmap(ctx->dev->rdev.oc_mw_kva); ib_dealloc_device(&ctx->dev->ibdev); ctx->dev = NULL; @@ -532,6 +655,9 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) idr_init(&devp->cqidr); idr_init(&devp->qpidr); idr_init(&devp->mmidr); + idr_init(&devp->hwtid_idr); + idr_init(&devp->stid_idr); + idr_init(&devp->atid_idr); spin_lock_init(&devp->lock); mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); @@ -577,14 +703,76 @@ out: return ctx; } +static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, + const __be64 *rsp, + u32 pktshift) +{ + struct sk_buff *skb; + + /* + * Allocate space for cpl_pass_accept_req which will be synthesized by + * driver. Once the driver synthesizes the request the skb will go + * through the regular cpl_pass_accept_req processing. + * The math here assumes sizeof cpl_pass_accept_req >= sizeof + * cpl_rx_pkt. + */ + skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header) - pktshift, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header) - pktshift); + + /* + * This skb will contain: + * rss_header from the rspq descriptor (1 flit) + * cpl_rx_pkt struct from the rspq descriptor (2 flits) + * space for the difference between the size of an + * rx_pkt and pass_accept_req cpl (1 flit) + * the packet data from the gl + */ + skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header)); + skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) + + sizeof(struct cpl_pass_accept_req), + gl->va + pktshift, + gl->tot_len - pktshift); + return skb; +} + +static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl, + const __be64 *rsp) +{ + unsigned int opcode = *(u8 *)rsp; + struct sk_buff *skb; + + if (opcode != CPL_RX_PKT) + goto out; + + skb = copy_gl_to_skb_pkt(gl , rsp, dev->rdev.lldi.sge_pktshift); + if (skb == NULL) + goto out; + + if (c4iw_handlers[opcode] == NULL) { + pr_info("%s no handler opcode 0x%x...\n", __func__, + opcode); + kfree_skb(skb); + goto out; + } + c4iw_handlers[opcode](dev, skb); + return 1; +out: + return 0; +} + static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, const struct pkt_gl *gl) { struct uld_ctx *ctx = handle; struct c4iw_dev *dev = ctx->dev; struct sk_buff *skb; - const struct cpl_act_establish *rpl; - unsigned int opcode; + u8 opcode; if (gl == NULL) { /* omit RSS and rsp_ctrl at end of descriptor */ @@ -601,19 +789,29 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, u32 qid = be32_to_cpu(rc->pldbuflen_qid); c4iw_ev_handler(dev, qid); return 0; + } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) { + if (recv_rx_pkt(dev, gl, rsp)) + return 0; + + pr_info("%s: unexpected FL contents at %p, " \ + "RSS %#llx, FL %#llx, len %u\n", + pci_name(ctx->lldi.pdev), gl->va, + (unsigned long long)be64_to_cpu(*rsp), + (unsigned long long)be64_to_cpu(*(u64 *)gl->va), + gl->tot_len); + + return 0; } else { skb = cxgb4_pktgl_to_skb(gl, 128, 128); if (unlikely(!skb)) goto nomem; } - rpl = cplhdr(skb); - opcode = rpl->ot.opcode; - + opcode = *(u8 *)rsp; if (c4iw_handlers[opcode]) c4iw_handlers[opcode](dev, skb); else - printk(KERN_INFO "%s no handler opcode 0x%x...\n", __func__, + pr_info("%s no handler opcode 0x%x...\n", __func__, opcode); return 0; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 9beb3a9f0336..9c1644fb0259 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -130,6 +130,9 @@ struct c4iw_stats { u64 db_empty; u64 db_drop; u64 db_state_transitions; + u64 tcam_full; + u64 act_ofld_conn_fails; + u64 pas_ofld_conn_fails; }; struct c4iw_rdev { @@ -223,6 +226,9 @@ struct c4iw_dev { struct dentry *debugfs_root; enum db_state db_state; int qpcnt; + struct idr hwtid_idr; + struct idr atid_idr; + struct idr stid_idr; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -712,6 +718,31 @@ enum c4iw_ep_flags { CLOSE_SENT = 3, }; +enum c4iw_ep_history { + ACT_OPEN_REQ = 0, + ACT_OFLD_CONN = 1, + ACT_OPEN_RPL = 2, + ACT_ESTAB = 3, + PASS_ACCEPT_REQ = 4, + PASS_ESTAB = 5, + ABORT_UPCALL = 6, + ESTAB_UPCALL = 7, + CLOSE_UPCALL = 8, + ULP_ACCEPT = 9, + ULP_REJECT = 10, + TIMEDOUT = 11, + PEER_ABORT = 12, + PEER_CLOSE = 13, + CONNREQ_UPCALL = 14, + ABORT_CONN = 15, + DISCONN_UPCALL = 16, + EP_DISC_CLOSE = 17, + EP_DISC_ABORT = 18, + CONN_RPL_UPCALL = 19, + ACT_RETRY_NOMEM = 20, + ACT_RETRY_INUSE = 21 +}; + struct c4iw_ep_common { struct iw_cm_id *cm_id; struct c4iw_qp *qp; @@ -723,6 +754,7 @@ struct c4iw_ep_common { struct sockaddr_in remote_addr; struct c4iw_wr_wait wr_wait; unsigned long flags; + unsigned long history; }; struct c4iw_listen_ep { @@ -760,6 +792,7 @@ struct c4iw_ep { u8 tos; u8 retry_with_mpa_v1; u8 tried_with_mpa_v1; + unsigned int retry_count; }; static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 72ae63f0072d..03103d2bd641 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -752,6 +752,9 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ dev->trans_start = jiffies; ++tx->tx_head; + skb_orphan(skb); + skb_dst_drop(skb); + if (++priv->tx_outstanding == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f10221f40803..a1bca70e20aa 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -615,8 +615,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, address->last_send = priv->tx_head; ++priv->tx_head; - skb_orphan(skb); + skb_orphan(skb); + skb_dst_drop(skb); } if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 55074cba20eb..c1c74e030a58 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -57,17 +57,9 @@ * physically contiguous memory regions it is mapping into page sizes * that we support. * - * Traditionally the IOMMU core just handed us the mappings directly, - * after making sure the size is an order of a 4KiB page and that the - * mapping has natural alignment. - * - * To retain this behavior, we currently advertise that we support - * all page sizes that are an order of 4KiB. - * - * If at some point we'd like to utilize the IOMMU core's new behavior, - * we could change this to advertise the real page sizes we support. + * 512GB Pages are not supported due to a hardware bug */ -#define AMD_IOMMU_PGSIZES (~0xFFFUL) +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) static DEFINE_RWLOCK(amd_iommu_devtable_lock); @@ -140,6 +132,9 @@ static void free_dev_data(struct iommu_dev_data *dev_data) list_del(&dev_data->dev_data_list); spin_unlock_irqrestore(&dev_data_list_lock, flags); + if (dev_data->group) + iommu_group_put(dev_data->group); + kfree(dev_data); } @@ -274,41 +269,23 @@ static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) *from = to; } -#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - -static int iommu_init_device(struct device *dev) +static struct pci_bus *find_hosted_bus(struct pci_bus *bus) { - struct pci_dev *dma_pdev = NULL, *pdev = to_pci_dev(dev); - struct iommu_dev_data *dev_data; - struct iommu_group *group; - u16 alias; - int ret; - - if (dev->archdata.iommu) - return 0; - - dev_data = find_dev_data(get_device_id(dev)); - if (!dev_data) - return -ENOMEM; - - alias = amd_iommu_alias_table[dev_data->devid]; - if (alias != dev_data->devid) { - struct iommu_dev_data *alias_data; + while (!bus->self) { + if (!pci_is_root_bus(bus)) + bus = bus->parent; + else + return ERR_PTR(-ENODEV); + } - alias_data = find_dev_data(alias); - if (alias_data == NULL) { - pr_err("AMD-Vi: Warning: Unhandled device %s\n", - dev_name(dev)); - free_dev_data(dev_data); - return -ENOTSUPP; - } - dev_data->alias_data = alias_data; + return bus; +} - dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); - } +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - if (dma_pdev == NULL) - dma_pdev = pci_dev_get(pdev); +static struct pci_dev *get_isolation_root(struct pci_dev *pdev) +{ + struct pci_dev *dma_pdev = pdev; /* Account for quirked devices */ swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); @@ -330,14 +307,9 @@ static int iommu_init_device(struct device *dev) * Finding the next device may require skipping virtual buses. */ while (!pci_is_root_bus(dma_pdev->bus)) { - struct pci_bus *bus = dma_pdev->bus; - - while (!bus->self) { - if (!pci_is_root_bus(bus)) - bus = bus->parent; - else - goto root_bus; - } + struct pci_bus *bus = find_hosted_bus(dma_pdev->bus); + if (IS_ERR(bus)) + break; if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) break; @@ -345,19 +317,137 @@ static int iommu_init_device(struct device *dev) swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); } -root_bus: - group = iommu_group_get(&dma_pdev->dev); - pci_dev_put(dma_pdev); + return dma_pdev; +} + +static int use_pdev_iommu_group(struct pci_dev *pdev, struct device *dev) +{ + struct iommu_group *group = iommu_group_get(&pdev->dev); + int ret; + if (!group) { group = iommu_group_alloc(); if (IS_ERR(group)) return PTR_ERR(group); + + WARN_ON(&pdev->dev != dev); } ret = iommu_group_add_device(group, dev); - iommu_group_put(group); + return ret; +} + +static int use_dev_data_iommu_group(struct iommu_dev_data *dev_data, + struct device *dev) +{ + if (!dev_data->group) { + struct iommu_group *group = iommu_group_alloc(); + if (IS_ERR(group)) + return PTR_ERR(group); + + dev_data->group = group; + } + + return iommu_group_add_device(dev_data->group, dev); +} + +static int init_iommu_group(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct iommu_group *group; + struct pci_dev *dma_pdev; + int ret; + + group = iommu_group_get(dev); + if (group) { + iommu_group_put(group); + return 0; + } + + dev_data = find_dev_data(get_device_id(dev)); + if (!dev_data) + return -ENOMEM; + + if (dev_data->alias_data) { + u16 alias; + struct pci_bus *bus; + + if (dev_data->alias_data->group) + goto use_group; + + /* + * If the alias device exists, it's effectively just a first + * level quirk for finding the DMA source. + */ + alias = amd_iommu_alias_table[dev_data->devid]; + dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); + if (dma_pdev) { + dma_pdev = get_isolation_root(dma_pdev); + goto use_pdev; + } + + /* + * If the alias is virtual, try to find a parent device + * and test whether the IOMMU group is actualy rooted above + * the alias. Be careful to also test the parent device if + * we think the alias is the root of the group. + */ + bus = pci_find_bus(0, alias >> 8); + if (!bus) + goto use_group; + + bus = find_hosted_bus(bus); + if (IS_ERR(bus) || !bus->self) + goto use_group; + + dma_pdev = get_isolation_root(pci_dev_get(bus->self)); + if (dma_pdev != bus->self || (dma_pdev->multifunction && + !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))) + goto use_pdev; + + pci_dev_put(dma_pdev); + goto use_group; + } + + dma_pdev = get_isolation_root(pci_dev_get(to_pci_dev(dev))); +use_pdev: + ret = use_pdev_iommu_group(dma_pdev, dev); + pci_dev_put(dma_pdev); + return ret; +use_group: + return use_dev_data_iommu_group(dev_data->alias_data, dev); +} + +static int iommu_init_device(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct iommu_dev_data *dev_data; + u16 alias; + int ret; + + if (dev->archdata.iommu) + return 0; + + dev_data = find_dev_data(get_device_id(dev)); + if (!dev_data) + return -ENOMEM; + + alias = amd_iommu_alias_table[dev_data->devid]; + if (alias != dev_data->devid) { + struct iommu_dev_data *alias_data; + + alias_data = find_dev_data(alias); + if (alias_data == NULL) { + pr_err("AMD-Vi: Warning: Unhandled device %s\n", + dev_name(dev)); + free_dev_data(dev_data); + return -ENOTSUPP; + } + dev_data->alias_data = alias_data; + } + ret = init_iommu_group(dev); if (ret) return ret; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index c9aa3d079ff0..e38ab438bb34 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -426,6 +426,7 @@ struct iommu_dev_data { struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reference count */ + struct iommu_group *group; /* IOMMU group for virtual aliases */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Default for device is pt_domain */ diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0badfa48b32b..c2c07a4a7f21 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1827,10 +1827,17 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, if (!pte) return -ENOMEM; /* It is large page*/ - if (largepage_lvl > 1) + if (largepage_lvl > 1) { pteval |= DMA_PTE_LARGE_PAGE; - else + /* Ensure that old small page tables are removed to make room + for superpage, if they exist. */ + dma_pte_clear_range(domain, iov_pfn, + iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1); + dma_pte_free_pagetable(domain, iov_pfn, + iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1); + } else { pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; + } } /* We don't need lock here, nobody else @@ -2320,8 +2327,39 @@ static int domain_add_dev_info(struct dmar_domain *domain, return 0; } +static bool device_has_rmrr(struct pci_dev *dev) +{ + struct dmar_rmrr_unit *rmrr; + int i; + + for_each_rmrr_units(rmrr) { + for (i = 0; i < rmrr->devices_cnt; i++) { + /* + * Return TRUE if this RMRR contains the device that + * is passed in. + */ + if (rmrr->devices[i] == dev) + return true; + } + } + return false; +} + static int iommu_should_identity_map(struct pci_dev *pdev, int startup) { + + /* + * We want to prevent any device associated with an RMRR from + * getting placed into the SI Domain. This is done because + * problems exist when devices are moved in and out of domains + * and their respective RMRR info is lost. We exempt USB devices + * from this process due to their usage of RMRRs that are known + * to not be needed after BIOS hand-off to OS. + */ + if (device_has_rmrr(pdev) && + (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) + return 0; + if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) return 1; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index badc17c2bcb4..18108c1405e2 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -16,13 +16,13 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/ioport.h> -#include <linux/clk.h> #include <linux/platform_device.h> #include <linux/iommu.h> #include <linux/omap-iommu.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/io.h> +#include <linux/pm_runtime.h> #include <asm/cacheflush.h> @@ -143,31 +143,44 @@ EXPORT_SYMBOL_GPL(omap_iommu_arch_version); static int iommu_enable(struct omap_iommu *obj) { int err; + struct platform_device *pdev = to_platform_device(obj->dev); + struct iommu_platform_data *pdata = pdev->dev.platform_data; - if (!obj) + if (!obj || !pdata) return -EINVAL; if (!arch_iommu) return -ENODEV; - clk_enable(obj->clk); + if (pdata->deassert_reset) { + err = pdata->deassert_reset(pdev, pdata->reset_name); + if (err) { + dev_err(obj->dev, "deassert_reset failed: %d\n", err); + return err; + } + } + + pm_runtime_get_sync(obj->dev); err = arch_iommu->enable(obj); - clk_disable(obj->clk); return err; } static void iommu_disable(struct omap_iommu *obj) { - if (!obj) - return; + struct platform_device *pdev = to_platform_device(obj->dev); + struct iommu_platform_data *pdata = pdev->dev.platform_data; - clk_enable(obj->clk); + if (!obj || !pdata) + return; arch_iommu->disable(obj); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); + + if (pdata->assert_reset) + pdata->assert_reset(pdev, pdata->reset_name); } /* @@ -290,7 +303,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) if (!obj || !obj->nr_tlb_entries || !e) return -EINVAL; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); iotlb_lock_get(obj, &l); if (l.base == obj->nr_tlb_entries) { @@ -320,7 +333,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) cr = iotlb_alloc_cr(obj, e); if (IS_ERR(cr)) { - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return PTR_ERR(cr); } @@ -334,7 +347,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e) l.vict = l.base; iotlb_lock_set(obj, &l); out: - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return err; } @@ -364,7 +377,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da) int i; struct cr_regs cr; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, cr) { u32 start; @@ -383,7 +396,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da) iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY); } } - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); if (i == obj->nr_tlb_entries) dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da); @@ -397,7 +410,7 @@ static void flush_iotlb_all(struct omap_iommu *obj) { struct iotlb_lock l; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); l.base = 0; l.vict = 0; @@ -405,7 +418,7 @@ static void flush_iotlb_all(struct omap_iommu *obj) iommu_write_reg(obj, 1, MMU_GFLUSH); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); } #if defined(CONFIG_OMAP_IOMMU_DEBUG) || defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE) @@ -415,11 +428,11 @@ ssize_t omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t bytes) if (!obj || !buf) return -EINVAL; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); bytes = arch_iommu->dump_ctx(obj, buf, bytes); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return bytes; } @@ -433,7 +446,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num) struct cr_regs tmp; struct cr_regs *p = crs; - clk_enable(obj->clk); + pm_runtime_get_sync(obj->dev); iotlb_lock_get(obj, &saved); for_each_iotlb_cr(obj, num, i, tmp) { @@ -443,7 +456,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num) } iotlb_lock_set(obj, &saved); - clk_disable(obj->clk); + pm_runtime_put_sync(obj->dev); return p - crs; } @@ -807,9 +820,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) if (!obj->refcount) return IRQ_NONE; - clk_enable(obj->clk); errs = iommu_report_fault(obj, &da); - clk_disable(obj->clk); if (errs == 0) return IRQ_HANDLED; @@ -931,17 +942,10 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev) struct resource *res; struct iommu_platform_data *pdata = pdev->dev.platform_data; - if (pdev->num_resources != 2) - return -EINVAL; - obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); if (!obj) return -ENOMEM; - obj->clk = clk_get(&pdev->dev, pdata->clk_name); - if (IS_ERR(obj->clk)) - goto err_clk; - obj->nr_tlb_entries = pdata->nr_tlb_entries; obj->name = pdata->name; obj->dev = &pdev->dev; @@ -984,6 +988,9 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev) goto err_irq; platform_set_drvdata(pdev, obj); + pm_runtime_irq_safe(obj->dev); + pm_runtime_enable(obj->dev); + dev_info(&pdev->dev, "%s registered\n", obj->name); return 0; @@ -992,8 +999,6 @@ err_irq: err_ioremap: release_mem_region(res->start, resource_size(res)); err_mem: - clk_put(obj->clk); -err_clk: kfree(obj); return err; } @@ -1014,7 +1019,8 @@ static int __devexit omap_iommu_remove(struct platform_device *pdev) release_mem_region(res->start, resource_size(res)); iounmap(obj->regbase); - clk_put(obj->clk); + pm_runtime_disable(obj->dev); + dev_info(&pdev->dev, "%s removed\n", obj->name); kfree(obj); return 0; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 2b5f3c04d167..120084206602 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -29,7 +29,6 @@ struct iotlb_entry { struct omap_iommu { const char *name; struct module *owner; - struct clk *clk; void __iomem *regbase; struct device *dev; void *isr_priv; @@ -116,8 +115,6 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) * MMU Register offsets */ #define MMU_REVISION 0x00 -#define MMU_SYSCONFIG 0x10 -#define MMU_SYSSTATUS 0x14 #define MMU_IRQSTATUS 0x18 #define MMU_IRQENABLE 0x1c #define MMU_WALKING_ST 0x40 diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c index c02020292377..d745094a69dd 100644 --- a/drivers/iommu/omap-iommu2.c +++ b/drivers/iommu/omap-iommu2.c @@ -28,19 +28,6 @@ */ #define IOMMU_ARCH_VERSION 0x00000011 -/* SYSCONF */ -#define MMU_SYS_IDLE_SHIFT 3 -#define MMU_SYS_IDLE_FORCE (0 << MMU_SYS_IDLE_SHIFT) -#define MMU_SYS_IDLE_NONE (1 << MMU_SYS_IDLE_SHIFT) -#define MMU_SYS_IDLE_SMART (2 << MMU_SYS_IDLE_SHIFT) -#define MMU_SYS_IDLE_MASK (3 << MMU_SYS_IDLE_SHIFT) - -#define MMU_SYS_SOFTRESET (1 << 1) -#define MMU_SYS_AUTOIDLE 1 - -/* SYSSTATUS */ -#define MMU_SYS_RESETDONE 1 - /* IRQSTATUS & IRQENABLE */ #define MMU_IRQ_MULTIHITFAULT (1 << 4) #define MMU_IRQ_TABLEWALKFAULT (1 << 3) @@ -97,7 +84,6 @@ static void __iommu_set_twl(struct omap_iommu *obj, bool on) static int omap2_iommu_enable(struct omap_iommu *obj) { u32 l, pa; - unsigned long timeout; if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd, SZ_16K)) return -EINVAL; @@ -106,29 +92,10 @@ static int omap2_iommu_enable(struct omap_iommu *obj) if (!IS_ALIGNED(pa, SZ_16K)) return -EINVAL; - iommu_write_reg(obj, MMU_SYS_SOFTRESET, MMU_SYSCONFIG); - - timeout = jiffies + msecs_to_jiffies(20); - do { - l = iommu_read_reg(obj, MMU_SYSSTATUS); - if (l & MMU_SYS_RESETDONE) - break; - } while (!time_after(jiffies, timeout)); - - if (!(l & MMU_SYS_RESETDONE)) { - dev_err(obj->dev, "can't take mmu out of reset\n"); - return -ENODEV; - } - l = iommu_read_reg(obj, MMU_REVISION); dev_info(obj->dev, "%s: version %d.%d\n", obj->name, (l >> 4) & 0xf, l & 0xf); - l = iommu_read_reg(obj, MMU_SYSCONFIG); - l &= ~MMU_SYS_IDLE_MASK; - l |= (MMU_SYS_IDLE_SMART | MMU_SYS_AUTOIDLE); - iommu_write_reg(obj, l, MMU_SYSCONFIG); - iommu_write_reg(obj, pa, MMU_TTB); __iommu_set_twl(obj, true); @@ -142,7 +109,6 @@ static void omap2_iommu_disable(struct omap_iommu *obj) l &= ~MMU_CNTL_MASK; iommu_write_reg(obj, l, MMU_CNTL); - iommu_write_reg(obj, MMU_SYS_IDLE_FORCE, MMU_SYSCONFIG); dev_dbg(obj->dev, "%s is shutting down\n", obj->name); } @@ -271,8 +237,6 @@ omap2_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len) char *p = buf; pr_reg(REVISION); - pr_reg(SYSCONFIG); - pr_reg(SYSSTATUS); pr_reg(IRQSTATUS); pr_reg(IRQENABLE); pr_reg(WALKING_ST); diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index c16e8fc8a4bd..4c9db62814ff 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -398,6 +398,7 @@ static int tegra_gart_probe(struct platform_device *pdev) do_gart_setup(gart, NULL); gart_handle = gart; + bus_set_iommu(&platform_bus_type, &gart_iommu_ops); return 0; fail: @@ -450,7 +451,6 @@ static struct platform_driver tegra_gart_driver = { static int __devinit tegra_gart_init(void) { - bus_set_iommu(&platform_bus_type, &gart_iommu_ops); return platform_driver_register(&tegra_gart_driver); } diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 4252d743963d..25c1210c0832 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -694,10 +694,8 @@ static void __smmu_iommu_unmap(struct smmu_as *as, dma_addr_t iova) *pte = _PTE_VACANT(iova); FLUSH_CPU_DCACHE(pte, page, sizeof(*pte)); flush_ptc_and_tlb(as->smmu, as, iova, pte, page, 0); - if (!--(*count)) { + if (!--(*count)) free_ptbl(as, iova); - smmu_flush_regs(as->smmu, 0); - } } static void __smmu_iommu_map_pfn(struct smmu_as *as, dma_addr_t iova, @@ -1232,6 +1230,7 @@ static int tegra_smmu_probe(struct platform_device *pdev) smmu_debugfs_create(smmu); smmu_handle = smmu; + bus_set_iommu(&platform_bus_type, &smmu_iommu_ops); return 0; } @@ -1276,7 +1275,6 @@ static struct platform_driver tegra_smmu_driver = { static int __devinit tegra_smmu_init(void) { - bus_set_iommu(&platform_bus_type, &smmu_iommu_ops); return platform_driver_register(&tegra_smmu_driver); } diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index 28c99c623bcd..22b720ec80cb 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -1217,8 +1217,7 @@ static void __exit dsp_cleanup(void) { mISDN_unregister_Bprotocol(&DSP); - if (timer_pending(&dsp_spl_tl)) - del_timer(&dsp_spl_tl); + del_timer_sync(&dsp_spl_tl); if (!list_empty(&dsp_ilist)) { printk(KERN_ERR "mISDN_dsp: Audio DSP object inst list not " diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index b5fdcb78a75b..a5ebc0083d87 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -225,7 +225,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) * eventfd (ie. the appropriate virtqueue thread)? */ if (!send_notify_to_eventfd(cpu)) { - /* OK, we tell the main Laucher. */ + /* OK, we tell the main Launcher. */ if (put_user(cpu->pending_notify, user)) return -EFAULT; return sizeof(cpu->pending_notify); diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index e4e841567459..aefb78e3cbf9 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -208,31 +208,6 @@ void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) EXPORT_SYMBOL_GPL(dm_cell_release); /* - * There are a couple of places where we put a bio into a cell briefly - * before taking it out again. In these situations we know that no other - * bio may be in the cell. This function releases the cell, and also does - * a sanity check. - */ -static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - BUG_ON(cell->holder != bio); - BUG_ON(!bio_list_empty(&cell->bios)); - - __cell_release(cell, NULL); -} - -void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - unsigned long flags; - struct dm_bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_singleton(cell, bio); - spin_unlock_irqrestore(&prison->lock, flags); -} -EXPORT_SYMBOL_GPL(dm_cell_release_singleton); - -/* * Sometimes we don't want the holder, just the additional bios. */ static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h index 4e0ac376700a..53d1a7a84e2f 100644 --- a/drivers/md/dm-bio-prison.h +++ b/drivers/md/dm-bio-prison.h @@ -44,7 +44,6 @@ int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, struct bio *inmate, struct dm_bio_prison_cell **ref); void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios); -void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates); void dm_cell_error(struct dm_bio_prison_cell *cell); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index bbf459bca61d..f7369f9d8595 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1689,8 +1689,7 @@ bad: return ret; } -static int crypt_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int crypt_map(struct dm_target *ti, struct bio *bio) { struct dm_crypt_io *io; struct crypt_config *cc = ti->private; @@ -1846,7 +1845,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 11, 0}, + .version = {1, 12, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index f53846f9ab50..cc1bd048acb2 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -274,8 +274,7 @@ static void delay_resume(struct dm_target *ti) atomic_set(&dc->may_delay, 1); } -static int delay_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int delay_map(struct dm_target *ti, struct bio *bio) { struct delay_c *dc = ti->private; @@ -338,7 +337,7 @@ out: static struct target_type delay_target = { .name = "delay", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = delay_ctr, .dtr = delay_dtr, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index cc15543a6ad7..9721f2ffb1a2 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -39,6 +39,10 @@ enum feature_flag_bits { DROP_WRITES }; +struct per_bio_data { + bool bio_submitted; +}; + static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, struct dm_target *ti) { @@ -214,6 +218,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->per_bio_data_size = sizeof(struct per_bio_data); ti->private = fc; return 0; @@ -265,11 +270,12 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) } } -static int flakey_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int flakey_map(struct dm_target *ti, struct bio *bio) { struct flakey_c *fc = ti->private; unsigned elapsed; + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); + pb->bio_submitted = false; /* Are we alive ? */ elapsed = (jiffies - fc->start_time) / HZ; @@ -277,7 +283,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, /* * Flag this bio as submitted while down. */ - map_context->ll = 1; + pb->bio_submitted = true; /* * Map reads as normal. @@ -314,17 +320,16 @@ map_bio: return DM_MAPIO_REMAPPED; } -static int flakey_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) { struct flakey_c *fc = ti->private; - unsigned bio_submitted_while_down = map_context->ll; + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); /* * Corrupt successful READs while in down state. * If flags were specified, only corrupt those that match. */ - if (fc->corrupt_bio_byte && !error && bio_submitted_while_down && + if (fc->corrupt_bio_byte && !error && pb->bio_submitted && (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && all_corrupt_bio_flags_match(bio, fc)) corrupt_bio_data(bio, fc); @@ -406,7 +411,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 1c46f97d6664..ea49834377c8 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -287,7 +287,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, unsigned num_bvecs; sector_t remaining = where->count; struct request_queue *q = bdev_get_queue(where->bdev); - sector_t discard_sectors; + unsigned short logical_block_size = queue_logical_block_size(q); + sector_t num_sectors; /* * where->count may be zero if rw holds a flush and we need to @@ -297,7 +298,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, /* * Allocate a suitably sized-bio. */ - if (rw & REQ_DISCARD) + if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME)) num_bvecs = 1; else num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), @@ -310,9 +311,21 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, store_io_and_region_in_bio(bio, io, region); if (rw & REQ_DISCARD) { - discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); - bio->bi_size = discard_sectors << SECTOR_SHIFT; - remaining -= discard_sectors; + num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + remaining -= num_sectors; + } else if (rw & REQ_WRITE_SAME) { + /* + * WRITE SAME only uses a single page. + */ + dp->get_page(dp, &page, &len, &offset); + bio_add_page(bio, page, logical_block_size, offset); + num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + + offset = 0; + remaining -= num_sectors; + dp->next_page(dp); } else while (remaining) { /* * Try and add as many pages as possible. diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index afd95986d099..0666b5d14b88 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1543,7 +1543,21 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) return r; } -static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) +#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */ +#define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */ + +static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags) +{ + if (param_flags & DM_WIPE_BUFFER) + memset(param, 0, param_size); + + if (param_flags & DM_PARAMS_VMALLOC) + vfree(param); + else + kfree(param); +} + +static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags) { struct dm_ioctl tmp, *dmi; int secure_data; @@ -1556,7 +1570,21 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) secure_data = tmp.flags & DM_SECURE_DATA_FLAG; - dmi = vmalloc(tmp.data_size); + *param_flags = secure_data ? DM_WIPE_BUFFER : 0; + + /* + * Try to avoid low memory issues when a device is suspended. + * Use kmalloc() rather than vmalloc() when we can. + */ + dmi = NULL; + if (tmp.data_size <= KMALLOC_MAX_SIZE) + dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + + if (!dmi) { + dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + *param_flags |= DM_PARAMS_VMALLOC; + } + if (!dmi) { if (secure_data && clear_user(user, tmp.data_size)) return -EFAULT; @@ -1566,6 +1594,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) if (copy_from_user(dmi, user, tmp.data_size)) goto bad; + /* + * Abort if something changed the ioctl data while it was being copied. + */ + if (dmi->data_size != tmp.data_size) { + DMERR("rejecting ioctl: data size modified while processing parameters"); + goto bad; + } + /* Wipe the user buffer so we do not return it to userspace */ if (secure_data && clear_user(user, tmp.data_size)) goto bad; @@ -1574,9 +1610,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) return 0; bad: - if (secure_data) - memset(dmi, 0, tmp.data_size); - vfree(dmi); + free_params(dmi, tmp.data_size, *param_flags); + return -EFAULT; } @@ -1613,7 +1648,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) static int ctl_ioctl(uint command, struct dm_ioctl __user *user) { int r = 0; - int wipe_buffer; + int param_flags; unsigned int cmd; struct dm_ioctl *uninitialized_var(param); ioctl_fn fn = NULL; @@ -1649,24 +1684,14 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) } /* - * Trying to avoid low memory issues when a device is - * suspended. - */ - current->flags |= PF_MEMALLOC; - - /* * Copy the parameters into kernel space. */ - r = copy_params(user, ¶m); - - current->flags &= ~PF_MEMALLOC; + r = copy_params(user, ¶m, ¶m_flags); if (r) return r; input_param_size = param->data_size; - wipe_buffer = param->flags & DM_SECURE_DATA_FLAG; - r = validate_params(cmd, param); if (r) goto out; @@ -1681,10 +1706,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) r = -EFAULT; out: - if (wipe_buffer) - memset(param, 0, input_param_size); - - vfree(param); + free_params(param, input_param_size, param_flags); return r; } diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index bed444c93d8d..68c02673263b 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -349,7 +349,7 @@ static void complete_io(unsigned long error, void *context) struct dm_kcopyd_client *kc = job->kc; if (error) { - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err |= error; else job->read_err = 1; @@ -361,7 +361,7 @@ static void complete_io(unsigned long error, void *context) } } - if (job->rw == WRITE) + if (job->rw & WRITE) push(&kc->complete_jobs, job); else { @@ -432,7 +432,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, if (r < 0) { /* error this rogue job */ - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err = (unsigned long) -1L; else job->read_err = 1; @@ -585,6 +585,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, unsigned int flags, dm_kcopyd_notify_fn fn, void *context) { struct kcopyd_job *job; + int i; /* * Allocate an array of jobs consisting of one master job @@ -611,7 +612,16 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, memset(&job->source, 0, sizeof job->source); job->source.count = job->dests[0].count; job->pages = &zero_page_list; - job->rw = WRITE; + + /* + * Use WRITE SAME to optimize zeroing if all dests support it. + */ + job->rw = WRITE | REQ_WRITE_SAME; + for (i = 0; i < job->num_dests; i++) + if (!bdev_write_same(job->dests[i].bdev)) { + job->rw = WRITE; + break; + } } job->fn = fn; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 1bf19a93eef0..328cad5617ab 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -55,6 +55,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->num_write_same_requests = 1; ti->private = lc; return 0; @@ -87,8 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) bio->bi_sector = linear_map_sector(ti, bio->bi_sector); } -static int linear_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int linear_map(struct dm_target *ti, struct bio *bio) { linear_map_bio(ti, bio); @@ -155,7 +155,7 @@ static int linear_iterate_devices(struct dm_target *ti, static struct target_type linear_target = { .name = "linear", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 45d94a7e7f6d..3d8984edeff7 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -295,9 +295,11 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) * Choose a reasonable default. All figures in sectors. */ if (min_region_size > (1 << 13)) { + /* If not a power of 2, make it the next power of 2 */ + if (min_region_size & (min_region_size - 1)) + region_size = 1 << fls(region_size); DMINFO("Choosing default region size of %lu sectors", region_size); - region_size = min_region_size; } else { DMINFO("Choosing default region size of 4MiB"); region_size = 1 << 13; /* sectors */ @@ -1216,7 +1218,7 @@ static void raid_dtr(struct dm_target *ti) context_free(rs); } -static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) +static int raid_map(struct dm_target *ti, struct bio *bio) { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; @@ -1430,7 +1432,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 3, 1}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index fd61f98ee1f6..fa519185ebba 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -61,7 +61,6 @@ struct mirror_set { struct dm_region_hash *rh; struct dm_kcopyd_client *kcopyd_client; struct dm_io_client *io_client; - mempool_t *read_record_pool; /* recovery */ region_t nr_regions; @@ -139,14 +138,13 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) queue_bio(ms, bio, WRITE); } -#define MIN_READ_RECORDS 20 -struct dm_raid1_read_record { +struct dm_raid1_bio_record { struct mirror *m; + /* if details->bi_bdev == NULL, details were not saved */ struct dm_bio_details details; + region_t write_region; }; -static struct kmem_cache *_dm_raid1_read_record_cache; - /* * Every mirror should look like this one. */ @@ -876,19 +874,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, atomic_set(&ms->suspend, 0); atomic_set(&ms->default_mirror, DEFAULT_MIRROR); - ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS, - _dm_raid1_read_record_cache); - - if (!ms->read_record_pool) { - ti->error = "Error creating mirror read_record_pool"; - kfree(ms); - return NULL; - } - ms->io_client = dm_io_client_create(); if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; - mempool_destroy(ms->read_record_pool); kfree(ms); return NULL; } @@ -900,7 +888,6 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, if (IS_ERR(ms->rh)) { ti->error = "Error creating dirty region hash"; dm_io_client_destroy(ms->io_client); - mempool_destroy(ms->read_record_pool); kfree(ms); return NULL; } @@ -916,7 +903,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti, dm_io_client_destroy(ms->io_client); dm_region_hash_destroy(ms->rh); - mempool_destroy(ms->read_record_pool); kfree(ms); } @@ -1088,6 +1074,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record); ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", @@ -1155,18 +1142,20 @@ static void mirror_dtr(struct dm_target *ti) /* * Mirror mapping function */ -static int mirror_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int mirror_map(struct dm_target *ti, struct bio *bio) { int r, rw = bio_rw(bio); struct mirror *m; struct mirror_set *ms = ti->private; - struct dm_raid1_read_record *read_record = NULL; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); + struct dm_raid1_bio_record *bio_record = + dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); + + bio_record->details.bi_bdev = NULL; if (rw == WRITE) { /* Save region for mirror_end_io() handler */ - map_context->ll = dm_rh_bio_to_region(ms->rh, bio); + bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio); queue_bio(ms, bio, rw); return DM_MAPIO_SUBMITTED; } @@ -1194,33 +1183,29 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, if (unlikely(!m)) return -EIO; - read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO); - if (likely(read_record)) { - dm_bio_record(&read_record->details, bio); - map_context->ptr = read_record; - read_record->m = m; - } + dm_bio_record(&bio_record->details, bio); + bio_record->m = m; map_bio(m, bio); return DM_MAPIO_REMAPPED; } -static int mirror_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) { int rw = bio_rw(bio); struct mirror_set *ms = (struct mirror_set *) ti->private; struct mirror *m = NULL; struct dm_bio_details *bd = NULL; - struct dm_raid1_read_record *read_record = map_context->ptr; + struct dm_raid1_bio_record *bio_record = + dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); /* * We need to dec pending if this was a write. */ if (rw == WRITE) { if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) - dm_rh_dec(ms->rh, map_context->ll); + dm_rh_dec(ms->rh, bio_record->write_region); return error; } @@ -1231,7 +1216,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, goto out; if (unlikely(error)) { - if (!read_record) { + if (!bio_record->details.bi_bdev) { /* * There wasn't enough memory to record necessary * information for a retry or there was no other @@ -1241,7 +1226,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, return -EIO; } - m = read_record->m; + m = bio_record->m; DMERR("Mirror read failed from %s. Trying alternative device.", m->dev->name); @@ -1253,22 +1238,18 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, * mirror. */ if (default_ok(m) || mirror_available(ms, bio)) { - bd = &read_record->details; + bd = &bio_record->details; dm_bio_restore(bd, bio); - mempool_free(read_record, ms->read_record_pool); - map_context->ptr = NULL; + bio_record->details.bi_bdev = NULL; queue_bio(ms, bio, rw); - return 1; + return DM_ENDIO_INCOMPLETE; } DMERR("All replicated volumes dead, failing I/O"); } out: - if (read_record) { - mempool_free(read_record, ms->read_record_pool); - map_context->ptr = NULL; - } + bio_record->details.bi_bdev = NULL; return error; } @@ -1422,7 +1403,7 @@ static int mirror_iterate_devices(struct dm_target *ti, static struct target_type mirror_target = { .name = "mirror", - .version = {1, 12, 1}, + .version = {1, 13, 1}, .module = THIS_MODULE, .ctr = mirror_ctr, .dtr = mirror_dtr, @@ -1439,13 +1420,6 @@ static int __init dm_mirror_init(void) { int r; - _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0); - if (!_dm_raid1_read_record_cache) { - DMERR("Can't allocate dm_raid1_read_record cache"); - r = -ENOMEM; - goto bad_cache; - } - r = dm_register_target(&mirror_target); if (r < 0) { DMERR("Failed to register mirror target"); @@ -1455,15 +1429,12 @@ static int __init dm_mirror_init(void) return 0; bad_target: - kmem_cache_destroy(_dm_raid1_read_record_cache); -bad_cache: return r; } static void __exit dm_mirror_exit(void) { dm_unregister_target(&mirror_target); - kmem_cache_destroy(_dm_raid1_read_record_cache); } /* Module hooks */ diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a143921feaf6..59fc18ae52c2 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -79,7 +79,6 @@ struct dm_snapshot { /* Chunks with outstanding reads */ spinlock_t tracked_chunk_lock; - mempool_t *tracked_chunk_pool; struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; /* The on disk metadata handler */ @@ -191,35 +190,38 @@ struct dm_snap_tracked_chunk { chunk_t chunk; }; -static struct kmem_cache *tracked_chunk_cache; +static void init_tracked_chunk(struct bio *bio) +{ + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + INIT_HLIST_NODE(&c->node); +} -static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, - chunk_t chunk) +static bool is_bio_tracked(struct bio *bio) { - struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, - GFP_NOIO); - unsigned long flags; + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + return !hlist_unhashed(&c->node); +} + +static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk) +{ + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); c->chunk = chunk; - spin_lock_irqsave(&s->tracked_chunk_lock, flags); + spin_lock_irq(&s->tracked_chunk_lock); hlist_add_head(&c->node, &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); - spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); - - return c; + spin_unlock_irq(&s->tracked_chunk_lock); } -static void stop_tracking_chunk(struct dm_snapshot *s, - struct dm_snap_tracked_chunk *c) +static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio) { + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); unsigned long flags; spin_lock_irqsave(&s->tracked_chunk_lock, flags); hlist_del(&c->node); spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); - - mempool_free(c, s->tracked_chunk_pool); } static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) @@ -1120,14 +1122,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_pending_pool; } - s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, - tracked_chunk_cache); - if (!s->tracked_chunk_pool) { - ti->error = "Could not allocate tracked_chunk mempool for " - "tracking reads"; - goto bad_tracked_chunk_pool; - } - for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); @@ -1135,6 +1129,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = s; ti->num_flush_requests = num_flush_requests; + ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ /* Exceptions aren't triggered till snapshot_resume() is called */ @@ -1183,9 +1178,6 @@ bad_read_metadata: unregister_snapshot(s); bad_load_and_register: - mempool_destroy(s->tracked_chunk_pool); - -bad_tracked_chunk_pool: mempool_destroy(s->pending_pool); bad_pending_pool: @@ -1290,8 +1282,6 @@ static void snapshot_dtr(struct dm_target *ti) BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); #endif - mempool_destroy(s->tracked_chunk_pool); - __free_exceptions(s); mempool_destroy(s->pending_pool); @@ -1577,8 +1567,7 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, s->store->chunk_mask); } -static int snapshot_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int snapshot_map(struct dm_target *ti, struct bio *bio) { struct dm_exception *e; struct dm_snapshot *s = ti->private; @@ -1586,6 +1575,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, chunk_t chunk; struct dm_snap_pending_exception *pe = NULL; + init_tracked_chunk(bio); + if (bio->bi_rw & REQ_FLUSH) { bio->bi_bdev = s->cow->bdev; return DM_MAPIO_REMAPPED; @@ -1670,7 +1661,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, } } else { bio->bi_bdev = s->origin->bdev; - map_context->ptr = track_chunk(s, chunk); + track_chunk(s, bio, chunk); } out_unlock: @@ -1691,20 +1682,20 @@ out: * If merging is currently taking place on the chunk in question, the * I/O is deferred by adding it to s->bios_queued_during_merge. */ -static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) { struct dm_exception *e; struct dm_snapshot *s = ti->private; int r = DM_MAPIO_REMAPPED; chunk_t chunk; + init_tracked_chunk(bio); + if (bio->bi_rw & REQ_FLUSH) { - if (!map_context->target_request_nr) + if (!dm_bio_get_target_request_nr(bio)) bio->bi_bdev = s->origin->bdev; else bio->bi_bdev = s->cow->bdev; - map_context->ptr = NULL; return DM_MAPIO_REMAPPED; } @@ -1733,7 +1724,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, remap_exception(s, e, bio, chunk); if (bio_rw(bio) == WRITE) - map_context->ptr = track_chunk(s, chunk); + track_chunk(s, bio, chunk); goto out_unlock; } @@ -1751,14 +1742,12 @@ out_unlock: return r; } -static int snapshot_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error) { struct dm_snapshot *s = ti->private; - struct dm_snap_tracked_chunk *c = map_context->ptr; - if (c) - stop_tracking_chunk(s, c); + if (is_bio_tracked(bio)) + stop_tracking_chunk(s, bio); return 0; } @@ -2127,8 +2116,7 @@ static void origin_dtr(struct dm_target *ti) dm_put_device(ti, dev); } -static int origin_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int origin_map(struct dm_target *ti, struct bio *bio) { struct dm_dev *dev = ti->private; bio->bi_bdev = dev->bdev; @@ -2193,7 +2181,7 @@ static int origin_iterate_devices(struct dm_target *ti, static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 7, 1}, + .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, @@ -2206,7 +2194,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2220,7 +2208,7 @@ static struct target_type snapshot_target = { static struct target_type merge_target = { .name = dm_snapshot_merge_target_name, - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2281,17 +2269,8 @@ static int __init dm_snapshot_init(void) goto bad_pending_cache; } - tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); - if (!tracked_chunk_cache) { - DMERR("Couldn't create cache to track chunks in use."); - r = -ENOMEM; - goto bad_tracked_chunk_cache; - } - return 0; -bad_tracked_chunk_cache: - kmem_cache_destroy(pending_cache); bad_pending_cache: kmem_cache_destroy(exception_cache); bad_exception_cache: @@ -2317,7 +2296,6 @@ static void __exit dm_snapshot_exit(void) exit_origin_hash(); kmem_cache_destroy(pending_cache); kmem_cache_destroy(exception_cache); - kmem_cache_destroy(tracked_chunk_cache); dm_exception_store_exit(); } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index e2f876539743..c89cde86d400 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -162,6 +162,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = stripes; ti->num_discard_requests = stripes; + ti->num_write_same_requests = stripes; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) @@ -251,8 +252,8 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, *result += sc->chunk_size; /* next chunk */ } -static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, - uint32_t target_stripe) +static int stripe_map_range(struct stripe_c *sc, struct bio *bio, + uint32_t target_stripe) { sector_t begin, end; @@ -271,23 +272,23 @@ static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, } } -static int stripe_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int stripe_map(struct dm_target *ti, struct bio *bio) { struct stripe_c *sc = ti->private; uint32_t stripe; unsigned target_request_nr; if (bio->bi_rw & REQ_FLUSH) { - target_request_nr = map_context->target_request_nr; + target_request_nr = dm_bio_get_target_request_nr(bio); BUG_ON(target_request_nr >= sc->stripes); bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; return DM_MAPIO_REMAPPED; } - if (unlikely(bio->bi_rw & REQ_DISCARD)) { - target_request_nr = map_context->target_request_nr; + if (unlikely(bio->bi_rw & REQ_DISCARD) || + unlikely(bio->bi_rw & REQ_WRITE_SAME)) { + target_request_nr = dm_bio_get_target_request_nr(bio); BUG_ON(target_request_nr >= sc->stripes); - return stripe_map_discard(sc, bio, target_request_nr); + return stripe_map_range(sc, bio, target_request_nr); } stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); @@ -342,8 +343,7 @@ static int stripe_status(struct dm_target *ti, status_type_t type, return 0; } -static int stripe_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error) { unsigned i; char major_minor[16]; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 100368eb7991..daf25d0890b3 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -967,13 +967,22 @@ bool dm_table_request_based(struct dm_table *t) int dm_table_alloc_md_mempools(struct dm_table *t) { unsigned type = dm_table_get_type(t); + unsigned per_bio_data_size = 0; + struct dm_target *tgt; + unsigned i; if (unlikely(type == DM_TYPE_NONE)) { DMWARN("no table type is set, can't allocate mempools"); return -EINVAL; } - t->mempools = dm_alloc_md_mempools(type, t->integrity_supported); + if (type == DM_TYPE_BIO_BASED) + for (i = 0; i < t->num_targets; i++) { + tgt = t->targets + i; + per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); + } + + t->mempools = dm_alloc_md_mempools(type, t->integrity_supported, per_bio_data_size); if (!t->mempools) return -ENOMEM; @@ -1414,6 +1423,33 @@ static bool dm_table_all_devices_attribute(struct dm_table *t, return 1; } +static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !q->limits.max_write_same_sectors; +} + +static bool dm_table_supports_write_same(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i = 0; + + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->num_write_same_requests) + return false; + + if (!ti->type->iterate_devices || + !ti->type->iterate_devices(ti, device_not_write_same_capable, NULL)) + return false; + } + + return true; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1445,6 +1481,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); + if (!dm_table_supports_write_same(t)) + q->limits.max_write_same_sectors = 0; + dm_table_set_integrity(t); /* diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 8da366cf381c..617d21a77256 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -126,15 +126,14 @@ static void io_err_dtr(struct dm_target *tt) /* empty */ } -static int io_err_map(struct dm_target *tt, struct bio *bio, - union map_info *map_context) +static int io_err_map(struct dm_target *tt, struct bio *bio) { return -EIO; } static struct target_type error_target = { .name = "error", - .version = {1, 0, 1}, + .version = {1, 1, 0}, .ctr = io_err_ctr, .dtr = io_err_dtr, .map = io_err_map, diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 693e149e9727..4d6e85367b84 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -408,7 +408,7 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) pmd->tl_info.tm = pmd->tm; pmd->tl_info.levels = 1; - pmd->tl_info.value_type.context = &pmd->info; + pmd->tl_info.value_type.context = &pmd->bl_info; pmd->tl_info.value_type.size = sizeof(__le64); pmd->tl_info.value_type.inc = subtree_inc; pmd->tl_info.value_type.dec = subtree_dec; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 058acf3a5ba7..675ae5274016 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -186,7 +186,6 @@ struct pool { struct dm_thin_new_mapping *next_mapping; mempool_t *mapping_pool; - mempool_t *endio_hook_pool; process_bio_fn process_bio; process_bio_fn process_discard; @@ -304,7 +303,7 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) bio_list_init(master); while ((bio = bio_list_pop(&bios))) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); if (h->tc == tc) bio_endio(bio, DM_ENDIO_REQUEUE); @@ -368,6 +367,17 @@ static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) dm_thin_changed_this_transaction(tc->td); } +static void inc_all_io_entry(struct pool *pool, struct bio *bio) +{ + struct dm_thin_endio_hook *h; + + if (bio->bi_rw & REQ_DISCARD) + return; + + h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); + h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds); +} + static void issue(struct thin_c *tc, struct bio *bio) { struct pool *pool = tc->pool; @@ -474,7 +484,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) static void overwrite_endio(struct bio *bio, int err) { unsigned long flags; - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct dm_thin_new_mapping *m = h->overwrite_mapping; struct pool *pool = m->tc->pool; @@ -499,8 +509,7 @@ static void overwrite_endio(struct bio *bio, int err) /* * This sends the bios in the cell back to the deferred_bios list. */ -static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, - dm_block_t data_block) +static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell) { struct pool *pool = tc->pool; unsigned long flags; @@ -513,17 +522,13 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, } /* - * Same as cell_defer above, except it omits one particular detainee, - * a write bio that covers the block and has already been processed. + * Same as cell_defer except it omits the original holder of the cell. */ -static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell) +static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell) { - struct bio_list bios; struct pool *pool = tc->pool; unsigned long flags; - bio_list_init(&bios); - spin_lock_irqsave(&pool->lock, flags); dm_cell_release_no_holder(cell, &pool->deferred_bios); spin_unlock_irqrestore(&pool->lock, flags); @@ -561,7 +566,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); if (r) { - DMERR("dm_thin_insert_block() failed"); + DMERR_LIMIT("dm_thin_insert_block() failed"); dm_cell_error(m->cell); goto out; } @@ -573,10 +578,10 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) * the bios in the cell. */ if (bio) { - cell_defer_except(tc, m->cell); + cell_defer_no_holder(tc, m->cell); bio_endio(bio, 0); } else - cell_defer(tc, m->cell, m->data_block); + cell_defer(tc, m->cell); out: list_del(&m->list); @@ -588,8 +593,8 @@ static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) struct thin_c *tc = m->tc; bio_io_error(m->bio); - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); + cell_defer_no_holder(tc, m->cell); + cell_defer_no_holder(tc, m->cell2); mempool_free(m, tc->pool->mapping_pool); } @@ -597,13 +602,15 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; + inc_all_io_entry(tc->pool, m->bio); + cell_defer_no_holder(tc, m->cell); + cell_defer_no_holder(tc, m->cell2); + if (m->pass_discard) remap_and_issue(tc, m->bio, m->data_block); else bio_endio(m->bio, 0); - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); mempool_free(m, tc->pool->mapping_pool); } @@ -614,7 +621,7 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m) r = dm_thin_remove_block(tc->td, m->virt_block); if (r) - DMERR("dm_thin_remove_block() failed"); + DMERR_LIMIT("dm_thin_remove_block() failed"); process_prepared_discard_passdown(m); } @@ -706,11 +713,12 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, * bio immediately. Otherwise we use kcopyd to clone the data first. */ if (io_overwrites_block(pool, bio)) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); + inc_all_io_entry(pool, bio); remap_and_issue(tc, bio, data_dest); } else { struct dm_io_region from, to; @@ -727,7 +735,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, 0, copy_complete, m); if (r < 0) { mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_copy() failed"); + DMERR_LIMIT("dm_kcopyd_copy() failed"); dm_cell_error(cell); } } @@ -775,11 +783,12 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, process_prepared_mapping(m); else if (io_overwrites_block(pool, bio)) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); + inc_all_io_entry(pool, bio); remap_and_issue(tc, bio, data_block); } else { int r; @@ -792,7 +801,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m); if (r < 0) { mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_zero() failed"); + DMERR_LIMIT("dm_kcopyd_zero() failed"); dm_cell_error(cell); } } @@ -804,7 +813,7 @@ static int commit(struct pool *pool) r = dm_pool_commit_metadata(pool->pmd); if (r) - DMERR("commit failed, error = %d", r); + DMERR_LIMIT("commit failed: error = %d", r); return r; } @@ -889,7 +898,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) */ static void retry_on_resume(struct bio *bio) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; struct pool *pool = tc->pool; unsigned long flags; @@ -936,7 +945,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) */ build_data_key(tc->td, lookup_result.block, &key2); if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) { - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); break; } @@ -962,13 +971,15 @@ static void process_discard(struct thin_c *tc, struct bio *bio) wake_worker(pool); } } else { + inc_all_io_entry(pool, bio); + cell_defer_no_holder(tc, cell); + cell_defer_no_holder(tc, cell2); + /* * The DM core makes sure that the discard doesn't span * a block boundary. So we submit the discard of a * partial block appropriately. */ - dm_cell_release_singleton(cell, bio); - dm_cell_release_singleton(cell2, bio); if ((!lookup_result.shared) && pool->pf.discard_passdown) remap_and_issue(tc, bio, lookup_result.block); else @@ -980,13 +991,14 @@ static void process_discard(struct thin_c *tc, struct bio *bio) /* * It isn't provisioned, just forget it. */ - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); bio_endio(bio, 0); break; default: - DMERR("discard: find block unexpectedly returned %d", r); - dm_cell_release_singleton(cell, bio); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); + cell_defer_no_holder(tc, cell); bio_io_error(bio); break; } @@ -1012,7 +1024,8 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, break; default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", + __func__, r); dm_cell_error(cell); break; } @@ -1037,11 +1050,12 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, if (bio_data_dir(bio) == WRITE && bio->bi_size) break_sharing(tc, bio, block, &key, lookup_result, cell); else { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); + inc_all_io_entry(pool, bio); + cell_defer_no_holder(tc, cell); - dm_cell_release_singleton(cell, bio); remap_and_issue(tc, bio, lookup_result->block); } } @@ -1056,7 +1070,9 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block * Remap empty bios (flushes) immediately, without provisioning. */ if (!bio->bi_size) { - dm_cell_release_singleton(cell, bio); + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_and_issue(tc, bio, 0); return; } @@ -1066,7 +1082,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block */ if (bio_data_dir(bio) == READ) { zero_fill_bio(bio); - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); bio_endio(bio, 0); return; } @@ -1085,7 +1101,8 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block break; default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", + __func__, r); set_pool_mode(tc->pool, PM_READ_ONLY); dm_cell_error(cell); break; @@ -1111,34 +1128,31 @@ static void process_bio(struct thin_c *tc, struct bio *bio) r = dm_thin_find_block(tc->td, block, 1, &lookup_result); switch (r) { case 0: - /* - * We can release this cell now. This thread is the only - * one that puts bios into a cell, and we know there were - * no preceding bios. - */ - /* - * TODO: this will probably have to change when discard goes - * back in. - */ - dm_cell_release_singleton(cell, bio); - - if (lookup_result.shared) + if (lookup_result.shared) { process_shared_bio(tc, bio, block, &lookup_result); - else + cell_defer_no_holder(tc, cell); + } else { + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_and_issue(tc, bio, lookup_result.block); + } break; case -ENODATA: if (bio_data_dir(bio) == READ && tc->origin_dev) { - dm_cell_release_singleton(cell, bio); + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_to_origin_and_issue(tc, bio); } else provision_block(tc, bio, block, cell); break; default: - DMERR("dm_thin_find_block() failed, error = %d", r); - dm_cell_release_singleton(cell, bio); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); + cell_defer_no_holder(tc, cell); bio_io_error(bio); break; } @@ -1156,8 +1170,10 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) case 0: if (lookup_result.shared && (rw == WRITE) && bio->bi_size) bio_io_error(bio); - else + else { + inc_all_io_entry(tc->pool, bio); remap_and_issue(tc, bio, lookup_result.block); + } break; case -ENODATA: @@ -1167,6 +1183,7 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) } if (tc->origin_dev) { + inc_all_io_entry(tc->pool, bio); remap_to_origin_and_issue(tc, bio); break; } @@ -1176,7 +1193,8 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) break; default: - DMERR("dm_thin_find_block() failed, error = %d", r); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); bio_io_error(bio); break; } @@ -1207,7 +1225,7 @@ static void process_deferred_bios(struct pool *pool) spin_unlock_irqrestore(&pool->lock, flags); while ((bio = bio_list_pop(&bios))) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; /* @@ -1340,32 +1358,30 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio) wake_worker(pool); } -static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) +static void thin_hook_bio(struct thin_c *tc, struct bio *bio) { - struct pool *pool = tc->pool; - struct dm_thin_endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO); + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->tc = tc; h->shared_read_entry = NULL; - h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : dm_deferred_entry_inc(pool->all_io_ds); + h->all_io_entry = NULL; h->overwrite_mapping = NULL; - - return h; } /* * Non-blocking function called from the thin target's map function. */ -static int thin_bio_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int thin_bio_map(struct dm_target *ti, struct bio *bio) { int r; struct thin_c *tc = ti->private; dm_block_t block = get_bio_block(tc, bio); struct dm_thin_device *td = tc->td; struct dm_thin_lookup_result result; + struct dm_bio_prison_cell *cell1, *cell2; + struct dm_cell_key key; - map_context->ptr = thin_hook_bio(tc, bio); + thin_hook_bio(tc, bio); if (get_pool_mode(tc->pool) == PM_FAIL) { bio_io_error(bio); @@ -1400,12 +1416,25 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * shared flag will be set in their case. */ thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - } else { - remap(tc, bio, result.block); - r = DM_MAPIO_REMAPPED; + return DM_MAPIO_SUBMITTED; } - break; + + build_virtual_key(tc->td, block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1)) + return DM_MAPIO_SUBMITTED; + + build_data_key(tc->td, result.block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) { + cell_defer_no_holder(tc, cell1); + return DM_MAPIO_SUBMITTED; + } + + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell2); + cell_defer_no_holder(tc, cell1); + + remap(tc, bio, result.block); + return DM_MAPIO_REMAPPED; case -ENODATA: if (get_pool_mode(tc->pool) == PM_READ_ONLY) { @@ -1414,8 +1443,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * of doing so. Just error it. */ bio_io_error(bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; } /* fall through */ @@ -1425,8 +1453,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * provide the hint to load the metadata into cache. */ thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; default: /* @@ -1435,11 +1462,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * pool is switched to fail-io mode. */ bio_io_error(bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; } - - return r; } static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) @@ -1566,14 +1590,12 @@ static void __pool_destroy(struct pool *pool) if (pool->next_mapping) mempool_free(pool->next_mapping, pool->mapping_pool); mempool_destroy(pool->mapping_pool); - mempool_destroy(pool->endio_hook_pool); dm_deferred_set_destroy(pool->shared_read_ds); dm_deferred_set_destroy(pool->all_io_ds); kfree(pool); } static struct kmem_cache *_new_mapping_cache; -static struct kmem_cache *_endio_hook_cache; static struct pool *pool_create(struct mapped_device *pool_md, struct block_device *metadata_dev, @@ -1667,13 +1689,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_mapping_pool; } - pool->endio_hook_pool = mempool_create_slab_pool(ENDIO_HOOK_POOL_SIZE, - _endio_hook_cache); - if (!pool->endio_hook_pool) { - *error = "Error creating pool's endio_hook mempool"; - err_p = ERR_PTR(-ENOMEM); - goto bad_endio_hook_pool; - } pool->ref_count = 1; pool->last_commit_jiffies = jiffies; pool->pool_md = pool_md; @@ -1682,8 +1697,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, return pool; -bad_endio_hook_pool: - mempool_destroy(pool->mapping_pool); bad_mapping_pool: dm_deferred_set_destroy(pool->all_io_ds); bad_all_io_ds: @@ -1966,8 +1979,7 @@ out_unlock: return r; } -static int pool_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int pool_map(struct dm_target *ti, struct bio *bio) { int r; struct pool_c *pt = ti->private; @@ -2358,7 +2370,9 @@ static int pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("rw "); - if (pool->pf.discard_enabled && pool->pf.discard_passdown) + if (!pool->pf.discard_enabled) + DMEMIT("ignore_discard"); + else if (pool->pf.discard_passdown) DMEMIT("discard_passdown"); else DMEMIT("no_discard_passdown"); @@ -2454,7 +2468,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2576,6 +2590,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->num_flush_requests = 1; ti->flush_supported = true; + ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { @@ -2609,20 +2624,17 @@ out_unlock: return r; } -static int thin_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int thin_map(struct dm_target *ti, struct bio *bio) { bio->bi_sector = dm_target_offset(ti, bio->bi_sector); - return thin_bio_map(ti, bio, map_context); + return thin_bio_map(ti, bio); } -static int thin_endio(struct dm_target *ti, - struct bio *bio, int err, - union map_info *map_context) +static int thin_endio(struct dm_target *ti, struct bio *bio, int err) { unsigned long flags; - struct dm_thin_endio_hook *h = map_context->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct list_head work; struct dm_thin_new_mapping *m, *tmp; struct pool *pool = h->tc->pool; @@ -2643,14 +2655,15 @@ static int thin_endio(struct dm_target *ti, if (h->all_io_entry) { INIT_LIST_HEAD(&work); dm_deferred_entry_dec(h->all_io_entry, &work); - spin_lock_irqsave(&pool->lock, flags); - list_for_each_entry_safe(m, tmp, &work, list) - list_add(&m->list, &pool->prepared_discards); - spin_unlock_irqrestore(&pool->lock, flags); + if (!list_empty(&work)) { + spin_lock_irqsave(&pool->lock, flags); + list_for_each_entry_safe(m, tmp, &work, list) + list_add(&m->list, &pool->prepared_discards); + spin_unlock_irqrestore(&pool->lock, flags); + wake_worker(pool); + } } - mempool_free(h, pool->endio_hook_pool); - return 0; } @@ -2745,7 +2758,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, @@ -2779,14 +2792,8 @@ static int __init dm_thin_init(void) if (!_new_mapping_cache) goto bad_new_mapping_cache; - _endio_hook_cache = KMEM_CACHE(dm_thin_endio_hook, 0); - if (!_endio_hook_cache) - goto bad_endio_hook_cache; - return 0; -bad_endio_hook_cache: - kmem_cache_destroy(_new_mapping_cache); bad_new_mapping_cache: dm_unregister_target(&pool_target); bad_pool_target: @@ -2801,7 +2808,6 @@ static void dm_thin_exit(void) dm_unregister_target(&pool_target); kmem_cache_destroy(_new_mapping_cache); - kmem_cache_destroy(_endio_hook_cache); } module_init(dm_thin_init); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 9e7328bb4030..52cde982164a 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -55,7 +55,6 @@ struct dm_verity { unsigned shash_descsize;/* the size of temporary space for crypto */ int hash_failed; /* set to 1 if hash of any block failed */ - mempool_t *io_mempool; /* mempool of struct dm_verity_io */ mempool_t *vec_mempool; /* mempool of bio vector */ struct workqueue_struct *verify_wq; @@ -66,7 +65,6 @@ struct dm_verity { struct dm_verity_io { struct dm_verity *v; - struct bio *bio; /* original values of bio->bi_end_io and bio->bi_private */ bio_end_io_t *orig_bi_end_io; @@ -389,8 +387,8 @@ test_block_hash: */ static void verity_finish_io(struct dm_verity_io *io, int error) { - struct bio *bio = io->bio; struct dm_verity *v = io->v; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; @@ -398,8 +396,6 @@ static void verity_finish_io(struct dm_verity_io *io, int error) if (io->io_vec != io->io_vec_inline) mempool_free(io->io_vec, v->vec_mempool); - mempool_free(io, v->io_mempool); - bio_endio(bio, error); } @@ -462,8 +458,7 @@ no_prefetch_cluster: * Bio map function. It allocates dm_verity_io structure and bio vector and * fills them. Then it issues prefetches and the I/O. */ -static int verity_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int verity_map(struct dm_target *ti, struct bio *bio) { struct dm_verity *v = ti->private; struct dm_verity_io *io; @@ -486,9 +481,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio, if (bio_data_dir(bio) == WRITE) return -EIO; - io = mempool_alloc(v->io_mempool, GFP_NOIO); + io = dm_per_bio_data(bio, ti->per_bio_data_size); io->v = v; - io->bio = bio; io->orig_bi_end_io = bio->bi_end_io; io->orig_bi_private = bio->bi_private; io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); @@ -610,9 +604,6 @@ static void verity_dtr(struct dm_target *ti) if (v->vec_mempool) mempool_destroy(v->vec_mempool); - if (v->io_mempool) - mempool_destroy(v->io_mempool); - if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -841,13 +832,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - v->io_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, - sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2); - if (!v->io_mempool) { - ti->error = "Cannot allocate io mempool"; - r = -ENOMEM; - goto bad; - } + ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, BIO_MAX_PAGES * sizeof(struct bio_vec)); @@ -875,7 +860,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index cc2b3cb81946..69a5c3b3b340 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -33,8 +33,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* * Return zeros only on reads */ -static int zero_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int zero_map(struct dm_target *ti, struct bio *bio) { switch(bio_rw(bio)) { case READ: @@ -56,7 +55,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio, static struct target_type zero_target = { .name = "zero", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = zero_ctr, .map = zero_map, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 77e6eff41cae..c72e4d5a9617 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -63,18 +63,6 @@ struct dm_io { }; /* - * For bio-based dm. - * One of these is allocated per target within a bio. Hopefully - * this will be simplified out one day. - */ -struct dm_target_io { - struct dm_io *io; - struct dm_target *ti; - union map_info info; - struct bio clone; -}; - -/* * For request-based dm. * One of these is allocated per request. */ @@ -657,7 +645,7 @@ static void clone_endio(struct bio *bio, int error) error = -EIO; if (endio) { - r = endio(tio->ti, bio, error, &tio->info); + r = endio(tio->ti, bio, error); if (r < 0 || r == DM_ENDIO_REQUEUE) /* * error and requeue request are handled @@ -1016,7 +1004,7 @@ static void __map_bio(struct dm_target *ti, struct dm_target_io *tio) */ atomic_inc(&tio->io->io_count); sector = clone->bi_sector; - r = ti->type->map(ti, clone, &tio->info); + r = ti->type->map(ti, clone); if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ @@ -1111,6 +1099,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, tio->io = ci->io; tio->ti = ti; memset(&tio->info, 0, sizeof(tio->info)); + tio->target_request_nr = 0; return tio; } @@ -1121,7 +1110,7 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs); struct bio *clone = &tio->clone; - tio->info.target_request_nr = request_nr; + tio->target_request_nr = request_nr; /* * Discard requests require the bio's inline iovecs be initialized. @@ -1174,7 +1163,28 @@ static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) ci->sector_count = 0; } -static int __clone_and_map_discard(struct clone_info *ci) +typedef unsigned (*get_num_requests_fn)(struct dm_target *ti); + +static unsigned get_num_discard_requests(struct dm_target *ti) +{ + return ti->num_discard_requests; +} + +static unsigned get_num_write_same_requests(struct dm_target *ti) +{ + return ti->num_write_same_requests; +} + +typedef bool (*is_split_required_fn)(struct dm_target *ti); + +static bool is_split_required_for_discard(struct dm_target *ti) +{ + return ti->split_discard_requests; +} + +static int __clone_and_map_changing_extent_only(struct clone_info *ci, + get_num_requests_fn get_num_requests, + is_split_required_fn is_split_required) { struct dm_target *ti; sector_t len; @@ -1185,15 +1195,15 @@ static int __clone_and_map_discard(struct clone_info *ci) return -EIO; /* - * Even though the device advertised discard support, - * that does not mean every target supports it, and + * Even though the device advertised support for this type of + * request, that does not mean every target supports it, and * reconfiguration might also have changed that since the * check was performed. */ - if (!ti->num_discard_requests) + if (!get_num_requests || !get_num_requests(ti)) return -EOPNOTSUPP; - if (!ti->split_discard_requests) + if (is_split_required && !is_split_required(ti)) len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); else len = min(ci->sector_count, max_io_len(ci->sector, ti)); @@ -1206,6 +1216,17 @@ static int __clone_and_map_discard(struct clone_info *ci) return 0; } +static int __clone_and_map_discard(struct clone_info *ci) +{ + return __clone_and_map_changing_extent_only(ci, get_num_discard_requests, + is_split_required_for_discard); +} + +static int __clone_and_map_write_same(struct clone_info *ci) +{ + return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL); +} + static int __clone_and_map(struct clone_info *ci) { struct bio *bio = ci->bio; @@ -1215,6 +1236,8 @@ static int __clone_and_map(struct clone_info *ci) if (unlikely(bio->bi_rw & REQ_DISCARD)) return __clone_and_map_discard(ci); + else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) + return __clone_and_map_write_same(ci); ti = dm_table_find_target(ci->map, ci->sector); if (!dm_target_is_valid(ti)) @@ -1946,13 +1969,20 @@ static void free_dev(struct mapped_device *md) static void __bind_mempools(struct mapped_device *md, struct dm_table *t) { - struct dm_md_mempools *p; + struct dm_md_mempools *p = dm_table_get_md_mempools(t); - if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) - /* the md already has necessary mempools */ + if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) { + /* + * The md already has necessary mempools. Reload just the + * bioset because front_pad may have changed because + * a different table was loaded. + */ + bioset_free(md->bs); + md->bs = p->bs; + p->bs = NULL; goto out; + } - p = dm_table_get_md_mempools(t); BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); md->io_pool = p->io_pool; @@ -2711,7 +2741,7 @@ int dm_noflush_suspending(struct dm_target *ti) } EXPORT_SYMBOL_GPL(dm_noflush_suspending); -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) +struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size) { struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS; @@ -2719,6 +2749,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) if (!pools) return NULL; + per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io)); + pools->io_pool = (type == DM_TYPE_BIO_BASED) ? mempool_create_slab_pool(MIN_IOS, _io_cache) : mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); @@ -2734,7 +2766,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) pools->bs = (type == DM_TYPE_BIO_BASED) ? bioset_create(pool_size, - offsetof(struct dm_target_io, clone)) : + per_bio_data_size + offsetof(struct dm_target_io, clone)) : bioset_create(pool_size, offsetof(struct dm_rq_clone_bio_info, clone)); if (!pools->bs) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 6a99fefaa743..45b97da1bd06 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -159,7 +159,7 @@ void dm_kcopyd_exit(void); /* * Mempool operations */ -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity); +struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size); void dm_free_md_mempools(struct dm_md_mempools *pools); #endif diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index a3ae09124a67..28c3ed072a79 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -428,15 +428,17 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, if (!v) return 0; r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); - if (unlikely(r)) + if (unlikely(r)) { + DMERR_LIMIT("%s validator check failed for block %llu", v->name, + (unsigned long long) dm_bufio_get_block_number(buf)); return r; + } aux->validator = v; } else { if (unlikely(aux->validator != v)) { - DMERR("validator mismatch (old=%s vs new=%s) for block %llu", - aux->validator->name, v ? v->name : "NULL", - (unsigned long long) - dm_bufio_get_block_number(buf)); + DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu", + aux->validator->name, v ? v->name : "NULL", + (unsigned long long) dm_bufio_get_block_number(buf)); return -EINVAL; } } diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index 5709bfeab1e8..accbb05f17b6 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -36,13 +36,13 @@ struct node_header { __le32 padding; } __packed; -struct node { +struct btree_node { struct node_header header; __le64 keys[0]; } __packed; -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt); int new_block(struct dm_btree_info *info, struct dm_block **result); @@ -64,7 +64,7 @@ struct ro_spine { void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); int exit_ro_spine(struct ro_spine *s); int ro_step(struct ro_spine *s, dm_block_t new_child); -struct node *ro_node(struct ro_spine *s); +struct btree_node *ro_node(struct ro_spine *s); struct shadow_spine { struct dm_btree_info *info; @@ -98,17 +98,17 @@ int shadow_root(struct shadow_spine *s); /* * Some inlines. */ -static inline __le64 *key_ptr(struct node *n, uint32_t index) +static inline __le64 *key_ptr(struct btree_node *n, uint32_t index) { return n->keys + index; } -static inline void *value_base(struct node *n) +static inline void *value_base(struct btree_node *n) { return &n->keys[le32_to_cpu(n->header.max_entries)]; } -static inline void *value_ptr(struct node *n, uint32_t index) +static inline void *value_ptr(struct btree_node *n, uint32_t index) { uint32_t value_size = le32_to_cpu(n->header.value_size); return value_base(n) + (value_size * index); @@ -117,7 +117,7 @@ static inline void *value_ptr(struct node *n, uint32_t index) /* * Assumes the values are suitably-aligned and converts to core format. */ -static inline uint64_t value64(struct node *n, uint32_t index) +static inline uint64_t value64(struct btree_node *n, uint32_t index) { __le64 *values_le = value_base(n); @@ -127,7 +127,7 @@ static inline uint64_t value64(struct node *n, uint32_t index) /* * Searching for a key within a single node. */ -int lower_bound(struct node *n, uint64_t key); +int lower_bound(struct btree_node *n, uint64_t key); extern struct dm_block_validator btree_node_validator; diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index aa71e2359a07..c4f28133ef82 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -53,7 +53,7 @@ /* * Some little utilities for moving node data around. */ -static void node_shift(struct node *n, int shift) +static void node_shift(struct btree_node *n, int shift) { uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); uint32_t value_size = le32_to_cpu(n->header.value_size); @@ -79,7 +79,7 @@ static void node_shift(struct node *n, int shift) } } -static void node_copy(struct node *left, struct node *right, int shift) +static void node_copy(struct btree_node *left, struct btree_node *right, int shift) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t value_size = le32_to_cpu(left->header.value_size); @@ -108,7 +108,7 @@ static void node_copy(struct node *left, struct node *right, int shift) /* * Delete a specific entry from a leaf node. */ -static void delete_at(struct node *n, unsigned index) +static void delete_at(struct btree_node *n, unsigned index) { unsigned nr_entries = le32_to_cpu(n->header.nr_entries); unsigned nr_to_copy = nr_entries - (index + 1); @@ -128,7 +128,7 @@ static void delete_at(struct node *n, unsigned index) n->header.nr_entries = cpu_to_le32(nr_entries - 1); } -static unsigned merge_threshold(struct node *n) +static unsigned merge_threshold(struct btree_node *n) { return le32_to_cpu(n->header.max_entries) / 3; } @@ -136,7 +136,7 @@ static unsigned merge_threshold(struct node *n) struct child { unsigned index; struct dm_block *block; - struct node *n; + struct btree_node *n; }; static struct dm_btree_value_type le64_type = { @@ -147,7 +147,7 @@ static struct dm_btree_value_type le64_type = { .equal = NULL }; -static int init_child(struct dm_btree_info *info, struct node *parent, +static int init_child(struct dm_btree_info *info, struct btree_node *parent, unsigned index, struct child *result) { int r, inc; @@ -177,7 +177,7 @@ static int exit_child(struct dm_btree_info *info, struct child *c) return dm_tm_unlock(info->tm, c->block); } -static void shift(struct node *left, struct node *right, int count) +static void shift(struct btree_node *left, struct btree_node *right, int count) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); @@ -203,11 +203,11 @@ static void shift(struct node *left, struct node *right, int count) right->header.nr_entries = cpu_to_le32(nr_right + count); } -static void __rebalance2(struct dm_btree_info *info, struct node *parent, +static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *r) { - struct node *left = l->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); unsigned threshold = 2 * merge_threshold(left) + 1; @@ -239,7 +239,7 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent; + struct btree_node *parent; struct child left, right; parent = dm_block_data(shadow_current(s)); @@ -270,9 +270,9 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, * in right, then rebalance2. This wastes some cpu, but I want something * simple atm. */ -static void delete_center_node(struct dm_btree_info *info, struct node *parent, +static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { uint32_t max_entries = le32_to_cpu(left->header.max_entries); @@ -301,9 +301,9 @@ static void delete_center_node(struct dm_btree_info *info, struct node *parent, /* * Redistributes entries among 3 sibling nodes. */ -static void redistribute3(struct dm_btree_info *info, struct node *parent, +static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { int s; @@ -343,12 +343,12 @@ static void redistribute3(struct dm_btree_info *info, struct node *parent, *key_ptr(parent, r->index) = right->keys[0]; } -static void __rebalance3(struct dm_btree_info *info, struct node *parent, +static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r) { - struct node *left = l->n; - struct node *center = c->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *center = c->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_center = le32_to_cpu(center->header.nr_entries); @@ -371,7 +371,7 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent = dm_block_data(shadow_current(s)); + struct btree_node *parent = dm_block_data(shadow_current(s)); struct child left, center, right; /* @@ -421,7 +421,7 @@ static int get_nr_entries(struct dm_transaction_manager *tm, { int r; struct dm_block *block; - struct node *n; + struct btree_node *n; r = dm_tm_read_lock(tm, b, &btree_node_validator, &block); if (r) @@ -438,7 +438,7 @@ static int rebalance_children(struct shadow_spine *s, { int i, r, has_left_sibling, has_right_sibling; uint32_t child_entries; - struct node *n; + struct btree_node *n; n = dm_block_data(shadow_current(s)); @@ -483,7 +483,7 @@ static int rebalance_children(struct shadow_spine *s, return r; } -static int do_leaf(struct node *n, uint64_t key, unsigned *index) +static int do_leaf(struct btree_node *n, uint64_t key, unsigned *index) { int i = lower_bound(n, key); @@ -506,7 +506,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, uint64_t key, unsigned *index) { int i = *index, r; - struct node *n; + struct btree_node *n; for (;;) { r = shadow_step(s, root, vt); @@ -556,7 +556,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, unsigned level, last_level = info->levels - 1; int index = 0, r = 0; struct shadow_spine spine; - struct node *n; + struct btree_node *n; init_shadow_spine(&spine, info); for (level = 0; level < info->levels; level++) { diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index d9a7912ee8ee..f199a0c4ed04 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -23,7 +23,7 @@ static void node_prepare_for_write(struct dm_block_validator *v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; h->blocknr = cpu_to_le64(dm_block_location(b)); @@ -38,15 +38,15 @@ static int node_check(struct dm_block_validator *v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; size_t value_size; __le32 csum_disk; uint32_t flags; if (dm_block_location(b) != le64_to_cpu(h->blocknr)) { - DMERR("node_check failed blocknr %llu wanted %llu", - le64_to_cpu(h->blocknr), dm_block_location(b)); + DMERR_LIMIT("node_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(h->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -54,8 +54,8 @@ static int node_check(struct dm_block_validator *v, block_size - sizeof(__le32), BTREE_CSUM_XOR)); if (csum_disk != h->csum) { - DMERR("node_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); + DMERR_LIMIT("node_check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); return -EILSEQ; } @@ -63,12 +63,12 @@ static int node_check(struct dm_block_validator *v, if (sizeof(struct node_header) + (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) { - DMERR("node_check failed: max_entries too large"); + DMERR_LIMIT("node_check failed: max_entries too large"); return -EILSEQ; } if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) { - DMERR("node_check failed, too many entries"); + DMERR_LIMIT("node_check failed: too many entries"); return -EILSEQ; } @@ -77,7 +77,7 @@ static int node_check(struct dm_block_validator *v, */ flags = le32_to_cpu(h->flags); if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) { - DMERR("node_check failed, node is neither INTERNAL or LEAF"); + DMERR_LIMIT("node_check failed: node is neither INTERNAL or LEAF"); return -EILSEQ; } @@ -164,7 +164,7 @@ int ro_step(struct ro_spine *s, dm_block_t new_child) return r; } -struct node *ro_node(struct ro_spine *s) +struct btree_node *ro_node(struct ro_spine *s) { struct dm_block *block; diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index d12b2cc51f1a..4caf66918cdb 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -38,7 +38,7 @@ static void array_insert(void *base, size_t elt_size, unsigned nr_elts, /*----------------------------------------------------------------*/ /* makes the assumption that no two keys are the same. */ -static int bsearch(struct node *n, uint64_t key, int want_hi) +static int bsearch(struct btree_node *n, uint64_t key, int want_hi) { int lo = -1, hi = le32_to_cpu(n->header.nr_entries); @@ -58,12 +58,12 @@ static int bsearch(struct node *n, uint64_t key, int want_hi) return want_hi ? hi : lo; } -int lower_bound(struct node *n, uint64_t key) +int lower_bound(struct btree_node *n, uint64_t key) { return bsearch(n, key, 0); } -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt) { unsigned i; @@ -77,7 +77,7 @@ void inc_children(struct dm_transaction_manager *tm, struct node *n, vt->inc(vt->context, value_ptr(n, i)); } -static int insert_at(size_t value_size, struct node *node, unsigned index, +static int insert_at(size_t value_size, struct btree_node *node, unsigned index, uint64_t key, void *value) __dm_written_to_disk(value) { @@ -122,7 +122,7 @@ int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root) { int r; struct dm_block *b; - struct node *n; + struct btree_node *n; size_t block_size; uint32_t max_entries; @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(dm_btree_empty); #define MAX_SPINE_DEPTH 64 struct frame { struct dm_block *b; - struct node *n; + struct btree_node *n; unsigned level; unsigned nr_children; unsigned current_child; @@ -230,6 +230,11 @@ static void pop_frame(struct del_stack *s) dm_tm_unlock(s->tm, f->b); } +static bool is_internal_level(struct dm_btree_info *info, struct frame *f) +{ + return f->level < (info->levels - 1); +} + int dm_btree_del(struct dm_btree_info *info, dm_block_t root) { int r; @@ -241,7 +246,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) s->tm = info->tm; s->top = -1; - r = push_frame(s, root, 1); + r = push_frame(s, root, 0); if (r) goto out; @@ -267,7 +272,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) if (r) goto out; - } else if (f->level != (info->levels - 1)) { + } else if (is_internal_level(info, f)) { b = value64(f->n, f->current_child); f->current_child++; r = push_frame(s, b, f->level + 1); @@ -295,7 +300,7 @@ EXPORT_SYMBOL_GPL(dm_btree_del); /*----------------------------------------------------------------*/ static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key, - int (*search_fn)(struct node *, uint64_t), + int (*search_fn)(struct btree_node *, uint64_t), uint64_t *result_key, void *v, size_t value_size) { int i, r; @@ -406,7 +411,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *parent; - struct node *ln, *rn, *pn; + struct btree_node *ln, *rn, *pn; __le64 location; left = shadow_current(s); @@ -491,7 +496,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *new_parent; - struct node *pn, *ln, *rn; + struct btree_node *pn, *ln, *rn; __le64 val; new_parent = shadow_current(s); @@ -576,7 +581,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, uint64_t key, unsigned *index) { int r, i = *index, top = 1; - struct node *node; + struct btree_node *node; for (;;) { r = shadow_step(s, root, vt); @@ -643,7 +648,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root, unsigned level, index = -1, last_level = info->levels - 1; dm_block_t block = root; struct shadow_spine spine; - struct node *n; + struct btree_node *n; struct dm_btree_value_type le64_type; le64_type.context = NULL; diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index f3a9af8cdec3..3e7a88d99eb0 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -39,8 +39,8 @@ static int index_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) { - DMERR("index_check failed blocknr %llu wanted %llu", - le64_to_cpu(mi_le->blocknr), dm_block_location(b)); + DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(mi_le->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -48,8 +48,8 @@ static int index_check(struct dm_block_validator *v, block_size - sizeof(__le32), INDEX_CSUM_XOR)); if (csum_disk != mi_le->csum) { - DMERR("index_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); + DMERR_LIMIT("index_check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); return -EILSEQ; } @@ -89,8 +89,8 @@ static int bitmap_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) { - DMERR("bitmap check failed blocknr %llu wanted %llu", - le64_to_cpu(disk_header->blocknr), dm_block_location(b)); + DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu", + le64_to_cpu(disk_header->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -98,8 +98,8 @@ static int bitmap_check(struct dm_block_validator *v, block_size - sizeof(__le32), BITMAP_CSUM_XOR)); if (csum_disk != disk_header->csum) { - DMERR("bitmap check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); + DMERR_LIMIT("bitmap check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); return -EILSEQ; } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index e89ae5e7a519..906cf3df71af 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -337,7 +337,7 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) { int r = sm_metadata_new_block_(sm, b); if (r) - DMERR("out of metadata space"); + DMERR("unable to allocate new metadata block"); return r; } diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 770a0d01e0b9..05164d7f054b 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -25,7 +25,6 @@ #include <linux/dma-mapping.h> #include <linux/spinlock.h> #include <linux/gpio.h> -#include <plat/cpu.h> #include <linux/platform_device.h> #include <linux/platform_data/usb-omap.h> #include <linux/pm_runtime.h> @@ -384,7 +383,7 @@ static void omap_usbhs_init(struct device *dev) reg &= ~OMAP_UHH_HOSTCONFIG_P3_CONNECT_STATUS; /* Bypass the TLL module for PHY mode operation */ - if (cpu_is_omap3430() && (omap_rev() <= OMAP3430_REV_ES2_1)) { + if (pdata->single_ulpi_bypass) { dev_dbg(dev, "OMAP3 ES version <= ES2.1\n"); if (is_ehci_phy_mode(pdata->port_mode[0]) || is_ehci_phy_mode(pdata->port_mode[1]) || diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 8d082b46426b..d971817182f7 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -53,6 +53,10 @@ #include <linux/kthread.h> #include "xpc.h" +#ifdef CONFIG_X86_64 +#include <asm/traps.h> +#endif + /* define two XPC debug device structures to be used with dev_dbg() et al */ struct device_driver xpc_dbg_name = { @@ -1079,6 +1083,9 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) return NOTIFY_DONE; } +/* Used to only allow one cpu to complete disconnect */ +static unsigned int xpc_die_disconnecting; + /* * Notify other partitions to deactivate from us by first disengaging from all * references to our memory. @@ -1092,6 +1099,9 @@ xpc_die_deactivate(void) long keep_waiting; long wait_to_print; + if (cmpxchg(&xpc_die_disconnecting, 0, 1)) + return; + /* keep xpc_hb_checker thread from doing anything (just in case) */ xpc_exiting = 1; @@ -1159,7 +1169,7 @@ xpc_die_deactivate(void) * about the lack of a heartbeat. */ static int -xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) +xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args) { #ifdef CONFIG_IA64 /* !!! temporary kludge */ switch (event) { @@ -1191,7 +1201,27 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) break; } #else - xpc_die_deactivate(); + struct die_args *die_args = _die_args; + + switch (event) { + case DIE_TRAP: + if (die_args->trapnr == X86_TRAP_DF) + xpc_die_deactivate(); + + if (((die_args->trapnr == X86_TRAP_MF) || + (die_args->trapnr == X86_TRAP_XF)) && + !user_mode_vm(die_args->regs)) + xpc_die_deactivate(); + + break; + case DIE_INT3: + case DIE_DEBUG: + break; + case DIE_OOPS: + case DIE_GPF: + default: + xpc_die_deactivate(); + } #endif return NOTIFY_DONE; diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c index fec406b4553d..c071d410488f 100644 --- a/drivers/mtd/ubi/attach.c +++ b/drivers/mtd/ubi/attach.c @@ -322,7 +322,6 @@ static struct ubi_ainf_volume *add_volume(struct ubi_attach_info *ai, int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, int pnum, const struct ubi_vid_hdr *vid_hdr) { - void *buf; int len, err, second_is_newer, bitflips = 0, corrupted = 0; uint32_t data_crc, crc; struct ubi_vid_hdr *vh = NULL; @@ -393,18 +392,14 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, /* Read the data of the copy and check the CRC */ len = be32_to_cpu(vid_hdr->data_size); - buf = vmalloc(len); - if (!buf) { - err = -ENOMEM; - goto out_free_vidh; - } - err = ubi_io_read_data(ubi, buf, pnum, 0, len); + mutex_lock(&ubi->buf_mutex); + err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, len); if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) - goto out_free_buf; + goto out_unlock; data_crc = be32_to_cpu(vid_hdr->data_crc); - crc = crc32(UBI_CRC32_INIT, buf, len); + crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, len); if (crc != data_crc) { dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x", pnum, crc, data_crc); @@ -415,8 +410,8 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, dbg_bld("PEB %d CRC is OK", pnum); bitflips = !!err; } + mutex_unlock(&ubi->buf_mutex); - vfree(buf); ubi_free_vid_hdr(ubi, vh); if (second_is_newer) @@ -426,8 +421,8 @@ int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_ainf_peb *aeb, return second_is_newer | (bitflips << 1) | (corrupted << 2); -out_free_buf: - vfree(buf); +out_unlock: + mutex_unlock(&ubi->buf_mutex); out_free_vidh: ubi_free_vid_hdr(ubi, vh); return err; @@ -1453,7 +1448,7 @@ int ubi_attach(struct ubi_device *ubi, int force_scan) goto out_wl; #ifdef CONFIG_MTD_UBI_FASTMAP - if (ubi->fm && ubi->dbg->chk_gen) { + if (ubi->fm && ubi_dbg_chk_gen(ubi)) { struct ubi_attach_info *scan_ai; scan_ai = alloc_ai("ubi_ckh_aeb_slab_cache"); @@ -1503,7 +1498,7 @@ static int self_check_ai(struct ubi_device *ubi, struct ubi_attach_info *ai) struct ubi_ainf_peb *aeb, *last_aeb; uint8_t *buf; - if (!ubi->dbg->chk_gen) + if (!ubi_dbg_chk_gen(ubi)) return 0; /* diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 344b4cb49d4e..a56133585e92 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -825,8 +825,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id) * No available PEBs to re-size the volume, clear the flag on * flash and exit. */ - memcpy(&vtbl_rec, &ubi->vtbl[vol_id], - sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol_id]; err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); if (err) ubi_err("cannot clean auto-resize flag for volume %d", @@ -986,14 +985,10 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, if (!ubi->fm_buf) goto out_free; #endif - err = ubi_debugging_init_dev(ubi); - if (err) - goto out_free; - err = ubi_attach(ubi, 0); if (err) { ubi_err("failed to attach mtd%d, error %d", mtd->index, err); - goto out_debugging; + goto out_free; } if (ubi->autoresize_vol_id != -1) { @@ -1060,8 +1055,6 @@ out_detach: ubi_wl_close(ubi); ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); -out_debugging: - ubi_debugging_exit_dev(ubi); out_free: vfree(ubi->peb_buf); vfree(ubi->fm_buf); @@ -1139,7 +1132,6 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); put_mtd_device(ubi->mtd); - ubi_debugging_exit_dev(ubi); vfree(ubi->peb_buf); vfree(ubi->fm_buf); ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num); diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index 26908a59506b..63cb1d7236ce 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -217,32 +217,6 @@ void ubi_dump_mkvol_req(const struct ubi_mkvol_req *req) pr_err("\t1st 16 characters of name: %s\n", nm); } -/** - * ubi_debugging_init_dev - initialize debugging for an UBI device. - * @ubi: UBI device description object - * - * This function initializes debugging-related data for UBI device @ubi. - * Returns zero in case of success and a negative error code in case of - * failure. - */ -int ubi_debugging_init_dev(struct ubi_device *ubi) -{ - ubi->dbg = kzalloc(sizeof(struct ubi_debug_info), GFP_KERNEL); - if (!ubi->dbg) - return -ENOMEM; - - return 0; -} - -/** - * ubi_debugging_exit_dev - free debugging data for an UBI device. - * @ubi: UBI device description object - */ -void ubi_debugging_exit_dev(struct ubi_device *ubi) -{ - kfree(ubi->dbg); -} - /* * Root directory for UBI stuff in debugfs. Contains sub-directories which * contain the stuff specific to particular UBI devices. @@ -295,7 +269,7 @@ static ssize_t dfs_file_read(struct file *file, char __user *user_buf, ubi = ubi_get_device(ubi_num); if (!ubi) return -ENODEV; - d = ubi->dbg; + d = &ubi->dbg; if (dent == d->dfs_chk_gen) val = d->chk_gen; @@ -341,7 +315,7 @@ static ssize_t dfs_file_write(struct file *file, const char __user *user_buf, ubi = ubi_get_device(ubi_num); if (!ubi) return -ENODEV; - d = ubi->dbg; + d = &ubi->dbg; buf_size = min_t(size_t, count, (sizeof(buf) - 1)); if (copy_from_user(buf, user_buf, buf_size)) { @@ -398,7 +372,7 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi) unsigned long ubi_num = ubi->ubi_num; const char *fname; struct dentry *dent; - struct ubi_debug_info *d = ubi->dbg; + struct ubi_debug_info *d = &ubi->dbg; if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; @@ -471,5 +445,5 @@ out: void ubi_debugfs_exit_dev(struct ubi_device *ubi) { if (IS_ENABLED(CONFIG_DEBUG_FS)) - debugfs_remove_recursive(ubi->dbg->dfs_dir); + debugfs_remove_recursive(ubi->dbg.dfs_dir); } diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 3dbc877d9663..33f8f3b2c9b2 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -60,51 +60,11 @@ void ubi_dump_aeb(const struct ubi_ainf_peb *aeb, int type); void ubi_dump_mkvol_req(const struct ubi_mkvol_req *req); int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len); -int ubi_debugging_init_dev(struct ubi_device *ubi); -void ubi_debugging_exit_dev(struct ubi_device *ubi); int ubi_debugfs_init(void); void ubi_debugfs_exit(void); int ubi_debugfs_init_dev(struct ubi_device *ubi); void ubi_debugfs_exit_dev(struct ubi_device *ubi); -/* - * The UBI debugfs directory name pattern and maximum name length (3 for "ubi" - * + 2 for the number plus 1 for the trailing zero byte. - */ -#define UBI_DFS_DIR_NAME "ubi%d" -#define UBI_DFS_DIR_LEN (3 + 2 + 1) - -/** - * struct ubi_debug_info - debugging information for an UBI device. - * - * @chk_gen: if UBI general extra checks are enabled - * @chk_io: if UBI I/O extra checks are enabled - * @disable_bgt: disable the background task for testing purposes - * @emulate_bitflips: emulate bit-flips for testing purposes - * @emulate_io_failures: emulate write/erase failures for testing purposes - * @dfs_dir_name: name of debugfs directory containing files of this UBI device - * @dfs_dir: direntry object of the UBI device debugfs directory - * @dfs_chk_gen: debugfs knob to enable UBI general extra checks - * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks - * @dfs_disable_bgt: debugfs knob to disable the background task - * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips - * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures - */ -struct ubi_debug_info { - unsigned int chk_gen:1; - unsigned int chk_io:1; - unsigned int disable_bgt:1; - unsigned int emulate_bitflips:1; - unsigned int emulate_io_failures:1; - char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; - struct dentry *dfs_dir; - struct dentry *dfs_chk_gen; - struct dentry *dfs_chk_io; - struct dentry *dfs_disable_bgt; - struct dentry *dfs_emulate_bitflips; - struct dentry *dfs_emulate_io_failures; -}; - /** * ubi_dbg_is_bgt_disabled - if the background thread is disabled. * @ubi: UBI device description object @@ -114,7 +74,7 @@ struct ubi_debug_info { */ static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) { - return ubi->dbg->disable_bgt; + return ubi->dbg.disable_bgt; } /** @@ -125,7 +85,7 @@ static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) */ static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) { - if (ubi->dbg->emulate_bitflips) + if (ubi->dbg.emulate_bitflips) return !(random32() % 200); return 0; } @@ -139,7 +99,7 @@ static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) */ static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) { - if (ubi->dbg->emulate_io_failures) + if (ubi->dbg.emulate_io_failures) return !(random32() % 500); return 0; } @@ -153,9 +113,18 @@ static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) */ static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) { - if (ubi->dbg->emulate_io_failures) + if (ubi->dbg.emulate_io_failures) return !(random32() % 400); return 0; } +static inline int ubi_dbg_chk_io(const struct ubi_device *ubi) +{ + return ubi->dbg.chk_io; +} + +static inline int ubi_dbg_chk_gen(const struct ubi_device *ubi) +{ + return ubi->dbg.chk_gen; +} #endif /* !__UBI_DEBUG_H__ */ diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 1a5f53c090d4..0648c6996d43 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -814,10 +814,8 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (max_sqnum > ai->max_sqnum) ai->max_sqnum = max_sqnum; - list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list) { - list_del(&tmp_aeb->u.list); - list_add_tail(&tmp_aeb->u.list, &ai->free); - } + list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list) + list_move_tail(&tmp_aeb->u.list, &ai->free); /* * If fastmap is leaking PEBs (must not happen), raise a diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c index 4bd4db8c84c9..b93807b4c459 100644 --- a/drivers/mtd/ubi/gluebi.c +++ b/drivers/mtd/ubi/gluebi.c @@ -171,17 +171,17 @@ static void gluebi_put_device(struct mtd_info *mtd) static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, unsigned char *buf) { - int err = 0, lnum, offs, total_read; + int err = 0, lnum, offs, bytes_left; struct gluebi_device *gluebi; gluebi = container_of(mtd, struct gluebi_device, mtd); lnum = div_u64_rem(from, mtd->erasesize, &offs); - total_read = len; - while (total_read) { + bytes_left = len; + while (bytes_left) { size_t to_read = mtd->erasesize - offs; - if (to_read > total_read) - to_read = total_read; + if (to_read > bytes_left) + to_read = bytes_left; err = ubi_read(gluebi->desc, lnum, buf, offs, to_read); if (err) @@ -189,11 +189,11 @@ static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len, lnum += 1; offs = 0; - total_read -= to_read; + bytes_left -= to_read; buf += to_read; } - *retlen = len - total_read; + *retlen = len - bytes_left; return err; } @@ -211,7 +211,7 @@ static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len, static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { - int err = 0, lnum, offs, total_written; + int err = 0, lnum, offs, bytes_left; struct gluebi_device *gluebi; gluebi = container_of(mtd, struct gluebi_device, mtd); @@ -220,12 +220,12 @@ static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len, if (len % mtd->writesize || offs % mtd->writesize) return -EINVAL; - total_written = len; - while (total_written) { + bytes_left = len; + while (bytes_left) { size_t to_write = mtd->erasesize - offs; - if (to_write > total_written) - to_write = total_written; + if (to_write > bytes_left) + to_write = bytes_left; err = ubi_leb_write(gluebi->desc, lnum, buf, offs, to_write); if (err) @@ -233,11 +233,11 @@ static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len, lnum += 1; offs = 0; - total_written -= to_write; + bytes_left -= to_write; buf += to_write; } - *retlen = len - total_written; + *retlen = len - bytes_left; return err; } diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 78a1dcbf2107..bf79def40126 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -1132,7 +1132,7 @@ static int self_check_not_bad(const struct ubi_device *ubi, int pnum) { int err; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; err = ubi_io_is_bad(ubi, pnum); @@ -1159,7 +1159,7 @@ static int self_check_ec_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; magic = be32_to_cpu(ec_hdr->magic); @@ -1197,7 +1197,7 @@ static int self_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) uint32_t crc, hdr_crc; struct ubi_ec_hdr *ec_hdr; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); @@ -1241,7 +1241,7 @@ static int self_check_vid_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; magic = be32_to_cpu(vid_hdr->magic); @@ -1282,7 +1282,7 @@ static int self_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) struct ubi_vid_hdr *vid_hdr; void *p; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); @@ -1334,7 +1334,7 @@ static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum, void *buf1; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); @@ -1398,7 +1398,7 @@ int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) void *buf; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - if (!ubi->dbg->chk_io) + if (!ubi_dbg_chk_io(ubi)) return 0; buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 7d57469723cf..8ea6297a208f 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -85,6 +85,13 @@ #define UBI_UNKNOWN -1 /* + * The UBI debugfs directory name pattern and maximum name length (3 for "ubi" + * + 2 for the number plus 1 for the trailing zero byte. + */ +#define UBI_DFS_DIR_NAME "ubi%d" +#define UBI_DFS_DIR_LEN (3 + 2 + 1) + +/* * Error codes returned by the I/O sub-system. * * UBI_IO_FF: the read region of flash contains only 0xFFs @@ -342,6 +349,37 @@ struct ubi_volume_desc { struct ubi_wl_entry; /** + * struct ubi_debug_info - debugging information for an UBI device. + * + * @chk_gen: if UBI general extra checks are enabled + * @chk_io: if UBI I/O extra checks are enabled + * @disable_bgt: disable the background task for testing purposes + * @emulate_bitflips: emulate bit-flips for testing purposes + * @emulate_io_failures: emulate write/erase failures for testing purposes + * @dfs_dir_name: name of debugfs directory containing files of this UBI device + * @dfs_dir: direntry object of the UBI device debugfs directory + * @dfs_chk_gen: debugfs knob to enable UBI general extra checks + * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks + * @dfs_disable_bgt: debugfs knob to disable the background task + * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips + * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures + */ +struct ubi_debug_info { + unsigned int chk_gen:1; + unsigned int chk_io:1; + unsigned int disable_bgt:1; + unsigned int emulate_bitflips:1; + unsigned int emulate_io_failures:1; + char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; + struct dentry *dfs_dir; + struct dentry *dfs_chk_gen; + struct dentry *dfs_chk_io; + struct dentry *dfs_disable_bgt; + struct dentry *dfs_emulate_bitflips; + struct dentry *dfs_emulate_io_failures; +}; + +/** * struct ubi_device - UBI device description structure * @dev: UBI device object to use the the Linux device model * @cdev: character device object to create character device @@ -545,7 +583,7 @@ struct ubi_device { struct mutex buf_mutex; struct mutex ckvol_mutex; - struct ubi_debug_info *dbg; + struct ubi_debug_info dbg; }; /** diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index 9f2ebd8750e7..ec2c2dc1c1ca 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -64,8 +64,7 @@ static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol) return 0; } - memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], - sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol->vol_id]; vtbl_rec.upd_marker = 1; mutex_lock(&ubi->device_mutex); @@ -93,8 +92,7 @@ static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol, dbg_gen("clear update marker for volume %d", vol->vol_id); - memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], - sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol->vol_id]; ubi_assert(vol->upd_marker && vtbl_rec.upd_marker); vtbl_rec.upd_marker = 0; diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 9169e58c262e..8330703c098f 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -535,7 +535,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) } /* Change volume table record */ - memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); + vtbl_rec = ubi->vtbl[vol_id]; vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); if (err) @@ -847,7 +847,7 @@ static int self_check_volumes(struct ubi_device *ubi) { int i, err = 0; - if (!ubi->dbg->chk_gen) + if (!ubi_dbg_chk_gen(ubi)) return 0; for (i = 0; i < ubi->vtbl_slots; i++) { diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 926e3df14fb2..d77b1c1d7c72 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -858,7 +858,7 @@ out_free: */ static void self_vtbl_check(const struct ubi_device *ubi) { - if (!ubi->dbg->chk_gen) + if (!ubi_dbg_chk_gen(ubi)) return; if (vtbl_check(ubi, ubi->vtbl)) { diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 2144f611196e..5df49d3cb5c7 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1,5 +1,4 @@ /* - * @ubi: UBI device description object * Copyright (c) International Business Machines Corp., 2006 * * This program is free software; you can redistribute it and/or modify @@ -2050,7 +2049,7 @@ static int self_check_ec(struct ubi_device *ubi, int pnum, int ec) long long read_ec; struct ubi_ec_hdr *ec_hdr; - if (!ubi->dbg->chk_gen) + if (!ubi_dbg_chk_gen(ubi)) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); @@ -2090,7 +2089,7 @@ out_free: static int self_check_in_wl_tree(const struct ubi_device *ubi, struct ubi_wl_entry *e, struct rb_root *root) { - if (!ubi->dbg->chk_gen) + if (!ubi_dbg_chk_gen(ubi)) return 0; if (in_wl_tree(e, root)) @@ -2116,7 +2115,7 @@ static int self_check_in_pq(const struct ubi_device *ubi, struct ubi_wl_entry *p; int i; - if (!ubi->dbg->chk_gen) + if (!ubi_dbg_chk_gen(ubi)) return 0; for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ef2cb2418535..b7d45f367d4a 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4431,8 +4431,6 @@ static void bond_uninit(struct net_device *bond_dev) list_del(&bond->bond_list); - bond_work_cancel_all(bond); - bond_debug_unregister(bond); __hw_addr_flush(&bond->mc_list); diff --git a/drivers/net/can/sja1000/sja1000_of_platform.c b/drivers/net/can/sja1000/sja1000_of_platform.c index 0f5917000aa2..6433b81256cd 100644 --- a/drivers/net/can/sja1000/sja1000_of_platform.c +++ b/drivers/net/can/sja1000/sja1000_of_platform.c @@ -121,7 +121,7 @@ static int sja1000_ofp_probe(struct platform_device *ofdev) } irq = irq_of_parse_and_map(np, 0); - if (irq == NO_IRQ) { + if (irq == 0) { dev_err(&ofdev->dev, "no irq found\n"); err = -ENODEV; goto exit_unmap_mem; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 378988b5709a..6db997c78a5f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -35,6 +35,8 @@ #ifndef __CXGB4_H__ #define __CXGB4_H__ +#include "t4_hw.h" + #include <linux/bitops.h> #include <linux/cache.h> #include <linux/interrupt.h> @@ -212,6 +214,8 @@ struct tp_err_stats { struct tp_params { unsigned int ntxchan; /* # of Tx channels */ unsigned int tre; /* log2 of core clocks per TP tick */ + unsigned short tx_modq_map; /* TX modulation scheduler queue to */ + /* channel map */ uint32_t dack_re; /* DACK timer resolution */ unsigned short tx_modq[NCHAN]; /* channel to modulation queue map */ @@ -526,6 +530,7 @@ struct adapter { struct net_device *port[MAX_NPORTS]; u8 chan_map[NCHAN]; /* channel -> port map */ + u32 filter_mode; unsigned int l2t_start; unsigned int l2t_end; struct l2t_data *l2t; @@ -545,6 +550,129 @@ struct adapter { spinlock_t stats_lock; }; +/* Defined bit width of user definable filter tuples + */ +#define ETHTYPE_BITWIDTH 16 +#define FRAG_BITWIDTH 1 +#define MACIDX_BITWIDTH 9 +#define FCOE_BITWIDTH 1 +#define IPORT_BITWIDTH 3 +#define MATCHTYPE_BITWIDTH 3 +#define PROTO_BITWIDTH 8 +#define TOS_BITWIDTH 8 +#define PF_BITWIDTH 8 +#define VF_BITWIDTH 8 +#define IVLAN_BITWIDTH 16 +#define OVLAN_BITWIDTH 16 + +/* Filter matching rules. These consist of a set of ingress packet field + * (value, mask) tuples. The associated ingress packet field matches the + * tuple when ((field & mask) == value). (Thus a wildcard "don't care" field + * rule can be constructed by specifying a tuple of (0, 0).) A filter rule + * matches an ingress packet when all of the individual individual field + * matching rules are true. + * + * Partial field masks are always valid, however, while it may be easy to + * understand their meanings for some fields (e.g. IP address to match a + * subnet), for others making sensible partial masks is less intuitive (e.g. + * MPS match type) ... + * + * Most of the following data structures are modeled on T4 capabilities. + * Drivers for earlier chips use the subsets which make sense for those chips. + * We really need to come up with a hardware-independent mechanism to + * represent hardware filter capabilities ... + */ +struct ch_filter_tuple { + /* Compressed header matching field rules. The TP_VLAN_PRI_MAP + * register selects which of these fields will participate in the + * filter match rules -- up to a maximum of 36 bits. Because + * TP_VLAN_PRI_MAP is a global register, all filters must use the same + * set of fields. + */ + uint32_t ethtype:ETHTYPE_BITWIDTH; /* Ethernet type */ + uint32_t frag:FRAG_BITWIDTH; /* IP fragmentation header */ + uint32_t ivlan_vld:1; /* inner VLAN valid */ + uint32_t ovlan_vld:1; /* outer VLAN valid */ + uint32_t pfvf_vld:1; /* PF/VF valid */ + uint32_t macidx:MACIDX_BITWIDTH; /* exact match MAC index */ + uint32_t fcoe:FCOE_BITWIDTH; /* FCoE packet */ + uint32_t iport:IPORT_BITWIDTH; /* ingress port */ + uint32_t matchtype:MATCHTYPE_BITWIDTH; /* MPS match type */ + uint32_t proto:PROTO_BITWIDTH; /* protocol type */ + uint32_t tos:TOS_BITWIDTH; /* TOS/Traffic Type */ + uint32_t pf:PF_BITWIDTH; /* PCI-E PF ID */ + uint32_t vf:VF_BITWIDTH; /* PCI-E VF ID */ + uint32_t ivlan:IVLAN_BITWIDTH; /* inner VLAN */ + uint32_t ovlan:OVLAN_BITWIDTH; /* outer VLAN */ + + /* Uncompressed header matching field rules. These are always + * available for field rules. + */ + uint8_t lip[16]; /* local IP address (IPv4 in [3:0]) */ + uint8_t fip[16]; /* foreign IP address (IPv4 in [3:0]) */ + uint16_t lport; /* local port */ + uint16_t fport; /* foreign port */ +}; + +/* A filter ioctl command. + */ +struct ch_filter_specification { + /* Administrative fields for filter. + */ + uint32_t hitcnts:1; /* count filter hits in TCB */ + uint32_t prio:1; /* filter has priority over active/server */ + + /* Fundamental filter typing. This is the one element of filter + * matching that doesn't exist as a (value, mask) tuple. + */ + uint32_t type:1; /* 0 => IPv4, 1 => IPv6 */ + + /* Packet dispatch information. Ingress packets which match the + * filter rules will be dropped, passed to the host or switched back + * out as egress packets. + */ + uint32_t action:2; /* drop, pass, switch */ + + uint32_t rpttid:1; /* report TID in RSS hash field */ + + uint32_t dirsteer:1; /* 0 => RSS, 1 => steer to iq */ + uint32_t iq:10; /* ingress queue */ + + uint32_t maskhash:1; /* dirsteer=0: store RSS hash in TCB */ + uint32_t dirsteerhash:1;/* dirsteer=1: 0 => TCB contains RSS hash */ + /* 1 => TCB contains IQ ID */ + + /* Switch proxy/rewrite fields. An ingress packet which matches a + * filter with "switch" set will be looped back out as an egress + * packet -- potentially with some Ethernet header rewriting. + */ + uint32_t eport:2; /* egress port to switch packet out */ + uint32_t newdmac:1; /* rewrite destination MAC address */ + uint32_t newsmac:1; /* rewrite source MAC address */ + uint32_t newvlan:2; /* rewrite VLAN Tag */ + uint8_t dmac[ETH_ALEN]; /* new destination MAC address */ + uint8_t smac[ETH_ALEN]; /* new source MAC address */ + uint16_t vlan; /* VLAN Tag to insert */ + + /* Filter rule value/mask pairs. + */ + struct ch_filter_tuple val; + struct ch_filter_tuple mask; +}; + +enum { + FILTER_PASS = 0, /* default */ + FILTER_DROP, + FILTER_SWITCH +}; + +enum { + VLAN_NOCHANGE = 0, /* default */ + VLAN_REMOVE, + VLAN_INSERT, + VLAN_REWRITE +}; + static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); @@ -701,6 +829,12 @@ static inline int t4_wr_mbox_ns(struct adapter *adap, int mbox, const void *cmd, void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, const u32 *vals, unsigned int nregs, unsigned int start_idx); +void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, + unsigned int data_reg, u32 *vals, unsigned int nregs, + unsigned int start_idx); + +struct fw_filter_wr; + void t4_intr_enable(struct adapter *adapter); void t4_intr_disable(struct adapter *adapter); int t4_slow_intr_handler(struct adapter *adapter); @@ -737,6 +871,8 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, void t4_load_mtus(struct adapter *adap, const unsigned short *mtus, const unsigned short *alpha, const unsigned short *beta); +void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid); + void t4_wol_magic_enable(struct adapter *adap, unsigned int port, const u8 *addr); int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index a27b4ae20f43..f0718e1a8369 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -175,6 +175,30 @@ enum { MIN_FL_ENTRIES = 16 }; +/* Host shadow copy of ingress filter entry. This is in host native format + * and doesn't match the ordering or bit order, etc. of the hardware of the + * firmware command. The use of bit-field structure elements is purely to + * remind ourselves of the field size limitations and save memory in the case + * where the filter table is large. + */ +struct filter_entry { + /* Administrative fields for filter. + */ + u32 valid:1; /* filter allocated and valid */ + u32 locked:1; /* filter is administratively locked */ + + u32 pending:1; /* filter action is pending firmware reply */ + u32 smtidx:8; /* Source MAC Table index for smac */ + struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + + /* The filter itself. Most of this is a straight copy of information + * provided by the extended ioctl(). Some fields are translated to + * internal forms -- for instance the Ingress Queue ID passed in from + * the ioctl() is translated into the Absolute Ingress Queue ID. + */ + struct ch_filter_specification fs; +}; + #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -325,6 +349,9 @@ enum { static unsigned int tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT; +module_param(tp_vlan_pri_map, uint, 0644); +MODULE_PARM_DESC(tp_vlan_pri_map, "global compressed filter configuration"); + static struct dentry *cxgb4_debugfs_root; static LIST_HEAD(adapter_list); @@ -506,8 +533,67 @@ static int link_start(struct net_device *dev) return ret; } -/* - * Response queue handler for the FW event queue. +/* Clear a filter and release any of its resources that we own. This also + * clears the filter's "pending" status. + */ +static void clear_filter(struct adapter *adap, struct filter_entry *f) +{ + /* If the new or old filter have loopback rewriteing rules then we'll + * need to free any existing Layer Two Table (L2T) entries of the old + * filter rule. The firmware will handle freeing up any Source MAC + * Table (SMT) entries used for rewriting Source MAC Addresses in + * loopback rules. + */ + if (f->l2t) + cxgb4_l2t_release(f->l2t); + + /* The zeroing of the filter rule below clears the filter valid, + * pending, locked flags, l2t pointer, etc. so it's all we need for + * this operation. + */ + memset(f, 0, sizeof(*f)); +} + +/* Handle a filter write/deletion reply. + */ +static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) +{ + unsigned int idx = GET_TID(rpl); + unsigned int nidx = idx - adap->tids.ftid_base; + unsigned int ret; + struct filter_entry *f; + + if (idx >= adap->tids.ftid_base && nidx < + (adap->tids.nftids + adap->tids.nsftids)) { + idx = nidx; + ret = GET_TCB_COOKIE(rpl->cookie); + f = &adap->tids.ftid_tab[idx]; + + if (ret == FW_FILTER_WR_FLT_DELETED) { + /* Clear the filter when we get confirmation from the + * hardware that the filter has been deleted. + */ + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { + dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", + idx); + clear_filter(adap, f); + } else if (ret == FW_FILTER_WR_FLT_ADDED) { + f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; + f->pending = 0; /* asynchronous setup completed */ + f->valid = 1; + } else { + /* Something went wrong. Issue a warning about the + * problem and clear everything out. + */ + dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", + idx, ret); + clear_filter(adap, f); + } + } +} + +/* Response queue handler for the FW event queue. */ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct pkt_gl *gl) @@ -542,6 +628,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct cpl_l2t_write_rpl *p = (void *)rsp; do_l2t_write_rpl(q->adap, p); + } else if (opcode == CPL_SET_TCB_RPL) { + const struct cpl_set_tcb_rpl *p = (void *)rsp; + + filter_rpl(q->adap, p); } else dev_err(q->adap->pdev_dev, "unexpected CPL %#x on FW event queue\n", opcode); @@ -983,6 +1073,148 @@ static void t4_free_mem(void *addr) kfree(addr); } +/* Send a Work Request to write the filter at a specified index. We construct + * a Firmware Filter Work Request to have the work done and put the indicated + * filter into "pending" mode which will prevent any further actions against + * it till we get a reply from the firmware on the completion status of the + * request. + */ +static int set_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct sk_buff *skb; + struct fw_filter_wr *fwr; + unsigned int ftid; + + /* If the new filter requires loopback Destination MAC and/or VLAN + * rewriting then we need to allocate a Layer 2 Table (L2T) entry for + * the filter. + */ + if (f->fs.newdmac || f->fs.newvlan) { + /* allocate L2T entry for new filter */ + f->l2t = t4_l2t_alloc_switching(adapter->l2t); + if (f->l2t == NULL) + return -EAGAIN; + if (t4_l2t_set_switching(adapter, f->l2t, f->fs.vlan, + f->fs.eport, f->fs.dmac)) { + cxgb4_l2t_release(f->l2t); + f->l2t = NULL; + return -ENOMEM; + } + } + + ftid = adapter->tids.ftid_base + fidx; + + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL | __GFP_NOFAIL); + fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); + memset(fwr, 0, sizeof(*fwr)); + + /* It would be nice to put most of the following in t4_hw.c but most + * of the work is translating the cxgbtool ch_filter_specification + * into the Work Request and the definition of that structure is + * currently in cxgbtool.h which isn't appropriate to pull into the + * common code. We may eventually try to come up with a more neutral + * filter specification structure but for now it's easiest to simply + * put this fairly direct code in line ... + */ + fwr->op_pkd = htonl(FW_WR_OP(FW_FILTER_WR)); + fwr->len16_pkd = htonl(FW_WR_LEN16(sizeof(*fwr)/16)); + fwr->tid_to_iq = + htonl(V_FW_FILTER_WR_TID(ftid) | + V_FW_FILTER_WR_RQTYPE(f->fs.type) | + V_FW_FILTER_WR_NOREPLY(0) | + V_FW_FILTER_WR_IQ(f->fs.iq)); + fwr->del_filter_to_l2tix = + htonl(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | + V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | + V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | + V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | + V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | + V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | + V_FW_FILTER_WR_DMAC(f->fs.newdmac) | + V_FW_FILTER_WR_SMAC(f->fs.newsmac) | + V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) | + V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | + V_FW_FILTER_WR_TXCHAN(f->fs.eport) | + V_FW_FILTER_WR_PRIO(f->fs.prio) | + V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0)); + fwr->ethtype = htons(f->fs.val.ethtype); + fwr->ethtypem = htons(f->fs.mask.ethtype); + fwr->frag_to_ovlan_vldm = + (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | + V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | + V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.ivlan_vld) | + V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.ovlan_vld) | + V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.ivlan_vld) | + V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.ovlan_vld)); + fwr->smac_sel = 0; + fwr->rx_chan_rx_rpl_iq = + htons(V_FW_FILTER_WR_RX_CHAN(0) | + V_FW_FILTER_WR_RX_RPL_IQ(adapter->sge.fw_evtq.abs_id)); + fwr->maci_to_matchtypem = + htonl(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | + V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | + V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | + V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | + V_FW_FILTER_WR_PORT(f->fs.val.iport) | + V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | + V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | + V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); + fwr->ptcl = f->fs.val.proto; + fwr->ptclm = f->fs.mask.proto; + fwr->ttyp = f->fs.val.tos; + fwr->ttypm = f->fs.mask.tos; + fwr->ivlan = htons(f->fs.val.ivlan); + fwr->ivlanm = htons(f->fs.mask.ivlan); + fwr->ovlan = htons(f->fs.val.ovlan); + fwr->ovlanm = htons(f->fs.mask.ovlan); + memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); + memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); + memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); + memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); + fwr->lp = htons(f->fs.val.lport); + fwr->lpm = htons(f->fs.mask.lport); + fwr->fp = htons(f->fs.val.fport); + fwr->fpm = htons(f->fs.mask.fport); + if (f->fs.newsmac) + memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adapter, skb); + return 0; +} + +/* Delete the filter at a specified index. + */ +static int del_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct sk_buff *skb; + struct fw_filter_wr *fwr; + unsigned int len, ftid; + + len = sizeof(*fwr); + ftid = adapter->tids.ftid_base + fidx; + + skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL); + fwr = (struct fw_filter_wr *)__skb_put(skb, len); + t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + t4_mgmt_tx(adapter, skb); + return 0; +} + static inline int is_offload(const struct adapter *adap) { return adap->params.offload; @@ -2195,7 +2427,7 @@ int cxgb4_alloc_atid(struct tid_info *t, void *data) if (t->afree) { union aopen_entry *p = t->afree; - atid = p - t->atid_tab; + atid = (p - t->atid_tab) + t->atid_base; t->afree = p->next; p->data = data; t->atids_in_use++; @@ -2210,7 +2442,7 @@ EXPORT_SYMBOL(cxgb4_alloc_atid); */ void cxgb4_free_atid(struct tid_info *t, unsigned int atid) { - union aopen_entry *p = &t->atid_tab[atid]; + union aopen_entry *p = &t->atid_tab[atid - t->atid_base]; spin_lock_bh(&t->atid_lock); p->next = t->afree; @@ -2249,8 +2481,34 @@ int cxgb4_alloc_stid(struct tid_info *t, int family, void *data) } EXPORT_SYMBOL(cxgb4_alloc_stid); -/* - * Release a server TID. +/* Allocate a server filter TID and set it to the supplied value. + */ +int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data) +{ + int stid; + + spin_lock_bh(&t->stid_lock); + if (family == PF_INET) { + stid = find_next_zero_bit(t->stid_bmap, + t->nstids + t->nsftids, t->nstids); + if (stid < (t->nstids + t->nsftids)) + __set_bit(stid, t->stid_bmap); + else + stid = -1; + } else { + stid = -1; + } + if (stid >= 0) { + t->stid_tab[stid].data = data; + stid += t->stid_base; + t->stids_in_use++; + } + spin_unlock_bh(&t->stid_lock); + return stid; +} +EXPORT_SYMBOL(cxgb4_alloc_sftid); + +/* Release a server TID. */ void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family) { @@ -2362,18 +2620,26 @@ EXPORT_SYMBOL(cxgb4_remove_tid); static int tid_init(struct tid_info *t) { size_t size; + unsigned int stid_bmap_size; unsigned int natids = t->natids; - size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + + stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); + size = t->ntids * sizeof(*t->tid_tab) + + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + - BITS_TO_LONGS(t->nstids) * sizeof(long); + t->nsftids * sizeof(*t->stid_tab) + + stid_bmap_size * sizeof(long) + + t->nftids * sizeof(*t->ftid_tab) + + t->nsftids * sizeof(*t->ftid_tab); + t->tid_tab = t4_alloc_mem(size); if (!t->tid_tab) return -ENOMEM; t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; - t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids]; + t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; + t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; spin_lock_init(&t->stid_lock); spin_lock_init(&t->atid_lock); @@ -2388,7 +2654,7 @@ static int tid_init(struct tid_info *t) t->atid_tab[natids - 1].next = &t->atid_tab[natids]; t->afree = t->atid_tab; } - bitmap_zero(t->stid_bmap, t->nstids); + bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); return 0; } @@ -2404,7 +2670,8 @@ static int tid_init(struct tid_info *t) * Returns <0 on error and one of the %NET_XMIT_* values on success. */ int cxgb4_create_server(const struct net_device *dev, unsigned int stid, - __be32 sip, __be16 sport, unsigned int queue) + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue) { unsigned int chan; struct sk_buff *skb; @@ -2750,6 +3017,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) { void *handle; struct cxgb4_lld_info lli; + unsigned short i; lli.pdev = adap->pdev; lli.l2t = adap->l2t; @@ -2776,10 +3044,16 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET( t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >> (adap->fn * 4)); + lli.filt_mode = adap->filter_mode; + /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ + for (i = 0; i < NCHAN; i++) + lli.tx_modq[i] = i; lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS); lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL); lli.fw_vers = adap->params.fw_vers; lli.dbfifo_int_thresh = dbfifo_int_thresh; + lli.sge_pktshift = adap->sge.pktshift; + lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; handle = ulds[uld].add(&lli); if (IS_ERR(handle)) { @@ -2999,6 +3273,126 @@ static int cxgb_close(struct net_device *dev) return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false); } +/* Return an error number if the indicated filter isn't writable ... + */ +static int writable_filter(struct filter_entry *f) +{ + if (f->locked) + return -EPERM; + if (f->pending) + return -EBUSY; + + return 0; +} + +/* Delete the filter at the specified index (if valid). The checks for all + * the common problems with doing this like the filter being locked, currently + * pending in another operation, etc. + */ +static int delete_filter(struct adapter *adapter, unsigned int fidx) +{ + struct filter_entry *f; + int ret; + + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + return -EINVAL; + + f = &adapter->tids.ftid_tab[fidx]; + ret = writable_filter(f); + if (ret) + return ret; + if (f->valid) + return del_filter_wr(adapter, fidx); + + return 0; +} + +int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue, unsigned char port, unsigned char mask) +{ + int ret; + struct filter_entry *f; + struct adapter *adap; + int i; + u8 *val; + + adap = netdev2adap(dev); + + /* Adjust stid to correct filter index */ + stid -= adap->tids.nstids; + stid += adap->tids.nftids; + + /* Check to make sure the filter requested is writable ... + */ + f = &adap->tids.ftid_tab[stid]; + ret = writable_filter(f); + if (ret) + return ret; + + /* Clear out any old resources being used by the filter before + * we start constructing the new filter. + */ + if (f->valid) + clear_filter(adap, f); + + /* Clear out filter specifications */ + memset(&f->fs, 0, sizeof(struct ch_filter_specification)); + f->fs.val.lport = cpu_to_be16(sport); + f->fs.mask.lport = ~0; + val = (u8 *)&sip; + if ((val[0] | val[1] | val[2] | val[3]) != 0) { + for (i = 0; i < 4; i++) { + f->fs.val.lip[i] = val[i]; + f->fs.mask.lip[i] = ~0; + } + if (adap->filter_mode & F_PORT) { + f->fs.val.iport = port; + f->fs.mask.iport = mask; + } + } + + f->fs.dirsteer = 1; + f->fs.iq = queue; + /* Mark filter as locked */ + f->locked = 1; + f->fs.rpttid = 1; + + ret = set_filter_wr(adap, stid); + if (ret) { + clear_filter(adap, f); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(cxgb4_create_server_filter); + +int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, + unsigned int queue, bool ipv6) +{ + int ret; + struct filter_entry *f; + struct adapter *adap; + + adap = netdev2adap(dev); + + /* Adjust stid to correct filter index */ + stid -= adap->tids.nstids; + stid += adap->tids.nftids; + + f = &adap->tids.ftid_tab[stid]; + /* Unlock the filter */ + f->locked = 0; + + ret = delete_filter(adap, stid); + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL(cxgb4_remove_server_filter); + static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev, struct rtnl_link_stats64 *ns) { @@ -3245,6 +3639,34 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c) v = t4_read_reg(adap, TP_PIO_DATA); t4_write_reg(adap, TP_PIO_DATA, v & ~CSUM_HAS_PSEUDO_HDR); + /* first 4 Tx modulation queues point to consecutive Tx channels */ + adap->params.tp.tx_modq_map = 0xE4; + t4_write_reg(adap, A_TP_TX_MOD_QUEUE_REQ_MAP, + V_TX_MOD_QUEUE_REQ_MAP(adap->params.tp.tx_modq_map)); + + /* associate each Tx modulation queue with consecutive Tx channels */ + v = 0x84218421; + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_HDR); + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_FIFO); + t4_write_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, A_TP_TX_SCHED_PCMD); + +#define T4_TX_MODQ_10G_WEIGHT_DEFAULT 16 /* in KB units */ + if (is_offload(adap)) { + t4_write_reg(adap, A_TP_TX_MOD_QUEUE_WEIGHT0, + V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT)); + t4_write_reg(adap, A_TP_TX_MOD_CHANNEL_WEIGHT, + V_TX_MODQ_WEIGHT0(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT1(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT2(T4_TX_MODQ_10G_WEIGHT_DEFAULT) | + V_TX_MODQ_WEIGHT3(T4_TX_MODQ_10G_WEIGHT_DEFAULT)); + } + /* get basic stuff going */ return t4_early_init(adap, adap->fn); } @@ -4035,6 +4457,10 @@ static int adap_init0(struct adapter *adap) for (j = 0; j < NCHAN; j++) adap->params.tp.tx_modq[j] = j; + t4_read_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &adap->filter_mode, 1, + TP_VLAN_PRI_MAP); + adap->flags |= FW_OK; return 0; @@ -4661,6 +5087,17 @@ static void remove_one(struct pci_dev *pdev) if (adapter->debugfs_root) debugfs_remove_recursive(adapter->debugfs_root); + /* If we allocated filters, free up state associated with any + * valid filters ... + */ + if (adapter->tids.ftid_tab) { + struct filter_entry *f = &adapter->tids.ftid_tab[0]; + for (i = 0; i < (adapter->tids.nftids + + adapter->tids.nsftids); i++, f++) + if (f->valid) + clear_filter(adapter, f); + } + if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 39bec73ff87c..e2bbc7f3e2de 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -38,6 +38,7 @@ #include <linux/cache.h> #include <linux/spinlock.h> #include <linux/skbuff.h> +#include <linux/inetdevice.h> #include <linux/atomic.h> /* CPL message priority levels */ @@ -97,7 +98,9 @@ struct tid_info { union aopen_entry *atid_tab; unsigned int natids; + unsigned int atid_base; + struct filter_entry *ftid_tab; unsigned int nftids; unsigned int ftid_base; unsigned int aftid_base; @@ -129,7 +132,7 @@ static inline void *lookup_atid(const struct tid_info *t, unsigned int atid) static inline void *lookup_stid(const struct tid_info *t, unsigned int stid) { stid -= t->stid_base; - return stid < t->nstids ? t->stid_tab[stid].data : NULL; + return stid < (t->nstids + t->nsftids) ? t->stid_tab[stid].data : NULL; } static inline void cxgb4_insert_tid(struct tid_info *t, void *data, @@ -141,6 +144,7 @@ static inline void cxgb4_insert_tid(struct tid_info *t, void *data, int cxgb4_alloc_atid(struct tid_info *t, void *data); int cxgb4_alloc_stid(struct tid_info *t, int family, void *data); +int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data); void cxgb4_free_atid(struct tid_info *t, unsigned int atid); void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family); void cxgb4_remove_tid(struct tid_info *t, unsigned int qid, unsigned int tid); @@ -148,8 +152,14 @@ void cxgb4_remove_tid(struct tid_info *t, unsigned int qid, unsigned int tid); struct in6_addr; int cxgb4_create_server(const struct net_device *dev, unsigned int stid, - __be32 sip, __be16 sport, unsigned int queue); - + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue); +int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, + __be32 sip, __be16 sport, __be16 vlan, + unsigned int queue, + unsigned char port, unsigned char mask); +int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, + unsigned int queue, bool ipv6); static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { skb_set_queue_mapping(skb, (queue << 1) | prio); @@ -221,9 +231,16 @@ struct cxgb4_lld_info { unsigned int iscsi_iolen; /* iSCSI max I/O length */ unsigned short udb_density; /* # of user DB/page */ unsigned short ucq_density; /* # of user CQs/page */ + unsigned short filt_mode; /* filter optional components */ + unsigned short tx_modq[NCHAN]; /* maps each tx channel to a */ + /* scheduler queue */ void __iomem *gts_reg; /* address of GTS register */ void __iomem *db_reg; /* address of kernel doorbell */ int dbfifo_int_thresh; /* doorbell fifo int threshold */ + unsigned int sge_pktshift; /* Padding between CPL and */ + /* packet data */ + bool enable_fw_ofld_conn; /* Enable connection through fw */ + /* WR */ }; struct cxgb4_uld_info { diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 6ac77a62f361..29878098101e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -484,6 +484,38 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) handle_failed_resolution(adap, arpq); } +/* Allocate an L2T entry for use by a switching rule. Such need to be + * explicitly freed and while busy they are not on any hash chain, so normal + * address resolution updates do not see them. + */ +struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d) +{ + struct l2t_entry *e; + + write_lock_bh(&d->lock); + e = alloc_l2e(d); + if (e) { + spin_lock(&e->lock); /* avoid race with t4_l2t_free */ + e->state = L2T_STATE_SWITCHING; + atomic_set(&e->refcnt, 1); + spin_unlock(&e->lock); + } + write_unlock_bh(&d->lock); + return e; +} + +/* Sets/updates the contents of a switching L2T entry that has been allocated + * with an earlier call to @t4_l2t_alloc_switching. + */ +int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, + u8 port, u8 *eth_addr) +{ + e->vlan = vlan; + e->lport = port; + memcpy(e->dmac, eth_addr, ETH_ALEN); + return write_l2e(adap, e, 0); +} + struct l2t_data *t4_init_l2t(void) { int i; diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.h b/drivers/net/ethernet/chelsio/cxgb4/l2t.h index 02b31d0c6410..108c0f1fce1c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.h @@ -100,6 +100,9 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, unsigned int priority); void t4_l2t_update(struct adapter *adap, struct neighbour *neigh); +struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d); +int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, + u8 port, u8 *eth_addr); struct l2t_data *t4_init_l2t(void); void do_l2t_write_rpl(struct adapter *p, const struct cpl_l2t_write_rpl *rpl); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 8d9c7547b070..22f3af5166bf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -109,7 +109,7 @@ void t4_set_reg_field(struct adapter *adapter, unsigned int addr, u32 mask, * Reads registers that are accessed indirectly through an address/data * register pair. */ -static void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, +void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, u32 *vals, unsigned int nregs, unsigned int start_idx) { @@ -2268,6 +2268,26 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, return 0; } +/* t4_mk_filtdelwr - create a delete filter WR + * @ftid: the filter ID + * @wr: the filter work request to populate + * @qid: ingress queue to receive the delete notification + * + * Creates a filter work request to delete the supplied filter. If @qid is + * negative the delete notification is suppressed. + */ +void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid) +{ + memset(wr, 0, sizeof(*wr)); + wr->op_pkd = htonl(FW_WR_OP(FW_FILTER_WR)); + wr->len16_pkd = htonl(FW_WR_LEN16(sizeof(*wr) / 16)); + wr->tid_to_iq = htonl(V_FW_FILTER_WR_TID(ftid) | + V_FW_FILTER_WR_NOREPLY(qid < 0)); + wr->del_filter_to_l2tix = htonl(F_FW_FILTER_WR_DEL_FILTER); + if (qid >= 0) + wr->rx_chan_rx_rpl_iq = htons(V_FW_FILTER_WR_RX_RPL_IQ(qid)); +} + #define INIT_CMD(var, cmd, rd_wr) do { \ (var).op_to_write = htonl(FW_CMD_OP(FW_##cmd##_CMD) | \ FW_CMD_REQUEST | FW_CMD_##rd_wr); \ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index b760808fd6d9..261d17703adc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -193,8 +193,24 @@ struct work_request_hdr { __be64 wr_lo; }; +/* wr_hi fields */ +#define S_WR_OP 24 +#define V_WR_OP(x) ((__u64)(x) << S_WR_OP) + #define WR_HDR struct work_request_hdr wr +/* option 0 fields */ +#define S_MSS_IDX 60 +#define M_MSS_IDX 0xF +#define V_MSS_IDX(x) ((__u64)(x) << S_MSS_IDX) +#define G_MSS_IDX(x) (((x) >> S_MSS_IDX) & M_MSS_IDX) + +/* option 2 fields */ +#define S_RSS_QUEUE 0 +#define M_RSS_QUEUE 0x3FF +#define V_RSS_QUEUE(x) ((x) << S_RSS_QUEUE) +#define G_RSS_QUEUE(x) (((x) >> S_RSS_QUEUE) & M_RSS_QUEUE) + struct cpl_pass_open_req { WR_HDR; union opcode_tid ot; @@ -204,12 +220,14 @@ struct cpl_pass_open_req { __be32 peer_ip; __be64 opt0; #define TX_CHAN(x) ((x) << 2) +#define NO_CONG(x) ((x) << 4) #define DELACK(x) ((x) << 5) #define ULP_MODE(x) ((x) << 8) #define RCV_BUFSIZ(x) ((x) << 12) #define DSCP(x) ((x) << 22) #define SMAC_SEL(x) ((u64)(x) << 28) #define L2T_IDX(x) ((u64)(x) << 36) +#define TCAM_BYPASS(x) ((u64)(x) << 48) #define NAGLE(x) ((u64)(x) << 49) #define WND_SCALE(x) ((u64)(x) << 50) #define KEEP_ALIVE(x) ((u64)(x) << 54) @@ -247,8 +265,10 @@ struct cpl_pass_accept_rpl { #define RSS_QUEUE_VALID (1 << 10) #define RX_COALESCE_VALID(x) ((x) << 11) #define RX_COALESCE(x) ((x) << 12) +#define PACE(x) ((x) << 16) #define TX_QUEUE(x) ((x) << 23) #define RX_CHANNEL(x) ((x) << 26) +#define CCTRL_ECN(x) ((x) << 27) #define WND_SCALE_EN(x) ((x) << 28) #define TSTAMPS_EN(x) ((x) << 29) #define SACK_EN(x) ((x) << 30) @@ -292,6 +312,9 @@ struct cpl_pass_establish { union opcode_tid ot; __be32 rsvd; __be32 tos_stid; +#define PASS_OPEN_TID(x) ((x) << 0) +#define PASS_OPEN_TOS(x) ((x) << 24) +#define GET_PASS_OPEN_TID(x) (((x) >> 0) & 0xFFFFFF) #define GET_POPEN_TID(x) ((x) & 0xffffff) #define GET_POPEN_TOS(x) (((x) >> 24) & 0xff) __be16 mac_idx; @@ -332,6 +355,7 @@ struct cpl_set_tcb_field { __be16 word_cookie; #define TCB_WORD(x) ((x) << 0) #define TCB_COOKIE(x) ((x) << 5) +#define GET_TCB_COOKIE(x) (((x) >> 5) & 7) __be64 mask; __be64 val; }; @@ -536,6 +560,37 @@ struct cpl_rx_pkt { __be16 err_vec; }; +/* rx_pkt.l2info fields */ +#define S_RX_ETHHDR_LEN 0 +#define M_RX_ETHHDR_LEN 0x1F +#define V_RX_ETHHDR_LEN(x) ((x) << S_RX_ETHHDR_LEN) +#define G_RX_ETHHDR_LEN(x) (((x) >> S_RX_ETHHDR_LEN) & M_RX_ETHHDR_LEN) + +#define S_RX_MACIDX 8 +#define M_RX_MACIDX 0x1FF +#define V_RX_MACIDX(x) ((x) << S_RX_MACIDX) +#define G_RX_MACIDX(x) (((x) >> S_RX_MACIDX) & M_RX_MACIDX) + +#define S_RXF_SYN 21 +#define V_RXF_SYN(x) ((x) << S_RXF_SYN) +#define F_RXF_SYN V_RXF_SYN(1U) + +#define S_RX_CHAN 28 +#define M_RX_CHAN 0xF +#define V_RX_CHAN(x) ((x) << S_RX_CHAN) +#define G_RX_CHAN(x) (((x) >> S_RX_CHAN) & M_RX_CHAN) + +/* rx_pkt.hdr_len fields */ +#define S_RX_TCPHDR_LEN 0 +#define M_RX_TCPHDR_LEN 0x3F +#define V_RX_TCPHDR_LEN(x) ((x) << S_RX_TCPHDR_LEN) +#define G_RX_TCPHDR_LEN(x) (((x) >> S_RX_TCPHDR_LEN) & M_RX_TCPHDR_LEN) + +#define S_RX_IPHDR_LEN 6 +#define M_RX_IPHDR_LEN 0x3FF +#define V_RX_IPHDR_LEN(x) ((x) << S_RX_IPHDR_LEN) +#define G_RX_IPHDR_LEN(x) (((x) >> S_RX_IPHDR_LEN) & M_RX_IPHDR_LEN) + struct cpl_trace_pkt { u8 opcode; u8 intf; @@ -634,6 +689,17 @@ struct cpl_fw6_msg { /* cpl_fw6_msg.type values */ enum { FW6_TYPE_CMD_RPL = 0, + FW6_TYPE_WR_RPL = 1, + FW6_TYPE_CQE = 2, + FW6_TYPE_OFLD_CONNECTION_WR_RPL = 3, +}; + +struct cpl_fw6_msg_ofld_connection_wr_rpl { + __u64 cookie; + __be32 tid; /* or atid in case of active failure */ + __u8 t_state; + __u8 retval; + __u8 rsvd[2]; }; enum { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 75393f5cff41..83ec5f7844ac 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1064,4 +1064,41 @@ #define ADDRESS(x) ((x) << ADDRESS_SHIFT) #define XGMAC_PORT_INT_CAUSE 0x10dc + +#define A_TP_TX_MOD_QUEUE_REQ_MAP 0x7e28 + +#define A_TP_TX_MOD_CHANNEL_WEIGHT 0x7e34 + +#define S_TX_MOD_QUEUE_REQ_MAP 0 +#define M_TX_MOD_QUEUE_REQ_MAP 0xffffU +#define V_TX_MOD_QUEUE_REQ_MAP(x) ((x) << S_TX_MOD_QUEUE_REQ_MAP) + +#define A_TP_TX_MOD_QUEUE_WEIGHT0 0x7e30 + +#define S_TX_MODQ_WEIGHT3 24 +#define M_TX_MODQ_WEIGHT3 0xffU +#define V_TX_MODQ_WEIGHT3(x) ((x) << S_TX_MODQ_WEIGHT3) + +#define S_TX_MODQ_WEIGHT2 16 +#define M_TX_MODQ_WEIGHT2 0xffU +#define V_TX_MODQ_WEIGHT2(x) ((x) << S_TX_MODQ_WEIGHT2) + +#define S_TX_MODQ_WEIGHT1 8 +#define M_TX_MODQ_WEIGHT1 0xffU +#define V_TX_MODQ_WEIGHT1(x) ((x) << S_TX_MODQ_WEIGHT1) + +#define S_TX_MODQ_WEIGHT0 0 +#define M_TX_MODQ_WEIGHT0 0xffU +#define V_TX_MODQ_WEIGHT0(x) ((x) << S_TX_MODQ_WEIGHT0) + +#define A_TP_TX_SCHED_HDR 0x23 + +#define A_TP_TX_SCHED_FIFO 0x24 + +#define A_TP_TX_SCHED_PCMD 0x25 + +#define S_PORT 1 +#define V_PORT(x) ((x) << S_PORT) +#define F_PORT V_PORT(1U) + #endif /* __T4_REGS_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 0abc864cdd3a..a0dcccd846c9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -35,6 +35,45 @@ #ifndef _T4FW_INTERFACE_H_ #define _T4FW_INTERFACE_H_ +enum fw_retval { + FW_SUCCESS = 0, /* completed sucessfully */ + FW_EPERM = 1, /* operation not permitted */ + FW_ENOENT = 2, /* no such file or directory */ + FW_EIO = 5, /* input/output error; hw bad */ + FW_ENOEXEC = 8, /* exec format error; inv microcode */ + FW_EAGAIN = 11, /* try again */ + FW_ENOMEM = 12, /* out of memory */ + FW_EFAULT = 14, /* bad address; fw bad */ + FW_EBUSY = 16, /* resource busy */ + FW_EEXIST = 17, /* file exists */ + FW_EINVAL = 22, /* invalid argument */ + FW_ENOSPC = 28, /* no space left on device */ + FW_ENOSYS = 38, /* functionality not implemented */ + FW_EPROTO = 71, /* protocol error */ + FW_EADDRINUSE = 98, /* address already in use */ + FW_EADDRNOTAVAIL = 99, /* cannot assigned requested address */ + FW_ENETDOWN = 100, /* network is down */ + FW_ENETUNREACH = 101, /* network is unreachable */ + FW_ENOBUFS = 105, /* no buffer space available */ + FW_ETIMEDOUT = 110, /* timeout */ + FW_EINPROGRESS = 115, /* fw internal */ + FW_SCSI_ABORT_REQUESTED = 128, /* */ + FW_SCSI_ABORT_TIMEDOUT = 129, /* */ + FW_SCSI_ABORTED = 130, /* */ + FW_SCSI_CLOSE_REQUESTED = 131, /* */ + FW_ERR_LINK_DOWN = 132, /* */ + FW_RDEV_NOT_READY = 133, /* */ + FW_ERR_RDEV_LOST = 134, /* */ + FW_ERR_RDEV_LOGO = 135, /* */ + FW_FCOE_NO_XCHG = 136, /* */ + FW_SCSI_RSP_ERR = 137, /* */ + FW_ERR_RDEV_IMPL_LOGO = 138, /* */ + FW_SCSI_UNDER_FLOW_ERR = 139, /* */ + FW_SCSI_OVER_FLOW_ERR = 140, /* */ + FW_SCSI_DDP_ERR = 141, /* DDP error*/ + FW_SCSI_TASK_ERR = 142, /* No SCSI tasks available */ +}; + #define FW_T4VF_SGE_BASE_ADDR 0x0000 #define FW_T4VF_MPS_BASE_ADDR 0x0100 #define FW_T4VF_PL_BASE_ADDR 0x0200 @@ -46,6 +85,7 @@ enum fw_wr_opcodes { FW_ULPTX_WR = 0x04, FW_TP_WR = 0x05, FW_ETH_TX_PKT_WR = 0x08, + FW_OFLD_CONNECTION_WR = 0x2f, FW_FLOWC_WR = 0x0a, FW_OFLD_TX_DATA_WR = 0x0b, FW_CMD_WR = 0x10, @@ -81,6 +121,282 @@ struct fw_wr_hdr { #define FW_WR_LEN16(x) ((x) << 0) #define HW_TPL_FR_MT_PR_IV_P_FC 0X32B +#define HW_TPL_FR_MT_PR_OV_P_FC 0X327 + +/* filter wr reply code in cookie in CPL_SET_TCB_RPL */ +enum fw_filter_wr_cookie { + FW_FILTER_WR_SUCCESS, + FW_FILTER_WR_FLT_ADDED, + FW_FILTER_WR_FLT_DELETED, + FW_FILTER_WR_SMT_TBL_FULL, + FW_FILTER_WR_EINVAL, +}; + +struct fw_filter_wr { + __be32 op_pkd; + __be32 len16_pkd; + __be64 r3; + __be32 tid_to_iq; + __be32 del_filter_to_l2tix; + __be16 ethtype; + __be16 ethtypem; + __u8 frag_to_ovlan_vldm; + __u8 smac_sel; + __be16 rx_chan_rx_rpl_iq; + __be32 maci_to_matchtypem; + __u8 ptcl; + __u8 ptclm; + __u8 ttyp; + __u8 ttypm; + __be16 ivlan; + __be16 ivlanm; + __be16 ovlan; + __be16 ovlanm; + __u8 lip[16]; + __u8 lipm[16]; + __u8 fip[16]; + __u8 fipm[16]; + __be16 lp; + __be16 lpm; + __be16 fp; + __be16 fpm; + __be16 r7; + __u8 sma[6]; +}; + +#define S_FW_FILTER_WR_TID 12 +#define M_FW_FILTER_WR_TID 0xfffff +#define V_FW_FILTER_WR_TID(x) ((x) << S_FW_FILTER_WR_TID) +#define G_FW_FILTER_WR_TID(x) \ + (((x) >> S_FW_FILTER_WR_TID) & M_FW_FILTER_WR_TID) + +#define S_FW_FILTER_WR_RQTYPE 11 +#define M_FW_FILTER_WR_RQTYPE 0x1 +#define V_FW_FILTER_WR_RQTYPE(x) ((x) << S_FW_FILTER_WR_RQTYPE) +#define G_FW_FILTER_WR_RQTYPE(x) \ + (((x) >> S_FW_FILTER_WR_RQTYPE) & M_FW_FILTER_WR_RQTYPE) +#define F_FW_FILTER_WR_RQTYPE V_FW_FILTER_WR_RQTYPE(1U) + +#define S_FW_FILTER_WR_NOREPLY 10 +#define M_FW_FILTER_WR_NOREPLY 0x1 +#define V_FW_FILTER_WR_NOREPLY(x) ((x) << S_FW_FILTER_WR_NOREPLY) +#define G_FW_FILTER_WR_NOREPLY(x) \ + (((x) >> S_FW_FILTER_WR_NOREPLY) & M_FW_FILTER_WR_NOREPLY) +#define F_FW_FILTER_WR_NOREPLY V_FW_FILTER_WR_NOREPLY(1U) + +#define S_FW_FILTER_WR_IQ 0 +#define M_FW_FILTER_WR_IQ 0x3ff +#define V_FW_FILTER_WR_IQ(x) ((x) << S_FW_FILTER_WR_IQ) +#define G_FW_FILTER_WR_IQ(x) \ + (((x) >> S_FW_FILTER_WR_IQ) & M_FW_FILTER_WR_IQ) + +#define S_FW_FILTER_WR_DEL_FILTER 31 +#define M_FW_FILTER_WR_DEL_FILTER 0x1 +#define V_FW_FILTER_WR_DEL_FILTER(x) ((x) << S_FW_FILTER_WR_DEL_FILTER) +#define G_FW_FILTER_WR_DEL_FILTER(x) \ + (((x) >> S_FW_FILTER_WR_DEL_FILTER) & M_FW_FILTER_WR_DEL_FILTER) +#define F_FW_FILTER_WR_DEL_FILTER V_FW_FILTER_WR_DEL_FILTER(1U) + +#define S_FW_FILTER_WR_RPTTID 25 +#define M_FW_FILTER_WR_RPTTID 0x1 +#define V_FW_FILTER_WR_RPTTID(x) ((x) << S_FW_FILTER_WR_RPTTID) +#define G_FW_FILTER_WR_RPTTID(x) \ + (((x) >> S_FW_FILTER_WR_RPTTID) & M_FW_FILTER_WR_RPTTID) +#define F_FW_FILTER_WR_RPTTID V_FW_FILTER_WR_RPTTID(1U) + +#define S_FW_FILTER_WR_DROP 24 +#define M_FW_FILTER_WR_DROP 0x1 +#define V_FW_FILTER_WR_DROP(x) ((x) << S_FW_FILTER_WR_DROP) +#define G_FW_FILTER_WR_DROP(x) \ + (((x) >> S_FW_FILTER_WR_DROP) & M_FW_FILTER_WR_DROP) +#define F_FW_FILTER_WR_DROP V_FW_FILTER_WR_DROP(1U) + +#define S_FW_FILTER_WR_DIRSTEER 23 +#define M_FW_FILTER_WR_DIRSTEER 0x1 +#define V_FW_FILTER_WR_DIRSTEER(x) ((x) << S_FW_FILTER_WR_DIRSTEER) +#define G_FW_FILTER_WR_DIRSTEER(x) \ + (((x) >> S_FW_FILTER_WR_DIRSTEER) & M_FW_FILTER_WR_DIRSTEER) +#define F_FW_FILTER_WR_DIRSTEER V_FW_FILTER_WR_DIRSTEER(1U) + +#define S_FW_FILTER_WR_MASKHASH 22 +#define M_FW_FILTER_WR_MASKHASH 0x1 +#define V_FW_FILTER_WR_MASKHASH(x) ((x) << S_FW_FILTER_WR_MASKHASH) +#define G_FW_FILTER_WR_MASKHASH(x) \ + (((x) >> S_FW_FILTER_WR_MASKHASH) & M_FW_FILTER_WR_MASKHASH) +#define F_FW_FILTER_WR_MASKHASH V_FW_FILTER_WR_MASKHASH(1U) + +#define S_FW_FILTER_WR_DIRSTEERHASH 21 +#define M_FW_FILTER_WR_DIRSTEERHASH 0x1 +#define V_FW_FILTER_WR_DIRSTEERHASH(x) ((x) << S_FW_FILTER_WR_DIRSTEERHASH) +#define G_FW_FILTER_WR_DIRSTEERHASH(x) \ + (((x) >> S_FW_FILTER_WR_DIRSTEERHASH) & M_FW_FILTER_WR_DIRSTEERHASH) +#define F_FW_FILTER_WR_DIRSTEERHASH V_FW_FILTER_WR_DIRSTEERHASH(1U) + +#define S_FW_FILTER_WR_LPBK 20 +#define M_FW_FILTER_WR_LPBK 0x1 +#define V_FW_FILTER_WR_LPBK(x) ((x) << S_FW_FILTER_WR_LPBK) +#define G_FW_FILTER_WR_LPBK(x) \ + (((x) >> S_FW_FILTER_WR_LPBK) & M_FW_FILTER_WR_LPBK) +#define F_FW_FILTER_WR_LPBK V_FW_FILTER_WR_LPBK(1U) + +#define S_FW_FILTER_WR_DMAC 19 +#define M_FW_FILTER_WR_DMAC 0x1 +#define V_FW_FILTER_WR_DMAC(x) ((x) << S_FW_FILTER_WR_DMAC) +#define G_FW_FILTER_WR_DMAC(x) \ + (((x) >> S_FW_FILTER_WR_DMAC) & M_FW_FILTER_WR_DMAC) +#define F_FW_FILTER_WR_DMAC V_FW_FILTER_WR_DMAC(1U) + +#define S_FW_FILTER_WR_SMAC 18 +#define M_FW_FILTER_WR_SMAC 0x1 +#define V_FW_FILTER_WR_SMAC(x) ((x) << S_FW_FILTER_WR_SMAC) +#define G_FW_FILTER_WR_SMAC(x) \ + (((x) >> S_FW_FILTER_WR_SMAC) & M_FW_FILTER_WR_SMAC) +#define F_FW_FILTER_WR_SMAC V_FW_FILTER_WR_SMAC(1U) + +#define S_FW_FILTER_WR_INSVLAN 17 +#define M_FW_FILTER_WR_INSVLAN 0x1 +#define V_FW_FILTER_WR_INSVLAN(x) ((x) << S_FW_FILTER_WR_INSVLAN) +#define G_FW_FILTER_WR_INSVLAN(x) \ + (((x) >> S_FW_FILTER_WR_INSVLAN) & M_FW_FILTER_WR_INSVLAN) +#define F_FW_FILTER_WR_INSVLAN V_FW_FILTER_WR_INSVLAN(1U) + +#define S_FW_FILTER_WR_RMVLAN 16 +#define M_FW_FILTER_WR_RMVLAN 0x1 +#define V_FW_FILTER_WR_RMVLAN(x) ((x) << S_FW_FILTER_WR_RMVLAN) +#define G_FW_FILTER_WR_RMVLAN(x) \ + (((x) >> S_FW_FILTER_WR_RMVLAN) & M_FW_FILTER_WR_RMVLAN) +#define F_FW_FILTER_WR_RMVLAN V_FW_FILTER_WR_RMVLAN(1U) + +#define S_FW_FILTER_WR_HITCNTS 15 +#define M_FW_FILTER_WR_HITCNTS 0x1 +#define V_FW_FILTER_WR_HITCNTS(x) ((x) << S_FW_FILTER_WR_HITCNTS) +#define G_FW_FILTER_WR_HITCNTS(x) \ + (((x) >> S_FW_FILTER_WR_HITCNTS) & M_FW_FILTER_WR_HITCNTS) +#define F_FW_FILTER_WR_HITCNTS V_FW_FILTER_WR_HITCNTS(1U) + +#define S_FW_FILTER_WR_TXCHAN 13 +#define M_FW_FILTER_WR_TXCHAN 0x3 +#define V_FW_FILTER_WR_TXCHAN(x) ((x) << S_FW_FILTER_WR_TXCHAN) +#define G_FW_FILTER_WR_TXCHAN(x) \ + (((x) >> S_FW_FILTER_WR_TXCHAN) & M_FW_FILTER_WR_TXCHAN) + +#define S_FW_FILTER_WR_PRIO 12 +#define M_FW_FILTER_WR_PRIO 0x1 +#define V_FW_FILTER_WR_PRIO(x) ((x) << S_FW_FILTER_WR_PRIO) +#define G_FW_FILTER_WR_PRIO(x) \ + (((x) >> S_FW_FILTER_WR_PRIO) & M_FW_FILTER_WR_PRIO) +#define F_FW_FILTER_WR_PRIO V_FW_FILTER_WR_PRIO(1U) + +#define S_FW_FILTER_WR_L2TIX 0 +#define M_FW_FILTER_WR_L2TIX 0xfff +#define V_FW_FILTER_WR_L2TIX(x) ((x) << S_FW_FILTER_WR_L2TIX) +#define G_FW_FILTER_WR_L2TIX(x) \ + (((x) >> S_FW_FILTER_WR_L2TIX) & M_FW_FILTER_WR_L2TIX) + +#define S_FW_FILTER_WR_FRAG 7 +#define M_FW_FILTER_WR_FRAG 0x1 +#define V_FW_FILTER_WR_FRAG(x) ((x) << S_FW_FILTER_WR_FRAG) +#define G_FW_FILTER_WR_FRAG(x) \ + (((x) >> S_FW_FILTER_WR_FRAG) & M_FW_FILTER_WR_FRAG) +#define F_FW_FILTER_WR_FRAG V_FW_FILTER_WR_FRAG(1U) + +#define S_FW_FILTER_WR_FRAGM 6 +#define M_FW_FILTER_WR_FRAGM 0x1 +#define V_FW_FILTER_WR_FRAGM(x) ((x) << S_FW_FILTER_WR_FRAGM) +#define G_FW_FILTER_WR_FRAGM(x) \ + (((x) >> S_FW_FILTER_WR_FRAGM) & M_FW_FILTER_WR_FRAGM) +#define F_FW_FILTER_WR_FRAGM V_FW_FILTER_WR_FRAGM(1U) + +#define S_FW_FILTER_WR_IVLAN_VLD 5 +#define M_FW_FILTER_WR_IVLAN_VLD 0x1 +#define V_FW_FILTER_WR_IVLAN_VLD(x) ((x) << S_FW_FILTER_WR_IVLAN_VLD) +#define G_FW_FILTER_WR_IVLAN_VLD(x) \ + (((x) >> S_FW_FILTER_WR_IVLAN_VLD) & M_FW_FILTER_WR_IVLAN_VLD) +#define F_FW_FILTER_WR_IVLAN_VLD V_FW_FILTER_WR_IVLAN_VLD(1U) + +#define S_FW_FILTER_WR_OVLAN_VLD 4 +#define M_FW_FILTER_WR_OVLAN_VLD 0x1 +#define V_FW_FILTER_WR_OVLAN_VLD(x) ((x) << S_FW_FILTER_WR_OVLAN_VLD) +#define G_FW_FILTER_WR_OVLAN_VLD(x) \ + (((x) >> S_FW_FILTER_WR_OVLAN_VLD) & M_FW_FILTER_WR_OVLAN_VLD) +#define F_FW_FILTER_WR_OVLAN_VLD V_FW_FILTER_WR_OVLAN_VLD(1U) + +#define S_FW_FILTER_WR_IVLAN_VLDM 3 +#define M_FW_FILTER_WR_IVLAN_VLDM 0x1 +#define V_FW_FILTER_WR_IVLAN_VLDM(x) ((x) << S_FW_FILTER_WR_IVLAN_VLDM) +#define G_FW_FILTER_WR_IVLAN_VLDM(x) \ + (((x) >> S_FW_FILTER_WR_IVLAN_VLDM) & M_FW_FILTER_WR_IVLAN_VLDM) +#define F_FW_FILTER_WR_IVLAN_VLDM V_FW_FILTER_WR_IVLAN_VLDM(1U) + +#define S_FW_FILTER_WR_OVLAN_VLDM 2 +#define M_FW_FILTER_WR_OVLAN_VLDM 0x1 +#define V_FW_FILTER_WR_OVLAN_VLDM(x) ((x) << S_FW_FILTER_WR_OVLAN_VLDM) +#define G_FW_FILTER_WR_OVLAN_VLDM(x) \ + (((x) >> S_FW_FILTER_WR_OVLAN_VLDM) & M_FW_FILTER_WR_OVLAN_VLDM) +#define F_FW_FILTER_WR_OVLAN_VLDM V_FW_FILTER_WR_OVLAN_VLDM(1U) + +#define S_FW_FILTER_WR_RX_CHAN 15 +#define M_FW_FILTER_WR_RX_CHAN 0x1 +#define V_FW_FILTER_WR_RX_CHAN(x) ((x) << S_FW_FILTER_WR_RX_CHAN) +#define G_FW_FILTER_WR_RX_CHAN(x) \ + (((x) >> S_FW_FILTER_WR_RX_CHAN) & M_FW_FILTER_WR_RX_CHAN) +#define F_FW_FILTER_WR_RX_CHAN V_FW_FILTER_WR_RX_CHAN(1U) + +#define S_FW_FILTER_WR_RX_RPL_IQ 0 +#define M_FW_FILTER_WR_RX_RPL_IQ 0x3ff +#define V_FW_FILTER_WR_RX_RPL_IQ(x) ((x) << S_FW_FILTER_WR_RX_RPL_IQ) +#define G_FW_FILTER_WR_RX_RPL_IQ(x) \ + (((x) >> S_FW_FILTER_WR_RX_RPL_IQ) & M_FW_FILTER_WR_RX_RPL_IQ) + +#define S_FW_FILTER_WR_MACI 23 +#define M_FW_FILTER_WR_MACI 0x1ff +#define V_FW_FILTER_WR_MACI(x) ((x) << S_FW_FILTER_WR_MACI) +#define G_FW_FILTER_WR_MACI(x) \ + (((x) >> S_FW_FILTER_WR_MACI) & M_FW_FILTER_WR_MACI) + +#define S_FW_FILTER_WR_MACIM 14 +#define M_FW_FILTER_WR_MACIM 0x1ff +#define V_FW_FILTER_WR_MACIM(x) ((x) << S_FW_FILTER_WR_MACIM) +#define G_FW_FILTER_WR_MACIM(x) \ + (((x) >> S_FW_FILTER_WR_MACIM) & M_FW_FILTER_WR_MACIM) + +#define S_FW_FILTER_WR_FCOE 13 +#define M_FW_FILTER_WR_FCOE 0x1 +#define V_FW_FILTER_WR_FCOE(x) ((x) << S_FW_FILTER_WR_FCOE) +#define G_FW_FILTER_WR_FCOE(x) \ + (((x) >> S_FW_FILTER_WR_FCOE) & M_FW_FILTER_WR_FCOE) +#define F_FW_FILTER_WR_FCOE V_FW_FILTER_WR_FCOE(1U) + +#define S_FW_FILTER_WR_FCOEM 12 +#define M_FW_FILTER_WR_FCOEM 0x1 +#define V_FW_FILTER_WR_FCOEM(x) ((x) << S_FW_FILTER_WR_FCOEM) +#define G_FW_FILTER_WR_FCOEM(x) \ + (((x) >> S_FW_FILTER_WR_FCOEM) & M_FW_FILTER_WR_FCOEM) +#define F_FW_FILTER_WR_FCOEM V_FW_FILTER_WR_FCOEM(1U) + +#define S_FW_FILTER_WR_PORT 9 +#define M_FW_FILTER_WR_PORT 0x7 +#define V_FW_FILTER_WR_PORT(x) ((x) << S_FW_FILTER_WR_PORT) +#define G_FW_FILTER_WR_PORT(x) \ + (((x) >> S_FW_FILTER_WR_PORT) & M_FW_FILTER_WR_PORT) + +#define S_FW_FILTER_WR_PORTM 6 +#define M_FW_FILTER_WR_PORTM 0x7 +#define V_FW_FILTER_WR_PORTM(x) ((x) << S_FW_FILTER_WR_PORTM) +#define G_FW_FILTER_WR_PORTM(x) \ + (((x) >> S_FW_FILTER_WR_PORTM) & M_FW_FILTER_WR_PORTM) + +#define S_FW_FILTER_WR_MATCHTYPE 3 +#define M_FW_FILTER_WR_MATCHTYPE 0x7 +#define V_FW_FILTER_WR_MATCHTYPE(x) ((x) << S_FW_FILTER_WR_MATCHTYPE) +#define G_FW_FILTER_WR_MATCHTYPE(x) \ + (((x) >> S_FW_FILTER_WR_MATCHTYPE) & M_FW_FILTER_WR_MATCHTYPE) + +#define S_FW_FILTER_WR_MATCHTYPEM 0 +#define M_FW_FILTER_WR_MATCHTYPEM 0x7 +#define V_FW_FILTER_WR_MATCHTYPEM(x) ((x) << S_FW_FILTER_WR_MATCHTYPEM) +#define G_FW_FILTER_WR_MATCHTYPEM(x) \ + (((x) >> S_FW_FILTER_WR_MATCHTYPEM) & M_FW_FILTER_WR_MATCHTYPEM) struct fw_ulptx_wr { __be32 op_to_compl; @@ -100,6 +416,108 @@ struct fw_eth_tx_pkt_wr { __be64 r3; }; +struct fw_ofld_connection_wr { + __be32 op_compl; + __be32 len16_pkd; + __u64 cookie; + __be64 r2; + __be64 r3; + struct fw_ofld_connection_le { + __be32 version_cpl; + __be32 filter; + __be32 r1; + __be16 lport; + __be16 pport; + union fw_ofld_connection_leip { + struct fw_ofld_connection_le_ipv4 { + __be32 pip; + __be32 lip; + __be64 r0; + __be64 r1; + __be64 r2; + } ipv4; + struct fw_ofld_connection_le_ipv6 { + __be64 pip_hi; + __be64 pip_lo; + __be64 lip_hi; + __be64 lip_lo; + } ipv6; + } u; + } le; + struct fw_ofld_connection_tcb { + __be32 t_state_to_astid; + __be16 cplrxdataack_cplpassacceptrpl; + __be16 rcv_adv; + __be32 rcv_nxt; + __be32 tx_max; + __be64 opt0; + __be32 opt2; + __be32 r1; + __be64 r2; + __be64 r3; + } tcb; +}; + +#define S_FW_OFLD_CONNECTION_WR_VERSION 31 +#define M_FW_OFLD_CONNECTION_WR_VERSION 0x1 +#define V_FW_OFLD_CONNECTION_WR_VERSION(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_VERSION) +#define G_FW_OFLD_CONNECTION_WR_VERSION(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_VERSION) & \ + M_FW_OFLD_CONNECTION_WR_VERSION) +#define F_FW_OFLD_CONNECTION_WR_VERSION \ + V_FW_OFLD_CONNECTION_WR_VERSION(1U) + +#define S_FW_OFLD_CONNECTION_WR_CPL 30 +#define M_FW_OFLD_CONNECTION_WR_CPL 0x1 +#define V_FW_OFLD_CONNECTION_WR_CPL(x) ((x) << S_FW_OFLD_CONNECTION_WR_CPL) +#define G_FW_OFLD_CONNECTION_WR_CPL(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPL) & M_FW_OFLD_CONNECTION_WR_CPL) +#define F_FW_OFLD_CONNECTION_WR_CPL V_FW_OFLD_CONNECTION_WR_CPL(1U) + +#define S_FW_OFLD_CONNECTION_WR_T_STATE 28 +#define M_FW_OFLD_CONNECTION_WR_T_STATE 0xf +#define V_FW_OFLD_CONNECTION_WR_T_STATE(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_T_STATE) +#define G_FW_OFLD_CONNECTION_WR_T_STATE(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_T_STATE) & \ + M_FW_OFLD_CONNECTION_WR_T_STATE) + +#define S_FW_OFLD_CONNECTION_WR_RCV_SCALE 24 +#define M_FW_OFLD_CONNECTION_WR_RCV_SCALE 0xf +#define V_FW_OFLD_CONNECTION_WR_RCV_SCALE(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_RCV_SCALE) +#define G_FW_OFLD_CONNECTION_WR_RCV_SCALE(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_RCV_SCALE) & \ + M_FW_OFLD_CONNECTION_WR_RCV_SCALE) + +#define S_FW_OFLD_CONNECTION_WR_ASTID 0 +#define M_FW_OFLD_CONNECTION_WR_ASTID 0xffffff +#define V_FW_OFLD_CONNECTION_WR_ASTID(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_ASTID) +#define G_FW_OFLD_CONNECTION_WR_ASTID(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_ASTID) & M_FW_OFLD_CONNECTION_WR_ASTID) + +#define S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK 15 +#define M_FW_OFLD_CONNECTION_WR_CPLRXDATAACK 0x1 +#define V_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) +#define G_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) & \ + M_FW_OFLD_CONNECTION_WR_CPLRXDATAACK) +#define F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK \ + V_FW_OFLD_CONNECTION_WR_CPLRXDATAACK(1U) + +#define S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL 14 +#define M_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL 0x1 +#define V_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(x) \ + ((x) << S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) +#define G_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(x) \ + (((x) >> S_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) & \ + M_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL) +#define F_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL \ + V_FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL(1U) + enum fw_flowc_mnem { FW_FLOWC_MNEM_PFNVFN, /* PFN [15:8] VFN [7:0] */ FW_FLOWC_MNEM_CH, diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index abf26c7c1d19..3bc1912afba9 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -616,7 +616,7 @@ static inline bool be_error(struct be_adapter *adapter) return adapter->eeh_error || adapter->hw_error || adapter->fw_timeout; } -static inline bool be_crit_error(struct be_adapter *adapter) +static inline bool be_hw_error(struct be_adapter *adapter) { return adapter->eeh_error || adapter->hw_error; } diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index f2875aa47661..8a250c38fb82 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -298,7 +298,12 @@ void be_async_mcc_enable(struct be_adapter *adapter) void be_async_mcc_disable(struct be_adapter *adapter) { + spin_lock_bh(&adapter->mcc_cq_lock); + adapter->mcc_obj.rearm_cq = false; + be_cq_notify(adapter, adapter->mcc_obj.cq.id, false, 0); + + spin_unlock_bh(&adapter->mcc_cq_lock); } int be_process_mcc(struct be_adapter *adapter) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index f95612b907ae..9dca22be8125 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1689,15 +1689,41 @@ static void be_rx_cq_clean(struct be_rx_obj *rxo) struct be_queue_info *rxq = &rxo->q; struct be_queue_info *rx_cq = &rxo->cq; struct be_rx_compl_info *rxcp; + struct be_adapter *adapter = rxo->adapter; + int flush_wait = 0; u16 tail; - /* First cleanup pending rx completions */ - while ((rxcp = be_rx_compl_get(rxo)) != NULL) { - be_rx_compl_discard(rxo, rxcp); - be_cq_notify(rxo->adapter, rx_cq->id, false, 1); + /* Consume pending rx completions. + * Wait for the flush completion (identified by zero num_rcvd) + * to arrive. Notify CQ even when there are no more CQ entries + * for HW to flush partially coalesced CQ entries. + * In Lancer, there is no need to wait for flush compl. + */ + for (;;) { + rxcp = be_rx_compl_get(rxo); + if (rxcp == NULL) { + if (lancer_chip(adapter)) + break; + + if (flush_wait++ > 10 || be_hw_error(adapter)) { + dev_warn(&adapter->pdev->dev, + "did not receive flush compl\n"); + break; + } + be_cq_notify(adapter, rx_cq->id, true, 0); + mdelay(1); + } else { + be_rx_compl_discard(rxo, rxcp); + be_cq_notify(adapter, rx_cq->id, true, 1); + if (rxcp->num_rcvd == 0) + break; + } } - /* Then free posted rx buffer that were not used */ + /* After cleanup, leave the CQ in unarmed state */ + be_cq_notify(adapter, rx_cq->id, false, 0); + + /* Then free posted rx buffers that were not used */ tail = (rxq->head + rxq->len - atomic_read(&rxq->used)) % rxq->len; for (; atomic_read(&rxq->used) > 0; index_inc(&tail, rxq->len)) { page_info = get_rx_page_info(rxo, tail); @@ -2157,7 +2183,7 @@ void be_detect_error(struct be_adapter *adapter) u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0; u32 i; - if (be_crit_error(adapter)) + if (be_hw_error(adapter)) return; if (lancer_chip(adapter)) { @@ -2398,13 +2424,22 @@ static int be_close(struct net_device *netdev) be_roce_dev_close(adapter); - be_async_mcc_disable(adapter); - if (!lancer_chip(adapter)) be_intr_set(adapter, false); - for_all_evt_queues(adapter, eqo, i) { + for_all_evt_queues(adapter, eqo, i) napi_disable(&eqo->napi); + + be_async_mcc_disable(adapter); + + /* Wait for all pending tx completions to arrive so that + * all tx skbs are freed. + */ + be_tx_compl_clean(adapter); + + be_rx_qs_destroy(adapter); + + for_all_evt_queues(adapter, eqo, i) { if (msix_enabled(adapter)) synchronize_irq(be_msix_vec_get(adapter, eqo)); else @@ -2414,12 +2449,6 @@ static int be_close(struct net_device *netdev) be_irq_unregister(adapter); - /* Wait for all pending tx completions to arrive so that - * all tx skbs are freed. - */ - be_tx_compl_clean(adapter); - - be_rx_qs_destroy(adapter); return 0; } diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 5ba6e1cbd346..ec490d741fc0 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -94,9 +94,8 @@ config GIANFAR config FEC_PTP bool "PTP Hardware Clock (PHC)" - depends on FEC && ARCH_MXC + depends on FEC && ARCH_MXC && !SOC_IMX25 && !SOC_IMX27 && !SOC_IMX35 && !SOC_IMX5 select PTP_1588_CLOCK - default y if SOC_IMX6Q --help--- Say Y here if you want to use PTP Hardware Clock (PHC) in the driver. Only the basic clock operations have been implemented. diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9a9de51ecc91..8b3d0512a46b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1338,6 +1338,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; + u32 dword_field; int err; u8 byte_field; @@ -1372,10 +1373,18 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); + MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); + if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { + param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + } else { + MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET); + if (byte_field & 0x8) + param->steering_mode = MLX4_STEERING_MODE_B0; + else + param->steering_mode = MLX4_STEERING_MODE_A0; + } /* steering attributes */ - if (dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) { - + if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 2c2e7ade2a34..dbf2f69cc59f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -172,6 +172,7 @@ struct mlx4_init_hca_param { u8 log_uar_sz; u8 uar_page_sz; /* log pg sz in 4k chunks */ u8 fs_hash_enable_bits; + u8 steering_mode; /* for QUERY_HCA */ u64 dev_cap_enabled; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index b2acbe7706a3..e1bafffbc3b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -85,15 +85,15 @@ static int probe_vf; module_param(probe_vf, int, 0644); MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)"); -int mlx4_log_num_mgm_entry_size = 10; +int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; module_param_named(log_num_mgm_entry_size, mlx4_log_num_mgm_entry_size, int, 0444); MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " of qp per mcg, for example:" - " 10 gives 248.range: 9<=" + " 10 gives 248.range: 7 <=" " log_num_mgm_entry_size <= 12." - " Not in use with device managed" - " flow steering"); + " To activate device managed" + " flow steering when available, set to -1"); static bool enable_64b_cqe_eqe; module_param(enable_64b_cqe_eqe, bool, 0444); @@ -281,28 +281,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; - if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) { - dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; - dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; - dev->caps.fs_log_max_ucast_qp_range_size = - dev_cap->fs_log_max_ucast_qp_range_size; - } else { - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) { - dev->caps.steering_mode = MLX4_STEERING_MODE_B0; - } else { - dev->caps.steering_mode = MLX4_STEERING_MODE_A0; - - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) - mlx4_warn(dev, "Must have UC_STEER and MC_STEER flags " - "set to use B0 steering. Falling back to A0 steering mode.\n"); - } - dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); - } - mlx4_dbg(dev, "Steering mode is: %s\n", - mlx4_steering_mode_str(dev->caps.steering_mode)); - /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; @@ -493,6 +471,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) } EXPORT_SYMBOL(mlx4_is_slave_active); +static void slave_adjust_steering_mode(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap, + struct mlx4_init_hca_param *hca_param) +{ + dev->caps.steering_mode = hca_param->steering_mode; + if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; + } else + dev->caps.num_qp_per_mgm = + 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); + + mlx4_dbg(dev, "Steering mode is: %s\n", + mlx4_steering_mode_str(dev->caps.steering_mode)); +} + static int mlx4_slave_cap(struct mlx4_dev *dev) { int err; @@ -635,6 +630,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.cqe_size = 32; } + slave_adjust_steering_mode(dev, &dev_cap, &hca_param); + return 0; err_mem: @@ -1321,6 +1318,59 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) } } +static int choose_log_fs_mgm_entry_size(int qp_per_entry) +{ + int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; + + for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; + i++) { + if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) + break; + } + + return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; +} + +static void choose_steering_mode(struct mlx4_dev *dev, + struct mlx4_dev_cap *dev_cap) +{ + if (mlx4_log_num_mgm_entry_size == -1 && + dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && + (!mlx4_is_mfunc(dev) || + (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) && + choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= + MLX4_MIN_MGM_LOG_ENTRY_SIZE) { + dev->oper_log_mgm_entry_size = + choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); + dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; + } else { + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + dev->caps.steering_mode = MLX4_STEERING_MODE_B0; + else { + dev->caps.steering_mode = MLX4_STEERING_MODE_A0; + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags " + "set to use B0 steering. Falling back to A0 steering mode.\n"); + } + dev->oper_log_mgm_entry_size = + mlx4_log_num_mgm_entry_size > 0 ? + mlx4_log_num_mgm_entry_size : + MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; + dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); + } + mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, " + "modparam log_num_mgm_entry_size = %d\n", + mlx4_steering_mode_str(dev->caps.steering_mode), + dev->oper_log_mgm_entry_size, + mlx4_log_num_mgm_entry_size); +} + static int mlx4_init_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1360,6 +1410,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto err_stop_fw; } + choose_steering_mode(dev, &dev_cap); + if (mlx4_is_master(dev)) mlx4_parav_master_pf_caps(dev); @@ -2452,6 +2504,17 @@ static int __init mlx4_verify_params(void) port_type_array[0] = true; } + if (mlx4_log_num_mgm_entry_size != -1 && + (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || + mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { + pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " + "in legal range (-1 or %d..%d)\n", + mlx4_log_num_mgm_entry_size, + MLX4_MIN_MGM_LOG_ENTRY_SIZE, + MLX4_MAX_MGM_LOG_ENTRY_SIZE); + return -1; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index e151c21baf2b..1ee4db3c6400 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -54,12 +54,7 @@ struct mlx4_mgm { int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) { - if (dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) - return 1 << MLX4_FS_MGM_LOG_ENTRY_SIZE; - else - return min((1 << mlx4_log_num_mgm_entry_size), - MLX4_MAX_MGM_ENTRY_SIZE); + return 1 << dev->oper_log_mgm_entry_size; } int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1cf42036d7bb..116c5c29d2d1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -94,8 +94,10 @@ enum { }; enum { - MLX4_MAX_MGM_ENTRY_SIZE = 0x1000, - MLX4_MAX_QP_PER_MGM = 4 * (MLX4_MAX_MGM_ENTRY_SIZE / 16 - 2), + MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10, + MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7, + MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12, + MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE) / 16 - 2), MLX4_MTT_ENTRY_PER_SEG = 8, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index b05705f50f0f..561ed2a22a17 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3071,6 +3071,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC]; int err; + int qpn; struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; @@ -3080,13 +3081,21 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, return -EOPNOTSUPP; ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; + qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; + err = get_res(dev, slave, qpn, RES_QP, NULL); + if (err) { + pr_err("Steering rule with qpn 0x%x rejected.\n", qpn); + return err; + } rule_header = (struct _rule_hw *)(ctrl + 1); header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: - if (validate_eth_header_mac(slave, rule_header, rlist)) - return -EINVAL; + if (validate_eth_header_mac(slave, rule_header, rlist)) { + err = -EINVAL; + goto err_put; + } break; case MLX4_NET_TRANS_RULE_ID_IB: break; @@ -3094,14 +3103,17 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: pr_warn("Can't attach FS rule without L2 headers, adding L2 header.\n"); - if (add_eth_header(dev, slave, inbox, rlist, header_id)) - return -EINVAL; + if (add_eth_header(dev, slave, inbox, rlist, header_id)) { + err = -EINVAL; + goto err_put; + } vhcr->in_modifier += sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2; break; default: pr_err("Corrupted mailbox.\n"); - return -EINVAL; + err = -EINVAL; + goto err_put; } err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, @@ -3109,16 +3121,18 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) - return err; + goto err_put; err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0); if (err) { mlx4_err(dev, "Fail to add flow steering resources.\n "); /* detach rule*/ mlx4_cmd(dev, vhcr->out_param, 0, 0, - MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } +err_put: + put_res(dev, slave, qpn, RES_QP); return err; } diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 83f0ea929d3d..8ebc352bcbe6 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -4761,7 +4761,7 @@ static void transmit_cleanup(struct dev_info *hw_priv, int normal) struct ksz_dma_buf *dma_buf; struct net_device *dev = NULL; - spin_lock(&hw_priv->hwlock); + spin_lock_irq(&hw_priv->hwlock); last = info->last; while (info->avail < info->alloc) { @@ -4795,7 +4795,7 @@ static void transmit_cleanup(struct dev_info *hw_priv, int normal) info->avail++; } info->last = last; - spin_unlock(&hw_priv->hwlock); + spin_unlock_irq(&hw_priv->hwlock); /* Notify the network subsystem that the packet has been sent. */ if (dev) @@ -5259,11 +5259,15 @@ static irqreturn_t netdev_intr(int irq, void *dev_id) struct dev_info *hw_priv = priv->adapter; struct ksz_hw *hw = &hw_priv->hw; + spin_lock(&hw_priv->hwlock); + hw_read_intr(hw, &int_enable); /* Not our interrupt! */ - if (!int_enable) + if (!int_enable) { + spin_unlock(&hw_priv->hwlock); return IRQ_NONE; + } do { hw_ack_intr(hw, int_enable); @@ -5310,6 +5314,8 @@ static irqreturn_t netdev_intr(int irq, void *dev_id) hw_ena_intr(hw); + spin_unlock(&hw_priv->hwlock); + return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 537902479689..bc7ec64e9c7a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -36,8 +36,8 @@ #define _QLCNIC_LINUX_MAJOR 5 #define _QLCNIC_LINUX_MINOR 0 -#define _QLCNIC_LINUX_SUBVERSION 29 -#define QLCNIC_LINUX_VERSIONID "5.0.29" +#define _QLCNIC_LINUX_SUBVERSION 30 +#define QLCNIC_LINUX_VERSIONID "5.0.30" #define QLCNIC_DRV_IDC_VER 0x01 #define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\ (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION)) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index 58f094ca052e..b14b8f0787ea 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -134,7 +134,7 @@ int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter) __le32 *tmp_buf; struct qlcnic_cmd_args cmd; struct qlcnic_hardware_context *ahw; - struct qlcnic_dump_template_hdr *tmpl_hdr, *tmp_tmpl; + struct qlcnic_dump_template_hdr *tmpl_hdr; dma_addr_t tmp_addr_t = 0; ahw = adapter->ahw; @@ -150,6 +150,8 @@ int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter) } temp_size = cmd.rsp.arg2; version = cmd.rsp.arg3; + dev_info(&adapter->pdev->dev, + "minidump template version = 0x%x", version); if (!temp_size) return -EIO; @@ -174,7 +176,6 @@ int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter) err = -EIO; goto error; } - tmp_tmpl = tmp_addr; ahw->fw_dump.tmpl_hdr = vzalloc(temp_size); if (!ahw->fw_dump.tmpl_hdr) { err = -EIO; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index fc48e000f35f..7a6d5ebe4e0f 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -365,7 +365,7 @@ static int qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter, struct cmd_desc_type0 *cmd_desc_arr, int nr_desc) { - u32 i, producer, consumer; + u32 i, producer; struct qlcnic_cmd_buffer *pbuf; struct cmd_desc_type0 *cmd_desc; struct qlcnic_host_tx_ring *tx_ring; @@ -379,7 +379,6 @@ qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter, __netif_tx_lock_bh(tx_ring->txq); producer = tx_ring->producer; - consumer = tx_ring->sw_consumer; if (nr_desc >= qlcnic_tx_avail(tx_ring)) { netif_tx_stop_queue(tx_ring->txq); @@ -402,7 +401,7 @@ qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter, pbuf->frag_count = 0; memcpy(&tx_ring->desc_head[producer], - &cmd_desc_arr[i], sizeof(struct cmd_desc_type0)); + cmd_desc, sizeof(struct cmd_desc_type0)); producer = get_next_index(producer, tx_ring->num_desc); i++; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index a7554d9aab0c..d833f5927891 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -445,13 +445,10 @@ static int qlcnic_set_function_modes(struct qlcnic_adapter *adapter) { u8 id; - u32 ref_count; int i, ret = 1; u32 data = QLCNIC_MGMT_FUNC; struct qlcnic_hardware_context *ahw = adapter->ahw; - /* If other drivers are not in use set their privilege level */ - ref_count = QLCRD32(adapter, QLCNIC_CRB_DRV_ACTIVE); ret = qlcnic_api_lock(adapter); if (ret) goto err_lock; @@ -531,11 +528,9 @@ static int qlcnic_setup_pci_map(struct pci_dev *pdev, { u32 offset; void __iomem *mem_ptr0 = NULL; - resource_size_t mem_base; unsigned long mem_len, pci_len0 = 0, bar0_len; /* remap phys address */ - mem_base = pci_resource_start(pdev, 0); /* 0 is for BAR 0 */ mem_len = pci_resource_len(pdev, 0); qlcnic_get_bar_length(pdev->device, &bar0_len); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index 12ff29270745..0b8d8625834c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -197,7 +197,7 @@ static u32 qlcnic_dump_ctrl(struct qlcnic_adapter *adapter, int i, k, timeout = 0; void __iomem *base = adapter->ahw->pci_base0; u32 addr, data; - u8 opcode, no_ops; + u8 no_ops; struct __ctrl *ctr = &entry->region.ctrl; struct qlcnic_dump_template_hdr *t_hdr = adapter->ahw->fw_dump.tmpl_hdr; @@ -206,7 +206,6 @@ static u32 qlcnic_dump_ctrl(struct qlcnic_adapter *adapter, for (i = 0; i < no_ops; i++) { k = 0; - opcode = 0; for (k = 0; k < 8; k++) { if (!(ctr->opcode & (1 << k))) continue; diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index cb6fc5a743ca..5ac93323a40c 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -577,28 +577,30 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) { struct net_device *dev = dev_instance; struct cp_private *cp; + int handled = 0; u16 status; if (unlikely(dev == NULL)) return IRQ_NONE; cp = netdev_priv(dev); + spin_lock(&cp->lock); + status = cpr16(IntrStatus); if (!status || (status == 0xFFFF)) - return IRQ_NONE; + goto out_unlock; + + handled = 1; netif_dbg(cp, intr, dev, "intr, status %04x cmd %02x cpcmd %04x\n", status, cpr8(Cmd), cpr16(CpCmd)); cpw16(IntrStatus, status & ~cp_rx_intr_mask); - spin_lock(&cp->lock); - /* close possible race's with dev_close */ if (unlikely(!netif_running(dev))) { cpw16(IntrMask, 0); - spin_unlock(&cp->lock); - return IRQ_HANDLED; + goto out_unlock; } if (status & (RxOK | RxErr | RxEmpty | RxFIFOOvr)) @@ -612,7 +614,6 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) if (status & LinkChg) mii_check_media(&cp->mii_if, netif_msg_link(cp), false); - spin_unlock(&cp->lock); if (status & PciErr) { u16 pci_status; @@ -625,7 +626,10 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance) /* TODO: reset hardware */ } - return IRQ_HANDLED; +out_unlock: + spin_unlock(&cp->lock); + + return IRQ_RETVAL(handled); } #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 022b45bc14ff..a670d23d9340 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2386,8 +2386,6 @@ static const struct of_device_id smc91x_match[] = { {}, }; MODULE_DEVICE_TABLE(of, smc91x_match); -#else -#define smc91x_match NULL #endif static struct dev_pm_ops smc_drv_pm_ops = { @@ -2402,7 +2400,7 @@ static struct platform_driver smc_driver = { .name = CARDNAME, .owner = THIS_MODULE, .pm = &smc_drv_pm_ops, - .of_match_table = smc91x_match, + .of_match_table = of_match_ptr(smc91x_match), }, }; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 4616bf27d515..e112877d15d3 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2575,11 +2575,13 @@ static const struct dev_pm_ops smsc911x_pm_ops = { #define SMSC911X_PM_OPS NULL #endif +#ifdef CONFIG_OF static const struct of_device_id smsc911x_dt_ids[] = { { .compatible = "smsc,lan9115", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, smsc911x_dt_ids); +#endif static struct platform_driver smsc911x_driver = { .probe = smsc911x_drv_probe, @@ -2588,7 +2590,7 @@ static struct platform_driver smsc911x_driver = { .name = SMSC_CHIPNAME, .owner = THIS_MODULE, .pm = SMSC911X_PM_OPS, - .of_match_table = smsc911x_dt_ids, + .of_match_table = of_match_ptr(smsc911x_dt_ids), }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 023a4fb4efa5..b05df8983be5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -127,14 +127,14 @@ static inline int stmmac_register_platform(void) } static inline void stmmac_unregister_platform(void) { - platform_driver_register(&stmmac_pltfr_driver); + platform_driver_unregister(&stmmac_pltfr_driver); } #else static inline int stmmac_register_platform(void) { pr_debug("stmmac: do not register the platf driver\n"); - return -EINVAL; + return 0; } static inline void stmmac_unregister_platform(void) { @@ -162,7 +162,7 @@ static inline int stmmac_register_pci(void) { pr_debug("stmmac: do not register the PCI driver\n"); - return -EINVAL; + return 0; } static inline void stmmac_unregister_pci(void) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 542edbcd92c7..f07c0612abf6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2194,18 +2194,20 @@ int stmmac_restore(struct net_device *ndev) */ static int __init stmmac_init(void) { - int err_plt = 0; - int err_pci = 0; - - err_plt = stmmac_register_platform(); - err_pci = stmmac_register_pci(); - - if ((err_pci) && (err_plt)) { - pr_err("stmmac: driver registration failed\n"); - return -EINVAL; - } + int ret; + ret = stmmac_register_platform(); + if (ret) + goto err; + ret = stmmac_register_pci(); + if (ret) + goto err_pci; return 0; +err_pci: + stmmac_unregister_platform(); +err: + pr_err("stmmac: driver registration failed\n"); + return ret; } static void __exit stmmac_exit(void) diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index 337766738eca..5e62c1aeeffb 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -27,8 +27,6 @@ #include <linux/uaccess.h> #include <linux/workqueue.h> -#include <plat/clock.h> - #include "cpts.h" #ifdef CONFIG_TI_CPTS diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 40b426edc9e6..504f7f1cad94 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -138,6 +138,8 @@ struct tun_file { /* only used for fasnyc */ unsigned int flags; u16 queue_index; + struct list_head next; + struct tun_struct *detached; }; struct tun_flow_entry { @@ -182,6 +184,8 @@ struct tun_struct { struct hlist_head flows[TUN_NUM_FLOW_ENTRIES]; struct timer_list flow_gc_timer; unsigned long ageing_time; + unsigned int numdisabled; + struct list_head disabled; }; static inline u32 tun_hashfn(u32 rxhash) @@ -385,6 +389,23 @@ static void tun_set_real_num_queues(struct tun_struct *tun) netif_set_real_num_rx_queues(tun->dev, tun->numqueues); } +static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile) +{ + tfile->detached = tun; + list_add_tail(&tfile->next, &tun->disabled); + ++tun->numdisabled; +} + +static struct tun_struct *tun_enable_queue(struct tun_file *tfile) +{ + struct tun_struct *tun = tfile->detached; + + tfile->detached = NULL; + list_del_init(&tfile->next); + --tun->numdisabled; + return tun; +} + static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; @@ -406,20 +427,25 @@ static void __tun_detach(struct tun_file *tfile, bool clean) ntfile->queue_index = index; --tun->numqueues; - sock_put(&tfile->sk); + if (clean) + sock_put(&tfile->sk); + else + tun_disable_queue(tun, tfile); synchronize_net(); tun_flow_delete_by_queue(tun, tun->numqueues + 1); /* Drop read queue */ skb_queue_purge(&tfile->sk.sk_receive_queue); tun_set_real_num_queues(tun); - - if (tun->numqueues == 0 && !(tun->flags & TUN_PERSIST)) - if (dev->reg_state == NETREG_REGISTERED) - unregister_netdevice(dev); - } + } else if (tfile->detached && clean) + tun = tun_enable_queue(tfile); if (clean) { + if (tun && tun->numqueues == 0 && tun->numdisabled == 0 && + !(tun->flags & TUN_PERSIST)) + if (tun->dev->reg_state == NETREG_REGISTERED) + unregister_netdevice(tun->dev); + BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags)); sk_release_kernel(&tfile->sk); @@ -436,7 +462,7 @@ static void tun_detach(struct tun_file *tfile, bool clean) static void tun_detach_all(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); - struct tun_file *tfile; + struct tun_file *tfile, *tmp; int i, n = tun->numqueues; for (i = 0; i < n; i++) { @@ -457,6 +483,12 @@ static void tun_detach_all(struct net_device *dev) skb_queue_purge(&tfile->sk.sk_receive_queue); sock_put(&tfile->sk); } + list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { + tun_enable_queue(tfile); + skb_queue_purge(&tfile->sk.sk_receive_queue); + sock_put(&tfile->sk); + } + BUG_ON(tun->numdisabled != 0); } static int tun_attach(struct tun_struct *tun, struct file *file) @@ -473,7 +505,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file) goto out; err = -E2BIG; - if (tun->numqueues == MAX_TAP_QUEUES) + if (!tfile->detached && + tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES) goto out; err = 0; @@ -487,9 +520,13 @@ static int tun_attach(struct tun_struct *tun, struct file *file) tfile->queue_index = tun->numqueues; rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); - sock_hold(&tfile->sk); tun->numqueues++; + if (tfile->detached) + tun_enable_queue(tfile); + else + sock_hold(&tfile->sk); + tun_set_real_num_queues(tun); /* device is allowed to go away first, so no need to hold extra @@ -1162,6 +1199,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; } + skb_reset_network_header(skb); rxhash = skb_get_rxhash(skb); netif_rx_ni(skb); @@ -1349,6 +1387,7 @@ static void tun_free_netdev(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); + BUG_ON(!(list_empty(&tun->disabled))); tun_flow_uninit(tun); free_netdev(dev); } @@ -1543,6 +1582,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) err = tun_attach(tun, file); if (err < 0) return err; + + if (tun->flags & TUN_TAP_MQ && + (tun->numqueues + tun->numdisabled > 1)) + return err; } else { char *name; @@ -1601,6 +1644,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) TUN_USER_FEATURES; dev->features = dev->hw_features; + INIT_LIST_HEAD(&tun->disabled); err = tun_attach(tun, file); if (err < 0) goto err_free_dev; @@ -1755,32 +1799,28 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) { struct tun_file *tfile = file->private_data; struct tun_struct *tun; - struct net_device *dev; int ret = 0; rtnl_lock(); if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { - dev = __dev_get_by_name(tfile->net, ifr->ifr_name); - if (!dev) { - ret = -EINVAL; - goto unlock; - } - - tun = netdev_priv(dev); - if (dev->netdev_ops != &tap_netdev_ops && - dev->netdev_ops != &tun_netdev_ops) + tun = tfile->detached; + if (!tun) ret = -EINVAL; else if (tun_not_capable(tun)) ret = -EPERM; else ret = tun_attach(tun, file); - } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) - __tun_detach(tfile, false); - else + } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { + tun = rcu_dereference_protected(tfile->tun, + lockdep_rtnl_is_held()); + if (!tun || !(tun->flags & TUN_TAP_MQ)) + ret = -EINVAL; + else + __tun_detach(tfile, false); + } else ret = -EINVAL; -unlock: rtnl_unlock(); return ret; } @@ -2092,6 +2132,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) file->private_data = tfile; set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags); + INIT_LIST_HEAD(&tfile->next); return 0; } diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index d0129827602b..3f3d12d766e7 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -457,12 +457,6 @@ int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf) } EXPORT_SYMBOL_GPL(usbnet_cdc_bind); -static int cdc_manage_power(struct usbnet *dev, int on) -{ - dev->intf->needs_remote_wakeup = on; - return 0; -} - static const struct driver_info cdc_info = { .description = "CDC Ethernet Device", .flags = FLAG_ETHER | FLAG_POINTTOPOINT, @@ -470,7 +464,7 @@ static const struct driver_info cdc_info = { .bind = usbnet_cdc_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_status, - .manage_power = cdc_manage_power, + .manage_power = usbnet_manage_power, }; static const struct driver_info wwan_info = { @@ -479,7 +473,7 @@ static const struct driver_info wwan_info = { .bind = usbnet_cdc_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_status, - .manage_power = cdc_manage_power, + .manage_power = usbnet_manage_power, }; /*-------------------------------------------------------------------------*/ @@ -487,6 +481,7 @@ static const struct driver_info wwan_info = { #define HUAWEI_VENDOR_ID 0x12D1 #define NOVATEL_VENDOR_ID 0x1410 #define ZTE_VENDOR_ID 0x19D2 +#define DELL_VENDOR_ID 0x413C static const struct usb_device_id products [] = { /* @@ -594,27 +589,29 @@ static const struct usb_device_id products [] = { /* Novatel USB551L and MC551 - handled by qmi_wwan */ { - .match_flags = USB_DEVICE_ID_MATCH_VENDOR - | USB_DEVICE_ID_MATCH_PRODUCT - | USB_DEVICE_ID_MATCH_INT_INFO, - .idVendor = NOVATEL_VENDOR_ID, - .idProduct = 0xB001, - .bInterfaceClass = USB_CLASS_COMM, - .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, - .bInterfaceProtocol = USB_CDC_PROTO_NONE, + USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0xB001, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, /* Novatel E362 - handled by qmi_wwan */ { - .match_flags = USB_DEVICE_ID_MATCH_VENDOR - | USB_DEVICE_ID_MATCH_PRODUCT - | USB_DEVICE_ID_MATCH_INT_INFO, - .idVendor = NOVATEL_VENDOR_ID, - .idProduct = 0x9010, - .bInterfaceClass = USB_CLASS_COMM, - .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, - .bInterfaceProtocol = USB_CDC_PROTO_NONE, + USB_DEVICE_AND_INTERFACE_INFO(NOVATEL_VENDOR_ID, 0x9010, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + +/* Dell Wireless 5800 (Novatel E362) - handled by qmi_wwan */ +{ + USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x8195, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + +/* Dell Wireless 5800 (Novatel E362) - handled by qmi_wwan */ +{ + USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x8196, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = 0, }, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index d38bc20a60e2..71b6e92b8e9b 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1129,19 +1129,13 @@ static void cdc_ncm_disconnect(struct usb_interface *intf) usbnet_disconnect(intf); } -static int cdc_ncm_manage_power(struct usbnet *dev, int status) -{ - dev->intf->needs_remote_wakeup = status; - return 0; -} - static const struct driver_info cdc_ncm_info = { .description = "CDC NCM", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .check_connect = cdc_ncm_check_connect, - .manage_power = cdc_ncm_manage_power, + .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, @@ -1155,7 +1149,7 @@ static const struct driver_info wwan_info = { .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .check_connect = cdc_ncm_check_connect, - .manage_power = cdc_ncm_manage_power, + .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 1ea91f4237f0..91d7cb9728eb 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -383,6 +383,20 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, + { /* Dell Wireless 5800 (Novatel E362) */ + USB_DEVICE_AND_INTERFACE_INFO(0x413C, 0x8195, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&qmi_wwan_info, + }, + { /* Dell Wireless 5800 V2 (Novatel E362) */ + USB_DEVICE_AND_INTERFACE_INFO(0x413C, 0x8196, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&qmi_wwan_info, + }, /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ @@ -419,6 +433,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x0199, 1)}, /* ZTE MF820S */ {QMI_FIXED_INTF(0x19d2, 0x0200, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0257, 3)}, /* ZTE MF821 */ + {QMI_FIXED_INTF(0x19d2, 0x0284, 4)}, /* ZTE MF880 */ {QMI_FIXED_INTF(0x19d2, 0x0326, 4)}, /* ZTE MF821D */ {QMI_FIXED_INTF(0x19d2, 0x1008, 4)}, /* ZTE (Vodafone) K3570-Z */ {QMI_FIXED_INTF(0x19d2, 0x1010, 4)}, /* ZTE (Vodafone) K3571-Z */ diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index c04110ba677f..3d4bf01641b4 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -719,7 +719,8 @@ int usbnet_stop (struct net_device *net) dev->flags = 0; del_timer_sync (&dev->delay); tasklet_kill (&dev->bh); - if (info->manage_power) + if (info->manage_power && + !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags)) info->manage_power(dev, 0); else usb_autopm_put_interface(dev->intf); @@ -794,14 +795,14 @@ int usbnet_open (struct net_device *net) tasklet_schedule (&dev->bh); if (info->manage_power) { retval = info->manage_power(dev, 1); - if (retval < 0) - goto done_manage_power_error; - usb_autopm_put_interface(dev->intf); + if (retval < 0) { + retval = 0; + set_bit(EVENT_NO_RUNTIME_PM, &dev->flags); + } else { + usb_autopm_put_interface(dev->intf); + } } return retval; - -done_manage_power_error: - clear_bit(EVENT_DEV_OPEN, &dev->flags); done: usb_autopm_put_interface(dev->intf); done_nopm: @@ -1615,6 +1616,16 @@ void usbnet_device_suggests_idle(struct usbnet *dev) } EXPORT_SYMBOL(usbnet_device_suggests_idle); +/* + * For devices that can do without special commands + */ +int usbnet_manage_power(struct usbnet *dev, int on) +{ + dev->intf->needs_remote_wakeup = on; + return 0; +} +EXPORT_SYMBOL(usbnet_manage_power); + /*-------------------------------------------------------------------------*/ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, void *data, u16 size) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 68d64f0313ea..a6fcf15adc4f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -130,7 +130,6 @@ struct skb_vnet_hdr { struct virtio_net_hdr hdr; struct virtio_net_hdr_mrg_rxbuf mhdr; }; - unsigned int num_sg; }; struct padded_vnet_hdr { @@ -530,10 +529,10 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) err = add_recvbuf_small(rq, gfp); oom = err == -ENOMEM; - if (err < 0) + if (err) break; ++rq->num; - } while (err > 0); + } while (rq->vq->num_free); if (unlikely(rq->num > rq->max)) rq->max = rq->num; virtqueue_kick(rq->vq); @@ -640,10 +639,10 @@ static int virtnet_open(struct net_device *dev) return 0; } -static unsigned int free_old_xmit_skbs(struct send_queue *sq) +static void free_old_xmit_skbs(struct send_queue *sq) { struct sk_buff *skb; - unsigned int len, tot_sgs = 0; + unsigned int len; struct virtnet_info *vi = sq->vq->vdev->priv; struct virtnet_stats *stats = this_cpu_ptr(vi->stats); @@ -655,10 +654,8 @@ static unsigned int free_old_xmit_skbs(struct send_queue *sq) stats->tx_packets++; u64_stats_update_end(&stats->tx_syncp); - tot_sgs += skb_vnet_hdr(skb)->num_sg; dev_kfree_skb_any(skb); } - return tot_sgs; } static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) @@ -666,6 +663,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; struct virtnet_info *vi = sq->vq->vdev->priv; + unsigned num_sg; pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); @@ -704,8 +702,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) else sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr); - hdr->num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1; - return virtqueue_add_buf(sq->vq, sq->sg, hdr->num_sg, + num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1; + return virtqueue_add_buf(sq->vq, sq->sg, num_sg, 0, skb, GFP_ATOMIC); } @@ -714,28 +712,20 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int qnum = skb_get_queue_mapping(skb); struct send_queue *sq = &vi->sq[qnum]; - int capacity; + int err; /* Free up any pending old buffers before queueing new ones. */ free_old_xmit_skbs(sq); /* Try to transmit */ - capacity = xmit_skb(sq, skb); - - /* This can happen with OOM and indirect buffers. */ - if (unlikely(capacity < 0)) { - if (likely(capacity == -ENOMEM)) { - if (net_ratelimit()) - dev_warn(&dev->dev, - "TXQ (%d) failure: out of memory\n", - qnum); - } else { - dev->stats.tx_fifo_errors++; - if (net_ratelimit()) - dev_warn(&dev->dev, - "Unexpected TXQ (%d) failure: %d\n", - qnum, capacity); - } + err = xmit_skb(sq, skb); + + /* This should not happen! */ + if (unlikely(err)) { + dev->stats.tx_fifo_errors++; + if (net_ratelimit()) + dev_warn(&dev->dev, + "Unexpected TXQ (%d) queue failure: %d\n", qnum, err); dev->stats.tx_dropped++; kfree_skb(skb); return NETDEV_TX_OK; @@ -748,12 +738,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Apparently nice girls don't return TX_BUSY; stop the queue * before it gets out of hand. Naturally, this wastes entries. */ - if (capacity < 2+MAX_SKB_FRAGS) { + if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { netif_stop_subqueue(dev, qnum); if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. */ - capacity += free_old_xmit_skbs(sq); - if (capacity >= 2+MAX_SKB_FRAGS) { + free_old_xmit_skbs(sq); + if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { netif_start_subqueue(dev, qnum); virtqueue_disable_cb(sq->vq); } diff --git a/drivers/net/wimax/i2400m/i2400m-usb.h b/drivers/net/wimax/i2400m/i2400m-usb.h index 6650fde99e1d..9f1e947f3557 100644 --- a/drivers/net/wimax/i2400m/i2400m-usb.h +++ b/drivers/net/wimax/i2400m/i2400m-usb.h @@ -152,6 +152,9 @@ enum { /* Device IDs */ USB_DEVICE_ID_I6050 = 0x0186, USB_DEVICE_ID_I6050_2 = 0x0188, + USB_DEVICE_ID_I6150 = 0x07d6, + USB_DEVICE_ID_I6150_2 = 0x07d7, + USB_DEVICE_ID_I6150_3 = 0x07d9, USB_DEVICE_ID_I6250 = 0x0187, }; diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index 713d033891e6..080f36303a4f 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -510,6 +510,9 @@ int i2400mu_probe(struct usb_interface *iface, switch (id->idProduct) { case USB_DEVICE_ID_I6050: case USB_DEVICE_ID_I6050_2: + case USB_DEVICE_ID_I6150: + case USB_DEVICE_ID_I6150_2: + case USB_DEVICE_ID_I6150_3: case USB_DEVICE_ID_I6250: i2400mu->i6050 = 1; break; @@ -759,6 +762,9 @@ static struct usb_device_id i2400mu_id_table[] = { { USB_DEVICE(0x8086, USB_DEVICE_ID_I6050) }, { USB_DEVICE(0x8086, USB_DEVICE_ID_I6050_2) }, + { USB_DEVICE(0x8087, USB_DEVICE_ID_I6150) }, + { USB_DEVICE(0x8087, USB_DEVICE_ID_I6150_2) }, + { USB_DEVICE(0x8087, USB_DEVICE_ID_I6150_3) }, { USB_DEVICE(0x8086, USB_DEVICE_ID_I6250) }, { USB_DEVICE(0x8086, 0x0181) }, { USB_DEVICE(0x8086, 0x1403) }, diff --git a/drivers/net/wireless/Makefile b/drivers/net/wireless/Makefile index 062dfdff6364..67156efe14c4 100644 --- a/drivers/net/wireless/Makefile +++ b/drivers/net/wireless/Makefile @@ -47,7 +47,7 @@ obj-$(CONFIG_RT2X00) += rt2x00/ obj-$(CONFIG_P54_COMMON) += p54/ -obj-$(CONFIG_ATH_COMMON) += ath/ +obj-$(CONFIG_ATH_CARDS) += ath/ obj-$(CONFIG_MAC80211_HWSIM) += mac80211_hwsim.o diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 4ffb6a584cd0..44f8b3f3cbed 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -685,6 +685,14 @@ void rt2x00lib_rxdone(struct queue_entry *entry, gfp_t gfp) * to mac80211. */ rx_status = IEEE80211_SKB_RXCB(entry->skb); + + /* Ensure that all fields of rx_status are initialized + * properly. The skb->cb array was used for driver + * specific informations, so rx_status might contain + * garbage. + */ + memset(rx_status, 0, sizeof(*rx_status)); + rx_status->mactime = rxdesc.timestamp; rx_status->band = rt2x00dev->curr_band; rx_status->freq = rt2x00dev->curr_freq; diff --git a/drivers/of/base.c b/drivers/of/base.c index db8d211a0d05..2390ddb22d60 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -629,7 +629,7 @@ struct device_node *of_find_matching_node_and_match(struct device_node *from, read_unlock(&devtree_lock); return np; } -EXPORT_SYMBOL(of_find_matching_node); +EXPORT_SYMBOL(of_find_matching_node_and_match); /** * of_modalias_node - Lookup appropriate modalias for a device node diff --git a/drivers/pinctrl/pinctrl-exynos5440.c b/drivers/pinctrl/pinctrl-exynos5440.c index b8635f634e91..07db89528dc3 100644 --- a/drivers/pinctrl/pinctrl-exynos5440.c +++ b/drivers/pinctrl/pinctrl-exynos5440.c @@ -117,7 +117,7 @@ struct exynos5440_pinctrl_priv_data { }; /* list of all possible config options supported */ -struct pin_config { +static struct pin_config { char *prop_cfg; unsigned int cfg_type; } pcfgs[] = { diff --git a/drivers/pinctrl/pinctrl-samsung.c b/drivers/pinctrl/pinctrl-samsung.c index 8f31b656c4e9..864fed822f9d 100644 --- a/drivers/pinctrl/pinctrl-samsung.c +++ b/drivers/pinctrl/pinctrl-samsung.c @@ -37,7 +37,7 @@ #define FSUFFIX_LEN sizeof(FUNCTION_SUFFIX) /* list of all possible config options supported */ -struct pin_config { +static struct pin_config { char *prop_cfg; unsigned int cfg_type; } pcfgs[] = { diff --git a/drivers/pinctrl/pinctrl-samsung.h b/drivers/pinctrl/pinctrl-samsung.h index 5addfd16e3cc..e2d4e67f7e88 100644 --- a/drivers/pinctrl/pinctrl-samsung.h +++ b/drivers/pinctrl/pinctrl-samsung.h @@ -104,7 +104,7 @@ struct samsung_pinctrl_drv_data; /** * struct samsung_pin_bank: represent a controller pin-bank. - * @reg_offset: starting offset of the pin-bank registers. + * @pctl_offset: starting offset of the pin-bank registers. * @pin_base: starting pin number of the bank. * @nr_pins: number of pins included in this bank. * @func_width: width of the function selector bit field. diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 1859f71372e2..027096fe6a12 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -764,7 +764,7 @@ int rpmsg_send_offchannel_raw(struct rpmsg_channel *rpdev, u32 src, u32 dst, /* add message to the remote processor's virtqueue */ err = virtqueue_add_buf(vrp->svq, &sg, 1, 0, msg, GFP_KERNEL); - if (err < 0) { + if (err) { /* * need to reclaim the buffer here, otherwise it's lost * (memory won't leak, but rpmsg won't use it again for TX). @@ -776,8 +776,6 @@ int rpmsg_send_offchannel_raw(struct rpmsg_channel *rpdev, u32 src, u32 dst, /* tell the remote processor it has a pending message to read */ virtqueue_kick(vrp->svq); - - err = 0; out: mutex_unlock(&vrp->tx_lock); return err; @@ -980,7 +978,7 @@ static int rpmsg_probe(struct virtio_device *vdev) err = virtqueue_add_buf(vrp->rvq, &sg, 0, 1, cpu_addr, GFP_KERNEL); - WARN_ON(err < 0); /* sanity check; this can't really happen */ + WARN_ON(err); /* sanity check; this can't really happen */ } /* suppress "tx-complete" interrupts */ diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index f8a0aab218cb..5143629dedbd 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -244,7 +244,6 @@ void rtc_device_unregister(struct rtc_device *rtc) rtc_proc_del_device(rtc); device_unregister(&rtc->dev); rtc->ops = NULL; - ida_simple_remove(&rtc_ida, rtc->id); mutex_unlock(&rtc->ops_lock); put_device(&rtc->dev); } diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index 18a4f0dd78a3..8da7a5cf83c6 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -36,6 +36,7 @@ #include <linux/platform_device.h> #include <linux/rtc.h> #include <linux/sched.h> +#include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/of.h> diff --git a/drivers/scsi/csiostor/t4fw_api_stor.h b/drivers/scsi/csiostor/t4fw_api_stor.h index 1223e0d5fc07..097e52c0f8e1 100644 --- a/drivers/scsi/csiostor/t4fw_api_stor.h +++ b/drivers/scsi/csiostor/t4fw_api_stor.h @@ -40,45 +40,6 @@ * R E T U R N V A L U E S ********************************/ -enum fw_retval { - FW_SUCCESS = 0, /* completed sucessfully */ - FW_EPERM = 1, /* operation not permitted */ - FW_ENOENT = 2, /* no such file or directory */ - FW_EIO = 5, /* input/output error; hw bad */ - FW_ENOEXEC = 8, /* exec format error; inv microcode */ - FW_EAGAIN = 11, /* try again */ - FW_ENOMEM = 12, /* out of memory */ - FW_EFAULT = 14, /* bad address; fw bad */ - FW_EBUSY = 16, /* resource busy */ - FW_EEXIST = 17, /* file exists */ - FW_EINVAL = 22, /* invalid argument */ - FW_ENOSPC = 28, /* no space left on device */ - FW_ENOSYS = 38, /* functionality not implemented */ - FW_EPROTO = 71, /* protocol error */ - FW_EADDRINUSE = 98, /* address already in use */ - FW_EADDRNOTAVAIL = 99, /* cannot assigned requested address */ - FW_ENETDOWN = 100, /* network is down */ - FW_ENETUNREACH = 101, /* network is unreachable */ - FW_ENOBUFS = 105, /* no buffer space available */ - FW_ETIMEDOUT = 110, /* timeout */ - FW_EINPROGRESS = 115, /* fw internal */ - FW_SCSI_ABORT_REQUESTED = 128, /* */ - FW_SCSI_ABORT_TIMEDOUT = 129, /* */ - FW_SCSI_ABORTED = 130, /* */ - FW_SCSI_CLOSE_REQUESTED = 131, /* */ - FW_ERR_LINK_DOWN = 132, /* */ - FW_RDEV_NOT_READY = 133, /* */ - FW_ERR_RDEV_LOST = 134, /* */ - FW_ERR_RDEV_LOGO = 135, /* */ - FW_FCOE_NO_XCHG = 136, /* */ - FW_SCSI_RSP_ERR = 137, /* */ - FW_ERR_RDEV_IMPL_LOGO = 138, /* */ - FW_SCSI_UNDER_FLOW_ERR = 139, /* */ - FW_SCSI_OVER_FLOW_ERR = 140, /* */ - FW_SCSI_DDP_ERR = 141, /* DDP error*/ - FW_SCSI_TASK_ERR = 142, /* No SCSI tasks available */ -}; - enum fw_fcoe_link_sub_op { FCOE_LINK_DOWN = 0x0, FCOE_LINK_UP = 0x1, diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index dd8dc27fa32c..74ab67a169ec 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -215,7 +215,7 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) static int virtscsi_kick_event(struct virtio_scsi *vscsi, struct virtio_scsi_event_node *event_node) { - int ret; + int err; struct scatterlist sg; unsigned long flags; @@ -223,13 +223,14 @@ static int virtscsi_kick_event(struct virtio_scsi *vscsi, spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags); - ret = virtqueue_add_buf(vscsi->event_vq.vq, &sg, 0, 1, event_node, GFP_ATOMIC); - if (ret >= 0) + err = virtqueue_add_buf(vscsi->event_vq.vq, &sg, 0, 1, event_node, + GFP_ATOMIC); + if (!err) virtqueue_kick(vscsi->event_vq.vq); spin_unlock_irqrestore(&vscsi->event_vq.vq_lock, flags); - return ret; + return err; } static int virtscsi_kick_event_all(struct virtio_scsi *vscsi) @@ -410,22 +411,23 @@ static int virtscsi_kick_cmd(struct virtio_scsi_target_state *tgt, { unsigned int out_num, in_num; unsigned long flags; - int ret; + int err; + bool needs_kick = false; spin_lock_irqsave(&tgt->tgt_lock, flags); virtscsi_map_cmd(tgt, cmd, &out_num, &in_num, req_size, resp_size); spin_lock(&vq->vq_lock); - ret = virtqueue_add_buf(vq->vq, tgt->sg, out_num, in_num, cmd, gfp); + err = virtqueue_add_buf(vq->vq, tgt->sg, out_num, in_num, cmd, gfp); spin_unlock(&tgt->tgt_lock); - if (ret >= 0) - ret = virtqueue_kick_prepare(vq->vq); + if (!err) + needs_kick = virtqueue_kick_prepare(vq->vq); spin_unlock_irqrestore(&vq->vq_lock, flags); - if (ret > 0) + if (needs_kick) virtqueue_notify(vq->vq); - return ret; + return err; } static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) @@ -467,7 +469,7 @@ static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) if (virtscsi_kick_cmd(tgt, &vscsi->req_vq, cmd, sizeof cmd->req.cmd, sizeof cmd->resp.cmd, - GFP_ATOMIC) >= 0) + GFP_ATOMIC) == 0) ret = 0; else mempool_free(cmd, virtscsi_cmd_pool); diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 75c0c4f5fdf2..ab34497bcfee 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -20,6 +20,7 @@ #include <linux/spi/spi.h> #include <linux/slab.h> #include <linux/platform_data/atmel.h> +#include <linux/of.h> #include <asm/io.h> #include <asm/gpio.h> @@ -768,6 +769,10 @@ static int atmel_spi_setup(struct spi_device *spi) /* chipselect must have been muxed as GPIO (e.g. in board setup) */ npcs_pin = (unsigned int)spi->controller_data; + + if (gpio_is_valid(spi->cs_gpio)) + npcs_pin = spi->cs_gpio; + asd = spi->controller_state; if (!asd) { asd = kzalloc(sizeof(struct atmel_spi_device), GFP_KERNEL); @@ -937,8 +942,9 @@ static int atmel_spi_probe(struct platform_device *pdev) /* the spi->mode bits understood by this driver: */ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + master->dev.of_node = pdev->dev.of_node; master->bus_num = pdev->id; - master->num_chipselect = 4; + master->num_chipselect = master->dev.of_node ? 0 : 4; master->setup = atmel_spi_setup; master->transfer = atmel_spi_transfer; master->cleanup = atmel_spi_cleanup; @@ -1064,11 +1070,20 @@ static int atmel_spi_resume(struct platform_device *pdev) #define atmel_spi_resume NULL #endif +#if defined(CONFIG_OF) +static const struct of_device_id atmel_spi_dt_ids[] = { + { .compatible = "atmel,at91rm9200-spi" }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(of, atmel_spi_dt_ids); +#endif static struct platform_driver atmel_spi_driver = { .driver = { .name = "atmel_spi", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(atmel_spi_dt_ids), }, .suspend = atmel_spi_suspend, .resume = atmel_spi_resume, diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 4dd7b7ce5c5a..ad93231a8038 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -215,6 +215,10 @@ static void flush_fifo(struct s3c64xx_spi_driver_data *sdd) writel(0, regs + S3C64XX_SPI_PACKET_CNT); val = readl(regs + S3C64XX_SPI_CH_CFG); + val &= ~(S3C64XX_SPI_CH_RXCH_ON | S3C64XX_SPI_CH_TXCH_ON); + writel(val, regs + S3C64XX_SPI_CH_CFG); + + val = readl(regs + S3C64XX_SPI_CH_CFG); val |= S3C64XX_SPI_CH_SW_RST; val &= ~S3C64XX_SPI_CH_HS_EN; writel(val, regs + S3C64XX_SPI_CH_CFG); @@ -248,10 +252,6 @@ static void flush_fifo(struct s3c64xx_spi_driver_data *sdd) val = readl(regs + S3C64XX_SPI_MODE_CFG); val &= ~(S3C64XX_SPI_MODE_TXDMA_ON | S3C64XX_SPI_MODE_RXDMA_ON); writel(val, regs + S3C64XX_SPI_MODE_CFG); - - val = readl(regs + S3C64XX_SPI_CH_CFG); - val &= ~(S3C64XX_SPI_CH_RXCH_ON | S3C64XX_SPI_CH_TXCH_ON); - writel(val, regs + S3C64XX_SPI_CH_CFG); } static void s3c64xx_spi_dmacb(void *data) @@ -771,8 +771,6 @@ static int s3c64xx_spi_transfer_one_message(struct spi_master *master, if (list_is_last(&xfer->transfer_list, &msg->transfers)) cs_toggle = 1; - else - disable_cs(sdd, spi); } msg->actual_length += xfer->len; diff --git a/drivers/spi/spi-sh-hspi.c b/drivers/spi/spi-sh-hspi.c index 32f7b55fce09..60cfae51c713 100644 --- a/drivers/spi/spi-sh-hspi.c +++ b/drivers/spi/spi-sh-hspi.c @@ -290,7 +290,7 @@ static int hspi_probe(struct platform_device *pdev) } clk = clk_get(NULL, "shyway_clk"); - if (!clk) { + if (IS_ERR(clk)) { dev_err(&pdev->dev, "shyway_clk is required\n"); ret = -EINVAL; goto error0; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index ab095acdb2a8..19ee901577da 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -824,6 +824,7 @@ static void of_register_spi_devices(struct spi_master *master) struct spi_device *spi; struct device_node *nc; const __be32 *prop; + char modalias[SPI_NAME_SIZE + 4]; int rc; int len; @@ -887,7 +888,9 @@ static void of_register_spi_devices(struct spi_master *master) spi->dev.of_node = nc; /* Register the new device */ - request_module(spi->modalias); + snprintf(modalias, sizeof(modalias), "%s%s", SPI_MODULE_PREFIX, + spi->modalias); + request_module(modalias); rc = spi_add_device(spi); if (rc) { dev_err(&master->dev, "spi_device register error %s\n", diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 23f797eb7a28..57d6b29c039c 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -41,8 +41,7 @@ #include <linux/of.h> #include <linux/gpio.h> #include <linux/pinctrl/consumer.h> - -#include <plat/omap-serial.h> +#include <linux/platform_data/serial-omap.h> #define OMAP_MAX_HSUART_PORTS 6 diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6c119944bbb6..b28e66c4376a 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -43,6 +43,10 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) u16 cmd; u8 msix_pos; + ret = pci_enable_device(pdev); + if (ret) + return ret; + vdev->reset_works = (pci_reset_function(pdev) == 0); pci_save_state(pdev); vdev->pci_saved_state = pci_store_saved_state(pdev); @@ -51,8 +55,11 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) __func__, dev_name(&pdev->dev)); ret = vfio_config_init(vdev); - if (ret) - goto out; + if (ret) { + pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state); + pci_disable_device(pdev); + return ret; + } if (likely(!nointxmask)) vdev->pci_2_3 = pci_intx_mask_supported(pdev); @@ -77,24 +84,15 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } else vdev->msix_bar = 0xFF; - ret = pci_enable_device(pdev); - if (ret) - goto out; - - return ret; - -out: - kfree(vdev->pci_saved_state); - vdev->pci_saved_state = NULL; - vfio_config_free(vdev); - return ret; + return 0; } static void vfio_pci_disable(struct vfio_pci_device *vdev) { + struct pci_dev *pdev = vdev->pdev; int bar; - pci_disable_device(vdev->pdev); + pci_disable_device(pdev); vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, @@ -104,22 +102,40 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) vfio_config_free(vdev); - pci_reset_function(vdev->pdev); - - if (pci_load_and_free_saved_state(vdev->pdev, - &vdev->pci_saved_state) == 0) - pci_restore_state(vdev->pdev); - else - pr_info("%s: Couldn't reload %s saved state\n", - __func__, dev_name(&vdev->pdev->dev)); - for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { if (!vdev->barmap[bar]) continue; - pci_iounmap(vdev->pdev, vdev->barmap[bar]); - pci_release_selected_regions(vdev->pdev, 1 << bar); + pci_iounmap(pdev, vdev->barmap[bar]); + pci_release_selected_regions(pdev, 1 << bar); vdev->barmap[bar] = NULL; } + + /* + * If we have saved state, restore it. If we can reset the device, + * even better. Resetting with current state seems better than + * nothing, but saving and restoring current state without reset + * is just busy work. + */ + if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) { + pr_info("%s: Couldn't reload %s saved state\n", + __func__, dev_name(&pdev->dev)); + + if (!vdev->reset_works) + return; + + pci_save_state(pdev); + } + + /* + * Disable INTx and MSI, presumably to avoid spurious interrupts + * during reset. Stolen from pci_reset_function() + */ + pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); + + if (vdev->reset_works) + __pci_reset_function(pdev); + + pci_restore_state(pdev); } static void vfio_pci_release(void *device_data) @@ -327,15 +343,10 @@ static long vfio_pci_ioctl(void *device_data, hdr.count > vfio_pci_get_irq_count(vdev, hdr.index)) return -EINVAL; - data = kmalloc(hdr.count * size, GFP_KERNEL); - if (!data) - return -ENOMEM; - - if (copy_from_user(data, (void __user *)(arg + minsz), - hdr.count * size)) { - kfree(data); - return -EFAULT; - } + data = memdup_user((void __user *)(arg + minsz), + hdr.count * size); + if (IS_ERR(data)) + return PTR_ERR(data); } mutex_lock(&vdev->igate); @@ -562,9 +573,9 @@ static int __init vfio_pci_init(void) return 0; -out_virqfd: - vfio_pci_virqfd_exit(); out_driver: + vfio_pci_virqfd_exit(); +out_virqfd: vfio_pci_uninit_perm_bits(); return ret; } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 56097c6d072d..12c264d3b058 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -191,6 +191,17 @@ static void vfio_container_put(struct vfio_container *container) kref_put(&container->kref, vfio_container_release); } +static void vfio_group_unlock_and_free(struct vfio_group *group) +{ + mutex_unlock(&vfio.group_lock); + /* + * Unregister outside of lock. A spurious callback is harmless now + * that the group is no longer in vfio.group_list. + */ + iommu_group_unregister_notifier(group->iommu_group, &group->nb); + kfree(group); +} + /** * Group objects - create, release, get, put, search */ @@ -229,8 +240,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) minor = vfio_alloc_group_minor(group); if (minor < 0) { - mutex_unlock(&vfio.group_lock); - kfree(group); + vfio_group_unlock_and_free(group); return ERR_PTR(minor); } @@ -239,8 +249,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) if (tmp->iommu_group == iommu_group) { vfio_group_get(tmp); vfio_free_group_minor(minor); - mutex_unlock(&vfio.group_lock); - kfree(group); + vfio_group_unlock_and_free(group); return tmp; } } @@ -249,8 +258,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) group, "%d", iommu_group_id(iommu_group)); if (IS_ERR(dev)) { vfio_free_group_minor(minor); - mutex_unlock(&vfio.group_lock); - kfree(group); + vfio_group_unlock_and_free(group); return (struct vfio_group *)dev; /* ERR_PTR */ } @@ -274,16 +282,7 @@ static void vfio_group_release(struct kref *kref) device_destroy(vfio.class, MKDEV(MAJOR(vfio.devt), group->minor)); list_del(&group->vfio_next); vfio_free_group_minor(group->minor); - - mutex_unlock(&vfio.group_lock); - - /* - * Unregister outside of lock. A spurious callback is harmless now - * that the group is no longer in vfio.group_list. - */ - iommu_group_unregister_notifier(group->iommu_group, &group->nb); - - kfree(group); + vfio_group_unlock_and_free(group); } static void vfio_group_put(struct vfio_group *group) @@ -466,8 +465,9 @@ static int vfio_dev_viable(struct device *dev, void *data) { struct vfio_group *group = data; struct vfio_device *device; + struct device_driver *drv = ACCESS_ONCE(dev->driver); - if (!dev->driver || vfio_whitelisted_driver(dev->driver)) + if (!drv || vfio_whitelisted_driver(drv)) return 0; device = vfio_group_get_device(group, dev); diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 809b0de59c09..ee59b74768d9 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -10,33 +10,32 @@ static DEFINE_IDA(virtio_index_ida); static ssize_t device_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_d); return sprintf(buf, "0x%04x\n", dev->id.device); } static ssize_t vendor_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_d); return sprintf(buf, "0x%04x\n", dev->id.vendor); } static ssize_t status_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_d); return sprintf(buf, "0x%08x\n", dev->config->get_status(dev)); } static ssize_t modalias_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - + struct virtio_device *dev = dev_to_virtio(_d); return sprintf(buf, "virtio:d%08Xv%08X\n", dev->id.device, dev->id.vendor); } static ssize_t features_show(struct device *_d, struct device_attribute *attr, char *buf) { - struct virtio_device *dev = container_of(_d, struct virtio_device, dev); + struct virtio_device *dev = dev_to_virtio(_d); unsigned int i; ssize_t len = 0; @@ -71,10 +70,10 @@ static inline int virtio_id_match(const struct virtio_device *dev, static int virtio_dev_match(struct device *_dv, struct device_driver *_dr) { unsigned int i; - struct virtio_device *dev = container_of(_dv,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_dv); const struct virtio_device_id *ids; - ids = container_of(_dr, struct virtio_driver, driver)->id_table; + ids = drv_to_virtio(_dr)->id_table; for (i = 0; ids[i].device; i++) if (virtio_id_match(dev, &ids[i])) return 1; @@ -83,7 +82,7 @@ static int virtio_dev_match(struct device *_dv, struct device_driver *_dr) static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env) { - struct virtio_device *dev = container_of(_dv,struct virtio_device,dev); + struct virtio_device *dev = dev_to_virtio(_dv); return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X", dev->id.device, dev->id.vendor); @@ -98,8 +97,7 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev, unsigned int fbit) { unsigned int i; - struct virtio_driver *drv = container_of(vdev->dev.driver, - struct virtio_driver, driver); + struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); for (i = 0; i < drv->feature_table_size; i++) if (drv->feature_table[i] == fbit) @@ -111,9 +109,8 @@ EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature); static int virtio_dev_probe(struct device *_d) { int err, i; - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - struct virtio_driver *drv = container_of(dev->dev.driver, - struct virtio_driver, driver); + struct virtio_device *dev = dev_to_virtio(_d); + struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); u32 device_features; /* We have a driver! */ @@ -152,9 +149,8 @@ static int virtio_dev_probe(struct device *_d) static int virtio_dev_remove(struct device *_d) { - struct virtio_device *dev = container_of(_d,struct virtio_device,dev); - struct virtio_driver *drv = container_of(dev->dev.driver, - struct virtio_driver, driver); + struct virtio_device *dev = dev_to_virtio(_d); + struct virtio_driver *drv = drv_to_virtio(dev->dev.driver); drv->remove(dev); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 2a70558b36ea..d19fe3e323b4 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -139,10 +139,9 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num) struct page *page = balloon_page_enqueue(vb_dev_info); if (!page) { - if (printk_ratelimit()) - dev_printk(KERN_INFO, &vb->vdev->dev, - "Out of puff! Can't get %u pages\n", - VIRTIO_BALLOON_PAGES_PER_PAGE); + dev_info_ratelimited(&vb->vdev->dev, + "Out of puff! Can't get %u pages\n", + VIRTIO_BALLOON_PAGES_PER_PAGE); /* Sleep for at least 1/5 of a second before retry. */ msleep(200); break; diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 6b1b7e184939..634f80bcdbd7 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -225,7 +225,7 @@ static void vm_notify(struct virtqueue *vq) /* We write the queue's selector into the notification register to * signal the other end */ - writel(virtqueue_get_queue_index(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); + writel(vq->index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY); } /* Notify all virtqueues on an interrupt. */ @@ -266,7 +266,7 @@ static void vm_del_vq(struct virtqueue *vq) struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev); struct virtio_mmio_vq_info *info = vq->priv; unsigned long flags, size; - unsigned int index = virtqueue_get_queue_index(vq); + unsigned int index = vq->index; spin_lock_irqsave(&vm_dev->lock, flags); list_del(&info->node); @@ -521,25 +521,33 @@ static int vm_cmdline_set(const char *device, int err; struct resource resources[2] = {}; char *str; - long long int base; + long long int base, size; + unsigned int irq; int processed, consumed = 0; struct platform_device *pdev; - resources[0].flags = IORESOURCE_MEM; - resources[1].flags = IORESOURCE_IRQ; - - resources[0].end = memparse(device, &str) - 1; + /* Consume "size" part of the command line parameter */ + size = memparse(device, &str); + /* Get "@<base>:<irq>[:<id>]" chunks */ processed = sscanf(str, "@%lli:%u%n:%d%n", - &base, &resources[1].start, &consumed, + &base, &irq, &consumed, &vm_cmdline_id, &consumed); - if (processed < 2 || processed > 3 || str[consumed]) + /* + * sscanf() must processes at least 2 chunks; also there + * must be no extra characters after the last chunk, so + * str[consumed] must be '\0' + */ + if (processed < 2 || str[consumed]) return -EINVAL; + resources[0].flags = IORESOURCE_MEM; resources[0].start = base; - resources[0].end += base; - resources[1].end = resources[1].start; + resources[0].end = base + size - 1; + + resources[1].flags = IORESOURCE_IRQ; + resources[1].start = resources[1].end = irq; if (!vm_cmdline_parent_registered) { err = device_register(&vm_cmdline_parent); diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index c33aea36598a..e3ecc94591ad 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -203,8 +203,7 @@ static void vp_notify(struct virtqueue *vq) /* we write the queue's selector into the notification register to * signal the other end */ - iowrite16(virtqueue_get_queue_index(vq), - vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); + iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); } /* Handle a configuration change: Tell driver if it wants to know. */ @@ -479,8 +478,7 @@ static void vp_del_vq(struct virtqueue *vq) list_del(&info->node); spin_unlock_irqrestore(&vp_dev->lock, flags); - iowrite16(virtqueue_get_queue_index(vq), - vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); if (vp_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, @@ -830,16 +828,4 @@ static struct pci_driver virtio_pci_driver = { #endif }; -static int __init virtio_pci_init(void) -{ - return pci_register_driver(&virtio_pci_driver); -} - -module_init(virtio_pci_init); - -static void __exit virtio_pci_exit(void) -{ - pci_unregister_driver(&virtio_pci_driver); -} - -module_exit(virtio_pci_exit); +module_pci_driver(virtio_pci_driver); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index e639584b2dbd..ffd7e7da5d3b 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -93,8 +93,6 @@ struct vring_virtqueue /* Host publishes avail event idx */ bool event; - /* Number of free buffers */ - unsigned int num_free; /* Head of free buffer list. */ unsigned int free_head; /* Number we've added since last sync. */ @@ -106,9 +104,6 @@ struct vring_virtqueue /* How to notify other side. FIXME: commonalize hcalls! */ void (*notify)(struct virtqueue *vq); - /* Index of the queue */ - int queue_index; - #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; @@ -135,6 +130,13 @@ static int vring_add_indirect(struct vring_virtqueue *vq, unsigned head; int i; + /* + * We require lowmem mappings for the descriptors because + * otherwise virt_to_phys will give us bogus addresses in the + * virtqueue. + */ + gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH); + desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp); if (!desc) return -ENOMEM; @@ -160,7 +162,7 @@ static int vring_add_indirect(struct vring_virtqueue *vq, desc[i-1].next = 0; /* We're about to use a buffer */ - vq->num_free--; + vq->vq.num_free--; /* Use a single buffer which doesn't continue */ head = vq->free_head; @@ -174,13 +176,6 @@ static int vring_add_indirect(struct vring_virtqueue *vq, return head; } -int virtqueue_get_queue_index(struct virtqueue *_vq) -{ - struct vring_virtqueue *vq = to_vvq(_vq); - return vq->queue_index; -} -EXPORT_SYMBOL_GPL(virtqueue_get_queue_index); - /** * virtqueue_add_buf - expose buffer to other end * @vq: the struct virtqueue we're talking about. @@ -193,10 +188,7 @@ EXPORT_SYMBOL_GPL(virtqueue_get_queue_index); * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * - * Returns remaining capacity of queue or a negative error - * (ie. ENOSPC). Note that it only really makes sense to treat all - * positive return values as "available": indirect buffers mean that - * we can put an entire sg[] array inside a single queue entry. + * Returns zero or a negative error (ie. ENOSPC, ENOMEM). */ int virtqueue_add_buf(struct virtqueue *_vq, struct scatterlist sg[], @@ -228,7 +220,7 @@ int virtqueue_add_buf(struct virtqueue *_vq, /* If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. FIXME: tune this threshold */ - if (vq->indirect && (out + in) > 1 && vq->num_free) { + if (vq->indirect && (out + in) > 1 && vq->vq.num_free) { head = vring_add_indirect(vq, sg, out, in, gfp); if (likely(head >= 0)) goto add_head; @@ -237,9 +229,9 @@ int virtqueue_add_buf(struct virtqueue *_vq, BUG_ON(out + in > vq->vring.num); BUG_ON(out + in == 0); - if (vq->num_free < out + in) { + if (vq->vq.num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", - out + in, vq->num_free); + out + in, vq->vq.num_free); /* FIXME: for historical reasons, we force a notify here if * there are outgoing parts to the buffer. Presumably the * host should service the ring ASAP. */ @@ -250,7 +242,7 @@ int virtqueue_add_buf(struct virtqueue *_vq, } /* We're about to use some buffers from the free list. */ - vq->num_free -= out + in; + vq->vq.num_free -= out + in; head = vq->free_head; for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { @@ -296,7 +288,7 @@ add_head: pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); - return vq->num_free; + return 0; } EXPORT_SYMBOL_GPL(virtqueue_add_buf); @@ -393,13 +385,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { i = vq->vring.desc[i].next; - vq->num_free++; + vq->vq.num_free++; } vq->vring.desc[i].next = vq->free_head; vq->free_head = head; /* Plus final descriptor */ - vq->num_free++; + vq->vq.num_free++; } static inline bool more_used(const struct vring_virtqueue *vq) @@ -599,7 +591,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) return buf; } /* That should have freed everything. */ - BUG_ON(vq->num_free != vq->vring.num); + BUG_ON(vq->vq.num_free != vq->vring.num); END_USE(vq); return NULL; @@ -653,12 +645,13 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.name = name; + vq->vq.num_free = num; + vq->vq.index = index; vq->notify = notify; vq->weak_barriers = weak_barriers; vq->broken = false; vq->last_used_idx = 0; vq->num_added = 0; - vq->queue_index = index; list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; @@ -673,7 +666,6 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; /* Put everything in free lists. */ - vq->num_free = num; vq->free_head = 0; for (i = 0; i < num-1; i++) { vq->vring.desc[i].next = i+1; diff --git a/fs/Kconfig b/fs/Kconfig index eaff24a19502..cfe512fd1caf 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -220,6 +220,7 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" +source "fs/f2fs/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/Makefile b/fs/Makefile index 1d7af79288a0..9d53192236fc 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -123,6 +123,7 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ +obj-$(CONFIG_F2FS_FS) += f2fs/ obj-y += exofs/ # Multiple modules obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index e9bad5093a3f..5f95d1ed9c6d 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -45,6 +45,14 @@ static int adfs_readpage(struct file *file, struct page *page) return block_read_full_page(page, adfs_get_block); } +static void adfs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) + truncate_pagecache(inode, to, inode->i_size); +} + static int adfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -55,11 +63,8 @@ static int adfs_write_begin(struct file *file, struct address_space *mapping, ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, adfs_get_block, &ADFS_I(mapping->host)->mmu_private); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + adfs_write_failed(mapping, pos + len); return ret; } diff --git a/fs/affs/file.c b/fs/affs/file.c index 2f4c935cb327..af3261b78102 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -39,7 +39,6 @@ const struct file_operations affs_file_operations = { }; const struct inode_operations affs_file_inode_operations = { - .truncate = affs_truncate, .setattr = affs_notify_change, }; @@ -402,6 +401,16 @@ static int affs_readpage(struct file *file, struct page *page) return block_read_full_page(page, affs_get_block); } +static void affs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + affs_truncate(inode); + } +} + static int affs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -412,11 +421,8 @@ static int affs_write_begin(struct file *file, struct address_space *mapping, ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, affs_get_block, &AFFS_I(mapping->host)->mmu_private); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + affs_write_failed(mapping, pos + len); return ret; } diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 15c484268229..0e092d08680e 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -237,9 +237,12 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); + error = inode_newsize_ok(inode, attr->ia_size); if (error) return error; + + truncate_setsize(inode, attr->ia_size); + affs_truncate(inode); } setattr_copy(inode, attr); diff --git a/fs/bfs/file.c b/fs/bfs/file.c index f20e8a71062f..ad3ea1497cc3 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -161,6 +161,14 @@ static int bfs_readpage(struct file *file, struct page *page) return block_read_full_page(page, bfs_get_block); } +static void bfs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) + truncate_pagecache(inode, to, inode->i_size); +} + static int bfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -169,11 +177,8 @@ static int bfs_write_begin(struct file *file, struct address_space *mapping, ret = block_write_begin(mapping, pos, len, flags, pagep, bfs_get_block); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + bfs_write_failed(mapping, pos + len); return ret; } diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9be335fb8a7c..0c8869fdd14e 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -172,7 +172,10 @@ static int load_misc_binary(struct linux_binprm *bprm) goto _error; bprm->argc ++; - bprm->interp = iname; /* for binfmt_script */ + /* Update interp in case binfmt_script needs it. */ + retval = bprm_change_interp(iname, bprm); + if (retval < 0) + goto _error; interp_file = open_exec (iname); retval = PTR_ERR (interp_file); diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 1610a91637e5..5027a3e14922 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -80,7 +80,9 @@ static int load_script(struct linux_binprm *bprm) retval = copy_strings_kernel(1, &i_name, bprm); if (retval) return retval; bprm->argc++; - bprm->interp = interp; + retval = bprm_change_interp(interp, bprm); + if (retval < 0) + return retval; /* * OK, now restart the process with the interpreter's dentry. diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c7b67cf24bba..eea5da7a2b9a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1138,13 +1138,13 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, switch (tm->op) { case MOD_LOG_KEY_REMOVE_WHILE_FREEING: BUG_ON(tm->slot < n); - case MOD_LOG_KEY_REMOVE: - n++; case MOD_LOG_KEY_REMOVE_WHILE_MOVING: + case MOD_LOG_KEY_REMOVE: btrfs_set_node_key(eb, &tm->key, tm->slot); btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); btrfs_set_node_ptr_generation(eb, tm->slot, tm->generation); + n++; break; case MOD_LOG_KEY_REPLACE: BUG_ON(tm->slot >= n); @@ -4611,12 +4611,6 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 nritems; int ret; - if (level) { - ret = tree_mod_log_insert_key(root->fs_info, parent, slot, - MOD_LOG_KEY_REMOVE); - BUG_ON(ret < 0); - } - nritems = btrfs_header_nritems(parent); if (slot != nritems - 1) { if (level) @@ -4627,6 +4621,10 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_node_key_ptr_offset(slot + 1), sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); + } else if (level) { + ret = tree_mod_log_insert_key(root->fs_info, parent, slot, + MOD_LOG_KEY_REMOVE); + BUG_ON(ret < 0); } nritems--; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 67ed24ae86bb..16d9e8e191e6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4262,16 +4262,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - if (unlikely(d_need_lookup(dentry))) { - memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); - kfree(dentry->d_fsdata); - dentry->d_fsdata = NULL; - /* This thing is hashed, drop it for now */ - d_drop(dentry); - } else { - ret = btrfs_inode_by_name(dir, dentry, &location); - } - + ret = btrfs_inode_by_name(dir, dentry, &location); if (ret < 0) return ERR_PTR(ret); @@ -4341,11 +4332,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct dentry *ret; ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); - if (unlikely(d_need_lookup(dentry))) { - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_NEED_LOOKUP; - spin_unlock(&dentry->d_lock); - } return ret; } diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 67bef6d01484..746ce532e130 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -41,12 +41,12 @@ static struct fscache_object *cachefiles_alloc_object( _enter("{%s},%p,", cache->cache.identifier, cookie); - lookup_data = kmalloc(sizeof(*lookup_data), GFP_KERNEL); + lookup_data = kmalloc(sizeof(*lookup_data), cachefiles_gfp); if (!lookup_data) goto nomem_lookup_data; /* create a new object record and a temporary leaf image */ - object = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); + object = kmem_cache_alloc(cachefiles_object_jar, cachefiles_gfp); if (!object) goto nomem_object; @@ -63,7 +63,7 @@ static struct fscache_object *cachefiles_alloc_object( * - stick the length on the front and leave space on the back for the * encoder */ - buffer = kmalloc((2 + 512) + 3, GFP_KERNEL); + buffer = kmalloc((2 + 512) + 3, cachefiles_gfp); if (!buffer) goto nomem_buffer; @@ -219,7 +219,7 @@ static void cachefiles_update_object(struct fscache_object *_object) return; } - auxdata = kmalloc(2 + 512 + 3, GFP_KERNEL); + auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp); if (!auxdata) { _leave(" [nomem]"); return; @@ -441,6 +441,54 @@ truncate_failed: } /* + * Invalidate an object + */ +static void cachefiles_invalidate_object(struct fscache_operation *op) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + struct path path; + uint64_t ni_size; + int ret; + + object = container_of(op->object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + op->object->cookie->def->get_attr(op->object->cookie->netfs_data, + &ni_size); + + _enter("{OBJ%x},[%llu]", + op->object->debug_id, (unsigned long long)ni_size); + + if (object->backer) { + ASSERT(S_ISREG(object->backer->d_inode->i_mode)); + + fscache_set_store_limit(&object->fscache, ni_size); + + path.dentry = object->backer; + path.mnt = cache->mnt; + + cachefiles_begin_secure(cache, &saved_cred); + ret = vfs_truncate(&path, 0); + if (ret == 0) + ret = vfs_truncate(&path, ni_size); + cachefiles_end_secure(cache, saved_cred); + + if (ret != 0) { + fscache_set_store_limit(&object->fscache, 0); + if (ret == -EIO) + cachefiles_io_error_obj(object, + "Invalidate failed"); + } + } + + fscache_op_complete(op, true); + _leave(""); +} + +/* * dissociate a cache from all the pages it was backing */ static void cachefiles_dissociate_pages(struct fscache_cache *cache) @@ -455,6 +503,7 @@ const struct fscache_cache_ops cachefiles_cache_ops = { .lookup_complete = cachefiles_lookup_complete, .grab_object = cachefiles_grab_object, .update_object = cachefiles_update_object, + .invalidate_object = cachefiles_invalidate_object, .drop_object = cachefiles_drop_object, .put_object = cachefiles_put_object, .sync_cache = cachefiles_sync_cache, diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index bd6bc1bde2d7..49382519907a 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -23,6 +23,8 @@ extern unsigned cachefiles_debug; #define CACHEFILES_DEBUG_KLEAVE 2 #define CACHEFILES_DEBUG_KDEBUG 4 +#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC) + /* * node records */ diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c index 81b8b2b3a674..33b58c60f2d1 100644 --- a/fs/cachefiles/key.c +++ b/fs/cachefiles/key.c @@ -78,7 +78,7 @@ char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type) _debug("max: %d", max); - key = kmalloc(max, GFP_KERNEL); + key = kmalloc(max, cachefiles_gfp); if (!key) return NULL; diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index b0b5f7cdfffa..8c01c5fcdf75 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -40,8 +40,7 @@ void __cachefiles_printk_object(struct cachefiles_object *object, printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", prefix, fscache_object_states[object->fscache.state], object->fscache.flags, work_busy(&object->fscache.work), - object->fscache.events, - object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK); + object->fscache.events, object->fscache.event_mask); printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", prefix, object->fscache.n_ops, object->fscache.n_in_progress, object->fscache.n_exclusive); diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index c994691d9445..480992259707 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -77,25 +77,25 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, struct page *backpage = monitor->back_page, *backpage2; int ret; - kenter("{ino=%lx},{%lx,%lx}", + _enter("{ino=%lx},{%lx,%lx}", object->backer->d_inode->i_ino, backpage->index, backpage->flags); /* skip if the page was truncated away completely */ if (backpage->mapping != bmapping) { - kleave(" = -ENODATA [mapping]"); + _leave(" = -ENODATA [mapping]"); return -ENODATA; } backpage2 = find_get_page(bmapping, backpage->index); if (!backpage2) { - kleave(" = -ENODATA [gone]"); + _leave(" = -ENODATA [gone]"); return -ENODATA; } if (backpage != backpage2) { put_page(backpage2); - kleave(" = -ENODATA [different]"); + _leave(" = -ENODATA [different]"); return -ENODATA; } @@ -114,7 +114,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, if (PageUptodate(backpage)) goto unlock_discard; - kdebug("reissue read"); + _debug("reissue read"); ret = bmapping->a_ops->readpage(NULL, backpage); if (ret < 0) goto unlock_discard; @@ -129,7 +129,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, } /* it'll reappear on the todo list */ - kleave(" = -EINPROGRESS"); + _leave(" = -EINPROGRESS"); return -EINPROGRESS; unlock_discard: @@ -137,7 +137,7 @@ unlock_discard: spin_lock_irq(&object->work_lock); list_del(&monitor->op_link); spin_unlock_irq(&object->work_lock); - kleave(" = %d", ret); + _leave(" = %d", ret); return ret; } @@ -174,11 +174,13 @@ static void cachefiles_read_copier(struct fscache_operation *_op) _debug("- copy {%lu}", monitor->back_page->index); recheck: - if (PageUptodate(monitor->back_page)) { + if (test_bit(FSCACHE_COOKIE_INVALIDATING, + &object->fscache.cookie->flags)) { + error = -ESTALE; + } else if (PageUptodate(monitor->back_page)) { copy_highpage(monitor->netfs_page, monitor->back_page); - - pagevec_add(&pagevec, monitor->netfs_page); - fscache_mark_pages_cached(monitor->op, &pagevec); + fscache_mark_page_cached(monitor->op, + monitor->netfs_page); error = 0; } else if (!PageError(monitor->back_page)) { /* the page has probably been truncated */ @@ -198,6 +200,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op) fscache_end_io(op, monitor->netfs_page, error); page_cache_release(monitor->netfs_page); + fscache_retrieval_complete(op, 1); fscache_put_retrieval(op); kfree(monitor); @@ -239,7 +242,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, _debug("read back %p{%lu,%d}", netpage, netpage->index, page_count(netpage)); - monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); + monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); if (!monitor) goto nomem; @@ -258,13 +261,14 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, goto backing_page_already_present; if (!newpage) { - newpage = page_cache_alloc_cold(bmapping); + newpage = __page_cache_alloc(cachefiles_gfp | + __GFP_COLD); if (!newpage) goto nomem_monitor; } ret = add_to_page_cache(newpage, bmapping, - netpage->index, GFP_KERNEL); + netpage->index, cachefiles_gfp); if (ret == 0) goto installed_new_backing_page; if (ret != -EEXIST) @@ -335,11 +339,11 @@ backing_page_already_present: backing_page_already_uptodate: _debug("- uptodate"); - pagevec_add(pagevec, netpage); - fscache_mark_pages_cached(op, pagevec); + fscache_mark_page_cached(op, netpage); copy_highpage(netpage, backpage); fscache_end_io(op, netpage, 0); + fscache_retrieval_complete(op, 1); success: _debug("success"); @@ -357,10 +361,13 @@ out: read_error: _debug("read error %d", ret); - if (ret == -ENOMEM) + if (ret == -ENOMEM) { + fscache_retrieval_complete(op, 1); goto out; + } io_error: cachefiles_io_error_obj(object, "Page read error on backing file"); + fscache_retrieval_complete(op, 1); ret = -ENOBUFS; goto out; @@ -370,6 +377,7 @@ nomem_monitor: fscache_put_retrieval(monitor->op); kfree(monitor); nomem: + fscache_retrieval_complete(op, 1); _leave(" = -ENOMEM"); return -ENOMEM; } @@ -408,7 +416,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, _enter("{%p},{%lx},,,", object, page->index); if (!object->backer) - return -ENOBUFS; + goto enobufs; inode = object->backer->d_inode; ASSERT(S_ISREG(inode->i_mode)); @@ -417,7 +425,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, /* calculate the shift required to use bmap */ if (inode->i_sb->s_blocksize > PAGE_SIZE) - return -ENOBUFS; + goto enobufs; shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; @@ -448,15 +456,20 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, &pagevec); } else if (cachefiles_has_space(cache, 0, 1) == 0) { /* there's space in the cache we can use */ - pagevec_add(&pagevec, page); - fscache_mark_pages_cached(op, &pagevec); + fscache_mark_page_cached(op, page); + fscache_retrieval_complete(op, 1); ret = -ENODATA; } else { - ret = -ENOBUFS; + goto enobufs; } _leave(" = %d", ret); return ret; + +enobufs: + fscache_retrieval_complete(op, 1); + _leave(" = -ENOBUFS"); + return -ENOBUFS; } /* @@ -465,8 +478,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, */ static int cachefiles_read_backing_file(struct cachefiles_object *object, struct fscache_retrieval *op, - struct list_head *list, - struct pagevec *mark_pvec) + struct list_head *list) { struct cachefiles_one_read *monitor = NULL; struct address_space *bmapping = object->backer->d_inode->i_mapping; @@ -485,7 +497,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, netpage, netpage->index, page_count(netpage)); if (!monitor) { - monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); + monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); if (!monitor) goto nomem; @@ -500,13 +512,14 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, goto backing_page_already_present; if (!newpage) { - newpage = page_cache_alloc_cold(bmapping); + newpage = __page_cache_alloc(cachefiles_gfp | + __GFP_COLD); if (!newpage) goto nomem; } ret = add_to_page_cache(newpage, bmapping, - netpage->index, GFP_KERNEL); + netpage->index, cachefiles_gfp); if (ret == 0) goto installed_new_backing_page; if (ret != -EEXIST) @@ -536,10 +549,11 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, _debug("- monitor add"); ret = add_to_page_cache(netpage, op->mapping, netpage->index, - GFP_KERNEL); + cachefiles_gfp); if (ret < 0) { if (ret == -EEXIST) { page_cache_release(netpage); + fscache_retrieval_complete(op, 1); continue; } goto nomem; @@ -612,10 +626,11 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, _debug("- uptodate"); ret = add_to_page_cache(netpage, op->mapping, netpage->index, - GFP_KERNEL); + cachefiles_gfp); if (ret < 0) { if (ret == -EEXIST) { page_cache_release(netpage); + fscache_retrieval_complete(op, 1); continue; } goto nomem; @@ -626,16 +641,17 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, page_cache_release(backpage); backpage = NULL; - if (!pagevec_add(mark_pvec, netpage)) - fscache_mark_pages_cached(op, mark_pvec); + fscache_mark_page_cached(op, netpage); page_cache_get(netpage); if (!pagevec_add(&lru_pvec, netpage)) __pagevec_lru_add_file(&lru_pvec); + /* the netpage is unlocked and marked up to date here */ fscache_end_io(op, netpage, 0); page_cache_release(netpage); netpage = NULL; + fscache_retrieval_complete(op, 1); continue; } @@ -661,6 +677,7 @@ out: list_for_each_entry_safe(netpage, _n, list, lru) { list_del(&netpage->lru); page_cache_release(netpage); + fscache_retrieval_complete(op, 1); } _leave(" = %d", ret); @@ -669,15 +686,17 @@ out: nomem: _debug("nomem"); ret = -ENOMEM; - goto out; + goto record_page_complete; read_error: _debug("read error %d", ret); if (ret == -ENOMEM) - goto out; + goto record_page_complete; io_error: cachefiles_io_error_obj(object, "Page read error on backing file"); ret = -ENOBUFS; +record_page_complete: + fscache_retrieval_complete(op, 1); goto out; } @@ -709,7 +728,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, *nr_pages); if (!object->backer) - return -ENOBUFS; + goto all_enobufs; space = 1; if (cachefiles_has_space(cache, 0, *nr_pages) < 0) @@ -722,7 +741,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, /* calculate the shift required to use bmap */ if (inode->i_sb->s_blocksize > PAGE_SIZE) - return -ENOBUFS; + goto all_enobufs; shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; @@ -762,7 +781,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, nrbackpages++; } else if (space && pagevec_add(&pagevec, page) == 0) { fscache_mark_pages_cached(op, &pagevec); + fscache_retrieval_complete(op, 1); ret = -ENODATA; + } else { + fscache_retrieval_complete(op, 1); } } @@ -775,18 +797,18 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, /* submit the apparently valid pages to the backing fs to be read from * disk */ if (nrbackpages > 0) { - ret2 = cachefiles_read_backing_file(object, op, &backpages, - &pagevec); + ret2 = cachefiles_read_backing_file(object, op, &backpages); if (ret2 == -ENOMEM || ret2 == -EINTR) ret = ret2; } - if (pagevec_count(&pagevec) > 0) - fscache_mark_pages_cached(op, &pagevec); - _leave(" = %d [nr=%u%s]", ret, *nr_pages, list_empty(pages) ? " empty" : ""); return ret; + +all_enobufs: + fscache_retrieval_complete(op, *nr_pages); + return -ENOBUFS; } /* @@ -806,7 +828,6 @@ int cachefiles_allocate_page(struct fscache_retrieval *op, { struct cachefiles_object *object; struct cachefiles_cache *cache; - struct pagevec pagevec; int ret; object = container_of(op->op.object, @@ -817,14 +838,12 @@ int cachefiles_allocate_page(struct fscache_retrieval *op, _enter("%p,{%lx},", object, page->index); ret = cachefiles_has_space(cache, 0, 1); - if (ret == 0) { - pagevec_init(&pagevec, 0); - pagevec_add(&pagevec, page); - fscache_mark_pages_cached(op, &pagevec); - } else { + if (ret == 0) + fscache_mark_page_cached(op, page); + else ret = -ENOBUFS; - } + fscache_retrieval_complete(op, 1); _leave(" = %d", ret); return ret; } @@ -874,6 +893,7 @@ int cachefiles_allocate_pages(struct fscache_retrieval *op, ret = -ENOBUFS; } + fscache_retrieval_complete(op, *nr_pages); _leave(" = %d", ret); return ret; } diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index e18b183b47e1..73b46288b54b 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -174,7 +174,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object, ASSERT(dentry); ASSERT(dentry->d_inode); - auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL); + auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, cachefiles_gfp); if (!auxbuf) { _leave(" = -ENOMEM"); return -ENOMEM; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 6690269f5dde..064d1a68d2c1 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -267,6 +267,14 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) kfree(req->r_pages); } +static void ceph_unlock_page_vector(struct page **pages, int num_pages) +{ + int i; + + for (i = 0; i < num_pages; i++) + unlock_page(pages[i]); +} + /* * start an async read(ahead) operation. return nr_pages we submitted * a read for on success, or negative error code. @@ -347,6 +355,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) return nr_pages; out_pages: + ceph_unlock_page_vector(pages, nr_pages); ceph_release_page_vector(pages, nr_pages); out: ceph_osdc_put_request(req); @@ -1078,23 +1087,51 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = file->f_dentry->d_inode; + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_file_info *fi = file->private_data; struct page *page; pgoff_t index = pos >> PAGE_CACHE_SHIFT; - int r; + int r, want, got = 0; + + if (fi->fmode & CEPH_FILE_MODE_LAZY) + want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; + else + want = CEPH_CAP_FILE_BUFFER; + + dout("write_begin %p %llx.%llx %llu~%u getting caps. i_size %llu\n", + inode, ceph_vinop(inode), pos, len, inode->i_size); + r = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos+len); + if (r < 0) + return r; + dout("write_begin %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), pos, len, ceph_cap_string(got)); + if (!(got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO))) { + ceph_put_cap_refs(ci, got); + return -EAGAIN; + } do { /* get a page */ page = grab_cache_page_write_begin(mapping, index, 0); - if (!page) - return -ENOMEM; - *pagep = page; + if (!page) { + r = -ENOMEM; + break; + } dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); r = ceph_update_writeable_page(file, pos, len, page); + if (r) + page_cache_release(page); } while (r == -EAGAIN); + if (r) { + ceph_put_cap_refs(ci, got); + } else { + *pagep = page; + *(int *)fsdata = got; + } return r; } @@ -1108,10 +1145,12 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct inode *inode = file->f_dentry->d_inode; + struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; unsigned from = pos & (PAGE_CACHE_SIZE - 1); int check_cap = 0; + int got = (unsigned long)fsdata; dout("write_end file %p inode %p page %p %d~%d (%d)\n", file, inode, page, (int)pos, (int)copied, (int)len); @@ -1134,6 +1173,19 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, up_read(&mdsc->snap_rwsem); page_cache_release(page); + if (copied > 0) { + int dirty; + spin_lock(&ci->i_ceph_lock); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + spin_unlock(&ci->i_ceph_lock); + if (dirty) + __mark_inode_dirty(inode, dirty); + } + + dout("write_end %p %llx.%llx %llu~%u dropping cap refs on %s\n", + inode, ceph_vinop(inode), pos, len, ceph_cap_string(got)); + ceph_put_cap_refs(ci, got); + if (check_cap) ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 3251e9cc6401..a1d9bb30c1bf 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -236,8 +236,10 @@ static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, if (!ctx) { cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); if (cap) { + spin_lock(&mdsc->caps_list_lock); mdsc->caps_use_count++; mdsc->caps_total_count++; + spin_unlock(&mdsc->caps_list_lock); } return cap; } @@ -1349,11 +1351,15 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) if (!ci->i_head_snapc) ci->i_head_snapc = ceph_get_snap_context( ci->i_snap_realm->cached_context); - dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode, - ci->i_head_snapc); + dout(" inode %p now dirty snapc %p auth cap %p\n", + &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); BUG_ON(!list_empty(&ci->i_dirty_item)); spin_lock(&mdsc->cap_dirty_lock); - list_add(&ci->i_dirty_item, &mdsc->cap_dirty); + if (ci->i_auth_cap) + list_add(&ci->i_dirty_item, &mdsc->cap_dirty); + else + list_add(&ci->i_dirty_item, + &mdsc->cap_dirty_migrating); spin_unlock(&mdsc->cap_dirty_lock); if (ci->i_flushing_caps == 0) { ihold(inode); @@ -2388,7 +2394,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, &atime); /* max size increase? */ - if (max_size != ci->i_max_size) { + if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); ci->i_max_size = max_size; if (max_size >= ci->i_wanted_max_size) { @@ -2745,6 +2751,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, /* make sure we re-request max_size, if necessary */ spin_lock(&ci->i_ceph_lock); + ci->i_wanted_max_size = 0; /* reset */ ci->i_requested_max_size = 0; spin_unlock(&ci->i_ceph_lock); } @@ -2840,8 +2847,6 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, session, snaptrace, snaptrace_len); - ceph_check_caps(ceph_inode(inode), 0, session); - goto done_unlocked; } /* the rest require a cap */ @@ -2858,6 +2863,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, switch (op) { case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: + case CEPH_CAP_OP_IMPORT: handle_cap_grant(inode, h, session, cap, msg->middle); goto done_unlocked; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d4dfdcf76d7f..e51558fca3a3 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -712,63 +712,53 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; loff_t endoff = pos + iov->iov_len; - int want, got = 0; - int ret, err; + int got = 0; + int ret, err, written; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; retry_snap: + written = 0; if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) return -ENOSPC; __ceph_do_pending_vmtruncate(inode); - dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", - inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, - inode->i_size); - if (fi->fmode & CEPH_FILE_MODE_LAZY) - want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; - else - want = CEPH_CAP_FILE_BUFFER; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); - if (ret < 0) - goto out_put; - - dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, - ceph_cap_string(got)); - - if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || - (iocb->ki_filp->f_flags & O_DIRECT) || - (inode->i_sb->s_flags & MS_SYNCHRONOUS) || - (fi->flags & CEPH_F_SYNC)) { - ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, - &iocb->ki_pos); - } else { - /* - * buffered write; drop Fw early to avoid slow - * revocation if we get stuck on balance_dirty_pages - */ - int dirty; - - spin_lock(&ci->i_ceph_lock); - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); - spin_unlock(&ci->i_ceph_lock); - ceph_put_cap_refs(ci, got); + /* + * try to do a buffered write. if we don't have sufficient + * caps, we'll get -EAGAIN from generic_file_aio_write, or a + * short write if we only get caps for some pages. + */ + if (!(iocb->ki_filp->f_flags & O_DIRECT) && + !(inode->i_sb->s_flags & MS_SYNCHRONOUS) && + !(fi->flags & CEPH_F_SYNC)) { ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + if (ret >= 0) + written = ret; + if ((ret >= 0 || ret == -EIOCBQUEUED) && ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { - err = vfs_fsync_range(file, pos, pos + ret - 1, 1); + err = vfs_fsync_range(file, pos, pos + written - 1, 1); if (err < 0) ret = err; } + if ((ret < 0 && ret != -EAGAIN) || pos + written >= endoff) + goto out; + } - if (dirty) - __mark_inode_dirty(inode, dirty); + dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", + inode, ceph_vinop(inode), pos + written, + (unsigned)iov->iov_len - written, inode->i_size); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, 0, &got, endoff); + if (ret < 0) goto out; - } + dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), pos + written, + (unsigned)iov->iov_len - written, ceph_cap_string(got)); + ret = ceph_sync_write(file, iov->iov_base + written, + iov->iov_len - written, &iocb->ki_pos); if (ret >= 0) { int dirty; spin_lock(&ci->i_ceph_lock); @@ -777,13 +767,10 @@ retry_snap: if (dirty) __mark_inode_dirty(inode, dirty); } - -out_put: dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", - inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, - ceph_cap_string(got)); + inode, ceph_vinop(inode), pos + written, + (unsigned)iov->iov_len - written, ceph_cap_string(got)); ceph_put_cap_refs(ci, got); - out: if (ret == -EOLDSNAPC) { dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ba95eea201bf..2971eaa65cdc 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1466,7 +1466,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); u64 to; - int wrbuffer_refs, wake = 0; + int wrbuffer_refs, finish = 0; retry: spin_lock(&ci->i_ceph_lock); @@ -1498,15 +1498,18 @@ retry: truncate_inode_pages(inode->i_mapping, to); spin_lock(&ci->i_ceph_lock); - ci->i_truncate_pending--; - if (ci->i_truncate_pending == 0) - wake = 1; + if (to == ci->i_truncate_size) { + ci->i_truncate_pending = 0; + finish = 1; + } spin_unlock(&ci->i_ceph_lock); + if (!finish) + goto retry; if (wrbuffer_refs == 0) ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); - if (wake) - wake_up_all(&ci->i_cap_wq); + + wake_up_all(&ci->i_cap_wq); } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1bcf712655d9..9165eb8309eb 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1590,7 +1590,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, } else if (rpath || rino) { *ino = rino; *ppath = rpath; - *pathlen = strlen(rpath); + *pathlen = rpath ? strlen(rpath) : 0; dout(" path %.*s\n", *pathlen, rpath); } @@ -1876,9 +1876,14 @@ finish: static void __wake_requests(struct ceph_mds_client *mdsc, struct list_head *head) { - struct ceph_mds_request *req, *nreq; + struct ceph_mds_request *req; + LIST_HEAD(tmp_list); + + list_splice_init(head, &tmp_list); - list_for_each_entry_safe(req, nreq, head, r_wait) { + while (!list_empty(&tmp_list)) { + req = list_entry(tmp_list.next, + struct ceph_mds_request, r_wait); list_del_init(&req->r_wait); __do_request(mdsc, req); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 2eb43f211325..e86aa9948124 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -403,8 +403,6 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); - if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) - seq_printf(m, ",osdtimeout=%d", opt->osd_timeout); if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) seq_printf(m, ",osdkeepalivetimeout=%d", opt->osd_keepalive_timeout); @@ -849,7 +847,7 @@ static int ceph_register_bdi(struct super_block *sb, fsc->backing_dev_info.ra_pages = default_backing_dev_info.ra_pages; - err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", + err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", atomic_long_inc_return(&bdi_seq)); if (!err) sb->s_bdi = &fsc->backing_dev_info; diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index 86e92ef2abc1..69ae3d3c3b31 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -37,7 +37,6 @@ void dump_smb(void *, int); #define CIFS_TIMER 0x04 extern int cifsFYI; -extern int cifsERROR; /* * debug ON @@ -64,10 +63,7 @@ do { \ /* error event message: e.g., i/o error */ #define cifserror(fmt, ...) \ -do { \ - if (cifsERROR) \ - printk(KERN_ERR "CIFS VFS: " fmt "\n", ##__VA_ARGS__); \ -} while (0) + printk(KERN_ERR "CIFS VFS: " fmt "\n", ##__VA_ARGS__); \ #define cERROR(set, fmt, ...) \ do { \ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index ce9f3c5421bf..f653835d067b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -54,7 +54,6 @@ #endif int cifsFYI = 0; -int cifsERROR = 1; int traceSMB = 0; bool enable_oplocks = true; unsigned int linuxExtEnabled = 1; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7635b5db26a7..17c3643e5950 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1624,14 +1624,11 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, case Opt_unc: string = vol->UNC; vol->UNC = match_strdup(args); - if (vol->UNC == NULL) { - kfree(string); + if (vol->UNC == NULL) goto out_nomem; - } convert_delimiter(vol->UNC, '\\'); if (vol->UNC[0] != '\\' || vol->UNC[1] != '\\') { - kfree(string); printk(KERN_ERR "CIFS: UNC Path does not " "begin with // or \\\\\n"); goto cifs_parse_mount_err; @@ -1687,10 +1684,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, string = vol->prepath; vol->prepath = match_strdup(args); - if (vol->prepath == NULL) { - kfree(string); + if (vol->prepath == NULL) goto out_nomem; - } /* Compare old prefixpath= option to new one */ if (!string || strcmp(string, vol->prepath)) printk(KERN_WARNING "CIFS: the value of the " diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 6002fdc920ae..cdd6ff48246b 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -78,6 +78,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, struct dentry *dentry, *alias; struct inode *inode; struct super_block *sb = parent->d_inode->i_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); cFYI(1, "%s: for %s", __func__, name->name); @@ -91,10 +92,20 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, int err; inode = dentry->d_inode; - /* update inode in place if i_ino didn't change */ - if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { - cifs_fattr_to_inode(inode, fattr); - goto out; + if (inode) { + /* + * If we're generating inode numbers, then we don't + * want to clobber the existing one with the one that + * the readdir code created. + */ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) + fattr->cf_uniqueid = CIFS_I(inode)->uniqueid; + + /* update inode in place if i_ino didn't change */ + if (CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { + cifs_fattr_to_inode(inode, fattr); + goto out; + } } err = d_invalidate(dentry); dput(dentry); diff --git a/fs/dcache.c b/fs/dcache.c index 3a463d0c4fe8..19153a0a810c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -455,24 +455,6 @@ void d_drop(struct dentry *dentry) EXPORT_SYMBOL(d_drop); /* - * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag - * @dentry: dentry to drop - * - * This is called when we do a lookup on a placeholder dentry that needed to be - * looked up. The dentry should have been hashed in order for it to be found by - * the lookup code, but now needs to be unhashed while we do the actual lookup - * and clear the DCACHE_NEED_LOOKUP flag. - */ -void d_clear_need_lookup(struct dentry *dentry) -{ - spin_lock(&dentry->d_lock); - __d_drop(dentry); - dentry->d_flags &= ~DCACHE_NEED_LOOKUP; - spin_unlock(&dentry->d_lock); -} -EXPORT_SYMBOL(d_clear_need_lookup); - -/* * Finish off a dentry we've decided to kill. * dentry->d_lock must be held, returns with it unlocked. * If ref is non-zero, then decrement the refcount too. @@ -565,13 +547,7 @@ repeat: if (d_unhashed(dentry)) goto kill_it; - /* - * If this dentry needs lookup, don't set the referenced flag so that it - * is more likely to be cleaned up by the dcache shrinker in case of - * memory pressure. - */ - if (!d_need_lookup(dentry)) - dentry->d_flags |= DCACHE_REFERENCED; + dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); dentry->d_count--; @@ -1583,7 +1559,7 @@ EXPORT_SYMBOL(d_find_any_alias); */ struct dentry *d_obtain_alias(struct inode *inode) { - static const struct qstr anonstring = { .name = "" }; + static const struct qstr anonstring = QSTR_INIT("/", 1); struct dentry *tmp; struct dentry *res; @@ -1737,13 +1713,6 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, } /* - * We are going to instantiate this dentry, unhash it and clear the - * lookup flag so we can do that. - */ - if (unlikely(d_need_lookup(found))) - d_clear_need_lookup(found); - - /* * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ diff --git a/fs/exec.c b/fs/exec.c index d8e1191cb112..18c45cac368f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1175,9 +1175,24 @@ void free_bprm(struct linux_binprm *bprm) mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } + /* If a binfmt changed the interp, free it. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); kfree(bprm); } +int bprm_change_interp(char *interp, struct linux_binprm *bprm) +{ + /* If a binfmt changed the interp, free it first. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); + bprm->interp = kstrdup(interp, GFP_KERNEL); + if (!bprm->interp) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL(bprm_change_interp); + /* * install the new credentials for this executable */ @@ -1654,7 +1669,6 @@ int get_dumpable(struct mm_struct *mm) return __get_dumpable(mm->flags); } -#ifdef __ARCH_WANT_SYS_EXECVE SYSCALL_DEFINE3(execve, const char __user *, filename, const char __user *const __user *, argv, @@ -1682,23 +1696,3 @@ asmlinkage long compat_sys_execve(const char __user * filename, return error; } #endif -#endif - -#ifdef __ARCH_WANT_KERNEL_EXECVE -int kernel_execve(const char *filename, - const char *const argv[], - const char *const envp[]) -{ - int ret = do_execve(filename, - (const char __user *const __user *)argv, - (const char __user *const __user *)envp); - if (ret < 0) - return ret; - - /* - * We were successful. We won't be returning to our caller, but - * instead to user space by manipulating the kernel stack. - */ - ret_from_kernel_execve(current_pt_regs()); -} -#endif diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 606bb074c501..5df4bb4aab14 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -322,10 +322,10 @@ static int export_encode_fh(struct inode *inode, struct fid *fid, if (parent && (len < 4)) { *max_len = 4; - return 255; + return FILEID_INVALID; } else if (len < 2) { *max_len = 2; - return 255; + return FILEID_INVALID; } len = 2; diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig new file mode 100644 index 000000000000..fd27e7e6326e --- /dev/null +++ b/fs/f2fs/Kconfig @@ -0,0 +1,53 @@ +config F2FS_FS + tristate "F2FS filesystem support (EXPERIMENTAL)" + depends on BLOCK + help + F2FS is based on Log-structured File System (LFS), which supports + versatile "flash-friendly" features. The design has been focused on + addressing the fundamental issues in LFS, which are snowball effect + of wandering tree and high cleaning overhead. + + Since flash-based storages show different characteristics according to + the internal geometry or flash memory management schemes aka FTL, F2FS + and tools support various parameters not only for configuring on-disk + layout, but also for selecting allocation and cleaning algorithms. + + If unsure, say N. + +config F2FS_STAT_FS + bool "F2FS Status Information" + depends on F2FS_FS && DEBUG_FS + default y + help + /sys/kernel/debug/f2fs/ contains information about all the partitions + mounted as f2fs. Each file shows the whole f2fs information. + + /sys/kernel/debug/f2fs/status includes: + - major file system information managed by f2fs currently + - average SIT information about whole segments + - current memory footprint consumed by f2fs. + +config F2FS_FS_XATTR + bool "F2FS extended attributes" + depends on F2FS_FS + default y + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + <http://acl.bestbits.at/> for details). + + If unsure, say N. + +config F2FS_FS_POSIX_ACL + bool "F2FS Access Control Lists" + depends on F2FS_FS_XATTR + select FS_POSIX_ACL + default y + help + Posix Access Control Lists (ACLs) support permissions for users and + gourps beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website <http://acl.bestbits.at/>. + + If you don't know what Access Control Lists are, say N diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile new file mode 100644 index 000000000000..27a0820340b9 --- /dev/null +++ b/fs/f2fs/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_F2FS_FS) += f2fs.o + +f2fs-y := dir.o file.o inode.o namei.o hash.o super.o +f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o +f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o +f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o +f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c new file mode 100644 index 000000000000..fed74d193ffb --- /dev/null +++ b/fs/f2fs/acl.c @@ -0,0 +1,414 @@ +/* + * fs/f2fs/acl.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/acl.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/f2fs_fs.h> +#include "f2fs.h" +#include "xattr.h" +#include "acl.h" + +#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ + (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) + +static inline size_t f2fs_acl_size(int count) +{ + if (count <= 4) { + return sizeof(struct f2fs_acl_header) + + count * sizeof(struct f2fs_acl_entry_short); + } else { + return sizeof(struct f2fs_acl_header) + + 4 * sizeof(struct f2fs_acl_entry_short) + + (count - 4) * sizeof(struct f2fs_acl_entry); + } +} + +static inline int f2fs_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(struct f2fs_acl_header); + s = size - 4 * sizeof(struct f2fs_acl_entry_short); + if (s < 0) { + if (size % sizeof(struct f2fs_acl_entry_short)) + return -1; + return size / sizeof(struct f2fs_acl_entry_short); + } else { + if (s % sizeof(struct f2fs_acl_entry)) + return -1; + return s / sizeof(struct f2fs_acl_entry) + 4; + } +} + +static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) +{ + int i, count; + struct posix_acl *acl; + struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value; + struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); + const char *end = value + size; + + if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) + return ERR_PTR(-EINVAL); + + count = f2fs_acl_count(size); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + + if ((char *)entry > end) + goto fail; + + acl->a_entries[i].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm); + + switch (acl->a_entries[i].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + acl->a_entries[i].e_id = ACL_UNDEFINED_ID; + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry_short)); + break; + + case ACL_USER: + acl->a_entries[i].e_uid = + make_kuid(&init_user_ns, + le32_to_cpu(entry->e_id)); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + case ACL_GROUP: + acl->a_entries[i].e_gid = + make_kgid(&init_user_ns, + le32_to_cpu(entry->e_id)); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + default: + goto fail; + } + } + if ((char *)entry != end) + goto fail; + return acl; +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) +{ + struct f2fs_acl_header *f2fs_acl; + struct f2fs_acl_entry *entry; + int i; + + f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * + sizeof(struct f2fs_acl_entry), GFP_KERNEL); + if (!f2fs_acl) + return ERR_PTR(-ENOMEM); + + f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION); + entry = (struct f2fs_acl_entry *)(f2fs_acl + 1); + + for (i = 0; i < acl->a_count; i++) { + + entry->e_tag = cpu_to_le16(acl->a_entries[i].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm); + + switch (acl->a_entries[i].e_tag) { + case ACL_USER: + entry->e_id = cpu_to_le32( + from_kuid(&init_user_ns, + acl->a_entries[i].e_uid)); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + case ACL_GROUP: + entry->e_id = cpu_to_le32( + from_kgid(&init_user_ns, + acl->a_entries[i].e_gid)); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry_short)); + break; + default: + goto fail; + } + } + *size = f2fs_acl_size(acl->a_count); + return (void *)f2fs_acl; + +fail: + kfree(f2fs_acl); + return ERR_PTR(-EINVAL); +} + +struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; + void *value = NULL; + struct posix_acl *acl; + int retval; + + if (!test_opt(sbi, POSIX_ACL)) + return NULL; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + if (type == ACL_TYPE_ACCESS) + name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; + + retval = f2fs_getxattr(inode, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + retval = f2fs_getxattr(inode, name_index, "", value, retval); + } + + if (retval < 0) { + if (retval == -ENODATA) + acl = NULL; + else + acl = ERR_PTR(retval); + } else { + acl = f2fs_acl_from_disk(value, retval); + } + kfree(value); + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); + + return acl; +} + +static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + int name_index; + void *value = NULL; + size_t size = 0; + int error; + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + error = posix_acl_equiv_mode(acl, &inode->i_mode); + if (error < 0) + return error; + set_acl_inode(fi, inode->i_mode); + if (error == 0) + acl = NULL; + } + break; + + case ACL_TYPE_DEFAULT: + name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + + default: + return -EINVAL; + } + + if (acl) { + value = f2fs_acl_to_disk(acl, &size); + if (IS_ERR(value)) { + cond_clear_inode_flag(fi, FI_ACL_MODE); + return (int)PTR_ERR(value); + } + } + + error = f2fs_setxattr(inode, name_index, "", value, size); + + kfree(value); + if (!error) + set_cached_acl(inode, type, acl); + + cond_clear_inode_flag(fi, FI_ACL_MODE); + return error; +} + +int f2fs_init_acl(struct inode *inode, struct inode *dir) +{ + struct posix_acl *acl = NULL; + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + int error = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (test_opt(sbi, POSIX_ACL)) { + acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) + inode->i_mode &= ~current_umask(); + } + + if (test_opt(sbi, POSIX_ACL) && acl) { + + if (S_ISDIR(inode->i_mode)) { + error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto cleanup; + } + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + if (error < 0) + return error; + if (error > 0) + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); + } +cleanup: + posix_acl_release(acl); + return error; +} + +int f2fs_acl_chmod(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct posix_acl *acl; + int error; + mode_t mode = get_inode_mode(inode); + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + if (S_ISLNK(mode)) + return -EOPNOTSUPP; + + acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + error = posix_acl_chmod(&acl, GFP_KERNEL, mode); + if (error) + return error; + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); + posix_acl_release(acl); + return error; +} + +static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + const char *xname = POSIX_ACL_XATTR_DEFAULT; + size_t size; + + if (!test_opt(sbi, POSIX_ACL)) + return 0; + + if (type == ACL_TYPE_ACCESS) + xname = POSIX_ACL_XATTR_ACCESS; + + size = strlen(xname) + 1; + if (list && size <= list_size) + memcpy(list, xname, size); + return size; +} + +static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct posix_acl *acl; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + + acl = f2fs_get_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (!acl) + return -ENODATA; + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct inode *inode = dentry->d_inode; + struct posix_acl *acl = NULL; + int error; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(sbi, POSIX_ACL)) + return -EOPNOTSUPP; + if (!inode_owner_or_capable(inode)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + error = posix_acl_valid(acl); + if (error) + goto release_and_out; + } + } else { + acl = NULL; + } + + error = f2fs_set_acl(inode, type, acl); + +release_and_out: + posix_acl_release(acl); + return error; +} + +const struct xattr_handler f2fs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; + +const struct xattr_handler f2fs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .list = f2fs_xattr_list_acl, + .get = f2fs_xattr_get_acl, + .set = f2fs_xattr_set_acl, +}; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h new file mode 100644 index 000000000000..80f430674417 --- /dev/null +++ b/fs/f2fs/acl.h @@ -0,0 +1,57 @@ +/* + * fs/f2fs/acl.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/acl.h + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __F2FS_ACL_H__ +#define __F2FS_ACL_H__ + +#include <linux/posix_acl_xattr.h> + +#define F2FS_ACL_VERSION 0x0001 + +struct f2fs_acl_entry { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +}; + +struct f2fs_acl_entry_short { + __le16 e_tag; + __le16 e_perm; +}; + +struct f2fs_acl_header { + __le32 a_version; +}; + +#ifdef CONFIG_F2FS_FS_POSIX_ACL + +extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type); +extern int f2fs_acl_chmod(struct inode *inode); +extern int f2fs_init_acl(struct inode *inode, struct inode *dir); +#else +#define f2fs_check_acl NULL +#define f2fs_get_acl NULL +#define f2fs_set_acl NULL + +static inline int f2fs_acl_chmod(struct inode *inode) +{ + return 0; +} + +static inline int f2fs_init_acl(struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif +#endif /* __F2FS_ACL_H__ */ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c new file mode 100644 index 000000000000..6ef36c37e2be --- /dev/null +++ b/fs/f2fs/checkpoint.c @@ -0,0 +1,794 @@ +/* + * fs/f2fs/checkpoint.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/fs.h> +#include <linux/bio.h> +#include <linux/mpage.h> +#include <linux/writeback.h> +#include <linux/blkdev.h> +#include <linux/f2fs_fs.h> +#include <linux/pagevec.h> +#include <linux/swap.h> + +#include "f2fs.h" +#include "node.h" +#include "segment.h" + +static struct kmem_cache *orphan_entry_slab; +static struct kmem_cache *inode_entry_slab; + +/* + * We guarantee no failure on the returned page. + */ +struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct page *page = NULL; +repeat: + page = grab_cache_page(mapping, index); + if (!page) { + cond_resched(); + goto repeat; + } + + /* We wait writeback only inside grab_meta_page() */ + wait_on_page_writeback(page); + SetPageUptodate(page); + return page; +} + +/* + * We guarantee no failure on the returned page. + */ +struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct page *page; +repeat: + page = grab_cache_page(mapping, index); + if (!page) { + cond_resched(); + goto repeat; + } + if (f2fs_readpage(sbi, page, index, READ_SYNC)) { + f2fs_put_page(page, 1); + goto repeat; + } + mark_page_accessed(page); + + /* We do not allow returning an errorneous page */ + return page; +} + +static int f2fs_write_meta_page(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int err; + + wait_on_page_writeback(page); + + err = write_meta_page(sbi, page, wbc); + if (err) { + wbc->pages_skipped++; + set_page_dirty(page); + } + + dec_page_count(sbi, F2FS_DIRTY_META); + + /* In this case, we should not unlock this page */ + if (err != AOP_WRITEPAGE_ACTIVATE) + unlock_page(page); + return err; +} + +static int f2fs_write_meta_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + struct block_device *bdev = sbi->sb->s_bdev; + long written; + + if (wbc->for_kupdate) + return 0; + + if (get_pages(sbi, F2FS_DIRTY_META) == 0) + return 0; + + /* if mounting is failed, skip writing node pages */ + mutex_lock(&sbi->cp_mutex); + written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev)); + mutex_unlock(&sbi->cp_mutex); + wbc->nr_to_write -= written; + return 0; +} + +long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, + long nr_to_write) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + pgoff_t index = 0, end = LONG_MAX; + struct pagevec pvec; + long nwritten = 0; + struct writeback_control wbc = { + .for_reclaim = 0, + }; + + pagevec_init(&pvec, 0); + + while (index <= end) { + int i, nr_pages; + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + lock_page(page); + BUG_ON(page->mapping != mapping); + BUG_ON(!PageDirty(page)); + clear_page_dirty_for_io(page); + f2fs_write_meta_page(page, &wbc); + if (nwritten++ >= nr_to_write) + break; + } + pagevec_release(&pvec); + cond_resched(); + } + + if (nwritten) + f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX); + + return nwritten; +} + +static int f2fs_set_meta_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + inc_page_count(sbi, F2FS_DIRTY_META); + F2FS_SET_SB_DIRT(sbi); + return 1; + } + return 0; +} + +const struct address_space_operations f2fs_meta_aops = { + .writepage = f2fs_write_meta_page, + .writepages = f2fs_write_meta_pages, + .set_page_dirty = f2fs_set_meta_page_dirty, +}; + +int check_orphan_space(struct f2fs_sb_info *sbi) +{ + unsigned int max_orphans; + int err = 0; + + /* + * considering 512 blocks in a segment 5 blocks are needed for cp + * and log segment summaries. Remaining blocks are used to keep + * orphan entries with the limitation one reserved segment + * for cp pack we can have max 1020*507 orphan entries + */ + max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK; + mutex_lock(&sbi->orphan_inode_mutex); + if (sbi->n_orphans >= max_orphans) + err = -ENOSPC; + mutex_unlock(&sbi->orphan_inode_mutex); + return err; +} + +void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head, *this; + struct orphan_inode_entry *new = NULL, *orphan = NULL; + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + list_for_each(this, head) { + orphan = list_entry(this, struct orphan_inode_entry, list); + if (orphan->ino == ino) + goto out; + if (orphan->ino > ino) + break; + orphan = NULL; + } +retry: + new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); + if (!new) { + cond_resched(); + goto retry; + } + new->ino = ino; + INIT_LIST_HEAD(&new->list); + + /* add new_oentry into list which is sorted by inode number */ + if (orphan) { + struct orphan_inode_entry *prev; + + /* get previous entry */ + prev = list_entry(orphan->list.prev, typeof(*prev), list); + if (&prev->list != head) + /* insert new orphan inode entry */ + list_add(&new->list, &prev->list); + else + list_add(&new->list, head); + } else { + list_add_tail(&new->list, head); + } + sbi->n_orphans++; +out: + mutex_unlock(&sbi->orphan_inode_mutex); +} + +void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *this, *next, *head; + struct orphan_inode_entry *orphan; + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + list_for_each_safe(this, next, head) { + orphan = list_entry(this, struct orphan_inode_entry, list); + if (orphan->ino == ino) { + list_del(&orphan->list); + kmem_cache_free(orphan_entry_slab, orphan); + sbi->n_orphans--; + break; + } + } + mutex_unlock(&sbi->orphan_inode_mutex); +} + +static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct inode *inode = f2fs_iget(sbi->sb, ino); + BUG_ON(IS_ERR(inode)); + clear_nlink(inode); + + /* truncate all the data during iput */ + iput(inode); +} + +int recover_orphan_inodes(struct f2fs_sb_info *sbi) +{ + block_t start_blk, orphan_blkaddr, i, j; + + if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) + return 0; + + sbi->por_doing = 1; + start_blk = __start_cp_addr(sbi) + 1; + orphan_blkaddr = __start_sum_addr(sbi) - 1; + + for (i = 0; i < orphan_blkaddr; i++) { + struct page *page = get_meta_page(sbi, start_blk + i); + struct f2fs_orphan_block *orphan_blk; + + orphan_blk = (struct f2fs_orphan_block *)page_address(page); + for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { + nid_t ino = le32_to_cpu(orphan_blk->ino[j]); + recover_orphan_inode(sbi, ino); + } + f2fs_put_page(page, 1); + } + /* clear Orphan Flag */ + clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); + sbi->por_doing = 0; + return 0; +} + +static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) +{ + struct list_head *head, *this, *next; + struct f2fs_orphan_block *orphan_blk = NULL; + struct page *page = NULL; + unsigned int nentries = 0; + unsigned short index = 1; + unsigned short orphan_blocks; + + orphan_blocks = (unsigned short)((sbi->n_orphans + + (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); + + mutex_lock(&sbi->orphan_inode_mutex); + head = &sbi->orphan_inode_list; + + /* loop for each orphan inode entry and write them in Jornal block */ + list_for_each_safe(this, next, head) { + struct orphan_inode_entry *orphan; + + orphan = list_entry(this, struct orphan_inode_entry, list); + + if (nentries == F2FS_ORPHANS_PER_BLOCK) { + /* + * an orphan block is full of 1020 entries, + * then we need to flush current orphan blocks + * and bring another one in memory + */ + orphan_blk->blk_addr = cpu_to_le16(index); + orphan_blk->blk_count = cpu_to_le16(orphan_blocks); + orphan_blk->entry_count = cpu_to_le32(nentries); + set_page_dirty(page); + f2fs_put_page(page, 1); + index++; + start_blk++; + nentries = 0; + page = NULL; + } + if (page) + goto page_exist; + + page = grab_meta_page(sbi, start_blk); + orphan_blk = (struct f2fs_orphan_block *)page_address(page); + memset(orphan_blk, 0, sizeof(*orphan_blk)); +page_exist: + orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); + } + if (!page) + goto end; + + orphan_blk->blk_addr = cpu_to_le16(index); + orphan_blk->blk_count = cpu_to_le16(orphan_blocks); + orphan_blk->entry_count = cpu_to_le32(nentries); + set_page_dirty(page); + f2fs_put_page(page, 1); +end: + mutex_unlock(&sbi->orphan_inode_mutex); +} + +static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, + block_t cp_addr, unsigned long long *version) +{ + struct page *cp_page_1, *cp_page_2 = NULL; + unsigned long blk_size = sbi->blocksize; + struct f2fs_checkpoint *cp_block; + unsigned long long cur_version = 0, pre_version = 0; + unsigned int crc = 0; + size_t crc_offset; + + /* Read the 1st cp block in this CP pack */ + cp_page_1 = get_meta_page(sbi, cp_addr); + + /* get the version number */ + cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1); + crc_offset = le32_to_cpu(cp_block->checksum_offset); + if (crc_offset >= blk_size) + goto invalid_cp1; + + crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); + if (!f2fs_crc_valid(crc, cp_block, crc_offset)) + goto invalid_cp1; + + pre_version = le64_to_cpu(cp_block->checkpoint_ver); + + /* Read the 2nd cp block in this CP pack */ + cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; + cp_page_2 = get_meta_page(sbi, cp_addr); + + cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2); + crc_offset = le32_to_cpu(cp_block->checksum_offset); + if (crc_offset >= blk_size) + goto invalid_cp2; + + crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); + if (!f2fs_crc_valid(crc, cp_block, crc_offset)) + goto invalid_cp2; + + cur_version = le64_to_cpu(cp_block->checkpoint_ver); + + if (cur_version == pre_version) { + *version = cur_version; + f2fs_put_page(cp_page_2, 1); + return cp_page_1; + } +invalid_cp2: + f2fs_put_page(cp_page_2, 1); +invalid_cp1: + f2fs_put_page(cp_page_1, 1); + return NULL; +} + +int get_valid_checkpoint(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *cp_block; + struct f2fs_super_block *fsb = sbi->raw_super; + struct page *cp1, *cp2, *cur_page; + unsigned long blk_size = sbi->blocksize; + unsigned long long cp1_version = 0, cp2_version = 0; + unsigned long long cp_start_blk_no; + + sbi->ckpt = kzalloc(blk_size, GFP_KERNEL); + if (!sbi->ckpt) + return -ENOMEM; + /* + * Finding out valid cp block involves read both + * sets( cp pack1 and cp pack 2) + */ + cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr); + cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); + + /* The second checkpoint pack should start at the next segment */ + cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); + cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version); + + if (cp1 && cp2) { + if (ver_after(cp2_version, cp1_version)) + cur_page = cp2; + else + cur_page = cp1; + } else if (cp1) { + cur_page = cp1; + } else if (cp2) { + cur_page = cp2; + } else { + goto fail_no_cp; + } + + cp_block = (struct f2fs_checkpoint *)page_address(cur_page); + memcpy(sbi->ckpt, cp_block, blk_size); + + f2fs_put_page(cp1, 1); + f2fs_put_page(cp2, 1); + return 0; + +fail_no_cp: + kfree(sbi->ckpt); + return -EINVAL; +} + +void set_dirty_dir_page(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct list_head *head = &sbi->dir_inode_list; + struct dir_inode_entry *new; + struct list_head *this; + + if (!S_ISDIR(inode->i_mode)) + return; +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } + new->inode = inode; + INIT_LIST_HEAD(&new->list); + + spin_lock(&sbi->dir_inode_lock); + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode == inode) { + kmem_cache_free(inode_entry_slab, new); + goto out; + } + } + list_add_tail(&new->list, head); + sbi->n_dirty_dirs++; + + BUG_ON(!S_ISDIR(inode->i_mode)); +out: + inc_page_count(sbi, F2FS_DIRTY_DENTS); + inode_inc_dirty_dents(inode); + SetPagePrivate(page); + + spin_unlock(&sbi->dir_inode_lock); +} + +void remove_dirty_dir_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + + if (!S_ISDIR(inode->i_mode)) + return; + + spin_lock(&sbi->dir_inode_lock); + if (atomic_read(&F2FS_I(inode)->dirty_dents)) + goto out; + + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode == inode) { + list_del(&entry->list); + kmem_cache_free(inode_entry_slab, entry); + sbi->n_dirty_dirs--; + break; + } + } +out: + spin_unlock(&sbi->dir_inode_lock); +} + +void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &sbi->dir_inode_list; + struct dir_inode_entry *entry; + struct inode *inode; +retry: + spin_lock(&sbi->dir_inode_lock); + if (list_empty(head)) { + spin_unlock(&sbi->dir_inode_lock); + return; + } + entry = list_entry(head->next, struct dir_inode_entry, list); + inode = igrab(entry->inode); + spin_unlock(&sbi->dir_inode_lock); + if (inode) { + filemap_flush(inode->i_mapping); + iput(inode); + } else { + /* + * We should submit bio, since it exists several + * wribacking dentry pages in the freeing inode. + */ + f2fs_submit_bio(sbi, DATA, true); + } + goto retry; +} + +/* + * Freeze all the FS-operations for checkpoint. + */ +void block_operations(struct f2fs_sb_info *sbi) +{ + int t; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + + /* Stop renaming operation */ + mutex_lock_op(sbi, RENAME); + mutex_lock_op(sbi, DENTRY_OPS); + +retry_dents: + /* write all the dirty dentry pages */ + sync_dirty_dir_inodes(sbi); + + mutex_lock_op(sbi, DATA_WRITE); + if (get_pages(sbi, F2FS_DIRTY_DENTS)) { + mutex_unlock_op(sbi, DATA_WRITE); + goto retry_dents; + } + + /* block all the operations */ + for (t = DATA_NEW; t <= NODE_TRUNC; t++) + mutex_lock_op(sbi, t); + + mutex_lock(&sbi->write_inode); + + /* + * POR: we should ensure that there is no dirty node pages + * until finishing nat/sit flush. + */ +retry: + sync_node_pages(sbi, 0, &wbc); + + mutex_lock_op(sbi, NODE_WRITE); + + if (get_pages(sbi, F2FS_DIRTY_NODES)) { + mutex_unlock_op(sbi, NODE_WRITE); + goto retry; + } + mutex_unlock(&sbi->write_inode); +} + +static void unblock_operations(struct f2fs_sb_info *sbi) +{ + int t; + for (t = NODE_WRITE; t >= RENAME; t--) + mutex_unlock_op(sbi, t); +} + +static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + nid_t last_nid = 0; + block_t start_blk; + struct page *cp_page; + unsigned int data_sum_blocks, orphan_blocks; + unsigned int crc32 = 0; + void *kaddr; + int i; + + /* Flush all the NAT/SIT pages */ + while (get_pages(sbi, F2FS_DIRTY_META)) + sync_meta_pages(sbi, META, LONG_MAX); + + next_free_nid(sbi, &last_nid); + + /* + * modify checkpoint + * version number is already updated + */ + ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); + ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); + ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); + for (i = 0; i < 3; i++) { + ckpt->cur_node_segno[i] = + cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); + ckpt->cur_node_blkoff[i] = + cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE)); + ckpt->alloc_type[i + CURSEG_HOT_NODE] = + curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); + } + for (i = 0; i < 3; i++) { + ckpt->cur_data_segno[i] = + cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); + ckpt->cur_data_blkoff[i] = + cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA)); + ckpt->alloc_type[i + CURSEG_HOT_DATA] = + curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); + } + + ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); + ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); + ckpt->next_free_nid = cpu_to_le32(last_nid); + + /* 2 cp + n data seg summary + orphan inode blocks */ + data_sum_blocks = npages_for_summary_flush(sbi); + if (data_sum_blocks < 3) + set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); + else + clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); + + orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) + / F2FS_ORPHANS_PER_BLOCK; + ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); + + if (is_umount) { + set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); + } else { + clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + data_sum_blocks + orphan_blocks); + } + + if (sbi->n_orphans) + set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + else + clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + + /* update SIT/NAT bitmap */ + get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); + get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); + + crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); + *(__le32 *)((unsigned char *)ckpt + + le32_to_cpu(ckpt->checksum_offset)) + = cpu_to_le32(crc32); + + start_blk = __start_cp_addr(sbi); + + /* write out checkpoint buffer at block 0 */ + cp_page = grab_meta_page(sbi, start_blk++); + kaddr = page_address(cp_page); + memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + + if (sbi->n_orphans) { + write_orphan_inodes(sbi, start_blk); + start_blk += orphan_blocks; + } + + write_data_summaries(sbi, start_blk); + start_blk += data_sum_blocks; + if (is_umount) { + write_node_summaries(sbi, start_blk); + start_blk += NR_CURSEG_NODE_TYPE; + } + + /* writeout checkpoint block */ + cp_page = grab_meta_page(sbi, start_blk); + kaddr = page_address(cp_page); + memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + + /* wait for previous submitted node/meta pages writeback */ + while (get_pages(sbi, F2FS_WRITEBACK)) + congestion_wait(BLK_RW_ASYNC, HZ / 50); + + filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); + filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); + + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + sbi->alloc_valid_block_count = 0; + + /* Here, we only have one bio having CP pack */ + if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) + sbi->sb->s_flags |= MS_RDONLY; + else + sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + + clear_prefree_segments(sbi); + F2FS_RESET_SB_DIRT(sbi); +} + +/* + * We guarantee that this checkpoint procedure should not fail. + */ +void write_checkpoint(struct f2fs_sb_info *sbi, bool blocked, bool is_umount) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long long ckpt_ver; + + if (!blocked) { + mutex_lock(&sbi->cp_mutex); + block_operations(sbi); + } + + f2fs_submit_bio(sbi, DATA, true); + f2fs_submit_bio(sbi, NODE, true); + f2fs_submit_bio(sbi, META, true); + + /* + * update checkpoint pack index + * Increase the version number so that + * SIT entries and seg summaries are written at correct place + */ + ckpt_ver = le64_to_cpu(ckpt->checkpoint_ver); + ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); + + /* write cached NAT/SIT entries to NAT/SIT area */ + flush_nat_entries(sbi); + flush_sit_entries(sbi); + + reset_victim_segmap(sbi); + + /* unlock all the fs_lock[] in do_checkpoint() */ + do_checkpoint(sbi, is_umount); + + unblock_operations(sbi); + mutex_unlock(&sbi->cp_mutex); +} + +void init_orphan_info(struct f2fs_sb_info *sbi) +{ + mutex_init(&sbi->orphan_inode_mutex); + INIT_LIST_HEAD(&sbi->orphan_inode_list); + sbi->n_orphans = 0; +} + +int create_checkpoint_caches(void) +{ + orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", + sizeof(struct orphan_inode_entry), NULL); + if (unlikely(!orphan_entry_slab)) + return -ENOMEM; + inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", + sizeof(struct dir_inode_entry), NULL); + if (unlikely(!inode_entry_slab)) { + kmem_cache_destroy(orphan_entry_slab); + return -ENOMEM; + } + return 0; +} + +void destroy_checkpoint_caches(void) +{ + kmem_cache_destroy(orphan_entry_slab); + kmem_cache_destroy(inode_entry_slab); +} diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c new file mode 100644 index 000000000000..655aeabc1dd4 --- /dev/null +++ b/fs/f2fs/data.c @@ -0,0 +1,702 @@ +/* + * fs/f2fs/data.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/fs.h> +#include <linux/f2fs_fs.h> +#include <linux/buffer_head.h> +#include <linux/mpage.h> +#include <linux/writeback.h> +#include <linux/backing-dev.h> +#include <linux/blkdev.h> +#include <linux/bio.h> + +#include "f2fs.h" +#include "node.h" +#include "segment.h" + +/* + * Lock ordering for the change of data block address: + * ->data_page + * ->node_page + * update block addresses in the node page + */ +static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) +{ + struct f2fs_node *rn; + __le32 *addr_array; + struct page *node_page = dn->node_page; + unsigned int ofs_in_node = dn->ofs_in_node; + + wait_on_page_writeback(node_page); + + rn = (struct f2fs_node *)page_address(node_page); + + /* Get physical address of data block */ + addr_array = blkaddr_in_node(rn); + addr_array[ofs_in_node] = cpu_to_le32(new_addr); + set_page_dirty(node_page); +} + +int reserve_new_block(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + + if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) + return -EPERM; + if (!inc_valid_block_count(sbi, dn->inode, 1)) + return -ENOSPC; + + __set_data_blkaddr(dn, NEW_ADDR); + dn->data_blkaddr = NEW_ADDR; + sync_inode_page(dn); + return 0; +} + +static int check_extent_cache(struct inode *inode, pgoff_t pgofs, + struct buffer_head *bh_result) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + pgoff_t start_fofs, end_fofs; + block_t start_blkaddr; + + read_lock(&fi->ext.ext_lock); + if (fi->ext.len == 0) { + read_unlock(&fi->ext.ext_lock); + return 0; + } + + sbi->total_hit_ext++; + start_fofs = fi->ext.fofs; + end_fofs = fi->ext.fofs + fi->ext.len - 1; + start_blkaddr = fi->ext.blk_addr; + + if (pgofs >= start_fofs && pgofs <= end_fofs) { + unsigned int blkbits = inode->i_sb->s_blocksize_bits; + size_t count; + + clear_buffer_new(bh_result); + map_bh(bh_result, inode->i_sb, + start_blkaddr + pgofs - start_fofs); + count = end_fofs - pgofs + 1; + if (count < (UINT_MAX >> blkbits)) + bh_result->b_size = (count << blkbits); + else + bh_result->b_size = UINT_MAX; + + sbi->read_hit_ext++; + read_unlock(&fi->ext.ext_lock); + return 1; + } + read_unlock(&fi->ext.ext_lock); + return 0; +} + +void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) +{ + struct f2fs_inode_info *fi = F2FS_I(dn->inode); + pgoff_t fofs, start_fofs, end_fofs; + block_t start_blkaddr, end_blkaddr; + + BUG_ON(blk_addr == NEW_ADDR); + fofs = start_bidx_of_node(ofs_of_node(dn->node_page)) + dn->ofs_in_node; + + /* Update the page address in the parent node */ + __set_data_blkaddr(dn, blk_addr); + + write_lock(&fi->ext.ext_lock); + + start_fofs = fi->ext.fofs; + end_fofs = fi->ext.fofs + fi->ext.len - 1; + start_blkaddr = fi->ext.blk_addr; + end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; + + /* Drop and initialize the matched extent */ + if (fi->ext.len == 1 && fofs == start_fofs) + fi->ext.len = 0; + + /* Initial extent */ + if (fi->ext.len == 0) { + if (blk_addr != NULL_ADDR) { + fi->ext.fofs = fofs; + fi->ext.blk_addr = blk_addr; + fi->ext.len = 1; + } + goto end_update; + } + + /* Frone merge */ + if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { + fi->ext.fofs--; + fi->ext.blk_addr--; + fi->ext.len++; + goto end_update; + } + + /* Back merge */ + if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { + fi->ext.len++; + goto end_update; + } + + /* Split the existing extent */ + if (fi->ext.len > 1 && + fofs >= start_fofs && fofs <= end_fofs) { + if ((end_fofs - fofs) < (fi->ext.len >> 1)) { + fi->ext.len = fofs - start_fofs; + } else { + fi->ext.fofs = fofs + 1; + fi->ext.blk_addr = start_blkaddr + + fofs - start_fofs + 1; + fi->ext.len -= fofs - start_fofs + 1; + } + goto end_update; + } + write_unlock(&fi->ext.ext_lock); + return; + +end_update: + write_unlock(&fi->ext.ext_lock); + sync_inode_page(dn); + return; +} + +struct page *find_data_page(struct inode *inode, pgoff_t index) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct dnode_of_data dn; + struct page *page; + int err; + + page = find_get_page(mapping, index); + if (page && PageUptodate(page)) + return page; + f2fs_put_page(page, 0); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, RDONLY_NODE); + if (err) + return ERR_PTR(err); + f2fs_put_dnode(&dn); + + if (dn.data_blkaddr == NULL_ADDR) + return ERR_PTR(-ENOENT); + + /* By fallocate(), there is no cached page, but with NEW_ADDR */ + if (dn.data_blkaddr == NEW_ADDR) + return ERR_PTR(-EINVAL); + + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); + + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) { + f2fs_put_page(page, 1); + return ERR_PTR(err); + } + unlock_page(page); + return page; +} + +/* + * If it tries to access a hole, return an error. + * Because, the callers, functions in dir.c and GC, should be able to know + * whether this page exists or not. + */ +struct page *get_lock_data_page(struct inode *inode, pgoff_t index) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct dnode_of_data dn; + struct page *page; + int err; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, RDONLY_NODE); + if (err) + return ERR_PTR(err); + f2fs_put_dnode(&dn); + + if (dn.data_blkaddr == NULL_ADDR) + return ERR_PTR(-ENOENT); + + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); + + if (PageUptodate(page)) + return page; + + BUG_ON(dn.data_blkaddr == NEW_ADDR); + BUG_ON(dn.data_blkaddr == NULL_ADDR); + + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) { + f2fs_put_page(page, 1); + return ERR_PTR(err); + } + return page; +} + +/* + * Caller ensures that this data page is never allocated. + * A new zero-filled data page is allocated in the page cache. + */ +struct page *get_new_data_page(struct inode *inode, pgoff_t index, + bool new_i_size) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + struct page *page; + struct dnode_of_data dn; + int err; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, 0); + if (err) + return ERR_PTR(err); + + if (dn.data_blkaddr == NULL_ADDR) { + if (reserve_new_block(&dn)) { + f2fs_put_dnode(&dn); + return ERR_PTR(-ENOSPC); + } + } + f2fs_put_dnode(&dn); + + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); + + if (PageUptodate(page)) + return page; + + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + } else { + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) { + f2fs_put_page(page, 1); + return ERR_PTR(err); + } + } + SetPageUptodate(page); + + if (new_i_size && + i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { + i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); + mark_inode_dirty_sync(inode); + } + return page; +} + +static void read_end_io(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + SetPageUptodate(page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + unlock_page(page); + } while (bvec >= bio->bi_io_vec); + kfree(bio->bi_private); + bio_put(bio); +} + +/* + * Fill the locked page with data located in the block address. + * Read operation is synchronous, and caller must unlock the page. + */ +int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, + block_t blk_addr, int type) +{ + struct block_device *bdev = sbi->sb->s_bdev; + bool sync = (type == READ_SYNC); + struct bio *bio; + + /* This page can be already read by other threads */ + if (PageUptodate(page)) { + if (!sync) + unlock_page(page); + return 0; + } + + down_read(&sbi->bio_sem); + + /* Allocate a new bio */ + bio = f2fs_bio_alloc(bdev, 1); + + /* Initialize the bio */ + bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + bio->bi_end_io = read_end_io; + + if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { + kfree(bio->bi_private); + bio_put(bio); + up_read(&sbi->bio_sem); + return -EFAULT; + } + + submit_bio(type, bio); + up_read(&sbi->bio_sem); + + /* wait for read completion if sync */ + if (sync) { + lock_page(page); + if (PageError(page)) + return -EIO; + } + return 0; +} + +/* + * This function should be used by the data read flow only where it + * does not check the "create" flag that indicates block allocation. + * The reason for this special functionality is to exploit VFS readahead + * mechanism. + */ +static int get_data_block_ro(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + unsigned int blkbits = inode->i_sb->s_blocksize_bits; + unsigned maxblocks = bh_result->b_size >> blkbits; + struct dnode_of_data dn; + pgoff_t pgofs; + int err; + + /* Get the page offset from the block offset(iblock) */ + pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); + + if (check_extent_cache(inode, pgofs, bh_result)) + return 0; + + /* When reading holes, we need its node page */ + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, pgofs, RDONLY_NODE); + if (err) + return (err == -ENOENT) ? 0 : err; + + /* It does not support data allocation */ + BUG_ON(create); + + if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { + int i; + unsigned int end_offset; + + end_offset = IS_INODE(dn.node_page) ? + ADDRS_PER_INODE : + ADDRS_PER_BLOCK; + + clear_buffer_new(bh_result); + + /* Give more consecutive addresses for the read ahead */ + for (i = 0; i < end_offset - dn.ofs_in_node; i++) + if (((datablock_addr(dn.node_page, + dn.ofs_in_node + i)) + != (dn.data_blkaddr + i)) || maxblocks == i) + break; + map_bh(bh_result, inode->i_sb, dn.data_blkaddr); + bh_result->b_size = (i << blkbits); + } + f2fs_put_dnode(&dn); + return 0; +} + +static int f2fs_read_data_page(struct file *file, struct page *page) +{ + return mpage_readpage(page, get_data_block_ro); +} + +static int f2fs_read_data_pages(struct file *file, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); +} + +int do_write_data_page(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + block_t old_blk_addr, new_blk_addr; + struct dnode_of_data dn; + int err = 0; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, page->index, RDONLY_NODE); + if (err) + return err; + + old_blk_addr = dn.data_blkaddr; + + /* This page is already truncated */ + if (old_blk_addr == NULL_ADDR) + goto out_writepage; + + set_page_writeback(page); + + /* + * If current allocation needs SSR, + * it had better in-place writes for updated data. + */ + if (old_blk_addr != NEW_ADDR && !is_cold_data(page) && + need_inplace_update(inode)) { + rewrite_data_page(F2FS_SB(inode->i_sb), page, + old_blk_addr); + } else { + write_data_page(inode, page, &dn, + old_blk_addr, &new_blk_addr); + update_extent_cache(new_blk_addr, &dn); + F2FS_I(inode)->data_version = + le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver); + } +out_writepage: + f2fs_put_dnode(&dn); + return err; +} + +static int f2fs_write_data_page(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = ((unsigned long long) i_size) + >> PAGE_CACHE_SHIFT; + unsigned offset; + int err = 0; + + if (page->index < end_index) + goto out; + + /* + * If the offset is out-of-range of file size, + * this page does not have to be written to disk. + */ + offset = i_size & (PAGE_CACHE_SIZE - 1); + if ((page->index >= end_index + 1) || !offset) { + if (S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + goto unlock_out; + } + + zero_user_segment(page, offset, PAGE_CACHE_SIZE); +out: + if (sbi->por_doing) + goto redirty_out; + + if (wbc->for_reclaim && !S_ISDIR(inode->i_mode) && !is_cold_data(page)) + goto redirty_out; + + mutex_lock_op(sbi, DATA_WRITE); + if (S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + err = do_write_data_page(page); + if (err && err != -ENOENT) { + wbc->pages_skipped++; + set_page_dirty(page); + } + mutex_unlock_op(sbi, DATA_WRITE); + + if (wbc->for_reclaim) + f2fs_submit_bio(sbi, DATA, true); + + if (err == -ENOENT) + goto unlock_out; + + clear_cold_data(page); + unlock_page(page); + + if (!wbc->for_reclaim && !S_ISDIR(inode->i_mode)) + f2fs_balance_fs(sbi); + return 0; + +unlock_out: + unlock_page(page); + return (err == -ENOENT) ? 0 : err; + +redirty_out: + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; +} + +#define MAX_DESIRED_PAGES_WP 4096 + +static int f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ret; + long excess_nrtw = 0, desired_nrtw; + + if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { + desired_nrtw = MAX_DESIRED_PAGES_WP; + excess_nrtw = desired_nrtw - wbc->nr_to_write; + wbc->nr_to_write = desired_nrtw; + } + + if (!S_ISDIR(inode->i_mode)) + mutex_lock(&sbi->writepages); + ret = generic_writepages(mapping, wbc); + if (!S_ISDIR(inode->i_mode)) + mutex_unlock(&sbi->writepages); + f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); + + remove_dirty_dir_inode(inode); + + wbc->nr_to_write -= excess_nrtw; + return ret; +} + +static int f2fs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; + struct dnode_of_data dn; + int err = 0; + + /* for nobh_write_end */ + *fsdata = NULL; + + f2fs_balance_fs(sbi); + + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + return -ENOMEM; + *pagep = page; + + mutex_lock_op(sbi, DATA_NEW); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, 0); + if (err) { + mutex_unlock_op(sbi, DATA_NEW); + f2fs_put_page(page, 1); + return err; + } + + if (dn.data_blkaddr == NULL_ADDR) { + err = reserve_new_block(&dn); + if (err) { + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, DATA_NEW); + f2fs_put_page(page, 1); + return err; + } + } + f2fs_put_dnode(&dn); + + mutex_unlock_op(sbi, DATA_NEW); + + if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) + return 0; + + if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { + unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned end = start + len; + + /* Reading beyond i_size is simple: memset to zero */ + zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); + return 0; + } + + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + } else { + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + if (err) { + f2fs_put_page(page, 1); + return err; + } + } + SetPageUptodate(page); + clear_cold_data(page); + return 0; +} + +static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + + if (rw == WRITE) + return 0; + + /* Needs synchronization with the cleaner */ + return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + get_data_block_ro); +} + +static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (S_ISDIR(inode->i_mode) && PageDirty(page)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + ClearPagePrivate(page); +} + +static int f2fs_release_data_page(struct page *page, gfp_t wait) +{ + ClearPagePrivate(page); + return 0; +} + +static int f2fs_set_data_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + set_dirty_dir_page(inode, page); + return 1; + } + return 0; +} + +const struct address_space_operations f2fs_dblock_aops = { + .readpage = f2fs_read_data_page, + .readpages = f2fs_read_data_pages, + .writepage = f2fs_write_data_page, + .writepages = f2fs_write_data_pages, + .write_begin = f2fs_write_begin, + .write_end = nobh_write_end, + .set_page_dirty = f2fs_set_data_page_dirty, + .invalidatepage = f2fs_invalidate_data_page, + .releasepage = f2fs_release_data_page, + .direct_IO = f2fs_direct_IO, +}; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c new file mode 100644 index 000000000000..0e0380a588ad --- /dev/null +++ b/fs/f2fs/debug.c @@ -0,0 +1,361 @@ +/* + * f2fs debugging statistics + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * Copyright (c) 2012 Linux Foundation + * Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/backing-dev.h> +#include <linux/proc_fs.h> +#include <linux/f2fs_fs.h> +#include <linux/blkdev.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "gc.h" + +static LIST_HEAD(f2fs_stat_list); +static struct dentry *debugfs_root; + +static void update_general_status(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = sbi->stat_info; + int i; + + /* valid check of the segment numbers */ + si->hit_ext = sbi->read_hit_ext; + si->total_ext = sbi->total_hit_ext; + si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); + si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); + si->ndirty_dirs = sbi->n_dirty_dirs; + si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; + si->rsvd_segs = reserved_segments(sbi); + si->overp_segs = overprovision_segments(sbi); + si->valid_count = valid_user_blocks(sbi); + si->valid_node_count = valid_node_count(sbi); + si->valid_inode_count = valid_inode_count(sbi); + si->utilization = utilization(sbi); + + si->free_segs = free_segments(sbi); + si->free_secs = free_sections(sbi); + si->prefree_count = prefree_segments(sbi); + si->dirty_count = dirty_segments(sbi); + si->node_pages = sbi->node_inode->i_mapping->nrpages; + si->meta_pages = sbi->meta_inode->i_mapping->nrpages; + si->nats = NM_I(sbi)->nat_cnt; + si->sits = SIT_I(sbi)->dirty_sentries; + si->fnids = NM_I(sbi)->fcnt; + si->bg_gc = sbi->bg_gc; + si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) + * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) + / 2; + si->util_valid = (int)(written_block_count(sbi) >> + sbi->log_blocks_per_seg) + * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) + / 2; + si->util_invalid = 50 - si->util_free - si->util_valid; + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + si->curseg[i] = curseg->segno; + si->cursec[i] = curseg->segno / sbi->segs_per_sec; + si->curzone[i] = si->cursec[i] / sbi->secs_per_zone; + } + + for (i = 0; i < 2; i++) { + si->segment_count[i] = sbi->segment_count[i]; + si->block_count[i] = sbi->block_count[i]; + } +} + +/* + * This function calculates BDF of every segments + */ +static void update_sit_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = sbi->stat_info; + unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; + struct sit_info *sit_i = SIT_I(sbi); + unsigned int segno, vblocks; + int ndirty = 0; + + bimodal = 0; + total_vblocks = 0; + blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); + hblks_per_sec = blks_per_sec / 2; + mutex_lock(&sit_i->sentry_lock); + for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { + vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + dist = abs(vblocks - hblks_per_sec); + bimodal += dist * dist; + + if (vblocks > 0 && vblocks < blks_per_sec) { + total_vblocks += vblocks; + ndirty++; + } + } + mutex_unlock(&sit_i->sentry_lock); + dist = sbi->total_sections * hblks_per_sec * hblks_per_sec / 100; + si->bimodal = bimodal / dist; + if (si->dirty_count) + si->avg_vblocks = total_vblocks / ndirty; + else + si->avg_vblocks = 0; +} + +/* + * This function calculates memory footprint. + */ +static void update_mem_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = sbi->stat_info; + unsigned npages; + + if (si->base_mem) + goto get_cache; + + si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; + si->base_mem += 2 * sizeof(struct f2fs_inode_info); + si->base_mem += sizeof(*sbi->ckpt); + + /* build sm */ + si->base_mem += sizeof(struct f2fs_sm_info); + + /* build sit */ + si->base_mem += sizeof(struct sit_info); + si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry); + si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); + if (sbi->segs_per_sec > 1) + si->base_mem += sbi->total_sections * + sizeof(struct sec_entry); + si->base_mem += __bitmap_size(sbi, SIT_BITMAP); + + /* build free segmap */ + si->base_mem += sizeof(struct free_segmap_info); + si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += f2fs_bitmap_size(sbi->total_sections); + + /* build curseg */ + si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; + si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE; + + /* build dirty segmap */ + si->base_mem += sizeof(struct dirty_seglist_info); + si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += 2 * f2fs_bitmap_size(TOTAL_SEGS(sbi)); + + /* buld nm */ + si->base_mem += sizeof(struct f2fs_nm_info); + si->base_mem += __bitmap_size(sbi, NAT_BITMAP); + + /* build gc */ + si->base_mem += sizeof(struct f2fs_gc_kthread); + +get_cache: + /* free nids */ + si->cache_mem = NM_I(sbi)->fcnt; + si->cache_mem += NM_I(sbi)->nat_cnt; + npages = sbi->node_inode->i_mapping->nrpages; + si->cache_mem += npages << PAGE_CACHE_SHIFT; + npages = sbi->meta_inode->i_mapping->nrpages; + si->cache_mem += npages << PAGE_CACHE_SHIFT; + si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); + si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); +} + +static int stat_show(struct seq_file *s, void *v) +{ + struct f2fs_stat_info *si, *next; + int i = 0; + int j; + + list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) { + + mutex_lock(&si->stat_lock); + if (!si->sbi) { + mutex_unlock(&si->stat_lock); + continue; + } + update_general_status(si->sbi); + + seq_printf(s, "\n=====[ partition info. #%d ]=====\n", i++); + seq_printf(s, "[SB: 1] [CP: 2] [NAT: %d] [SIT: %d] ", + si->nat_area_segs, si->sit_area_segs); + seq_printf(s, "[SSA: %d] [MAIN: %d", + si->ssa_area_segs, si->main_area_segs); + seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", + si->overp_segs, si->rsvd_segs); + seq_printf(s, "Utilization: %d%% (%d valid blocks)\n", + si->utilization, si->valid_count); + seq_printf(s, " - Node: %u (Inode: %u, ", + si->valid_node_count, si->valid_inode_count); + seq_printf(s, "Other: %u)\n - Data: %u\n", + si->valid_node_count - si->valid_inode_count, + si->valid_count - si->valid_node_count); + seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", + si->main_area_segs, si->main_area_sections, + si->main_area_zones); + seq_printf(s, " - COLD data: %d, %d, %d\n", + si->curseg[CURSEG_COLD_DATA], + si->cursec[CURSEG_COLD_DATA], + si->curzone[CURSEG_COLD_DATA]); + seq_printf(s, " - WARM data: %d, %d, %d\n", + si->curseg[CURSEG_WARM_DATA], + si->cursec[CURSEG_WARM_DATA], + si->curzone[CURSEG_WARM_DATA]); + seq_printf(s, " - HOT data: %d, %d, %d\n", + si->curseg[CURSEG_HOT_DATA], + si->cursec[CURSEG_HOT_DATA], + si->curzone[CURSEG_HOT_DATA]); + seq_printf(s, " - Dir dnode: %d, %d, %d\n", + si->curseg[CURSEG_HOT_NODE], + si->cursec[CURSEG_HOT_NODE], + si->curzone[CURSEG_HOT_NODE]); + seq_printf(s, " - File dnode: %d, %d, %d\n", + si->curseg[CURSEG_WARM_NODE], + si->cursec[CURSEG_WARM_NODE], + si->curzone[CURSEG_WARM_NODE]); + seq_printf(s, " - Indir nodes: %d, %d, %d\n", + si->curseg[CURSEG_COLD_NODE], + si->cursec[CURSEG_COLD_NODE], + si->curzone[CURSEG_COLD_NODE]); + seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n", + si->main_area_segs - si->dirty_count - + si->prefree_count - si->free_segs, + si->dirty_count); + seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", + si->prefree_count, si->free_segs, si->free_secs); + seq_printf(s, "GC calls: %d (BG: %d)\n", + si->call_count, si->bg_gc); + seq_printf(s, " - data segments : %d\n", si->data_segs); + seq_printf(s, " - node segments : %d\n", si->node_segs); + seq_printf(s, "Try to move %d blocks\n", si->tot_blks); + seq_printf(s, " - data blocks : %d\n", si->data_blks); + seq_printf(s, " - node blocks : %d\n", si->node_blks); + seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", + si->hit_ext, si->total_ext); + seq_printf(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - nodes %4d in %4d\n", + si->ndirty_node, si->node_pages); + seq_printf(s, " - dents %4d in dirs:%4d\n", + si->ndirty_dent, si->ndirty_dirs); + seq_printf(s, " - meta %4d in %4d\n", + si->ndirty_meta, si->meta_pages); + seq_printf(s, " - NATs %5d > %lu\n", + si->nats, NM_WOUT_THRESHOLD); + seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", + si->sits, si->fnids); + seq_printf(s, "\nDistribution of User Blocks:"); + seq_printf(s, " [ valid | invalid | free ]\n"); + seq_printf(s, " ["); + + for (j = 0; j < si->util_valid; j++) + seq_printf(s, "-"); + seq_printf(s, "|"); + + for (j = 0; j < si->util_invalid; j++) + seq_printf(s, "-"); + seq_printf(s, "|"); + + for (j = 0; j < si->util_free; j++) + seq_printf(s, "-"); + seq_printf(s, "]\n\n"); + seq_printf(s, "SSR: %u blocks in %u segments\n", + si->block_count[SSR], si->segment_count[SSR]); + seq_printf(s, "LFS: %u blocks in %u segments\n", + si->block_count[LFS], si->segment_count[LFS]); + + /* segment usage info */ + update_sit_info(si->sbi); + seq_printf(s, "\nBDF: %u, avg. vblocks: %u\n", + si->bimodal, si->avg_vblocks); + + /* memory footprint */ + update_mem_info(si->sbi); + seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", + (si->base_mem + si->cache_mem) >> 10, + si->base_mem >> 10, si->cache_mem >> 10); + mutex_unlock(&si->stat_lock); + } + return 0; +} + +static int stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, stat_show, inode->i_private); +} + +static const struct file_operations stat_fops = { + .open = stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int init_stats(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_stat_info *si; + + sbi->stat_info = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL); + if (!sbi->stat_info) + return -ENOMEM; + + si = sbi->stat_info; + mutex_init(&si->stat_lock); + list_add_tail(&si->stat_list, &f2fs_stat_list); + + si->all_area_segs = le32_to_cpu(raw_super->segment_count); + si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); + si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); + si->ssa_area_segs = le32_to_cpu(raw_super->segment_count_ssa); + si->main_area_segs = le32_to_cpu(raw_super->segment_count_main); + si->main_area_sections = le32_to_cpu(raw_super->section_count); + si->main_area_zones = si->main_area_sections / + le32_to_cpu(raw_super->secs_per_zone); + si->sbi = sbi; + return 0; +} + +int f2fs_build_stats(struct f2fs_sb_info *sbi) +{ + int retval; + + retval = init_stats(sbi); + if (retval) + return retval; + + if (!debugfs_root) + debugfs_root = debugfs_create_dir("f2fs", NULL); + + debugfs_create_file("status", S_IRUGO, debugfs_root, NULL, &stat_fops); + return 0; +} + +void f2fs_destroy_stats(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = sbi->stat_info; + + list_del(&si->stat_list); + mutex_lock(&si->stat_lock); + si->sbi = NULL; + mutex_unlock(&si->stat_lock); + kfree(sbi->stat_info); +} + +void destroy_root_stats(void) +{ + debugfs_remove_recursive(debugfs_root); + debugfs_root = NULL; +} diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c new file mode 100644 index 000000000000..b4e24f32b54e --- /dev/null +++ b/fs/f2fs/dir.c @@ -0,0 +1,672 @@ +/* + * fs/f2fs/dir.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/fs.h> +#include <linux/f2fs_fs.h> +#include "f2fs.h" +#include "acl.h" + +static unsigned long dir_blocks(struct inode *inode) +{ + return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1)) + >> PAGE_CACHE_SHIFT; +} + +static unsigned int dir_buckets(unsigned int level) +{ + if (level < MAX_DIR_HASH_DEPTH / 2) + return 1 << level; + else + return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); +} + +static unsigned int bucket_blocks(unsigned int level) +{ + if (level < MAX_DIR_HASH_DEPTH / 2) + return 2; + else + return 4; +} + +static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { + [F2FS_FT_UNKNOWN] = DT_UNKNOWN, + [F2FS_FT_REG_FILE] = DT_REG, + [F2FS_FT_DIR] = DT_DIR, + [F2FS_FT_CHRDEV] = DT_CHR, + [F2FS_FT_BLKDEV] = DT_BLK, + [F2FS_FT_FIFO] = DT_FIFO, + [F2FS_FT_SOCK] = DT_SOCK, + [F2FS_FT_SYMLINK] = DT_LNK, +}; + +#define S_SHIFT 12 +static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, + [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK, + [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, +}; + +static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +{ + mode_t mode = inode->i_mode; + de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; +} + +static unsigned long dir_block_index(unsigned int level, unsigned int idx) +{ + unsigned long i; + unsigned long bidx = 0; + + for (i = 0; i < level; i++) + bidx += dir_buckets(i) * bucket_blocks(i); + bidx += idx * bucket_blocks(level); + return bidx; +} + +static bool early_match_name(const char *name, int namelen, + f2fs_hash_t namehash, struct f2fs_dir_entry *de) +{ + if (le16_to_cpu(de->name_len) != namelen) + return false; + + if (de->hash_code != namehash) + return false; + + return true; +} + +static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, + const char *name, int namelen, int *max_slots, + f2fs_hash_t namehash, struct page **res_page) +{ + struct f2fs_dir_entry *de; + unsigned long bit_pos, end_pos, next_pos; + struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); + int slots; + + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, 0); + while (bit_pos < NR_DENTRY_IN_BLOCK) { + de = &dentry_blk->dentry[bit_pos]; + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + + if (early_match_name(name, namelen, namehash, de)) { + if (!memcmp(dentry_blk->filename[bit_pos], + name, namelen)) { + *res_page = dentry_page; + goto found; + } + } + next_pos = bit_pos + slots; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, next_pos); + if (bit_pos >= NR_DENTRY_IN_BLOCK) + end_pos = NR_DENTRY_IN_BLOCK; + else + end_pos = bit_pos; + if (*max_slots < end_pos - next_pos) + *max_slots = end_pos - next_pos; + } + + de = NULL; + kunmap(dentry_page); +found: + return de; +} + +static struct f2fs_dir_entry *find_in_level(struct inode *dir, + unsigned int level, const char *name, int namelen, + f2fs_hash_t namehash, struct page **res_page) +{ + int s = GET_DENTRY_SLOTS(namelen); + unsigned int nbucket, nblock; + unsigned int bidx, end_block; + struct page *dentry_page; + struct f2fs_dir_entry *de = NULL; + bool room = false; + int max_slots = 0; + + BUG_ON(level > MAX_DIR_HASH_DEPTH); + + nbucket = dir_buckets(level); + nblock = bucket_blocks(level); + + bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); + end_block = bidx + nblock; + + for (; bidx < end_block; bidx++) { + /* no need to allocate new dentry pages to all the indices */ + dentry_page = find_data_page(dir, bidx); + if (IS_ERR(dentry_page)) { + room = true; + continue; + } + + de = find_in_block(dentry_page, name, namelen, + &max_slots, namehash, res_page); + if (de) + break; + + if (max_slots >= s) + room = true; + f2fs_put_page(dentry_page, 0); + } + + if (!de && room && F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; + } + + return de; +} + +/* + * Find an entry in the specified directory with the wanted name. + * It returns the page where the entry was found (as a parameter - res_page), + * and the entry itself. Page is returned mapped and unlocked. + * Entry is guaranteed to be valid. + */ +struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, + struct qstr *child, struct page **res_page) +{ + const char *name = child->name; + int namelen = child->len; + unsigned long npages = dir_blocks(dir); + struct f2fs_dir_entry *de = NULL; + f2fs_hash_t name_hash; + unsigned int max_depth; + unsigned int level; + + if (npages == 0) + return NULL; + + *res_page = NULL; + + name_hash = f2fs_dentry_hash(name, namelen); + max_depth = F2FS_I(dir)->i_current_depth; + + for (level = 0; level < max_depth; level++) { + de = find_in_level(dir, level, name, + namelen, name_hash, res_page); + if (de) + break; + } + if (!de && F2FS_I(dir)->chash != name_hash) { + F2FS_I(dir)->chash = name_hash; + F2FS_I(dir)->clevel = level - 1; + } + return de; +} + +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) +{ + struct page *page = NULL; + struct f2fs_dir_entry *de = NULL; + struct f2fs_dentry_block *dentry_blk = NULL; + + page = get_lock_data_page(dir, 0); + if (IS_ERR(page)) + return NULL; + + dentry_blk = kmap(page); + de = &dentry_blk->dentry[1]; + *p = page; + unlock_page(page); + return de; +} + +ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) +{ + ino_t res = 0; + struct f2fs_dir_entry *de; + struct page *page; + + de = f2fs_find_entry(dir, qstr, &page); + if (de) { + res = le32_to_cpu(de->ino); + kunmap(page); + f2fs_put_page(page, 0); + } + + return res; +} + +void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, + struct page *page, struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + + mutex_lock_op(sbi, DENTRY_OPS); + lock_page(page); + wait_on_page_writeback(page); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + kunmap(page); + set_page_dirty(page); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + mark_inode_dirty(dir); + + /* update parent inode number before releasing dentry page */ + F2FS_I(inode)->i_pino = dir->i_ino; + + f2fs_put_page(page, 1); + mutex_unlock_op(sbi, DENTRY_OPS); +} + +void init_dent_inode(struct dentry *dentry, struct page *ipage) +{ + struct f2fs_node *rn; + + if (IS_ERR(ipage)) + return; + + wait_on_page_writeback(ipage); + + /* copy dentry info. to this inode page */ + rn = (struct f2fs_node *)page_address(ipage); + rn->i.i_namelen = cpu_to_le32(dentry->d_name.len); + memcpy(rn->i.i_name, dentry->d_name.name, dentry->d_name.len); + set_page_dirty(ipage); +} + +static int init_inode_metadata(struct inode *inode, struct dentry *dentry) +{ + struct inode *dir = dentry->d_parent->d_inode; + + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + int err; + err = new_inode_page(inode, dentry); + if (err) + return err; + + if (S_ISDIR(inode->i_mode)) { + err = f2fs_make_empty(inode, dir); + if (err) { + remove_inode_page(inode); + return err; + } + } + + err = f2fs_init_acl(inode, dir); + if (err) { + remove_inode_page(inode); + return err; + } + } else { + struct page *ipage; + ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + init_dent_inode(dentry, ipage); + f2fs_put_page(ipage, 1); + } + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + inc_nlink(inode); + f2fs_write_inode(inode, NULL); + } + return 0; +} + +static void update_parent_metadata(struct inode *dir, struct inode *inode, + unsigned int current_depth) +{ + bool need_dir_update = false; + + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (S_ISDIR(inode->i_mode)) { + inc_nlink(dir); + need_dir_update = true; + } + clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); + } + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + if (F2FS_I(dir)->i_current_depth != current_depth) { + F2FS_I(dir)->i_current_depth = current_depth; + need_dir_update = true; + } + + if (need_dir_update) + f2fs_write_inode(dir, NULL); + else + mark_inode_dirty(dir); + + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); +} + +static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) +{ + int bit_start = 0; + int zero_start, zero_end; +next: + zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_start); + if (zero_start >= NR_DENTRY_IN_BLOCK) + return NR_DENTRY_IN_BLOCK; + + zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + zero_start); + if (zero_end - zero_start >= slots) + return zero_start; + + bit_start = zero_end + 1; + + if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) + return NR_DENTRY_IN_BLOCK; + goto next; +} + +int f2fs_add_link(struct dentry *dentry, struct inode *inode) +{ + unsigned int bit_pos; + unsigned int level; + unsigned int current_depth; + unsigned long bidx, block; + f2fs_hash_t dentry_hash; + struct f2fs_dir_entry *de; + unsigned int nbucket, nblock; + struct inode *dir = dentry->d_parent->d_inode; + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + struct page *dentry_page = NULL; + struct f2fs_dentry_block *dentry_blk = NULL; + int slots = GET_DENTRY_SLOTS(namelen); + int err = 0; + int i; + + dentry_hash = f2fs_dentry_hash(name, dentry->d_name.len); + level = 0; + current_depth = F2FS_I(dir)->i_current_depth; + if (F2FS_I(dir)->chash == dentry_hash) { + level = F2FS_I(dir)->clevel; + F2FS_I(dir)->chash = 0; + } + +start: + if (current_depth == MAX_DIR_HASH_DEPTH) + return -ENOSPC; + + /* Increase the depth, if required */ + if (level == current_depth) + ++current_depth; + + nbucket = dir_buckets(level); + nblock = bucket_blocks(level); + + bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); + + for (block = bidx; block <= (bidx + nblock - 1); block++) { + mutex_lock_op(sbi, DENTRY_OPS); + dentry_page = get_new_data_page(dir, block, true); + if (IS_ERR(dentry_page)) { + mutex_unlock_op(sbi, DENTRY_OPS); + return PTR_ERR(dentry_page); + } + + dentry_blk = kmap(dentry_page); + bit_pos = room_for_filename(dentry_blk, slots); + if (bit_pos < NR_DENTRY_IN_BLOCK) + goto add_dentry; + + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + mutex_unlock_op(sbi, DENTRY_OPS); + } + + /* Move to next level to find the empty slot for new dentry */ + ++level; + goto start; +add_dentry: + err = init_inode_metadata(inode, dentry); + if (err) + goto fail; + + wait_on_page_writeback(dentry_page); + + de = &dentry_blk->dentry[bit_pos]; + de->hash_code = dentry_hash; + de->name_len = cpu_to_le16(namelen); + memcpy(dentry_blk->filename[bit_pos], name, namelen); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + for (i = 0; i < slots; i++) + test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + set_page_dirty(dentry_page); + + update_parent_metadata(dir, inode, current_depth); + + /* update parent inode number before releasing dentry page */ + F2FS_I(inode)->i_pino = dir->i_ino; +fail: + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + mutex_unlock_op(sbi, DENTRY_OPS); + return err; +} + +/* + * It only removes the dentry from the dentry page,corresponding name + * entry in name page does not need to be touched during deletion. + */ +void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *inode) +{ + struct f2fs_dentry_block *dentry_blk; + unsigned int bit_pos; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + void *kaddr = page_address(page); + int i; + + mutex_lock_op(sbi, DENTRY_OPS); + + lock_page(page); + wait_on_page_writeback(page); + + dentry_blk = (struct f2fs_dentry_block *)kaddr; + bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; + for (i = 0; i < slots; i++) + test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + + /* Let's check and deallocate this dentry page */ + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + 0); + kunmap(page); /* kunmap - pair of f2fs_find_entry */ + set_page_dirty(page); + + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + if (inode && S_ISDIR(inode->i_mode)) { + drop_nlink(dir); + f2fs_write_inode(dir, NULL); + } else { + mark_inode_dirty(dir); + } + + if (inode) { + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + drop_nlink(inode); + if (S_ISDIR(inode->i_mode)) { + drop_nlink(inode); + i_size_write(inode, 0); + } + f2fs_write_inode(inode, NULL); + if (inode->i_nlink == 0) + add_orphan_inode(sbi, inode->i_ino); + } + + if (bit_pos == NR_DENTRY_IN_BLOCK) { + truncate_hole(dir, page->index, page->index + 1); + clear_page_dirty_for_io(page); + ClearPageUptodate(page); + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(dir); + } + f2fs_put_page(page, 1); + + mutex_unlock_op(sbi, DENTRY_OPS); +} + +int f2fs_make_empty(struct inode *inode, struct inode *parent) +{ + struct page *dentry_page; + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dir_entry *de; + void *kaddr; + + dentry_page = get_new_data_page(inode, 0, true); + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); + + kaddr = kmap_atomic(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)kaddr; + + de = &dentry_blk->dentry[0]; + de->name_len = cpu_to_le16(1); + de->hash_code = 0; + de->ino = cpu_to_le32(inode->i_ino); + memcpy(dentry_blk->filename[0], ".", 1); + set_de_type(de, inode); + + de = &dentry_blk->dentry[1]; + de->hash_code = 0; + de->name_len = cpu_to_le16(2); + de->ino = cpu_to_le32(parent->i_ino); + memcpy(dentry_blk->filename[1], "..", 2); + set_de_type(de, inode); + + test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); + test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + kunmap_atomic(kaddr); + + set_page_dirty(dentry_page); + f2fs_put_page(dentry_page, 1); + return 0; +} + +bool f2fs_empty_dir(struct inode *dir) +{ + unsigned long bidx; + struct page *dentry_page; + unsigned int bit_pos; + struct f2fs_dentry_block *dentry_blk; + unsigned long nblock = dir_blocks(dir); + + for (bidx = 0; bidx < nblock; bidx++) { + void *kaddr; + dentry_page = get_lock_data_page(dir, bidx); + if (IS_ERR(dentry_page)) { + if (PTR_ERR(dentry_page) == -ENOENT) + continue; + else + return false; + } + + kaddr = kmap_atomic(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)kaddr; + if (bidx == 0) + bit_pos = 2; + else + bit_pos = 0; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_pos); + kunmap_atomic(kaddr); + + f2fs_put_page(dentry_page, 1); + + if (bit_pos < NR_DENTRY_IN_BLOCK) + return false; + } + return true; +} + +static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + unsigned long pos = file->f_pos; + struct inode *inode = file->f_dentry->d_inode; + unsigned long npages = dir_blocks(inode); + unsigned char *types = NULL; + unsigned int bit_pos = 0, start_bit_pos = 0; + int over = 0; + struct f2fs_dentry_block *dentry_blk = NULL; + struct f2fs_dir_entry *de = NULL; + struct page *dentry_page = NULL; + unsigned int n = 0; + unsigned char d_type = DT_UNKNOWN; + int slots; + + types = f2fs_filetype_table; + bit_pos = (pos % NR_DENTRY_IN_BLOCK); + n = (pos / NR_DENTRY_IN_BLOCK); + + for ( ; n < npages; n++) { + dentry_page = get_lock_data_page(inode, n); + if (IS_ERR(dentry_page)) + continue; + + start_bit_pos = bit_pos; + dentry_blk = kmap(dentry_page); + while (bit_pos < NR_DENTRY_IN_BLOCK) { + d_type = DT_UNKNOWN; + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_DENTRY_IN_BLOCK, + bit_pos); + if (bit_pos >= NR_DENTRY_IN_BLOCK) + break; + + de = &dentry_blk->dentry[bit_pos]; + if (types && de->file_type < F2FS_FT_MAX) + d_type = types[de->file_type]; + + over = filldir(dirent, + dentry_blk->filename[bit_pos], + le16_to_cpu(de->name_len), + (n * NR_DENTRY_IN_BLOCK) + bit_pos, + le32_to_cpu(de->ino), d_type); + if (over) { + file->f_pos += bit_pos - start_bit_pos; + goto success; + } + slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + bit_pos += slots; + } + bit_pos = 0; + file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK; + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + dentry_page = NULL; + } +success: + if (dentry_page && !IS_ERR(dentry_page)) { + kunmap(dentry_page); + f2fs_put_page(dentry_page, 1); + } + + return 0; +} + +const struct file_operations f2fs_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = f2fs_readdir, + .fsync = f2fs_sync_file, + .unlocked_ioctl = f2fs_ioctl, +}; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h new file mode 100644 index 000000000000..a18d63db2fb6 --- /dev/null +++ b/fs/f2fs/f2fs.h @@ -0,0 +1,1083 @@ +/* + * fs/f2fs/f2fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_F2FS_H +#define _LINUX_F2FS_H + +#include <linux/types.h> +#include <linux/page-flags.h> +#include <linux/buffer_head.h> +#include <linux/slab.h> +#include <linux/crc32.h> +#include <linux/magic.h> + +/* + * For mount options + */ +#define F2FS_MOUNT_BG_GC 0x00000001 +#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 +#define F2FS_MOUNT_DISCARD 0x00000004 +#define F2FS_MOUNT_NOHEAP 0x00000008 +#define F2FS_MOUNT_XATTR_USER 0x00000010 +#define F2FS_MOUNT_POSIX_ACL 0x00000020 +#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 + +#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option) + +#define ver_after(a, b) (typecheck(unsigned long long, a) && \ + typecheck(unsigned long long, b) && \ + ((long long)((a) - (b)) > 0)) + +typedef u64 block_t; +typedef u32 nid_t; + +struct f2fs_mount_info { + unsigned int opt; +}; + +static inline __u32 f2fs_crc32(void *buff, size_t len) +{ + return crc32_le(F2FS_SUPER_MAGIC, buff, len); +} + +static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size) +{ + return f2fs_crc32(buff, buff_size) == blk_crc; +} + +/* + * For checkpoint manager + */ +enum { + NAT_BITMAP, + SIT_BITMAP +}; + +/* for the list of orphan inodes */ +struct orphan_inode_entry { + struct list_head list; /* list head */ + nid_t ino; /* inode number */ +}; + +/* for the list of directory inodes */ +struct dir_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ +}; + +/* for the list of fsync inodes, used only during recovery */ +struct fsync_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ + block_t blkaddr; /* block address locating the last inode */ +}; + +#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) +#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits)) + +#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne) +#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid) +#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) +#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) + +static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = nats_in_cursum(rs); + rs->n_nats = cpu_to_le16(before + i); + return before; +} + +static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = sits_in_cursum(rs); + rs->n_sits = cpu_to_le16(before + i); + return before; +} + +/* + * For INODE and NODE manager + */ +#define XATTR_NODE_OFFSET (-1) /* + * store xattrs to one node block per + * file keeping -1 as its node offset to + * distinguish from index node blocks. + */ +#define RDONLY_NODE 1 /* + * specify a read-only mode when getting + * a node block. 0 is read-write mode. + * used by get_dnode_of_data(). + */ +#define F2FS_LINK_MAX 32000 /* maximum link count per file */ + +/* for in-memory extent cache entry */ +struct extent_info { + rwlock_t ext_lock; /* rwlock for consistency */ + unsigned int fofs; /* start offset in a file */ + u32 blk_addr; /* start block address of the extent */ + unsigned int len; /* lenth of the extent */ +}; + +/* + * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. + */ +#define FADVISE_COLD_BIT 0x01 + +struct f2fs_inode_info { + struct inode vfs_inode; /* serve a vfs inode */ + unsigned long i_flags; /* keep an inode flags for ioctl */ + unsigned char i_advise; /* use to give file attribute hints */ + unsigned int i_current_depth; /* use only in directory structure */ + unsigned int i_pino; /* parent inode number */ + umode_t i_acl_mode; /* keep file acl mode temporarily */ + + /* Use below internally in f2fs*/ + unsigned long flags; /* use to pass per-file flags */ + unsigned long long data_version;/* lastes version of data for fsync */ + atomic_t dirty_dents; /* # of dirty dentry pages */ + f2fs_hash_t chash; /* hash value of given file name */ + unsigned int clevel; /* maximum level of given file name */ + nid_t i_xattr_nid; /* node id that contains xattrs */ + struct extent_info ext; /* in-memory extent cache entry */ +}; + +static inline void get_extent_info(struct extent_info *ext, + struct f2fs_extent i_ext) +{ + write_lock(&ext->ext_lock); + ext->fofs = le32_to_cpu(i_ext.fofs); + ext->blk_addr = le32_to_cpu(i_ext.blk_addr); + ext->len = le32_to_cpu(i_ext.len); + write_unlock(&ext->ext_lock); +} + +static inline void set_raw_extent(struct extent_info *ext, + struct f2fs_extent *i_ext) +{ + read_lock(&ext->ext_lock); + i_ext->fofs = cpu_to_le32(ext->fofs); + i_ext->blk_addr = cpu_to_le32(ext->blk_addr); + i_ext->len = cpu_to_le32(ext->len); + read_unlock(&ext->ext_lock); +} + +struct f2fs_nm_info { + block_t nat_blkaddr; /* base disk address of NAT */ + nid_t max_nid; /* maximum possible node ids */ + nid_t init_scan_nid; /* the first nid to be scanned */ + nid_t next_scan_nid; /* the next nid to be scanned */ + + /* NAT cache management */ + struct radix_tree_root nat_root;/* root of the nat entry cache */ + rwlock_t nat_tree_lock; /* protect nat_tree_lock */ + unsigned int nat_cnt; /* the # of cached nat entries */ + struct list_head nat_entries; /* cached nat entry list (clean) */ + struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ + + /* free node ids management */ + struct list_head free_nid_list; /* a list for free nids */ + spinlock_t free_nid_list_lock; /* protect free nid list */ + unsigned int fcnt; /* the number of free node id */ + struct mutex build_lock; /* lock for build free nids */ + + /* for checkpoint */ + char *nat_bitmap; /* NAT bitmap pointer */ + int bitmap_size; /* bitmap size */ +}; + +/* + * this structure is used as one of function parameters. + * all the information are dedicated to a given direct node block determined + * by the data offset in a file. + */ +struct dnode_of_data { + struct inode *inode; /* vfs inode pointer */ + struct page *inode_page; /* its inode page, NULL is possible */ + struct page *node_page; /* cached direct node page */ + nid_t nid; /* node id of the direct node block */ + unsigned int ofs_in_node; /* data offset in the node page */ + bool inode_page_locked; /* inode page is locked or not */ + block_t data_blkaddr; /* block address of the node block */ +}; + +static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, + struct page *ipage, struct page *npage, nid_t nid) +{ + dn->inode = inode; + dn->inode_page = ipage; + dn->node_page = npage; + dn->nid = nid; + dn->inode_page_locked = 0; +} + +/* + * For SIT manager + * + * By default, there are 6 active log areas across the whole main area. + * When considering hot and cold data separation to reduce cleaning overhead, + * we split 3 for data logs and 3 for node logs as hot, warm, and cold types, + * respectively. + * In the current design, you should not change the numbers intentionally. + * Instead, as a mount option such as active_logs=x, you can use 2, 4, and 6 + * logs individually according to the underlying devices. (default: 6) + * Just in case, on-disk layout covers maximum 16 logs that consist of 8 for + * data and 8 for node logs. + */ +#define NR_CURSEG_DATA_TYPE (3) +#define NR_CURSEG_NODE_TYPE (3) +#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) + +enum { + CURSEG_HOT_DATA = 0, /* directory entry blocks */ + CURSEG_WARM_DATA, /* data blocks */ + CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ + CURSEG_HOT_NODE, /* direct node blocks of directory files */ + CURSEG_WARM_NODE, /* direct node blocks of normal files */ + CURSEG_COLD_NODE, /* indirect node blocks */ + NO_CHECK_TYPE +}; + +struct f2fs_sm_info { + struct sit_info *sit_info; /* whole segment information */ + struct free_segmap_info *free_info; /* free segment information */ + struct dirty_seglist_info *dirty_info; /* dirty segment information */ + struct curseg_info *curseg_array; /* active segment information */ + + struct list_head wblist_head; /* list of under-writeback pages */ + spinlock_t wblist_lock; /* lock for checkpoint */ + + block_t seg0_blkaddr; /* block address of 0'th segment */ + block_t main_blkaddr; /* start block address of main area */ + block_t ssa_blkaddr; /* start block address of SSA area */ + + unsigned int segment_count; /* total # of segments */ + unsigned int main_segments; /* # of segments in main area */ + unsigned int reserved_segments; /* # of reserved segments */ + unsigned int ovp_segments; /* # of overprovision segments */ +}; + +/* + * For directory operation + */ +#define NODE_DIR1_BLOCK (ADDRS_PER_INODE + 1) +#define NODE_DIR2_BLOCK (ADDRS_PER_INODE + 2) +#define NODE_IND1_BLOCK (ADDRS_PER_INODE + 3) +#define NODE_IND2_BLOCK (ADDRS_PER_INODE + 4) +#define NODE_DIND_BLOCK (ADDRS_PER_INODE + 5) + +/* + * For superblock + */ +/* + * COUNT_TYPE for monitoring + * + * f2fs monitors the number of several block types such as on-writeback, + * dirty dentry blocks, dirty node blocks, and dirty meta blocks. + */ +enum count_type { + F2FS_WRITEBACK, + F2FS_DIRTY_DENTS, + F2FS_DIRTY_NODES, + F2FS_DIRTY_META, + NR_COUNT_TYPE, +}; + +/* + * FS_LOCK nesting subclasses for the lock validator: + * + * The locking order between these classes is + * RENAME -> DENTRY_OPS -> DATA_WRITE -> DATA_NEW + * -> DATA_TRUNC -> NODE_WRITE -> NODE_NEW -> NODE_TRUNC + */ +enum lock_type { + RENAME, /* for renaming operations */ + DENTRY_OPS, /* for directory operations */ + DATA_WRITE, /* for data write */ + DATA_NEW, /* for data allocation */ + DATA_TRUNC, /* for data truncate */ + NODE_NEW, /* for node allocation */ + NODE_TRUNC, /* for node truncate */ + NODE_WRITE, /* for node write */ + NR_LOCK_TYPE, +}; + +/* + * The below are the page types of bios used in submti_bio(). + * The available types are: + * DATA User data pages. It operates as async mode. + * NODE Node pages. It operates as async mode. + * META FS metadata pages such as SIT, NAT, CP. + * NR_PAGE_TYPE The number of page types. + * META_FLUSH Make sure the previous pages are written + * with waiting the bio's completion + * ... Only can be used with META. + */ +enum page_type { + DATA, + NODE, + META, + NR_PAGE_TYPE, + META_FLUSH, +}; + +struct f2fs_sb_info { + struct super_block *sb; /* pointer to VFS super block */ + struct buffer_head *raw_super_buf; /* buffer head of raw sb */ + struct f2fs_super_block *raw_super; /* raw super block pointer */ + int s_dirty; /* dirty flag for checkpoint */ + + /* for node-related operations */ + struct f2fs_nm_info *nm_info; /* node manager */ + struct inode *node_inode; /* cache node blocks */ + + /* for segment-related operations */ + struct f2fs_sm_info *sm_info; /* segment manager */ + struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ + sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ + struct rw_semaphore bio_sem; /* IO semaphore */ + + /* for checkpoint */ + struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ + struct inode *meta_inode; /* cache meta blocks */ + struct mutex cp_mutex; /* for checkpoint procedure */ + struct mutex fs_lock[NR_LOCK_TYPE]; /* for blocking FS operations */ + struct mutex write_inode; /* mutex for write inode */ + struct mutex writepages; /* mutex for writepages() */ + int por_doing; /* recovery is doing or not */ + + /* for orphan inode management */ + struct list_head orphan_inode_list; /* orphan inode list */ + struct mutex orphan_inode_mutex; /* for orphan inode list */ + unsigned int n_orphans; /* # of orphan inodes */ + + /* for directory inode management */ + struct list_head dir_inode_list; /* dir inode list */ + spinlock_t dir_inode_lock; /* for dir inode list lock */ + unsigned int n_dirty_dirs; /* # of dir inodes */ + + /* basic file system units */ + unsigned int log_sectors_per_block; /* log2 sectors per block */ + unsigned int log_blocksize; /* log2 block size */ + unsigned int blocksize; /* block size */ + unsigned int root_ino_num; /* root inode number*/ + unsigned int node_ino_num; /* node inode number*/ + unsigned int meta_ino_num; /* meta inode number*/ + unsigned int log_blocks_per_seg; /* log2 blocks per segment */ + unsigned int blocks_per_seg; /* blocks per segment */ + unsigned int segs_per_sec; /* segments per section */ + unsigned int secs_per_zone; /* sections per zone */ + unsigned int total_sections; /* total section count */ + unsigned int total_node_count; /* total node block count */ + unsigned int total_valid_node_count; /* valid node block count */ + unsigned int total_valid_inode_count; /* valid inode count */ + int active_logs; /* # of active logs */ + + block_t user_block_count; /* # of user blocks */ + block_t total_valid_block_count; /* # of valid blocks */ + block_t alloc_valid_block_count; /* # of allocated blocks */ + block_t last_valid_block_count; /* for recovery */ + u32 s_next_generation; /* for NFS support */ + atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ + + struct f2fs_mount_info mount_opt; /* mount options */ + + /* for cleaning operations */ + struct mutex gc_mutex; /* mutex for GC */ + struct f2fs_gc_kthread *gc_thread; /* GC thread */ + + /* + * for stat information. + * one is for the LFS mode, and the other is for the SSR mode. + */ + struct f2fs_stat_info *stat_info; /* FS status information */ + unsigned int segment_count[2]; /* # of allocated segments */ + unsigned int block_count[2]; /* # of allocated blocks */ + unsigned int last_victim[2]; /* last victim segment # */ + int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ + int bg_gc; /* background gc calls */ + spinlock_t stat_lock; /* lock for stat operations */ +}; + +/* + * Inline functions + */ +static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) +{ + return container_of(inode, struct f2fs_inode_info, vfs_inode); +} + +static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_super_block *)(sbi->raw_super); +} + +static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_checkpoint *)(sbi->ckpt); +} + +static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_nm_info *)(sbi->nm_info); +} + +static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_sm_info *)(sbi->sm_info); +} + +static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi) +{ + return (struct sit_info *)(SM_I(sbi)->sit_info); +} + +static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi) +{ + return (struct free_segmap_info *)(SM_I(sbi)->free_info); +} + +static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi) +{ + return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); +} + +static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 1; +} + +static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 0; +} + +static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + return ckpt_flags & f; +} + +static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + ckpt_flags |= f; + cp->ckpt_flags = cpu_to_le32(ckpt_flags); +} + +static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + ckpt_flags &= (~f); + cp->ckpt_flags = cpu_to_le32(ckpt_flags); +} + +static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t) +{ + mutex_lock_nested(&sbi->fs_lock[t], t); +} + +static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, enum lock_type t) +{ + mutex_unlock(&sbi->fs_lock[t]); +} + +/* + * Check whether the given nid is within node id range. + */ +static inline void check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) +{ + BUG_ON((nid >= NM_I(sbi)->max_nid)); +} + +#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 + +/* + * Check whether the inode has blocks or not + */ +static inline int F2FS_HAS_BLOCKS(struct inode *inode) +{ + if (F2FS_I(inode)->i_xattr_nid) + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); + else + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); +} + +static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, blkcnt_t count) +{ + block_t valid_block_count; + + spin_lock(&sbi->stat_lock); + valid_block_count = + sbi->total_valid_block_count + (block_t)count; + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + inode->i_blocks += count; + sbi->total_valid_block_count = valid_block_count; + sbi->alloc_valid_block_count += (block_t)count; + spin_unlock(&sbi->stat_lock); + return true; +} + +static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, + blkcnt_t count) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(sbi->total_valid_block_count < (block_t) count); + BUG_ON(inode->i_blocks < count); + inode->i_blocks -= count; + sbi->total_valid_block_count -= (block_t)count; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_inc(&sbi->nr_pages[count_type]); + F2FS_SET_SB_DIRT(sbi); +} + +static inline void inode_inc_dirty_dents(struct inode *inode) +{ + atomic_inc(&F2FS_I(inode)->dirty_dents); +} + +static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_dec(&sbi->nr_pages[count_type]); +} + +static inline void inode_dec_dirty_dents(struct inode *inode) +{ + atomic_dec(&F2FS_I(inode)->dirty_dents); +} + +static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) +{ + return atomic_read(&sbi->nr_pages[count_type]); +} + +static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) +{ + block_t ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_block_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + + /* return NAT or SIT bitmap */ + if (flag == NAT_BITMAP) + return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); + else if (flag == SIT_BITMAP) + return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); + + return 0; +} + +static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + int offset = (flag == NAT_BITMAP) ? + le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; + return &ckpt->sit_nat_version_bitmap + offset; +} + +static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) +{ + block_t start_addr; + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver); + + start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); + + /* + * odd numbered checkpoint should at cp segment 0 + * and even segent must be at cp segment 1 + */ + if (!(ckpt_version & 1)) + start_addr += sbi->blocks_per_seg; + + return start_addr; +} + +static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) +{ + return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + block_t valid_block_count; + unsigned int valid_node_count; + + spin_lock(&sbi->stat_lock); + + valid_block_count = sbi->total_valid_block_count + (block_t)count; + sbi->alloc_valid_block_count += (block_t)count; + valid_node_count = sbi->total_valid_node_count + count; + + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if (valid_node_count > sbi->total_node_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if (inode) + inode->i_blocks += count; + sbi->total_valid_node_count = valid_node_count; + sbi->total_valid_block_count = valid_block_count; + spin_unlock(&sbi->stat_lock); + + return true; +} + +static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + spin_lock(&sbi->stat_lock); + + BUG_ON(sbi->total_valid_block_count < count); + BUG_ON(sbi->total_valid_node_count < count); + BUG_ON(inode->i_blocks < count); + + inode->i_blocks -= count; + sbi->total_valid_node_count -= count; + sbi->total_valid_block_count -= (block_t)count; + + spin_unlock(&sbi->stat_lock); +} + +static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_node_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); + sbi->total_valid_inode_count++; + spin_unlock(&sbi->stat_lock); +} + +static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(!sbi->total_valid_inode_count); + sbi->total_valid_inode_count--; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_inode_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void f2fs_put_page(struct page *page, int unlock) +{ + if (!page || IS_ERR(page)) + return; + + if (unlock) { + BUG_ON(!PageLocked(page)); + unlock_page(page); + } + page_cache_release(page); +} + +static inline void f2fs_put_dnode(struct dnode_of_data *dn) +{ + if (dn->node_page) + f2fs_put_page(dn->node_page, 1); + if (dn->inode_page && dn->node_page != dn->inode_page) + f2fs_put_page(dn->inode_page, 0); + dn->node_page = NULL; + dn->inode_page = NULL; +} + +static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, + size_t size, void (*ctor)(void *)) +{ + return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); +} + +#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) + +static inline bool IS_INODE(struct page *page) +{ + struct f2fs_node *p = (struct f2fs_node *)page_address(page); + return RAW_IS_INODE(p); +} + +static inline __le32 *blkaddr_in_node(struct f2fs_node *node) +{ + return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr; +} + +static inline block_t datablock_addr(struct page *node_page, + unsigned int offset) +{ + struct f2fs_node *raw_node; + __le32 *addr_array; + raw_node = (struct f2fs_node *)page_address(node_page); + addr_array = blkaddr_in_node(raw_node); + return le32_to_cpu(addr_array[offset]); +} + +static inline int f2fs_test_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + return mask & *addr; +} + +static inline int f2fs_set_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr |= mask; + return ret; +} + +static inline int f2fs_clear_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr &= ~mask; + return ret; +} + +/* used for f2fs_inode_info->flags */ +enum { + FI_NEW_INODE, /* indicate newly allocated inode */ + FI_NEED_CP, /* need to do checkpoint during fsync */ + FI_INC_LINK, /* need to increment i_nlink */ + FI_ACL_MODE, /* indicate acl mode */ + FI_NO_ALLOC, /* should not allocate any blocks */ +}; + +static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + set_bit(flag, &fi->flags); +} + +static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) +{ + return test_bit(flag, &fi->flags); +} + +static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + clear_bit(flag, &fi->flags); +} + +static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) +{ + fi->i_acl_mode = mode; + set_inode_flag(fi, FI_ACL_MODE); +} + +static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + if (is_inode_flag_set(fi, FI_ACL_MODE)) { + clear_inode_flag(fi, FI_ACL_MODE); + return 1; + } + return 0; +} + +/* + * file.c + */ +int f2fs_sync_file(struct file *, loff_t, loff_t, int); +void truncate_data_blocks(struct dnode_of_data *); +void f2fs_truncate(struct inode *); +int f2fs_setattr(struct dentry *, struct iattr *); +int truncate_hole(struct inode *, pgoff_t, pgoff_t); +long f2fs_ioctl(struct file *, unsigned int, unsigned long); + +/* + * inode.c + */ +void f2fs_set_inode_flags(struct inode *); +struct inode *f2fs_iget_nowait(struct super_block *, unsigned long); +struct inode *f2fs_iget(struct super_block *, unsigned long); +void update_inode(struct inode *, struct page *); +int f2fs_write_inode(struct inode *, struct writeback_control *); +void f2fs_evict_inode(struct inode *); + +/* + * namei.c + */ +struct dentry *f2fs_get_parent(struct dentry *child); + +/* + * dir.c + */ +struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, + struct page **); +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); +ino_t f2fs_inode_by_name(struct inode *, struct qstr *); +void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, + struct page *, struct inode *); +void init_dent_inode(struct dentry *, struct page *); +int f2fs_add_link(struct dentry *, struct inode *); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +int f2fs_make_empty(struct inode *, struct inode *); +bool f2fs_empty_dir(struct inode *); + +/* + * super.c + */ +int f2fs_sync_fs(struct super_block *, int); + +/* + * hash.c + */ +f2fs_hash_t f2fs_dentry_hash(const char *, int); + +/* + * node.c + */ +struct dnode_of_data; +struct node_info; + +int is_checkpointed_node(struct f2fs_sb_info *, nid_t); +void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); +int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); +int truncate_inode_blocks(struct inode *, pgoff_t); +int remove_inode_page(struct inode *); +int new_inode_page(struct inode *, struct dentry *); +struct page *new_node_page(struct dnode_of_data *, unsigned int); +void ra_node_page(struct f2fs_sb_info *, nid_t); +struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_node_page_ra(struct page *, int); +void sync_inode_page(struct dnode_of_data *); +int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); +bool alloc_nid(struct f2fs_sb_info *, nid_t *); +void alloc_nid_done(struct f2fs_sb_info *, nid_t); +void alloc_nid_failed(struct f2fs_sb_info *, nid_t); +void recover_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, struct node_info *, block_t); +int recover_inode_page(struct f2fs_sb_info *, struct page *); +int restore_node_summary(struct f2fs_sb_info *, unsigned int, + struct f2fs_summary_block *); +void flush_nat_entries(struct f2fs_sb_info *); +int build_node_manager(struct f2fs_sb_info *); +void destroy_node_manager(struct f2fs_sb_info *); +int create_node_manager_caches(void); +void destroy_node_manager_caches(void); + +/* + * segment.c + */ +void f2fs_balance_fs(struct f2fs_sb_info *); +void invalidate_blocks(struct f2fs_sb_info *, block_t); +void locate_dirty_segment(struct f2fs_sb_info *, unsigned int); +void clear_prefree_segments(struct f2fs_sb_info *); +int npages_for_summary_flush(struct f2fs_sb_info *); +void allocate_new_segments(struct f2fs_sb_info *); +struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); +struct bio *f2fs_bio_alloc(struct block_device *, int); +void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync); +int write_meta_page(struct f2fs_sb_info *, struct page *, + struct writeback_control *); +void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, + block_t, block_t *); +void write_data_page(struct inode *, struct page *, struct dnode_of_data*, + block_t, block_t *); +void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); +void recover_data_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void rewrite_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void write_data_summaries(struct f2fs_sb_info *, block_t); +void write_node_summaries(struct f2fs_sb_info *, block_t); +int lookup_journal_in_cursum(struct f2fs_summary_block *, + int, unsigned int, int); +void flush_sit_entries(struct f2fs_sb_info *); +int build_segment_manager(struct f2fs_sb_info *); +void reset_victim_segmap(struct f2fs_sb_info *); +void destroy_segment_manager(struct f2fs_sb_info *); + +/* + * checkpoint.c + */ +struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); +long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); +int check_orphan_space(struct f2fs_sb_info *); +void add_orphan_inode(struct f2fs_sb_info *, nid_t); +void remove_orphan_inode(struct f2fs_sb_info *, nid_t); +int recover_orphan_inodes(struct f2fs_sb_info *); +int get_valid_checkpoint(struct f2fs_sb_info *); +void set_dirty_dir_page(struct inode *, struct page *); +void remove_dirty_dir_inode(struct inode *); +void sync_dirty_dir_inodes(struct f2fs_sb_info *); +void block_operations(struct f2fs_sb_info *); +void write_checkpoint(struct f2fs_sb_info *, bool, bool); +void init_orphan_info(struct f2fs_sb_info *); +int create_checkpoint_caches(void); +void destroy_checkpoint_caches(void); + +/* + * data.c + */ +int reserve_new_block(struct dnode_of_data *); +void update_extent_cache(block_t, struct dnode_of_data *); +struct page *find_data_page(struct inode *, pgoff_t); +struct page *get_lock_data_page(struct inode *, pgoff_t); +struct page *get_new_data_page(struct inode *, pgoff_t, bool); +int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); +int do_write_data_page(struct page *); + +/* + * gc.c + */ +int start_gc_thread(struct f2fs_sb_info *); +void stop_gc_thread(struct f2fs_sb_info *); +block_t start_bidx_of_node(unsigned int); +int f2fs_gc(struct f2fs_sb_info *, int); +void build_gc_manager(struct f2fs_sb_info *); +int create_gc_caches(void); +void destroy_gc_caches(void); + +/* + * recovery.c + */ +void recover_fsync_data(struct f2fs_sb_info *); +bool space_for_roll_forward(struct f2fs_sb_info *); + +/* + * debug.c + */ +#ifdef CONFIG_F2FS_STAT_FS +struct f2fs_stat_info { + struct list_head stat_list; + struct f2fs_sb_info *sbi; + struct mutex stat_lock; + int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; + int main_area_segs, main_area_sections, main_area_zones; + int hit_ext, total_ext; + int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; + int nats, sits, fnids; + int total_count, utilization; + int bg_gc; + unsigned int valid_count, valid_node_count, valid_inode_count; + unsigned int bimodal, avg_vblocks; + int util_free, util_valid, util_invalid; + int rsvd_segs, overp_segs; + int dirty_count, node_pages, meta_pages; + int prefree_count, call_count; + int tot_segs, node_segs, data_segs, free_segs, free_secs; + int tot_blks, data_blks, node_blks; + int curseg[NR_CURSEG_TYPE]; + int cursec[NR_CURSEG_TYPE]; + int curzone[NR_CURSEG_TYPE]; + + unsigned int segment_count[2]; + unsigned int block_count[2]; + unsigned base_mem, cache_mem; +}; + +#define stat_inc_call_count(si) ((si)->call_count++) + +#define stat_inc_seg_count(sbi, type) \ + do { \ + struct f2fs_stat_info *si = sbi->stat_info; \ + (si)->tot_segs++; \ + if (type == SUM_TYPE_DATA) \ + si->data_segs++; \ + else \ + si->node_segs++; \ + } while (0) + +#define stat_inc_tot_blk_count(si, blks) \ + (si->tot_blks += (blks)) + +#define stat_inc_data_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = sbi->stat_info; \ + stat_inc_tot_blk_count(si, blks); \ + si->data_blks += (blks); \ + } while (0) + +#define stat_inc_node_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = sbi->stat_info; \ + stat_inc_tot_blk_count(si, blks); \ + si->node_blks += (blks); \ + } while (0) + +int f2fs_build_stats(struct f2fs_sb_info *); +void f2fs_destroy_stats(struct f2fs_sb_info *); +void destroy_root_stats(void); +#else +#define stat_inc_call_count(si) +#define stat_inc_seg_count(si, type) +#define stat_inc_tot_blk_count(si, blks) +#define stat_inc_data_blk_count(si, blks) +#define stat_inc_node_blk_count(sbi, blks) + +static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } +static inline void destroy_root_stats(void) { } +#endif + +extern const struct file_operations f2fs_dir_operations; +extern const struct file_operations f2fs_file_operations; +extern const struct inode_operations f2fs_file_inode_operations; +extern const struct address_space_operations f2fs_dblock_aops; +extern const struct address_space_operations f2fs_node_aops; +extern const struct address_space_operations f2fs_meta_aops; +extern const struct inode_operations f2fs_dir_inode_operations; +extern const struct inode_operations f2fs_symlink_inode_operations; +extern const struct inode_operations f2fs_special_inode_operations; +#endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c new file mode 100644 index 000000000000..f9e085dfb1f0 --- /dev/null +++ b/fs/f2fs/file.c @@ -0,0 +1,636 @@ +/* + * fs/f2fs/file.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/fs.h> +#include <linux/f2fs_fs.h> +#include <linux/stat.h> +#include <linux/buffer_head.h> +#include <linux/writeback.h> +#include <linux/falloc.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/mount.h> + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "xattr.h" +#include "acl.h" + +static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct page *page = vmf->page; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + block_t old_blk_addr; + struct dnode_of_data dn; + int err; + + f2fs_balance_fs(sbi); + + sb_start_pagefault(inode->i_sb); + + mutex_lock_op(sbi, DATA_NEW); + + /* block allocation */ + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, page->index, 0); + if (err) { + mutex_unlock_op(sbi, DATA_NEW); + goto out; + } + + old_blk_addr = dn.data_blkaddr; + + if (old_blk_addr == NULL_ADDR) { + err = reserve_new_block(&dn); + if (err) { + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, DATA_NEW); + goto out; + } + } + f2fs_put_dnode(&dn); + + mutex_unlock_op(sbi, DATA_NEW); + + lock_page(page); + if (page->mapping != inode->i_mapping || + page_offset(page) >= i_size_read(inode) || + !PageUptodate(page)) { + unlock_page(page); + err = -EFAULT; + goto out; + } + + /* + * check to see if the page is mapped already (no holes) + */ + if (PageMappedToDisk(page)) + goto out; + + /* fill the page */ + wait_on_page_writeback(page); + + /* page is wholly or partially inside EOF */ + if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { + unsigned offset; + offset = i_size_read(inode) & ~PAGE_CACHE_MASK; + zero_user_segment(page, offset, PAGE_CACHE_SIZE); + } + set_page_dirty(page); + SetPageUptodate(page); + + file_update_time(vma->vm_file); +out: + sb_end_pagefault(inode->i_sb); + return block_page_mkwrite_return(err); +} + +static const struct vm_operations_struct f2fs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = f2fs_vm_page_mkwrite, +}; + +static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode) +{ + struct dentry *dentry; + nid_t pino; + + inode = igrab(inode); + dentry = d_find_any_alias(inode); + if (!dentry) { + iput(inode); + return 0; + } + pino = dentry->d_parent->d_inode->i_ino; + dput(dentry); + iput(inode); + return !is_checkpointed_node(sbi, pino); +} + +int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct inode *inode = file->f_mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + unsigned long long cur_version; + int ret = 0; + bool need_cp = false; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + + if (inode->i_sb->s_flags & MS_RDONLY) + return 0; + + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; + + mutex_lock(&inode->i_mutex); + + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + goto out; + + mutex_lock(&sbi->cp_mutex); + cur_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver); + mutex_unlock(&sbi->cp_mutex); + + if (F2FS_I(inode)->data_version != cur_version && + !(inode->i_state & I_DIRTY)) + goto out; + F2FS_I(inode)->data_version--; + + if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) + need_cp = true; + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP)) + need_cp = true; + if (!space_for_roll_forward(sbi)) + need_cp = true; + if (need_to_sync_dir(sbi, inode)) + need_cp = true; + + f2fs_write_inode(inode, NULL); + + if (need_cp) { + /* all the dirty node pages should be flushed for POR */ + ret = f2fs_sync_fs(inode->i_sb, 1); + clear_inode_flag(F2FS_I(inode), FI_NEED_CP); + } else { + while (sync_node_pages(sbi, inode->i_ino, &wbc) == 0) + f2fs_write_inode(inode, NULL); + filemap_fdatawait_range(sbi->node_inode->i_mapping, + 0, LONG_MAX); + } +out: + mutex_unlock(&inode->i_mutex); + return ret; +} + +static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + file_accessed(file); + vma->vm_ops = &f2fs_file_vm_ops; + return 0; +} + +static int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +{ + int nr_free = 0, ofs = dn->ofs_in_node; + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_node *raw_node; + __le32 *addr; + + raw_node = page_address(dn->node_page); + addr = blkaddr_in_node(raw_node) + ofs; + + for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { + block_t blkaddr = le32_to_cpu(*addr); + if (blkaddr == NULL_ADDR) + continue; + + update_extent_cache(NULL_ADDR, dn); + invalidate_blocks(sbi, blkaddr); + dec_valid_block_count(sbi, dn->inode, 1); + nr_free++; + } + if (nr_free) { + set_page_dirty(dn->node_page); + sync_inode_page(dn); + } + dn->ofs_in_node = ofs; + return nr_free; +} + +void truncate_data_blocks(struct dnode_of_data *dn) +{ + truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); +} + +static void truncate_partial_data_page(struct inode *inode, u64 from) +{ + unsigned offset = from & (PAGE_CACHE_SIZE - 1); + struct page *page; + + if (!offset) + return; + + page = find_data_page(inode, from >> PAGE_CACHE_SHIFT); + if (IS_ERR(page)) + return; + + lock_page(page); + wait_on_page_writeback(page); + zero_user(page, offset, PAGE_CACHE_SIZE - offset); + set_page_dirty(page); + f2fs_put_page(page, 1); +} + +static int truncate_blocks(struct inode *inode, u64 from) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + unsigned int blocksize = inode->i_sb->s_blocksize; + struct dnode_of_data dn; + pgoff_t free_from; + int count = 0; + int err; + + free_from = (pgoff_t) + ((from + blocksize - 1) >> (sbi->log_blocksize)); + + mutex_lock_op(sbi, DATA_TRUNC); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, free_from, RDONLY_NODE); + if (err) { + if (err == -ENOENT) + goto free_next; + mutex_unlock_op(sbi, DATA_TRUNC); + return err; + } + + if (IS_INODE(dn.node_page)) + count = ADDRS_PER_INODE; + else + count = ADDRS_PER_BLOCK; + + count -= dn.ofs_in_node; + BUG_ON(count < 0); + if (dn.ofs_in_node || IS_INODE(dn.node_page)) { + truncate_data_blocks_range(&dn, count); + free_from += count; + } + + f2fs_put_dnode(&dn); +free_next: + err = truncate_inode_blocks(inode, free_from); + mutex_unlock_op(sbi, DATA_TRUNC); + + /* lastly zero out the first data page */ + truncate_partial_data_page(inode, from); + + return err; +} + +void f2fs_truncate(struct inode *inode) +{ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + + if (!truncate_blocks(inode, i_size_read(inode))) { + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + } + + f2fs_balance_fs(F2FS_SB(inode->i_sb)); +} + +static int f2fs_getattr(struct vfsmount *mnt, + struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + generic_fillattr(inode, stat); + stat->blocks <<= 3; + return 0; +} + +#ifdef CONFIG_F2FS_FS_POSIX_ACL +static void __setattr_copy(struct inode *inode, const struct iattr *attr) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int ia_valid = attr->ia_valid; + + if (ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + if (ia_valid & ATTR_ATIME) + inode->i_atime = timespec_trunc(attr->ia_atime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MTIME) + inode->i_mtime = timespec_trunc(attr->ia_mtime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_CTIME) + inode->i_ctime = timespec_trunc(attr->ia_ctime, + inode->i_sb->s_time_gran); + if (ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + set_acl_inode(fi, mode); + } +} +#else +#define __setattr_copy setattr_copy +#endif + +int f2fs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + struct f2fs_inode_info *fi = F2FS_I(inode); + int err; + + err = inode_change_ok(inode, attr); + if (err) + return err; + + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) { + truncate_setsize(inode, attr->ia_size); + f2fs_truncate(inode); + } + + __setattr_copy(inode, attr); + + if (attr->ia_valid & ATTR_MODE) { + err = f2fs_acl_chmod(inode); + if (err || is_inode_flag_set(fi, FI_ACL_MODE)) { + inode->i_mode = fi->i_acl_mode; + clear_inode_flag(fi, FI_ACL_MODE); + } + } + + mark_inode_dirty(inode); + return err; +} + +const struct inode_operations f2fs_file_inode_operations = { + .getattr = f2fs_getattr, + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +static void fill_zero(struct inode *inode, pgoff_t index, + loff_t start, loff_t len) +{ + struct page *page; + + if (!len) + return; + + page = get_new_data_page(inode, index, false); + + if (!IS_ERR(page)) { + wait_on_page_writeback(page); + zero_user(page, start, len); + set_page_dirty(page); + f2fs_put_page(page, 1); + } +} + +int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) +{ + pgoff_t index; + int err; + + for (index = pg_start; index < pg_end; index++) { + struct dnode_of_data dn; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + mutex_lock_op(sbi, DATA_TRUNC); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, index, RDONLY_NODE); + if (err) { + mutex_unlock_op(sbi, DATA_TRUNC); + if (err == -ENOENT) + continue; + return err; + } + + if (dn.data_blkaddr != NULL_ADDR) + truncate_data_blocks_range(&dn, 1); + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, DATA_TRUNC); + } + return 0; +} + +static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) +{ + pgoff_t pg_start, pg_end; + loff_t off_start, off_end; + int ret = 0; + + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + + off_start = offset & (PAGE_CACHE_SIZE - 1); + off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + + if (pg_start == pg_end) { + fill_zero(inode, pg_start, off_start, + off_end - off_start); + } else { + if (off_start) + fill_zero(inode, pg_start++, off_start, + PAGE_CACHE_SIZE - off_start); + if (off_end) + fill_zero(inode, pg_end, 0, off_end); + + if (pg_start < pg_end) { + struct address_space *mapping = inode->i_mapping; + loff_t blk_start, blk_end; + + blk_start = pg_start << PAGE_CACHE_SHIFT; + blk_end = pg_end << PAGE_CACHE_SHIFT; + truncate_inode_pages_range(mapping, blk_start, + blk_end - 1); + ret = truncate_hole(inode, pg_start, pg_end); + } + } + + if (!(mode & FALLOC_FL_KEEP_SIZE) && + i_size_read(inode) <= (offset + len)) { + i_size_write(inode, offset); + mark_inode_dirty(inode); + } + + return ret; +} + +static int expand_inode_data(struct inode *inode, loff_t offset, + loff_t len, int mode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + pgoff_t index, pg_start, pg_end; + loff_t new_size = i_size_read(inode); + loff_t off_start, off_end; + int ret = 0; + + ret = inode_newsize_ok(inode, (len + offset)); + if (ret) + return ret; + + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; + pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; + + off_start = offset & (PAGE_CACHE_SIZE - 1); + off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); + + for (index = pg_start; index <= pg_end; index++) { + struct dnode_of_data dn; + + mutex_lock_op(sbi, DATA_NEW); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, index, 0); + if (ret) { + mutex_unlock_op(sbi, DATA_NEW); + break; + } + + if (dn.data_blkaddr == NULL_ADDR) { + ret = reserve_new_block(&dn); + if (ret) { + f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, DATA_NEW); + break; + } + } + f2fs_put_dnode(&dn); + + mutex_unlock_op(sbi, DATA_NEW); + + if (pg_start == pg_end) + new_size = offset + len; + else if (index == pg_start && off_start) + new_size = (index + 1) << PAGE_CACHE_SHIFT; + else if (index == pg_end) + new_size = (index << PAGE_CACHE_SHIFT) + off_end; + else + new_size += PAGE_CACHE_SIZE; + } + + if (!(mode & FALLOC_FL_KEEP_SIZE) && + i_size_read(inode) < new_size) { + i_size_write(inode, new_size); + mark_inode_dirty(inode); + } + + return ret; +} + +static long f2fs_fallocate(struct file *file, int mode, + loff_t offset, loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + long ret; + + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + + if (mode & FALLOC_FL_PUNCH_HOLE) + ret = punch_hole(inode, offset, len, mode); + else + ret = expand_inode_data(inode, offset, len, mode); + + f2fs_balance_fs(sbi); + return ret; +} + +#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) +#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) + +static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & F2FS_REG_FLMASK; + else + return flags & F2FS_OTHER_FLMASK; +} + +long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int flags; + int ret; + + switch (cmd) { + case FS_IOC_GETFLAGS: + flags = fi->i_flags & FS_FL_USER_VISIBLE; + return put_user(flags, (int __user *) arg); + case FS_IOC_SETFLAGS: + { + unsigned int oldflags; + + ret = mnt_want_write(filp->f_path.mnt); + if (ret) + return ret; + + if (!inode_owner_or_capable(inode)) { + ret = -EACCES; + goto out; + } + + if (get_user(flags, (int __user *) arg)) { + ret = -EFAULT; + goto out; + } + + flags = f2fs_mask_flags(inode->i_mode, flags); + + mutex_lock(&inode->i_mutex); + + oldflags = fi->i_flags; + + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + ret = -EPERM; + goto out; + } + } + + flags = flags & FS_FL_USER_MODIFIABLE; + flags |= oldflags & ~FS_FL_USER_MODIFIABLE; + fi->i_flags = flags; + mutex_unlock(&inode->i_mutex); + + f2fs_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); +out: + mnt_drop_write(filp->f_path.mnt); + return ret; + } + default: + return -ENOTTY; + } +} + +const struct file_operations f2fs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .open = generic_file_open, + .mmap = f2fs_file_mmap, + .fsync = f2fs_sync_file, + .fallocate = f2fs_fallocate, + .unlocked_ioctl = f2fs_ioctl, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, +}; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c new file mode 100644 index 000000000000..644aa3808273 --- /dev/null +++ b/fs/f2fs/gc.c @@ -0,0 +1,742 @@ +/* + * fs/f2fs/gc.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/backing-dev.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/f2fs_fs.h> +#include <linux/kthread.h> +#include <linux/delay.h> +#include <linux/freezer.h> +#include <linux/blkdev.h> + +#include "f2fs.h" +#include "node.h" +#include "segment.h" +#include "gc.h" + +static struct kmem_cache *winode_slab; + +static int gc_thread_func(void *data) +{ + struct f2fs_sb_info *sbi = data; + wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; + long wait_ms; + + wait_ms = GC_THREAD_MIN_SLEEP_TIME; + + do { + if (try_to_freeze()) + continue; + else + wait_event_interruptible_timeout(*wq, + kthread_should_stop(), + msecs_to_jiffies(wait_ms)); + if (kthread_should_stop()) + break; + + f2fs_balance_fs(sbi); + + if (!test_opt(sbi, BG_GC)) + continue; + + /* + * [GC triggering condition] + * 0. GC is not conducted currently. + * 1. There are enough dirty segments. + * 2. IO subsystem is idle by checking the # of writeback pages. + * 3. IO subsystem is idle by checking the # of requests in + * bdev's request list. + * + * Note) We have to avoid triggering GCs too much frequently. + * Because it is possible that some segments can be + * invalidated soon after by user update or deletion. + * So, I'd like to wait some time to collect dirty segments. + */ + if (!mutex_trylock(&sbi->gc_mutex)) + continue; + + if (!is_idle(sbi)) { + wait_ms = increase_sleep_time(wait_ms); + mutex_unlock(&sbi->gc_mutex); + continue; + } + + if (has_enough_invalid_blocks(sbi)) + wait_ms = decrease_sleep_time(wait_ms); + else + wait_ms = increase_sleep_time(wait_ms); + + sbi->bg_gc++; + + if (f2fs_gc(sbi, 1) == GC_NONE) + wait_ms = GC_THREAD_NOGC_SLEEP_TIME; + else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME) + wait_ms = GC_THREAD_MAX_SLEEP_TIME; + + } while (!kthread_should_stop()); + return 0; +} + +int start_gc_thread(struct f2fs_sb_info *sbi) +{ + struct f2fs_gc_kthread *gc_th; + + gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); + if (!gc_th) + return -ENOMEM; + + sbi->gc_thread = gc_th; + init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); + sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, + GC_THREAD_NAME); + if (IS_ERR(gc_th->f2fs_gc_task)) { + kfree(gc_th); + return -ENOMEM; + } + return 0; +} + +void stop_gc_thread(struct f2fs_sb_info *sbi) +{ + struct f2fs_gc_kthread *gc_th = sbi->gc_thread; + if (!gc_th) + return; + kthread_stop(gc_th->f2fs_gc_task); + kfree(gc_th); + sbi->gc_thread = NULL; +} + +static int select_gc_type(int gc_type) +{ + return (gc_type == BG_GC) ? GC_CB : GC_GREEDY; +} + +static void select_policy(struct f2fs_sb_info *sbi, int gc_type, + int type, struct victim_sel_policy *p) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + if (p->alloc_mode) { + p->gc_mode = GC_GREEDY; + p->dirty_segmap = dirty_i->dirty_segmap[type]; + p->ofs_unit = 1; + } else { + p->gc_mode = select_gc_type(gc_type); + p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; + p->ofs_unit = sbi->segs_per_sec; + } + p->offset = sbi->last_victim[p->gc_mode]; +} + +static unsigned int get_max_cost(struct f2fs_sb_info *sbi, + struct victim_sel_policy *p) +{ + if (p->gc_mode == GC_GREEDY) + return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; + else if (p->gc_mode == GC_CB) + return UINT_MAX; + else /* No other gc_mode */ + return 0; +} + +static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno; + + /* + * If the gc_type is FG_GC, we can select victim segments + * selected by background GC before. + * Those segments guarantee they have small valid blocks. + */ + segno = find_next_bit(dirty_i->victim_segmap[BG_GC], + TOTAL_SEGS(sbi), 0); + if (segno < TOTAL_SEGS(sbi)) { + clear_bit(segno, dirty_i->victim_segmap[BG_GC]); + return segno; + } + return NULL_SEGNO; +} + +static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int secno = GET_SECNO(sbi, segno); + unsigned int start = secno * sbi->segs_per_sec; + unsigned long long mtime = 0; + unsigned int vblocks; + unsigned char age = 0; + unsigned char u; + unsigned int i; + + for (i = 0; i < sbi->segs_per_sec; i++) + mtime += get_seg_entry(sbi, start + i)->mtime; + vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + + mtime = div_u64(mtime, sbi->segs_per_sec); + vblocks = div_u64(vblocks, sbi->segs_per_sec); + + u = (vblocks * 100) >> sbi->log_blocks_per_seg; + + /* Handle if the system time is changed by user */ + if (mtime < sit_i->min_mtime) + sit_i->min_mtime = mtime; + if (mtime > sit_i->max_mtime) + sit_i->max_mtime = mtime; + if (sit_i->max_mtime != sit_i->min_mtime) + age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime), + sit_i->max_mtime - sit_i->min_mtime); + + return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); +} + +static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, + struct victim_sel_policy *p) +{ + if (p->alloc_mode == SSR) + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + + /* alloc_mode == LFS */ + if (p->gc_mode == GC_GREEDY) + return get_valid_blocks(sbi, segno, sbi->segs_per_sec); + else + return get_cb_cost(sbi, segno); +} + +/* + * This function is called from two pathes. + * One is garbage collection and the other is SSR segment selection. + * When it is called during GC, it just gets a victim segment + * and it does not remove it from dirty seglist. + * When it is called from SSR segment selection, it finds a segment + * which has minimum valid blocks and removes it from dirty seglist. + */ +static int get_victim_by_default(struct f2fs_sb_info *sbi, + unsigned int *result, int gc_type, int type, char alloc_mode) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct victim_sel_policy p; + unsigned int segno; + int nsearched = 0; + + p.alloc_mode = alloc_mode; + select_policy(sbi, gc_type, type, &p); + + p.min_segno = NULL_SEGNO; + p.min_cost = get_max_cost(sbi, &p); + + mutex_lock(&dirty_i->seglist_lock); + + if (p.alloc_mode == LFS && gc_type == FG_GC) { + p.min_segno = check_bg_victims(sbi); + if (p.min_segno != NULL_SEGNO) + goto got_it; + } + + while (1) { + unsigned long cost; + + segno = find_next_bit(p.dirty_segmap, + TOTAL_SEGS(sbi), p.offset); + if (segno >= TOTAL_SEGS(sbi)) { + if (sbi->last_victim[p.gc_mode]) { + sbi->last_victim[p.gc_mode] = 0; + p.offset = 0; + continue; + } + break; + } + p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; + + if (test_bit(segno, dirty_i->victim_segmap[FG_GC])) + continue; + if (gc_type == BG_GC && + test_bit(segno, dirty_i->victim_segmap[BG_GC])) + continue; + if (IS_CURSEC(sbi, GET_SECNO(sbi, segno))) + continue; + + cost = get_gc_cost(sbi, segno, &p); + + if (p.min_cost > cost) { + p.min_segno = segno; + p.min_cost = cost; + } + + if (cost == get_max_cost(sbi, &p)) + continue; + + if (nsearched++ >= MAX_VICTIM_SEARCH) { + sbi->last_victim[p.gc_mode] = segno; + break; + } + } +got_it: + if (p.min_segno != NULL_SEGNO) { + *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; + if (p.alloc_mode == LFS) { + int i; + for (i = 0; i < p.ofs_unit; i++) + set_bit(*result + i, + dirty_i->victim_segmap[gc_type]); + } + } + mutex_unlock(&dirty_i->seglist_lock); + + return (p.min_segno == NULL_SEGNO) ? 0 : 1; +} + +static const struct victim_selection default_v_ops = { + .get_victim = get_victim_by_default, +}; + +static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) +{ + struct list_head *this; + struct inode_entry *ie; + + list_for_each(this, ilist) { + ie = list_entry(this, struct inode_entry, list); + if (ie->inode->i_ino == ino) + return ie->inode; + } + return NULL; +} + +static void add_gc_inode(struct inode *inode, struct list_head *ilist) +{ + struct list_head *this; + struct inode_entry *new_ie, *ie; + + list_for_each(this, ilist) { + ie = list_entry(this, struct inode_entry, list); + if (ie->inode == inode) { + iput(inode); + return; + } + } +repeat: + new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); + if (!new_ie) { + cond_resched(); + goto repeat; + } + new_ie->inode = inode; + list_add_tail(&new_ie->list, ilist); +} + +static void put_gc_inode(struct list_head *ilist) +{ + struct inode_entry *ie, *next_ie; + list_for_each_entry_safe(ie, next_ie, ilist, list) { + iput(ie->inode); + list_del(&ie->list); + kmem_cache_free(winode_slab, ie); + } +} + +static int check_valid_map(struct f2fs_sb_info *sbi, + unsigned int segno, int offset) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct seg_entry *sentry; + int ret; + + mutex_lock(&sit_i->sentry_lock); + sentry = get_seg_entry(sbi, segno); + ret = f2fs_test_bit(offset, sentry->cur_valid_map); + mutex_unlock(&sit_i->sentry_lock); + return ret ? GC_OK : GC_NEXT; +} + +/* + * This function compares node address got in summary with that in NAT. + * On validity, copy that node with cold status, otherwise (invalid node) + * ignore that. + */ +static int gc_node_segment(struct f2fs_sb_info *sbi, + struct f2fs_summary *sum, unsigned int segno, int gc_type) +{ + bool initial = true; + struct f2fs_summary *entry; + int off; + +next_step: + entry = sum; + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { + nid_t nid = le32_to_cpu(entry->nid); + struct page *node_page; + int err; + + /* + * It makes sure that free segments are able to write + * all the dirty node pages before CP after this CP. + * So let's check the space of dirty node pages. + */ + if (should_do_checkpoint(sbi)) { + mutex_lock(&sbi->cp_mutex); + block_operations(sbi); + return GC_BLOCKED; + } + + err = check_valid_map(sbi, segno, off); + if (err == GC_ERROR) + return err; + else if (err == GC_NEXT) + continue; + + if (initial) { + ra_node_page(sbi, nid); + continue; + } + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + continue; + + /* set page dirty and write it */ + if (!PageWriteback(node_page)) + set_page_dirty(node_page); + f2fs_put_page(node_page, 1); + stat_inc_node_blk_count(sbi, 1); + } + if (initial) { + initial = false; + goto next_step; + } + + if (gc_type == FG_GC) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + sync_node_pages(sbi, 0, &wbc); + } + return GC_DONE; +} + +/* + * Calculate start block index that this node page contains + */ +block_t start_bidx_of_node(unsigned int node_ofs) +{ + block_t start_bidx; + unsigned int bidx, indirect_blks; + int dec; + + indirect_blks = 2 * NIDS_PER_BLOCK + 4; + + start_bidx = 1; + if (node_ofs == 0) { + start_bidx = 0; + } else if (node_ofs <= 2) { + bidx = node_ofs - 1; + } else if (node_ofs <= indirect_blks) { + dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); + bidx = node_ofs - 2 - dec; + } else { + dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); + bidx = node_ofs - 5 - dec; + } + + if (start_bidx) + start_bidx = bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE; + return start_bidx; +} + +static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + struct node_info *dni, block_t blkaddr, unsigned int *nofs) +{ + struct page *node_page; + nid_t nid; + unsigned int ofs_in_node; + block_t source_blkaddr; + + nid = le32_to_cpu(sum->nid); + ofs_in_node = le16_to_cpu(sum->ofs_in_node); + + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + return GC_NEXT; + + get_node_info(sbi, nid, dni); + + if (sum->version != dni->version) { + f2fs_put_page(node_page, 1); + return GC_NEXT; + } + + *nofs = ofs_of_node(node_page); + source_blkaddr = datablock_addr(node_page, ofs_in_node); + f2fs_put_page(node_page, 1); + + if (source_blkaddr != blkaddr) + return GC_NEXT; + return GC_OK; +} + +static void move_data_page(struct inode *inode, struct page *page, int gc_type) +{ + if (page->mapping != inode->i_mapping) + goto out; + + if (inode != page->mapping->host) + goto out; + + if (PageWriteback(page)) + goto out; + + if (gc_type == BG_GC) { + set_page_dirty(page); + set_cold_data(page); + } else { + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + mutex_lock_op(sbi, DATA_WRITE); + if (clear_page_dirty_for_io(page) && + S_ISDIR(inode->i_mode)) { + dec_page_count(sbi, F2FS_DIRTY_DENTS); + inode_dec_dirty_dents(inode); + } + set_cold_data(page); + do_write_data_page(page); + mutex_unlock_op(sbi, DATA_WRITE); + clear_cold_data(page); + } +out: + f2fs_put_page(page, 1); +} + +/* + * This function tries to get parent node of victim data block, and identifies + * data block validity. If the block is valid, copy that with cold status and + * modify parent node. + * If the parent node is not valid or the data block address is different, + * the victim data block is ignored. + */ +static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + struct list_head *ilist, unsigned int segno, int gc_type) +{ + struct super_block *sb = sbi->sb; + struct f2fs_summary *entry; + block_t start_addr; + int err, off; + int phase = 0; + + start_addr = START_BLOCK(sbi, segno); + +next_step: + entry = sum; + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { + struct page *data_page; + struct inode *inode; + struct node_info dni; /* dnode info for the data */ + unsigned int ofs_in_node, nofs; + block_t start_bidx; + + /* + * It makes sure that free segments are able to write + * all the dirty node pages before CP after this CP. + * So let's check the space of dirty node pages. + */ + if (should_do_checkpoint(sbi)) { + mutex_lock(&sbi->cp_mutex); + block_operations(sbi); + err = GC_BLOCKED; + goto stop; + } + + err = check_valid_map(sbi, segno, off); + if (err == GC_ERROR) + goto stop; + else if (err == GC_NEXT) + continue; + + if (phase == 0) { + ra_node_page(sbi, le32_to_cpu(entry->nid)); + continue; + } + + /* Get an inode by ino with checking validity */ + err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs); + if (err == GC_ERROR) + goto stop; + else if (err == GC_NEXT) + continue; + + if (phase == 1) { + ra_node_page(sbi, dni.ino); + continue; + } + + start_bidx = start_bidx_of_node(nofs); + ofs_in_node = le16_to_cpu(entry->ofs_in_node); + + if (phase == 2) { + inode = f2fs_iget_nowait(sb, dni.ino); + if (IS_ERR(inode)) + continue; + + data_page = find_data_page(inode, + start_bidx + ofs_in_node); + if (IS_ERR(data_page)) + goto next_iput; + + f2fs_put_page(data_page, 0); + add_gc_inode(inode, ilist); + } else { + inode = find_gc_inode(dni.ino, ilist); + if (inode) { + data_page = get_lock_data_page(inode, + start_bidx + ofs_in_node); + if (IS_ERR(data_page)) + continue; + move_data_page(inode, data_page, gc_type); + stat_inc_data_blk_count(sbi, 1); + } + } + continue; +next_iput: + iput(inode); + } + if (++phase < 4) + goto next_step; + err = GC_DONE; +stop: + if (gc_type == FG_GC) + f2fs_submit_bio(sbi, DATA, true); + return err; +} + +static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, + int gc_type, int type) +{ + struct sit_info *sit_i = SIT_I(sbi); + int ret; + mutex_lock(&sit_i->sentry_lock); + ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); + mutex_unlock(&sit_i->sentry_lock); + return ret; +} + +static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, + struct list_head *ilist, int gc_type) +{ + struct page *sum_page; + struct f2fs_summary_block *sum; + int ret = GC_DONE; + + /* read segment summary of victim */ + sum_page = get_sum_page(sbi, segno); + if (IS_ERR(sum_page)) + return GC_ERROR; + + /* + * CP needs to lock sum_page. In this time, we don't need + * to lock this page, because this summary page is not gone anywhere. + * Also, this page is not gonna be updated before GC is done. + */ + unlock_page(sum_page); + sum = page_address(sum_page); + + switch (GET_SUM_TYPE((&sum->footer))) { + case SUM_TYPE_NODE: + ret = gc_node_segment(sbi, sum->entries, segno, gc_type); + break; + case SUM_TYPE_DATA: + ret = gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + break; + } + stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); + stat_inc_call_count(sbi->stat_info); + + f2fs_put_page(sum_page, 0); + return ret; +} + +int f2fs_gc(struct f2fs_sb_info *sbi, int nGC) +{ + unsigned int segno; + int old_free_secs, cur_free_secs; + int gc_status, nfree; + struct list_head ilist; + int gc_type = BG_GC; + + INIT_LIST_HEAD(&ilist); +gc_more: + nfree = 0; + gc_status = GC_NONE; + + if (has_not_enough_free_secs(sbi)) + old_free_secs = reserved_sections(sbi); + else + old_free_secs = free_sections(sbi); + + while (sbi->sb->s_flags & MS_ACTIVE) { + int i; + if (has_not_enough_free_secs(sbi)) + gc_type = FG_GC; + + cur_free_secs = free_sections(sbi) + nfree; + + /* We got free space successfully. */ + if (nGC < cur_free_secs - old_free_secs) + break; + + if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) + break; + + for (i = 0; i < sbi->segs_per_sec; i++) { + /* + * do_garbage_collect will give us three gc_status: + * GC_ERROR, GC_DONE, and GC_BLOCKED. + * If GC is finished uncleanly, we have to return + * the victim to dirty segment list. + */ + gc_status = do_garbage_collect(sbi, segno + i, + &ilist, gc_type); + if (gc_status != GC_DONE) + goto stop; + nfree++; + } + } +stop: + if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) { + write_checkpoint(sbi, (gc_status == GC_BLOCKED), false); + if (nfree) + goto gc_more; + } + mutex_unlock(&sbi->gc_mutex); + + put_gc_inode(&ilist); + BUG_ON(!list_empty(&ilist)); + return gc_status; +} + +void build_gc_manager(struct f2fs_sb_info *sbi) +{ + DIRTY_I(sbi)->v_ops = &default_v_ops; +} + +int create_gc_caches(void) +{ + winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", + sizeof(struct inode_entry), NULL); + if (!winode_slab) + return -ENOMEM; + return 0; +} + +void destroy_gc_caches(void) +{ + kmem_cache_destroy(winode_slab); +} diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h new file mode 100644 index 000000000000..b026d9354ccd --- /dev/null +++ b/fs/f2fs/gc.h @@ -0,0 +1,117 @@ +/* + * fs/f2fs/gc.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define GC_THREAD_NAME "f2fs_gc_task" +#define GC_THREAD_MIN_WB_PAGES 1 /* + * a threshold to determine + * whether IO subsystem is idle + * or not + */ +#define GC_THREAD_MIN_SLEEP_TIME 10000 /* milliseconds */ +#define GC_THREAD_MAX_SLEEP_TIME 30000 +#define GC_THREAD_NOGC_SLEEP_TIME 10000 +#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ +#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ + +/* Search max. number of dirty segments to select a victim segment */ +#define MAX_VICTIM_SEARCH 20 + +enum { + GC_NONE = 0, + GC_ERROR, + GC_OK, + GC_NEXT, + GC_BLOCKED, + GC_DONE, +}; + +struct f2fs_gc_kthread { + struct task_struct *f2fs_gc_task; + wait_queue_head_t gc_wait_queue_head; +}; + +struct inode_entry { + struct list_head list; + struct inode *inode; +}; + +/* + * inline functions + */ +static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) +{ + if (free_segments(sbi) < overprovision_segments(sbi)) + return 0; + else + return (free_segments(sbi) - overprovision_segments(sbi)) + << sbi->log_blocks_per_seg; +} + +static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi) +{ + return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100; +} + +static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) +{ + block_t reclaimable_user_blocks = sbi->user_block_count - + written_block_count(sbi); + return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; +} + +static inline long increase_sleep_time(long wait) +{ + wait += GC_THREAD_MIN_SLEEP_TIME; + if (wait > GC_THREAD_MAX_SLEEP_TIME) + wait = GC_THREAD_MAX_SLEEP_TIME; + return wait; +} + +static inline long decrease_sleep_time(long wait) +{ + wait -= GC_THREAD_MIN_SLEEP_TIME; + if (wait <= GC_THREAD_MIN_SLEEP_TIME) + wait = GC_THREAD_MIN_SLEEP_TIME; + return wait; +} + +static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) +{ + block_t invalid_user_blocks = sbi->user_block_count - + written_block_count(sbi); + /* + * Background GC is triggered with the following condition. + * 1. There are a number of invalid blocks. + * 2. There is not enough free space. + */ + if (invalid_user_blocks > limit_invalid_user_blocks(sbi) && + free_user_blocks(sbi) < limit_free_user_blocks(sbi)) + return true; + return false; +} + +static inline int is_idle(struct f2fs_sb_info *sbi) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct request_queue *q = bdev_get_queue(bdev); + struct request_list *rl = &q->root_rl; + return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); +} + +static inline bool should_do_checkpoint(struct f2fs_sb_info *sbi) +{ + unsigned int pages_per_sec = sbi->segs_per_sec * + (1 << sbi->log_blocks_per_seg); + int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1) + >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; + int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1) + >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; + return free_sections(sbi) <= (node_secs + 2 * dent_secs + 2); +} diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c new file mode 100644 index 000000000000..a60f04200f8b --- /dev/null +++ b/fs/f2fs/hash.c @@ -0,0 +1,97 @@ +/* + * fs/f2fs/hash.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext3/hash.c + * + * Copyright (C) 2002 by Theodore Ts'o + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/f2fs_fs.h> +#include <linux/cryptohash.h> +#include <linux/pagemap.h> + +#include "f2fs.h" + +/* + * Hashing code copied from ext3 + */ +#define DELTA 0x9E3779B9 + +static void TEA_transform(unsigned int buf[4], unsigned int const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); + } while (--n); + + buf[0] += b0; + buf[1] += b1; +} + +static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num) +{ + unsigned pad, val; + int i; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > num * 4) + len = num * 4; + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = msg[i] + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +f2fs_hash_t f2fs_dentry_hash(const char *name, int len) +{ + __u32 hash, minor_hash; + f2fs_hash_t f2fs_hash; + const char *p; + __u32 in[8], buf[4]; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + p = name; + while (len > 0) { + str2hashbuf(p, len, in, 4); + TEA_transform(buf, in); + len -= 16; + p += 16; + } + hash = buf[0]; + minor_hash = buf[1]; + + f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); + return f2fs_hash; +} diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c new file mode 100644 index 000000000000..df5fb381ebf1 --- /dev/null +++ b/fs/f2fs/inode.c @@ -0,0 +1,268 @@ +/* + * fs/f2fs/inode.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/fs.h> +#include <linux/f2fs_fs.h> +#include <linux/buffer_head.h> +#include <linux/writeback.h> + +#include "f2fs.h" +#include "node.h" + +struct f2fs_iget_args { + u64 ino; + int on_free; +}; + +void f2fs_set_inode_flags(struct inode *inode) +{ + unsigned int flags = F2FS_I(inode)->i_flags; + + inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | + S_NOATIME | S_DIRSYNC); + + if (flags & FS_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & FS_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & FS_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & FS_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & FS_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +} + +static int f2fs_iget_test(struct inode *inode, void *data) +{ + struct f2fs_iget_args *args = data; + + if (inode->i_ino != args->ino) + return 0; + if (inode->i_state & (I_FREEING | I_WILL_FREE)) { + args->on_free = 1; + return 0; + } + return 1; +} + +struct inode *f2fs_iget_nowait(struct super_block *sb, unsigned long ino) +{ + struct f2fs_iget_args args = { + .ino = ino, + .on_free = 0 + }; + struct inode *inode = ilookup5(sb, ino, f2fs_iget_test, &args); + + if (inode) + return inode; + if (!args.on_free) + return f2fs_iget(sb, ino); + return ERR_PTR(-ENOENT); +} + +static int do_read_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct page *node_page; + struct f2fs_node *rn; + struct f2fs_inode *ri; + + /* Check if ino is within scope */ + check_nid_range(sbi, inode->i_ino); + + node_page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); + + rn = page_address(node_page); + ri = &(rn->i); + + inode->i_mode = le16_to_cpu(ri->i_mode); + i_uid_write(inode, le32_to_cpu(ri->i_uid)); + i_gid_write(inode, le32_to_cpu(ri->i_gid)); + set_nlink(inode, le32_to_cpu(ri->i_links)); + inode->i_size = le64_to_cpu(ri->i_size); + inode->i_blocks = le64_to_cpu(ri->i_blocks); + + inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); + inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); + inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime); + inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec); + inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); + inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); + inode->i_generation = le32_to_cpu(ri->i_generation); + + fi->i_current_depth = le32_to_cpu(ri->i_current_depth); + fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); + fi->i_flags = le32_to_cpu(ri->i_flags); + fi->flags = 0; + fi->data_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver) - 1; + fi->i_advise = ri->i_advise; + fi->i_pino = le32_to_cpu(ri->i_pino); + get_extent_info(&fi->ext, ri->i_ext); + f2fs_put_page(node_page, 1); + return 0; +} + +struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + int ret; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) + goto make_now; + + ret = do_read_inode(inode); + if (ret) + goto bad_inode; + + if (!sbi->por_doing && inode->i_nlink == 0) { + ret = -ENOENT; + goto bad_inode; + } + +make_now: + if (ino == F2FS_NODE_INO(sbi)) { + inode->i_mapping->a_ops = &f2fs_node_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + } else if (ino == F2FS_META_INO(sbi)) { + inode->i_mapping->a_ops = &f2fs_meta_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + } else if (S_ISREG(inode->i_mode)) { + inode->i_op = &f2fs_file_inode_operations; + inode->i_fop = &f2fs_file_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &f2fs_dir_inode_operations; + inode->i_fop = &f2fs_dir_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE | + __GFP_ZERO); + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = &f2fs_symlink_inode_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + inode->i_op = &f2fs_special_inode_operations; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + } else { + ret = -EIO; + goto bad_inode; + } + unlock_new_inode(inode); + + return inode; + +bad_inode: + iget_failed(inode); + return ERR_PTR(ret); +} + +void update_inode(struct inode *inode, struct page *node_page) +{ + struct f2fs_node *rn; + struct f2fs_inode *ri; + + wait_on_page_writeback(node_page); + + rn = page_address(node_page); + ri = &(rn->i); + + ri->i_mode = cpu_to_le16(inode->i_mode); + ri->i_advise = F2FS_I(inode)->i_advise; + ri->i_uid = cpu_to_le32(i_uid_read(inode)); + ri->i_gid = cpu_to_le32(i_gid_read(inode)); + ri->i_links = cpu_to_le32(inode->i_nlink); + ri->i_size = cpu_to_le64(i_size_read(inode)); + ri->i_blocks = cpu_to_le64(inode->i_blocks); + set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); + + ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); + ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); + ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); + ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth); + ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); + ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); + ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); + ri->i_generation = cpu_to_le32(inode->i_generation); + set_page_dirty(node_page); +} + +int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *node_page; + bool need_lock = false; + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + return 0; + + node_page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); + + if (!PageDirty(node_page)) { + need_lock = true; + f2fs_put_page(node_page, 1); + mutex_lock(&sbi->write_inode); + node_page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(node_page)) { + mutex_unlock(&sbi->write_inode); + return PTR_ERR(node_page); + } + } + update_inode(inode, node_page); + f2fs_put_page(node_page, 1); + if (need_lock) + mutex_unlock(&sbi->write_inode); + return 0; +} + +/* + * Called at the last iput() if i_nlink is zero + */ +void f2fs_evict_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + truncate_inode_pages(&inode->i_data, 0); + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + goto no_delete; + + BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents)); + remove_dirty_dir_inode(inode); + + if (inode->i_nlink || is_bad_inode(inode)) + goto no_delete; + + set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); + i_size_write(inode, 0); + + if (F2FS_HAS_BLOCKS(inode)) + f2fs_truncate(inode); + + remove_inode_page(inode); +no_delete: + clear_inode(inode); +} diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c new file mode 100644 index 000000000000..89b7675dc377 --- /dev/null +++ b/fs/f2fs/namei.c @@ -0,0 +1,503 @@ +/* + * fs/f2fs/namei.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/fs.h> +#include <linux/f2fs_fs.h> +#include <linux/pagemap.h> +#include <linux/sched.h> +#include <linux/ctype.h> + +#include "f2fs.h" +#include "xattr.h" +#include "acl.h" + +static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + nid_t ino; + struct inode *inode; + bool nid_free = false; + int err; + + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + mutex_lock_op(sbi, NODE_NEW); + if (!alloc_nid(sbi, &ino)) { + mutex_unlock_op(sbi, NODE_NEW); + err = -ENOSPC; + goto fail; + } + mutex_unlock_op(sbi, NODE_NEW); + + inode->i_uid = current_fsuid(); + + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else { + inode->i_gid = current_fsgid(); + } + + inode->i_ino = ino; + inode->i_mode = mode; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_generation = sbi->s_next_generation++; + + err = insert_inode_locked(inode); + if (err) { + err = -EINVAL; + nid_free = true; + goto out; + } + + mark_inode_dirty(inode); + return inode; + +out: + clear_nlink(inode); + unlock_new_inode(inode); +fail: + iput(inode); + if (nid_free) + alloc_nid_failed(sbi, ino); + return ERR_PTR(err); +} + +static int is_multimedia_file(const unsigned char *s, const char *sub) +{ + int slen = strlen(s); + int sublen = strlen(sub); + int ret; + + if (sublen > slen) + return 1; + + ret = memcmp(s + slen - sublen, sub, sublen); + if (ret) { /* compare upper case */ + int i; + char upper_sub[8]; + for (i = 0; i < sublen && i < sizeof(upper_sub); i++) + upper_sub[i] = toupper(sub[i]); + return memcmp(s + slen - sublen, upper_sub, sublen); + } + + return ret; +} + +/* + * Set multimedia files as cold files for hot/cold data separation + */ +static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode, + const unsigned char *name) +{ + int i; + __u8 (*extlist)[8] = sbi->raw_super->extension_list; + + int count = le32_to_cpu(sbi->raw_super->extension_count); + for (i = 0; i < count; i++) { + if (!is_multimedia_file(name, extlist[i])) { + F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT; + break; + } + } +} + +static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + nid_t ino = 0; + int err; + + inode = f2fs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) + set_cold_file(sbi, inode, dentry->d_name.name); + + inode->i_op = &f2fs_file_inode_operations; + inode->i_fop = &f2fs_file_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + ino = inode->i_ino; + + err = f2fs_add_link(dentry, inode); + if (err) + goto out; + + alloc_nid_done(sbi, ino); + + if (!sbi->por_doing) + d_instantiate(dentry, inode); + unlock_new_inode(inode); + + f2fs_balance_fs(sbi); + return 0; +out: + clear_nlink(inode); + unlock_new_inode(inode); + iput(inode); + alloc_nid_failed(sbi, ino); + return err; +} + +static int f2fs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int err; + + inode->i_ctime = CURRENT_TIME; + atomic_inc(&inode->i_count); + + set_inode_flag(F2FS_I(inode), FI_INC_LINK); + err = f2fs_add_link(dentry, inode); + if (err) + goto out; + + d_instantiate(dentry, inode); + + f2fs_balance_fs(sbi); + return 0; +out: + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + iput(inode); + return err; +} + +struct dentry *f2fs_get_parent(struct dentry *child) +{ + struct qstr dotdot = QSTR_INIT("..", 2); + unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot); + if (!ino) + return ERR_PTR(-ENOENT); + return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); +} + +static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct inode *inode = NULL; + struct f2fs_dir_entry *de; + struct page *page; + + if (dentry->d_name.len > F2FS_MAX_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + de = f2fs_find_entry(dir, &dentry->d_name, &page); + if (de) { + nid_t ino = le32_to_cpu(de->ino); + kunmap(page); + f2fs_put_page(page, 0); + + inode = f2fs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + } + + return d_splice_alias(inode, dentry); +} + +static int f2fs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode = dentry->d_inode; + struct f2fs_dir_entry *de; + struct page *page; + int err = -ENOENT; + + de = f2fs_find_entry(dir, &dentry->d_name, &page); + if (!de) + goto fail; + + err = check_orphan_space(sbi); + if (err) { + kunmap(page); + f2fs_put_page(page, 0); + goto fail; + } + + f2fs_delete_entry(de, page, inode); + + /* In order to evict this inode, we set it dirty */ + mark_inode_dirty(inode); + f2fs_balance_fs(sbi); +fail: + return err; +} + +static int f2fs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + unsigned symlen = strlen(symname) + 1; + int err; + + inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &f2fs_symlink_inode_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + + err = f2fs_add_link(dentry, inode); + if (err) + goto out; + + err = page_symlink(inode, symname, symlen); + alloc_nid_done(sbi, inode->i_ino); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + + f2fs_balance_fs(sbi); + + return err; +out: + clear_nlink(inode); + unlock_new_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct inode *inode; + int err; + + inode = f2fs_new_inode(dir, S_IFDIR | mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &f2fs_dir_inode_operations; + inode->i_fop = &f2fs_dir_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + + set_inode_flag(F2FS_I(inode), FI_INC_LINK); + err = f2fs_add_link(dentry, inode); + if (err) + goto out_fail; + + alloc_nid_done(sbi, inode->i_ino); + + d_instantiate(dentry, inode); + unlock_new_inode(inode); + + f2fs_balance_fs(sbi); + return 0; + +out_fail: + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + clear_nlink(inode); + unlock_new_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + if (f2fs_empty_dir(inode)) + return f2fs_unlink(dir, dentry); + return -ENOTEMPTY; +} + +static int f2fs_mknod(struct inode *dir, struct dentry *dentry, + umode_t mode, dev_t rdev) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + int err = 0; + + if (!new_valid_dev(rdev)) + return -EINVAL; + + inode = f2fs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + init_special_inode(inode, inode->i_mode, rdev); + inode->i_op = &f2fs_special_inode_operations; + + err = f2fs_add_link(dentry, inode); + if (err) + goto out; + + alloc_nid_done(sbi, inode->i_ino); + d_instantiate(dentry, inode); + unlock_new_inode(inode); + + f2fs_balance_fs(sbi); + + return 0; +out: + clear_nlink(inode); + unlock_new_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + +static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct super_block *sb = old_dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + struct page *old_dir_page; + struct page *old_page; + struct f2fs_dir_entry *old_dir_entry = NULL; + struct f2fs_dir_entry *old_entry; + struct f2fs_dir_entry *new_entry; + int err = -ENOENT; + + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); + if (!old_entry) + goto out; + + if (S_ISDIR(old_inode->i_mode)) { + err = -EIO; + old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); + if (!old_dir_entry) + goto out_old; + } + + mutex_lock_op(sbi, RENAME); + + if (new_inode) { + struct page *new_page; + + err = -ENOTEMPTY; + if (old_dir_entry && !f2fs_empty_dir(new_inode)) + goto out_dir; + + err = -ENOENT; + new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, + &new_page); + if (!new_entry) + goto out_dir; + + f2fs_set_link(new_dir, new_entry, new_page, old_inode); + + new_inode->i_ctime = CURRENT_TIME; + if (old_dir_entry) + drop_nlink(new_inode); + drop_nlink(new_inode); + if (!new_inode->i_nlink) + add_orphan_inode(sbi, new_inode->i_ino); + f2fs_write_inode(new_inode, NULL); + } else { + err = f2fs_add_link(new_dentry, old_inode); + if (err) + goto out_dir; + + if (old_dir_entry) { + inc_nlink(new_dir); + f2fs_write_inode(new_dir, NULL); + } + } + + old_inode->i_ctime = CURRENT_TIME; + set_inode_flag(F2FS_I(old_inode), FI_NEED_CP); + mark_inode_dirty(old_inode); + + f2fs_delete_entry(old_entry, old_page, NULL); + + if (old_dir_entry) { + if (old_dir != new_dir) { + f2fs_set_link(old_inode, old_dir_entry, + old_dir_page, new_dir); + } else { + kunmap(old_dir_page); + f2fs_put_page(old_dir_page, 0); + } + drop_nlink(old_dir); + f2fs_write_inode(old_dir, NULL); + } + + mutex_unlock_op(sbi, RENAME); + + f2fs_balance_fs(sbi); + return 0; + +out_dir: + if (old_dir_entry) { + kunmap(old_dir_page); + f2fs_put_page(old_dir_page, 0); + } + mutex_unlock_op(sbi, RENAME); +out_old: + kunmap(old_page); + f2fs_put_page(old_page, 0); +out: + return err; +} + +const struct inode_operations f2fs_dir_inode_operations = { + .create = f2fs_create, + .lookup = f2fs_lookup, + .link = f2fs_link, + .unlink = f2fs_unlink, + .symlink = f2fs_symlink, + .mkdir = f2fs_mkdir, + .rmdir = f2fs_rmdir, + .mknod = f2fs_mknod, + .rename = f2fs_rename, + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +const struct inode_operations f2fs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, + .setattr = f2fs_setattr, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +const struct inode_operations f2fs_special_inode_operations = { + .setattr = f2fs_setattr, + .get_acl = f2fs_get_acl, +#ifdef CONFIG_F2FS_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = f2fs_listxattr, + .removexattr = generic_removexattr, +#endif +}; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c new file mode 100644 index 000000000000..19870361497e --- /dev/null +++ b/fs/f2fs/node.c @@ -0,0 +1,1764 @@ +/* + * fs/f2fs/node.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/fs.h> +#include <linux/f2fs_fs.h> +#include <linux/mpage.h> +#include <linux/backing-dev.h> +#include <linux/blkdev.h> +#include <linux/pagevec.h> +#include <linux/swap.h> + +#include "f2fs.h" +#include "node.h" +#include "segment.h" + +static struct kmem_cache *nat_entry_slab; +static struct kmem_cache *free_nid_slab; + +static void clear_node_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + unsigned int long flags; + + if (PageDirty(page)) { + spin_lock_irqsave(&mapping->tree_lock, flags); + radix_tree_tag_clear(&mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + + clear_page_dirty_for_io(page); + dec_page_count(sbi, F2FS_DIRTY_NODES); + } + ClearPageUptodate(page); +} + +static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + pgoff_t index = current_nat_addr(sbi, nid); + return get_meta_page(sbi, index); +} + +static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct page *src_page; + struct page *dst_page; + pgoff_t src_off; + pgoff_t dst_off; + void *src_addr; + void *dst_addr; + struct f2fs_nm_info *nm_i = NM_I(sbi); + + src_off = current_nat_addr(sbi, nid); + dst_off = next_nat_addr(sbi, src_off); + + /* get current nat block page with lock */ + src_page = get_meta_page(sbi, src_off); + + /* Dirty src_page means that it is already the new target NAT page. */ + if (PageDirty(src_page)) + return src_page; + + dst_page = grab_meta_page(sbi, dst_off); + + src_addr = page_address(src_page); + dst_addr = page_address(dst_page); + memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); + set_page_dirty(dst_page); + f2fs_put_page(src_page, 1); + + set_to_next_nat(nm_i, nid); + + return dst_page; +} + +/* + * Readahead NAT pages + */ +static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct page *page; + pgoff_t index; + int i; + + for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { + if (nid >= nm_i->max_nid) + nid = 0; + index = current_nat_addr(sbi, nid); + + page = grab_cache_page(mapping, index); + if (!page) + continue; + if (f2fs_readpage(sbi, page, index, READ)) { + f2fs_put_page(page, 1); + continue; + } + page_cache_release(page); + } +} + +static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) +{ + return radix_tree_lookup(&nm_i->nat_root, n); +} + +static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, + nid_t start, unsigned int nr, struct nat_entry **ep) +{ + return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr); +} + +static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) +{ + list_del(&e->list); + radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); + nm_i->nat_cnt--; + kmem_cache_free(nat_entry_slab, e); +} + +int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; + int is_cp = 1; + + read_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e && !e->checkpointed) + is_cp = 0; + read_unlock(&nm_i->nat_tree_lock); + return is_cp; +} + +static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) +{ + struct nat_entry *new; + + new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); + if (!new) + return NULL; + if (radix_tree_insert(&nm_i->nat_root, nid, new)) { + kmem_cache_free(nat_entry_slab, new); + return NULL; + } + memset(new, 0, sizeof(struct nat_entry)); + nat_set_nid(new, nid); + list_add_tail(&new->list, &nm_i->nat_entries); + nm_i->nat_cnt++; + return new; +} + +static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, + struct f2fs_nat_entry *ne) +{ + struct nat_entry *e; +retry: + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (!e) { + e = grab_nat_entry(nm_i, nid); + if (!e) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); + nat_set_ino(e, le32_to_cpu(ne->ino)); + nat_set_version(e, ne->version); + e->checkpointed = true; + } + write_unlock(&nm_i->nat_tree_lock); +} + +static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, + block_t new_blkaddr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; +retry: + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, ni->nid); + if (!e) { + e = grab_nat_entry(nm_i, ni->nid); + if (!e) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + e->ni = *ni; + e->checkpointed = true; + BUG_ON(ni->blk_addr == NEW_ADDR); + } else if (new_blkaddr == NEW_ADDR) { + /* + * when nid is reallocated, + * previous nat entry can be remained in nat cache. + * So, reinitialize it with new information. + */ + e->ni = *ni; + BUG_ON(ni->blk_addr != NULL_ADDR); + } + + if (new_blkaddr == NEW_ADDR) + e->checkpointed = false; + + /* sanity check */ + BUG_ON(nat_get_blkaddr(e) != ni->blk_addr); + BUG_ON(nat_get_blkaddr(e) == NULL_ADDR && + new_blkaddr == NULL_ADDR); + BUG_ON(nat_get_blkaddr(e) == NEW_ADDR && + new_blkaddr == NEW_ADDR); + BUG_ON(nat_get_blkaddr(e) != NEW_ADDR && + nat_get_blkaddr(e) != NULL_ADDR && + new_blkaddr == NEW_ADDR); + + /* increament version no as node is removed */ + if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { + unsigned char version = nat_get_version(e); + nat_set_version(e, inc_node_version(version)); + } + + /* change address */ + nat_set_blkaddr(e, new_blkaddr); + __set_nat_cache_dirty(nm_i, e); + write_unlock(&nm_i->nat_tree_lock); +} + +static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD) + return 0; + + write_lock(&nm_i->nat_tree_lock); + while (nr_shrink && !list_empty(&nm_i->nat_entries)) { + struct nat_entry *ne; + ne = list_first_entry(&nm_i->nat_entries, + struct nat_entry, list); + __del_from_nat_cache(nm_i, ne); + nr_shrink--; + } + write_unlock(&nm_i->nat_tree_lock); + return nr_shrink; +} + +/* + * This function returns always success + */ +void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + nid_t start_nid = START_NID(nid); + struct f2fs_nat_block *nat_blk; + struct page *page = NULL; + struct f2fs_nat_entry ne; + struct nat_entry *e; + int i; + + memset(&ne, 0, sizeof(struct f2fs_nat_entry)); + ni->nid = nid; + + /* Check nat cache */ + read_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e) { + ni->ino = nat_get_ino(e); + ni->blk_addr = nat_get_blkaddr(e); + ni->version = nat_get_version(e); + } + read_unlock(&nm_i->nat_tree_lock); + if (e) + return; + + /* Check current segment summary */ + mutex_lock(&curseg->curseg_mutex); + i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); + if (i >= 0) { + ne = nat_in_journal(sum, i); + node_info_from_raw_nat(ni, &ne); + } + mutex_unlock(&curseg->curseg_mutex); + if (i >= 0) + goto cache; + + /* Fill node_info from nat page */ + page = get_current_nat_page(sbi, start_nid); + nat_blk = (struct f2fs_nat_block *)page_address(page); + ne = nat_blk->entries[nid - start_nid]; + node_info_from_raw_nat(ni, &ne); + f2fs_put_page(page, 1); +cache: + /* cache nat entry */ + cache_nat_entry(NM_I(sbi), nid, &ne); +} + +/* + * The maximum depth is four. + * Offset[0] will have raw inode offset. + */ +static int get_node_path(long block, int offset[4], unsigned int noffset[4]) +{ + const long direct_index = ADDRS_PER_INODE; + const long direct_blks = ADDRS_PER_BLOCK; + const long dptrs_per_blk = NIDS_PER_BLOCK; + const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; + const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK; + int n = 0; + int level = 0; + + noffset[0] = 0; + + if (block < direct_index) { + offset[n++] = block; + level = 0; + goto got; + } + block -= direct_index; + if (block < direct_blks) { + offset[n++] = NODE_DIR1_BLOCK; + noffset[n] = 1; + offset[n++] = block; + level = 1; + goto got; + } + block -= direct_blks; + if (block < direct_blks) { + offset[n++] = NODE_DIR2_BLOCK; + noffset[n] = 2; + offset[n++] = block; + level = 1; + goto got; + } + block -= direct_blks; + if (block < indirect_blks) { + offset[n++] = NODE_IND1_BLOCK; + noffset[n] = 3; + offset[n++] = block / direct_blks; + noffset[n] = 4 + offset[n - 1]; + offset[n++] = block % direct_blks; + level = 2; + goto got; + } + block -= indirect_blks; + if (block < indirect_blks) { + offset[n++] = NODE_IND2_BLOCK; + noffset[n] = 4 + dptrs_per_blk; + offset[n++] = block / direct_blks; + noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; + offset[n++] = block % direct_blks; + level = 2; + goto got; + } + block -= indirect_blks; + if (block < dindirect_blks) { + offset[n++] = NODE_DIND_BLOCK; + noffset[n] = 5 + (dptrs_per_blk * 2); + offset[n++] = block / indirect_blks; + noffset[n] = 6 + (dptrs_per_blk * 2) + + offset[n - 1] * (dptrs_per_blk + 1); + offset[n++] = (block / direct_blks) % dptrs_per_blk; + noffset[n] = 7 + (dptrs_per_blk * 2) + + offset[n - 2] * (dptrs_per_blk + 1) + + offset[n - 1]; + offset[n++] = block % direct_blks; + level = 3; + goto got; + } else { + BUG(); + } +got: + return level; +} + +/* + * Caller should call f2fs_put_dnode(dn). + */ +int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *npage[4]; + struct page *parent; + int offset[4]; + unsigned int noffset[4]; + nid_t nids[4]; + int level, i; + int err = 0; + + level = get_node_path(index, offset, noffset); + + nids[0] = dn->inode->i_ino; + npage[0] = get_node_page(sbi, nids[0]); + if (IS_ERR(npage[0])) + return PTR_ERR(npage[0]); + + parent = npage[0]; + nids[1] = get_nid(parent, offset[0], true); + dn->inode_page = npage[0]; + dn->inode_page_locked = true; + + /* get indirect or direct nodes */ + for (i = 1; i <= level; i++) { + bool done = false; + + if (!nids[i] && !ro) { + mutex_lock_op(sbi, NODE_NEW); + + /* alloc new node */ + if (!alloc_nid(sbi, &(nids[i]))) { + mutex_unlock_op(sbi, NODE_NEW); + err = -ENOSPC; + goto release_pages; + } + + dn->nid = nids[i]; + npage[i] = new_node_page(dn, noffset[i]); + if (IS_ERR(npage[i])) { + alloc_nid_failed(sbi, nids[i]); + mutex_unlock_op(sbi, NODE_NEW); + err = PTR_ERR(npage[i]); + goto release_pages; + } + + set_nid(parent, offset[i - 1], nids[i], i == 1); + alloc_nid_done(sbi, nids[i]); + mutex_unlock_op(sbi, NODE_NEW); + done = true; + } else if (ro && i == level && level > 1) { + npage[i] = get_node_page_ra(parent, offset[i - 1]); + if (IS_ERR(npage[i])) { + err = PTR_ERR(npage[i]); + goto release_pages; + } + done = true; + } + if (i == 1) { + dn->inode_page_locked = false; + unlock_page(parent); + } else { + f2fs_put_page(parent, 1); + } + + if (!done) { + npage[i] = get_node_page(sbi, nids[i]); + if (IS_ERR(npage[i])) { + err = PTR_ERR(npage[i]); + f2fs_put_page(npage[0], 0); + goto release_out; + } + } + if (i < level) { + parent = npage[i]; + nids[i + 1] = get_nid(parent, offset[i], false); + } + } + dn->nid = nids[level]; + dn->ofs_in_node = offset[level]; + dn->node_page = npage[level]; + dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); + return 0; + +release_pages: + f2fs_put_page(parent, 1); + if (i > 1) + f2fs_put_page(npage[0], 0); +release_out: + dn->inode_page = NULL; + dn->node_page = NULL; + return err; +} + +static void truncate_node(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct node_info ni; + + get_node_info(sbi, dn->nid, &ni); + BUG_ON(ni.blk_addr == NULL_ADDR); + + if (ni.blk_addr != NULL_ADDR) + invalidate_blocks(sbi, ni.blk_addr); + + /* Deallocate node address */ + dec_valid_node_count(sbi, dn->inode, 1); + set_node_addr(sbi, &ni, NULL_ADDR); + + if (dn->nid == dn->inode->i_ino) { + remove_orphan_inode(sbi, dn->nid); + dec_valid_inode_count(sbi); + } else { + sync_inode_page(dn); + } + + clear_node_page_dirty(dn->node_page); + F2FS_SET_SB_DIRT(sbi); + + f2fs_put_page(dn->node_page, 1); + dn->node_page = NULL; +} + +static int truncate_dnode(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *page; + + if (dn->nid == 0) + return 1; + + /* get direct node */ + page = get_node_page(sbi, dn->nid); + if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) + return 1; + else if (IS_ERR(page)) + return PTR_ERR(page); + + /* Make dnode_of_data for parameter */ + dn->node_page = page; + dn->ofs_in_node = 0; + truncate_data_blocks(dn); + truncate_node(dn); + return 1; +} + +static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, + int ofs, int depth) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct dnode_of_data rdn = *dn; + struct page *page; + struct f2fs_node *rn; + nid_t child_nid; + unsigned int child_nofs; + int freed = 0; + int i, ret; + + if (dn->nid == 0) + return NIDS_PER_BLOCK + 1; + + page = get_node_page(sbi, dn->nid); + if (IS_ERR(page)) + return PTR_ERR(page); + + rn = (struct f2fs_node *)page_address(page); + if (depth < 3) { + for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { + child_nid = le32_to_cpu(rn->in.nid[i]); + if (child_nid == 0) + continue; + rdn.nid = child_nid; + ret = truncate_dnode(&rdn); + if (ret < 0) + goto out_err; + set_nid(page, i, 0, false); + } + } else { + child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; + for (i = ofs; i < NIDS_PER_BLOCK; i++) { + child_nid = le32_to_cpu(rn->in.nid[i]); + if (child_nid == 0) { + child_nofs += NIDS_PER_BLOCK + 1; + continue; + } + rdn.nid = child_nid; + ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); + if (ret == (NIDS_PER_BLOCK + 1)) { + set_nid(page, i, 0, false); + child_nofs += ret; + } else if (ret < 0 && ret != -ENOENT) { + goto out_err; + } + } + freed = child_nofs; + } + + if (!ofs) { + /* remove current indirect node */ + dn->node_page = page; + truncate_node(dn); + freed++; + } else { + f2fs_put_page(page, 1); + } + return freed; + +out_err: + f2fs_put_page(page, 1); + return ret; +} + +static int truncate_partial_nodes(struct dnode_of_data *dn, + struct f2fs_inode *ri, int *offset, int depth) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct page *pages[2]; + nid_t nid[3]; + nid_t child_nid; + int err = 0; + int i; + int idx = depth - 2; + + nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); + if (!nid[0]) + return 0; + + /* get indirect nodes in the path */ + for (i = 0; i < depth - 1; i++) { + /* refernece count'll be increased */ + pages[i] = get_node_page(sbi, nid[i]); + if (IS_ERR(pages[i])) { + depth = i + 1; + err = PTR_ERR(pages[i]); + goto fail; + } + nid[i + 1] = get_nid(pages[i], offset[i + 1], false); + } + + /* free direct nodes linked to a partial indirect node */ + for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) { + child_nid = get_nid(pages[idx], i, false); + if (!child_nid) + continue; + dn->nid = child_nid; + err = truncate_dnode(dn); + if (err < 0) + goto fail; + set_nid(pages[idx], i, 0, false); + } + + if (offset[depth - 1] == 0) { + dn->node_page = pages[idx]; + dn->nid = nid[idx]; + truncate_node(dn); + } else { + f2fs_put_page(pages[idx], 1); + } + offset[idx]++; + offset[depth - 1] = 0; +fail: + for (i = depth - 3; i >= 0; i--) + f2fs_put_page(pages[i], 1); + return err; +} + +/* + * All the block addresses of data and nodes should be nullified. + */ +int truncate_inode_blocks(struct inode *inode, pgoff_t from) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int err = 0, cont = 1; + int level, offset[4], noffset[4]; + unsigned int nofs; + struct f2fs_node *rn; + struct dnode_of_data dn; + struct page *page; + + level = get_node_path(from, offset, noffset); + + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + set_new_dnode(&dn, inode, page, NULL, 0); + unlock_page(page); + + rn = page_address(page); + switch (level) { + case 0: + case 1: + nofs = noffset[1]; + break; + case 2: + nofs = noffset[1]; + if (!offset[level - 1]) + goto skip_partial; + err = truncate_partial_nodes(&dn, &rn->i, offset, level); + if (err < 0 && err != -ENOENT) + goto fail; + nofs += 1 + NIDS_PER_BLOCK; + break; + case 3: + nofs = 5 + 2 * NIDS_PER_BLOCK; + if (!offset[level - 1]) + goto skip_partial; + err = truncate_partial_nodes(&dn, &rn->i, offset, level); + if (err < 0 && err != -ENOENT) + goto fail; + break; + default: + BUG(); + } + +skip_partial: + while (cont) { + dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]); + switch (offset[0]) { + case NODE_DIR1_BLOCK: + case NODE_DIR2_BLOCK: + err = truncate_dnode(&dn); + break; + + case NODE_IND1_BLOCK: + case NODE_IND2_BLOCK: + err = truncate_nodes(&dn, nofs, offset[1], 2); + break; + + case NODE_DIND_BLOCK: + err = truncate_nodes(&dn, nofs, offset[1], 3); + cont = 0; + break; + + default: + BUG(); + } + if (err < 0 && err != -ENOENT) + goto fail; + if (offset[1] == 0 && + rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { + lock_page(page); + wait_on_page_writeback(page); + rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; + set_page_dirty(page); + unlock_page(page); + } + offset[1] = 0; + offset[0]++; + nofs += err; + } +fail: + f2fs_put_page(page, 0); + return err > 0 ? 0 : err; +} + +int remove_inode_page(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + nid_t ino = inode->i_ino; + struct dnode_of_data dn; + + mutex_lock_op(sbi, NODE_TRUNC); + page = get_node_page(sbi, ino); + if (IS_ERR(page)) { + mutex_unlock_op(sbi, NODE_TRUNC); + return PTR_ERR(page); + } + + if (F2FS_I(inode)->i_xattr_nid) { + nid_t nid = F2FS_I(inode)->i_xattr_nid; + struct page *npage = get_node_page(sbi, nid); + + if (IS_ERR(npage)) { + mutex_unlock_op(sbi, NODE_TRUNC); + return PTR_ERR(npage); + } + + F2FS_I(inode)->i_xattr_nid = 0; + set_new_dnode(&dn, inode, page, npage, nid); + dn.inode_page_locked = 1; + truncate_node(&dn); + } + if (inode->i_blocks == 1) { + /* inernally call f2fs_put_page() */ + set_new_dnode(&dn, inode, page, page, ino); + truncate_node(&dn); + } else if (inode->i_blocks == 0) { + struct node_info ni; + get_node_info(sbi, inode->i_ino, &ni); + + /* called after f2fs_new_inode() is failed */ + BUG_ON(ni.blk_addr != NULL_ADDR); + f2fs_put_page(page, 1); + } else { + BUG(); + } + mutex_unlock_op(sbi, NODE_TRUNC); + return 0; +} + +int new_inode_page(struct inode *inode, struct dentry *dentry) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + struct dnode_of_data dn; + + /* allocate inode page for new inode */ + set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); + mutex_lock_op(sbi, NODE_NEW); + page = new_node_page(&dn, 0); + init_dent_inode(dentry, page); + mutex_unlock_op(sbi, NODE_NEW); + if (IS_ERR(page)) + return PTR_ERR(page); + f2fs_put_page(page, 1); + return 0; +} + +struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct address_space *mapping = sbi->node_inode->i_mapping; + struct node_info old_ni, new_ni; + struct page *page; + int err; + + if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) + return ERR_PTR(-EPERM); + + page = grab_cache_page(mapping, dn->nid); + if (!page) + return ERR_PTR(-ENOMEM); + + get_node_info(sbi, dn->nid, &old_ni); + + SetPageUptodate(page); + fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); + + /* Reinitialize old_ni with new node page */ + BUG_ON(old_ni.blk_addr != NULL_ADDR); + new_ni = old_ni; + new_ni.ino = dn->inode->i_ino; + + if (!inc_valid_node_count(sbi, dn->inode, 1)) { + err = -ENOSPC; + goto fail; + } + set_node_addr(sbi, &new_ni, NEW_ADDR); + + dn->node_page = page; + sync_inode_page(dn); + set_page_dirty(page); + set_cold_node(dn->inode, page); + if (ofs == 0) + inc_valid_inode_count(sbi); + + return page; + +fail: + f2fs_put_page(page, 1); + return ERR_PTR(err); +} + +static int read_node_page(struct page *page, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct node_info ni; + + get_node_info(sbi, page->index, &ni); + + if (ni.blk_addr == NULL_ADDR) + return -ENOENT; + return f2fs_readpage(sbi, page, ni.blk_addr, type); +} + +/* + * Readahead a node page + */ +void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + struct page *apage; + + apage = find_get_page(mapping, nid); + if (apage && PageUptodate(apage)) + goto release_out; + f2fs_put_page(apage, 0); + + apage = grab_cache_page(mapping, nid); + if (!apage) + return; + + if (read_node_page(apage, READA)) + goto unlock_out; + + page_cache_release(apage); + return; + +unlock_out: + unlock_page(apage); +release_out: + page_cache_release(apage); +} + +struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +{ + int err; + struct page *page; + struct address_space *mapping = sbi->node_inode->i_mapping; + + page = grab_cache_page(mapping, nid); + if (!page) + return ERR_PTR(-ENOMEM); + + err = read_node_page(page, READ_SYNC); + if (err) { + f2fs_put_page(page, 1); + return ERR_PTR(err); + } + + BUG_ON(nid != nid_of_node(page)); + mark_page_accessed(page); + return page; +} + +/* + * Return a locked page for the desired node page. + * And, readahead MAX_RA_NODE number of node pages. + */ +struct page *get_node_page_ra(struct page *parent, int start) +{ + struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); + struct address_space *mapping = sbi->node_inode->i_mapping; + int i, end; + int err = 0; + nid_t nid; + struct page *page; + + /* First, try getting the desired direct node. */ + nid = get_nid(parent, start, false); + if (!nid) + return ERR_PTR(-ENOENT); + + page = find_get_page(mapping, nid); + if (page && PageUptodate(page)) + goto page_hit; + f2fs_put_page(page, 0); + +repeat: + page = grab_cache_page(mapping, nid); + if (!page) + return ERR_PTR(-ENOMEM); + + err = read_node_page(page, READA); + if (err) { + f2fs_put_page(page, 1); + return ERR_PTR(err); + } + + /* Then, try readahead for siblings of the desired node */ + end = start + MAX_RA_NODE; + end = min(end, NIDS_PER_BLOCK); + for (i = start + 1; i < end; i++) { + nid = get_nid(parent, i, false); + if (!nid) + continue; + ra_node_page(sbi, nid); + } + +page_hit: + lock_page(page); + if (PageError(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + + /* Has the page been truncated? */ + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } + return page; +} + +void sync_inode_page(struct dnode_of_data *dn) +{ + if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { + update_inode(dn->inode, dn->node_page); + } else if (dn->inode_page) { + if (!dn->inode_page_locked) + lock_page(dn->inode_page); + update_inode(dn->inode, dn->inode_page); + if (!dn->inode_page_locked) + unlock_page(dn->inode_page); + } else { + f2fs_write_inode(dn->inode, NULL); + } +} + +int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, + struct writeback_control *wbc) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + pgoff_t index, end; + struct pagevec pvec; + int step = ino ? 2 : 0; + int nwritten = 0, wrote = 0; + + pagevec_init(&pvec, 0); + +next_step: + index = 0; + end = LONG_MAX; + + while (index <= end) { + int i, nr_pages; + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * flushing sequence with step: + * 0. indirect nodes + * 1. dentry dnodes + * 2. file dnodes + */ + if (step == 0 && IS_DNODE(page)) + continue; + if (step == 1 && (!IS_DNODE(page) || + is_cold_node(page))) + continue; + if (step == 2 && (!IS_DNODE(page) || + !is_cold_node(page))) + continue; + + /* + * If an fsync mode, + * we should not skip writing node pages. + */ + if (ino && ino_of_node(page) == ino) + lock_page(page); + else if (!trylock_page(page)) + continue; + + if (unlikely(page->mapping != mapping)) { +continue_unlock: + unlock_page(page); + continue; + } + if (ino && ino_of_node(page) != ino) + goto continue_unlock; + + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; + } + + if (!clear_page_dirty_for_io(page)) + goto continue_unlock; + + /* called by fsync() */ + if (ino && IS_DNODE(page)) { + int mark = !is_checkpointed_node(sbi, ino); + set_fsync_mark(page, 1); + if (IS_INODE(page)) + set_dentry_mark(page, mark); + nwritten++; + } else { + set_fsync_mark(page, 0); + set_dentry_mark(page, 0); + } + mapping->a_ops->writepage(page, wbc); + wrote++; + + if (--wbc->nr_to_write == 0) + break; + } + pagevec_release(&pvec); + cond_resched(); + + if (wbc->nr_to_write == 0) { + step = 2; + break; + } + } + + if (step < 2) { + step++; + goto next_step; + } + + if (wrote) + f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL); + + return nwritten; +} + +static int f2fs_write_node_page(struct page *page, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + nid_t nid; + unsigned int nofs; + block_t new_addr; + struct node_info ni; + + if (wbc->for_reclaim) { + dec_page_count(sbi, F2FS_DIRTY_NODES); + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; + } + + wait_on_page_writeback(page); + + mutex_lock_op(sbi, NODE_WRITE); + + /* get old block addr of this node page */ + nid = nid_of_node(page); + nofs = ofs_of_node(page); + BUG_ON(page->index != nid); + + get_node_info(sbi, nid, &ni); + + /* This page is already truncated */ + if (ni.blk_addr == NULL_ADDR) + return 0; + + set_page_writeback(page); + + /* insert node offset */ + write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); + set_node_addr(sbi, &ni, new_addr); + dec_page_count(sbi, F2FS_DIRTY_NODES); + + mutex_unlock_op(sbi, NODE_WRITE); + unlock_page(page); + return 0; +} + +static int f2fs_write_node_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + struct block_device *bdev = sbi->sb->s_bdev; + long nr_to_write = wbc->nr_to_write; + + if (wbc->for_kupdate) + return 0; + + if (get_pages(sbi, F2FS_DIRTY_NODES) == 0) + return 0; + + if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { + write_checkpoint(sbi, false, false); + return 0; + } + + /* if mounting is failed, skip writing node pages */ + wbc->nr_to_write = bio_get_nr_vecs(bdev); + sync_node_pages(sbi, 0, wbc); + wbc->nr_to_write = nr_to_write - + (bio_get_nr_vecs(bdev) - wbc->nr_to_write); + return 0; +} + +static int f2fs_set_node_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + + SetPageUptodate(page); + if (!PageDirty(page)) { + __set_page_dirty_nobuffers(page); + inc_page_count(sbi, F2FS_DIRTY_NODES); + SetPagePrivate(page); + return 1; + } + return 0; +} + +static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) +{ + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (PageDirty(page)) + dec_page_count(sbi, F2FS_DIRTY_NODES); + ClearPagePrivate(page); +} + +static int f2fs_release_node_page(struct page *page, gfp_t wait) +{ + ClearPagePrivate(page); + return 0; +} + +/* + * Structure of the f2fs node operations + */ +const struct address_space_operations f2fs_node_aops = { + .writepage = f2fs_write_node_page, + .writepages = f2fs_write_node_pages, + .set_page_dirty = f2fs_set_node_page_dirty, + .invalidatepage = f2fs_invalidate_node_page, + .releasepage = f2fs_release_node_page, +}; + +static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) +{ + struct list_head *this; + struct free_nid *i = NULL; + list_for_each(this, head) { + i = list_entry(this, struct free_nid, list); + if (i->nid == n) + break; + i = NULL; + } + return i; +} + +static void __del_from_free_nid_list(struct free_nid *i) +{ + list_del(&i->list); + kmem_cache_free(free_nid_slab, i); +} + +static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) +{ + struct free_nid *i; + + if (nm_i->fcnt > 2 * MAX_FREE_NIDS) + return 0; +retry: + i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); + if (!i) { + cond_resched(); + goto retry; + } + i->nid = nid; + i->state = NID_NEW; + + spin_lock(&nm_i->free_nid_list_lock); + if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { + spin_unlock(&nm_i->free_nid_list_lock); + kmem_cache_free(free_nid_slab, i); + return 0; + } + list_add_tail(&i->list, &nm_i->free_nid_list); + nm_i->fcnt++; + spin_unlock(&nm_i->free_nid_list_lock); + return 1; +} + +static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) +{ + struct free_nid *i; + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + if (i && i->state == NID_NEW) { + __del_from_free_nid_list(i); + nm_i->fcnt--; + } + spin_unlock(&nm_i->free_nid_list_lock); +} + +static int scan_nat_page(struct f2fs_nm_info *nm_i, + struct page *nat_page, nid_t start_nid) +{ + struct f2fs_nat_block *nat_blk = page_address(nat_page); + block_t blk_addr; + int fcnt = 0; + int i; + + /* 0 nid should not be used */ + if (start_nid == 0) + ++start_nid; + + i = start_nid % NAT_ENTRY_PER_BLOCK; + + for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { + blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); + BUG_ON(blk_addr == NEW_ADDR); + if (blk_addr == NULL_ADDR) + fcnt += add_free_nid(nm_i, start_nid); + } + return fcnt; +} + +static void build_free_nids(struct f2fs_sb_info *sbi) +{ + struct free_nid *fnid, *next_fnid; + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + nid_t nid = 0; + bool is_cycled = false; + int fcnt = 0; + int i; + + nid = nm_i->next_scan_nid; + nm_i->init_scan_nid = nid; + + ra_nat_pages(sbi, nid); + + while (1) { + struct page *page = get_current_nat_page(sbi, nid); + + fcnt += scan_nat_page(nm_i, page, nid); + f2fs_put_page(page, 1); + + nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); + + if (nid >= nm_i->max_nid) { + nid = 0; + is_cycled = true; + } + if (fcnt > MAX_FREE_NIDS) + break; + if (is_cycled && nm_i->init_scan_nid <= nid) + break; + } + + nm_i->next_scan_nid = nid; + + /* find free nids from current sum_pages */ + mutex_lock(&curseg->curseg_mutex); + for (i = 0; i < nats_in_cursum(sum); i++) { + block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); + nid = le32_to_cpu(nid_in_journal(sum, i)); + if (addr == NULL_ADDR) + add_free_nid(nm_i, nid); + else + remove_free_nid(nm_i, nid); + } + mutex_unlock(&curseg->curseg_mutex); + + /* remove the free nids from current allocated nids */ + list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) { + struct nat_entry *ne; + + read_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, fnid->nid); + if (ne && nat_get_blkaddr(ne) != NULL_ADDR) + remove_free_nid(nm_i, fnid->nid); + read_unlock(&nm_i->nat_tree_lock); + } +} + +/* + * If this function returns success, caller can obtain a new nid + * from second parameter of this function. + * The returned nid could be used ino as well as nid when inode is created. + */ +bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i = NULL; + struct list_head *this; +retry: + mutex_lock(&nm_i->build_lock); + if (!nm_i->fcnt) { + /* scan NAT in order to build free nid list */ + build_free_nids(sbi); + if (!nm_i->fcnt) { + mutex_unlock(&nm_i->build_lock); + return false; + } + } + mutex_unlock(&nm_i->build_lock); + + /* + * We check fcnt again since previous check is racy as + * we didn't hold free_nid_list_lock. So other thread + * could consume all of free nids. + */ + spin_lock(&nm_i->free_nid_list_lock); + if (!nm_i->fcnt) { + spin_unlock(&nm_i->free_nid_list_lock); + goto retry; + } + + BUG_ON(list_empty(&nm_i->free_nid_list)); + list_for_each(this, &nm_i->free_nid_list) { + i = list_entry(this, struct free_nid, list); + if (i->state == NID_NEW) + break; + } + + BUG_ON(i->state != NID_NEW); + *nid = i->nid; + i->state = NID_ALLOC; + nm_i->fcnt--; + spin_unlock(&nm_i->free_nid_list_lock); + return true; +} + +/* + * alloc_nid() should be called prior to this function. + */ +void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i; + + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + if (i) { + BUG_ON(i->state != NID_ALLOC); + __del_from_free_nid_list(i); + } + spin_unlock(&nm_i->free_nid_list_lock); +} + +/* + * alloc_nid() should be called prior to this function. + */ +void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) +{ + alloc_nid_done(sbi, nid); + add_free_nid(NM_I(sbi), nid); +} + +void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, + struct f2fs_summary *sum, struct node_info *ni, + block_t new_blkaddr) +{ + rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); + set_node_addr(sbi, ni, new_blkaddr); + clear_node_page_dirty(page); +} + +int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) +{ + struct address_space *mapping = sbi->node_inode->i_mapping; + struct f2fs_node *src, *dst; + nid_t ino = ino_of_node(page); + struct node_info old_ni, new_ni; + struct page *ipage; + + ipage = grab_cache_page(mapping, ino); + if (!ipage) + return -ENOMEM; + + /* Should not use this inode from free nid list */ + remove_free_nid(NM_I(sbi), ino); + + get_node_info(sbi, ino, &old_ni); + SetPageUptodate(ipage); + fill_node_footer(ipage, ino, ino, 0, true); + + src = (struct f2fs_node *)page_address(page); + dst = (struct f2fs_node *)page_address(ipage); + + memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); + dst->i.i_size = 0; + dst->i.i_blocks = cpu_to_le64(1); + dst->i.i_links = cpu_to_le32(1); + dst->i.i_xattr_nid = 0; + + new_ni = old_ni; + new_ni.ino = ino; + + set_node_addr(sbi, &new_ni, NEW_ADDR); + inc_valid_inode_count(sbi); + + f2fs_put_page(ipage, 1); + return 0; +} + +int restore_node_summary(struct f2fs_sb_info *sbi, + unsigned int segno, struct f2fs_summary_block *sum) +{ + struct f2fs_node *rn; + struct f2fs_summary *sum_entry; + struct page *page; + block_t addr; + int i, last_offset; + + /* alloc temporal page for read node */ + page = alloc_page(GFP_NOFS | __GFP_ZERO); + if (IS_ERR(page)) + return PTR_ERR(page); + lock_page(page); + + /* scan the node segment */ + last_offset = sbi->blocks_per_seg; + addr = START_BLOCK(sbi, segno); + sum_entry = &sum->entries[0]; + + for (i = 0; i < last_offset; i++, sum_entry++) { + if (f2fs_readpage(sbi, page, addr, READ_SYNC)) + goto out; + + rn = (struct f2fs_node *)page_address(page); + sum_entry->nid = rn->footer.nid; + sum_entry->version = 0; + sum_entry->ofs_in_node = 0; + addr++; + + /* + * In order to read next node page, + * we must clear PageUptodate flag. + */ + ClearPageUptodate(page); + } +out: + unlock_page(page); + __free_pages(page, 0); + return 0; +} + +static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + int i; + + mutex_lock(&curseg->curseg_mutex); + + if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) { + mutex_unlock(&curseg->curseg_mutex); + return false; + } + + for (i = 0; i < nats_in_cursum(sum); i++) { + struct nat_entry *ne; + struct f2fs_nat_entry raw_ne; + nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); + + raw_ne = nat_in_journal(sum, i); +retry: + write_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); + if (ne) { + __set_nat_cache_dirty(nm_i, ne); + write_unlock(&nm_i->nat_tree_lock); + continue; + } + ne = grab_nat_entry(nm_i, nid); + if (!ne) { + write_unlock(&nm_i->nat_tree_lock); + goto retry; + } + nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); + nat_set_ino(ne, le32_to_cpu(raw_ne.ino)); + nat_set_version(ne, raw_ne.version); + __set_nat_cache_dirty(nm_i, ne); + write_unlock(&nm_i->nat_tree_lock); + } + update_nats_in_cursum(sum, -i); + mutex_unlock(&curseg->curseg_mutex); + return true; +} + +/* + * This function is called during the checkpointing process. + */ +void flush_nat_entries(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + struct list_head *cur, *n; + struct page *page = NULL; + struct f2fs_nat_block *nat_blk = NULL; + nid_t start_nid = 0, end_nid = 0; + bool flushed; + + flushed = flush_nats_in_journal(sbi); + + if (!flushed) + mutex_lock(&curseg->curseg_mutex); + + /* 1) flush dirty nat caches */ + list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { + struct nat_entry *ne; + nid_t nid; + struct f2fs_nat_entry raw_ne; + int offset = -1; + block_t old_blkaddr, new_blkaddr; + + ne = list_entry(cur, struct nat_entry, list); + nid = nat_get_nid(ne); + + if (nat_get_blkaddr(ne) == NEW_ADDR) + continue; + if (flushed) + goto to_nat_page; + + /* if there is room for nat enries in curseg->sumpage */ + offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); + if (offset >= 0) { + raw_ne = nat_in_journal(sum, offset); + old_blkaddr = le32_to_cpu(raw_ne.block_addr); + goto flush_now; + } +to_nat_page: + if (!page || (start_nid > nid || nid > end_nid)) { + if (page) { + f2fs_put_page(page, 1); + page = NULL; + } + start_nid = START_NID(nid); + end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1; + + /* + * get nat block with dirty flag, increased reference + * count, mapped and lock + */ + page = get_next_nat_page(sbi, start_nid); + nat_blk = page_address(page); + } + + BUG_ON(!nat_blk); + raw_ne = nat_blk->entries[nid - start_nid]; + old_blkaddr = le32_to_cpu(raw_ne.block_addr); +flush_now: + new_blkaddr = nat_get_blkaddr(ne); + + raw_ne.ino = cpu_to_le32(nat_get_ino(ne)); + raw_ne.block_addr = cpu_to_le32(new_blkaddr); + raw_ne.version = nat_get_version(ne); + + if (offset < 0) { + nat_blk->entries[nid - start_nid] = raw_ne; + } else { + nat_in_journal(sum, offset) = raw_ne; + nid_in_journal(sum, offset) = cpu_to_le32(nid); + } + + if (nat_get_blkaddr(ne) == NULL_ADDR) { + write_lock(&nm_i->nat_tree_lock); + __del_from_nat_cache(nm_i, ne); + write_unlock(&nm_i->nat_tree_lock); + + /* We can reuse this freed nid at this point */ + add_free_nid(NM_I(sbi), nid); + } else { + write_lock(&nm_i->nat_tree_lock); + __clear_nat_cache_dirty(nm_i, ne); + ne->checkpointed = true; + write_unlock(&nm_i->nat_tree_lock); + } + } + if (!flushed) + mutex_unlock(&curseg->curseg_mutex); + f2fs_put_page(page, 1); + + /* 2) shrink nat caches if necessary */ + try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD); +} + +static int init_node_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned char *version_bitmap; + unsigned int nat_segs, nat_blocks; + + nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); + + /* segment_count_nat includes pair segment so divide to 2. */ + nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; + nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; + nm_i->fcnt = 0; + nm_i->nat_cnt = 0; + + INIT_LIST_HEAD(&nm_i->free_nid_list); + INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); + INIT_LIST_HEAD(&nm_i->nat_entries); + INIT_LIST_HEAD(&nm_i->dirty_nat_entries); + + mutex_init(&nm_i->build_lock); + spin_lock_init(&nm_i->free_nid_list_lock); + rwlock_init(&nm_i->nat_tree_lock); + + nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); + nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); + nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); + + nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL); + if (!nm_i->nat_bitmap) + return -ENOMEM; + version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); + if (!version_bitmap) + return -EFAULT; + + /* copy version bitmap */ + memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size); + return 0; +} + +int build_node_manager(struct f2fs_sb_info *sbi) +{ + int err; + + sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL); + if (!sbi->nm_info) + return -ENOMEM; + + err = init_node_manager(sbi); + if (err) + return err; + + build_free_nids(sbi); + return 0; +} + +void destroy_node_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i, *next_i; + struct nat_entry *natvec[NATVEC_SIZE]; + nid_t nid = 0; + unsigned int found; + + if (!nm_i) + return; + + /* destroy free nid list */ + spin_lock(&nm_i->free_nid_list_lock); + list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { + BUG_ON(i->state == NID_ALLOC); + __del_from_free_nid_list(i); + nm_i->fcnt--; + } + BUG_ON(nm_i->fcnt); + spin_unlock(&nm_i->free_nid_list_lock); + + /* destroy nat cache */ + write_lock(&nm_i->nat_tree_lock); + while ((found = __gang_lookup_nat_cache(nm_i, + nid, NATVEC_SIZE, natvec))) { + unsigned idx; + for (idx = 0; idx < found; idx++) { + struct nat_entry *e = natvec[idx]; + nid = nat_get_nid(e) + 1; + __del_from_nat_cache(nm_i, e); + } + } + BUG_ON(nm_i->nat_cnt); + write_unlock(&nm_i->nat_tree_lock); + + kfree(nm_i->nat_bitmap); + sbi->nm_info = NULL; + kfree(nm_i); +} + +int create_node_manager_caches(void) +{ + nat_entry_slab = f2fs_kmem_cache_create("nat_entry", + sizeof(struct nat_entry), NULL); + if (!nat_entry_slab) + return -ENOMEM; + + free_nid_slab = f2fs_kmem_cache_create("free_nid", + sizeof(struct free_nid), NULL); + if (!free_nid_slab) { + kmem_cache_destroy(nat_entry_slab); + return -ENOMEM; + } + return 0; +} + +void destroy_node_manager_caches(void) +{ + kmem_cache_destroy(free_nid_slab); + kmem_cache_destroy(nat_entry_slab); +} diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h new file mode 100644 index 000000000000..afdb130f782e --- /dev/null +++ b/fs/f2fs/node.h @@ -0,0 +1,353 @@ +/* + * fs/f2fs/node.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/* start node id of a node block dedicated to the given node id */ +#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) + +/* node block offset on the NAT area dedicated to the given start node id */ +#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) + +/* # of pages to perform readahead before building free nids */ +#define FREE_NID_PAGES 4 + +/* maximum # of free node ids to produce during build_free_nids */ +#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) + +/* maximum readahead size for node during getting data blocks */ +#define MAX_RA_NODE 128 + +/* maximum cached nat entries to manage memory footprint */ +#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) + +/* vector size for gang look-up from nat cache that consists of radix tree */ +#define NATVEC_SIZE 64 + +/* + * For node information + */ +struct node_info { + nid_t nid; /* node id */ + nid_t ino; /* inode number of the node's owner */ + block_t blk_addr; /* block address of the node */ + unsigned char version; /* version of the node */ +}; + +struct nat_entry { + struct list_head list; /* for clean or dirty nat list */ + bool checkpointed; /* whether it is checkpointed or not */ + struct node_info ni; /* in-memory node information */ +}; + +#define nat_get_nid(nat) (nat->ni.nid) +#define nat_set_nid(nat, n) (nat->ni.nid = n) +#define nat_get_blkaddr(nat) (nat->ni.blk_addr) +#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) +#define nat_get_ino(nat) (nat->ni.ino) +#define nat_set_ino(nat, i) (nat->ni.ino = i) +#define nat_get_version(nat) (nat->ni.version) +#define nat_set_version(nat, v) (nat->ni.version = v) + +#define __set_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->dirty_nat_entries); +#define __clear_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->nat_entries); +#define inc_node_version(version) (++version) + +static inline void node_info_from_raw_nat(struct node_info *ni, + struct f2fs_nat_entry *raw_ne) +{ + ni->ino = le32_to_cpu(raw_ne->ino); + ni->blk_addr = le32_to_cpu(raw_ne->block_addr); + ni->version = raw_ne->version; +} + +/* + * For free nid mangement + */ +enum nid_state { + NID_NEW, /* newly added to free nid list */ + NID_ALLOC /* it is allocated */ +}; + +struct free_nid { + struct list_head list; /* for free node id list */ + nid_t nid; /* node id */ + int state; /* in use or not: NID_NEW or NID_ALLOC */ +}; + +static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *fnid; + + if (nm_i->fcnt <= 0) + return -1; + spin_lock(&nm_i->free_nid_list_lock); + fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); + *nid = fnid->nid; + spin_unlock(&nm_i->free_nid_list_lock); + return 0; +} + +/* + * inline functions + */ +static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); +} + +static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + pgoff_t block_off; + pgoff_t block_addr; + int seg_off; + + block_off = NAT_BLOCK_OFFSET(start); + seg_off = block_off >> sbi->log_blocks_per_seg; + + block_addr = (pgoff_t)(nm_i->nat_blkaddr + + (seg_off << sbi->log_blocks_per_seg << 1) + + (block_off & ((1 << sbi->log_blocks_per_seg) - 1))); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + block_addr += sbi->blocks_per_seg; + + return block_addr; +} + +static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + block_addr -= nm_i->nat_blkaddr; + if ((block_addr >> sbi->log_blocks_per_seg) % 2) + block_addr -= sbi->blocks_per_seg; + else + block_addr += sbi->blocks_per_seg; + + return block_addr + nm_i->nat_blkaddr; +} + +static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) +{ + unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + f2fs_clear_bit(block_off, nm_i->nat_bitmap); + else + f2fs_set_bit(block_off, nm_i->nat_bitmap); +} + +static inline void fill_node_footer(struct page *page, nid_t nid, + nid_t ino, unsigned int ofs, bool reset) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + if (reset) + memset(rn, 0, sizeof(*rn)); + rn->footer.nid = cpu_to_le32(nid); + rn->footer.ino = cpu_to_le32(ino); + rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); +} + +static inline void copy_node_footer(struct page *dst, struct page *src) +{ + void *src_addr = page_address(src); + void *dst_addr = page_address(dst); + struct f2fs_node *src_rn = (struct f2fs_node *)src_addr; + struct f2fs_node *dst_rn = (struct f2fs_node *)dst_addr; + memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); +} + +static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + rn->footer.cp_ver = ckpt->checkpoint_ver; + rn->footer.next_blkaddr = cpu_to_le32(blkaddr); +} + +static inline nid_t ino_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le32_to_cpu(rn->footer.ino); +} + +static inline nid_t nid_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le32_to_cpu(rn->footer.nid); +} + +static inline unsigned int ofs_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned flag = le32_to_cpu(rn->footer.flag); + return flag >> OFFSET_BIT_SHIFT; +} + +static inline unsigned long long cpver_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le64_to_cpu(rn->footer.cp_ver); +} + +static inline block_t next_blkaddr_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le32_to_cpu(rn->footer.next_blkaddr); +} + +/* + * f2fs assigns the following node offsets described as (num). + * N = NIDS_PER_BLOCK + * + * Inode block (0) + * |- direct node (1) + * |- direct node (2) + * |- indirect node (3) + * | `- direct node (4 => 4 + N - 1) + * |- indirect node (4 + N) + * | `- direct node (5 + N => 5 + 2N - 1) + * `- double indirect node (5 + 2N) + * `- indirect node (6 + 2N) + * `- direct node (x(N + 1)) + */ +static inline bool IS_DNODE(struct page *node_page) +{ + unsigned int ofs = ofs_of_node(node_page); + if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || + ofs == 5 + 2 * NIDS_PER_BLOCK) + return false; + if (ofs >= 6 + 2 * NIDS_PER_BLOCK) { + ofs -= 6 + 2 * NIDS_PER_BLOCK; + if ((long int)ofs % (NIDS_PER_BLOCK + 1)) + return false; + } + return true; +} + +static inline void set_nid(struct page *p, int off, nid_t nid, bool i) +{ + struct f2fs_node *rn = (struct f2fs_node *)page_address(p); + + wait_on_page_writeback(p); + + if (i) + rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); + else + rn->in.nid[off] = cpu_to_le32(nid); + set_page_dirty(p); +} + +static inline nid_t get_nid(struct page *p, int off, bool i) +{ + struct f2fs_node *rn = (struct f2fs_node *)page_address(p); + if (i) + return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); + return le32_to_cpu(rn->in.nid[off]); +} + +/* + * Coldness identification: + * - Mark cold files in f2fs_inode_info + * - Mark cold node blocks in their node footer + * - Mark cold data pages in page cache + */ +static inline int is_cold_file(struct inode *inode) +{ + return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT; +} + +static inline int is_cold_data(struct page *page) +{ + return PageChecked(page); +} + +static inline void set_cold_data(struct page *page) +{ + SetPageChecked(page); +} + +static inline void clear_cold_data(struct page *page) +{ + ClearPageChecked(page); +} + +static inline int is_cold_node(struct page *page) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + return flag & (0x1 << COLD_BIT_SHIFT); +} + +static inline unsigned char is_fsync_dnode(struct page *page) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + return flag & (0x1 << FSYNC_BIT_SHIFT); +} + +static inline unsigned char is_dent_dnode(struct page *page) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + return flag & (0x1 << DENT_BIT_SHIFT); +} + +static inline void set_cold_node(struct inode *inode, struct page *page) +{ + struct f2fs_node *rn = (struct f2fs_node *)page_address(page); + unsigned int flag = le32_to_cpu(rn->footer.flag); + + if (S_ISDIR(inode->i_mode)) + flag &= ~(0x1 << COLD_BIT_SHIFT); + else + flag |= (0x1 << COLD_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} + +static inline void set_fsync_mark(struct page *page, int mark) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + if (mark) + flag |= (0x1 << FSYNC_BIT_SHIFT); + else + flag &= ~(0x1 << FSYNC_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} + +static inline void set_dentry_mark(struct page *page, int mark) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + if (mark) + flag |= (0x1 << DENT_BIT_SHIFT); + else + flag &= ~(0x1 << DENT_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c new file mode 100644 index 000000000000..b07e9b6ef376 --- /dev/null +++ b/fs/f2fs/recovery.c @@ -0,0 +1,375 @@ +/* + * fs/f2fs/recovery.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/fs.h> +#include <linux/f2fs_fs.h> +#include "f2fs.h" +#include "node.h" +#include "segment.h" + +static struct kmem_cache *fsync_entry_slab; + +bool space_for_roll_forward(struct f2fs_sb_info *sbi) +{ + if (sbi->last_valid_block_count + sbi->alloc_valid_block_count + > sbi->user_block_count) + return false; + return true; +} + +static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, + nid_t ino) +{ + struct list_head *this; + struct fsync_inode_entry *entry; + + list_for_each(this, head) { + entry = list_entry(this, struct fsync_inode_entry, list); + if (entry->inode->i_ino == ino) + return entry; + } + return NULL; +} + +static int recover_dentry(struct page *ipage, struct inode *inode) +{ + struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage); + struct f2fs_inode *raw_inode = &(raw_node->i); + struct dentry dent, parent; + struct f2fs_dir_entry *de; + struct page *page; + struct inode *dir; + int err = 0; + + if (!is_dent_dnode(ipage)) + goto out; + + dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino)); + if (IS_ERR(dir)) { + err = -EINVAL; + goto out; + } + + parent.d_inode = dir; + dent.d_parent = &parent; + dent.d_name.len = le32_to_cpu(raw_inode->i_namelen); + dent.d_name.name = raw_inode->i_name; + + de = f2fs_find_entry(dir, &dent.d_name, &page); + if (de) { + kunmap(page); + f2fs_put_page(page, 0); + } else { + f2fs_add_link(&dent, inode); + } + iput(dir); +out: + kunmap(ipage); + return err; +} + +static int recover_inode(struct inode *inode, struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; + struct f2fs_inode *raw_inode = &(raw_node->i); + + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + i_size_write(inode, le64_to_cpu(raw_inode->i_size)); + inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); + inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); + inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); + inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + + return recover_dentry(node_page, inode); +} + +static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) +{ + unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver); + struct curseg_info *curseg; + struct page *page; + block_t blkaddr; + int err = 0; + + /* get node pages in the current segment */ + curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); + blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; + + /* read node page */ + page = alloc_page(GFP_F2FS_ZERO); + if (IS_ERR(page)) + return PTR_ERR(page); + lock_page(page); + + while (1) { + struct fsync_inode_entry *entry; + + if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC)) + goto out; + + if (cp_ver != cpver_of_node(page)) + goto out; + + if (!is_fsync_dnode(page)) + goto next; + + entry = get_fsync_inode(head, ino_of_node(page)); + if (entry) { + entry->blkaddr = blkaddr; + if (IS_INODE(page) && is_dent_dnode(page)) + set_inode_flag(F2FS_I(entry->inode), + FI_INC_LINK); + } else { + if (IS_INODE(page) && is_dent_dnode(page)) { + if (recover_inode_page(sbi, page)) { + err = -ENOMEM; + goto out; + } + } + + /* add this fsync inode to the list */ + entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); + if (!entry) { + err = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&entry->list); + list_add_tail(&entry->list, head); + + entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); + if (IS_ERR(entry->inode)) { + err = PTR_ERR(entry->inode); + goto out; + } + entry->blkaddr = blkaddr; + } + if (IS_INODE(page)) { + err = recover_inode(entry->inode, page); + if (err) + goto out; + } +next: + /* check next segment */ + blkaddr = next_blkaddr_of_node(page); + ClearPageUptodate(page); + } +out: + unlock_page(page); + __free_pages(page, 0); + return err; +} + +static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, + struct list_head *head) +{ + struct list_head *this; + struct fsync_inode_entry *entry; + list_for_each(this, head) { + entry = list_entry(this, struct fsync_inode_entry, list); + iput(entry->inode); + list_del(&entry->list); + kmem_cache_free(fsync_entry_slab, entry); + } +} + +static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, + block_t blkaddr) +{ + struct seg_entry *sentry; + unsigned int segno = GET_SEGNO(sbi, blkaddr); + unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & + (sbi->blocks_per_seg - 1); + struct f2fs_summary sum; + nid_t ino; + void *kaddr; + struct inode *inode; + struct page *node_page; + block_t bidx; + int i; + + sentry = get_seg_entry(sbi, segno); + if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) + return; + + /* Get the previous summary */ + for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + if (curseg->segno == segno) { + sum = curseg->sum_blk->entries[blkoff]; + break; + } + } + if (i > CURSEG_COLD_DATA) { + struct page *sum_page = get_sum_page(sbi, segno); + struct f2fs_summary_block *sum_node; + kaddr = page_address(sum_page); + sum_node = (struct f2fs_summary_block *)kaddr; + sum = sum_node->entries[blkoff]; + f2fs_put_page(sum_page, 1); + } + + /* Get the node page */ + node_page = get_node_page(sbi, le32_to_cpu(sum.nid)); + bidx = start_bidx_of_node(ofs_of_node(node_page)) + + le16_to_cpu(sum.ofs_in_node); + ino = ino_of_node(node_page); + f2fs_put_page(node_page, 1); + + /* Deallocate previous index in the node page */ + inode = f2fs_iget_nowait(sbi->sb, ino); + truncate_hole(inode, bidx, bidx + 1); + iput(inode); +} + +static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, + struct page *page, block_t blkaddr) +{ + unsigned int start, end; + struct dnode_of_data dn; + struct f2fs_summary sum; + struct node_info ni; + + start = start_bidx_of_node(ofs_of_node(page)); + if (IS_INODE(page)) + end = start + ADDRS_PER_INODE; + else + end = start + ADDRS_PER_BLOCK; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + if (get_dnode_of_data(&dn, start, 0)) + return; + + wait_on_page_writeback(dn.node_page); + + get_node_info(sbi, dn.nid, &ni); + BUG_ON(ni.ino != ino_of_node(page)); + BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page)); + + for (; start < end; start++) { + block_t src, dest; + + src = datablock_addr(dn.node_page, dn.ofs_in_node); + dest = datablock_addr(page, dn.ofs_in_node); + + if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { + if (src == NULL_ADDR) { + int err = reserve_new_block(&dn); + /* We should not get -ENOSPC */ + BUG_ON(err); + } + + /* Check the previous node page having this index */ + check_index_in_prev_nodes(sbi, dest); + + set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); + + /* write dummy data page */ + recover_data_page(sbi, NULL, &sum, src, dest); + update_extent_cache(dest, &dn); + } + dn.ofs_in_node++; + } + + /* write node page in place */ + set_summary(&sum, dn.nid, 0, 0); + if (IS_INODE(dn.node_page)) + sync_inode_page(&dn); + + copy_node_footer(dn.node_page, page); + fill_node_footer(dn.node_page, dn.nid, ni.ino, + ofs_of_node(page), false); + set_page_dirty(dn.node_page); + + recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); + f2fs_put_dnode(&dn); +} + +static void recover_data(struct f2fs_sb_info *sbi, + struct list_head *head, int type) +{ + unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver); + struct curseg_info *curseg; + struct page *page; + block_t blkaddr; + + /* get node pages in the current segment */ + curseg = CURSEG_I(sbi, type); + blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + + /* read node page */ + page = alloc_page(GFP_NOFS | __GFP_ZERO); + if (IS_ERR(page)) + return; + lock_page(page); + + while (1) { + struct fsync_inode_entry *entry; + + if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC)) + goto out; + + if (cp_ver != cpver_of_node(page)) + goto out; + + entry = get_fsync_inode(head, ino_of_node(page)); + if (!entry) + goto next; + + do_recover_data(sbi, entry->inode, page, blkaddr); + + if (entry->blkaddr == blkaddr) { + iput(entry->inode); + list_del(&entry->list); + kmem_cache_free(fsync_entry_slab, entry); + } +next: + /* check next segment */ + blkaddr = next_blkaddr_of_node(page); + ClearPageUptodate(page); + } +out: + unlock_page(page); + __free_pages(page, 0); + + allocate_new_segments(sbi); +} + +void recover_fsync_data(struct f2fs_sb_info *sbi) +{ + struct list_head inode_list; + + fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", + sizeof(struct fsync_inode_entry), NULL); + if (unlikely(!fsync_entry_slab)) + return; + + INIT_LIST_HEAD(&inode_list); + + /* step #1: find fsynced inode numbers */ + if (find_fsync_dnodes(sbi, &inode_list)) + goto out; + + if (list_empty(&inode_list)) + goto out; + + /* step #2: recover data */ + sbi->por_doing = 1; + recover_data(sbi, &inode_list, CURSEG_WARM_NODE); + sbi->por_doing = 0; + BUG_ON(!list_empty(&inode_list)); +out: + destroy_fsync_dnodes(sbi, &inode_list); + kmem_cache_destroy(fsync_entry_slab); + write_checkpoint(sbi, false, false); +} diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c new file mode 100644 index 000000000000..1b26e4ea1016 --- /dev/null +++ b/fs/f2fs/segment.c @@ -0,0 +1,1791 @@ +/* + * fs/f2fs/segment.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/fs.h> +#include <linux/f2fs_fs.h> +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/vmalloc.h> + +#include "f2fs.h" +#include "segment.h" +#include "node.h" + +static int need_to_flush(struct f2fs_sb_info *sbi) +{ + unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) * + sbi->segs_per_sec; + int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1) + >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; + int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1) + >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; + + if (sbi->por_doing) + return 0; + + if (free_sections(sbi) <= (node_secs + 2 * dent_secs + + reserved_sections(sbi))) + return 1; + return 0; +} + +/* + * This function balances dirty node and dentry pages. + * In addition, it controls garbage collection. + */ +void f2fs_balance_fs(struct f2fs_sb_info *sbi) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_reclaim = 0, + }; + + if (sbi->por_doing) + return; + + /* + * We should do checkpoint when there are so many dirty node pages + * with enough free segments. After then, we should do GC. + */ + if (need_to_flush(sbi)) { + sync_dirty_dir_inodes(sbi); + sync_node_pages(sbi, 0, &wbc); + } + + if (has_not_enough_free_secs(sbi)) { + mutex_lock(&sbi->gc_mutex); + f2fs_gc(sbi, 1); + } +} + +static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + /* need not be added */ + if (IS_CURSEG(sbi, segno)) + return; + + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]++; + + if (dirty_type == DIRTY) { + struct seg_entry *sentry = get_seg_entry(sbi, segno); + dirty_type = sentry->type; + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]++; + } +} + +static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]--; + + if (dirty_type == DIRTY) { + struct seg_entry *sentry = get_seg_entry(sbi, segno); + dirty_type = sentry->type; + if (test_and_clear_bit(segno, + dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]--; + clear_bit(segno, dirty_i->victim_segmap[FG_GC]); + clear_bit(segno, dirty_i->victim_segmap[BG_GC]); + } +} + +/* + * Should not occur error such as -ENOMEM. + * Adding dirty entry into seglist is not critical operation. + * If a given segment is one of current working segments, it won't be added. + */ +void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned short valid_blocks; + + if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) + return; + + mutex_lock(&dirty_i->seglist_lock); + + valid_blocks = get_valid_blocks(sbi, segno, 0); + + if (valid_blocks == 0) { + __locate_dirty_segment(sbi, segno, PRE); + __remove_dirty_segment(sbi, segno, DIRTY); + } else if (valid_blocks < sbi->blocks_per_seg) { + __locate_dirty_segment(sbi, segno, DIRTY); + } else { + /* Recovery routine with SSR needs this */ + __remove_dirty_segment(sbi, segno, DIRTY); + } + + mutex_unlock(&dirty_i->seglist_lock); + return; +} + +/* + * Should call clear_prefree_segments after checkpoint is done. + */ +static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno, offset = 0; + unsigned int total_segs = TOTAL_SEGS(sbi); + + mutex_lock(&dirty_i->seglist_lock); + while (1) { + segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, + offset); + if (segno >= total_segs) + break; + __set_test_and_free(sbi, segno); + offset = segno + 1; + } + mutex_unlock(&dirty_i->seglist_lock); +} + +void clear_prefree_segments(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno, offset = 0; + unsigned int total_segs = TOTAL_SEGS(sbi); + + mutex_lock(&dirty_i->seglist_lock); + while (1) { + segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, + offset); + if (segno >= total_segs) + break; + + offset = segno + 1; + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) + dirty_i->nr_dirty[PRE]--; + + /* Let's use trim */ + if (test_opt(sbi, DISCARD)) + blkdev_issue_discard(sbi->sb->s_bdev, + START_BLOCK(sbi, segno) << + sbi->log_sectors_per_block, + 1 << (sbi->log_sectors_per_block + + sbi->log_blocks_per_seg), + GFP_NOFS, 0); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) + sit_i->dirty_sentries++; +} + +static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, + unsigned int segno, int modified) +{ + struct seg_entry *se = get_seg_entry(sbi, segno); + se->type = type; + if (modified) + __mark_sit_entry_dirty(sbi, segno); +} + +static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +{ + struct seg_entry *se; + unsigned int segno, offset; + long int new_vblocks; + + segno = GET_SEGNO(sbi, blkaddr); + + se = get_seg_entry(sbi, segno); + new_vblocks = se->valid_blocks + del; + offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); + + BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) || + (new_vblocks > sbi->blocks_per_seg))); + + se->valid_blocks = new_vblocks; + se->mtime = get_mtime(sbi); + SIT_I(sbi)->max_mtime = se->mtime; + + /* Update valid block bitmap */ + if (del > 0) { + if (f2fs_set_bit(offset, se->cur_valid_map)) + BUG(); + } else { + if (!f2fs_clear_bit(offset, se->cur_valid_map)) + BUG(); + } + if (!f2fs_test_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks += del; + + __mark_sit_entry_dirty(sbi, segno); + + /* update total number of valid blocks to be written in ckpt area */ + SIT_I(sbi)->written_valid_blocks += del; + + if (sbi->segs_per_sec > 1) + get_sec_entry(sbi, segno)->valid_blocks += del; +} + +static void refresh_sit_entry(struct f2fs_sb_info *sbi, + block_t old_blkaddr, block_t new_blkaddr) +{ + update_sit_entry(sbi, new_blkaddr, 1); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + update_sit_entry(sbi, old_blkaddr, -1); +} + +void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +{ + unsigned int segno = GET_SEGNO(sbi, addr); + struct sit_info *sit_i = SIT_I(sbi); + + BUG_ON(addr == NULL_ADDR); + if (addr == NEW_ADDR) + return; + + /* add it into sit main buffer */ + mutex_lock(&sit_i->sentry_lock); + + update_sit_entry(sbi, addr, -1); + + /* add it into dirty seglist */ + locate_dirty_segment(sbi, segno); + + mutex_unlock(&sit_i->sentry_lock); +} + +/* + * This function should be resided under the curseg_mutex lock + */ +static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, + struct f2fs_summary *sum, unsigned short offset) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + void *addr = curseg->sum_blk; + addr += offset * sizeof(struct f2fs_summary); + memcpy(addr, sum, sizeof(struct f2fs_summary)); + return; +} + +/* + * Calculate the number of current summary pages for writing + */ +int npages_for_summary_flush(struct f2fs_sb_info *sbi) +{ + int total_size_bytes = 0; + int valid_sum_count = 0; + int i, sum_space; + + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + if (sbi->ckpt->alloc_type[i] == SSR) + valid_sum_count += sbi->blocks_per_seg; + else + valid_sum_count += curseg_blkoff(sbi, i); + } + + total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1) + + sizeof(struct nat_journal) + 2 + + sizeof(struct sit_journal) + 2; + sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE; + if (total_size_bytes < sum_space) + return 1; + else if (total_size_bytes < 2 * sum_space) + return 2; + return 3; +} + +/* + * Caller should put this summary page + */ +struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) +{ + return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); +} + +static void write_sum_page(struct f2fs_sb_info *sbi, + struct f2fs_summary_block *sum_blk, block_t blk_addr) +{ + struct page *page = grab_meta_page(sbi, blk_addr); + void *kaddr = page_address(page); + memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE); + set_page_dirty(page); + f2fs_put_page(page, 1); +} + +static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, + int ofs_unit, int type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE]; + unsigned int segno, next_segno, i; + int ofs = 0; + + /* + * If there is not enough reserved sections, + * we should not reuse prefree segments. + */ + if (has_not_enough_free_secs(sbi)) + return NULL_SEGNO; + + /* + * NODE page should not reuse prefree segment, + * since those information is used for SPOR. + */ + if (IS_NODESEG(type)) + return NULL_SEGNO; +next: + segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs++); + ofs = ((segno / ofs_unit) * ofs_unit) + ofs_unit; + if (segno < TOTAL_SEGS(sbi)) { + /* skip intermediate segments in a section */ + if (segno % ofs_unit) + goto next; + + /* skip if whole section is not prefree */ + next_segno = find_next_zero_bit(prefree_segmap, + TOTAL_SEGS(sbi), segno + 1); + if (next_segno - segno < ofs_unit) + goto next; + + /* skip if whole section was not free at the last checkpoint */ + for (i = 0; i < ofs_unit; i++) + if (get_seg_entry(sbi, segno)->ckpt_valid_blocks) + goto next; + return segno; + } + return NULL_SEGNO; +} + +/* + * Find a new segment from the free segments bitmap to right order + * This function should be returned with success, otherwise BUG + */ +static void get_new_segment(struct f2fs_sb_info *sbi, + unsigned int *newseg, bool new_sec, int dir) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int total_secs = sbi->total_sections; + unsigned int segno, secno, zoneno; + unsigned int total_zones = sbi->total_sections / sbi->secs_per_zone; + unsigned int hint = *newseg / sbi->segs_per_sec; + unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); + unsigned int left_start = hint; + bool init = true; + int go_left = 0; + int i; + + write_lock(&free_i->segmap_lock); + + if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { + segno = find_next_zero_bit(free_i->free_segmap, + TOTAL_SEGS(sbi), *newseg + 1); + if (segno < TOTAL_SEGS(sbi)) + goto got_it; + } +find_other_zone: + secno = find_next_zero_bit(free_i->free_secmap, total_secs, hint); + if (secno >= total_secs) { + if (dir == ALLOC_RIGHT) { + secno = find_next_zero_bit(free_i->free_secmap, + total_secs, 0); + BUG_ON(secno >= total_secs); + } else { + go_left = 1; + left_start = hint - 1; + } + } + if (go_left == 0) + goto skip_left; + + while (test_bit(left_start, free_i->free_secmap)) { + if (left_start > 0) { + left_start--; + continue; + } + left_start = find_next_zero_bit(free_i->free_secmap, + total_secs, 0); + BUG_ON(left_start >= total_secs); + break; + } + secno = left_start; +skip_left: + hint = secno; + segno = secno * sbi->segs_per_sec; + zoneno = secno / sbi->secs_per_zone; + + /* give up on finding another zone */ + if (!init) + goto got_it; + if (sbi->secs_per_zone == 1) + goto got_it; + if (zoneno == old_zoneno) + goto got_it; + if (dir == ALLOC_LEFT) { + if (!go_left && zoneno + 1 >= total_zones) + goto got_it; + if (go_left && zoneno == 0) + goto got_it; + } + for (i = 0; i < NR_CURSEG_TYPE; i++) + if (CURSEG_I(sbi, i)->zone == zoneno) + break; + + if (i < NR_CURSEG_TYPE) { + /* zone is in user, try another */ + if (go_left) + hint = zoneno * sbi->secs_per_zone - 1; + else if (zoneno + 1 >= total_zones) + hint = 0; + else + hint = (zoneno + 1) * sbi->secs_per_zone; + init = false; + goto find_other_zone; + } +got_it: + /* set it as dirty segment in free segmap */ + BUG_ON(test_bit(segno, free_i->free_segmap)); + __set_inuse(sbi, segno); + *newseg = segno; + write_unlock(&free_i->segmap_lock); +} + +static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + struct summary_footer *sum_footer; + + curseg->segno = curseg->next_segno; + curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno); + curseg->next_blkoff = 0; + curseg->next_segno = NULL_SEGNO; + + sum_footer = &(curseg->sum_blk->footer); + memset(sum_footer, 0, sizeof(struct summary_footer)); + if (IS_DATASEG(type)) + SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA); + if (IS_NODESEG(type)) + SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE); + __set_sit_entry_type(sbi, type, curseg->segno, modified); +} + +/* + * Allocate a current working segment. + * This function always allocates a free segment in LFS manner. + */ +static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno; + int dir = ALLOC_LEFT; + + write_sum_page(sbi, curseg->sum_blk, + GET_SUM_BLOCK(sbi, curseg->segno)); + if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) + dir = ALLOC_RIGHT; + + if (test_opt(sbi, NOHEAP)) + dir = ALLOC_RIGHT; + + get_new_segment(sbi, &segno, new_sec, dir); + curseg->next_segno = segno; + reset_curseg(sbi, type, 1); + curseg->alloc_type = LFS; +} + +static void __next_free_blkoff(struct f2fs_sb_info *sbi, + struct curseg_info *seg, block_t start) +{ + struct seg_entry *se = get_seg_entry(sbi, seg->segno); + block_t ofs; + for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) { + if (!f2fs_test_bit(ofs, se->ckpt_valid_map) + && !f2fs_test_bit(ofs, se->cur_valid_map)) + break; + } + seg->next_blkoff = ofs; +} + +/* + * If a segment is written by LFS manner, next block offset is just obtained + * by increasing the current block offset. However, if a segment is written by + * SSR manner, next block offset obtained by calling __next_free_blkoff + */ +static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, + struct curseg_info *seg) +{ + if (seg->alloc_type == SSR) + __next_free_blkoff(sbi, seg, seg->next_blkoff + 1); + else + seg->next_blkoff++; +} + +/* + * This function always allocates a used segment (from dirty seglist) by SSR + * manner, so it should recover the existing segment information of valid blocks + */ +static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int new_segno = curseg->next_segno; + struct f2fs_summary_block *sum_node; + struct page *sum_page; + + write_sum_page(sbi, curseg->sum_blk, + GET_SUM_BLOCK(sbi, curseg->segno)); + __set_test_and_inuse(sbi, new_segno); + + mutex_lock(&dirty_i->seglist_lock); + __remove_dirty_segment(sbi, new_segno, PRE); + __remove_dirty_segment(sbi, new_segno, DIRTY); + mutex_unlock(&dirty_i->seglist_lock); + + reset_curseg(sbi, type, 1); + curseg->alloc_type = SSR; + __next_free_blkoff(sbi, curseg, 0); + + if (reuse) { + sum_page = get_sum_page(sbi, new_segno); + sum_node = (struct f2fs_summary_block *)page_address(sum_page); + memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); + f2fs_put_page(sum_page, 1); + } +} + +/* + * flush out current segment and replace it with new segment + * This function should be returned with success, otherwise BUG + */ +static void allocate_segment_by_default(struct f2fs_sb_info *sbi, + int type, bool force) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int ofs_unit; + + if (force) { + new_curseg(sbi, type, true); + goto out; + } + + ofs_unit = need_SSR(sbi) ? 1 : sbi->segs_per_sec; + curseg->next_segno = check_prefree_segments(sbi, ofs_unit, type); + + if (curseg->next_segno != NULL_SEGNO) + change_curseg(sbi, type, false); + else if (type == CURSEG_WARM_NODE) + new_curseg(sbi, type, false); + else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) + change_curseg(sbi, type, true); + else + new_curseg(sbi, type, false); +out: + sbi->segment_count[curseg->alloc_type]++; +} + +void allocate_new_segments(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg; + unsigned int old_curseg; + int i; + + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + curseg = CURSEG_I(sbi, i); + old_curseg = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); + locate_dirty_segment(sbi, old_curseg); + } +} + +static const struct segment_allocation default_salloc_ops = { + .allocate_segment = allocate_segment_by_default, +}; + +static void f2fs_end_io_write(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct bio_private *p = bio->bi_private; + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + if (!uptodate) { + SetPageError(page); + if (page->mapping) + set_bit(AS_EIO, &page->mapping->flags); + set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); + set_page_dirty(page); + } + end_page_writeback(page); + dec_page_count(p->sbi, F2FS_WRITEBACK); + } while (bvec >= bio->bi_io_vec); + + if (p->is_sync) + complete(p->wait); + kfree(p); + bio_put(bio); +} + +struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) +{ + struct bio *bio; + struct bio_private *priv; +retry: + priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); + if (!priv) { + cond_resched(); + goto retry; + } + + /* No failure on bio allocation */ + bio = bio_alloc(GFP_NOIO, npages); + bio->bi_bdev = bdev; + bio->bi_private = priv; + return bio; +} + +static void do_submit_bio(struct f2fs_sb_info *sbi, + enum page_type type, bool sync) +{ + int rw = sync ? WRITE_SYNC : WRITE; + enum page_type btype = type > META ? META : type; + + if (type >= META_FLUSH) + rw = WRITE_FLUSH_FUA; + + if (sbi->bio[btype]) { + struct bio_private *p = sbi->bio[btype]->bi_private; + p->sbi = sbi; + sbi->bio[btype]->bi_end_io = f2fs_end_io_write; + if (type == META_FLUSH) { + DECLARE_COMPLETION_ONSTACK(wait); + p->is_sync = true; + p->wait = &wait; + submit_bio(rw, sbi->bio[btype]); + wait_for_completion(&wait); + } else { + p->is_sync = false; + submit_bio(rw, sbi->bio[btype]); + } + sbi->bio[btype] = NULL; + } +} + +void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) +{ + down_write(&sbi->bio_sem); + do_submit_bio(sbi, type, sync); + up_write(&sbi->bio_sem); +} + +static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, + block_t blk_addr, enum page_type type) +{ + struct block_device *bdev = sbi->sb->s_bdev; + + verify_block_addr(sbi, blk_addr); + + down_write(&sbi->bio_sem); + + inc_page_count(sbi, F2FS_WRITEBACK); + + if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1) + do_submit_bio(sbi, type, false); +alloc_new: + if (sbi->bio[type] == NULL) { + sbi->bio[type] = f2fs_bio_alloc(bdev, bio_get_nr_vecs(bdev)); + sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + /* + * The end_io will be assigned at the sumbission phase. + * Until then, let bio_add_page() merge consecutive IOs as much + * as possible. + */ + } + + if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) < + PAGE_CACHE_SIZE) { + do_submit_bio(sbi, type, false); + goto alloc_new; + } + + sbi->last_block_in_bio[type] = blk_addr; + + up_write(&sbi->bio_sem); +} + +static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + if (curseg->next_blkoff < sbi->blocks_per_seg) + return true; + return false; +} + +static int __get_segment_type_2(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) + return CURSEG_HOT_DATA; + else + return CURSEG_HOT_NODE; +} + +static int __get_segment_type_4(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) { + struct inode *inode = page->mapping->host; + + if (S_ISDIR(inode->i_mode)) + return CURSEG_HOT_DATA; + else + return CURSEG_COLD_DATA; + } else { + if (IS_DNODE(page) && !is_cold_node(page)) + return CURSEG_HOT_NODE; + else + return CURSEG_COLD_NODE; + } +} + +static int __get_segment_type_6(struct page *page, enum page_type p_type) +{ + if (p_type == DATA) { + struct inode *inode = page->mapping->host; + + if (S_ISDIR(inode->i_mode)) + return CURSEG_HOT_DATA; + else if (is_cold_data(page) || is_cold_file(inode)) + return CURSEG_COLD_DATA; + else + return CURSEG_WARM_DATA; + } else { + if (IS_DNODE(page)) + return is_cold_node(page) ? CURSEG_WARM_NODE : + CURSEG_HOT_NODE; + else + return CURSEG_COLD_NODE; + } +} + +static int __get_segment_type(struct page *page, enum page_type p_type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + switch (sbi->active_logs) { + case 2: + return __get_segment_type_2(page, p_type); + case 4: + return __get_segment_type_4(page, p_type); + case 6: + return __get_segment_type_6(page, p_type); + default: + BUG(); + } +} + +static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blkaddr, block_t *new_blkaddr, + struct f2fs_summary *sum, enum page_type p_type) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg; + unsigned int old_cursegno; + int type; + + type = __get_segment_type(page, p_type); + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + + *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + old_cursegno = curseg->segno; + + /* + * __add_sum_entry should be resided under the curseg_mutex + * because, this function updates a summary entry in the + * current summary block. + */ + __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + + mutex_lock(&sit_i->sentry_lock); + __refresh_next_blkoff(sbi, curseg); + sbi->block_count[curseg->alloc_type]++; + + /* + * SIT information should be updated before segment allocation, + * since SSR needs latest valid block information. + */ + refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); + + if (!__has_curseg_space(sbi, type)) + sit_i->s_ops->allocate_segment(sbi, type, false); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + mutex_unlock(&sit_i->sentry_lock); + + if (p_type == NODE) + fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + + /* writeout dirty page into bdev */ + submit_write_page(sbi, page, *new_blkaddr, p_type); + + mutex_unlock(&curseg->curseg_mutex); +} + +int write_meta_page(struct f2fs_sb_info *sbi, struct page *page, + struct writeback_control *wbc) +{ + if (wbc->for_reclaim) + return AOP_WRITEPAGE_ACTIVATE; + + set_page_writeback(page); + submit_write_page(sbi, page, page->index, META); + return 0; +} + +void write_node_page(struct f2fs_sb_info *sbi, struct page *page, + unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) +{ + struct f2fs_summary sum; + set_summary(&sum, nid, 0, 0); + do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE); +} + +void write_data_page(struct inode *inode, struct page *page, + struct dnode_of_data *dn, block_t old_blkaddr, + block_t *new_blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_summary sum; + struct node_info ni; + + BUG_ON(old_blkaddr == NULL_ADDR); + get_node_info(sbi, dn->nid, &ni); + set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); + + do_write_page(sbi, page, old_blkaddr, + new_blkaddr, &sum, DATA); +} + +void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blk_addr) +{ + submit_write_page(sbi, page, old_blk_addr, DATA); +} + +void recover_data_page(struct f2fs_sb_info *sbi, + struct page *page, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg; + unsigned int segno, old_cursegno; + struct seg_entry *se; + int type; + + segno = GET_SEGNO(sbi, new_blkaddr); + se = get_seg_entry(sbi, segno); + type = se->type; + + if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { + if (old_blkaddr == NULL_ADDR) + type = CURSEG_COLD_DATA; + else + type = CURSEG_WARM_DATA; + } + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + old_cursegno = curseg->segno; + + /* change the current segment */ + if (segno != curseg->segno) { + curseg->next_segno = segno; + change_curseg(sbi, type, true); + } + + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & + (sbi->blocks_per_seg - 1); + __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + + refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); +} + +void rewrite_node_page(struct f2fs_sb_info *sbi, + struct page *page, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr) +{ + struct sit_info *sit_i = SIT_I(sbi); + int type = CURSEG_WARM_NODE; + struct curseg_info *curseg; + unsigned int segno, old_cursegno; + block_t next_blkaddr = next_blkaddr_of_node(page); + unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); + + curseg = CURSEG_I(sbi, type); + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + segno = GET_SEGNO(sbi, new_blkaddr); + old_cursegno = curseg->segno; + + /* change the current segment */ + if (segno != curseg->segno) { + curseg->next_segno = segno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & + (sbi->blocks_per_seg - 1); + __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + + /* change the current log to the next block addr in advance */ + if (next_segno != segno) { + curseg->next_segno = next_segno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & + (sbi->blocks_per_seg - 1); + + /* rewrite node page */ + set_page_writeback(page); + submit_write_page(sbi, page, new_blkaddr, NODE); + f2fs_submit_bio(sbi, NODE, true); + refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); + + locate_dirty_segment(sbi, old_cursegno); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); +} + +static int read_compacted_summaries(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct curseg_info *seg_i; + unsigned char *kaddr; + struct page *page; + block_t start; + int i, j, offset; + + start = start_sum_block(sbi); + + page = get_meta_page(sbi, start++); + kaddr = (unsigned char *)page_address(page); + + /* Step 1: restore nat cache */ + seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); + memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE); + + /* Step 2: restore sit cache */ + seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); + memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE, + SUM_JOURNAL_SIZE); + offset = 2 * SUM_JOURNAL_SIZE; + + /* Step 3: restore summary entries */ + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + unsigned short blk_off; + unsigned int segno; + + seg_i = CURSEG_I(sbi, i); + segno = le32_to_cpu(ckpt->cur_data_segno[i]); + blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); + seg_i->next_segno = segno; + reset_curseg(sbi, i, 0); + seg_i->alloc_type = ckpt->alloc_type[i]; + seg_i->next_blkoff = blk_off; + + if (seg_i->alloc_type == SSR) + blk_off = sbi->blocks_per_seg; + + for (j = 0; j < blk_off; j++) { + struct f2fs_summary *s; + s = (struct f2fs_summary *)(kaddr + offset); + seg_i->sum_blk->entries[j] = *s; + offset += SUMMARY_SIZE; + if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + SUM_FOOTER_SIZE) + continue; + + f2fs_put_page(page, 1); + page = NULL; + + page = get_meta_page(sbi, start++); + kaddr = (unsigned char *)page_address(page); + offset = 0; + } + } + f2fs_put_page(page, 1); + return 0; +} + +static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_summary_block *sum; + struct curseg_info *curseg; + struct page *new; + unsigned short blk_off; + unsigned int segno = 0; + block_t blk_addr = 0; + + /* get segment number and block addr */ + if (IS_DATASEG(type)) { + segno = le32_to_cpu(ckpt->cur_data_segno[type]); + blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - + CURSEG_HOT_DATA]); + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); + else + blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); + } else { + segno = le32_to_cpu(ckpt->cur_node_segno[type - + CURSEG_HOT_NODE]); + blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - + CURSEG_HOT_NODE]); + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, + type - CURSEG_HOT_NODE); + else + blk_addr = GET_SUM_BLOCK(sbi, segno); + } + + new = get_meta_page(sbi, blk_addr); + sum = (struct f2fs_summary_block *)page_address(new); + + if (IS_NODESEG(type)) { + if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { + struct f2fs_summary *ns = &sum->entries[0]; + int i; + for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { + ns->version = 0; + ns->ofs_in_node = 0; + } + } else { + if (restore_node_summary(sbi, segno, sum)) { + f2fs_put_page(new, 1); + return -EINVAL; + } + } + } + + /* set uncompleted segment to curseg */ + curseg = CURSEG_I(sbi, type); + mutex_lock(&curseg->curseg_mutex); + memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE); + curseg->next_segno = segno; + reset_curseg(sbi, type, 0); + curseg->alloc_type = ckpt->alloc_type[type]; + curseg->next_blkoff = blk_off; + mutex_unlock(&curseg->curseg_mutex); + f2fs_put_page(new, 1); + return 0; +} + +static int restore_curseg_summaries(struct f2fs_sb_info *sbi) +{ + int type = CURSEG_HOT_DATA; + + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { + /* restore for compacted data summary */ + if (read_compacted_summaries(sbi)) + return -EINVAL; + type = CURSEG_HOT_NODE; + } + + for (; type <= CURSEG_COLD_NODE; type++) + if (read_normal_summaries(sbi, type)) + return -EINVAL; + return 0; +} + +static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + struct page *page; + unsigned char *kaddr; + struct f2fs_summary *summary; + struct curseg_info *seg_i; + int written_size = 0; + int i, j; + + page = grab_meta_page(sbi, blkaddr++); + kaddr = (unsigned char *)page_address(page); + + /* Step 1: write nat cache */ + seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); + memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE); + written_size += SUM_JOURNAL_SIZE; + + /* Step 2: write sit cache */ + seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); + memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits, + SUM_JOURNAL_SIZE); + written_size += SUM_JOURNAL_SIZE; + + set_page_dirty(page); + + /* Step 3: write summary entries */ + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + unsigned short blkoff; + seg_i = CURSEG_I(sbi, i); + if (sbi->ckpt->alloc_type[i] == SSR) + blkoff = sbi->blocks_per_seg; + else + blkoff = curseg_blkoff(sbi, i); + + for (j = 0; j < blkoff; j++) { + if (!page) { + page = grab_meta_page(sbi, blkaddr++); + kaddr = (unsigned char *)page_address(page); + written_size = 0; + } + summary = (struct f2fs_summary *)(kaddr + written_size); + *summary = seg_i->sum_blk->entries[j]; + written_size += SUMMARY_SIZE; + set_page_dirty(page); + + if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - + SUM_FOOTER_SIZE) + continue; + + f2fs_put_page(page, 1); + page = NULL; + } + } + if (page) + f2fs_put_page(page, 1); +} + +static void write_normal_summaries(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + int i, end; + if (IS_DATASEG(type)) + end = type + NR_CURSEG_DATA_TYPE; + else + end = type + NR_CURSEG_NODE_TYPE; + + for (i = type; i < end; i++) { + struct curseg_info *sum = CURSEG_I(sbi, i); + mutex_lock(&sum->curseg_mutex); + write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type)); + mutex_unlock(&sum->curseg_mutex); + } +} + +void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +{ + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) + write_compacted_summaries(sbi, start_blk); + else + write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); +} + +void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) +{ + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) + write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); + return; +} + +int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, + unsigned int val, int alloc) +{ + int i; + + if (type == NAT_JOURNAL) { + for (i = 0; i < nats_in_cursum(sum); i++) { + if (le32_to_cpu(nid_in_journal(sum, i)) == val) + return i; + } + if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) + return update_nats_in_cursum(sum, 1); + } else if (type == SIT_JOURNAL) { + for (i = 0; i < sits_in_cursum(sum); i++) + if (le32_to_cpu(segno_in_journal(sum, i)) == val) + return i; + if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES) + return update_sits_in_cursum(sum, 1); + } + return -1; +} + +static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); + block_t blk_addr = sit_i->sit_base_addr + offset; + + check_seg_range(sbi, segno); + + /* calculate sit block address */ + if (f2fs_test_bit(offset, sit_i->sit_bitmap)) + blk_addr += sit_i->sit_blocks; + + return get_meta_page(sbi, blk_addr); +} + +static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, + unsigned int start) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct page *src_page, *dst_page; + pgoff_t src_off, dst_off; + void *src_addr, *dst_addr; + + src_off = current_sit_addr(sbi, start); + dst_off = next_sit_addr(sbi, src_off); + + /* get current sit block page without lock */ + src_page = get_meta_page(sbi, src_off); + dst_page = grab_meta_page(sbi, dst_off); + BUG_ON(PageDirty(src_page)); + + src_addr = page_address(src_page); + dst_addr = page_address(dst_page); + memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE); + + set_page_dirty(dst_page); + f2fs_put_page(src_page, 1); + + set_to_next_sit(sit_i, start); + + return dst_page; +} + +static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + int i; + + /* + * If the journal area in the current summary is full of sit entries, + * all the sit entries will be flushed. Otherwise the sit entries + * are not able to replace with newly hot sit entries. + */ + if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { + for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { + unsigned int segno; + segno = le32_to_cpu(segno_in_journal(sum, i)); + __mark_sit_entry_dirty(sbi, segno); + } + update_sits_in_cursum(sum, -sits_in_cursum(sum)); + return 1; + } + return 0; +} + +/* + * CP calls this function, which flushes SIT entries including sit_journal, + * and moves prefree segs to free segs. + */ +void flush_sit_entries(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned long *bitmap = sit_i->dirty_sentries_bitmap; + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + unsigned long nsegs = TOTAL_SEGS(sbi); + struct page *page = NULL; + struct f2fs_sit_block *raw_sit = NULL; + unsigned int start = 0, end = 0; + unsigned int segno = -1; + bool flushed; + + mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + /* + * "flushed" indicates whether sit entries in journal are flushed + * to the SIT area or not. + */ + flushed = flush_sits_in_journal(sbi); + + while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { + struct seg_entry *se = get_seg_entry(sbi, segno); + int sit_offset, offset; + + sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); + + if (flushed) + goto to_sit_page; + + offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); + if (offset >= 0) { + segno_in_journal(sum, offset) = cpu_to_le32(segno); + seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); + goto flush_done; + } +to_sit_page: + if (!page || (start > segno) || (segno > end)) { + if (page) { + f2fs_put_page(page, 1); + page = NULL; + } + + start = START_SEGNO(sit_i, segno); + end = start + SIT_ENTRY_PER_BLOCK - 1; + + /* read sit block that will be updated */ + page = get_next_sit_page(sbi, start); + raw_sit = page_address(page); + } + + /* udpate entry in SIT block */ + seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); +flush_done: + __clear_bit(segno, bitmap); + sit_i->dirty_sentries--; + } + mutex_unlock(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); + + /* writeout last modified SIT block */ + f2fs_put_page(page, 1); + + set_prefree_as_free_segments(sbi); +} + +static int build_sit_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct sit_info *sit_i; + unsigned int sit_segs, start; + char *src_bitmap, *dst_bitmap; + unsigned int bitmap_size; + + /* allocate memory for SIT information */ + sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL); + if (!sit_i) + return -ENOMEM; + + SM_I(sbi)->sit_info = sit_i; + + sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); + if (!sit_i->sentries) + return -ENOMEM; + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!sit_i->dirty_sentries_bitmap) + return -ENOMEM; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + sit_i->sentries[start].cur_valid_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + sit_i->sentries[start].ckpt_valid_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].cur_valid_map + || !sit_i->sentries[start].ckpt_valid_map) + return -ENOMEM; + } + + if (sbi->segs_per_sec > 1) { + sit_i->sec_entries = vzalloc(sbi->total_sections * + sizeof(struct sec_entry)); + if (!sit_i->sec_entries) + return -ENOMEM; + } + + /* get information related with SIT */ + sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1; + + /* setup SIT bitmap from ckeckpoint pack */ + bitmap_size = __bitmap_size(sbi, SIT_BITMAP); + src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); + + dst_bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dst_bitmap) + return -ENOMEM; + memcpy(dst_bitmap, src_bitmap, bitmap_size); + + /* init SIT information */ + sit_i->s_ops = &default_salloc_ops; + + sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); + sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; + sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count); + sit_i->sit_bitmap = dst_bitmap; + sit_i->bitmap_size = bitmap_size; + sit_i->dirty_sentries = 0; + sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; + sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); + sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; + mutex_init(&sit_i->sentry_lock); + return 0; +} + +static int build_free_segmap(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + struct free_segmap_info *free_i; + unsigned int bitmap_size, sec_bitmap_size; + + /* allocate memory for free segmap information */ + free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL); + if (!free_i) + return -ENOMEM; + + SM_I(sbi)->free_info = free_i; + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); + if (!free_i->free_segmap) + return -ENOMEM; + + sec_bitmap_size = f2fs_bitmap_size(sbi->total_sections); + free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); + if (!free_i->free_secmap) + return -ENOMEM; + + /* set all segments as dirty temporarily */ + memset(free_i->free_segmap, 0xff, bitmap_size); + memset(free_i->free_secmap, 0xff, sec_bitmap_size); + + /* init free segmap information */ + free_i->start_segno = + (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr); + free_i->free_segments = 0; + free_i->free_sections = 0; + rwlock_init(&free_i->segmap_lock); + return 0; +} + +static int build_curseg(struct f2fs_sb_info *sbi) +{ + struct curseg_info *array; + int i; + + array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); + if (!array) + return -ENOMEM; + + SM_I(sbi)->curseg_array = array; + + for (i = 0; i < NR_CURSEG_TYPE; i++) { + mutex_init(&array[i].curseg_mutex); + array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + if (!array[i].sum_blk) + return -ENOMEM; + array[i].segno = NULL_SEGNO; + array[i].next_blkoff = 0; + } + return restore_curseg_summaries(sbi); +} + +static void build_sit_entries(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + unsigned int start; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + struct seg_entry *se = &sit_i->sentries[start]; + struct f2fs_sit_block *sit_blk; + struct f2fs_sit_entry sit; + struct page *page; + int i; + + mutex_lock(&curseg->curseg_mutex); + for (i = 0; i < sits_in_cursum(sum); i++) { + if (le32_to_cpu(segno_in_journal(sum, i)) == start) { + sit = sit_in_journal(sum, i); + mutex_unlock(&curseg->curseg_mutex); + goto got_it; + } + } + mutex_unlock(&curseg->curseg_mutex); + page = get_current_sit_page(sbi, start); + sit_blk = (struct f2fs_sit_block *)page_address(page); + sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; + f2fs_put_page(page, 1); +got_it: + check_block_count(sbi, start, &sit); + seg_info_from_raw_sit(se, &sit); + if (sbi->segs_per_sec > 1) { + struct sec_entry *e = get_sec_entry(sbi, start); + e->valid_blocks += se->valid_blocks; + } + } +} + +static void init_free_segmap(struct f2fs_sb_info *sbi) +{ + unsigned int start; + int type; + + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + struct seg_entry *sentry = get_seg_entry(sbi, start); + if (!sentry->valid_blocks) + __set_free(sbi, start); + } + + /* set use the current segments */ + for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { + struct curseg_info *curseg_t = CURSEG_I(sbi, type); + __set_test_and_inuse(sbi, curseg_t->segno); + } +} + +static void init_dirty_segmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int segno = 0, offset = 0; + unsigned short valid_blocks; + + while (segno < TOTAL_SEGS(sbi)) { + /* find dirty segment based on free segmap */ + segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset); + if (segno >= TOTAL_SEGS(sbi)) + break; + offset = segno + 1; + valid_blocks = get_valid_blocks(sbi, segno, 0); + if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) + continue; + mutex_lock(&dirty_i->seglist_lock); + __locate_dirty_segment(sbi, segno, DIRTY); + mutex_unlock(&dirty_i->seglist_lock); + } +} + +static int init_victim_segmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + + dirty_i->victim_segmap[FG_GC] = kzalloc(bitmap_size, GFP_KERNEL); + dirty_i->victim_segmap[BG_GC] = kzalloc(bitmap_size, GFP_KERNEL); + if (!dirty_i->victim_segmap[FG_GC] || !dirty_i->victim_segmap[BG_GC]) + return -ENOMEM; + return 0; +} + +static int build_dirty_segmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i; + unsigned int bitmap_size, i; + + /* allocate memory for dirty segments list information */ + dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL); + if (!dirty_i) + return -ENOMEM; + + SM_I(sbi)->dirty_info = dirty_i; + mutex_init(&dirty_i->seglist_lock); + + bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + + for (i = 0; i < NR_DIRTY_TYPE; i++) { + dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); + dirty_i->nr_dirty[i] = 0; + if (!dirty_i->dirty_segmap[i]) + return -ENOMEM; + } + + init_dirty_segmap(sbi); + return init_victim_segmap(sbi); +} + +/* + * Update min, max modified time for cost-benefit GC algorithm + */ +static void init_min_max_mtime(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int segno; + + mutex_lock(&sit_i->sentry_lock); + + sit_i->min_mtime = LLONG_MAX; + + for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { + unsigned int i; + unsigned long long mtime = 0; + + for (i = 0; i < sbi->segs_per_sec; i++) + mtime += get_seg_entry(sbi, segno + i)->mtime; + + mtime = div_u64(mtime, sbi->segs_per_sec); + + if (sit_i->min_mtime > mtime) + sit_i->min_mtime = mtime; + } + sit_i->max_mtime = get_mtime(sbi); + mutex_unlock(&sit_i->sentry_lock); +} + +int build_segment_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_sm_info *sm_info; + int err; + + sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL); + if (!sm_info) + return -ENOMEM; + + /* init sm info */ + sbi->sm_info = sm_info; + INIT_LIST_HEAD(&sm_info->wblist_head); + spin_lock_init(&sm_info->wblist_lock); + sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); + sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); + sm_info->segment_count = le32_to_cpu(raw_super->segment_count); + sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); + sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); + sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); + sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); + + err = build_sit_info(sbi); + if (err) + return err; + err = build_free_segmap(sbi); + if (err) + return err; + err = build_curseg(sbi); + if (err) + return err; + + /* reinit free segmap based on SIT */ + build_sit_entries(sbi); + + init_free_segmap(sbi); + err = build_dirty_segmap(sbi); + if (err) + return err; + + init_min_max_mtime(sbi); + return 0; +} + +static void discard_dirty_segmap(struct f2fs_sb_info *sbi, + enum dirty_type dirty_type) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + mutex_lock(&dirty_i->seglist_lock); + kfree(dirty_i->dirty_segmap[dirty_type]); + dirty_i->nr_dirty[dirty_type] = 0; + mutex_unlock(&dirty_i->seglist_lock); +} + +void reset_victim_segmap(struct f2fs_sb_info *sbi) +{ + unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + memset(DIRTY_I(sbi)->victim_segmap[FG_GC], 0, bitmap_size); +} + +static void destroy_victim_segmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + + kfree(dirty_i->victim_segmap[FG_GC]); + kfree(dirty_i->victim_segmap[BG_GC]); +} + +static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + int i; + + if (!dirty_i) + return; + + /* discard pre-free/dirty segments list */ + for (i = 0; i < NR_DIRTY_TYPE; i++) + discard_dirty_segmap(sbi, i); + + destroy_victim_segmap(sbi); + SM_I(sbi)->dirty_info = NULL; + kfree(dirty_i); +} + +static void destroy_curseg(struct f2fs_sb_info *sbi) +{ + struct curseg_info *array = SM_I(sbi)->curseg_array; + int i; + + if (!array) + return; + SM_I(sbi)->curseg_array = NULL; + for (i = 0; i < NR_CURSEG_TYPE; i++) + kfree(array[i].sum_blk); + kfree(array); +} + +static void destroy_free_segmap(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = SM_I(sbi)->free_info; + if (!free_i) + return; + SM_I(sbi)->free_info = NULL; + kfree(free_i->free_segmap); + kfree(free_i->free_secmap); + kfree(free_i); +} + +static void destroy_sit_info(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int start; + + if (!sit_i) + return; + + if (sit_i->sentries) { + for (start = 0; start < TOTAL_SEGS(sbi); start++) { + kfree(sit_i->sentries[start].cur_valid_map); + kfree(sit_i->sentries[start].ckpt_valid_map); + } + } + vfree(sit_i->sentries); + vfree(sit_i->sec_entries); + kfree(sit_i->dirty_sentries_bitmap); + + SM_I(sbi)->sit_info = NULL; + kfree(sit_i->sit_bitmap); + kfree(sit_i); +} + +void destroy_segment_manager(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + destroy_dirty_segmap(sbi); + destroy_curseg(sbi); + destroy_free_segmap(sbi); + destroy_sit_info(sbi); + sbi->sm_info = NULL; + kfree(sm_info); +} diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h new file mode 100644 index 000000000000..0948405af6f5 --- /dev/null +++ b/fs/f2fs/segment.h @@ -0,0 +1,618 @@ +/* + * fs/f2fs/segment.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/* constant macro */ +#define NULL_SEGNO ((unsigned int)(~0)) + +/* V: Logical segment # in volume, R: Relative segment # in main area */ +#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) +#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) + +#define IS_DATASEG(t) \ + ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ + (t == CURSEG_WARM_DATA)) + +#define IS_NODESEG(t) \ + ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ + (t == CURSEG_WARM_NODE)) + +#define IS_CURSEG(sbi, segno) \ + ((segno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) + +#define IS_CURSEC(sbi, secno) \ + ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ + sbi->segs_per_sec)) \ + +#define START_BLOCK(sbi, segno) \ + (SM_I(sbi)->seg0_blkaddr + \ + (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) +#define NEXT_FREE_BLKADDR(sbi, curseg) \ + (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) + +#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) + +#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ + ((blk_addr) - SM_I(sbi)->seg0_blkaddr) +#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) +#define GET_SEGNO(sbi, blk_addr) \ + (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ + NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ + GET_SEGNO_FROM_SEG0(sbi, blk_addr))) +#define GET_SECNO(sbi, segno) \ + ((segno) / sbi->segs_per_sec) +#define GET_ZONENO_FROM_SEGNO(sbi, segno) \ + ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) + +#define GET_SUM_BLOCK(sbi, segno) \ + ((sbi->sm_info->ssa_blkaddr) + segno) + +#define GET_SUM_TYPE(footer) ((footer)->entry_type) +#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) + +#define SIT_ENTRY_OFFSET(sit_i, segno) \ + (segno % sit_i->sents_per_block) +#define SIT_BLOCK_OFFSET(sit_i, segno) \ + (segno / SIT_ENTRY_PER_BLOCK) +#define START_SEGNO(sit_i, segno) \ + (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) +#define f2fs_bitmap_size(nr) \ + (BITS_TO_LONGS(nr) * sizeof(unsigned long)) +#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) + +#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ + (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) + +/* during checkpoint, bio_private is used to synchronize the last bio */ +struct bio_private { + struct f2fs_sb_info *sbi; + bool is_sync; + void *wait; +}; + +/* + * indicate a block allocation direction: RIGHT and LEFT. + * RIGHT means allocating new sections towards the end of volume. + * LEFT means the opposite direction. + */ +enum { + ALLOC_RIGHT = 0, + ALLOC_LEFT +}; + +/* + * In the victim_sel_policy->alloc_mode, there are two block allocation modes. + * LFS writes data sequentially with cleaning operations. + * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations. + */ +enum { + LFS = 0, + SSR +}; + +/* + * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes. + * GC_CB is based on cost-benefit algorithm. + * GC_GREEDY is based on greedy algorithm. + */ +enum { + GC_CB = 0, + GC_GREEDY +}; + +/* + * BG_GC means the background cleaning job. + * FG_GC means the on-demand cleaning job. + */ +enum { + BG_GC = 0, + FG_GC +}; + +/* for a function parameter to select a victim segment */ +struct victim_sel_policy { + int alloc_mode; /* LFS or SSR */ + int gc_mode; /* GC_CB or GC_GREEDY */ + unsigned long *dirty_segmap; /* dirty segment bitmap */ + unsigned int offset; /* last scanned bitmap offset */ + unsigned int ofs_unit; /* bitmap search unit */ + unsigned int min_cost; /* minimum cost */ + unsigned int min_segno; /* segment # having min. cost */ +}; + +struct seg_entry { + unsigned short valid_blocks; /* # of valid blocks */ + unsigned char *cur_valid_map; /* validity bitmap of blocks */ + /* + * # of valid blocks and the validity bitmap stored in the the last + * checkpoint pack. This information is used by the SSR mode. + */ + unsigned short ckpt_valid_blocks; + unsigned char *ckpt_valid_map; + unsigned char type; /* segment type like CURSEG_XXX_TYPE */ + unsigned long long mtime; /* modification time of the segment */ +}; + +struct sec_entry { + unsigned int valid_blocks; /* # of valid blocks in a section */ +}; + +struct segment_allocation { + void (*allocate_segment)(struct f2fs_sb_info *, int, bool); +}; + +struct sit_info { + const struct segment_allocation *s_ops; + + block_t sit_base_addr; /* start block address of SIT area */ + block_t sit_blocks; /* # of blocks used by SIT area */ + block_t written_valid_blocks; /* # of valid blocks in main area */ + char *sit_bitmap; /* SIT bitmap pointer */ + unsigned int bitmap_size; /* SIT bitmap size */ + + unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ + unsigned int dirty_sentries; /* # of dirty sentries */ + unsigned int sents_per_block; /* # of SIT entries per block */ + struct mutex sentry_lock; /* to protect SIT cache */ + struct seg_entry *sentries; /* SIT segment-level cache */ + struct sec_entry *sec_entries; /* SIT section-level cache */ + + /* for cost-benefit algorithm in cleaning procedure */ + unsigned long long elapsed_time; /* elapsed time after mount */ + unsigned long long mounted_time; /* mount time */ + unsigned long long min_mtime; /* min. modification time */ + unsigned long long max_mtime; /* max. modification time */ +}; + +struct free_segmap_info { + unsigned int start_segno; /* start segment number logically */ + unsigned int free_segments; /* # of free segments */ + unsigned int free_sections; /* # of free sections */ + rwlock_t segmap_lock; /* free segmap lock */ + unsigned long *free_segmap; /* free segment bitmap */ + unsigned long *free_secmap; /* free section bitmap */ +}; + +/* Notice: The order of dirty type is same with CURSEG_XXX in f2fs.h */ +enum dirty_type { + DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */ + DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */ + DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */ + DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */ + DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */ + DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */ + DIRTY, /* to count # of dirty segments */ + PRE, /* to count # of entirely obsolete segments */ + NR_DIRTY_TYPE +}; + +struct dirty_seglist_info { + const struct victim_selection *v_ops; /* victim selction operation */ + unsigned long *dirty_segmap[NR_DIRTY_TYPE]; + struct mutex seglist_lock; /* lock for segment bitmaps */ + int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ + unsigned long *victim_segmap[2]; /* BG_GC, FG_GC */ +}; + +/* victim selection function for cleaning and SSR */ +struct victim_selection { + int (*get_victim)(struct f2fs_sb_info *, unsigned int *, + int, int, char); +}; + +/* for active log information */ +struct curseg_info { + struct mutex curseg_mutex; /* lock for consistency */ + struct f2fs_summary_block *sum_blk; /* cached summary block */ + unsigned char alloc_type; /* current allocation type */ + unsigned int segno; /* current segment number */ + unsigned short next_blkoff; /* next block offset to write */ + unsigned int zone; /* current zone number */ + unsigned int next_segno; /* preallocated segment */ +}; + +/* + * inline functions + */ +static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) +{ + return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); +} + +static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sentries[segno]; +} + +static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; +} + +static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno, int section) +{ + /* + * In order to get # of valid blocks in a section instantly from many + * segments, f2fs manages two counting structures separately. + */ + if (section > 1) + return get_sec_entry(sbi, segno)->valid_blocks; + else + return get_seg_entry(sbi, segno)->valid_blocks; +} + +static inline void seg_info_from_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + se->valid_blocks = GET_SIT_VBLOCKS(rs); + se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); + memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->type = GET_SIT_TYPE(rs); + se->mtime = le64_to_cpu(rs->mtime); +} + +static inline void seg_info_to_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) | + se->valid_blocks; + rs->vblocks = cpu_to_le16(raw_vblocks); + memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->ckpt_valid_blocks = se->valid_blocks; + rs->mtime = cpu_to_le64(se->mtime); +} + +static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, + unsigned int max, unsigned int segno) +{ + unsigned int ret; + read_lock(&free_i->segmap_lock); + ret = find_next_bit(free_i->free_segmap, max, segno); + read_unlock(&free_i->segmap_lock); + return ret; +} + +static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + clear_bit(segno, free_i->free_segmap); + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + clear_bit(secno, free_i->free_secmap); + free_i->free_sections++; + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + set_bit(segno, free_i->free_segmap); + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; +} + +static inline void __set_test_and_free(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + if (test_and_clear_bit(segno, free_i->free_segmap)) { + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), + start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + if (test_and_clear_bit(secno, free_i->free_secmap)) + free_i->free_sections++; + } + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + write_lock(&free_i->segmap_lock); + if (!test_and_set_bit(segno, free_i->free_segmap)) { + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; + } + write_unlock(&free_i->segmap_lock); +} + +static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, + void *dst_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); +} + +static inline block_t written_block_count(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + block_t vblocks; + + mutex_lock(&sit_i->sentry_lock); + vblocks = sit_i->written_valid_blocks; + mutex_unlock(&sit_i->sentry_lock); + + return vblocks; +} + +static inline unsigned int free_segments(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_segs; + + read_lock(&free_i->segmap_lock); + free_segs = free_i->free_segments; + read_unlock(&free_i->segmap_lock); + + return free_segs; +} + +static inline int reserved_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->reserved_segments; +} + +static inline unsigned int free_sections(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_secs; + + read_lock(&free_i->segmap_lock); + free_secs = free_i->free_sections; + read_unlock(&free_i->segmap_lock); + + return free_secs; +} + +static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[PRE]; +} + +static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE]; +} + +static inline int overprovision_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->ovp_segments; +} + +static inline int overprovision_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; +} + +static inline int reserved_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec; +} + +static inline bool need_SSR(struct f2fs_sb_info *sbi) +{ + return (free_sections(sbi) < overprovision_sections(sbi)); +} + +static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return DIRTY_I(sbi)->v_ops->get_victim(sbi, + &(curseg)->next_segno, BG_GC, type, SSR); +} + +static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi) +{ + return free_sections(sbi) <= reserved_sections(sbi); +} + +static inline int utilization(struct f2fs_sb_info *sbi) +{ + return (long int)valid_user_blocks(sbi) * 100 / + (long int)sbi->user_block_count; +} + +/* + * Sometimes f2fs may be better to drop out-of-place update policy. + * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write + * data in the original place likewise other traditional file systems. + * But, currently set 100 in percentage, which means it is disabled. + * See below need_inplace_update(). + */ +#define MIN_IPU_UTIL 100 +static inline bool need_inplace_update(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (S_ISDIR(inode->i_mode)) + return false; + if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) + return true; + return false; +} + +static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->segno; +} + +static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->alloc_type; +} + +static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->next_blkoff; +} + +static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) +{ + unsigned int end_segno = SM_I(sbi)->segment_count - 1; + BUG_ON(segno > end_segno); +} + +/* + * This function is used for only debugging. + * NOTE: In future, we have to remove this function. + */ +static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; + block_t start_addr = sm_info->seg0_blkaddr; + block_t end_addr = start_addr + total_blks - 1; + BUG_ON(blk_addr < start_addr); + BUG_ON(blk_addr > end_addr); +} + +/* + * Summary block is always treated as invalid block + */ +static inline void check_block_count(struct f2fs_sb_info *sbi, + int segno, struct f2fs_sit_entry *raw_sit) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + unsigned int end_segno = sm_info->segment_count - 1; + int valid_blocks = 0; + int i; + + /* check segment usage */ + BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); + + /* check boundary of a given segment number */ + BUG_ON(segno > end_segno); + + /* check bitmap with valid block count */ + for (i = 0; i < sbi->blocks_per_seg; i++) + if (f2fs_test_bit(i, raw_sit->valid_map)) + valid_blocks++; + BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); +} + +static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, + unsigned int start) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); + block_t blk_addr = sit_i->sit_base_addr + offset; + + check_seg_range(sbi, start); + + /* calculate sit block address */ + if (f2fs_test_bit(offset, sit_i->sit_bitmap)) + blk_addr += sit_i->sit_blocks; + + return blk_addr; +} + +static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + block_addr -= sit_i->sit_base_addr; + if (block_addr < sit_i->sit_blocks) + block_addr += sit_i->sit_blocks; + else + block_addr -= sit_i->sit_blocks; + + return block_addr + sit_i->sit_base_addr; +} + +static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) +{ + unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); + + if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) + f2fs_clear_bit(block_off, sit_i->sit_bitmap); + else + f2fs_set_bit(block_off, sit_i->sit_bitmap); +} + +static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - + sit_i->mounted_time; +} + +static inline void set_summary(struct f2fs_summary *sum, nid_t nid, + unsigned int ofs_in_node, unsigned char version) +{ + sum->nid = cpu_to_le32(nid); + sum->ofs_in_node = cpu_to_le16(ofs_in_node); + sum->version = version; +} + +static inline block_t start_sum_block(struct f2fs_sb_info *sbi) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) + - (base + 1) + type; +} diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c new file mode 100644 index 000000000000..13867322cf5a --- /dev/null +++ b/fs/f2fs/super.c @@ -0,0 +1,657 @@ +/* + * fs/f2fs/super.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/statfs.h> +#include <linux/proc_fs.h> +#include <linux/buffer_head.h> +#include <linux/backing-dev.h> +#include <linux/kthread.h> +#include <linux/parser.h> +#include <linux/mount.h> +#include <linux/seq_file.h> +#include <linux/random.h> +#include <linux/exportfs.h> +#include <linux/f2fs_fs.h> + +#include "f2fs.h" +#include "node.h" +#include "xattr.h" + +static struct kmem_cache *f2fs_inode_cachep; + +enum { + Opt_gc_background_off, + Opt_disable_roll_forward, + Opt_discard, + Opt_noheap, + Opt_nouser_xattr, + Opt_noacl, + Opt_active_logs, + Opt_disable_ext_identify, + Opt_err, +}; + +static match_table_t f2fs_tokens = { + {Opt_gc_background_off, "background_gc_off"}, + {Opt_disable_roll_forward, "disable_roll_forward"}, + {Opt_discard, "discard"}, + {Opt_noheap, "no_heap"}, + {Opt_nouser_xattr, "nouser_xattr"}, + {Opt_noacl, "noacl"}, + {Opt_active_logs, "active_logs=%u"}, + {Opt_disable_ext_identify, "disable_ext_identify"}, + {Opt_err, NULL}, +}; + +static void init_once(void *foo) +{ + struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; + + inode_init_once(&fi->vfs_inode); +} + +static struct inode *f2fs_alloc_inode(struct super_block *sb) +{ + struct f2fs_inode_info *fi; + + fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO); + if (!fi) + return NULL; + + init_once((void *) fi); + + /* Initilize f2fs-specific inode info */ + fi->vfs_inode.i_version = 1; + atomic_set(&fi->dirty_dents, 0); + fi->i_current_depth = 1; + fi->i_advise = 0; + rwlock_init(&fi->ext.ext_lock); + + set_inode_flag(fi, FI_NEW_INODE); + + return &fi->vfs_inode; +} + +static void f2fs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode)); +} + +static void f2fs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, f2fs_i_callback); +} + +static void f2fs_put_super(struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + f2fs_destroy_stats(sbi); + stop_gc_thread(sbi); + + write_checkpoint(sbi, false, true); + + iput(sbi->node_inode); + iput(sbi->meta_inode); + + /* destroy f2fs internal modules */ + destroy_node_manager(sbi); + destroy_segment_manager(sbi); + + kfree(sbi->ckpt); + + sb->s_fs_info = NULL; + brelse(sbi->raw_super_buf); + kfree(sbi); +} + +int f2fs_sync_fs(struct super_block *sb, int sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int ret = 0; + + if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) + return 0; + + if (sync) + write_checkpoint(sbi, false, false); + + return ret; +} + +static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + block_t total_count, user_block_count, start_count, ovp_count; + + total_count = le64_to_cpu(sbi->raw_super->block_count); + user_block_count = sbi->user_block_count; + start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr); + ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; + buf->f_type = F2FS_SUPER_MAGIC; + buf->f_bsize = sbi->blocksize; + + buf->f_blocks = total_count - start_count; + buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; + buf->f_bavail = user_block_count - valid_user_blocks(sbi); + + buf->f_files = valid_inode_count(sbi); + buf->f_ffree = sbi->total_node_count - valid_node_count(sbi); + + buf->f_namelen = F2FS_MAX_NAME_LEN; + buf->f_fsid.val[0] = (u32)id; + buf->f_fsid.val[1] = (u32)(id >> 32); + + return 0; +} + +static int f2fs_show_options(struct seq_file *seq, struct dentry *root) +{ + struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); + + if (test_opt(sbi, BG_GC)) + seq_puts(seq, ",background_gc_on"); + else + seq_puts(seq, ",background_gc_off"); + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) + seq_puts(seq, ",disable_roll_forward"); + if (test_opt(sbi, DISCARD)) + seq_puts(seq, ",discard"); + if (test_opt(sbi, NOHEAP)) + seq_puts(seq, ",no_heap_alloc"); +#ifdef CONFIG_F2FS_FS_XATTR + if (test_opt(sbi, XATTR_USER)) + seq_puts(seq, ",user_xattr"); + else + seq_puts(seq, ",nouser_xattr"); +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + if (test_opt(sbi, POSIX_ACL)) + seq_puts(seq, ",acl"); + else + seq_puts(seq, ",noacl"); +#endif + if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) + seq_puts(seq, ",disable_ext_indentify"); + + seq_printf(seq, ",active_logs=%u", sbi->active_logs); + + return 0; +} + +static struct super_operations f2fs_sops = { + .alloc_inode = f2fs_alloc_inode, + .destroy_inode = f2fs_destroy_inode, + .write_inode = f2fs_write_inode, + .show_options = f2fs_show_options, + .evict_inode = f2fs_evict_inode, + .put_super = f2fs_put_super, + .sync_fs = f2fs_sync_fs, + .statfs = f2fs_statfs, +}; + +static struct inode *f2fs_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + + if (ino < F2FS_ROOT_INO(sbi)) + return ERR_PTR(-ESTALE); + + /* + * f2fs_iget isn't quite right if the inode is currently unallocated! + * However f2fs_iget currently does appropriate checks to handle stale + * inodes so everything is OK. + */ + inode = f2fs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { + /* we didn't find the right inode.. */ + iput(inode); + return ERR_PTR(-ESTALE); + } + return inode; +} + +static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + f2fs_nfs_get_inode); +} + +static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + f2fs_nfs_get_inode); +} + +static const struct export_operations f2fs_export_ops = { + .fh_to_dentry = f2fs_fh_to_dentry, + .fh_to_parent = f2fs_fh_to_parent, + .get_parent = f2fs_get_parent, +}; + +static int parse_options(struct f2fs_sb_info *sbi, char *options) +{ + substring_t args[MAX_OPT_ARGS]; + char *p; + int arg = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. + */ + args[0].to = args[0].from = NULL; + token = match_token(p, f2fs_tokens, args); + + switch (token) { + case Opt_gc_background_off: + clear_opt(sbi, BG_GC); + break; + case Opt_disable_roll_forward: + set_opt(sbi, DISABLE_ROLL_FORWARD); + break; + case Opt_discard: + set_opt(sbi, DISCARD); + break; + case Opt_noheap: + set_opt(sbi, NOHEAP); + break; +#ifdef CONFIG_F2FS_FS_XATTR + case Opt_nouser_xattr: + clear_opt(sbi, XATTR_USER); + break; +#else + case Opt_nouser_xattr: + pr_info("nouser_xattr options not supported\n"); + break; +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + case Opt_noacl: + clear_opt(sbi, POSIX_ACL); + break; +#else + case Opt_noacl: + pr_info("noacl options not supported\n"); + break; +#endif + case Opt_active_logs: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg != 2 && arg != 4 && arg != 6) + return -EINVAL; + sbi->active_logs = arg; + break; + case Opt_disable_ext_identify: + set_opt(sbi, DISABLE_EXT_IDENTIFY); + break; + default: + pr_err("Unrecognized mount option \"%s\" or missing value\n", + p); + return -EINVAL; + } + } + return 0; +} + +static loff_t max_file_size(unsigned bits) +{ + loff_t result = ADDRS_PER_INODE; + loff_t leaf_count = ADDRS_PER_BLOCK; + + /* two direct node blocks */ + result += (leaf_count * 2); + + /* two indirect node blocks */ + leaf_count *= NIDS_PER_BLOCK; + result += (leaf_count * 2); + + /* one double indirect node block */ + leaf_count *= NIDS_PER_BLOCK; + result += leaf_count; + + result <<= bits; + return result; +} + +static int sanity_check_raw_super(struct f2fs_super_block *raw_super) +{ + unsigned int blocksize; + + if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) + return 1; + + /* Currently, support only 4KB block size */ + blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); + if (blocksize != PAGE_CACHE_SIZE) + return 1; + if (le32_to_cpu(raw_super->log_sectorsize) != + F2FS_LOG_SECTOR_SIZE) + return 1; + if (le32_to_cpu(raw_super->log_sectors_per_block) != + F2FS_LOG_SECTORS_PER_BLOCK) + return 1; + return 0; +} + +static int sanity_check_ckpt(struct f2fs_super_block *raw_super, + struct f2fs_checkpoint *ckpt) +{ + unsigned int total, fsmeta; + + total = le32_to_cpu(raw_super->segment_count); + fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); + fsmeta += le32_to_cpu(raw_super->segment_count_sit); + fsmeta += le32_to_cpu(raw_super->segment_count_nat); + fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); + fsmeta += le32_to_cpu(raw_super->segment_count_ssa); + + if (fsmeta >= total) + return 1; + return 0; +} + +static void init_sb_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_super_block *raw_super = sbi->raw_super; + int i; + + sbi->log_sectors_per_block = + le32_to_cpu(raw_super->log_sectors_per_block); + sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize); + sbi->blocksize = 1 << sbi->log_blocksize; + sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); + sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg; + sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); + sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); + sbi->total_sections = le32_to_cpu(raw_super->section_count); + sbi->total_node_count = + (le32_to_cpu(raw_super->segment_count_nat) / 2) + * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK; + sbi->root_ino_num = le32_to_cpu(raw_super->root_ino); + sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); + sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); + + for (i = 0; i < NR_COUNT_TYPE; i++) + atomic_set(&sbi->nr_pages[i], 0); +} + +static int f2fs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct f2fs_sb_info *sbi; + struct f2fs_super_block *raw_super; + struct buffer_head *raw_super_buf; + struct inode *root; + long err = -EINVAL; + int i; + + /* allocate memory for f2fs-specific super block info */ + sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + /* set a temporary block size */ + if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) + goto free_sbi; + + /* read f2fs raw super block */ + raw_super_buf = sb_bread(sb, 0); + if (!raw_super_buf) { + err = -EIO; + goto free_sbi; + } + raw_super = (struct f2fs_super_block *) + ((char *)raw_super_buf->b_data + F2FS_SUPER_OFFSET); + + /* init some FS parameters */ + sbi->active_logs = NR_CURSEG_TYPE; + + set_opt(sbi, BG_GC); + +#ifdef CONFIG_F2FS_FS_XATTR + set_opt(sbi, XATTR_USER); +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + set_opt(sbi, POSIX_ACL); +#endif + /* parse mount options */ + if (parse_options(sbi, (char *)data)) + goto free_sb_buf; + + /* sanity checking of raw super */ + if (sanity_check_raw_super(raw_super)) + goto free_sb_buf; + + sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); + sb->s_max_links = F2FS_LINK_MAX; + get_random_bytes(&sbi->s_next_generation, sizeof(u32)); + + sb->s_op = &f2fs_sops; + sb->s_xattr = f2fs_xattr_handlers; + sb->s_export_op = &f2fs_export_ops; + sb->s_magic = F2FS_SUPER_MAGIC; + sb->s_fs_info = sbi; + sb->s_time_gran = 1; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); + memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); + + /* init f2fs-specific super block info */ + sbi->sb = sb; + sbi->raw_super = raw_super; + sbi->raw_super_buf = raw_super_buf; + mutex_init(&sbi->gc_mutex); + mutex_init(&sbi->write_inode); + mutex_init(&sbi->writepages); + mutex_init(&sbi->cp_mutex); + for (i = 0; i < NR_LOCK_TYPE; i++) + mutex_init(&sbi->fs_lock[i]); + sbi->por_doing = 0; + spin_lock_init(&sbi->stat_lock); + init_rwsem(&sbi->bio_sem); + init_sb_info(sbi); + + /* get an inode for meta space */ + sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); + if (IS_ERR(sbi->meta_inode)) { + err = PTR_ERR(sbi->meta_inode); + goto free_sb_buf; + } + + err = get_valid_checkpoint(sbi); + if (err) + goto free_meta_inode; + + /* sanity checking of checkpoint */ + err = -EINVAL; + if (sanity_check_ckpt(raw_super, sbi->ckpt)) + goto free_cp; + + sbi->total_valid_node_count = + le32_to_cpu(sbi->ckpt->valid_node_count); + sbi->total_valid_inode_count = + le32_to_cpu(sbi->ckpt->valid_inode_count); + sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count); + sbi->total_valid_block_count = + le64_to_cpu(sbi->ckpt->valid_block_count); + sbi->last_valid_block_count = sbi->total_valid_block_count; + sbi->alloc_valid_block_count = 0; + INIT_LIST_HEAD(&sbi->dir_inode_list); + spin_lock_init(&sbi->dir_inode_lock); + + /* init super block */ + if (!sb_set_blocksize(sb, sbi->blocksize)) + goto free_cp; + + init_orphan_info(sbi); + + /* setup f2fs internal modules */ + err = build_segment_manager(sbi); + if (err) + goto free_sm; + err = build_node_manager(sbi); + if (err) + goto free_nm; + + build_gc_manager(sbi); + + /* get an inode for node space */ + sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); + if (IS_ERR(sbi->node_inode)) { + err = PTR_ERR(sbi->node_inode); + goto free_nm; + } + + /* if there are nt orphan nodes free them */ + err = -EINVAL; + if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && + recover_orphan_inodes(sbi)) + goto free_node_inode; + + /* read root inode and dentry */ + root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto free_node_inode; + } + if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) + goto free_root_inode; + + sb->s_root = d_make_root(root); /* allocate root dentry */ + if (!sb->s_root) { + err = -ENOMEM; + goto free_root_inode; + } + + /* recover fsynced data */ + if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && + !test_opt(sbi, DISABLE_ROLL_FORWARD)) + recover_fsync_data(sbi); + + /* After POR, we can run background GC thread */ + err = start_gc_thread(sbi); + if (err) + goto fail; + + err = f2fs_build_stats(sbi); + if (err) + goto fail; + + return 0; +fail: + stop_gc_thread(sbi); +free_root_inode: + dput(sb->s_root); + sb->s_root = NULL; +free_node_inode: + iput(sbi->node_inode); +free_nm: + destroy_node_manager(sbi); +free_sm: + destroy_segment_manager(sbi); +free_cp: + kfree(sbi->ckpt); +free_meta_inode: + make_bad_inode(sbi->meta_inode); + iput(sbi->meta_inode); +free_sb_buf: + brelse(raw_super_buf); +free_sbi: + kfree(sbi); + return err; +} + +static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); +} + +static struct file_system_type f2fs_fs_type = { + .owner = THIS_MODULE, + .name = "f2fs", + .mount = f2fs_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int init_inodecache(void) +{ + f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", + sizeof(struct f2fs_inode_info), NULL); + if (f2fs_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + kmem_cache_destroy(f2fs_inode_cachep); +} + +static int __init init_f2fs_fs(void) +{ + int err; + + err = init_inodecache(); + if (err) + goto fail; + err = create_node_manager_caches(); + if (err) + goto fail; + err = create_gc_caches(); + if (err) + goto fail; + err = create_checkpoint_caches(); + if (err) + goto fail; + return register_filesystem(&f2fs_fs_type); +fail: + return err; +} + +static void __exit exit_f2fs_fs(void) +{ + destroy_root_stats(); + unregister_filesystem(&f2fs_fs_type); + destroy_checkpoint_caches(); + destroy_gc_caches(); + destroy_node_manager_caches(); + destroy_inodecache(); +} + +module_init(init_f2fs_fs) +module_exit(exit_f2fs_fs) + +MODULE_AUTHOR("Samsung Electronics's Praesto Team"); +MODULE_DESCRIPTION("Flash Friendly File System"); +MODULE_LICENSE("GPL"); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c new file mode 100644 index 000000000000..7d52e8dc0c59 --- /dev/null +++ b/fs/f2fs/xattr.c @@ -0,0 +1,440 @@ +/* + * fs/f2fs/xattr.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/xattr.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de> + * + * Fix by Harrison Xing <harrison@mountainviewdata.com>. + * Extended attributes for symlinks and special files added per + * suggestion of Luka Renko <luka.renko@hermes.si>. + * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>, + * Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/rwsem.h> +#include <linux/f2fs_fs.h> +#include "f2fs.h" +#include "xattr.h" + +static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + int total_len, prefix_len = 0; + const char *prefix = NULL; + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + prefix = XATTR_USER_PREFIX; + prefix_len = XATTR_USER_PREFIX_LEN; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + prefix = XATTR_TRUSTED_PREFIX; + prefix_len = XATTR_TRUSTED_PREFIX_LEN; + break; + default: + return -EINVAL; + } + + total_len = prefix_len + name_len + 1; + if (list && total_len <= list_size) { + memcpy(list, prefix, prefix_len); + memcpy(list+prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + break; + default: + return -EINVAL; + } + if (strcmp(name, "") == 0) + return -EINVAL; + return f2fs_getxattr(dentry->d_inode, type, name, + buffer, size); +} + +static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + + switch (type) { + case F2FS_XATTR_INDEX_USER: + if (!test_opt(sbi, XATTR_USER)) + return -EOPNOTSUPP; + break; + case F2FS_XATTR_INDEX_TRUSTED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + break; + default: + return -EINVAL; + } + if (strcmp(name, "") == 0) + return -EINVAL; + + return f2fs_setxattr(dentry->d_inode, type, name, value, size); +} + +static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; + size_t size; + + if (type != F2FS_XATTR_INDEX_ADVISE) + return 0; + + size = strlen(xname) + 1; + if (list && size <= list_size) + memcpy(list, xname, size); + return size; +} + +static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(name, "") != 0) + return -EINVAL; + + *((char *)buffer) = F2FS_I(inode)->i_advise; + return sizeof(char); +} + +static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(name, "") != 0) + return -EINVAL; + if (!inode_owner_or_capable(inode)) + return -EPERM; + if (value == NULL) + return -EINVAL; + + F2FS_I(inode)->i_advise |= *(char *)value; + return 0; +} + +const struct xattr_handler f2fs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .flags = F2FS_XATTR_INDEX_USER, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + +const struct xattr_handler f2fs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .flags = F2FS_XATTR_INDEX_TRUSTED, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + +const struct xattr_handler f2fs_xattr_advise_handler = { + .prefix = F2FS_SYSTEM_ADVISE_PREFIX, + .flags = F2FS_XATTR_INDEX_ADVISE, + .list = f2fs_xattr_advise_list, + .get = f2fs_xattr_advise_get, + .set = f2fs_xattr_advise_set, +}; + +static const struct xattr_handler *f2fs_xattr_handler_map[] = { + [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, +#ifdef CONFIG_F2FS_FS_POSIX_ACL + [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler, + [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, +#endif + [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, + [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, +}; + +const struct xattr_handler *f2fs_xattr_handlers[] = { + &f2fs_xattr_user_handler, +#ifdef CONFIG_F2FS_FS_POSIX_ACL + &f2fs_xattr_acl_access_handler, + &f2fs_xattr_acl_default_handler, +#endif + &f2fs_xattr_trusted_handler, + &f2fs_xattr_advise_handler, + NULL, +}; + +static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) +{ + const struct xattr_handler *handler = NULL; + + if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map)) + handler = f2fs_xattr_handler_map[name_index]; + return handler; +} + +int f2fs_getxattr(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_xattr_entry *entry; + struct page *page; + void *base_addr; + int error = 0, found = 0; + int value_len, name_len; + + if (name == NULL) + return -EINVAL; + name_len = strlen(name); + + if (!fi->i_xattr_nid) + return -ENODATA; + + page = get_node_page(sbi, fi->i_xattr_nid); + base_addr = page_address(page); + + list_for_each_xattr(entry, base_addr) { + if (entry->e_name_index != name_index) + continue; + if (entry->e_name_len != name_len) + continue; + if (!memcmp(entry->e_name, name, name_len)) { + found = 1; + break; + } + } + if (!found) { + error = -ENODATA; + goto cleanup; + } + + value_len = le16_to_cpu(entry->e_value_size); + + if (buffer && value_len > buffer_size) { + error = -ERANGE; + goto cleanup; + } + + if (buffer) { + char *pval = entry->e_name + entry->e_name_len; + memcpy(buffer, pval, value_len); + } + error = value_len; + +cleanup: + f2fs_put_page(page, 1); + return error; +} + +ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) +{ + struct inode *inode = dentry->d_inode; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_xattr_entry *entry; + struct page *page; + void *base_addr; + int error = 0; + size_t rest = buffer_size; + + if (!fi->i_xattr_nid) + return 0; + + page = get_node_page(sbi, fi->i_xattr_nid); + base_addr = page_address(page); + + list_for_each_xattr(entry, base_addr) { + const struct xattr_handler *handler = + f2fs_xattr_handler(entry->e_name_index); + size_t size; + + if (!handler) + continue; + + size = handler->list(dentry, buffer, rest, entry->e_name, + entry->e_name_len, handler->flags); + if (buffer && size > rest) { + error = -ERANGE; + goto cleanup; + } + + if (buffer) + buffer += size; + rest -= size; + } + error = buffer_size - rest; +cleanup: + f2fs_put_page(page, 1); + return error; +} + +int f2fs_setxattr(struct inode *inode, int name_index, const char *name, + const void *value, size_t value_len) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_xattr_header *header = NULL; + struct f2fs_xattr_entry *here, *last; + struct page *page; + void *base_addr; + int error, found, free, name_len, newsize; + char *pval; + + if (name == NULL) + return -EINVAL; + name_len = strlen(name); + + if (value == NULL) + value_len = 0; + + if (name_len > 255 || value_len > MAX_VALUE_LEN) + return -ERANGE; + + mutex_lock_op(sbi, NODE_NEW); + if (!fi->i_xattr_nid) { + /* Allocate new attribute block */ + struct dnode_of_data dn; + + if (!alloc_nid(sbi, &fi->i_xattr_nid)) { + mutex_unlock_op(sbi, NODE_NEW); + return -ENOSPC; + } + set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); + mark_inode_dirty(inode); + + page = new_node_page(&dn, XATTR_NODE_OFFSET); + if (IS_ERR(page)) { + alloc_nid_failed(sbi, fi->i_xattr_nid); + fi->i_xattr_nid = 0; + mutex_unlock_op(sbi, NODE_NEW); + return PTR_ERR(page); + } + + alloc_nid_done(sbi, fi->i_xattr_nid); + base_addr = page_address(page); + header = XATTR_HDR(base_addr); + header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); + header->h_refcount = cpu_to_le32(1); + } else { + /* The inode already has an extended attribute block. */ + page = get_node_page(sbi, fi->i_xattr_nid); + if (IS_ERR(page)) { + mutex_unlock_op(sbi, NODE_NEW); + return PTR_ERR(page); + } + + base_addr = page_address(page); + header = XATTR_HDR(base_addr); + } + + if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) { + error = -EIO; + goto cleanup; + } + + /* find entry with wanted name. */ + found = 0; + list_for_each_xattr(here, base_addr) { + if (here->e_name_index != name_index) + continue; + if (here->e_name_len != name_len) + continue; + if (!memcmp(here->e_name, name, name_len)) { + found = 1; + break; + } + } + + last = here; + + while (!IS_XATTR_LAST_ENTRY(last)) + last = XATTR_NEXT_ENTRY(last); + + newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + + name_len + value_len); + + /* 1. Check space */ + if (value) { + /* If value is NULL, it is remove operation. + * In case of update operation, we caculate free. + */ + free = MIN_OFFSET - ((char *)last - (char *)header); + if (found) + free = free - ENTRY_SIZE(here); + + if (free < newsize) { + error = -ENOSPC; + goto cleanup; + } + } + + /* 2. Remove old entry */ + if (found) { + /* If entry is found, remove old entry. + * If not found, remove operation is not needed. + */ + struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here); + int oldsize = ENTRY_SIZE(here); + + memmove(here, next, (char *)last - (char *)next); + last = (struct f2fs_xattr_entry *)((char *)last - oldsize); + memset(last, 0, oldsize); + } + + /* 3. Write new entry */ + if (value) { + /* Before we come here, old entry is removed. + * We just write new entry. */ + memset(last, 0, newsize); + last->e_name_index = name_index; + last->e_name_len = name_len; + memcpy(last->e_name, name, name_len); + pval = last->e_name + name_len; + memcpy(pval, value, value_len); + last->e_value_size = cpu_to_le16(value_len); + } + + set_page_dirty(page); + f2fs_put_page(page, 1); + + if (is_inode_flag_set(fi, FI_ACL_MODE)) { + inode->i_mode = fi->i_acl_mode; + inode->i_ctime = CURRENT_TIME; + clear_inode_flag(fi, FI_ACL_MODE); + } + f2fs_write_inode(inode, NULL); + mutex_unlock_op(sbi, NODE_NEW); + + return 0; +cleanup: + f2fs_put_page(page, 1); + mutex_unlock_op(sbi, NODE_NEW); + return error; +} diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h new file mode 100644 index 000000000000..49c9558305e3 --- /dev/null +++ b/fs/f2fs/xattr.h @@ -0,0 +1,145 @@ +/* + * fs/f2fs/xattr.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/xattr.h + * + * On-disk format of extended attributes for the ext2 filesystem. + * + * (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __F2FS_XATTR_H__ +#define __F2FS_XATTR_H__ + +#include <linux/init.h> +#include <linux/xattr.h> + +/* Magic value in attribute blocks */ +#define F2FS_XATTR_MAGIC 0xF2F52011 + +/* Maximum number of references to one attribute block */ +#define F2FS_XATTR_REFCOUNT_MAX 1024 + +/* Name indexes */ +#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise" +#define F2FS_XATTR_INDEX_USER 1 +#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3 +#define F2FS_XATTR_INDEX_TRUSTED 4 +#define F2FS_XATTR_INDEX_LUSTRE 5 +#define F2FS_XATTR_INDEX_SECURITY 6 +#define F2FS_XATTR_INDEX_ADVISE 7 + +struct f2fs_xattr_header { + __le32 h_magic; /* magic number for identification */ + __le32 h_refcount; /* reference count */ + __u32 h_reserved[4]; /* zero right now */ +}; + +struct f2fs_xattr_entry { + __u8 e_name_index; + __u8 e_name_len; + __le16 e_value_size; /* size of attribute value */ + char e_name[0]; /* attribute name */ +}; + +#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr)) +#define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr)) +#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr)+1)) +#define XATTR_ROUND (3) + +#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) + +#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \ + entry->e_name_len + le16_to_cpu(entry->e_value_size))) + +#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\ + ENTRY_SIZE(entry))) + +#define IS_XATTR_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) + +#define list_for_each_xattr(entry, addr) \ + for (entry = XATTR_FIRST_ENTRY(addr);\ + !IS_XATTR_LAST_ENTRY(entry);\ + entry = XATTR_NEXT_ENTRY(entry)) + + +#define MIN_OFFSET XATTR_ALIGN(PAGE_SIZE - \ + sizeof(struct node_footer) - \ + sizeof(__u32)) + +#define MAX_VALUE_LEN (MIN_OFFSET - sizeof(struct f2fs_xattr_header) - \ + sizeof(struct f2fs_xattr_entry)) + +/* + * On-disk structure of f2fs_xattr + * We use only 1 block for xattr. + * + * +--------------------+ + * | f2fs_xattr_header | + * | | + * +--------------------+ + * | f2fs_xattr_entry | + * | .e_name_index = 1 | + * | .e_name_len = 3 | + * | .e_value_size = 14 | + * | .e_name = "foo" | + * | "value_of_xattr" |<- value_offs = e_name + e_name_len + * +--------------------+ + * | f2fs_xattr_entry | + * | .e_name_index = 4 | + * | .e_name = "bar" | + * +--------------------+ + * | | + * | Free | + * | | + * +--------------------+<- MIN_OFFSET + * | node_footer | + * | (nid, ino, offset) | + * +--------------------+ + * + **/ + +#ifdef CONFIG_F2FS_FS_XATTR +extern const struct xattr_handler f2fs_xattr_user_handler; +extern const struct xattr_handler f2fs_xattr_trusted_handler; +extern const struct xattr_handler f2fs_xattr_acl_access_handler; +extern const struct xattr_handler f2fs_xattr_acl_default_handler; +extern const struct xattr_handler f2fs_xattr_advise_handler; + +extern const struct xattr_handler *f2fs_xattr_handlers[]; + +extern int f2fs_setxattr(struct inode *inode, int name_index, const char *name, + const void *value, size_t value_len); +extern int f2fs_getxattr(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size); +extern ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, + size_t buffer_size); + +#else + +#define f2fs_xattr_handlers NULL +static inline int f2fs_setxattr(struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len) +{ + return -EOPNOTSUPP; +} +static inline int f2fs_getxattr(struct inode *inode, int name_index, + const char *name, void *buffer, size_t buffer_size) +{ + return -EOPNOTSUPP; +} +static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, + size_t buffer_size) +{ + return -EOPNOTSUPP; +} +#endif + +#endif /* __F2FS_XATTR_H__ */ diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 2a182342442e..58bf744dbf39 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -461,8 +461,7 @@ static int fat_parse_short(struct super_block *sb, } /* - * Return values: negative -> error, 0 -> not found, positive -> found, - * value is the total amount of slots, including the shortname entry. + * Return values: negative -> error/not found, 0 -> found. */ int fat_search_long(struct inode *inode, const unsigned char *name, int name_len, struct fat_slot_info *sinfo) @@ -1255,7 +1254,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots, sinfo->nr_slots = nr_slots; - /* First stage: search free direcotry entries */ + /* First stage: search free directory entries */ free_slots = nr_bhs = 0; bh = prev = NULL; pos = 0; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 35806813ea4e..f8f491677a4a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1344,7 +1344,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->dir_entries = get_unaligned_le16(&b->dir_entries); if (sbi->dir_entries & (sbi->dir_per_block - 1)) { if (!silent) - fat_msg(sb, KERN_ERR, "bogus directroy-entries per block" + fat_msg(sb, KERN_ERR, "bogus directory-entries per block" " (%u)", sbi->dir_entries); brelse(bh); goto out_invalid; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 5eb600dc43a9..359d307b5507 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -135,6 +135,10 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) } if (ret < 0) return ret; + /* + * FIXME:Although we can add this cache, fat_cache_add() is + * assuming to be called after linear search with fat_cache_id. + */ // fat_cache_add(inode, new_fclus, new_dclus); } else { MSDOS_I(inode)->i_start = new_dclus; diff --git a/fs/fhandle.c b/fs/fhandle.c index cccdc874bb55..999ff5c3cab0 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -52,7 +52,7 @@ static long do_sys_name_to_handle(struct path *path, handle_bytes = handle_dwords * sizeof(u32); handle->handle_bytes = handle_bytes; if ((handle->handle_bytes > f_handle.handle_bytes) || - (retval == 255) || (retval == -ENOSPC)) { + (retval == FILEID_INVALID) || (retval == -ENOSPC)) { /* As per old exportfs_encode_fh documentation * we could return ENOSPC to indicate overflow * But file system returned 255 always. So handle diff --git a/fs/file_table.c b/fs/file_table.c index a72bf9ddd0d2..de9e9653d611 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -458,8 +458,8 @@ void mark_files_ro(struct super_block *sb) spin_unlock(&f->f_lock); if (file_check_writeable(f) != 0) continue; + __mnt_drop_write(f->f_path.mnt); file_release_write(f); - mnt_drop_write_file(f); } while_file_list_for_each_entry; lg_global_unlock(&files_lglock); } diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index 6a3c48abd677..b52aed1dca97 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -314,10 +314,10 @@ EXPORT_SYMBOL(fscache_add_cache); */ void fscache_io_error(struct fscache_cache *cache) { - set_bit(FSCACHE_IOERROR, &cache->flags); - - printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n", - cache->ops->name); + if (!test_and_set_bit(FSCACHE_IOERROR, &cache->flags)) + printk(KERN_ERR "FS-Cache:" + " Cache '%s' stopped due to I/O error\n", + cache->ops->name); } EXPORT_SYMBOL(fscache_io_error); diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 990535071a8a..8dcb114758e3 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -370,6 +370,66 @@ cant_attach_object: } /* + * Invalidate an object. Callable with spinlocks held. + */ +void __fscache_invalidate(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + + _enter("{%s}", cookie->def->name); + + fscache_stat(&fscache_n_invalidates); + + /* Only permit invalidation of data files. Invalidating an index will + * require the caller to release all its attachments to the tree rooted + * there, and if it's doing that, it may as well just retire the + * cookie. + */ + ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE); + + /* We will be updating the cookie too. */ + BUG_ON(!cookie->def->get_aux); + + /* If there's an object, we tell the object state machine to handle the + * invalidation on our behalf, otherwise there's nothing to do. + */ + if (!hlist_empty(&cookie->backing_objects)) { + spin_lock(&cookie->lock); + + if (!hlist_empty(&cookie->backing_objects) && + !test_and_set_bit(FSCACHE_COOKIE_INVALIDATING, + &cookie->flags)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, + cookie_link); + if (object->state < FSCACHE_OBJECT_DYING) + fscache_raise_event( + object, FSCACHE_OBJECT_EV_INVALIDATE); + } + + spin_unlock(&cookie->lock); + } + + _leave(""); +} +EXPORT_SYMBOL(__fscache_invalidate); + +/* + * Wait for object invalidation to complete. + */ +void __fscache_wait_on_invalidate(struct fscache_cookie *cookie) +{ + _enter("%p", cookie); + + wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING, + fscache_wait_bit_interruptible, + TASK_UNINTERRUPTIBLE); + + _leave(""); +} +EXPORT_SYMBOL(__fscache_wait_on_invalidate); + +/* * update the index entries backing a cookie */ void __fscache_update_cookie(struct fscache_cookie *cookie) @@ -442,16 +502,34 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; +try_again: spin_lock(&cookie->lock); /* break links with all the active objects */ while (!hlist_empty(&cookie->backing_objects)) { + int n_reads; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); _debug("RELEASE OBJ%x", object->debug_id); + set_bit(FSCACHE_COOKIE_WAITING_ON_READS, &cookie->flags); + n_reads = atomic_read(&object->n_reads); + if (n_reads) { + int n_ops = object->n_ops; + int n_in_progress = object->n_in_progress; + spin_unlock(&cookie->lock); + printk(KERN_ERR "FS-Cache:" + " Cookie '%s' still has %d outstanding reads (%d,%d)\n", + cookie->def->name, + n_reads, n_ops, n_in_progress); + wait_on_bit(&cookie->flags, FSCACHE_COOKIE_WAITING_ON_READS, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + printk("Wait finished\n"); + goto try_again; + } + /* detach each cache object from the object cookie */ spin_lock(&object->lock); hlist_del_init(&object->cookie_link); diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index f6aad48d38a8..ee38fef4be51 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -121,12 +121,19 @@ extern int fscache_submit_exclusive_op(struct fscache_object *, struct fscache_operation *); extern int fscache_submit_op(struct fscache_object *, struct fscache_operation *); -extern int fscache_cancel_op(struct fscache_operation *); +extern int fscache_cancel_op(struct fscache_operation *, + void (*)(struct fscache_operation *)); +extern void fscache_cancel_all_ops(struct fscache_object *); extern void fscache_abort_object(struct fscache_object *); extern void fscache_start_operations(struct fscache_object *); extern void fscache_operation_gc(struct work_struct *); /* + * page.c + */ +extern void fscache_invalidate_writes(struct fscache_cookie *); + +/* * proc.c */ #ifdef CONFIG_PROC_FS @@ -194,6 +201,7 @@ extern atomic_t fscache_n_store_vmscan_not_storing; extern atomic_t fscache_n_store_vmscan_gone; extern atomic_t fscache_n_store_vmscan_busy; extern atomic_t fscache_n_store_vmscan_cancelled; +extern atomic_t fscache_n_store_vmscan_wait; extern atomic_t fscache_n_marks; extern atomic_t fscache_n_uncaches; @@ -205,6 +213,9 @@ extern atomic_t fscache_n_acquires_ok; extern atomic_t fscache_n_acquires_nobufs; extern atomic_t fscache_n_acquires_oom; +extern atomic_t fscache_n_invalidates; +extern atomic_t fscache_n_invalidates_run; + extern atomic_t fscache_n_updates; extern atomic_t fscache_n_updates_null; extern atomic_t fscache_n_updates_run; @@ -237,6 +248,7 @@ extern atomic_t fscache_n_cop_alloc_object; extern atomic_t fscache_n_cop_lookup_object; extern atomic_t fscache_n_cop_lookup_complete; extern atomic_t fscache_n_cop_grab_object; +extern atomic_t fscache_n_cop_invalidate_object; extern atomic_t fscache_n_cop_update_object; extern atomic_t fscache_n_cop_drop_object; extern atomic_t fscache_n_cop_put_object; @@ -278,6 +290,7 @@ extern const struct file_operations fscache_stats_fops; static inline void fscache_raise_event(struct fscache_object *object, unsigned event) { + BUG_ON(event >= NR_FSCACHE_OBJECT_EVENTS); if (!test_and_set_bit(event, &object->events) && test_bit(event, &object->event_mask)) fscache_enqueue_object(object); diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index ebe29c581380..f27c89d17885 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -245,7 +245,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) obj->n_in_progress, obj->n_exclusive, atomic_read(&obj->n_reads), - obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK, + obj->event_mask, obj->events, obj->flags, work_busy(&obj->work)); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index b6b897c550ac..50d41c180211 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -14,6 +14,7 @@ #define FSCACHE_DEBUG_LEVEL COOKIE #include <linux/module.h> +#include <linux/slab.h> #include "internal.h" const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { @@ -22,6 +23,7 @@ const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", [FSCACHE_OBJECT_AVAILABLE] = "OBJECT_AVAILABLE", [FSCACHE_OBJECT_ACTIVE] = "OBJECT_ACTIVE", + [FSCACHE_OBJECT_INVALIDATING] = "OBJECT_INVALIDATING", [FSCACHE_OBJECT_UPDATING] = "OBJECT_UPDATING", [FSCACHE_OBJECT_DYING] = "OBJECT_DYING", [FSCACHE_OBJECT_LC_DYING] = "OBJECT_LC_DYING", @@ -39,6 +41,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { [FSCACHE_OBJECT_CREATING] = "CRTN", [FSCACHE_OBJECT_AVAILABLE] = "AVBL", [FSCACHE_OBJECT_ACTIVE] = "ACTV", + [FSCACHE_OBJECT_INVALIDATING] = "INVL", [FSCACHE_OBJECT_UPDATING] = "UPDT", [FSCACHE_OBJECT_DYING] = "DYNG", [FSCACHE_OBJECT_LC_DYING] = "LCDY", @@ -54,6 +57,7 @@ static void fscache_put_object(struct fscache_object *); static void fscache_initialise_object(struct fscache_object *); static void fscache_lookup_object(struct fscache_object *); static void fscache_object_available(struct fscache_object *); +static void fscache_invalidate_object(struct fscache_object *); static void fscache_release_object(struct fscache_object *); static void fscache_withdraw_object(struct fscache_object *); static void fscache_enqueue_dependents(struct fscache_object *); @@ -79,6 +83,15 @@ static inline void fscache_done_parent_op(struct fscache_object *object) } /* + * Notify netfs of invalidation completion. + */ +static inline void fscache_invalidation_complete(struct fscache_cookie *cookie) +{ + if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); +} + +/* * process events that have been sent to an object's state machine * - initiates parent lookup * - does object lookup @@ -90,6 +103,7 @@ static void fscache_object_state_machine(struct fscache_object *object) { enum fscache_object_state new_state; struct fscache_cookie *cookie; + int event; ASSERT(object != NULL); @@ -101,7 +115,8 @@ static void fscache_object_state_machine(struct fscache_object *object) /* wait for the parent object to become ready */ case FSCACHE_OBJECT_INIT: object->event_mask = - ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); + FSCACHE_OBJECT_EVENTS_MASK & + ~(1 << FSCACHE_OBJECT_EV_CLEARED); fscache_initialise_object(object); goto done; @@ -125,6 +140,16 @@ static void fscache_object_state_machine(struct fscache_object *object) case FSCACHE_OBJECT_ACTIVE: goto active_transit; + /* Invalidate an object on disk */ + case FSCACHE_OBJECT_INVALIDATING: + clear_bit(FSCACHE_OBJECT_EV_INVALIDATE, &object->events); + fscache_stat(&fscache_n_invalidates_run); + fscache_stat(&fscache_n_cop_invalidate_object); + fscache_invalidate_object(object); + fscache_stat_d(&fscache_n_cop_invalidate_object); + fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); + goto active_transit; + /* update the object metadata on disk */ case FSCACHE_OBJECT_UPDATING: clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); @@ -251,13 +276,17 @@ static void fscache_object_state_machine(struct fscache_object *object) /* determine the transition from a lookup state */ lookup_transit: - switch (fls(object->events & object->event_mask) - 1) { + event = fls(object->events & object->event_mask) - 1; + switch (event) { case FSCACHE_OBJECT_EV_WITHDRAW: case FSCACHE_OBJECT_EV_RETIRE: case FSCACHE_OBJECT_EV_RELEASE: case FSCACHE_OBJECT_EV_ERROR: new_state = FSCACHE_OBJECT_LC_DYING; goto change_state; + case FSCACHE_OBJECT_EV_INVALIDATE: + new_state = FSCACHE_OBJECT_INVALIDATING; + goto change_state; case FSCACHE_OBJECT_EV_REQUEUE: goto done; case -1: @@ -268,13 +297,17 @@ lookup_transit: /* determine the transition from an active state */ active_transit: - switch (fls(object->events & object->event_mask) - 1) { + event = fls(object->events & object->event_mask) - 1; + switch (event) { case FSCACHE_OBJECT_EV_WITHDRAW: case FSCACHE_OBJECT_EV_RETIRE: case FSCACHE_OBJECT_EV_RELEASE: case FSCACHE_OBJECT_EV_ERROR: new_state = FSCACHE_OBJECT_DYING; goto change_state; + case FSCACHE_OBJECT_EV_INVALIDATE: + new_state = FSCACHE_OBJECT_INVALIDATING; + goto change_state; case FSCACHE_OBJECT_EV_UPDATE: new_state = FSCACHE_OBJECT_UPDATING; goto change_state; @@ -287,7 +320,8 @@ active_transit: /* determine the transition from a terminal state */ terminal_transit: - switch (fls(object->events & object->event_mask) - 1) { + event = fls(object->events & object->event_mask) - 1; + switch (event) { case FSCACHE_OBJECT_EV_WITHDRAW: new_state = FSCACHE_OBJECT_WITHDRAWING; goto change_state; @@ -320,8 +354,8 @@ done: unsupported_event: printk(KERN_ERR "FS-Cache:" - " Unsupported event %lx [mask %lx] in state %s\n", - object->events, object->event_mask, + " Unsupported event %d [%lx/%lx] in state %s\n", + event, object->events, object->event_mask, fscache_object_states[object->state]); BUG(); } @@ -587,8 +621,6 @@ static void fscache_object_available(struct fscache_object *object) if (object->n_in_progress == 0) { if (object->n_ops > 0) { ASSERTCMP(object->n_ops, >=, object->n_obj_ops); - ASSERTIF(object->n_ops > object->n_obj_ops, - !list_empty(&object->pending_ops)); fscache_start_operations(object); } else { ASSERT(list_empty(&object->pending_ops)); @@ -681,6 +713,7 @@ static void fscache_withdraw_object(struct fscache_object *object) if (object->cookie == cookie) { hlist_del_init(&object->cookie_link); object->cookie = NULL; + fscache_invalidation_complete(cookie); detached = true; } spin_unlock(&cookie->lock); @@ -890,3 +923,55 @@ enum fscache_checkaux fscache_check_aux(struct fscache_object *object, return result; } EXPORT_SYMBOL(fscache_check_aux); + +/* + * Asynchronously invalidate an object. + */ +static void fscache_invalidate_object(struct fscache_object *object) +{ + struct fscache_operation *op; + struct fscache_cookie *cookie = object->cookie; + + _enter("{OBJ%x}", object->debug_id); + + /* Reject any new read/write ops and abort any that are pending. */ + fscache_invalidate_writes(cookie); + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + fscache_cancel_all_ops(object); + + /* Now we have to wait for in-progress reads and writes */ + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); + _leave(" [ENOMEM]"); + return; + } + + fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); + op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); + + spin_lock(&cookie->lock); + if (fscache_submit_exclusive_op(object, op) < 0) + goto submit_op_failed; + spin_unlock(&cookie->lock); + fscache_put_operation(op); + + /* Once we've completed the invalidation, we know there will be no data + * stored in the cache and thus we can reinstate the data-check-skip + * optimisation. + */ + set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + + /* We can allow read and write requests to come in once again. They'll + * queue up behind our exclusive invalidation operation. + */ + fscache_invalidation_complete(cookie); + _leave(""); + return; + +submit_op_failed: + spin_unlock(&cookie->lock); + kfree(op); + fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); + _leave(" [EIO]"); +} diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 30afdfa7aec7..762a9ec4ffa4 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -37,6 +37,7 @@ void fscache_enqueue_operation(struct fscache_operation *op) ASSERT(op->processor != NULL); ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); ASSERTCMP(atomic_read(&op->usage), >, 0); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { @@ -64,6 +65,9 @@ EXPORT_SYMBOL(fscache_enqueue_operation); static void fscache_run_op(struct fscache_object *object, struct fscache_operation *op) { + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); + + op->state = FSCACHE_OP_ST_IN_PROGRESS; object->n_in_progress++; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); @@ -84,18 +88,21 @@ int fscache_submit_exclusive_op(struct fscache_object *object, _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); + ASSERTCMP(atomic_read(&op->usage), >, 0); + spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); ASSERT(list_empty(&op->pend_link)); - ret = -ENOBUFS; + op->state = FSCACHE_OP_ST_PENDING; if (fscache_object_is_active(object)) { op->object = object; object->n_ops++; object->n_exclusive++; /* reads and writes must wait */ - if (object->n_ops > 1) { + if (object->n_in_progress > 0) { atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); @@ -121,8 +128,11 @@ int fscache_submit_exclusive_op(struct fscache_object *object, fscache_stat(&fscache_n_op_pend); ret = 0; } else { - /* not allowed to submit ops in any other state */ - BUG(); + /* If we're in any other state, there must have been an I/O + * error of some nature. + */ + ASSERT(test_bit(FSCACHE_IOERROR, &object->cache->flags)); + ret = -EIO; } spin_unlock(&object->lock); @@ -186,6 +196,7 @@ int fscache_submit_op(struct fscache_object *object, _enter("{OBJ%x OP%x},{%u}", object->debug_id, op->debug_id, atomic_read(&op->usage)); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); ASSERTCMP(atomic_read(&op->usage), >, 0); spin_lock(&object->lock); @@ -196,6 +207,7 @@ int fscache_submit_op(struct fscache_object *object, ostate = object->state; smp_rmb(); + op->state = FSCACHE_OP_ST_PENDING; if (fscache_object_is_active(object)) { op->object = object; object->n_ops++; @@ -225,12 +237,15 @@ int fscache_submit_op(struct fscache_object *object, object->state == FSCACHE_OBJECT_LC_DYING || object->state == FSCACHE_OBJECT_WITHDRAWING) { fscache_stat(&fscache_n_op_rejected); + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { fscache_report_unexpected_submission(object, op, ostate); ASSERT(!fscache_object_is_active(object)); + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else { + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } @@ -283,20 +298,28 @@ void fscache_start_operations(struct fscache_object *object) /* * cancel an operation that's pending on an object */ -int fscache_cancel_op(struct fscache_operation *op) +int fscache_cancel_op(struct fscache_operation *op, + void (*do_cancel)(struct fscache_operation *)) { struct fscache_object *object = op->object; int ret; _enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id); + ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING); + ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED); + ASSERTCMP(atomic_read(&op->usage), >, 0); + spin_lock(&object->lock); ret = -EBUSY; - if (!list_empty(&op->pend_link)) { + if (op->state == FSCACHE_OP_ST_PENDING) { + ASSERT(!list_empty(&op->pend_link)); fscache_stat(&fscache_n_op_cancelled); list_del_init(&op->pend_link); - object->n_ops--; + if (do_cancel) + do_cancel(op); + op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) @@ -311,6 +334,70 @@ int fscache_cancel_op(struct fscache_operation *op) } /* + * Cancel all pending operations on an object + */ +void fscache_cancel_all_ops(struct fscache_object *object) +{ + struct fscache_operation *op; + + _enter("OBJ%x", object->debug_id); + + spin_lock(&object->lock); + + while (!list_empty(&object->pending_ops)) { + op = list_entry(object->pending_ops.next, + struct fscache_operation, pend_link); + fscache_stat(&fscache_n_op_cancelled); + list_del_init(&op->pend_link); + + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); + op->state = FSCACHE_OP_ST_CANCELLED; + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) + wake_up_bit(&op->flags, FSCACHE_OP_WAITING); + fscache_put_operation(op); + cond_resched_lock(&object->lock); + } + + spin_unlock(&object->lock); + _leave(""); +} + +/* + * Record the completion or cancellation of an in-progress operation. + */ +void fscache_op_complete(struct fscache_operation *op, bool cancelled) +{ + struct fscache_object *object = op->object; + + _enter("OBJ%x", object->debug_id); + + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); + ASSERTCMP(object->n_in_progress, >, 0); + ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), + object->n_exclusive, >, 0); + ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), + object->n_in_progress, ==, 1); + + spin_lock(&object->lock); + + op->state = cancelled ? + FSCACHE_OP_ST_CANCELLED : FSCACHE_OP_ST_COMPLETE; + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + object->n_in_progress--; + if (object->n_in_progress == 0) + fscache_start_operations(object); + + spin_unlock(&object->lock); + _leave(""); +} +EXPORT_SYMBOL(fscache_op_complete); + +/* * release an operation * - queues pending ops if this is the last in-progress op */ @@ -328,8 +415,9 @@ void fscache_put_operation(struct fscache_operation *op) return; _debug("PUT OP"); - if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) - BUG(); + ASSERTIFCMP(op->state != FSCACHE_OP_ST_COMPLETE, + op->state, ==, FSCACHE_OP_ST_CANCELLED); + op->state = FSCACHE_OP_ST_DEAD; fscache_stat(&fscache_n_op_release); @@ -340,8 +428,14 @@ void fscache_put_operation(struct fscache_operation *op) object = op->object; - if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) - atomic_dec(&object->n_reads); + if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) { + if (atomic_dec_and_test(&object->n_reads)) { + clear_bit(FSCACHE_COOKIE_WAITING_ON_READS, + &object->cookie->flags); + wake_up_bit(&object->cookie->flags, + FSCACHE_COOKIE_WAITING_ON_READS); + } + } /* now... we may get called with the object spinlock held, so we * complete the cleanup here only if we can immediately acquire the @@ -359,16 +453,6 @@ void fscache_put_operation(struct fscache_operation *op) return; } - if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { - ASSERTCMP(object->n_exclusive, >, 0); - object->n_exclusive--; - } - - ASSERTCMP(object->n_in_progress, >, 0); - object->n_in_progress--; - if (object->n_in_progress == 0) - fscache_start_operations(object); - ASSERTCMP(object->n_ops, >, 0); object->n_ops--; if (object->n_ops == 0) @@ -407,23 +491,14 @@ void fscache_operation_gc(struct work_struct *work) spin_unlock(&cache->op_gc_list_lock); object = op->object; + spin_lock(&object->lock); _debug("GC DEFERRED REL OBJ%x OP%x", object->debug_id, op->debug_id); fscache_stat(&fscache_n_op_gc); ASSERTCMP(atomic_read(&op->usage), ==, 0); - - spin_lock(&object->lock); - if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { - ASSERTCMP(object->n_exclusive, >, 0); - object->n_exclusive--; - } - - ASSERTCMP(object->n_in_progress, >, 0); - object->n_in_progress--; - if (object->n_in_progress == 0) - fscache_start_operations(object); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_DEAD); ASSERTCMP(object->n_ops, >, 0); object->n_ops--; @@ -431,6 +506,7 @@ void fscache_operation_gc(struct work_struct *work) fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); spin_unlock(&object->lock); + kfree(op); } while (count++ < 20); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 3f7a59bfa7ad..ff000e52072d 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -56,6 +56,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie, _enter("%p,%p,%x", cookie, page, gfp); +try_again: rcu_read_lock(); val = radix_tree_lookup(&cookie->stores, page->index); if (!val) { @@ -104,11 +105,19 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie, return true; page_busy: - /* we might want to wait here, but that could deadlock the allocator as - * the work threads writing to the cache may all end up sleeping - * on memory allocation */ - fscache_stat(&fscache_n_store_vmscan_busy); - return false; + /* We will wait here if we're allowed to, but that could deadlock the + * allocator as the work threads writing to the cache may all end up + * sleeping on memory allocation, so we may need to impose a timeout + * too. */ + if (!(gfp & __GFP_WAIT)) { + fscache_stat(&fscache_n_store_vmscan_busy); + return false; + } + + fscache_stat(&fscache_n_store_vmscan_wait); + __fscache_wait_on_page_write(cookie, page); + gfp &= ~__GFP_WAIT; + goto try_again; } EXPORT_SYMBOL(__fscache_maybe_release_page); @@ -162,6 +171,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_abort_object(object); } + fscache_op_complete(op, true); _leave(""); } @@ -223,6 +233,8 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) _enter("{OP%x}", op->op.debug_id); + ASSERTCMP(op->n_pages, ==, 0); + fscache_hist(fscache_retrieval_histogram, op->start_time); if (op->context) fscache_put_context(op->op.object->cookie, op->context); @@ -291,6 +303,17 @@ static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) } /* + * Handle cancellation of a pending retrieval op + */ +static void fscache_do_cancel_retrieval(struct fscache_operation *_op) +{ + struct fscache_retrieval *op = + container_of(_op, struct fscache_retrieval, op); + + op->n_pages = 0; +} + +/* * wait for an object to become active (or dead) */ static int fscache_wait_for_retrieval_activation(struct fscache_object *object, @@ -307,8 +330,8 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object, fscache_stat(stat_op_waits); if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, fscache_wait_bit_interruptible, - TASK_INTERRUPTIBLE) < 0) { - ret = fscache_cancel_op(&op->op); + TASK_INTERRUPTIBLE) != 0) { + ret = fscache_cancel_op(&op->op, fscache_do_cancel_retrieval); if (ret == 0) return -ERESTARTSYS; @@ -320,7 +343,14 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object, _debug("<<< GO"); check_if_dead: + if (op->op.state == FSCACHE_OP_ST_CANCELLED) { + fscache_stat(stat_object_dead); + _leave(" = -ENOBUFS [cancelled]"); + return -ENOBUFS; + } if (unlikely(fscache_object_is_dead(object))) { + pr_err("%s() = -ENOBUFS [obj dead %d]\n", __func__, op->op.state); + fscache_cancel_op(&op->op, fscache_do_cancel_retrieval); fscache_stat(stat_object_dead); return -ENOBUFS; } @@ -353,6 +383,11 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, if (hlist_empty(&cookie->backing_objects)) goto nobufs; + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(page, !=, NULL); @@ -364,6 +399,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, _leave(" = -ENOMEM"); return -ENOMEM; } + op->n_pages = 1; spin_lock(&cookie->lock); @@ -375,10 +411,10 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); atomic_inc(&object->n_reads); - set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock; + goto nobufs_unlock_dec; spin_unlock(&cookie->lock); fscache_stat(&fscache_n_retrieval_ops); @@ -425,6 +461,8 @@ error: _leave(" = %d", ret); return ret; +nobufs_unlock_dec: + atomic_dec(&object->n_reads); nobufs_unlock: spin_unlock(&cookie->lock); kfree(op); @@ -472,6 +510,11 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, if (hlist_empty(&cookie->backing_objects)) goto nobufs; + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(*nr_pages, >, 0); ASSERT(!list_empty(pages)); @@ -482,6 +525,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(mapping, end_io_func, context); if (!op) return -ENOMEM; + op->n_pages = *nr_pages; spin_lock(&cookie->lock); @@ -491,10 +535,10 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, struct fscache_object, cookie_link); atomic_inc(&object->n_reads); - set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock; + goto nobufs_unlock_dec; spin_unlock(&cookie->lock); fscache_stat(&fscache_n_retrieval_ops); @@ -541,6 +585,8 @@ error: _leave(" = %d", ret); return ret; +nobufs_unlock_dec: + atomic_dec(&object->n_reads); nobufs_unlock: spin_unlock(&cookie->lock); kfree(op); @@ -577,12 +623,18 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(page, !=, NULL); + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + if (fscache_wait_for_deferred_lookup(cookie) < 0) return -ERESTARTSYS; op = fscache_alloc_retrieval(page->mapping, NULL, NULL); if (!op) return -ENOMEM; + op->n_pages = 1; spin_lock(&cookie->lock); @@ -658,9 +710,27 @@ static void fscache_write_op(struct fscache_operation *_op) spin_lock(&object->lock); cookie = object->cookie; - if (!fscache_object_is_active(object) || !cookie) { + if (!fscache_object_is_active(object)) { + /* If we get here, then the on-disk cache object likely longer + * exists, so we should just cancel this write operation. + */ + spin_unlock(&object->lock); + fscache_op_complete(&op->op, false); + _leave(" [inactive]"); + return; + } + + if (!cookie) { + /* If we get here, then the cookie belonging to the object was + * detached, probably by the cookie being withdrawn due to + * memory pressure, which means that the pages we might write + * to the cache from no longer exist - therefore, we can just + * cancel this write operation. + */ spin_unlock(&object->lock); - _leave(""); + fscache_op_complete(&op->op, false); + _leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}", + _op->flags, _op->state, object->state, object->flags); return; } @@ -696,6 +766,7 @@ static void fscache_write_op(struct fscache_operation *_op) fscache_end_page_write(object, page); if (ret < 0) { fscache_abort_object(object); + fscache_op_complete(&op->op, true); } else { fscache_enqueue_operation(&op->op); } @@ -710,6 +781,38 @@ superseded: spin_unlock(&cookie->stores_lock); clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); spin_unlock(&object->lock); + fscache_op_complete(&op->op, true); + _leave(""); +} + +/* + * Clear the pages pending writing for invalidation + */ +void fscache_invalidate_writes(struct fscache_cookie *cookie) +{ + struct page *page; + void *results[16]; + int n, i; + + _enter(""); + + while (spin_lock(&cookie->stores_lock), + n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, + ARRAY_SIZE(results), + FSCACHE_COOKIE_PENDING_TAG), + n > 0) { + for (i = n - 1; i >= 0; i--) { + page = results[i]; + radix_tree_delete(&cookie->stores, page->index); + } + + spin_unlock(&cookie->stores_lock); + + for (i = n - 1; i >= 0; i--) + page_cache_release(results[i]); + } + + spin_unlock(&cookie->stores_lock); _leave(""); } @@ -759,7 +862,12 @@ int __fscache_write_page(struct fscache_cookie *cookie, fscache_stat(&fscache_n_stores); - op = kzalloc(sizeof(*op), GFP_NOIO); + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + + op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY); if (!op) goto nomem; @@ -915,6 +1023,40 @@ done: EXPORT_SYMBOL(__fscache_uncache_page); /** + * fscache_mark_page_cached - Mark a page as being cached + * @op: The retrieval op pages are being marked for + * @page: The page to be marked + * + * Mark a netfs page as being cached. After this is called, the netfs + * must call fscache_uncache_page() to remove the mark. + */ +void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page) +{ + struct fscache_cookie *cookie = op->op.object->cookie; + +#ifdef CONFIG_FSCACHE_STATS + atomic_inc(&fscache_n_marks); +#endif + + _debug("- mark %p{%lx}", page, page->index); + if (TestSetPageFsCache(page)) { + static bool once_only; + if (!once_only) { + once_only = true; + printk(KERN_WARNING "FS-Cache:" + " Cookie type %s marked page %lx" + " multiple times\n", + cookie->def->name, page->index); + } + } + + if (cookie->def->mark_page_cached) + cookie->def->mark_page_cached(cookie->netfs_data, + op->mapping, page); +} +EXPORT_SYMBOL(fscache_mark_page_cached); + +/** * fscache_mark_pages_cached - Mark pages as being cached * @op: The retrieval op pages are being marked for * @pagevec: The pages to be marked @@ -925,32 +1067,11 @@ EXPORT_SYMBOL(__fscache_uncache_page); void fscache_mark_pages_cached(struct fscache_retrieval *op, struct pagevec *pagevec) { - struct fscache_cookie *cookie = op->op.object->cookie; unsigned long loop; -#ifdef CONFIG_FSCACHE_STATS - atomic_add(pagevec->nr, &fscache_n_marks); -#endif - - for (loop = 0; loop < pagevec->nr; loop++) { - struct page *page = pagevec->pages[loop]; - - _debug("- mark %p{%lx}", page, page->index); - if (TestSetPageFsCache(page)) { - static bool once_only; - if (!once_only) { - once_only = true; - printk(KERN_WARNING "FS-Cache:" - " Cookie type %s marked page %lx" - " multiple times\n", - cookie->def->name, page->index); - } - } - } + for (loop = 0; loop < pagevec->nr; loop++) + fscache_mark_page_cached(op, pagevec->pages[loop]); - if (cookie->def->mark_pages_cached) - cookie->def->mark_pages_cached(cookie->netfs_data, - op->mapping, pagevec); pagevec_reinit(pagevec); } EXPORT_SYMBOL(fscache_mark_pages_cached); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4765190d537f..8179e8bc4a3d 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -69,6 +69,7 @@ atomic_t fscache_n_store_vmscan_not_storing; atomic_t fscache_n_store_vmscan_gone; atomic_t fscache_n_store_vmscan_busy; atomic_t fscache_n_store_vmscan_cancelled; +atomic_t fscache_n_store_vmscan_wait; atomic_t fscache_n_marks; atomic_t fscache_n_uncaches; @@ -80,6 +81,9 @@ atomic_t fscache_n_acquires_ok; atomic_t fscache_n_acquires_nobufs; atomic_t fscache_n_acquires_oom; +atomic_t fscache_n_invalidates; +atomic_t fscache_n_invalidates_run; + atomic_t fscache_n_updates; atomic_t fscache_n_updates_null; atomic_t fscache_n_updates_run; @@ -112,6 +116,7 @@ atomic_t fscache_n_cop_alloc_object; atomic_t fscache_n_cop_lookup_object; atomic_t fscache_n_cop_lookup_complete; atomic_t fscache_n_cop_grab_object; +atomic_t fscache_n_cop_invalidate_object; atomic_t fscache_n_cop_update_object; atomic_t fscache_n_cop_drop_object; atomic_t fscache_n_cop_put_object; @@ -168,6 +173,10 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_object_created), atomic_read(&fscache_n_object_lookups_timed_out)); + seq_printf(m, "Invals : n=%u run=%u\n", + atomic_read(&fscache_n_invalidates), + atomic_read(&fscache_n_invalidates_run)); + seq_printf(m, "Updates: n=%u nul=%u run=%u\n", atomic_read(&fscache_n_updates), atomic_read(&fscache_n_updates_null), @@ -224,11 +233,12 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_store_radix_deletes), atomic_read(&fscache_n_store_pages_over_limit)); - seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u\n", + seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u wt=%u\n", atomic_read(&fscache_n_store_vmscan_not_storing), atomic_read(&fscache_n_store_vmscan_gone), atomic_read(&fscache_n_store_vmscan_busy), - atomic_read(&fscache_n_store_vmscan_cancelled)); + atomic_read(&fscache_n_store_vmscan_cancelled), + atomic_read(&fscache_n_store_vmscan_wait)); seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u rej=%u\n", atomic_read(&fscache_n_op_pend), @@ -246,7 +256,8 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_cop_lookup_object), atomic_read(&fscache_n_cop_lookup_complete), atomic_read(&fscache_n_cop_grab_object)); - seq_printf(m, "CacheOp: upo=%d dro=%d pto=%d atc=%d syn=%d\n", + seq_printf(m, "CacheOp: inv=%d upo=%d dro=%d pto=%d atc=%d syn=%d\n", + atomic_read(&fscache_n_cop_invalidate_object), atomic_read(&fscache_n_cop_update_object), atomic_read(&fscache_n_cop_drop_object), atomic_read(&fscache_n_cop_put_object), diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 0b35903219bc..d47f11658c17 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -35,6 +35,16 @@ static int hfs_readpage(struct file *file, struct page *page) return block_read_full_page(page, hfs_get_block); } +static void hfs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + hfs_file_truncate(inode); + } +} + static int hfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -45,11 +55,8 @@ static int hfs_write_begin(struct file *file, struct address_space *mapping, ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, hfs_get_block, &HFS_I(mapping->host)->phys_size); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + hfs_write_failed(mapping, pos + len); return ret; } @@ -120,6 +127,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; ssize_t ret; @@ -135,7 +143,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, loff_t end = offset + iov_length(iov, nr_segs); if (end > isize) - vmtruncate(inode, isize); + hfs_write_failed(mapping, end); } return ret; @@ -617,9 +625,12 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) attr->ia_size != i_size_read(inode)) { inode_dio_wait(inode); - error = vmtruncate(inode, attr->ia_size); + error = inode_newsize_ok(inode, attr->ia_size); if (error) return error; + + truncate_setsize(inode, attr->ia_size); + hfs_file_truncate(inode); } setattr_copy(inode, attr); @@ -668,7 +679,6 @@ static const struct file_operations hfs_file_operations = { static const struct inode_operations hfs_file_inode_operations = { .lookup = hfs_file_lookup, - .truncate = hfs_file_truncate, .setattr = hfs_inode_setattr, .setxattr = hfs_setxattr, .getxattr = hfs_getxattr, diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index 4cfbe2edd296..6feefc0cb48a 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -176,12 +176,14 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); /* are all of the bits in range? */ if ((offset + count) > sbi->total_blocks) - return -2; + return -ENOENT; mutex_lock(&sbi->alloc_mutex); mapping = sbi->alloc_file->i_mapping; pnr = offset / PAGE_CACHE_BITS; page = read_mapping_page(mapping, pnr, NULL); + if (IS_ERR(page)) + goto kaboom; pptr = kmap(page); curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; end = pptr + PAGE_CACHE_BITS / 32; @@ -214,6 +216,8 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) set_page_dirty(page); kunmap(page); page = read_mapping_page(mapping, ++pnr, NULL); + if (IS_ERR(page)) + goto kaboom; pptr = kmap(page); curr = pptr; end = pptr + PAGE_CACHE_BITS / 32; @@ -232,4 +236,11 @@ out: mutex_unlock(&sbi->alloc_mutex); return 0; + +kaboom: + printk(KERN_CRIT "hfsplus: unable to mark blocks free: error %ld\n", + PTR_ERR(page)); + mutex_unlock(&sbi->alloc_mutex); + + return -EIO; } diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 21023d9f8ff3..685d07d0ed18 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -159,7 +159,7 @@ void hfs_btree_close(struct hfs_btree *tree) kfree(tree); } -void hfs_btree_write(struct hfs_btree *tree) +int hfs_btree_write(struct hfs_btree *tree) { struct hfs_btree_header_rec *head; struct hfs_bnode *node; @@ -168,7 +168,7 @@ void hfs_btree_write(struct hfs_btree *tree) node = hfs_bnode_find(tree, 0); if (IS_ERR(node)) /* panic? */ - return; + return -EIO; /* Load the header */ page = node->page[0]; head = (struct hfs_btree_header_rec *)(kmap(page) + @@ -186,6 +186,7 @@ void hfs_btree_write(struct hfs_btree *tree) kunmap(page); set_page_dirty(page); hfs_bnode_put(node); + return 0; } static struct hfs_bnode *hfs_bmap_new_bmap(struct hfs_bnode *prev, u32 idx) diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 5849e3ef35cc..eba76eab6d62 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -329,6 +329,7 @@ static int hfsplus_free_extents(struct super_block *sb, { u32 count, start; int i; + int err = 0; hfsplus_dump_extent(extent); for (i = 0; i < 8; extent++, i++) { @@ -345,18 +346,33 @@ found: for (;;) { start = be32_to_cpu(extent->start_block); if (count <= block_nr) { - hfsplus_block_free(sb, start, count); + err = hfsplus_block_free(sb, start, count); + if (err) { + printk(KERN_ERR "hfs: can't free extent\n"); + dprint(DBG_EXTENT, " start: %u count: %u\n", + start, count); + } extent->block_count = 0; extent->start_block = 0; block_nr -= count; } else { count -= block_nr; - hfsplus_block_free(sb, start + count, block_nr); + err = hfsplus_block_free(sb, start + count, block_nr); + if (err) { + printk(KERN_ERR "hfs: can't free extent\n"); + dprint(DBG_EXTENT, " start: %u count: %u\n", + start, count); + } extent->block_count = cpu_to_be32(count); block_nr = 0; } - if (!block_nr || !i) - return 0; + if (!block_nr || !i) { + /* + * Try to free all extents and + * return only last error + */ + return err; + } i--; extent--; count = be32_to_cpu(extent->block_count); diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index c571de224b15..a6da86b1b4c1 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -335,7 +335,7 @@ int hfsplus_block_free(struct super_block *, u32, u32); /* btree.c */ struct hfs_btree *hfs_btree_open(struct super_block *, u32); void hfs_btree_close(struct hfs_btree *); -void hfs_btree_write(struct hfs_btree *); +int hfs_btree_write(struct hfs_btree *); struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *); void hfs_bmap_free(struct hfs_bnode *); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 2172aa5976f5..799b336b59f9 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -28,6 +28,16 @@ static int hfsplus_writepage(struct page *page, struct writeback_control *wbc) return block_write_full_page(page, hfsplus_get_block, wbc); } +static void hfsplus_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + hfsplus_file_truncate(inode); + } +} + static int hfsplus_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -38,11 +48,8 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping, ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, hfsplus_get_block, &HFSPLUS_I(mapping->host)->phys_size); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + hfsplus_write_failed(mapping, pos + len); return ret; } @@ -116,6 +123,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; ssize_t ret; @@ -131,7 +139,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, loff_t end = offset + iov_length(iov, nr_segs); if (end > isize) - vmtruncate(inode, isize); + hfsplus_write_failed(mapping, end); } return ret; @@ -300,10 +308,8 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { inode_dio_wait(inode); - - error = vmtruncate(inode, attr->ia_size); - if (error) - return error; + truncate_setsize(inode, attr->ia_size); + hfsplus_file_truncate(inode); } setattr_copy(inode, attr); @@ -358,7 +364,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, - .truncate = hfsplus_file_truncate, .setattr = hfsplus_setattr, .setxattr = hfsplus_setxattr, .getxattr = hfsplus_getxattr, diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 811a84d2d964..796198d26553 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -127,8 +127,14 @@ static int hfsplus_system_write_inode(struct inode *inode) hfsplus_mark_mdb_dirty(inode->i_sb); } hfsplus_inode_write_fork(inode, fork); - if (tree) - hfs_btree_write(tree); + if (tree) { + int err = hfs_btree_write(tree); + if (err) { + printk(KERN_ERR "hfs: b-tree write err: %d, ino %lu\n", + err, inode->i_ino); + return err; + } + } return 0; } @@ -226,6 +232,7 @@ out: static void delayed_sync_fs(struct work_struct *work) { + int err; struct hfsplus_sb_info *sbi; sbi = container_of(work, struct hfsplus_sb_info, sync_work.work); @@ -234,7 +241,9 @@ static void delayed_sync_fs(struct work_struct *work) sbi->work_queued = 0; spin_unlock(&sbi->work_lock); - hfsplus_sync_fs(sbi->alloc_file->i_sb, 1); + err = hfsplus_sync_fs(sbi->alloc_file->i_sb, 1); + if (err) + printk(KERN_ERR "hfs: delayed sync fs err %d\n", err); } void hfsplus_mark_mdb_dirty(struct super_block *sb) diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 89d2a5803ae3..fbfe2df5624b 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -50,7 +50,7 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno) return disk_secno; } -static void hpfs_truncate(struct inode *i) +void hpfs_truncate(struct inode *i) { if (IS_IMMUTABLE(i)) return /*-EPERM*/; hpfs_lock_assert(i->i_sb); @@ -105,6 +105,16 @@ static int hpfs_readpage(struct file *file, struct page *page) return block_read_full_page(page,hpfs_get_block); } +static void hpfs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + hpfs_truncate(inode); + } +} + static int hpfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -115,11 +125,8 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping, ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, hpfs_get_block, &hpfs_i(mapping->host)->mmu_private); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + hpfs_write_failed(mapping, pos + len); return ret; } @@ -166,6 +173,5 @@ const struct file_operations hpfs_file_ops = const struct inode_operations hpfs_file_iops = { - .truncate = hpfs_truncate, .setattr = hpfs_setattr, }; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 7102aaecc244..b7ae286646b5 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -252,6 +252,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, const char *, /* file.c */ int hpfs_file_fsync(struct file *, loff_t, loff_t, int); +void hpfs_truncate(struct inode *); extern const struct file_operations hpfs_file_ops; extern const struct inode_operations hpfs_file_iops; extern const struct address_space_operations hpfs_aops; diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 804a9a842cbc..5dc06c837105 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -277,9 +277,12 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); + error = inode_newsize_ok(inode, attr->ia_size); if (error) goto out_unlock; + + truncate_setsize(inode, attr->ia_size); + hpfs_truncate(inode); } setattr_copy(inode, attr); diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 9d3afd157f99..dd7442c58358 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -119,9 +119,12 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) iattr->ia_size != i_size_read(inode)) { inode_dio_wait(inode); - rc = vmtruncate(inode, iattr->ia_size); + rc = inode_newsize_ok(inode, iattr->ia_size); if (rc) return rc; + + truncate_setsize(inode, iattr->ia_size); + jfs_truncate(inode); } setattr_copy(inode, iattr); @@ -133,7 +136,6 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) } const struct inode_operations jfs_file_inode_operations = { - .truncate = jfs_truncate, .setxattr = jfs_setxattr, .getxattr = jfs_getxattr, .listxattr = jfs_listxattr, diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 4692bf3ca8cb..b7dc47ba675e 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -300,6 +300,16 @@ static int jfs_readpages(struct file *file, struct address_space *mapping, return mpage_readpages(mapping, pages, nr_pages, jfs_get_block); } +static void jfs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + jfs_truncate(inode); + } +} + static int jfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -308,11 +318,8 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping, ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, jfs_get_block); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + jfs_write_failed(mapping, pos + len); return ret; } @@ -326,6 +333,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_mapping->host; ssize_t ret; @@ -341,7 +349,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, loff_t end = offset + iov_length(iov, nr_segs); if (end > isize) - vmtruncate(inode, isize); + jfs_write_failed(mapping, end); } return ret; diff --git a/fs/libfs.c b/fs/libfs.c index 35fc6e74cd88..916da8c4158b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -369,8 +369,6 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr) struct inode *inode = dentry->d_inode; int error; - WARN_ON_ONCE(inode->i_op->truncate); - error = inode_change_ok(inode, iattr); if (error) return error; diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index e1a3b6bf6324..9a59cbade2fb 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -1887,9 +1887,15 @@ int logfs_truncate(struct inode *inode, u64 target) logfs_put_wblocks(sb, NULL, 1); } - if (!err) - err = vmtruncate(inode, target); + if (!err) { + err = inode_newsize_ok(inode, target); + if (err) + goto out; + + truncate_setsize(inode, target); + } + out: /* I don't trust error recovery yet. */ WARN_ON(err); return err; diff --git a/fs/minix/file.c b/fs/minix/file.c index 4493ce695ab8..adc6f5494231 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -34,9 +34,12 @@ static int minix_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); + error = inode_newsize_ok(inode, attr->ia_size); if (error) return error; + + truncate_setsize(inode, attr->ia_size); + minix_truncate(inode); } setattr_copy(inode, attr); @@ -45,7 +48,6 @@ static int minix_setattr(struct dentry *dentry, struct iattr *attr) } const struct inode_operations minix_file_inode_operations = { - .truncate = minix_truncate, .setattr = minix_setattr, .getattr = minix_getattr, }; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 4fc5f8ab1c44..99541cceb584 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -395,6 +395,16 @@ int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len) return __block_write_begin(page, pos, len, minix_get_block); } +static void minix_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + minix_truncate(inode); + } +} + static int minix_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -403,11 +413,8 @@ static int minix_write_begin(struct file *file, struct address_space *mapping, ret = block_write_begin(mapping, pos, len, flags, pagep, minix_get_block); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + minix_write_failed(mapping, pos + len); return ret; } diff --git a/fs/namei.c b/fs/namei.c index 5f4cdf3ad913..43a97ee1d4c8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1275,9 +1275,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, *need_lookup = false; dentry = d_lookup(dir, name); if (dentry) { - if (d_need_lookup(dentry)) { - *need_lookup = true; - } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { + if (dentry->d_flags & DCACHE_OP_REVALIDATE) { error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (error < 0) { @@ -1383,8 +1381,6 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name, return -ECHILD; nd->seq = seq; - if (unlikely(d_need_lookup(dentry))) - goto unlazy; if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { status = d_revalidate(dentry, nd->flags); if (unlikely(status <= 0)) { @@ -1410,11 +1406,6 @@ unlazy: if (unlikely(!dentry)) goto need_lookup; - if (unlikely(d_need_lookup(dentry))) { - dput(dentry); - goto need_lookup; - } - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) status = d_revalidate(dentry, nd->flags); if (unlikely(status <= 0)) { @@ -1859,7 +1850,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (flags & LOOKUP_ROOT) { struct inode *inode = nd->root.dentry->d_inode; if (*name) { - if (!inode->i_op->lookup) + if (!can_lookup(inode)) return -ENOTDIR; retval = inode_permission(inode, MAY_EXEC); if (retval) @@ -1903,6 +1894,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, get_fs_pwd(current->fs, &nd->path); } } else { + /* Caller must check execute permissions on the starting path component */ struct fd f = fdget_raw(dfd); struct dentry *dentry; @@ -1912,16 +1904,10 @@ static int path_init(int dfd, const char *name, unsigned int flags, dentry = f.file->f_path.dentry; if (*name) { - if (!S_ISDIR(dentry->d_inode->i_mode)) { + if (!can_lookup(dentry->d_inode)) { fdput(f); return -ENOTDIR; } - - retval = inode_permission(dentry->d_inode, MAY_EXEC); - if (retval) { - fdput(f); - return retval; - } } nd->path = f.file->f_path; @@ -2189,15 +2175,19 @@ int user_path_at(int dfd, const char __user *name, unsigned flags, * path-walking is complete. */ static struct filename * -user_path_parent(int dfd, const char __user *path, struct nameidata *nd) +user_path_parent(int dfd, const char __user *path, struct nameidata *nd, + unsigned int flags) { struct filename *s = getname(path); int error; + /* only LOOKUP_REVAL is allowed in extra flags */ + flags &= LOOKUP_REVAL; + if (IS_ERR(s)) return s; - error = filename_lookup(dfd, s, LOOKUP_PARENT, nd); + error = filename_lookup(dfd, s, flags | LOOKUP_PARENT, nd); if (error) { putname(s); return ERR_PTR(error); @@ -3044,12 +3034,22 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, return file; } -struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir) +struct dentry *kern_path_create(int dfd, const char *pathname, + struct path *path, unsigned int lookup_flags) { struct dentry *dentry = ERR_PTR(-EEXIST); struct nameidata nd; int err2; - int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); + int error; + bool is_dir = (lookup_flags & LOOKUP_DIRECTORY); + + /* + * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any + * other flags passed in are ignored! + */ + lookup_flags &= LOOKUP_REVAL; + + error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd); if (error) return ERR_PTR(error); @@ -3113,13 +3113,14 @@ void done_path_create(struct path *path, struct dentry *dentry) } EXPORT_SYMBOL(done_path_create); -struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) +struct dentry *user_path_create(int dfd, const char __user *pathname, + struct path *path, unsigned int lookup_flags) { struct filename *tmp = getname(pathname); struct dentry *res; if (IS_ERR(tmp)) return ERR_CAST(tmp); - res = kern_path_create(dfd, tmp->name, path, is_dir); + res = kern_path_create(dfd, tmp->name, path, lookup_flags); putname(tmp); return res; } @@ -3175,12 +3176,13 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, struct dentry *dentry; struct path path; int error; + unsigned int lookup_flags = 0; error = may_mknod(mode); if (error) return error; - - dentry = user_path_create(dfd, filename, &path, 0); +retry: + dentry = user_path_create(dfd, filename, &path, lookup_flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -3203,6 +3205,10 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, } out: done_path_create(&path, dentry); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } @@ -3241,8 +3247,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) struct dentry *dentry; struct path path; int error; + unsigned int lookup_flags = LOOKUP_DIRECTORY; - dentry = user_path_create(dfd, pathname, &path, 1); +retry: + dentry = user_path_create(dfd, pathname, &path, lookup_flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -3252,6 +3260,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) if (!error) error = vfs_mkdir(path.dentry->d_inode, dentry, mode); done_path_create(&path, dentry); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } @@ -3327,8 +3339,9 @@ static long do_rmdir(int dfd, const char __user *pathname) struct filename *name; struct dentry *dentry; struct nameidata nd; - - name = user_path_parent(dfd, pathname, &nd); + unsigned int lookup_flags = 0; +retry: + name = user_path_parent(dfd, pathname, &nd, lookup_flags); if (IS_ERR(name)) return PTR_ERR(name); @@ -3370,6 +3383,10 @@ exit2: exit1: path_put(&nd.path); putname(name); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } @@ -3423,8 +3440,9 @@ static long do_unlinkat(int dfd, const char __user *pathname) struct dentry *dentry; struct nameidata nd; struct inode *inode = NULL; - - name = user_path_parent(dfd, pathname, &nd); + unsigned int lookup_flags = 0; +retry: + name = user_path_parent(dfd, pathname, &nd, lookup_flags); if (IS_ERR(name)) return PTR_ERR(name); @@ -3462,6 +3480,11 @@ exit2: exit1: path_put(&nd.path); putname(name); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + inode = NULL; + goto retry; + } return error; slashes: @@ -3513,12 +3536,13 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, struct filename *from; struct dentry *dentry; struct path path; + unsigned int lookup_flags = 0; from = getname(oldname); if (IS_ERR(from)) return PTR_ERR(from); - - dentry = user_path_create(newdfd, newname, &path, 0); +retry: + dentry = user_path_create(newdfd, newname, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_putname; @@ -3527,6 +3551,10 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, if (!error) error = vfs_symlink(path.dentry->d_inode, dentry, from->name); done_path_create(&path, dentry); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } out_putname: putname(from); return error; @@ -3613,12 +3641,13 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; - +retry: error = user_path_at(olddfd, oldname, how, &old_path); if (error) return error; - new_dentry = user_path_create(newdfd, newname, &new_path, 0); + new_dentry = user_path_create(newdfd, newname, &new_path, + (how & LOOKUP_REVAL)); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto out; @@ -3635,6 +3664,10 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); out_dput: done_path_create(&new_path, new_dentry); + if (retry_estale(error, how)) { + how |= LOOKUP_REVAL; + goto retry; + } out: path_put(&old_path); @@ -3807,15 +3840,17 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, struct nameidata oldnd, newnd; struct filename *from; struct filename *to; + unsigned int lookup_flags = 0; + bool should_retry = false; int error; - - from = user_path_parent(olddfd, oldname, &oldnd); +retry: + from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); if (IS_ERR(from)) { error = PTR_ERR(from); goto exit; } - to = user_path_parent(newdfd, newname, &newnd); + to = user_path_parent(newdfd, newname, &newnd, lookup_flags); if (IS_ERR(to)) { error = PTR_ERR(to); goto exit1; @@ -3887,11 +3922,18 @@ exit3: unlock_rename(new_dir, old_dir); mnt_drop_write(oldnd.path.mnt); exit2: + if (retry_estale(error, lookup_flags)) + should_retry = true; path_put(&newnd.path); putname(to); exit1: path_put(&oldnd.path); putname(from); + if (should_retry) { + should_retry = false; + lookup_flags |= LOOKUP_REVAL; + goto retry; + } exit: return error; } diff --git a/fs/namespace.c b/fs/namespace.c index 398a50ff2438..55605c552787 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -313,7 +313,7 @@ int __mnt_want_write(struct vfsmount *m) * incremented count after it has set MNT_WRITE_HOLD. */ smp_mb(); - while (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) cpu_relax(); /* * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index d7e9fe77188a..1acdad7fcec7 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -976,9 +976,7 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) goto out; if (attr->ia_size != i_size_read(inode)) { - result = vmtruncate(inode, attr->ia_size); - if (result) - goto out; + truncate_setsize(inode, attr->ia_size); mark_inode_dirty(inode); } } diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index c817787fbdb4..24d1d1c5fcaf 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -307,6 +307,7 @@ void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) nfs_fscache_inode_unlock(inode); } } +EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie); /* * Replace a per-inode cookie due to revalidation detecting a file having diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index c5b11b53ff33..4ecb76652eba 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -153,6 +153,22 @@ static inline void nfs_readpage_to_fscache(struct inode *inode, } /* + * Invalidate the contents of fscache for this inode. This will not sleep. + */ +static inline void nfs_fscache_invalidate(struct inode *inode) +{ + fscache_invalidate(NFS_I(inode)->fscache); +} + +/* + * Wait for an object to finish being invalidated. + */ +static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) +{ + fscache_wait_on_invalidate(NFS_I(inode)->fscache); +} + +/* * indicate the client caching state as readable text */ static inline const char *nfs_server_fscache_state(struct nfs_server *server) @@ -162,7 +178,6 @@ static inline const char *nfs_server_fscache_state(struct nfs_server *server) return "no "; } - #else /* CONFIG_NFS_FSCACHE */ static inline int nfs_fscache_register(void) { return 0; } static inline void nfs_fscache_unregister(void) {} @@ -205,6 +220,10 @@ static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, static inline void nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) {} + +static inline void nfs_fscache_invalidate(struct inode *inode) {} +static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) {} + static inline const char *nfs_server_fscache_state(struct nfs_server *server) { return "no "; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2faae14d89f4..ebeb94ce1b0b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -161,10 +161,12 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo_timestamp = jiffies; memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); - if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; - else + nfs_fscache_invalidate(inode); + } else { nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + } } void nfs_zap_caches(struct inode *inode) @@ -179,6 +181,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) if (mapping->nrpages != 0) { spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; + nfs_fscache_invalidate(inode); spin_unlock(&inode->i_lock); } } @@ -881,7 +884,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); spin_unlock(&inode->i_lock); nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); - nfs_fscache_reset_inode_cookie(inode); + nfs_fscache_wait_on_invalidate(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); return 0; @@ -957,6 +960,10 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } + + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); + return ret; } @@ -1205,8 +1212,10 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr struct nfs_inode *nfsi = NFS_I(inode); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) { nfsi->cache_validity |= NFS_INO_INVALID_DATA; + nfs_fscache_invalidate(inode); + } if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; return nfs_refresh_inode_locked(inode, fattr); @@ -1494,6 +1503,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) (save_cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; + if (invalid & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); + return 0; out_err: /* diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index e7699308364a..08ddcccb8887 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -5,6 +5,7 @@ */ #include <linux/nfs_fs.h> #include "internal.h" +#include "fscache.h" #include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -74,6 +75,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs_file_set_open_context(filp, ctx); + nfs_fscache_set_inode_cookie(inode, filp); err = 0; out_put_ctx: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 493f0f41c554..5d864fb36578 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -64,7 +64,7 @@ #include "pnfs.h" #include "netns.h" #include "nfs4session.h" - +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -734,6 +734,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) if (!cinfo->atomic || cinfo->before != dir->i_version) nfs_force_lookup_revalidate(dir); dir->i_version = cinfo->after; + nfs_fscache_invalidate(dir); spin_unlock(&dir->i_lock); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index aa5315bb3666..c25cadf8f8c4 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2375,19 +2375,30 @@ static void nfs_get_cache_cookie(struct super_block *sb, struct nfs_parsed_mount_data *parsed, struct nfs_clone_mount *cloned) { + struct nfs_server *nfss = NFS_SB(sb); char *uniq = NULL; int ulen = 0; - if (parsed && parsed->fscache_uniq) { - uniq = parsed->fscache_uniq; - ulen = strlen(parsed->fscache_uniq); + nfss->fscache_key = NULL; + nfss->fscache = NULL; + + if (parsed) { + if (!(parsed->options & NFS_OPTION_FSCACHE)) + return; + if (parsed->fscache_uniq) { + uniq = parsed->fscache_uniq; + ulen = strlen(parsed->fscache_uniq); + } } else if (cloned) { struct nfs_server *mnt_s = NFS_SB(cloned->sb); + if (!(mnt_s->options & NFS_OPTION_FSCACHE)) + return; if (mnt_s->fscache_key) { uniq = mnt_s->fscache_key->key.uniquifier; ulen = mnt_s->fscache_key->key.uniq_len; }; - } + } else + return; nfs_fscache_get_super_cookie(sb, uniq, ulen); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5209916e1222..b673be31590e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1794,7 +1794,8 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, if (PagePrivate(page)) return -EBUSY; - nfs_fscache_release_page(page, GFP_KERNEL); + if (!nfs_fscache_release_page(page, GFP_KERNEL)) + return -EBUSY; return migrate_page(mapping, newpage, page, mode); } diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index e6c38159622f..e761ee95617f 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -8,61 +8,144 @@ #include <linux/fs.h> #include <linux/debugfs.h> #include <linux/module.h> +#include <linux/nsproxy.h> +#include <linux/sunrpc/clnt.h> +#include <asm/uaccess.h> #include "state.h" -#include "fault_inject.h" +#include "netns.h" struct nfsd_fault_inject_op { char *file; - void (*func)(u64); + u64 (*forget)(struct nfs4_client *, u64); + u64 (*print)(struct nfs4_client *, u64); }; static struct nfsd_fault_inject_op inject_ops[] = { { .file = "forget_clients", - .func = nfsd_forget_clients, + .forget = nfsd_forget_client, + .print = nfsd_print_client, }, { .file = "forget_locks", - .func = nfsd_forget_locks, + .forget = nfsd_forget_client_locks, + .print = nfsd_print_client_locks, }, { .file = "forget_openowners", - .func = nfsd_forget_openowners, + .forget = nfsd_forget_client_openowners, + .print = nfsd_print_client_openowners, }, { .file = "forget_delegations", - .func = nfsd_forget_delegations, + .forget = nfsd_forget_client_delegations, + .print = nfsd_print_client_delegations, }, { .file = "recall_delegations", - .func = nfsd_recall_delegations, + .forget = nfsd_recall_client_delegations, + .print = nfsd_print_client_delegations, }, }; static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); static struct dentry *debug_dir; -static int nfsd_inject_set(void *op_ptr, u64 val) +static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val) { - struct nfsd_fault_inject_op *op = op_ptr; + u64 count = 0; if (val == 0) printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); else printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); - op->func(val); - return 0; + nfs4_lock_state(); + count = nfsd_for_n_state(val, op->forget); + nfs4_unlock_state(); + printk(KERN_INFO "NFSD: %s: found %llu", op->file, count); } -static int nfsd_inject_get(void *data, u64 *val) +static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op, + struct sockaddr_storage *addr, + size_t addr_size) { - *val = 0; - return 0; + char buf[INET6_ADDRSTRLEN]; + struct nfs4_client *clp; + u64 count; + + nfs4_lock_state(); + clp = nfsd_find_client(addr, addr_size); + if (clp) { + count = op->forget(clp, 0); + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count); + } + nfs4_unlock_state(); +} + +static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val) +{ + nfs4_lock_state(); + *val = nfsd_for_n_state(0, op->print); + nfs4_unlock_state(); } -DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n"); +static ssize_t fault_inject_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + static u64 val; + char read_buf[25]; + size_t size, ret; + loff_t pos = *ppos; + + if (!pos) + nfsd_inject_get(file->f_dentry->d_inode->i_private, &val); + size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); + + if (pos < 0) + return -EINVAL; + if (pos >= size || !len) + return 0; + if (len > size - pos) + len = size - pos; + ret = copy_to_user(buf, read_buf + pos, len); + if (ret == len) + return -EFAULT; + len -= ret; + *ppos = pos + len; + return len; +} + +static ssize_t fault_inject_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + char write_buf[INET6_ADDRSTRLEN]; + size_t size = min(sizeof(write_buf) - 1, len); + struct net *net = current->nsproxy->net_ns; + struct sockaddr_storage sa; + u64 val; + + if (copy_from_user(write_buf, buf, size)) + return -EFAULT; + write_buf[size] = '\0'; + + size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); + if (size > 0) + nfsd_inject_set_client(file->f_dentry->d_inode->i_private, &sa, size); + else { + val = simple_strtoll(write_buf, NULL, 0); + nfsd_inject_set(file->f_dentry->d_inode->i_private, val); + } + return len; /* on success, claim we got the whole input */ +} + +static const struct file_operations fops_nfsd = { + .owner = THIS_MODULE, + .read = fault_inject_read, + .write = fault_inject_write, +}; void nfsd_fault_inject_cleanup(void) { diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h deleted file mode 100644 index 90bd0570956c..000000000000 --- a/fs/nfsd/fault_inject.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> - * - * Function definitions for fault injection - */ - -#ifndef LINUX_NFSD_FAULT_INJECT_H -#define LINUX_NFSD_FAULT_INJECT_H - -#ifdef CONFIG_NFSD_FAULT_INJECTION -int nfsd_fault_inject_init(void); -void nfsd_fault_inject_cleanup(void); -void nfsd_forget_clients(u64); -void nfsd_forget_locks(u64); -void nfsd_forget_openowners(u64); -void nfsd_forget_delegations(u64); -void nfsd_recall_delegations(u64); -#else /* CONFIG_NFSD_FAULT_INJECTION */ -static inline int nfsd_fault_inject_init(void) { return 0; } -static inline void nfsd_fault_inject_cleanup(void) {} -static inline void nfsd_forget_clients(u64 num) {} -static inline void nfsd_forget_locks(u64 num) {} -static inline void nfsd_forget_openowners(u64 num) {} -static inline void nfsd_forget_delegations(u64 num) {} -static inline void nfsd_recall_delegations(u64 num) {} -#endif /* CONFIG_NFSD_FAULT_INJECTION */ - -#endif /* LINUX_NFSD_FAULT_INJECT_H */ diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 65c2431ea32f..1051bebff1b0 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -24,7 +24,18 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> +/* Hash tables for nfs4_clientid state */ +#define CLIENT_HASH_BITS 4 +#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) +#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) + +#define LOCKOWNER_INO_HASH_BITS 8 +#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) + +#define SESSION_HASH_SIZE 512 + struct cld_net; +struct nfsd4_client_tracking_ops; struct nfsd_net { struct cld_net *cld_net; @@ -38,7 +49,62 @@ struct nfsd_net { struct lock_manager nfsd4_manager; bool grace_ended; time_t boot_time; + + /* + * reclaim_str_hashtbl[] holds known client info from previous reset/reboot + * used in reboot/reset lease grace period processing + * + * conf_id_hashtbl[], and conf_name_tree hold confirmed + * setclientid_confirmed info. + * + * unconf_str_hastbl[] and unconf_name_tree hold unconfirmed + * setclientid info. + */ + struct list_head *reclaim_str_hashtbl; + int reclaim_str_hashtbl_size; + struct list_head *conf_id_hashtbl; + struct rb_root conf_name_tree; + struct list_head *unconf_id_hashtbl; + struct rb_root unconf_name_tree; + struct list_head *ownerstr_hashtbl; + struct list_head *lockowner_ino_hashtbl; + struct list_head *sessionid_hashtbl; + /* + * client_lru holds client queue ordered by nfs4_client.cl_time + * for lease renewal. + * + * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time + * for last close replay. + * + * All of the above fields are protected by the client_mutex. + */ + struct list_head client_lru; + struct list_head close_lru; + + struct delayed_work laundromat_work; + + /* client_lock protects the client lru list and session hash table */ + spinlock_t client_lock; + + struct file *rec_file; + bool in_grace; + struct nfsd4_client_tracking_ops *client_tracking_ops; + + time_t nfsd4_lease; + time_t nfsd4_grace; + + bool nfsd_net_up; + + /* + * Time of server startup + */ + struct timeval nfssvc_boot; + + struct svc_serv *nfsd_serv; }; +/* Simple check to find out if a given net was properly initialized */ +#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl) + extern int nfsd_net_id; #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index b314888825d5..9170861c804a 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -253,7 +253,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { - if (!rqstp->rq_respages[rqstp->rq_resused++]) + if (!*(rqstp->rq_next_page++)) return 0; w -= PAGE_SIZE; } diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index a596e9d987e4..9cbc1a841f87 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -184,7 +184,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { - if (!rqstp->rq_respages[rqstp->rq_resused++]) + if (!*(rqstp->rq_next_page++)) return 0; w -= PAGE_SIZE; } diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 97d90d1c8608..1fc02dfdc5c4 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -460,7 +460,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, __be32 nfserr; int count = 0; loff_t offset; - int i; + struct page **p; caddr_t page_addr = NULL; dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", @@ -484,8 +484,8 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, &resp->common, nfs3svc_encode_entry_plus); memcpy(resp->verf, argp->verf, 8); - for (i=1; i<rqstp->rq_resused ; i++) { - page_addr = page_address(rqstp->rq_respages[i]); + for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { + page_addr = page_address(*p); if (((caddr_t)resp->buffer >= page_addr) && ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 43f46cd9edea..324c0baf7cda 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -7,8 +7,10 @@ */ #include <linux/namei.h> +#include <linux/sunrpc/svc_xprt.h> #include "xdr3.h" #include "auth.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -323,7 +325,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readargs *args) { unsigned int len; - int v,pn; + int v; u32 max_blocksize = svc_max_payload(rqstp); if (!(p = decode_fh(p, &args->fh))) @@ -338,8 +340,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, /* set up the kvec */ v=0; while (len > 0) { - pn = rqstp->rq_resused++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + struct page *p = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(p); rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; len -= rqstp->rq_vec[v].iov_len; v++; @@ -461,8 +464,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, len = ntohl(*p++); if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) return 0; - args->tname = new = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->tname = new = page_address(*(rqstp->rq_next_page++)); args->tlen = len; /* first copy and check from the first page */ old = (char*)p; @@ -533,8 +535,7 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, { if (!(p = decode_fh(p, &args->fh))) return 0; - args->buffer = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -565,8 +566,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, if (args->count > PAGE_SIZE) args->count = PAGE_SIZE; - args->buffer = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -575,7 +575,7 @@ int nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirargs *args) { - int len, pn; + int len; u32 max_blocksize = svc_max_payload(rqstp); if (!(p = decode_fh(p, &args->fh))) @@ -590,9 +590,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, args->count = len; while (len > 0) { - pn = rqstp->rq_resused++; + struct page *p = *(rqstp->rq_next_page++); if (!args->buffer) - args->buffer = page_address(rqstp->rq_respages[pn]); + args->buffer = page_address(p); len -= PAGE_SIZE; } @@ -720,12 +720,14 @@ int nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_writeres *resp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + p = encode_wcc_data(rqstp, p, &resp->fh); if (resp->status == 0) { *p++ = htonl(resp->count); *p++ = htonl(resp->committed); - *p++ = htonl(nfssvc_boot.tv_sec); - *p++ = htonl(nfssvc_boot.tv_usec); + *p++ = htonl(nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_usec); } return xdr_ressize_check(rqstp, p); } @@ -876,7 +878,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, common); __be32 *p = cd->buffer; caddr_t curr_page_addr = NULL; - int pn; /* current page number */ + struct page ** page; int slen; /* string (name) length */ int elen; /* estimated entry length in words */ int num_entry_words = 0; /* actual number of words */ @@ -913,8 +915,9 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, } /* determine which page in rq_respages[] we are currently filling */ - for (pn=1; pn < cd->rqstp->rq_resused; pn++) { - curr_page_addr = page_address(cd->rqstp->rq_respages[pn]); + for (page = cd->rqstp->rq_respages + 1; + page < cd->rqstp->rq_next_page; page++) { + curr_page_addr = page_address(*page); if (((caddr_t)cd->buffer >= curr_page_addr) && ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE)) @@ -929,14 +932,14 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, if (plus) p = encode_entryplus_baggage(cd, p, name, namlen); num_entry_words = p - cd->buffer; - } else if (cd->rqstp->rq_respages[pn+1] != NULL) { + } else if (*(page+1) != NULL) { /* temporarily encode entry into next page, then move back to * current and next page in rq_respages[] */ __be32 *p1, *tmp; int len1, len2; /* grab next page for temporary storage of entry */ - p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]); + p1 = tmp = page_address(*(page+1)); p1 = encode_entry_baggage(cd, p1, name, namlen, ino); @@ -1082,11 +1085,13 @@ int nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_commitres *resp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + p = encode_wcc_data(rqstp, p, &resp->fh); /* Write verifier */ if (resp->status == 0) { - *p++ = htonl(nfssvc_boot.tv_sec); - *p++ = htonl(nfssvc_boot.tv_usec); + *p++ = htonl(nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_usec); } return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index bdf29c96e4cd..99bc85ff0217 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -36,6 +36,7 @@ #include <linux/slab.h> #include "nfsd.h" #include "state.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -625,20 +626,46 @@ static const struct rpc_program cb_program = { .pipe_dir_name = "nfsd4_cb", }; -static int max_cb_time(void) +static int max_cb_time(struct net *net) { - return max(nfsd4_lease/10, (time_t)1) * HZ; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + return max(nn->nfsd4_lease/10, (time_t)1) * HZ; } +static struct rpc_cred *callback_cred; + +int set_callback_cred(void) +{ + if (callback_cred) + return 0; + callback_cred = rpc_lookup_machine_cred("nfs"); + if (!callback_cred) + return -ENOMEM; + return 0; +} + +static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) +{ + if (clp->cl_minorversion == 0) { + return get_rpccred(callback_cred); + } else { + struct rpc_auth *auth = client->cl_auth; + struct auth_cred acred = {}; + + acred.uid = ses->se_cb_sec.uid; + acred.gid = ses->se_cb_sec.gid; + return auth->au_ops->lookup_cred(client->cl_auth, &acred, 0); + } +} static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { struct rpc_timeout timeparms = { - .to_initval = max_cb_time(), + .to_initval = max_cb_time(clp->net), .to_retries = 0, }; struct rpc_create_args args = { - .net = &init_net, + .net = clp->net, .address = (struct sockaddr *) &conn->cb_addr, .addrsize = conn->cb_addrlen, .saddress = (struct sockaddr *) &conn->cb_saddr, @@ -648,6 +675,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), }; struct rpc_clnt *client; + struct rpc_cred *cred; if (clp->cl_minorversion == 0) { if (!clp->cl_cred.cr_principal && @@ -666,7 +694,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; args.protocol = XPRT_TRANSPORT_BC_TCP; - args.authflavor = RPC_AUTH_UNIX; + args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ client = rpc_create(&args); @@ -675,9 +703,14 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c PTR_ERR(client)); return PTR_ERR(client); } + cred = get_backchannel_cred(clp, client, ses); + if (IS_ERR(cred)) { + rpc_shutdown_client(client); + return PTR_ERR(cred); + } clp->cl_cb_client = client; + clp->cl_cb_cred = cred; return 0; - } static void warn_no_callback_path(struct nfs4_client *clp, int reason) @@ -714,18 +747,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { .rpc_call_done = nfsd4_cb_probe_done, }; -static struct rpc_cred *callback_cred; - -int set_callback_cred(void) -{ - if (callback_cred) - return 0; - callback_cred = rpc_lookup_machine_cred("nfs"); - if (!callback_cred) - return -ENOMEM; - return 0; -} - static struct workqueue_struct *callback_wq; static void run_nfsd4_cb(struct nfsd4_callback *cb) @@ -743,7 +764,6 @@ static void do_probe_callback(struct nfs4_client *clp) cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; cb->cb_msg.rpc_argp = NULL; cb->cb_msg.rpc_resp = NULL; - cb->cb_msg.rpc_cred = callback_cred; cb->cb_ops = &nfsd4_cb_probe_ops; @@ -962,6 +982,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) if (clp->cl_cb_client) { rpc_shutdown_client(clp->cl_cb_client); clp->cl_cb_client = NULL; + put_rpccred(clp->cl_cb_cred); + clp->cl_cb_cred = NULL; } if (clp->cl_cb_conn.cb_xprt) { svc_xprt_put(clp->cl_cb_conn.cb_xprt); @@ -995,7 +1017,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) run_nfsd4_cb(cb); } -void nfsd4_do_callback_rpc(struct work_struct *w) +static void nfsd4_do_callback_rpc(struct work_struct *w) { struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; @@ -1010,10 +1032,16 @@ void nfsd4_do_callback_rpc(struct work_struct *w) nfsd4_release_cb(cb); return; } + cb->cb_msg.rpc_cred = clp->cl_cb_cred; rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, cb->cb_ops, cb); } +void nfsd4_init_callback(struct nfsd4_callback *cb) +{ + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); +} + void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfsd4_callback *cb = &dp->dl_recall; @@ -1025,7 +1053,6 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_resp = cb; - cb->cb_msg.rpc_cred = callback_cred; cb->cb_ops = &nfsd4_cb_recall_ops; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 6c9a4b291dba..9d1c5dba2bbb 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -40,6 +40,7 @@ #include "xdr4.h" #include "vfs.h" #include "current_stateid.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -194,6 +195,7 @@ static __be32 do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct svc_fh *resfh; + int accmode; __be32 status; resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); @@ -253,9 +255,10 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o /* set reply cache */ fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, &resfh->fh_handle); - if (!open->op_created) - status = do_open_permission(rqstp, resfh, open, - NFSD_MAY_NOP); + accmode = NFSD_MAY_NOP; + if (open->op_created) + accmode |= NFSD_MAY_OWNER_OVERRIDE; + status = do_open_permission(rqstp, resfh, open, accmode); set_change_info(&open->op_cinfo, current_fh); fh_dup2(current_fh, resfh); out: @@ -304,6 +307,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; struct nfsd4_compoundres *resp; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", (int)open->op_fname.len, open->op_fname.data, @@ -331,7 +336,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* check seqid for replay. set nfs4_owner */ resp = rqstp->rq_resp; - status = nfsd4_process_open1(&resp->cstate, open); + status = nfsd4_process_open1(&resp->cstate, open, nn); if (status == nfserr_replay_me) { struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay; fh_put(&cstate->current_fh); @@ -354,10 +359,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Openowner is now set, so sequence id will get bumped. Now we need * these checks before we do any creates: */ status = nfserr_grace; - if (locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + if (locks_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) goto out; status = nfserr_no_grace; - if (!locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + if (!locks_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) goto out; switch (open->op_claim_type) { @@ -370,7 +375,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_OPEN_CLAIM_PREVIOUS: open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; - status = nfs4_check_open_reclaim(&open->op_clientid, cstate->minorversion); + status = nfs4_check_open_reclaim(&open->op_clientid, + cstate->minorversion, + nn); if (status) goto out; case NFS4_OPEN_CLAIM_FH: @@ -490,12 +497,13 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &access->ac_supported); } -static void gen_boot_verifier(nfs4_verifier *verifier) +static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) { __be32 verf[2]; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - verf[0] = (__be32)nfssvc_boot.tv_sec; - verf[1] = (__be32)nfssvc_boot.tv_usec; + verf[0] = (__be32)nn->nfssvc_boot.tv_sec; + verf[1] = (__be32)nn->nfssvc_boot.tv_usec; memcpy(verifier->data, verf, sizeof(verifier->data)); } @@ -503,7 +511,7 @@ static __be32 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_commit *commit) { - gen_boot_verifier(&commit->co_verf); + gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp)); return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, commit->co_count); } @@ -684,6 +692,17 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (read->rd_offset >= OFFSET_MAX) return nfserr_inval; + /* + * If we do a zero copy read, then a client will see read data + * that reflects the state of the file *after* performing the + * following compound. + * + * To ensure proper ordering, we therefore turn off zero copy if + * the client wants us to do more in this compound: + */ + if (!nfsd4_last_compound_op(rqstp)) + rqstp->rq_splice_ok = false; + nfs4_lock_state(); /* check stateid */ if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), @@ -876,6 +895,24 @@ out: return status; } +static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) +{ + int i = 1; + int buflen = write->wr_buflen; + + vec[0].iov_base = write->wr_head.iov_base; + vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len); + buflen -= vec[0].iov_len; + + while (buflen) { + vec[i].iov_base = page_address(write->wr_pagelist[i - 1]); + vec[i].iov_len = min_t(int, PAGE_SIZE, buflen); + buflen -= vec[i].iov_len; + i++; + } + return i; +} + static __be32 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_write *write) @@ -884,6 +921,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file *filp = NULL; __be32 status = nfs_ok; unsigned long cnt; + int nvecs; /* no need to check permission - this will be done in nfsd_write() */ @@ -904,10 +942,13 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; - gen_boot_verifier(&write->wr_verifier); + gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); + + nvecs = fill_in_write_vector(rqstp->rq_vec, write); + WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_write(rqstp, &cstate->current_fh, filp, - write->wr_offset, rqstp->rq_vec, write->wr_vlen, + write->wr_offset, rqstp->rq_vec, nvecs, &cnt, &write->wr_how_written); if (filp) fput(filp); @@ -1666,6 +1707,12 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_EXCHANGE_ID", .op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize, }, + [OP_BACKCHANNEL_CTL] = { + .op_func = (nfsd4op_func)nfsd4_backchannel_ctl, + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, + .op_name = "OP_BACKCHANNEL_CTL", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, [OP_BIND_CONN_TO_SESSION] = { .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP @@ -1719,6 +1766,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_free_stateid, .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, .op_name = "OP_FREE_STATEID", + .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, }; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 43295d45cc2b..ba6fdd4a0455 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -58,13 +58,11 @@ struct nfsd4_client_tracking_ops { void (*create)(struct nfs4_client *); void (*remove)(struct nfs4_client *); int (*check)(struct nfs4_client *); - void (*grace_done)(struct net *, time_t); + void (*grace_done)(struct nfsd_net *, time_t); }; /* Globals */ -static struct file *rec_file; static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; -static struct nfsd4_client_tracking_ops *client_tracking_ops; static int nfs4_save_creds(const struct cred **original_creds) @@ -102,33 +100,39 @@ md5_to_hex(char *out, char *md5) *out = '\0'; } -__be32 -nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) +static int +nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname) { struct xdr_netobj cksum; struct hash_desc desc; struct scatterlist sg; - __be32 status = nfserr_jukebox; + int status; dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", clname->len, clname->data); desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(desc.tfm)) + if (IS_ERR(desc.tfm)) { + status = PTR_ERR(desc.tfm); goto out_no_tfm; + } + cksum.len = crypto_hash_digestsize(desc.tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); - if (cksum.data == NULL) + if (cksum.data == NULL) { + status = -ENOMEM; goto out; + } sg_init_one(&sg, clname->data, clname->len); - if (crypto_hash_digest(&desc, &sg, sg.length, cksum.data)) + status = crypto_hash_digest(&desc, &sg, sg.length, cksum.data); + if (status) goto out; md5_to_hex(dname, cksum.data); - status = nfs_ok; + status = 0; out: kfree(cksum.data); crypto_free_hash(desc.tfm); @@ -136,29 +140,61 @@ out_no_tfm: return status; } +/* + * If we had an error generating the recdir name for the legacy tracker + * then warn the admin. If the error doesn't appear to be transient, + * then disable recovery tracking. + */ +static void +legacy_recdir_name_error(int error) +{ + printk(KERN_ERR "NFSD: unable to generate recoverydir " + "name (%d).\n", error); + + /* + * if the algorithm just doesn't exist, then disable the recovery + * tracker altogether. The crypto libs will generally return this if + * FIPS is enabled as well. + */ + if (error == -ENOENT) { + printk(KERN_ERR "NFSD: disabling legacy clientid tracking. " + "Reboot recovery will not function correctly!\n"); + + /* the argument is ignored by the legacy exit function */ + nfsd4_client_tracking_exit(NULL); + } +} + static void nfsd4_create_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; - char *dname = clp->cl_recdir; + char dname[HEXDIR_LEN]; struct dentry *dir, *dentry; + struct nfs4_client_reclaim *crp; int status; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; - if (!rec_file) + if (!nn->rec_file) return; + + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) + return legacy_recdir_name_error(status); + status = nfs4_save_creds(&original_cred); if (status < 0) return; - status = mnt_want_write_file(rec_file); + status = mnt_want_write_file(nn->rec_file); if (status) return; - dir = rec_file->f_path.dentry; + dir = nn->rec_file->f_path.dentry; /* lock the parent */ mutex_lock(&dir->d_inode->i_mutex); @@ -182,18 +218,24 @@ out_put: dput(dentry); out_unlock: mutex_unlock(&dir->d_inode->i_mutex); - if (status == 0) - vfs_fsync(rec_file, 0); - else + if (status == 0) { + if (nn->in_grace) { + crp = nfs4_client_to_reclaim(dname, nn); + if (crp) + crp->cr_clp = clp; + } + vfs_fsync(nn->rec_file, 0); + } else { printk(KERN_ERR "NFSD: failed to write recovery record" " (err %d); please check that %s exists" " and is writeable", status, user_recovery_dirname); - mnt_drop_write_file(rec_file); + } + mnt_drop_write_file(nn->rec_file); nfs4_reset_creds(original_cred); } -typedef int (recdir_func)(struct dentry *, struct dentry *); +typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *); struct name_list { char name[HEXDIR_LEN]; @@ -219,10 +261,10 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen, } static int -nfsd4_list_rec_dir(recdir_func *f) +nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { const struct cred *original_cred; - struct dentry *dir = rec_file->f_path.dentry; + struct dentry *dir = nn->rec_file->f_path.dentry; LIST_HEAD(names); int status; @@ -230,13 +272,13 @@ nfsd4_list_rec_dir(recdir_func *f) if (status < 0) return status; - status = vfs_llseek(rec_file, 0, SEEK_SET); + status = vfs_llseek(nn->rec_file, 0, SEEK_SET); if (status < 0) { nfs4_reset_creds(original_cred); return status; } - status = vfs_readdir(rec_file, nfsd4_build_namelist, &names); + status = vfs_readdir(nn->rec_file, nfsd4_build_namelist, &names); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); while (!list_empty(&names)) { struct name_list *entry; @@ -248,7 +290,7 @@ nfsd4_list_rec_dir(recdir_func *f) status = PTR_ERR(dentry); break; } - status = f(dir, dentry); + status = f(dir, dentry, nn); dput(dentry); } list_del(&entry->list); @@ -260,14 +302,14 @@ nfsd4_list_rec_dir(recdir_func *f) } static int -nfsd4_unlink_clid_dir(char *name, int namlen) +nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn) { struct dentry *dir, *dentry; int status; dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); - dir = rec_file->f_path.dentry; + dir = nn->rec_file->f_path.dentry; mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, dir, namlen); if (IS_ERR(dentry)) { @@ -289,37 +331,52 @@ static void nfsd4_remove_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; + struct nfs4_client_reclaim *crp; + char dname[HEXDIR_LEN]; int status; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - if (!rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; - status = mnt_want_write_file(rec_file); + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) + return legacy_recdir_name_error(status); + + status = mnt_want_write_file(nn->rec_file); if (status) goto out; clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); status = nfs4_save_creds(&original_cred); if (status < 0) - goto out; + goto out_drop_write; - status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); + status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn); nfs4_reset_creds(original_cred); - if (status == 0) - vfs_fsync(rec_file, 0); - mnt_drop_write_file(rec_file); + if (status == 0) { + vfs_fsync(nn->rec_file, 0); + if (nn->in_grace) { + /* remove reclaim record */ + crp = nfsd4_find_reclaim_client(dname, nn); + if (crp) + nfs4_remove_reclaim_record(crp, nn); + } + } +out_drop_write: + mnt_drop_write_file(nn->rec_file); out: if (status) printk("NFSD: Failed to remove expired client state directory" - " %.*s\n", HEXDIR_LEN, clp->cl_recdir); + " %.*s\n", HEXDIR_LEN, dname); } static int -purge_old(struct dentry *parent, struct dentry *child) +purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { int status; - if (nfs4_has_reclaimed_state(child->d_name.name, false)) + if (nfs4_has_reclaimed_state(child->d_name.name, nn)) return 0; status = vfs_rmdir(parent->d_inode, child); @@ -331,27 +388,29 @@ purge_old(struct dentry *parent, struct dentry *child) } static void -nfsd4_recdir_purge_old(struct net *net, time_t boot_time) +nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time) { int status; - if (!rec_file) + nn->in_grace = false; + if (!nn->rec_file) return; - status = mnt_want_write_file(rec_file); + status = mnt_want_write_file(nn->rec_file); if (status) goto out; - status = nfsd4_list_rec_dir(purge_old); + status = nfsd4_list_rec_dir(purge_old, nn); if (status == 0) - vfs_fsync(rec_file, 0); - mnt_drop_write_file(rec_file); + vfs_fsync(nn->rec_file, 0); + mnt_drop_write_file(nn->rec_file); out: + nfs4_release_reclaim(nn); if (status) printk("nfsd4: failed to purge old clients from recovery" - " directory %s\n", rec_file->f_path.dentry->d_name.name); + " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); } static int -load_recdir(struct dentry *parent, struct dentry *child) +load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { if (child->d_name.len != HEXDIR_LEN - 1) { printk("nfsd4: illegal name %s in recovery directory\n", @@ -359,21 +418,22 @@ load_recdir(struct dentry *parent, struct dentry *child) /* Keep trying; maybe the others are OK: */ return 0; } - nfs4_client_to_reclaim(child->d_name.name); + nfs4_client_to_reclaim(child->d_name.name, nn); return 0; } static int -nfsd4_recdir_load(void) { +nfsd4_recdir_load(struct net *net) { int status; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - if (!rec_file) + if (!nn->rec_file) return 0; - status = nfsd4_list_rec_dir(load_recdir); + status = nfsd4_list_rec_dir(load_recdir, nn); if (status) printk("nfsd4: failed loading clients from recovery" - " directory %s\n", rec_file->f_path.dentry->d_name.name); + " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); return status; } @@ -382,15 +442,16 @@ nfsd4_recdir_load(void) { */ static int -nfsd4_init_recdir(void) +nfsd4_init_recdir(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); const struct cred *original_cred; int status; printk("NFSD: Using %s as the NFSv4 state recovery directory\n", user_recovery_dirname); - BUG_ON(rec_file); + BUG_ON(nn->rec_file); status = nfs4_save_creds(&original_cred); if (status < 0) { @@ -400,23 +461,65 @@ nfsd4_init_recdir(void) return status; } - rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); - if (IS_ERR(rec_file)) { + nn->rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); + if (IS_ERR(nn->rec_file)) { printk("NFSD: unable to find recovery directory %s\n", user_recovery_dirname); - status = PTR_ERR(rec_file); - rec_file = NULL; + status = PTR_ERR(nn->rec_file); + nn->rec_file = NULL; } nfs4_reset_creds(original_cred); + if (!status) + nn->in_grace = true; return status; } + +static int +nfs4_legacy_state_init(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int i; + + nn->reclaim_str_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->reclaim_str_hashtbl) + return -ENOMEM; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]); + nn->reclaim_str_hashtbl_size = 0; + + return 0; +} + +static void +nfs4_legacy_state_shutdown(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + kfree(nn->reclaim_str_hashtbl); +} + static int nfsd4_load_reboot_recovery_data(struct net *net) { int status; + status = nfsd4_init_recdir(net); + if (!status) + status = nfsd4_recdir_load(net); + if (status) + printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); + return status; +} + +static int +nfsd4_legacy_tracking_init(struct net *net) +{ + int status; + /* XXX: The legacy code won't work in a container */ if (net != &init_net) { WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client " @@ -424,30 +527,37 @@ nfsd4_load_reboot_recovery_data(struct net *net) return -EINVAL; } - nfs4_lock_state(); - status = nfsd4_init_recdir(); - if (!status) - status = nfsd4_recdir_load(); - nfs4_unlock_state(); + status = nfs4_legacy_state_init(net); if (status) - printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); + return status; + + status = nfsd4_load_reboot_recovery_data(net); + if (status) + goto err; + return 0; + +err: + nfs4_legacy_state_shutdown(net); return status; } static void -nfsd4_shutdown_recdir(void) +nfsd4_shutdown_recdir(struct nfsd_net *nn) { - if (!rec_file) + if (!nn->rec_file) return; - fput(rec_file); - rec_file = NULL; + fput(nn->rec_file); + nn->rec_file = NULL; } static void nfsd4_legacy_tracking_exit(struct net *net) { - nfs4_release_reclaim(); - nfsd4_shutdown_recdir(); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs4_release_reclaim(nn); + nfsd4_shutdown_recdir(nn); + nfs4_legacy_state_shutdown(net); } /* @@ -480,13 +590,26 @@ nfs4_recoverydir(void) static int nfsd4_check_legacy_client(struct nfs4_client *clp) { + int status; + char dname[HEXDIR_LEN]; + struct nfs4_client_reclaim *crp; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + /* did we already find that this client is stable? */ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return 0; + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) { + legacy_recdir_name_error(status); + return status; + } + /* look for it in the reclaim hashtable otherwise */ - if (nfsd4_find_reclaim_client(clp)) { + crp = nfsd4_find_reclaim_client(dname, nn); + if (crp) { set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + crp->cr_clp = clp; return 0; } @@ -494,7 +617,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp) } static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = { - .init = nfsd4_load_reboot_recovery_data, + .init = nfsd4_legacy_tracking_init, .exit = nfsd4_legacy_tracking_exit, .create = nfsd4_create_clid_dir, .remove = nfsd4_remove_clid_dir, @@ -785,8 +908,7 @@ nfsd4_cld_create(struct nfs4_client *clp) { int ret; struct cld_upcall *cup; - /* FIXME: determine net from clp */ - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; /* Don't upcall if it's already stored */ @@ -823,8 +945,7 @@ nfsd4_cld_remove(struct nfs4_client *clp) { int ret; struct cld_upcall *cup; - /* FIXME: determine net from clp */ - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; /* Don't upcall if it's already removed */ @@ -861,8 +982,7 @@ nfsd4_cld_check(struct nfs4_client *clp) { int ret; struct cld_upcall *cup; - /* FIXME: determine net from clp */ - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; /* Don't upcall if one was already stored during this grace pd */ @@ -892,11 +1012,10 @@ nfsd4_cld_check(struct nfs4_client *clp) } static void -nfsd4_cld_grace_done(struct net *net, time_t boot_time) +nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) { int ret; struct cld_upcall *cup; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct cld_net *cn = nn->cld_net; cup = alloc_cld_upcall(cn); @@ -926,28 +1045,261 @@ static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { .grace_done = nfsd4_cld_grace_done, }; +/* upcall via usermodehelper */ +static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack"; +module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog), + S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(cltrack_prog, "Path to the nfsdcltrack upcall program"); + +static bool cltrack_legacy_disable; +module_param(cltrack_legacy_disable, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(cltrack_legacy_disable, + "Disable legacy recoverydir conversion. Default: false"); + +#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" +#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" + +static char * +nfsd4_cltrack_legacy_topdir(void) +{ + int copied; + size_t len; + char *result; + + if (cltrack_legacy_disable) + return NULL; + + len = strlen(LEGACY_TOPDIR_ENV_PREFIX) + + strlen(nfs4_recoverydir()) + 1; + + result = kmalloc(len, GFP_KERNEL); + if (!result) + return result; + + copied = snprintf(result, len, LEGACY_TOPDIR_ENV_PREFIX "%s", + nfs4_recoverydir()); + if (copied >= len) { + /* just return nothing if output was truncated */ + kfree(result); + return NULL; + } + + return result; +} + +static char * +nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name) +{ + int copied; + size_t len; + char *result; + + if (cltrack_legacy_disable) + return NULL; + + /* +1 is for '/' between "topdir" and "recdir" */ + len = strlen(LEGACY_RECDIR_ENV_PREFIX) + + strlen(nfs4_recoverydir()) + 1 + HEXDIR_LEN; + + result = kmalloc(len, GFP_KERNEL); + if (!result) + return result; + + copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/", + nfs4_recoverydir()); + if (copied > (len - HEXDIR_LEN)) { + /* just return nothing if output will be truncated */ + kfree(result); + return NULL; + } + + copied = nfs4_make_rec_clidname(result + copied, name); + if (copied) { + kfree(result); + return NULL; + } + + return result; +} + +static int +nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) +{ + char *envp[2]; + char *argv[4]; + int ret; + + if (unlikely(!cltrack_prog[0])) { + dprintk("%s: cltrack_prog is disabled\n", __func__); + return -EACCES; + } + + dprintk("%s: cmd: %s\n", __func__, cmd); + dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); + dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)"); + + envp[0] = legacy; + envp[1] = NULL; + + argv[0] = (char *)cltrack_prog; + argv[1] = cmd; + argv[2] = arg; + argv[3] = NULL; + + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + /* + * Disable the upcall mechanism if we're getting an ENOENT or EACCES + * error. The admin can re-enable it on the fly by using sysfs + * once the problem has been fixed. + */ + if (ret == -ENOENT || ret == -EACCES) { + dprintk("NFSD: %s was not found or isn't executable (%d). " + "Setting cltrack_prog to blank string!", + cltrack_prog, ret); + cltrack_prog[0] = '\0'; + } + dprintk("%s: %s return value: %d\n", __func__, cltrack_prog, ret); + + return ret; +} + +static char * +bin_to_hex_dup(const unsigned char *src, int srclen) +{ + int i; + char *buf, *hex; + + /* +1 for terminating NULL */ + buf = kmalloc((srclen * 2) + 1, GFP_KERNEL); + if (!buf) + return buf; + + hex = buf; + for (i = 0; i < srclen; i++) { + sprintf(hex, "%2.2x", *src++); + hex += 2; + } + return buf; +} + +static int +nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) +{ + return nfsd4_umh_cltrack_upcall("init", NULL, NULL); +} + +static void +nfsd4_umh_cltrack_create(struct nfs4_client *clp) +{ + char *hexid; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return; + } + nfsd4_umh_cltrack_upcall("create", hexid, NULL); + kfree(hexid); +} + +static void +nfsd4_umh_cltrack_remove(struct nfs4_client *clp) +{ + char *hexid; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return; + } + nfsd4_umh_cltrack_upcall("remove", hexid, NULL); + kfree(hexid); +} + +static int +nfsd4_umh_cltrack_check(struct nfs4_client *clp) +{ + int ret; + char *hexid, *legacy; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return -ENOMEM; + } + legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); + ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy); + kfree(legacy); + kfree(hexid); + return ret; +} + +static void +nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn, + time_t boot_time) +{ + char *legacy; + char timestr[22]; /* FIXME: better way to determine max size? */ + + sprintf(timestr, "%ld", boot_time); + legacy = nfsd4_cltrack_legacy_topdir(); + nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy); + kfree(legacy); +} + +static struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = { + .init = nfsd4_umh_cltrack_init, + .exit = NULL, + .create = nfsd4_umh_cltrack_create, + .remove = nfsd4_umh_cltrack_remove, + .check = nfsd4_umh_cltrack_check, + .grace_done = nfsd4_umh_cltrack_grace_done, +}; + int nfsd4_client_tracking_init(struct net *net) { int status; struct path path; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - if (!client_tracking_ops) { - client_tracking_ops = &nfsd4_cld_tracking_ops; - status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path); - if (!status) { - if (S_ISDIR(path.dentry->d_inode->i_mode)) - client_tracking_ops = - &nfsd4_legacy_tracking_ops; - path_put(&path); - } + /* just run the init if it the method is already decided */ + if (nn->client_tracking_ops) + goto do_init; + + /* + * First, try a UMH upcall. It should succeed or fail quickly, so + * there's little harm in trying that first. + */ + nn->client_tracking_ops = &nfsd4_umh_tracking_ops; + status = nn->client_tracking_ops->init(net); + if (!status) + return status; + + /* + * See if the recoverydir exists and is a directory. If it is, + * then use the legacy ops. + */ + nn->client_tracking_ops = &nfsd4_legacy_tracking_ops; + status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path); + if (!status) { + status = S_ISDIR(path.dentry->d_inode->i_mode); + path_put(&path); + if (status) + goto do_init; } - status = client_tracking_ops->init(net); + /* Finally, try to use nfsdcld */ + nn->client_tracking_ops = &nfsd4_cld_tracking_ops; + printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be " + "removed in 3.10. Please transition to using " + "nfsdcltrack.\n"); +do_init: + status = nn->client_tracking_ops->init(net); if (status) { printk(KERN_WARNING "NFSD: Unable to initialize client " "recovery tracking! (%d)\n", status); - client_tracking_ops = NULL; + nn->client_tracking_ops = NULL; } return status; } @@ -955,40 +1307,49 @@ nfsd4_client_tracking_init(struct net *net) void nfsd4_client_tracking_exit(struct net *net) { - if (client_tracking_ops) { - client_tracking_ops->exit(net); - client_tracking_ops = NULL; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->client_tracking_ops) { + if (nn->client_tracking_ops->exit) + nn->client_tracking_ops->exit(net); + nn->client_tracking_ops = NULL; } } void nfsd4_client_record_create(struct nfs4_client *clp) { - if (client_tracking_ops) - client_tracking_ops->create(clp); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + nn->client_tracking_ops->create(clp); } void nfsd4_client_record_remove(struct nfs4_client *clp) { - if (client_tracking_ops) - client_tracking_ops->remove(clp); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + nn->client_tracking_ops->remove(clp); } int nfsd4_client_record_check(struct nfs4_client *clp) { - if (client_tracking_ops) - return client_tracking_ops->check(clp); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + return nn->client_tracking_ops->check(clp); return -EOPNOTSUPP; } void -nfsd4_record_grace_done(struct net *net, time_t boot_time) +nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time) { - if (client_tracking_ops) - client_tracking_ops->grace_done(net, boot_time); + if (nn->client_tracking_ops) + nn->client_tracking_ops->grace_done(nn, boot_time); } static int diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d0237f872cc4..ac8ed96c4199 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -44,16 +44,11 @@ #include "xdr4.h" #include "vfs.h" #include "current_stateid.h" -#include "fault_inject.h" #include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_PROC -/* Globals */ -time_t nfsd4_lease = 90; /* default lease time */ -time_t nfsd4_grace = 90; - #define all_ones {{~0,~0},~0} static const stateid_t one_stateid = { .si_generation = ~0, @@ -176,8 +171,6 @@ static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) return ret & OWNER_HASH_MASK; } -static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; - /* hash table for nfs4_file */ #define FILE_HASH_BITS 8 #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) @@ -192,7 +185,7 @@ static struct list_head file_hashtbl[FILE_HASH_SIZE]; static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) { - BUG_ON(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); + WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); atomic_inc(&fp->fi_access[oflag]); } @@ -251,7 +244,7 @@ static inline int get_new_stid(struct nfs4_stid *stid) * preallocations that can exist at a time, but the state lock * prevents anyone from using ours before we get here: */ - BUG_ON(error); + WARN_ON_ONCE(error); /* * It shouldn't be a problem to reuse an opaque stateid value. * I don't think it is for 4.1. But with 4.0 I worry that, for @@ -340,7 +333,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); - INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); + nfsd4_init_callback(&dp->dl_recall); return dp; } @@ -390,14 +383,6 @@ unhash_delegation(struct nfs4_delegation *dp) * SETCLIENTID state */ -/* client_lock protects the client lru list and session hash table */ -static DEFINE_SPINLOCK(client_lock); - -/* Hash tables for nfs4_clientid state */ -#define CLIENT_HASH_BITS 4 -#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) -#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) - static unsigned int clientid_hashval(u32 id) { return id & CLIENT_HASH_MASK; @@ -409,31 +394,6 @@ static unsigned int clientstr_hashval(const char *name) } /* - * reclaim_str_hashtbl[] holds known client info from previous reset/reboot - * used in reboot/reset lease grace period processing - * - * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed - * setclientid_confirmed info. - * - * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed - * setclientid info. - * - * client_lru holds client queue ordered by nfs4_client.cl_time - * for lease renewal. - * - * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time - * for last close replay. - */ -static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE]; -static int reclaim_str_hashtbl_size = 0; -static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head client_lru; -static struct list_head close_lru; - -/* * We store the NONE, READ, WRITE, and BOTH bits separately in the * st_{access,deny}_bmap field of the stateid, in order to track not * only what share bits are currently in force, but also what @@ -526,7 +486,8 @@ static int nfs4_access_to_omode(u32 access) case NFS4_SHARE_ACCESS_BOTH: return O_RDWR; } - BUG(); + WARN_ON_ONCE(1); + return O_RDONLY; } /* release all access and file references for a given stateid */ @@ -652,9 +613,6 @@ static void release_openowner(struct nfs4_openowner *oo) nfs4_free_openowner(oo); } -#define SESSION_HASH_SIZE 512 -static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE]; - static inline int hash_sessionid(struct nfs4_sessionid *sessionid) { @@ -785,9 +743,12 @@ out_free: return NULL; } -static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize) +static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, + struct nfsd4_channel_attrs *req, + int numslots, int slotsize, + struct nfsd_net *nn) { - u32 maxrpc = nfsd_serv->sv_max_mesg; + u32 maxrpc = nn->nfsd_serv->sv_max_mesg; new->maxreqs = numslots; new->maxresp_cached = min_t(u32, req->maxresp_cached, @@ -906,21 +867,27 @@ static void __free_session(struct nfsd4_session *ses) static void free_session(struct kref *kref) { struct nfsd4_session *ses; + struct nfsd_net *nn; - lockdep_assert_held(&client_lock); ses = container_of(kref, struct nfsd4_session, se_ref); + nn = net_generic(ses->se_client->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); nfsd4_del_conns(ses); __free_session(ses); } void nfsd4_put_session(struct nfsd4_session *ses) { - spin_lock(&client_lock); + struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); + + spin_lock(&nn->client_lock); nfsd4_put_session_locked(ses); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); } -static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan) +static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan, + struct nfsd_net *nn) { struct nfsd4_session *new; int numslots, slotsize; @@ -941,13 +908,14 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan) nfsd4_put_drc_mem(slotsize, fchan->maxreqs); return NULL; } - init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize); + init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn); return new; } -static struct nfsd4_session *init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) +static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) { int idx; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); new->se_client = clp; gen_sessionid(new); @@ -957,14 +925,15 @@ static struct nfsd4_session *init_session(struct svc_rqst *rqstp, struct nfsd4_s new->se_cb_seq_nr = 1; new->se_flags = cses->flags; new->se_cb_prog = cses->callback_prog; + new->se_cb_sec = cses->cb_sec; kref_init(&new->se_ref); idx = hash_sessionid(&new->se_sessionid); - spin_lock(&client_lock); - list_add(&new->se_hash, &sessionid_hashtbl[idx]); + spin_lock(&nn->client_lock); + list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); spin_lock(&clp->cl_lock); list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); if (cses->flags & SESSION4_BACK_CHAN) { struct sockaddr *sa = svc_addr(rqstp); @@ -978,20 +947,20 @@ static struct nfsd4_session *init_session(struct svc_rqst *rqstp, struct nfsd4_s rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); } - return new; } /* caller must hold client_lock */ static struct nfsd4_session * -find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) +find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) { struct nfsd4_session *elem; int idx; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dump_sessionid(__func__, sessionid); idx = hash_sessionid(sessionid); /* Search in the appropriate list */ - list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) { + list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) { if (!memcmp(elem->se_sessionid.data, sessionid->data, NFS4_MAX_SESSIONID_LEN)) { return elem; @@ -1016,6 +985,8 @@ unhash_session(struct nfsd4_session *ses) static inline void renew_client_locked(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + if (is_client_expired(clp)) { WARN_ON(1); printk("%s: client (clientid %08x/%08x) already expired\n", @@ -1028,16 +999,18 @@ renew_client_locked(struct nfs4_client *clp) dprintk("renewing client (clientid %08x/%08x)\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - list_move_tail(&clp->cl_lru, &client_lru); + list_move_tail(&clp->cl_lru, &nn->client_lru); clp->cl_time = get_seconds(); } static inline void renew_client(struct nfs4_client *clp) { - spin_lock(&client_lock); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); renew_client_locked(clp); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); } /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ @@ -1075,7 +1048,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static inline void free_client(struct nfs4_client *clp) { - lockdep_assert_held(&client_lock); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, @@ -1092,15 +1067,16 @@ void release_session_client(struct nfsd4_session *session) { struct nfs4_client *clp = session->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock)) + if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) return; if (is_client_expired(clp)) { free_client(clp); session->se_client = NULL; } else renew_client_locked(clp); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); } /* must be called under the client_lock */ @@ -1123,6 +1099,7 @@ destroy_client(struct nfs4_client *clp) struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head reaplist; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); INIT_LIST_HEAD(&reaplist); spin_lock(&recall_lock); @@ -1144,12 +1121,15 @@ destroy_client(struct nfs4_client *clp) if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); list_del(&clp->cl_idhash); - list_del(&clp->cl_strhash); - spin_lock(&client_lock); + if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) + rb_erase(&clp->cl_namenode, &nn->conf_name_tree); + else + rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); + spin_lock(&nn->client_lock); unhash_client_locked(clp); if (atomic_read(&clp->cl_refcount) == 0) free_client(clp); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); } static void expire_client(struct nfs4_client *clp) @@ -1187,6 +1167,17 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) return 0; } +static long long +compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2) +{ + long long res; + + res = o1->len - o2->len; + if (res) + return res; + return (long long)memcmp(o1->data, o2->data, o1->len); +} + static int same_name(const char *n1, const char *n2) { return 0 == memcmp(n1, n2, HEXDIR_LEN); @@ -1247,10 +1238,9 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); } -static void gen_clid(struct nfs4_client *clp) +static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { static u32 current_clientid = 1; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); clp->cl_clientid.cl_boot = nn->boot_time; clp->cl_clientid.cl_id = current_clientid++; @@ -1283,12 +1273,14 @@ static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t return NULL; } -static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, +static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { struct nfs4_client *clp; struct sockaddr *sa = svc_addr(rqstp); int ret; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); clp = alloc_client(name); if (clp == NULL) @@ -1297,23 +1289,21 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, INIT_LIST_HEAD(&clp->cl_sessions); ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); if (ret) { - spin_lock(&client_lock); + spin_lock(&nn->client_lock); free_client(clp); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); return NULL; } idr_init(&clp->cl_stateids); - memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); atomic_set(&clp->cl_refcount, 0); clp->cl_cb_state = NFSD4_CB_UNKNOWN; INIT_LIST_HEAD(&clp->cl_idhash); - INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_lru); INIT_LIST_HEAD(&clp->cl_callbacks); spin_lock_init(&clp->cl_lock); - INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); + nfsd4_init_callback(&clp->cl_cb_null); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); @@ -1321,17 +1311,60 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); gen_confirm(clp); clp->cl_cb_session = NULL; + clp->net = net; return clp; } static void -add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) +add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + struct nfs4_client *clp; + + while (*new) { + clp = rb_entry(*new, struct nfs4_client, cl_namenode); + parent = *new; + + if (compare_blob(&clp->cl_name, &new_clp->cl_name) > 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + rb_link_node(&new_clp->cl_namenode, parent, new); + rb_insert_color(&new_clp->cl_namenode, root); +} + +static struct nfs4_client * +find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root) +{ + long long cmp; + struct rb_node *node = root->rb_node; + struct nfs4_client *clp; + + while (node) { + clp = rb_entry(node, struct nfs4_client, cl_namenode); + cmp = compare_blob(&clp->cl_name, name); + if (cmp > 0) + node = node->rb_left; + else if (cmp < 0) + node = node->rb_right; + else + return clp; + } + return NULL; +} + +static void +add_to_unconfirmed(struct nfs4_client *clp) { unsigned int idhashval; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]); + clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); + add_clp_to_name_tree(clp, &nn->unconf_name_tree); idhashval = clientid_hashval(clp->cl_clientid.cl_id); - list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]); + list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]); renew_client(clp); } @@ -1339,22 +1372,23 @@ static void move_to_confirmed(struct nfs4_client *clp) { unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); - unsigned int strhashval; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); - list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); - strhashval = clientstr_hashval(clp->cl_recdir); - list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); + list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); + rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); + add_clp_to_name_tree(clp, &nn->conf_name_tree); + set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); renew_client(clp); } static struct nfs4_client * -find_confirmed_client(clientid_t *clid, bool sessions) +find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct nfs4_client *clp; unsigned int idhashval = clientid_hashval(clid->cl_id); - list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { + list_for_each_entry(clp, &nn->conf_id_hashtbl[idhashval], cl_idhash) { if (same_clid(&clp->cl_clientid, clid)) { if ((bool)clp->cl_minorversion != sessions) return NULL; @@ -1366,12 +1400,12 @@ find_confirmed_client(clientid_t *clid, bool sessions) } static struct nfs4_client * -find_unconfirmed_client(clientid_t *clid, bool sessions) +find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct nfs4_client *clp; unsigned int idhashval = clientid_hashval(clid->cl_id); - list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) { + list_for_each_entry(clp, &nn->unconf_id_hashtbl[idhashval], cl_idhash) { if (same_clid(&clp->cl_clientid, clid)) { if ((bool)clp->cl_minorversion != sessions) return NULL; @@ -1387,27 +1421,15 @@ static bool clp_used_exchangeid(struct nfs4_client *clp) } static struct nfs4_client * -find_confirmed_client_by_str(const char *dname, unsigned int hashval) +find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { - struct nfs4_client *clp; - - list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname)) - return clp; - } - return NULL; + return find_clp_in_name_tree(name, &nn->conf_name_tree); } static struct nfs4_client * -find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) +find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { - struct nfs4_client *clp; - - list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname)) - return clp; - } - return NULL; + return find_clp_in_name_tree(name, &nn->unconf_name_tree); } static void @@ -1428,7 +1450,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r else goto out_err; - conn->cb_addrlen = rpc_uaddr2sockaddr(&init_net, se->se_callback_addr_val, + conn->cb_addrlen = rpc_uaddr2sockaddr(clp->net, se->se_callback_addr_val, se->se_callback_addr_len, (struct sockaddr *)&conn->cb_addr, sizeof(conn->cb_addr)); @@ -1572,12 +1594,11 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, { struct nfs4_client *unconf, *conf, *new; __be32 status; - unsigned int strhashval; - char dname[HEXDIR_LEN]; char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; struct sockaddr *sa = svc_addr(rqstp); bool update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " @@ -1592,24 +1613,16 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, switch (exid->spa_how) { case SP4_NONE: break; + default: /* checked by xdr code */ + WARN_ON_ONCE(1); case SP4_SSV: - return nfserr_serverfault; - default: - BUG(); /* checked by xdr code */ case SP4_MACH_CRED: return nfserr_serverfault; /* no excuse :-/ */ } - status = nfs4_make_rec_clidname(dname, &exid->clname); - - if (status) - return status; - - strhashval = clientstr_hashval(dname); - /* Cases below refer to rfc 5661 section 18.35.4: */ nfs4_lock_state(); - conf = find_confirmed_client_by_str(dname, strhashval); + conf = find_confirmed_client_by_name(&exid->clname, nn); if (conf) { bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); bool verfs_match = same_verf(&verf, &conf->cl_verifier); @@ -1654,21 +1667,21 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, goto out; } - unconf = find_unconfirmed_client_by_str(dname, strhashval); + unconf = find_unconfirmed_client_by_name(&exid->clname, nn); if (unconf) /* case 4, possible retry or client restart */ expire_client(unconf); /* case 1 (normal case) */ out_new: - new = create_client(exid->clname, dname, rqstp, &verf); + new = create_client(exid->clname, rqstp, &verf); if (new == NULL) { status = nfserr_jukebox; goto out; } new->cl_minorversion = 1; - gen_clid(new); - add_to_unconfirmed(new, strhashval); + gen_clid(new, nn); + add_to_unconfirmed(new); out_copy: exid->clientid.cl_boot = new->cl_clientid.cl_boot; exid->clientid.cl_id = new->cl_clientid.cl_id; @@ -1761,12 +1774,13 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_conn *conn; struct nfsd4_clid_slot *cs_slot = NULL; __be32 status = 0; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; if (check_forechannel_attrs(cr_ses->fore_channel)) return nfserr_toosmall; - new = alloc_session(&cr_ses->fore_channel); + new = alloc_session(&cr_ses->fore_channel, nn); if (!new) return nfserr_jukebox; status = nfserr_jukebox; @@ -1775,8 +1789,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_session; nfs4_lock_state(); - unconf = find_unconfirmed_client(&cr_ses->clientid, true); - conf = find_confirmed_client(&cr_ses->clientid, true); + unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); + conf = find_confirmed_client(&cr_ses->clientid, true, nn); if (conf) { cs_slot = &conf->cl_cs_slot; @@ -1789,7 +1803,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; } } else if (unconf) { - unsigned int hash; struct nfs4_client *old; if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { @@ -1803,8 +1816,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, status = nfserr_seq_misordered; goto out_free_conn; } - hash = clientstr_hashval(unconf->cl_recdir); - old = find_confirmed_client_by_str(unconf->cl_recdir, hash); + old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) expire_client(old); move_to_confirmed(unconf); @@ -1843,14 +1855,6 @@ out_free_session: goto out; } -static bool nfsd4_last_compound_op(struct svc_rqst *rqstp) -{ - struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct nfsd4_compoundargs *argp = rqstp->rq_argp; - - return argp->opcnt == resp->opcnt; -} - static __be32 nfsd4_map_bcts_dir(u32 *dir) { switch (*dir) { @@ -1865,24 +1869,40 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir) return nfserr_inval; } +__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc) +{ + struct nfsd4_session *session = cstate->session; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + + spin_lock(&nn->client_lock); + session->se_cb_prog = bc->bc_cb_program; + session->se_cb_sec = bc->bc_cb_sec; + spin_unlock(&nn->client_lock); + + nfsd4_probe_callback(session->se_client); + + return nfs_ok; +} + __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_bind_conn_to_session *bcts) { __be32 status; struct nfsd4_conn *conn; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (!nfsd4_last_compound_op(rqstp)) return nfserr_not_only_op; - spin_lock(&client_lock); - cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid); + spin_lock(&nn->client_lock); + cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); /* Sorta weird: we only need the refcnt'ing because new_conn acquires * client_lock iself: */ if (cstate->session) { nfsd4_get_session(cstate->session); atomic_inc(&cstate->session->se_client->cl_refcount); } - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); if (!cstate->session) return nfserr_badsession; @@ -1910,6 +1930,7 @@ nfsd4_destroy_session(struct svc_rqst *r, { struct nfsd4_session *ses; __be32 status = nfserr_badsession; + struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); /* Notes: * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid @@ -1923,24 +1944,24 @@ nfsd4_destroy_session(struct svc_rqst *r, return nfserr_not_only_op; } dump_sessionid(__func__, &sessionid->sessionid); - spin_lock(&client_lock); - ses = find_in_sessionid_hashtbl(&sessionid->sessionid); + spin_lock(&nn->client_lock); + ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r)); if (!ses) { - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); goto out; } unhash_session(ses); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); nfs4_lock_state(); nfsd4_probe_callback_sync(ses->se_client); nfs4_unlock_state(); - spin_lock(&client_lock); + spin_lock(&nn->client_lock); nfsd4_del_conns(ses); nfsd4_put_session_locked(ses); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); status = nfs_ok; out: dprintk("%s returns %d\n", __func__, ntohl(status)); @@ -2006,6 +2027,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_slot *slot; struct nfsd4_conn *conn; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (resp->opcnt != 1) return nfserr_sequence_pos; @@ -2018,9 +2040,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (!conn) return nfserr_jukebox; - spin_lock(&client_lock); + spin_lock(&nn->client_lock); status = nfserr_badsession; - session = find_in_sessionid_hashtbl(&seq->sessionid); + session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp)); if (!session) goto out; @@ -2094,7 +2116,7 @@ out: } } kfree(conn); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); dprintk("%s: return %d\n", __func__, ntohl(status)); return status; } @@ -2104,10 +2126,11 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta { struct nfs4_client *conf, *unconf, *clp; __be32 status = 0; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); nfs4_lock_state(); - unconf = find_unconfirmed_client(&dc->clientid, true); - conf = find_confirmed_client(&dc->clientid, true); + unconf = find_unconfirmed_client(&dc->clientid, true, nn); + conf = find_confirmed_client(&dc->clientid, true, nn); if (conf) { clp = conf; @@ -2181,20 +2204,13 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct xdr_netobj clname = setclid->se_name; nfs4_verifier clverifier = setclid->se_verf; - unsigned int strhashval; struct nfs4_client *conf, *unconf, *new; __be32 status; - char dname[HEXDIR_LEN]; - - status = nfs4_make_rec_clidname(dname, &clname); - if (status) - return status; - - strhashval = clientstr_hashval(dname); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); /* Cases below refer to rfc 3530 section 14.2.33: */ nfs4_lock_state(); - conf = find_confirmed_client_by_str(dname, strhashval); + conf = find_confirmed_client_by_name(&clname, nn); if (conf) { /* case 0: */ status = nfserr_clid_inuse; @@ -2209,21 +2225,21 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } } - unconf = find_unconfirmed_client_by_str(dname, strhashval); + unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) expire_client(unconf); status = nfserr_jukebox; - new = create_client(clname, dname, rqstp, &clverifier); + new = create_client(clname, rqstp, &clverifier); if (new == NULL) goto out; if (conf && same_verf(&conf->cl_verifier, &clverifier)) /* case 1: probable callback update */ copy_clid(new, conf); else /* case 4 (new client) or cases 2, 3 (client reboot): */ - gen_clid(new); + gen_clid(new, nn); new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); - add_to_unconfirmed(new, strhashval); + add_to_unconfirmed(new); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); @@ -2243,14 +2259,14 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; __be32 status; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; nfs4_lock_state(); - conf = find_confirmed_client(clid, false); - unconf = find_unconfirmed_client(clid, false); + conf = find_confirmed_client(clid, false, nn); + unconf = find_unconfirmed_client(clid, false, nn); /* * We try hard to give out unique clientid's, so if we get an * attempt to confirm the same clientid with a different cred, @@ -2276,9 +2292,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, nfsd4_probe_callback(conf); expire_client(unconf); } else { /* case 3: normal case; new or rebooted client */ - unsigned int hash = clientstr_hashval(unconf->cl_recdir); - - conf = find_confirmed_client_by_str(unconf->cl_recdir, hash); + conf = find_confirmed_client_by_name(&unconf->cl_name, nn); if (conf) expire_client(conf); move_to_confirmed(unconf); @@ -2340,7 +2354,7 @@ nfsd4_init_slabs(void) if (openowner_slab == NULL) goto out_nomem; lockowner_slab = kmem_cache_create("nfsd4_lockowners", - sizeof(struct nfs4_openowner), 0, 0, NULL); + sizeof(struct nfs4_lockowner), 0, 0, NULL); if (lockowner_slab == NULL) goto out_nomem; file_slab = kmem_cache_create("nfsd4_files", @@ -2404,7 +2418,9 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { - list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); list_add(&oo->oo_perclient, &clp->cl_openowners); } @@ -2444,11 +2460,13 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, } static void -move_to_close_lru(struct nfs4_openowner *oo) +move_to_close_lru(struct nfs4_openowner *oo, struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); - list_move_tail(&oo->oo_close_lru, &close_lru); + list_move_tail(&oo->oo_close_lru, &nn->close_lru); oo->oo_time = get_seconds(); } @@ -2462,13 +2480,14 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, } static struct nfs4_openowner * -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, bool sessions) +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, + bool sessions, struct nfsd_net *nn) { struct nfs4_stateowner *so; struct nfs4_openowner *oo; struct nfs4_client *clp; - list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) { if (!so->so_is_open_owner) continue; if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { @@ -2555,9 +2574,14 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; struct nfs4_delegation *dp; - BUG_ON(!fp); - /* We assume break_lease is only called once per lease: */ - BUG_ON(fp->fi_had_conflict); + if (!fp) { + WARN(1, "(%p)->fl_owner NULL\n", fl); + return; + } + if (fp->fi_had_conflict) { + WARN(1, "duplicate break on %p\n", fp); + return; + } /* * We don't want the locks code to timeout the lease for us; * we'll remove it ourself if a delegation isn't returned @@ -2599,14 +2623,13 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4 __be32 nfsd4_process_open1(struct nfsd4_compound_state *cstate, - struct nfsd4_open *open) + struct nfsd4_open *open, struct nfsd_net *nn) { clientid_t *clientid = &open->op_clientid; struct nfs4_client *clp = NULL; unsigned int strhashval; struct nfs4_openowner *oo = NULL; __be32 status; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); if (STALE_CLIENTID(&open->op_clientid, nn)) return nfserr_stale_clientid; @@ -2619,10 +2642,11 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, return nfserr_jukebox; strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); - oo = find_openstateowner_str(strhashval, open, cstate->minorversion); + oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn); open->op_openowner = oo; if (!oo) { - clp = find_confirmed_client(clientid, cstate->minorversion); + clp = find_confirmed_client(clientid, cstate->minorversion, + nn); if (clp == NULL) return nfserr_expired; goto new_owner; @@ -2891,7 +2915,7 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) open->op_why_no_deleg = WND4_CANCELLED; break; case NFS4_SHARE_WANT_NO_DELEG: - BUG(); /* not supposed to get here */ + WARN_ON_ONCE(1); } } } @@ -2959,6 +2983,7 @@ out: } return; out_free: + unhash_stid(&dp->dl_stid); nfs4_put_delegation(dp); out_no_deleg: flag = NFS4_OPEN_DELEGATE_NONE; @@ -3104,27 +3129,32 @@ void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) free_generic_stateid(open->op_stp); } +static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp) +{ + struct nfs4_client *found; + + if (STALE_CLIENTID(clid, nn)) + return nfserr_stale_clientid; + found = find_confirmed_client(clid, session, nn); + if (clp) + *clp = found; + return found ? nfs_ok : nfserr_expired; +} + __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, clientid_t *clid) { struct nfs4_client *clp; __be32 status; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); nfs4_lock_state(); dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); - status = nfserr_stale_clientid; - if (STALE_CLIENTID(clid, nn)) - goto out; - clp = find_confirmed_client(clid, cstate->minorversion); - status = nfserr_expired; - if (clp == NULL) { - /* We assume the client took too long to RENEW. */ - dprintk("nfsd4_renew: clientid not found!\n"); + status = lookup_clientid(clid, cstate->minorversion, nn, &clp); + if (status) goto out; - } status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) && clp->cl_cb_state != NFSD4_CB_UP) @@ -3136,44 +3166,42 @@ out: } static void -nfsd4_end_grace(struct net *net) +nfsd4_end_grace(struct nfsd_net *nn) { - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - /* do nothing if grace period already ended */ if (nn->grace_ended) return; dprintk("NFSD: end of grace period\n"); nn->grace_ended = true; - nfsd4_record_grace_done(net, nn->boot_time); + nfsd4_record_grace_done(nn, nn->boot_time); locks_end_grace(&nn->nfsd4_manager); /* * Now that every NFSv4 client has had the chance to recover and * to see the (possibly new, possibly shorter) lease time, we * can safely set the next grace time to the current lease time: */ - nfsd4_grace = nfsd4_lease; + nn->nfsd4_grace = nn->nfsd4_lease; } static time_t -nfs4_laundromat(void) +nfs4_laundromat(struct nfsd_net *nn) { struct nfs4_client *clp; struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head *pos, *next, reaplist; - time_t cutoff = get_seconds() - nfsd4_lease; - time_t t, clientid_val = nfsd4_lease; - time_t u, test_val = nfsd4_lease; + time_t cutoff = get_seconds() - nn->nfsd4_lease; + time_t t, clientid_val = nn->nfsd4_lease; + time_t u, test_val = nn->nfsd4_lease; nfs4_lock_state(); dprintk("NFSD: laundromat service - starting\n"); - nfsd4_end_grace(&init_net); + nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); - spin_lock(&client_lock); - list_for_each_safe(pos, next, &client_lru) { + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { t = clp->cl_time - cutoff; @@ -3189,7 +3217,7 @@ nfs4_laundromat(void) unhash_client_locked(clp); list_add(&clp->cl_lru, &reaplist); } - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); list_for_each_safe(pos, next, &reaplist) { clp = list_entry(pos, struct nfs4_client, cl_lru); dprintk("NFSD: purging unused client (clientid %08x)\n", @@ -3199,6 +3227,8 @@ nfs4_laundromat(void) spin_lock(&recall_lock); list_for_each_safe(pos, next, &del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) + continue; if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { u = dp->dl_time - cutoff; if (test_val > u) @@ -3212,8 +3242,8 @@ nfs4_laundromat(void) dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); unhash_delegation(dp); } - test_val = nfsd4_lease; - list_for_each_safe(pos, next, &close_lru) { + test_val = nn->nfsd4_lease; + list_for_each_safe(pos, next, &nn->close_lru) { oo = container_of(pos, struct nfs4_openowner, oo_close_lru); if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { u = oo->oo_time - cutoff; @@ -3231,16 +3261,19 @@ nfs4_laundromat(void) static struct workqueue_struct *laundry_wq; static void laundromat_main(struct work_struct *); -static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main); static void -laundromat_main(struct work_struct *not_used) +laundromat_main(struct work_struct *laundry) { time_t t; + struct delayed_work *dwork = container_of(laundry, struct delayed_work, + work); + struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + laundromat_work); - t = nfs4_laundromat(); + t = nfs4_laundromat(nn); dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); - queue_delayed_work(laundry_wq, &laundromat_work, t*HZ); + queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) @@ -3385,16 +3418,17 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) return nfs_ok; } -static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, bool sessions) +static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, + struct nfs4_stid **s, bool sessions, + struct nfsd_net *nn) { struct nfs4_client *cl; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; if (STALE_STATEID(stateid, nn)) return nfserr_stale_stateid; - cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions); + cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions, nn); if (!cl) return nfserr_expired; *s = find_stateid_by_type(cl, stateid, typemask); @@ -3416,6 +3450,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, struct nfs4_delegation *dp = NULL; struct svc_fh *current_fh = &cstate->current_fh; struct inode *ino = current_fh->fh_dentry->d_inode; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); __be32 status; if (filpp) @@ -3427,7 +3462,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return check_special_stateids(net, current_fh, stateid, flags); - status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, cstate->minorversion); + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, + &s, cstate->minorversion, nn); if (status) return status; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); @@ -3441,7 +3477,11 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, goto out; if (filpp) { *filpp = dp->dl_file->fi_deleg_file; - BUG_ON(!*filpp); + if (!*filpp) { + WARN_ON_ONCE(1); + status = nfserr_serverfault; + goto out; + } } break; case NFS4_OPEN_STID: @@ -3568,7 +3608,8 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ static __be32 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, char typemask, - struct nfs4_ol_stateid **stpp) + struct nfs4_ol_stateid **stpp, + struct nfsd_net *nn) { __be32 status; struct nfs4_stid *s; @@ -3577,7 +3618,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, seqid, STATEID_VAL(stateid)); *stpp = NULL; - status = nfsd4_lookup_stateid(stateid, typemask, &s, cstate->minorversion); + status = nfsd4_lookup_stateid(stateid, typemask, &s, + cstate->minorversion, nn); if (status) return status; *stpp = openlockstateid(s); @@ -3586,13 +3628,14 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); } -static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp) +static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, + stateid_t *stateid, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn) { __be32 status; struct nfs4_openowner *oo; status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, - NFS4_OPEN_STID, stpp); + NFS4_OPEN_STID, stpp, nn); if (status) return status; oo = openowner((*stpp)->st_stateowner); @@ -3608,6 +3651,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", (int)cstate->current_fh.fh_dentry->d_name.len, @@ -3621,7 +3665,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_seqid_op(cstate, oc->oc_seqid, &oc->oc_req_stateid, - NFS4_OPEN_STID, &stp); + NFS4_OPEN_STID, &stp, nn); if (status) goto out; oo = openowner(stp->st_stateowner); @@ -3664,7 +3708,7 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac case NFS4_SHARE_ACCESS_BOTH: break; default: - BUG(); + WARN_ON_ONCE(1); } } @@ -3685,6 +3729,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, { __be32 status; struct nfs4_ol_stateid *stp; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", (int)cstate->current_fh.fh_dentry->d_name.len, @@ -3697,7 +3742,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, nfs4_lock_state(); status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, - &od->od_stateid, &stp); + &od->od_stateid, &stp, nn); if (status) goto out; status = nfserr_inval; @@ -3760,6 +3805,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dprintk("NFSD: nfsd4_close on file %.*s\n", (int)cstate->current_fh.fh_dentry->d_name.len, @@ -3769,7 +3816,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, &close->cl_stateid, NFS4_OPEN_STID|NFS4_CLOSED_STID, - &stp); + &stp, nn); if (status) goto out; oo = openowner(stp->st_stateowner); @@ -3791,7 +3838,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * little while to handle CLOSE replay. */ if (list_empty(&oo->oo_owner.so_stateids)) - move_to_close_lru(oo); + move_to_close_lru(oo, SVC_NET(rqstp)); } } out: @@ -3807,15 +3854,15 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_delegation *dp; stateid_t *stateid = &dr->dr_stateid; struct nfs4_stid *s; - struct inode *inode; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - inode = cstate->current_fh.fh_dentry->d_inode; nfs4_lock_state(); - status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, cstate->minorversion); + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, + cstate->minorversion, nn); if (status) goto out; dp = delegstateid(s); @@ -3833,8 +3880,6 @@ out: #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) -#define LOCKOWNER_INO_HASH_BITS 8 -#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) #define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) static inline u64 @@ -3852,7 +3897,7 @@ last_byte_offset(u64 start, u64 len) { u64 end; - BUG_ON(!len); + WARN_ON_ONCE(!len); end = start + len; return end > start ? end - 1: NFS4_MAX_UINT64; } @@ -3864,8 +3909,6 @@ static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct & LOCKOWNER_INO_HASH_MASK; } -static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE]; - /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -3931,12 +3974,12 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, - struct xdr_netobj *owner) + struct xdr_netobj *owner, struct nfsd_net *nn) { unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); struct nfs4_lockowner *lo; - list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { + list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { if (same_lockowner_ino(lo, inode, clid, owner)) return lo; } @@ -3948,9 +3991,10 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s struct inode *inode = open_stp->st_file->fi_inode; unsigned int inohash = lockowner_ino_hashval(inode, clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); - list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]); + list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]); list_add(&lo->lo_perstateid, &open_stp->st_lockowners); } @@ -4024,8 +4068,10 @@ static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, s struct nfs4_client *cl = oo->oo_owner.so_client; struct nfs4_lockowner *lo; unsigned int strhashval; + struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id); - lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner); + lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, + &lock->v.new.owner, nn); if (lo) { if (!cstate->minorversion) return nfserr_bad_seqid; @@ -4065,7 +4111,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, bool new_state = false; int lkflg; int err; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", (long long) lock->lk_offset, @@ -4099,7 +4146,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_confirmed_seqid_op(cstate, lock->lk_new_open_seqid, &lock->lk_new_open_stateid, - &open_stp); + &open_stp, nn); if (status) goto out; open_sop = openowner(open_stp->st_stateowner); @@ -4113,7 +4160,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, - NFS4_LOCK_STID, &lock_stp); + NFS4_LOCK_STID, &lock_stp, nn); if (status) goto out; lock_sop = lockowner(lock_stp->st_stateowner); @@ -4124,10 +4171,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = nfserr_grace; - if (locks_in_grace(SVC_NET(rqstp)) && !lock->lk_reclaim) + if (locks_in_grace(net) && !lock->lk_reclaim) goto out; status = nfserr_no_grace; - if (!locks_in_grace(SVC_NET(rqstp)) && lock->lk_reclaim) + if (!locks_in_grace(net) && lock->lk_reclaim) goto out; file_lock = locks_alloc_lock(); @@ -4238,7 +4285,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file_lock *file_lock = NULL; struct nfs4_lockowner *lo; __be32 status; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (locks_in_grace(SVC_NET(rqstp))) return nfserr_grace; @@ -4248,9 +4295,11 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); - status = nfserr_stale_clientid; - if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid, nn)) - goto out; + if (!nfsd4_has_session(cstate)) { + status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL); + if (status) + goto out; + } if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) goto out; @@ -4278,7 +4327,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner); + lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn); if (lo) file_lock->fl_owner = (fl_owner_t)lo; file_lock->fl_pid = current->tgid; @@ -4313,7 +4362,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file_lock *file_lock = NULL; __be32 status; int err; - + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n", (long long) locku->lu_offset, (long long) locku->lu_length); @@ -4324,7 +4374,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, - &locku->lu_stateid, NFS4_LOCK_STID, &stp); + &locku->lu_stateid, NFS4_LOCK_STID, + &stp, nn); if (status) goto out; filp = find_any_file(stp->st_file); @@ -4414,23 +4465,21 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct list_head matches; unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); __be32 status; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - /* XXX check for lease expiration */ - - status = nfserr_stale_clientid; - if (STALE_CLIENTID(clid, nn)) - return status; - nfs4_lock_state(); + status = lookup_clientid(clid, cstate->minorversion, nn, NULL); + if (status) + goto out; + status = nfserr_locks_held; INIT_LIST_HEAD(&matches); - list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) { if (sop->so_is_open_owner) continue; if (!same_owner_str(sop, owner, clid)) @@ -4466,73 +4515,74 @@ alloc_reclaim(void) return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); } -int -nfs4_has_reclaimed_state(const char *name, bool use_exchange_id) +bool +nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn) { - unsigned int strhashval = clientstr_hashval(name); - struct nfs4_client *clp; + struct nfs4_client_reclaim *crp; - clp = find_confirmed_client_by_str(name, strhashval); - if (!clp) - return 0; - return test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + crp = nfsd4_find_reclaim_client(name, nn); + return (crp && crp->cr_clp); } /* * failure => all reset bets are off, nfserr_no_grace... */ -int -nfs4_client_to_reclaim(const char *name) +struct nfs4_client_reclaim * +nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn) { unsigned int strhashval; - struct nfs4_client_reclaim *crp = NULL; + struct nfs4_client_reclaim *crp; dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name); crp = alloc_reclaim(); - if (!crp) - return 0; - strhashval = clientstr_hashval(name); - INIT_LIST_HEAD(&crp->cr_strhash); - list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]); - memcpy(crp->cr_recdir, name, HEXDIR_LEN); - reclaim_str_hashtbl_size++; - return 1; + if (crp) { + strhashval = clientstr_hashval(name); + INIT_LIST_HEAD(&crp->cr_strhash); + list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]); + memcpy(crp->cr_recdir, name, HEXDIR_LEN); + crp->cr_clp = NULL; + nn->reclaim_str_hashtbl_size++; + } + return crp; +} + +void +nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn) +{ + list_del(&crp->cr_strhash); + kfree(crp); + nn->reclaim_str_hashtbl_size--; } void -nfs4_release_reclaim(void) +nfs4_release_reclaim(struct nfsd_net *nn) { struct nfs4_client_reclaim *crp = NULL; int i; for (i = 0; i < CLIENT_HASH_SIZE; i++) { - while (!list_empty(&reclaim_str_hashtbl[i])) { - crp = list_entry(reclaim_str_hashtbl[i].next, + while (!list_empty(&nn->reclaim_str_hashtbl[i])) { + crp = list_entry(nn->reclaim_str_hashtbl[i].next, struct nfs4_client_reclaim, cr_strhash); - list_del(&crp->cr_strhash); - kfree(crp); - reclaim_str_hashtbl_size--; + nfs4_remove_reclaim_record(crp, nn); } } - BUG_ON(reclaim_str_hashtbl_size); + WARN_ON_ONCE(nn->reclaim_str_hashtbl_size); } /* * called from OPEN, CLAIM_PREVIOUS with a new clientid. */ struct nfs4_client_reclaim * -nfsd4_find_reclaim_client(struct nfs4_client *clp) +nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn) { unsigned int strhashval; struct nfs4_client_reclaim *crp = NULL; - dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n", - clp->cl_name.len, clp->cl_name.data, - clp->cl_recdir); + dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir); - /* find clp->cl_name in reclaim_str_hashtbl */ - strhashval = clientstr_hashval(clp->cl_recdir); - list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) { - if (same_name(crp->cr_recdir, clp->cl_recdir)) { + strhashval = clientstr_hashval(recdir); + list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) { + if (same_name(crp->cr_recdir, recdir)) { return crp; } } @@ -4543,12 +4593,12 @@ nfsd4_find_reclaim_client(struct nfs4_client *clp) * Called from OPEN. Look for clientid in reclaim list. */ __be32 -nfs4_check_open_reclaim(clientid_t *clid, bool sessions) +nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct nfs4_client *clp; /* find clientid in conf_id_hashtbl */ - clp = find_confirmed_client(clid, sessions); + clp = find_confirmed_client(clid, sessions, nn); if (clp == NULL) return nfserr_reclaim_bad; @@ -4557,124 +4607,177 @@ nfs4_check_open_reclaim(clientid_t *clid, bool sessions) #ifdef CONFIG_NFSD_FAULT_INJECTION -void nfsd_forget_clients(u64 num) +u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) { - struct nfs4_client *clp, *next; - int count = 0; - - nfs4_lock_state(); - list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { - expire_client(clp); - if (++count == num) - break; - } - nfs4_unlock_state(); - - printk(KERN_INFO "NFSD: Forgot %d clients", count); + expire_client(clp); + return 1; } -static void release_lockowner_sop(struct nfs4_stateowner *sop) +u64 nfsd_print_client(struct nfs4_client *clp, u64 num) { - release_lockowner(lockowner(sop)); + char buf[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFS Client: %s\n", buf); + return 1; } -static void release_openowner_sop(struct nfs4_stateowner *sop) +static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, + const char *type) { - release_openowner(openowner(sop)); + char buf[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); } -static int nfsd_release_n_owners(u64 num, bool is_open_owner, - void (*release_sop)(struct nfs4_stateowner *)) +static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *)) { - int i, count = 0; - struct nfs4_stateowner *sop, *next; + struct nfs4_openowner *oop; + struct nfs4_lockowner *lop, *lo_next; + struct nfs4_ol_stateid *stp, *st_next; + u64 count = 0; - for (i = 0; i < OWNER_HASH_SIZE; i++) { - list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) { - if (sop->so_is_open_owner != is_open_owner) - continue; - release_sop(sop); - if (++count == num) - return count; + list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { + list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) { + list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) { + if (func) + func(lop); + if (++count == max) + return count; + } } } + return count; } -void nfsd_forget_locks(u64 num) +u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max) { - int count; - - nfs4_lock_state(); - count = nfsd_release_n_owners(num, false, release_lockowner_sop); - nfs4_unlock_state(); + return nfsd_foreach_client_lock(clp, max, release_lockowner); +} - printk(KERN_INFO "NFSD: Forgot %d locks", count); +u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max) +{ + u64 count = nfsd_foreach_client_lock(clp, max, NULL); + nfsd_print_count(clp, count, "locked files"); + return count; } -void nfsd_forget_openowners(u64 num) +static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *)) { - int count; + struct nfs4_openowner *oop, *next; + u64 count = 0; - nfs4_lock_state(); - count = nfsd_release_n_owners(num, true, release_openowner_sop); - nfs4_unlock_state(); + list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) { + if (func) + func(oop); + if (++count == max) + break; + } - printk(KERN_INFO "NFSD: Forgot %d open owners", count); + return count; } -static int nfsd_process_n_delegations(u64 num, struct list_head *list) +u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max) { - int i, count = 0; - struct nfs4_file *fp, *fnext; - struct nfs4_delegation *dp, *dnext; + return nfsd_foreach_client_open(clp, max, release_openowner); +} - for (i = 0; i < FILE_HASH_SIZE; i++) { - list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) { - list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) { - list_move(&dp->dl_recall_lru, list); - if (++count == num) - return count; - } - } - } +u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max) +{ + u64 count = nfsd_foreach_client_open(clp, max, NULL); + nfsd_print_count(clp, count, "open files"); + return count; +} + +static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, + struct list_head *victims) +{ + struct nfs4_delegation *dp, *next; + u64 count = 0; + list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { + if (victims) + list_move(&dp->dl_recall_lru, victims); + if (++count == max) + break; + } return count; } -void nfsd_forget_delegations(u64 num) +u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) { - unsigned int count; + struct nfs4_delegation *dp, *next; LIST_HEAD(victims); - struct nfs4_delegation *dp, *dnext; + u64 count; spin_lock(&recall_lock); - count = nfsd_process_n_delegations(num, &victims); + count = nfsd_find_all_delegations(clp, max, &victims); spin_unlock(&recall_lock); - nfs4_lock_state(); - list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru) + list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) unhash_delegation(dp); - nfs4_unlock_state(); - printk(KERN_INFO "NFSD: Forgot %d delegations", count); + return count; } -void nfsd_recall_delegations(u64 num) +u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) { - unsigned int count; + struct nfs4_delegation *dp, *next; LIST_HEAD(victims); - struct nfs4_delegation *dp, *dnext; + u64 count; spin_lock(&recall_lock); - count = nfsd_process_n_delegations(num, &victims); - list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru) { - list_del(&dp->dl_recall_lru); + count = nfsd_find_all_delegations(clp, max, &victims); + list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) nfsd_break_one_deleg(dp); - } spin_unlock(&recall_lock); - printk(KERN_INFO "NFSD: Recalled %d delegations", count); + return count; +} + +u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) +{ + u64 count = 0; + + spin_lock(&recall_lock); + count = nfsd_find_all_delegations(clp, max, NULL); + spin_unlock(&recall_lock); + + nfsd_print_count(clp, count, "delegations"); + return count; +} + +u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64)) +{ + struct nfs4_client *clp, *next; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return 0; + + list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { + count += func(clp, max - count); + if ((max != 0) && (count >= max)) + break; + } + + return count; +} + +struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) +{ + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return NULL; + + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + if (memcmp(&clp->cl_addr, addr, addr_size) == 0) + return clp; + } + return NULL; } #endif /* CONFIG_NFSD_FAULT_INJECTION */ @@ -4686,27 +4789,10 @@ nfs4_state_init(void) { int i; - for (i = 0; i < CLIENT_HASH_SIZE; i++) { - INIT_LIST_HEAD(&conf_id_hashtbl[i]); - INIT_LIST_HEAD(&conf_str_hashtbl[i]); - INIT_LIST_HEAD(&unconf_str_hashtbl[i]); - INIT_LIST_HEAD(&unconf_id_hashtbl[i]); - INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); - } - for (i = 0; i < SESSION_HASH_SIZE; i++) - INIT_LIST_HEAD(&sessionid_hashtbl[i]); for (i = 0; i < FILE_HASH_SIZE; i++) { INIT_LIST_HEAD(&file_hashtbl[i]); } - for (i = 0; i < OWNER_HASH_SIZE; i++) { - INIT_LIST_HEAD(&ownerstr_hashtbl[i]); - } - for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) - INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); - INIT_LIST_HEAD(&close_lru); - INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); - reclaim_str_hashtbl_size = 0; } /* @@ -4730,12 +4816,100 @@ set_max_delegations(void) max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT); } -/* initialization to perform when the nfsd service is started: */ +static int nfs4_state_create_net(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int i; + + nn->conf_id_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->conf_id_hashtbl) + goto err; + nn->unconf_id_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->unconf_id_hashtbl) + goto err_unconf_id; + nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) * + OWNER_HASH_SIZE, GFP_KERNEL); + if (!nn->ownerstr_hashtbl) + goto err_ownerstr; + nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) * + LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL); + if (!nn->lockowner_ino_hashtbl) + goto err_lockowner_ino; + nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) * + SESSION_HASH_SIZE, GFP_KERNEL); + if (!nn->sessionid_hashtbl) + goto err_sessionid; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]); + INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]); + } + for (i = 0; i < OWNER_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]); + for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]); + for (i = 0; i < SESSION_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); + nn->conf_name_tree = RB_ROOT; + nn->unconf_name_tree = RB_ROOT; + INIT_LIST_HEAD(&nn->client_lru); + INIT_LIST_HEAD(&nn->close_lru); + spin_lock_init(&nn->client_lock); + + INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); + get_net(net); + + return 0; + +err_sessionid: + kfree(nn->lockowner_ino_hashtbl); +err_lockowner_ino: + kfree(nn->ownerstr_hashtbl); +err_ownerstr: + kfree(nn->unconf_id_hashtbl); +err_unconf_id: + kfree(nn->conf_id_hashtbl); +err: + return -ENOMEM; +} + +static void +nfs4_state_destroy_net(struct net *net) +{ + int i; + struct nfs4_client *clp = NULL; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct rb_node *node, *tmp; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&nn->conf_id_hashtbl[i])) { + clp = list_entry(nn->conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + destroy_client(clp); + } + } + + node = rb_first(&nn->unconf_name_tree); + while (node != NULL) { + tmp = node; + node = rb_next(tmp); + clp = rb_entry(tmp, struct nfs4_client, cl_namenode); + rb_erase(tmp, &nn->unconf_name_tree); + destroy_client(clp); + } + + kfree(nn->sessionid_hashtbl); + kfree(nn->lockowner_ino_hashtbl); + kfree(nn->ownerstr_hashtbl); + kfree(nn->unconf_id_hashtbl); + kfree(nn->conf_id_hashtbl); + put_net(net); +} int -nfs4_state_start(void) +nfs4_state_start_net(struct net *net) { - struct net *net = &init_net; struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; @@ -4746,18 +4920,32 @@ nfs4_state_start(void) * to that instead and then do most of the rest of this on a per-net * basis. */ - get_net(net); + if (net != &init_net) + return -EINVAL; + + ret = nfs4_state_create_net(net); + if (ret) + return ret; nfsd4_client_tracking_init(net); nn->boot_time = get_seconds(); locks_start_grace(net, &nn->nfsd4_manager); nn->grace_ended = false; - printk(KERN_INFO "NFSD: starting %ld-second grace period\n", - nfsd4_grace); + printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", + nn->nfsd4_grace, net); + queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); + return 0; +} + +/* initialization to perform when the nfsd service is started: */ + +int +nfs4_state_start(void) +{ + int ret; + ret = set_callback_cred(); - if (ret) { - ret = -ENOMEM; - goto out_recovery; - } + if (ret) + return -ENOMEM; laundry_wq = create_singlethread_workqueue("nfsd4"); if (laundry_wq == NULL) { ret = -ENOMEM; @@ -4766,39 +4954,34 @@ nfs4_state_start(void) ret = nfsd4_create_callback_queue(); if (ret) goto out_free_laundry; - queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ); + set_max_delegations(); + return 0; + out_free_laundry: destroy_workqueue(laundry_wq); out_recovery: - nfsd4_client_tracking_exit(net); - put_net(net); return ret; } -static void -__nfs4_state_shutdown(void) +/* should be called with the state lock held */ +void +nfs4_state_shutdown_net(struct net *net) { - int i; - struct nfs4_client *clp = NULL; struct nfs4_delegation *dp = NULL; struct list_head *pos, *next, reaplist; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + cancel_delayed_work_sync(&nn->laundromat_work); + locks_end_grace(&nn->nfsd4_manager); - for (i = 0; i < CLIENT_HASH_SIZE; i++) { - while (!list_empty(&conf_id_hashtbl[i])) { - clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); - destroy_client(clp); - } - while (!list_empty(&unconf_str_hashtbl[i])) { - clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash); - destroy_client(clp); - } - } INIT_LIST_HEAD(&reaplist); spin_lock(&recall_lock); list_for_each_safe(pos, next, &del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + if (dp->dl_stid.sc_client->net != net) + continue; list_move(&dp->dl_recall_lru, &reaplist); } spin_unlock(&recall_lock); @@ -4807,22 +4990,14 @@ __nfs4_state_shutdown(void) unhash_delegation(dp); } - nfsd4_client_tracking_exit(&init_net); - put_net(&init_net); + nfsd4_client_tracking_exit(net); + nfs4_state_destroy_net(net); } void nfs4_state_shutdown(void) { - struct net *net = &init_net; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - - cancel_delayed_work_sync(&laundromat_work); destroy_workqueue(laundry_wq); - locks_end_grace(&nn->nfsd4_manager); - nfs4_lock_state(); - __nfs4_state_shutdown(); - nfs4_unlock_state(); nfsd4_destroy_callback_queue(); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fd548d155088..0dc11586682f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -53,6 +53,7 @@ #include "vfs.h" #include "state.h" #include "cache.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -65,17 +66,17 @@ #define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL static __be32 -check_filename(char *str, int len, __be32 err) +check_filename(char *str, int len) { int i; if (len == 0) return nfserr_inval; if (isdotent(str, len)) - return err; + return nfserr_badname; for (i = 0; i < len; i++) if (str[i] == '/') - return err; + return nfserr_badname; return 0; } @@ -422,6 +423,86 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access DECODE_TAIL; } +static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) +{ + DECODE_HEAD; + u32 dummy, uid, gid; + char *machine_name; + int i; + int nr_secflavs; + + /* callback_sec_params4 */ + READ_BUF(4); + READ32(nr_secflavs); + cbs->flavor = (u32)(-1); + for (i = 0; i < nr_secflavs; ++i) { + READ_BUF(4); + READ32(dummy); + switch (dummy) { + case RPC_AUTH_NULL: + /* Nothing to read */ + if (cbs->flavor == (u32)(-1)) + cbs->flavor = RPC_AUTH_NULL; + break; + case RPC_AUTH_UNIX: + READ_BUF(8); + /* stamp */ + READ32(dummy); + + /* machine name */ + READ32(dummy); + READ_BUF(dummy); + SAVEMEM(machine_name, dummy); + + /* uid, gid */ + READ_BUF(8); + READ32(uid); + READ32(gid); + + /* more gids */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + if (cbs->flavor == (u32)(-1)) { + cbs->uid = uid; + cbs->gid = gid; + cbs->flavor = RPC_AUTH_UNIX; + } + break; + case RPC_AUTH_GSS: + dprintk("RPC_AUTH_GSS callback secflavor " + "not supported!\n"); + READ_BUF(8); + /* gcbp_service */ + READ32(dummy); + /* gcbp_handle_from_server */ + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + /* gcbp_handle_from_client */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + break; + default: + dprintk("Illegal callback secflavor\n"); + return nfserr_inval; + } + } + DECODE_TAIL; +} + +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(bc->bc_cb_program); + nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); + + DECODE_TAIL; +} + static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) { DECODE_HEAD; @@ -490,7 +571,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create READ32(create->cr_namelen); READ_BUF(create->cr_namelen); SAVEMEM(create->cr_name, create->cr_namelen); - if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) + if ((status = check_filename(create->cr_name, create->cr_namelen))) return status; status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, @@ -522,7 +603,7 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) READ32(link->li_namelen); READ_BUF(link->li_namelen); SAVEMEM(link->li_name, link->li_namelen); - if ((status = check_filename(link->li_name, link->li_namelen, nfserr_inval))) + if ((status = check_filename(link->li_name, link->li_namelen))) return status; DECODE_TAIL; @@ -616,7 +697,7 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup READ32(lookup->lo_len); READ_BUF(lookup->lo_len); SAVEMEM(lookup->lo_name, lookup->lo_len); - if ((status = check_filename(lookup->lo_name, lookup->lo_len, nfserr_noent))) + if ((status = check_filename(lookup->lo_name, lookup->lo_len))) return status; DECODE_TAIL; @@ -780,7 +861,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ32(open->op_fname.len); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + if ((status = check_filename(open->op_fname.data, open->op_fname.len))) return status; break; case NFS4_OPEN_CLAIM_PREVIOUS: @@ -795,7 +876,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ32(open->op_fname.len); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + if ((status = check_filename(open->op_fname.data, open->op_fname.len))) return status; break; case NFS4_OPEN_CLAIM_FH: @@ -907,7 +988,7 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove READ32(remove->rm_namelen); READ_BUF(remove->rm_namelen); SAVEMEM(remove->rm_name, remove->rm_namelen); - if ((status = check_filename(remove->rm_name, remove->rm_namelen, nfserr_noent))) + if ((status = check_filename(remove->rm_name, remove->rm_namelen))) return status; DECODE_TAIL; @@ -925,9 +1006,9 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename READ32(rename->rn_tnamelen); READ_BUF(rename->rn_tnamelen); SAVEMEM(rename->rn_tname, rename->rn_tnamelen); - if ((status = check_filename(rename->rn_sname, rename->rn_snamelen, nfserr_noent))) + if ((status = check_filename(rename->rn_sname, rename->rn_snamelen))) return status; - if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen, nfserr_inval))) + if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen))) return status; DECODE_TAIL; @@ -954,8 +1035,7 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, READ32(secinfo->si_namelen); READ_BUF(secinfo->si_namelen); SAVEMEM(secinfo->si_name, secinfo->si_namelen); - status = check_filename(secinfo->si_name, secinfo->si_namelen, - nfserr_noent); + status = check_filename(secinfo->si_name, secinfo->si_namelen); if (status) return status; DECODE_TAIL; @@ -1026,31 +1106,14 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s static __be32 nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) { -#if 0 - struct nfsd4_compoundargs save = { - .p = argp->p, - .end = argp->end, - .rqstp = argp->rqstp, - }; - u32 ve_bmval[2]; - struct iattr ve_iattr; /* request */ - struct nfs4_acl *ve_acl; /* request */ -#endif DECODE_HEAD; if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval))) goto out; /* For convenience's sake, we compare raw xdr'd attributes in - * nfsd4_proc_verify; however we still decode here just to return - * correct error in case of bad xdr. */ -#if 0 - status = nfsd4_decode_fattr(ve_bmval, &ve_iattr, &ve_acl); - if (status == nfserr_inval) { - status = nfserrno(status); - goto out; - } -#endif + * nfsd4_proc_verify */ + READ_BUF(4); READ32(verify->ve_attrlen); READ_BUF(verify->ve_attrlen); @@ -1063,7 +1126,6 @@ static __be32 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) { int avail; - int v; int len; DECODE_HEAD; @@ -1087,27 +1149,26 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) __FILE__, __LINE__); goto xdr_error; } - argp->rqstp->rq_vec[0].iov_base = p; - argp->rqstp->rq_vec[0].iov_len = avail; - v = 0; - len = write->wr_buflen; - while (len > argp->rqstp->rq_vec[v].iov_len) { - len -= argp->rqstp->rq_vec[v].iov_len; - v++; - argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen >= PAGE_SIZE) { - argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE; - argp->pagelen -= PAGE_SIZE; - } else { - argp->rqstp->rq_vec[v].iov_len = argp->pagelen; - argp->pagelen -= len; - } + write->wr_head.iov_base = p; + write->wr_head.iov_len = avail; + WARN_ON(avail != (XDR_QUADLEN(avail) << 2)); + write->wr_pagelist = argp->pagelist; + + len = XDR_QUADLEN(write->wr_buflen) << 2; + if (len >= avail) { + int pages; + + len -= avail; + + pages = len >> PAGE_SHIFT; + argp->pagelist += pages; + argp->pagelen -= pages * PAGE_SIZE; + len -= pages * PAGE_SIZE; + + argp->p = (__be32 *)page_address(argp->pagelist[0]); + argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); } - argp->end = (__be32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len); - argp->p = (__be32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); - argp->rqstp->rq_vec[v].iov_len = len; - write->wr_vlen = v+1; + argp->p += XDR_QUADLEN(len); DECODE_TAIL; } @@ -1237,11 +1298,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, struct nfsd4_create_session *sess) { DECODE_HEAD; - u32 dummy; - char *machine_name; - int i; - int nr_secflavs; READ_BUF(16); COPYMEM(&sess->clientid, 8); @@ -1282,58 +1339,9 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, goto xdr_error; } - READ_BUF(8); + READ_BUF(4); READ32(sess->callback_prog); - - /* callback_sec_params4 */ - READ32(nr_secflavs); - for (i = 0; i < nr_secflavs; ++i) { - READ_BUF(4); - READ32(dummy); - switch (dummy) { - case RPC_AUTH_NULL: - /* Nothing to read */ - break; - case RPC_AUTH_UNIX: - READ_BUF(8); - /* stamp */ - READ32(dummy); - - /* machine name */ - READ32(dummy); - READ_BUF(dummy); - SAVEMEM(machine_name, dummy); - - /* uid, gid */ - READ_BUF(8); - READ32(sess->uid); - READ32(sess->gid); - - /* more gids */ - READ_BUF(4); - READ32(dummy); - READ_BUF(dummy * 4); - break; - case RPC_AUTH_GSS: - dprintk("RPC_AUTH_GSS callback secflavor " - "not supported!\n"); - READ_BUF(8); - /* gcbp_service */ - READ32(dummy); - /* gcbp_handle_from_server */ - READ32(dummy); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); - /* gcbp_handle_from_client */ - READ_BUF(4); - READ32(dummy); - READ_BUF(dummy); - break; - default: - dprintk("Illegal callback secflavor\n"); - return nfserr_inval; - } - } + nfsd4_decode_cb_sec(argp, &sess->cb_sec); DECODE_TAIL; } @@ -1528,7 +1536,7 @@ static nfsd4_dec nfsd41_dec_ops[] = { [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp, /* new operations for NFSv4.1 */ - [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, @@ -1568,12 +1576,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) bool cachethis = false; int i; - /* - * XXX: According to spec, we should check the tag - * for UTF-8 compliance. I'm postponing this for - * now because it seems that some clients do use - * binary tags. - */ READ_BUF(4); READ32(argp->taglen); READ_BUF(argp->taglen + 8); @@ -1603,38 +1605,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op = &argp->ops[i]; op->replay = NULL; - /* - * We can't use READ_BUF() here because we need to handle - * a missing opcode as an OP_WRITE + 1. So we need to check - * to see if we're truly at the end of our buffer or if there - * is another page we need to flip to. - */ - - if (argp->p == argp->end) { - if (argp->pagelen < 4) { - /* There isn't an opcode still on the wire */ - op->opnum = OP_WRITE + 1; - op->status = nfserr_bad_xdr; - argp->opcnt = i+1; - break; - } - - /* - * False alarm. We just hit a page boundary, but there - * is still data available. Move pointer across page - * boundary. *snip from READ_BUF* - */ - argp->p = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); - argp->pagelen = 0; - } else { - argp->end = argp->p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } - } - op->opnum = ntohl(*argp->p++); + READ_BUF(4); + READ32(op->opnum); if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP) op->status = ops->decoders[op->opnum](argp, &op->u); @@ -2014,6 +1986,22 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) return 0; } + +static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) +{ + struct path path = exp->ex_path; + int err; + + path_get(&path); + while (follow_up(&path)) { + if (path.dentry != path.mnt->mnt_root) + break; + } + err = vfs_getattr(path.mnt, path.dentry, stat); + path_put(&path); + return err; +} + /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. @@ -2048,6 +2036,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, .mnt = exp->ex_path.mnt, .dentry = dentry, }; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); @@ -2208,7 +2197,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (bmval0 & FATTR4_WORD0_LEASE_TIME) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32(nfsd4_lease); + WRITE32(nn->nfsd4_lease); } if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { if ((buflen -= 4) < 0) @@ -2430,18 +2419,8 @@ out_acl: * and this is the root of a cross-mounted filesystem. */ if (ignore_crossmnt == 0 && - dentry == exp->ex_path.mnt->mnt_root) { - struct path path = exp->ex_path; - path_get(&path); - while (follow_up(&path)) { - if (path.dentry != path.mnt->mnt_root) - break; - } - err = vfs_getattr(path.mnt, path.dentry, &stat); - path_put(&path); - if (err) - goto out_nfserr; - } + dentry == exp->ex_path.mnt->mnt_root) + get_parent_attributes(exp, &stat); WRITE64(stat.ino); } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { @@ -2927,7 +2906,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { u32 eof; - int v, pn; + int v; + struct page *page; unsigned long maxcount; long len; __be32 *p; @@ -2946,11 +2926,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, len = maxcount; v = 0; while (len > 0) { - pn = resp->rqstp->rq_resused++; - resp->rqstp->rq_vec[v].iov_base = - page_address(resp->rqstp->rq_respages[pn]); + page = *(resp->rqstp->rq_next_page); + if (!page) { /* ran out of pages */ + maxcount -= len; + break; + } + resp->rqstp->rq_vec[v].iov_base = page_address(page); resp->rqstp->rq_vec[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE; + resp->rqstp->rq_next_page++; v++; len -= PAGE_SIZE; } @@ -2996,8 +2980,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd return nfserr; if (resp->xbuf->page_len) return nfserr_resource; + if (!*resp->rqstp->rq_next_page) + return nfserr_resource; - page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); + page = page_address(*(resp->rqstp->rq_next_page++)); maxcount = PAGE_SIZE; RESERVE_SPACE(4); @@ -3045,6 +3031,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return nfserr; if (resp->xbuf->page_len) return nfserr_resource; + if (!*resp->rqstp->rq_next_page) + return nfserr_resource; RESERVE_SPACE(NFS4_VERIFIER_SIZE); savep = p; @@ -3071,7 +3059,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 goto err_no_verf; } - page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); + page = page_address(*(resp->rqstp->rq_next_page++)); readdir->common.err = 0; readdir->buflen = maxcount; readdir->buffer = page; @@ -3094,8 +3082,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 p = readdir->buffer; *p++ = 0; /* no more entries */ *p++ = htonl(readdir->common.err == nfserr_eof); - resp->xbuf->page_len = ((char*)p) - (char*)page_address( - resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + resp->xbuf->page_len = ((char*)p) - + (char*)page_address(*(resp->rqstp->rq_next_page-1)); /* Use rest of head for padding and remaining ops: */ resp->xbuf->tail[0].iov_base = tailbase; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index dab350dfc376..74934284d9a7 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -19,7 +19,7 @@ #include "idmap.h" #include "nfsd.h" #include "cache.h" -#include "fault_inject.h" +#include "state.h" #include "netns.h" /* @@ -186,9 +186,6 @@ static struct file_operations supported_enctypes_ops = { }; #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ -extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); -extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); - static const struct file_operations pool_stats_operations = { .open = nfsd_pool_stats_open, .read = seq_read, @@ -399,6 +396,8 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) { char *mesg = buf; int rv; + struct net *net = &init_net; + if (size > 0) { int newthreads; rv = get_int(&mesg, &newthreads); @@ -406,11 +405,11 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return rv; if (newthreads < 0) return -EINVAL; - rv = nfsd_svc(newthreads); + rv = nfsd_svc(newthreads, net); if (rv < 0) return rv; } else - rv = nfsd_nrthreads(); + rv = nfsd_nrthreads(net); return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); } @@ -448,9 +447,10 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int len; int npools; int *nthreads; + struct net *net = &init_net; mutex_lock(&nfsd_mutex); - npools = nfsd_nrpools(); + npools = nfsd_nrpools(net); if (npools == 0) { /* * NFS is shut down. The admin can start it by @@ -478,12 +478,12 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) if (nthreads[i] < 0) goto out_free; } - rv = nfsd_set_nrthreads(i, nthreads); + rv = nfsd_set_nrthreads(i, nthreads, net); if (rv) goto out_free; } - rv = nfsd_get_nrthreads(npools, nthreads); + rv = nfsd_get_nrthreads(npools, nthreads, net); if (rv) goto out_free; @@ -510,11 +510,13 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) unsigned minor; ssize_t tlen = 0; char *sep; + struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size>0) { - if (nfsd_serv) + if (nn->nfsd_serv) /* Cannot change versions without updating - * nfsd_serv->sv_xdrsize, and reallocing + * nn->nfsd_serv->sv_xdrsize, and reallocing * rq_argp and rq_resp */ return -EBUSY; @@ -645,11 +647,13 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) * Zero-length write. Return a list of NFSD's current listener * transports. */ -static ssize_t __write_ports_names(char *buf) +static ssize_t __write_ports_names(char *buf, struct net *net) { - if (nfsd_serv == NULL) + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->nfsd_serv == NULL) return 0; - return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); + return svc_xprt_names(nn->nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); } /* @@ -657,28 +661,28 @@ static ssize_t __write_ports_names(char *buf) * a socket of a supported family/protocol, and we use it as an * nfsd listener. */ -static ssize_t __write_ports_addfd(char *buf) +static ssize_t __write_ports_addfd(char *buf, struct net *net) { char *mesg = buf; int fd, err; - struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); err = get_int(&mesg, &fd); if (err != 0 || fd < 0) return -EINVAL; - err = nfsd_create_serv(); + err = nfsd_create_serv(net); if (err != 0) return err; - err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); + err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); if (err < 0) { nfsd_destroy(net); return err; } /* Decrease the count, but don't shut down the service */ - nfsd_serv->sv_nrthreads--; + nn->nfsd_serv->sv_nrthreads--; return err; } @@ -686,12 +690,12 @@ static ssize_t __write_ports_addfd(char *buf) * A transport listener is added by writing it's transport name and * a port number. */ -static ssize_t __write_ports_addxprt(char *buf) +static ssize_t __write_ports_addxprt(char *buf, struct net *net) { char transport[16]; struct svc_xprt *xprt; int port, err; - struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (sscanf(buf, "%15s %5u", transport, &port) != 2) return -EINVAL; @@ -699,25 +703,25 @@ static ssize_t __write_ports_addxprt(char *buf) if (port < 1 || port > USHRT_MAX) return -EINVAL; - err = nfsd_create_serv(); + err = nfsd_create_serv(net); if (err != 0) return err; - err = svc_create_xprt(nfsd_serv, transport, net, + err = svc_create_xprt(nn->nfsd_serv, transport, net, PF_INET, port, SVC_SOCK_ANONYMOUS); if (err < 0) goto out_err; - err = svc_create_xprt(nfsd_serv, transport, net, + err = svc_create_xprt(nn->nfsd_serv, transport, net, PF_INET6, port, SVC_SOCK_ANONYMOUS); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; /* Decrease the count, but don't shut down the service */ - nfsd_serv->sv_nrthreads--; + nn->nfsd_serv->sv_nrthreads--; return 0; out_close: - xprt = svc_find_xprt(nfsd_serv, transport, net, PF_INET, port); + xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port); if (xprt != NULL) { svc_close_xprt(xprt); svc_xprt_put(xprt); @@ -727,16 +731,17 @@ out_err: return err; } -static ssize_t __write_ports(struct file *file, char *buf, size_t size) +static ssize_t __write_ports(struct file *file, char *buf, size_t size, + struct net *net) { if (size == 0) - return __write_ports_names(buf); + return __write_ports_names(buf, net); if (isdigit(buf[0])) - return __write_ports_addfd(buf); + return __write_ports_addfd(buf, net); if (isalpha(buf[0])) - return __write_ports_addxprt(buf); + return __write_ports_addxprt(buf, net); return -EINVAL; } @@ -787,9 +792,10 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; + struct net *net = &init_net; mutex_lock(&nfsd_mutex); - rv = __write_ports(file, buf, size); + rv = __write_ports(file, buf, size, net); mutex_unlock(&nfsd_mutex); return rv; } @@ -821,6 +827,9 @@ int nfsd_max_blksize; static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; + struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + if (size > 0) { int bsize; int rv = get_int(&mesg, &bsize); @@ -835,7 +844,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) bsize = NFSSVC_MAXBLKSIZE; bsize &= ~(1024-1); mutex_lock(&nfsd_mutex); - if (nfsd_serv) { + if (nn->nfsd_serv) { mutex_unlock(&nfsd_mutex); return -EBUSY; } @@ -848,13 +857,14 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) } #ifdef CONFIG_NFSD_V4 -static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time) +static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, + time_t *time, struct nfsd_net *nn) { char *mesg = buf; int rv, i; if (size > 0) { - if (nfsd_serv) + if (nn->nfsd_serv) return -EBUSY; rv = get_int(&mesg, &i); if (rv) @@ -879,12 +889,13 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, tim return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time); } -static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time) +static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, + time_t *time, struct nfsd_net *nn) { ssize_t rv; mutex_lock(&nfsd_mutex); - rv = __nfsd4_write_time(file, buf, size, time); + rv = __nfsd4_write_time(file, buf, size, time, nn); mutex_unlock(&nfsd_mutex); return rv; } @@ -912,7 +923,8 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_ */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { - return nfsd4_write_time(file, buf, size, &nfsd4_lease); + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } /** @@ -927,17 +939,19 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) */ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) { - return nfsd4_write_time(file, buf, size, &nfsd4_grace); + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } -static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) +static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, + struct nfsd_net *nn) { char *mesg = buf; char *recdir; int len, status; if (size > 0) { - if (nfsd_serv) + if (nn->nfsd_serv) return -EBUSY; if (size > PATH_MAX || buf[size-1] != '\n') return -EINVAL; @@ -981,9 +995,10 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) { ssize_t rv; + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); mutex_lock(&nfsd_mutex); - rv = __write_recoverydir(file, buf, size); + rv = __write_recoverydir(file, buf, size, nn); mutex_unlock(&nfsd_mutex); return rv; } @@ -1063,6 +1078,7 @@ int nfsd_net_id; static __net_init int nfsd_init_net(struct net *net) { int retval; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); retval = nfsd_export_init(net); if (retval) @@ -1070,6 +1086,8 @@ static __net_init int nfsd_init_net(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; + nn->nfsd4_lease = 90; /* default lease time */ + nn->nfsd4_grace = 90; return 0; out_idmap_error: diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 80d5ce40aadb..de23db255c69 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -55,7 +55,6 @@ extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; -extern struct svc_serv *nfsd_serv; extern spinlock_t nfsd_drc_lock; extern unsigned int nfsd_drc_max_mem; extern unsigned int nfsd_drc_mem_used; @@ -65,26 +64,17 @@ extern const struct seq_operations nfs_exports_op; /* * Function prototypes. */ -int nfsd_svc(int nrservs); +int nfsd_svc(int nrservs, struct net *net); int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); -int nfsd_nrthreads(void); -int nfsd_nrpools(void); -int nfsd_get_nrthreads(int n, int *); -int nfsd_set_nrthreads(int n, int *); +int nfsd_nrthreads(struct net *); +int nfsd_nrpools(struct net *); +int nfsd_get_nrthreads(int n, int *, struct net *); +int nfsd_set_nrthreads(int n, int *, struct net *); int nfsd_pool_stats_open(struct inode *, struct file *); int nfsd_pool_stats_release(struct inode *, struct file *); -static inline void nfsd_destroy(struct net *net) -{ - int destroy = (nfsd_serv->sv_nrthreads == 1); - - if (destroy) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); - if (destroy) - nfsd_serv = NULL; -} +void nfsd_destroy(struct net *net); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL @@ -103,7 +93,7 @@ enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; int nfsd_vers(int vers, enum vers_op change); int nfsd_minorversion(u32 minorversion, enum vers_op change); void nfsd_reset_versions(void); -int nfsd_create_serv(void); +int nfsd_create_serv(struct net *net); extern int nfsd_max_blksize; @@ -121,7 +111,9 @@ void nfs4_state_init(void); int nfsd4_init_slabs(void); void nfsd4_free_slabs(void); int nfs4_state_start(void); +int nfs4_state_start_net(struct net *net); void nfs4_state_shutdown(void); +void nfs4_state_shutdown_net(struct net *net); void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); char * nfs4_recoverydir(void); @@ -130,7 +122,9 @@ static inline void nfs4_state_init(void) { } static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } static inline int nfs4_state_start(void) { return 0; } +static inline int nfs4_state_start_net(struct net *net) { return 0; } static inline void nfs4_state_shutdown(void) { } +static inline void nfs4_state_shutdown_net(struct net *net) { } static inline void nfs4_reset_lease(time_t leasetime) { } static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } static inline char * nfs4_recoverydir(void) {return NULL; } @@ -265,16 +259,8 @@ void nfsd_lockd_shutdown(void); /* Check for dir entries '.' and '..' */ #define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) -/* - * Time of server startup - */ -extern struct timeval nfssvc_boot; - #ifdef CONFIG_NFSD_V4 -extern time_t nfsd4_lease; -extern time_t nfsd4_grace; - /* before processing a COMPOUND operation, we have to check that there * is enough space in the buffer for XDR encode to succeed. otherwise, * we might process an operation with side effects, and be unable to diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 032af381b3aa..814afaa4458a 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -572,7 +572,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (inode) _fh_update(fhp, exp, dentry); - if (fhp->fh_handle.fh_fileid_type == 255) { + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { fh_put(fhp); return nfserr_opnotsupp; } @@ -603,7 +603,7 @@ fh_update(struct svc_fh *fhp) goto out; _fh_update(fhp, fhp->fh_export, dentry); - if (fhp->fh_handle.fh_fileid_type == 255) + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) return nfserr_opnotsupp; } out: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 2013aa001dab..cee62ab9d4a3 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -11,7 +11,6 @@ #include <linux/module.h> #include <linux/fs_struct.h> #include <linux/swap.h> -#include <linux/nsproxy.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svcsock.h> @@ -22,19 +21,19 @@ #include "nfsd.h" #include "cache.h" #include "vfs.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_SVC extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); -struct timeval nfssvc_boot; /* - * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members + * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members * of the svc_serv struct. In particular, ->sv_nrthreads but also to some * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt * - * If (out side the lock) nfsd_serv is non-NULL, then it must point to a + * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number * of nfsd threads must exist and each must listed in ->sp_all_threads in each * entry of ->sv_pools[]. @@ -52,7 +51,6 @@ struct timeval nfssvc_boot; * nfsd_versions */ DEFINE_MUTEX(nfsd_mutex); -struct svc_serv *nfsd_serv; /* * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. @@ -173,28 +171,32 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change) */ #define NFSD_MAXSERVS 8192 -int nfsd_nrthreads(void) +int nfsd_nrthreads(struct net *net) { int rv = 0; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + mutex_lock(&nfsd_mutex); - if (nfsd_serv) - rv = nfsd_serv->sv_nrthreads; + if (nn->nfsd_serv) + rv = nn->nfsd_serv->sv_nrthreads; mutex_unlock(&nfsd_mutex); return rv; } -static int nfsd_init_socks(void) +static int nfsd_init_socks(struct net *net) { int error; - if (!list_empty(&nfsd_serv->sv_permsocks)) + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (!list_empty(&nn->nfsd_serv->sv_permsocks)) return 0; - error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, NFS_PORT, + error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, SVC_SOCK_DEFAULTS); if (error < 0) return error; - error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, NFS_PORT, + error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, SVC_SOCK_DEFAULTS); if (error < 0) return error; @@ -202,14 +204,15 @@ static int nfsd_init_socks(void) return 0; } -static bool nfsd_up = false; +static int nfsd_users = 0; -static int nfsd_startup(int nrservs) +static int nfsd_startup_generic(int nrservs) { int ret; - if (nfsd_up) + if (nfsd_users++) return 0; + /* * Readahead param cache - will no-op if it already exists. * (Note therefore results will be suboptimal if number of @@ -218,43 +221,79 @@ static int nfsd_startup(int nrservs) ret = nfsd_racache_init(2*nrservs); if (ret) return ret; - ret = nfsd_init_socks(); + ret = nfs4_state_start(); if (ret) goto out_racache; - ret = lockd_up(&init_net); + return 0; + +out_racache: + nfsd_racache_shutdown(); + return ret; +} + +static void nfsd_shutdown_generic(void) +{ + if (--nfsd_users) + return; + + nfs4_state_shutdown(); + nfsd_racache_shutdown(); +} + +static int nfsd_startup_net(int nrservs, struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int ret; + + if (nn->nfsd_net_up) + return 0; + + ret = nfsd_startup_generic(nrservs); if (ret) - goto out_racache; - ret = nfs4_state_start(); + return ret; + ret = nfsd_init_socks(net); + if (ret) + goto out_socks; + ret = lockd_up(net); + if (ret) + goto out_socks; + ret = nfs4_state_start_net(net); if (ret) goto out_lockd; - nfsd_up = true; + + nn->nfsd_net_up = true; return 0; + out_lockd: - lockd_down(&init_net); -out_racache: - nfsd_racache_shutdown(); + lockd_down(net); +out_socks: + nfsd_shutdown_generic(); return ret; } -static void nfsd_shutdown(void) +static void nfsd_shutdown_net(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs4_state_shutdown_net(net); + lockd_down(net); + nn->nfsd_net_up = false; + nfsd_shutdown_generic(); +} + +static void nfsd_last_thread(struct svc_serv *serv, struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + /* * write_ports can create the server without actually starting * any threads--if we get shut down before any threads are * started, then nfsd_last_thread will be run before any of this * other initialization has been done. */ - if (!nfsd_up) + if (!nn->nfsd_net_up) return; - nfs4_state_shutdown(); - lockd_down(&init_net); - nfsd_racache_shutdown(); - nfsd_up = false; -} - -static void nfsd_last_thread(struct svc_serv *serv, struct net *net) -{ - nfsd_shutdown(); + nfsd_shutdown_net(net); svc_rpcb_cleanup(serv, net); @@ -327,69 +366,84 @@ static int nfsd_get_default_max_blksize(void) return ret; } -int nfsd_create_serv(void) +int nfsd_create_serv(struct net *net) { int error; - struct net *net = current->nsproxy->net_ns; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); WARN_ON(!mutex_is_locked(&nfsd_mutex)); - if (nfsd_serv) { - svc_get(nfsd_serv); + if (nn->nfsd_serv) { + svc_get(nn->nfsd_serv); return 0; } if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); - nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, + nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd_last_thread, nfsd, THIS_MODULE); - if (nfsd_serv == NULL) + if (nn->nfsd_serv == NULL) return -ENOMEM; - error = svc_bind(nfsd_serv, net); + error = svc_bind(nn->nfsd_serv, net); if (error < 0) { - svc_destroy(nfsd_serv); + svc_destroy(nn->nfsd_serv); return error; } set_max_drc(); - do_gettimeofday(&nfssvc_boot); /* record boot time */ + do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ return 0; } -int nfsd_nrpools(void) +int nfsd_nrpools(struct net *net) { - if (nfsd_serv == NULL) + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->nfsd_serv == NULL) return 0; else - return nfsd_serv->sv_nrpools; + return nn->nfsd_serv->sv_nrpools; } -int nfsd_get_nrthreads(int n, int *nthreads) +int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - if (nfsd_serv != NULL) { - for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++) - nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads; + if (nn->nfsd_serv != NULL) { + for (i = 0; i < nn->nfsd_serv->sv_nrpools && i < n; i++) + nthreads[i] = nn->nfsd_serv->sv_pools[i].sp_nrthreads; } return 0; } -int nfsd_set_nrthreads(int n, int *nthreads) +void nfsd_destroy(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int destroy = (nn->nfsd_serv->sv_nrthreads == 1); + + if (destroy) + svc_shutdown_net(nn->nfsd_serv, net); + svc_destroy(nn->nfsd_serv); + if (destroy) + nn->nfsd_serv = NULL; +} + +int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; int tot = 0; int err = 0; - struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); WARN_ON(!mutex_is_locked(&nfsd_mutex)); - if (nfsd_serv == NULL || n <= 0) + if (nn->nfsd_serv == NULL || n <= 0) return 0; - if (n > nfsd_serv->sv_nrpools) - n = nfsd_serv->sv_nrpools; + if (n > nn->nfsd_serv->sv_nrpools) + n = nn->nfsd_serv->sv_nrpools; /* enforce a global maximum number of threads */ tot = 0; @@ -419,9 +473,9 @@ int nfsd_set_nrthreads(int n, int *nthreads) nthreads[0] = 1; /* apply the new numbers */ - svc_get(nfsd_serv); + svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { - err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i], + err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i], nthreads[i]); if (err) break; @@ -436,11 +490,11 @@ int nfsd_set_nrthreads(int n, int *nthreads) * this is the first time nrservs is nonzero. */ int -nfsd_svc(int nrservs) +nfsd_svc(int nrservs, struct net *net) { int error; bool nfsd_up_before; - struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); @@ -449,29 +503,29 @@ nfsd_svc(int nrservs) if (nrservs > NFSD_MAXSERVS) nrservs = NFSD_MAXSERVS; error = 0; - if (nrservs == 0 && nfsd_serv == NULL) + if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; - error = nfsd_create_serv(); + error = nfsd_create_serv(net); if (error) goto out; - nfsd_up_before = nfsd_up; + nfsd_up_before = nn->nfsd_net_up; - error = nfsd_startup(nrservs); + error = nfsd_startup_net(nrservs, net); if (error) goto out_destroy; - error = svc_set_num_threads(nfsd_serv, NULL, nrservs); + error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); if (error) goto out_shutdown; - /* We are holding a reference to nfsd_serv which + /* We are holding a reference to nn->nfsd_serv which * we don't want to count in the return value, * so subtract 1 */ - error = nfsd_serv->sv_nrthreads - 1; + error = nn->nfsd_serv->sv_nrthreads - 1; out_shutdown: if (error < 0 && !nfsd_up_before) - nfsd_shutdown(); + nfsd_shutdown_net(net); out_destroy: nfsd_destroy(net); /* Release server */ out: @@ -487,6 +541,8 @@ static int nfsd(void *vrqstp) { struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; + struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list); + struct net *net = perm_sock->xpt_net; int err; /* Lock module and set up kernel thread */ @@ -551,7 +607,7 @@ out: /* Release the thread */ svc_exit_thread(rqstp); - nfsd_destroy(&init_net); + nfsd_destroy(net); /* Release module */ mutex_unlock(&nfsd_mutex); @@ -640,21 +696,24 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) } /* Store reply in cache. */ - nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); + nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); return 1; } int nfsd_pool_stats_open(struct inode *inode, struct file *file) { int ret; + struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + mutex_lock(&nfsd_mutex); - if (nfsd_serv == NULL) { + if (nn->nfsd_serv == NULL) { mutex_unlock(&nfsd_mutex); return -ENODEV; } /* bump up the psudo refcount while traversing */ - svc_get(nfsd_serv); - ret = svc_pool_stats_open(nfsd_serv, file); + svc_get(nn->nfsd_serv); + ret = svc_pool_stats_open(nn->nfsd_serv, file); mutex_unlock(&nfsd_mutex); return ret; } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 65ec595e2226..979b42106979 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -246,7 +246,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readargs *args) { unsigned int len; - int v,pn; + int v; if (!(p = decode_fh(p, &args->fh))) return 0; @@ -262,8 +262,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, */ v=0; while (len > 0) { - pn = rqstp->rq_resused++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + struct page *p = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(p); rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; len -= rqstp->rq_vec[v].iov_len; v++; @@ -355,7 +356,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli { if (!(p = decode_fh(p, &args->fh))) return 0; - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -396,7 +397,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, if (args->count > PAGE_SIZE) args->count = PAGE_SIZE; - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e036894bce57..d1c229feed52 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -150,6 +150,12 @@ struct nfsd4_channel_attrs { u32 rdma_attrs; }; +struct nfsd4_cb_sec { + u32 flavor; /* (u32)(-1) used to mean "no valid flavor" */ + u32 uid; + u32 gid; +}; + struct nfsd4_create_session { clientid_t clientid; struct nfs4_sessionid sessionid; @@ -158,8 +164,12 @@ struct nfsd4_create_session { struct nfsd4_channel_attrs fore_channel; struct nfsd4_channel_attrs back_channel; u32 callback_prog; - u32 uid; - u32 gid; + struct nfsd4_cb_sec cb_sec; +}; + +struct nfsd4_backchannel_ctl { + u32 bc_cb_program; + struct nfsd4_cb_sec bc_cb_sec; }; struct nfsd4_bind_conn_to_session { @@ -192,6 +202,7 @@ struct nfsd4_session { struct nfs4_sessionid se_sessionid; struct nfsd4_channel_attrs se_fchannel; struct nfsd4_channel_attrs se_bchannel; + struct nfsd4_cb_sec se_cb_sec; struct list_head se_conns; u32 se_cb_prog; u32 se_cb_seq_nr; @@ -221,13 +232,12 @@ struct nfsd4_sessionid { */ struct nfs4_client { struct list_head cl_idhash; /* hash by cl_clientid.id */ - struct list_head cl_strhash; /* hash by cl_name */ + struct rb_node cl_namenode; /* link into by-name trees */ struct list_head cl_openowners; struct idr cl_stateids; /* stateid lookup */ struct list_head cl_delegations; struct list_head cl_lru; /* tail queue */ struct xdr_netobj cl_name; /* id generated by client */ - char cl_recdir[HEXDIR_LEN]; /* recovery dir */ nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ @@ -242,9 +252,11 @@ struct nfs4_client { #define NFSD4_CLIENT_CB_KILL (1) #define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ #define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ +#define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) unsigned long cl_flags; + struct rpc_cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; u32 cl_cb_ident; #define NFSD4_CB_UP 0 @@ -271,6 +283,7 @@ struct nfs4_client { unsigned long cl_cb_slot_busy; struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ /* wait here for slots */ + struct net *net; }; static inline void @@ -292,6 +305,7 @@ is_client_expired(struct nfs4_client *clp) */ struct nfs4_client_reclaim { struct list_head cr_strhash; /* hash by cr_name */ + struct nfs4_client *cr_clp; /* pointer to associated clp */ char cr_recdir[HEXDIR_LEN]; /* recover dir */ }; @@ -452,25 +466,26 @@ extern __be32 nfs4_preprocess_stateid_op(struct net *net, stateid_t *stateid, int flags, struct file **filp); extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); -extern int nfs4_in_grace(void); -extern void nfs4_release_reclaim(void); -extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct nfs4_client *crp); -extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions); +void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); +extern void nfs4_release_reclaim(struct nfsd_net *); +extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, + struct nfsd_net *nn); +extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); extern void nfs4_free_openowner(struct nfs4_openowner *); extern void nfs4_free_lockowner(struct nfs4_lockowner *); extern int set_callback_cred(void); +extern void nfsd4_init_callback(struct nfsd4_callback *); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); -extern void nfsd4_do_callback_rpc(struct work_struct *); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfs4_put_delegation(struct nfs4_delegation *dp); -extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); -extern int nfs4_client_to_reclaim(const char *name); -extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); +extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, + struct nfsd_net *nn); +extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); extern void release_session_client(struct nfsd4_session *); extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *); @@ -480,5 +495,28 @@ extern void nfsd4_client_tracking_exit(struct net *net); extern void nfsd4_client_record_create(struct nfs4_client *clp); extern void nfsd4_client_record_remove(struct nfs4_client *clp); extern int nfsd4_client_record_check(struct nfs4_client *clp); -extern void nfsd4_record_grace_done(struct net *net, time_t boot_time); +extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); + +/* nfs fault injection functions */ +#ifdef CONFIG_NFSD_FAULT_INJECTION +int nfsd_fault_inject_init(void); +void nfsd_fault_inject_cleanup(void); +u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64)); +struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); + +u64 nfsd_forget_client(struct nfs4_client *, u64); +u64 nfsd_forget_client_locks(struct nfs4_client*, u64); +u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); +u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); +u64 nfsd_recall_client_delegations(struct nfs4_client *, u64); + +u64 nfsd_print_client(struct nfs4_client *, u64); +u64 nfsd_print_client_locks(struct nfs4_client *, u64); +u64 nfsd_print_client_openowners(struct nfs4_client *, u64); +u64 nfsd_print_client_delegations(struct nfs4_client *, u64); +#else /* CONFIG_NFSD_FAULT_INJECTION */ +static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void nfsd_fault_inject_cleanup(void) {} +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c120b48ec305..d586117fa94a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -886,7 +886,7 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct svc_rqst *rqstp = sd->u.data; - struct page **pp = rqstp->rq_respages + rqstp->rq_resused; + struct page **pp = rqstp->rq_next_page; struct page *page = buf->page; size_t size; @@ -894,17 +894,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (rqstp->rq_res.page_len == 0) { get_page(page); - put_page(*pp); - *pp = page; - rqstp->rq_resused++; + put_page(*rqstp->rq_next_page); + *(rqstp->rq_next_page++) = page; rqstp->rq_res.page_base = buf->offset; rqstp->rq_res.page_len = size; } else if (page != pp[-1]) { get_page(page); - if (*pp) - put_page(*pp); - *pp = page; - rqstp->rq_resused++; + if (*rqstp->rq_next_page) + put_page(*rqstp->rq_next_page); + *(rqstp->rq_next_page++) = page; rqstp->rq_res.page_len += size; } else rqstp->rq_res.page_len += size; @@ -936,7 +934,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, .u.data = rqstp, }; - rqstp->rq_resused = 1; + rqstp->rq_next_page = rqstp->rq_respages + 1; host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); } else { oldfs = get_fs(); @@ -1020,28 +1018,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, inode = dentry->d_inode; exp = fhp->fh_export; - /* - * Request sync writes if - * - the sync export option has been set, or - * - the client requested O_SYNC behavior (NFSv3 feature). - * - The file system doesn't support fsync(). - * When NFSv2 gathered writes have been configured for this volume, - * flushing the data to disk is handled separately below. - */ use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); - if (!file->f_op->fsync) {/* COMMIT3 cannot work */ - stable = 2; - *stablep = 2; /* FILE_SYNC */ - } - if (!EX_ISSYNC(exp)) stable = 0; - if (stable && !use_wgather) { - spin_lock(&file->f_lock); - file->f_flags |= O_SYNC; - spin_unlock(&file->f_lock); - } /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); @@ -1057,8 +1037,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (inode->i_mode & (S_ISUID | S_ISGID)) kill_suid(dentry); - if (stable && use_wgather) - host_err = wait_for_concurrent_writes(file); + if (stable) { + if (use_wgather) + host_err = wait_for_concurrent_writes(file); + else + host_err = vfs_fsync_range(file, offset, offset+*cnt, 0); + } out_nfserr: dprintk("nfsd: write complete host_err=%d\n", host_err); @@ -1485,13 +1469,19 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, case NFS3_CREATE_EXCLUSIVE: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_size == 0 ) + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; break; + } case NFS4_CREATE_EXCLUSIVE4_1: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_size == 0 ) + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; goto set_attr; + } /* fallthru */ case NFS3_CREATE_GUARDED: err = nfserr_exist; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index acd127d4ee82..0889bfb43dc9 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -385,7 +385,8 @@ struct nfsd4_write { u64 wr_offset; /* request */ u32 wr_stable_how; /* request */ u32 wr_buflen; /* request */ - int wr_vlen; + struct kvec wr_head; + struct page ** wr_pagelist; /* request */ u32 wr_bytes_written; /* response */ u32 wr_how_written; /* response */ @@ -462,6 +463,7 @@ struct nfsd4_op { /* NFSv4.1 */ struct nfsd4_exchange_id exchange_id; + struct nfsd4_backchannel_ctl backchannel_ctl; struct nfsd4_bind_conn_to_session bind_conn_to_session; struct nfsd4_create_session create_session; struct nfsd4_destroy_session destroy_session; @@ -526,6 +528,14 @@ static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) || nfsd4_is_solo_sequence(resp); } +static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + + return argp->opcnt == resp->opcnt; +} + #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) static inline void @@ -566,6 +576,7 @@ extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_exchange_id *); +extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *); extern __be32 nfsd4_create_session(struct svc_rqst *, struct nfsd4_compound_state *, @@ -579,7 +590,7 @@ extern __be32 nfsd4_destroy_session(struct svc_rqst *, extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *); __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *); extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, - struct nfsd4_open *open); + struct nfsd4_open *open, struct nfsd_net *nn); extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open); extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status); diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 16f35f7423c5..61946883025c 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -167,7 +167,6 @@ const struct file_operations nilfs_file_operations = { }; const struct inode_operations nilfs_file_inode_operations = { - .truncate = nilfs_truncate, .setattr = nilfs_setattr, .permission = nilfs_permission, .fiemap = nilfs_fiemap, diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 4d31d2cca7fd..6b49f14eac8c 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -213,6 +213,16 @@ static int nilfs_set_page_dirty(struct page *page) return ret; } +void nilfs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + nilfs_truncate(inode); + } +} + static int nilfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -227,10 +237,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, err = block_write_begin(mapping, pos, len, flags, pagep, nilfs_get_block); if (unlikely(err)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - + nilfs_write_failed(mapping, pos + len); nilfs_transaction_abort(inode->i_sb); } return err; @@ -259,6 +266,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_mapping->host; ssize_t size; @@ -278,7 +286,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t end = offset + iov_length(iov, nr_segs); if (end > isize) - vmtruncate(inode, isize); + nilfs_write_failed(mapping, end); } return size; @@ -786,10 +794,8 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { inode_dio_wait(inode); - - err = vmtruncate(inode, iattr->ia_size); - if (unlikely(err)) - goto out_err; + truncate_setsize(inode, iattr->ia_size); + nilfs_truncate(inode); } setattr_copy(inode, iattr); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 74cece80e9a3..9bc72dec3fa6 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -277,6 +277,7 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); extern int nilfs_setattr(struct dentry *, struct iattr *); +extern void nilfs_write_failed(struct address_space *mapping, loff_t to); int nilfs_permission(struct inode *inode, int mask); int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index f1626f5011c5..ff00a0b7acb9 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -527,7 +527,8 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, if (unlikely(err)) { loff_t isize = inode->i_size; if (pos + blocksize > isize) - vmtruncate(inode, isize); + nilfs_write_failed(inode->i_mapping, + pos + blocksize); goto failed_inode; } diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 3344bdd5506e..08b886f119ce 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -201,7 +201,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) /* nothing else could have found us thanks to the dnotify_mark_mutex */ if (dn_mark->dn == NULL) - fsnotify_destroy_mark(fsn_mark); + fsnotify_destroy_mark(fsn_mark, dnotify_group); mutex_unlock(&dnotify_mark_mutex); @@ -385,7 +385,7 @@ out: spin_unlock(&fsn_mark->lock); if (destroy) - fsnotify_destroy_mark(fsn_mark); + fsnotify_destroy_mark(fsn_mark, dnotify_group); mutex_unlock(&dnotify_mark_mutex); fsnotify_put_mark(fsn_mark); diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index a50636025364..0c2f9122b262 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -18,6 +18,12 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) old->tgid == new->tgid) { switch (old->data_type) { case (FSNOTIFY_EVENT_PATH): +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + /* dont merge two permission events */ + if ((old->mask & FAN_ALL_PERM_EVENTS) && + (new->mask & FAN_ALL_PERM_EVENTS)) + return false; +#endif if ((old->path.mnt == new->path.mnt) && (old->path.dentry == new->path.dentry)) return true; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a5cd9bba022f..9ff4a5ee6e20 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -397,8 +397,12 @@ static int fanotify_release(struct inode *ignored, struct file *file) wake_up(&group->fanotify_data.access_waitq); #endif + + if (file->f_flags & FASYNC) + fsnotify_fasync(-1, file, 0); + /* matches the fanotify_init->fsnotify_alloc_group */ - fsnotify_put_group(group); + fsnotify_destroy_group(group); return 0; } @@ -493,7 +497,8 @@ out: static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u32 mask, - unsigned int flags) + unsigned int flags, + int *destroy) { __u32 oldmask; @@ -507,8 +512,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, } spin_unlock(&fsn_mark->lock); - if (!(oldmask & ~mask)) - fsnotify_destroy_mark(fsn_mark); + *destroy = !(oldmask & ~mask); return mask & oldmask; } @@ -519,12 +523,17 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, { struct fsnotify_mark *fsn_mark = NULL; __u32 removed; + int destroy_mark; fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); if (!fsn_mark) return -ENOENT; - removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags); + removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, + &destroy_mark); + if (destroy_mark) + fsnotify_destroy_mark(fsn_mark, group); + fsnotify_put_mark(fsn_mark); if (removed & real_mount(mnt)->mnt_fsnotify_mask) fsnotify_recalc_vfsmount_mask(mnt); @@ -538,12 +547,16 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, { struct fsnotify_mark *fsn_mark = NULL; __u32 removed; + int destroy_mark; fsn_mark = fsnotify_find_inode_mark(group, inode); if (!fsn_mark) return -ENOENT; - removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags); + removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, + &destroy_mark); + if (destroy_mark) + fsnotify_destroy_mark(fsn_mark, group); /* matches the fsnotify_find_inode_mark() */ fsnotify_put_mark(fsn_mark); if (removed & inode->i_fsnotify_mask) @@ -710,13 +723,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) break; default: fd = -EINVAL; - goto out_put_group; + goto out_destroy_group; } if (flags & FAN_UNLIMITED_QUEUE) { fd = -EPERM; if (!capable(CAP_SYS_ADMIN)) - goto out_put_group; + goto out_destroy_group; group->max_events = UINT_MAX; } else { group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; @@ -725,7 +738,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (flags & FAN_UNLIMITED_MARKS) { fd = -EPERM; if (!capable(CAP_SYS_ADMIN)) - goto out_put_group; + goto out_destroy_group; group->fanotify_data.max_marks = UINT_MAX; } else { group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; @@ -733,12 +746,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) - goto out_put_group; + goto out_destroy_group; return fd; -out_put_group: - fsnotify_put_group(group); +out_destroy_group: + fsnotify_destroy_group(group); return fd; } diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 514c4b81483d..238a5930cb3c 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -27,13 +27,13 @@ static int show_fdinfo(struct seq_file *m, struct file *f, struct fsnotify_mark *mark; int ret = 0; - spin_lock(&group->mark_lock); + mutex_lock(&group->mark_mutex); list_for_each_entry(mark, &group->marks_list, g_list) { ret = show(m, mark); if (ret) break; } - spin_unlock(&group->mark_lock); + mutex_unlock(&group->mark_mutex); return ret; } diff --git a/fs/notify/group.c b/fs/notify/group.c index 63fc294a4692..bd2625bd88b4 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -33,9 +33,6 @@ */ void fsnotify_final_destroy_group(struct fsnotify_group *group) { - /* clear the notification queue of all events */ - fsnotify_flush_notify(group); - if (group->ops->free_group_priv) group->ops->free_group_priv(group); @@ -43,23 +40,30 @@ void fsnotify_final_destroy_group(struct fsnotify_group *group) } /* - * Trying to get rid of a group. We need to first get rid of any outstanding - * allocations and then free the group. Remember that fsnotify_clear_marks_by_group - * could miss marks that are being freed by inode and those marks could still - * hold a reference to this group (via group->num_marks) If we get into that - * situtation, the fsnotify_final_destroy_group will get called when that final - * mark is freed. + * Trying to get rid of a group. Remove all marks, flush all events and release + * the group reference. + * Note that another thread calling fsnotify_clear_marks_by_group() may still + * hold a ref to the group. */ -static void fsnotify_destroy_group(struct fsnotify_group *group) +void fsnotify_destroy_group(struct fsnotify_group *group) { /* clear all inode marks for this group */ fsnotify_clear_marks_by_group(group); synchronize_srcu(&fsnotify_mark_srcu); - /* past the point of no return, matches the initial value of 1 */ - if (atomic_dec_and_test(&group->num_marks)) - fsnotify_final_destroy_group(group); + /* clear the notification queue of all events */ + fsnotify_flush_notify(group); + + fsnotify_put_group(group); +} + +/* + * Get reference to a group. + */ +void fsnotify_get_group(struct fsnotify_group *group) +{ + atomic_inc(&group->refcnt); } /* @@ -68,7 +72,7 @@ static void fsnotify_destroy_group(struct fsnotify_group *group) void fsnotify_put_group(struct fsnotify_group *group) { if (atomic_dec_and_test(&group->refcnt)) - fsnotify_destroy_group(group); + fsnotify_final_destroy_group(group); } /* @@ -84,21 +88,24 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) /* set to 0 when there a no external references to this group */ atomic_set(&group->refcnt, 1); - /* - * hits 0 when there are no external references AND no marks for - * this group - */ - atomic_set(&group->num_marks, 1); + atomic_set(&group->num_marks, 0); mutex_init(&group->notification_mutex); INIT_LIST_HEAD(&group->notification_list); init_waitqueue_head(&group->notification_waitq); group->max_events = UINT_MAX; - spin_lock_init(&group->mark_lock); + mutex_init(&group->mark_mutex); INIT_LIST_HEAD(&group->marks_list); group->ops = ops; return group; } + +int fsnotify_fasync(int fd, struct file *file, int on) +{ + struct fsnotify_group *group = file->private_data; + + return fasync_helper(fd, file, on, &group->fsn_fa) >= 0 ? 0 : -EIO; +} diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index f3035691f528..f31e90fc050d 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -63,8 +63,8 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) { struct inode *inode = mark->i.inode; + BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); assert_spin_locked(&mark->lock); - assert_spin_locked(&mark->group->mark_lock); spin_lock(&inode->i_lock); @@ -99,8 +99,16 @@ void fsnotify_clear_marks_by_inode(struct inode *inode) spin_unlock(&inode->i_lock); list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) { - fsnotify_destroy_mark(mark); + struct fsnotify_group *group; + + spin_lock(&mark->lock); + fsnotify_get_group(mark->group); + group = mark->group; + spin_unlock(&mark->lock); + + fsnotify_destroy_mark(mark, group); fsnotify_put_mark(mark); + fsnotify_put_group(group); } } @@ -192,8 +200,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, mark->flags |= FSNOTIFY_MARK_FLAG_INODE; + BUG_ON(!mutex_is_locked(&group->mark_mutex)); assert_spin_locked(&mark->lock); - assert_spin_locked(&group->mark_lock); spin_lock(&inode->i_lock); diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index e3cbd746f64a..871569c7d609 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -118,6 +118,7 @@ static int inotify_handle_event(struct fsnotify_group *group, fsn_event_priv = &event_priv->fsnotify_event_priv_data; + fsnotify_get_group(group); fsn_event_priv->group = group; event_priv->wd = wd; @@ -131,7 +132,7 @@ static int inotify_handle_event(struct fsnotify_group *group, } if (inode_mark->mask & IN_ONESHOT) - fsnotify_destroy_mark(inode_mark); + fsnotify_destroy_mark(inode_mark, group); return ret; } @@ -210,6 +211,7 @@ void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) event_priv = container_of(fsn_event_priv, struct inotify_event_private_data, fsnotify_event_priv_data); + fsnotify_put_group(fsn_event_priv->group); kmem_cache_free(event_priv_cachep, event_priv); } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 36cb013c7c13..228a2c2ad8d7 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -265,7 +265,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf, ret = -EAGAIN; if (file->f_flags & O_NONBLOCK) break; - ret = -EINTR; + ret = -ERESTARTSYS; if (signal_pending(current)) break; @@ -281,23 +281,17 @@ static ssize_t inotify_read(struct file *file, char __user *buf, return ret; } -static int inotify_fasync(int fd, struct file *file, int on) -{ - struct fsnotify_group *group = file->private_data; - - return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO; -} - static int inotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; pr_debug("%s: group=%p\n", __func__, group); - fsnotify_clear_marks_by_group(group); + if (file->f_flags & FASYNC) + fsnotify_fasync(-1, file, 0); /* free this group, matching get was inotify_init->fsnotify_obtain_group */ - fsnotify_put_group(group); + fsnotify_destroy_group(group); return 0; } @@ -339,7 +333,7 @@ static const struct file_operations inotify_fops = { .show_fdinfo = inotify_show_fdinfo, .poll = inotify_poll, .read = inotify_read, - .fasync = inotify_fasync, + .fasync = fsnotify_fasync, .release = inotify_release, .unlocked_ioctl = inotify_ioctl, .compat_ioctl = inotify_ioctl, @@ -521,13 +515,13 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_event_private_data *fsn_event_priv; int ret; + i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); + ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0, GFP_NOFS); if (!ignored_event) - return; - - i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); + goto skip_send_ignore; event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS); if (unlikely(!event_priv)) @@ -535,6 +529,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, fsn_event_priv = &event_priv->fsnotify_event_priv_data; + fsnotify_get_group(group); fsn_event_priv->group = group; event_priv->wd = i_mark->wd; @@ -548,9 +543,9 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, } skip_send_ignore: - /* matches the reference taken when the event was created */ - fsnotify_put_event(ignored_event); + if (ignored_event) + fsnotify_put_event(ignored_event); /* remove this mark from the idr */ inotify_remove_from_idr(group, i_mark); @@ -709,12 +704,11 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); group->inotify_data.last_wd = 0; - group->inotify_data.fa = NULL; group->inotify_data.user = get_current_user(); if (atomic_inc_return(&group->inotify_data.user->inotify_devs) > inotify_max_user_instances) { - fsnotify_put_group(group); + fsnotify_destroy_group(group); return ERR_PTR(-EMFILE); } @@ -743,7 +737,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) ret = anon_inode_getfd("inotify", &inotify_fops, group, O_RDONLY | flags); if (ret < 0) - fsnotify_put_group(group); + fsnotify_destroy_group(group); return ret; } @@ -819,7 +813,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) ret = 0; - fsnotify_destroy_mark(&i_mark->fsn_mark); + fsnotify_destroy_mark(&i_mark->fsn_mark, group); /* match ref taken by inotify_idr_find */ fsnotify_put_mark(&i_mark->fsn_mark); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index f104d565b682..fc6b49bf7360 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -109,8 +109,11 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) void fsnotify_put_mark(struct fsnotify_mark *mark) { - if (atomic_dec_and_test(&mark->refcnt)) + if (atomic_dec_and_test(&mark->refcnt)) { + if (mark->group) + fsnotify_put_group(mark->group); mark->free_mark(mark); + } } /* @@ -118,14 +121,14 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) * The caller had better be holding a reference to this mark so we don't actually * do the final put under the mark->lock */ -void fsnotify_destroy_mark(struct fsnotify_mark *mark) +void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, + struct fsnotify_group *group) { - struct fsnotify_group *group; struct inode *inode = NULL; - spin_lock(&mark->lock); + BUG_ON(!mutex_is_locked(&group->mark_mutex)); - group = mark->group; + spin_lock(&mark->lock); /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { @@ -135,8 +138,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; - spin_lock(&group->mark_lock); - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { inode = mark->i.inode; fsnotify_destroy_inode_mark(mark); @@ -147,13 +148,22 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) list_del_init(&mark->g_list); - spin_unlock(&group->mark_lock); spin_unlock(&mark->lock); + if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) + iput(inode); + /* release lock temporarily */ + mutex_unlock(&group->mark_mutex); + spin_lock(&destroy_lock); list_add(&mark->destroy_list, &destroy_list); spin_unlock(&destroy_lock); wake_up(&destroy_waitq); + /* + * We don't necessarily have a ref on mark from caller so the above destroy + * may have actually freed it, unless this group provides a 'freeing_mark' + * function which must be holding a reference. + */ /* * Some groups like to know that marks are being freed. This is a @@ -175,21 +185,17 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) * is just a lazy update (and could be a perf win...) */ - if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) - iput(inode); + atomic_dec(&group->num_marks); - /* - * We don't necessarily have a ref on mark from caller so the above iput - * may have already destroyed it. Don't touch from now on. - */ + mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); +} - /* - * it's possible that this group tried to destroy itself, but this - * this mark was simultaneously being freed by inode. If that's the - * case, we finish freeing the group here. - */ - if (unlikely(atomic_dec_and_test(&group->num_marks))) - fsnotify_final_destroy_group(group); +void fsnotify_destroy_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group) +{ + mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + fsnotify_destroy_mark_locked(mark, group); + mutex_unlock(&group->mark_mutex); } void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) @@ -214,26 +220,26 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. */ -int fsnotify_add_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, - struct vfsmount *mnt, int allow_dups) +int fsnotify_add_mark_locked(struct fsnotify_mark *mark, + struct fsnotify_group *group, struct inode *inode, + struct vfsmount *mnt, int allow_dups) { int ret = 0; BUG_ON(inode && mnt); BUG_ON(!inode && !mnt); + BUG_ON(!mutex_is_locked(&group->mark_mutex)); /* * LOCKING ORDER!!!! + * group->mark_mutex * mark->lock - * group->mark_lock * inode->i_lock */ spin_lock(&mark->lock); - spin_lock(&group->mark_lock); - mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE; + fsnotify_get_group(group); mark->group = group; list_add(&mark->g_list, &group->marks_list); atomic_inc(&group->num_marks); @@ -251,11 +257,8 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, BUG(); } - spin_unlock(&group->mark_lock); - /* this will pin the object if appropriate */ fsnotify_set_mark_mask_locked(mark, mark->mask); - spin_unlock(&mark->lock); if (inode) @@ -265,10 +268,10 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, err: mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; list_del_init(&mark->g_list); + fsnotify_put_group(group); mark->group = NULL; atomic_dec(&group->num_marks); - spin_unlock(&group->mark_lock); spin_unlock(&mark->lock); spin_lock(&destroy_lock); @@ -279,6 +282,16 @@ err: return ret; } +int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, + struct inode *inode, struct vfsmount *mnt, int allow_dups) +{ + int ret; + mutex_lock(&group->mark_mutex); + ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups); + mutex_unlock(&group->mark_mutex); + return ret; +} + /* * clear any marks in a group in which mark->flags & flags is true */ @@ -286,22 +299,16 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags) { struct fsnotify_mark *lmark, *mark; - LIST_HEAD(free_list); - spin_lock(&group->mark_lock); + mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { if (mark->flags & flags) { - list_add(&mark->free_g_list, &free_list); - list_del_init(&mark->g_list); fsnotify_get_mark(mark); + fsnotify_destroy_mark_locked(mark, group); + fsnotify_put_mark(mark); } } - spin_unlock(&group->mark_lock); - - list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) { - fsnotify_destroy_mark(mark); - fsnotify_put_mark(mark); - } + mutex_unlock(&group->mark_mutex); } /* @@ -317,6 +324,8 @@ void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *ol assert_spin_locked(&old->lock); new->i.inode = old->i.inode; new->m.mnt = old->m.mnt; + if (old->group) + fsnotify_get_group(old->group); new->group = old->group; new->mask = old->mask; new->free_mark = old->free_mark; diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 48cb994e4922..7b51b05f160c 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -225,6 +225,7 @@ alloc_holder: mutex_unlock(&group->notification_mutex); wake_up(&group->notification_waitq); + kill_fasync(&group->fsn_fa, SIGIO, POLL_IN); return return_event; } diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index b7b4b0e8554f..4df58b8ea64a 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -46,8 +46,16 @@ void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) spin_unlock(&mnt->mnt_root->d_lock); list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) { - fsnotify_destroy_mark(mark); + struct fsnotify_group *group; + + spin_lock(&mark->lock); + fsnotify_get_group(mark->group); + group = mark->group; + spin_unlock(&mark->lock); + + fsnotify_destroy_mark(mark, group); fsnotify_put_mark(mark); + fsnotify_put_group(group); } } @@ -88,8 +96,8 @@ void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark) { struct vfsmount *mnt = mark->m.mnt; + BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); assert_spin_locked(&mark->lock); - assert_spin_locked(&mark->group->mark_lock); spin_lock(&mnt->mnt_root->d_lock); @@ -151,8 +159,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; + BUG_ON(!mutex_is_locked(&group->mark_mutex)); assert_spin_locked(&mark->lock); - assert_spin_locked(&group->mark_lock); spin_lock(&mnt->mnt_root->d_lock); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 1ecf46448f85..5b2d4f0853ac 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1762,6 +1762,16 @@ err_out: return err; } +static void ntfs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + ntfs_truncate_vfs(inode); + } +} + /** * ntfs_file_buffered_write - * @@ -2022,8 +2032,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, * allocated space, which is not a disaster. */ i_size = i_size_read(vi); - if (pos + bytes > i_size) - vmtruncate(vi, i_size); + if (pos + bytes > i_size) { + ntfs_write_failed(mapping, pos + bytes); + } break; } } @@ -2227,7 +2238,6 @@ const struct file_operations ntfs_file_ops = { const struct inode_operations ntfs_file_inode_ops = { #ifdef NTFS_RW - .truncate = ntfs_truncate_vfs, .setattr = ntfs_setattr, #endif /* NTFS_RW */ }; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 1d27331e6fc9..d3e118cc6ffa 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2866,9 +2866,11 @@ conv_err_out: * * See ntfs_truncate() description above for details. */ +#ifdef NTFS_RW void ntfs_truncate_vfs(struct inode *vi) { ntfs_truncate(vi); } +#endif /** * ntfs_setattr - called from notify_change() when an attribute is being changed @@ -2914,8 +2916,10 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) NInoCompressed(ni) ? "compressed" : "encrypted"); err = -EOPNOTSUPP; - } else - err = vmtruncate(vi, attr->ia_size); + } else { + truncate_setsize(vi, attr->ia_size); + ntfs_truncate_vfs(vi); + } if (err || ia_valid == ATTR_SIZE) goto out; } else { diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index db29695f845c..76b6cfb579d7 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -316,6 +316,10 @@ static inline void ntfs_commit_inode(struct inode *vi) return; } +#else + +static inline void ntfs_truncate_vfs(struct inode *vi) {} + #endif /* NTFS_RW */ #endif /* _LINUX_NTFS_INODE_H */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index fe492e1a3cfc..37d313ede159 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1218,24 +1218,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) } } - /* - * This will intentionally not wind up calling truncate_setsize(), - * since all the work for a size change has been done above. - * Otherwise, we could get into problems with truncate as - * ip_alloc_sem is used there to protect against i_size - * changes. - * - * XXX: this means the conditional below can probably be removed. - */ - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { - status = vmtruncate(inode, attr->ia_size); - if (status) { - mlog_errno(status); - goto bail_commit; - } - } - setattr_copy(inode, attr); mark_inode_dirty(inode); diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 77e3cb2962b4..e0d9b3e722bd 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -306,6 +306,16 @@ omfs_writepages(struct address_space *mapping, struct writeback_control *wbc) return mpage_writepages(mapping, wbc, omfs_get_block); } +static void omfs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + omfs_truncate(inode); + } +} + static int omfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -314,11 +324,8 @@ static int omfs_write_begin(struct file *file, struct address_space *mapping, ret = block_write_begin(mapping, pos, len, flags, pagep, omfs_get_block); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + omfs_write_failed(mapping, pos + len); return ret; } @@ -350,9 +357,11 @@ static int omfs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); + error = inode_newsize_ok(inode, attr->ia_size); if (error) return error; + truncate_setsize(inode, attr->ia_size); + omfs_truncate(inode); } setattr_copy(inode, attr); @@ -362,7 +371,6 @@ static int omfs_setattr(struct dentry *dentry, struct iattr *attr) const struct inode_operations omfs_file_inops = { .setattr = omfs_setattr, - .truncate = omfs_truncate }; const struct address_space_operations omfs_aops = { diff --git a/fs/open.c b/fs/open.c index 182d8667b7bd..9b33c0cbfacf 100644 --- a/fs/open.c +++ b/fs/open.c @@ -61,33 +61,22 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, return ret; } -static long do_sys_truncate(const char __user *pathname, loff_t length) +long vfs_truncate(struct path *path, loff_t length) { - struct path path; struct inode *inode; - int error; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; + long error; - error = user_path(pathname, &path); - if (error) - goto out; - inode = path.dentry->d_inode; + inode = path->dentry->d_inode; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ - error = -EISDIR; if (S_ISDIR(inode->i_mode)) - goto dput_and_out; - - error = -EINVAL; + return -EISDIR; if (!S_ISREG(inode->i_mode)) - goto dput_and_out; + return -EINVAL; - error = mnt_want_write(path.mnt); + error = mnt_want_write(path->mnt); if (error) - goto dput_and_out; + goto out; error = inode_permission(inode, MAY_WRITE); if (error) @@ -111,19 +100,40 @@ static long do_sys_truncate(const char __user *pathname, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) - error = security_path_truncate(&path); + error = security_path_truncate(path); if (!error) - error = do_truncate(path.dentry, length, 0, NULL); + error = do_truncate(path->dentry, length, 0, NULL); put_write_and_out: put_write_access(inode); mnt_drop_write_and_out: - mnt_drop_write(path.mnt); -dput_and_out: - path_put(&path); + mnt_drop_write(path->mnt); out: return error; } +EXPORT_SYMBOL_GPL(vfs_truncate); + +static long do_sys_truncate(const char __user *pathname, loff_t length) +{ + unsigned int lookup_flags = LOOKUP_FOLLOW; + struct path path; + int error; + + if (length < 0) /* sorry, but loff_t says... */ + return -EINVAL; + +retry: + error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); + if (!error) { + error = vfs_truncate(&path, length); + path_put(&path); + } + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } + return error; +} SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) { @@ -306,6 +316,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) struct path path; struct inode *inode; int res; + unsigned int lookup_flags = LOOKUP_FOLLOW; if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; @@ -328,8 +339,8 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) } old_cred = override_creds(override_cred); - - res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); +retry: + res = user_path_at(dfd, filename, lookup_flags, &path); if (res) goto out; @@ -364,6 +375,10 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) out_path_release: path_put(&path); + if (retry_estale(res, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } out: revert_creds(old_cred); put_cred(override_cred); @@ -379,8 +394,9 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename) { struct path path; int error; - - error = user_path_dir(filename, &path); + unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; +retry: + error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); if (error) goto out; @@ -392,6 +408,10 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename) dput_and_out: path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } out: return error; } @@ -425,8 +445,9 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) { struct path path; int error; - - error = user_path_dir(filename, &path); + unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; +retry: + error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); if (error) goto out; @@ -445,6 +466,10 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) error = 0; dput_and_out: path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } out: return error; } @@ -489,11 +514,16 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode { struct path path; int error; - - error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); + unsigned int lookup_flags = LOOKUP_FOLLOW; +retry: + error = user_path_at(dfd, filename, lookup_flags, &path); if (!error) { error = chmod_common(&path, mode); path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } } return error; } @@ -552,6 +582,7 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; +retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; @@ -562,6 +593,10 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, mnt_drop_write(path.mnt); out_release: path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } out: return error; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 5a5a0be40e40..9b43ff77a51e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -542,13 +542,6 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); - if (error) - return error; - } - setattr_copy(inode, attr); mark_inode_dirty(inode); return 0; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 7b3ae3cc0ef9..e064f562b1f7 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -261,16 +261,9 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) if (error) return error; - if ((iattr->ia_valid & ATTR_SIZE) && - iattr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, iattr->ia_size); - if (error) - return error; - } - setattr_copy(inode, iattr); mark_inode_dirty(inode); - + de->uid = inode->i_uid; de->gid = inode->i_gid; de->mode = inode->i_mode; @@ -359,18 +352,18 @@ retry: if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL)) return -ENOMEM; - spin_lock(&proc_inum_lock); + spin_lock_bh(&proc_inum_lock); error = ida_get_new(&proc_inum_ida, &i); - spin_unlock(&proc_inum_lock); + spin_unlock_bh(&proc_inum_lock); if (error == -EAGAIN) goto retry; else if (error) return error; if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { - spin_lock(&proc_inum_lock); + spin_lock_bh(&proc_inum_lock); ida_remove(&proc_inum_ida, i); - spin_unlock(&proc_inum_lock); + spin_unlock_bh(&proc_inum_lock); return -ENOSPC; } *inum = PROC_DYNAMIC_FIRST + i; @@ -379,9 +372,9 @@ retry: void proc_free_inum(unsigned int inum) { - spin_lock(&proc_inum_lock); + spin_lock_bh(&proc_inum_lock); ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); - spin_unlock(&proc_inum_lock); + spin_unlock_bh(&proc_inum_lock); } static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 701580ddfcc3..1827d88ad58b 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -736,13 +736,6 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); - if (error) - return error; - } - setattr_copy(inode, attr); mark_inode_dirty(inode); return 0; diff --git a/fs/read_write.c b/fs/read_write.c index 1edaf099ddd7..bb34af315280 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -935,6 +935,8 @@ ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, if (retval > 0) { add_rchar(current, retval); add_wchar(current, retval); + fsnotify_access(in.file); + fsnotify_modify(out.file); } inc_syscr(current); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 8375c922c0d5..50302d6f8895 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -126,7 +126,7 @@ static int reiserfs_file_open(struct inode *inode, struct file *file) return err; } -static void reiserfs_vfs_truncate_file(struct inode *inode) +void reiserfs_vfs_truncate_file(struct inode *inode) { mutex_lock(&(REISERFS_I(inode)->tailpack)); reiserfs_truncate_file(inode, 1); @@ -312,7 +312,6 @@ const struct file_operations reiserfs_file_operations = { }; const struct inode_operations reiserfs_file_inode_operations = { - .truncate = reiserfs_vfs_truncate_file, .setattr = reiserfs_setattr, .setxattr = reiserfs_setxattr, .getxattr = reiserfs_getxattr, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index d83736fbc26c..95d7680ead47 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3085,8 +3085,10 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, loff_t isize = i_size_read(inode); loff_t end = offset + iov_length(iov, nr_segs); - if (end > isize) - vmtruncate(inode, isize); + if ((end > isize) && inode_newsize_ok(inode, isize) == 0) { + truncate_setsize(inode, isize); + reiserfs_vfs_truncate_file(inode); + } } return ret; @@ -3200,8 +3202,13 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) */ reiserfs_write_unlock_once(inode->i_sb, depth); if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) - error = vmtruncate(inode, attr->ia_size); + attr->ia_size != i_size_read(inode)) { + error = inode_newsize_ok(inode, attr->ia_size); + if (!error) { + truncate_setsize(inode, attr->ia_size); + reiserfs_vfs_truncate_file(inode); + } + } if (!error) { setattr_copy(inode, attr); diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 33215f57ea06..157e474ab303 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -2455,6 +2455,7 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct *, int count); int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); +void reiserfs_vfs_truncate_file(struct inode *inode); int reiserfs_commit_page(struct inode *inode, struct page *page, unsigned from, unsigned to); void reiserfs_flush_old_commits(struct super_block *); diff --git a/fs/stat.c b/fs/stat.c index eae494630a36..14f45459c83d 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -74,7 +74,7 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, { struct path path; int error = -EINVAL; - int lookup_flags = 0; + unsigned int lookup_flags = 0; if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | AT_EMPTY_PATH)) != 0) @@ -84,13 +84,17 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, lookup_flags |= LOOKUP_FOLLOW; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; - +retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; error = vfs_getattr(path.mnt, path.dentry, stat); path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } out: return error; } @@ -296,11 +300,13 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, struct path path; int error; int empty = 0; + unsigned int lookup_flags = LOOKUP_EMPTY; if (bufsiz <= 0) return -EINVAL; - error = user_path_at_empty(dfd, pathname, LOOKUP_EMPTY, &path, &empty); +retry: + error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty); if (!error) { struct inode *inode = path.dentry->d_inode; @@ -314,6 +320,10 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, } } path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } } return error; } diff --git a/fs/statfs.c b/fs/statfs.c index f8e832e6f0a2..c219e733f553 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -77,10 +77,17 @@ EXPORT_SYMBOL(vfs_statfs); int user_statfs(const char __user *pathname, struct kstatfs *st) { struct path path; - int error = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); + int error; + unsigned int lookup_flags = LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT; +retry: + error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (!error) { error = vfs_statfs(&path, st); path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } } return error; } diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 0a65939508e9..9d4dc6831792 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -41,9 +41,11 @@ static int sysv_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); + error = inode_newsize_ok(inode, attr->ia_size); if (error) return error; + truncate_setsize(inode, attr->ia_size); + sysv_truncate(inode); } setattr_copy(inode, attr); @@ -52,7 +54,6 @@ static int sysv_setattr(struct dentry *dentry, struct iattr *attr) } const struct inode_operations sysv_file_inode_operations = { - .truncate = sysv_truncate, .setattr = sysv_setattr, .getattr = sysv_getattr, }; diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 90b54b438789..c1a591a4725b 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -464,6 +464,16 @@ int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len) return __block_write_begin(page, pos, len, get_block); } +static void sysv_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, to, inode->i_size); + sysv_truncate(inode); + } +} + static int sysv_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -471,11 +481,8 @@ static int sysv_write_begin(struct file *file, struct address_space *mapping, int ret; ret = block_write_begin(mapping, pos, len, flags, pagep, get_block); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + sysv_write_failed(mapping, pos + len); return ret; } diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index eb6d0b7dc879..ff24e4449ece 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -526,6 +526,14 @@ int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len) return __block_write_begin(page, pos, len, ufs_getfrag_block); } +static void ufs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) + truncate_pagecache(inode, to, inode->i_size); +} + static int ufs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -534,11 +542,8 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping, ret = block_write_begin(mapping, pos, len, flags, pagep, ufs_getfrag_block); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } + if (unlikely(ret)) + ufs_write_failed(mapping, pos + len); return ret; } diff --git a/fs/utimes.c b/fs/utimes.c index bb0696a41735..f4fb7eca10e8 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -158,13 +158,17 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times, if (!(flags & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; - +retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; error = utimes_common(&path, times); path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } } out: diff --git a/fs/xattr.c b/fs/xattr.c index e21c119f4f99..3377dff18404 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -370,8 +370,9 @@ SYSCALL_DEFINE5(setxattr, const char __user *, pathname, { struct path path; int error; - - error = user_path(pathname, &path); + unsigned int lookup_flags = LOOKUP_FOLLOW; +retry: + error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = mnt_want_write(path.mnt); @@ -380,6 +381,10 @@ SYSCALL_DEFINE5(setxattr, const char __user *, pathname, mnt_drop_write(path.mnt); } path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } @@ -389,8 +394,9 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, { struct path path; int error; - - error = user_lpath(pathname, &path); + unsigned int lookup_flags = 0; +retry: + error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = mnt_want_write(path.mnt); @@ -399,6 +405,10 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, mnt_drop_write(path.mnt); } path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } @@ -476,12 +486,17 @@ SYSCALL_DEFINE4(getxattr, const char __user *, pathname, { struct path path; ssize_t error; - - error = user_path(pathname, &path); + unsigned int lookup_flags = LOOKUP_FOLLOW; +retry: + error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = getxattr(path.dentry, name, value, size); path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } @@ -490,12 +505,17 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, { struct path path; ssize_t error; - - error = user_lpath(pathname, &path); + unsigned int lookup_flags = 0; +retry: + error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = getxattr(path.dentry, name, value, size); path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } @@ -556,12 +576,17 @@ SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list, { struct path path; ssize_t error; - - error = user_path(pathname, &path); + unsigned int lookup_flags = LOOKUP_FOLLOW; +retry: + error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = listxattr(path.dentry, list, size); path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } @@ -570,12 +595,17 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, { struct path path; ssize_t error; - - error = user_lpath(pathname, &path); + unsigned int lookup_flags = 0; +retry: + error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = listxattr(path.dentry, list, size); path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } @@ -615,8 +645,9 @@ SYSCALL_DEFINE2(removexattr, const char __user *, pathname, { struct path path; int error; - - error = user_path(pathname, &path); + unsigned int lookup_flags = LOOKUP_FOLLOW; +retry: + error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = mnt_want_write(path.mnt); @@ -625,6 +656,10 @@ SYSCALL_DEFINE2(removexattr, const char __user *, pathname, mnt_drop_write(path.mnt); } path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } @@ -633,8 +668,9 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, { struct path path; int error; - - error = user_lpath(pathname, &path); + unsigned int lookup_flags = 0; +retry: + error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = mnt_want_write(path.mnt); @@ -643,6 +679,10 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, mnt_drop_write(path.mnt); } path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index d1e93284d72a..33bbbae4ddc6 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -12,7 +12,6 @@ #define __ASM_GENERIC_IO_H #include <asm/page.h> /* I/O is all done through memory accesses */ -#include <asm/cacheflush.h> #include <linux/types.h> #ifdef CONFIG_GENERIC_IOMAP @@ -223,36 +222,6 @@ static inline void outsl(unsigned long addr, const void *buffer, int count) } #endif -static inline void readsl(const void __iomem *addr, void *buf, int len) -{ - insl(addr - PCI_IOBASE, buf, len); -} - -static inline void readsw(const void __iomem *addr, void *buf, int len) -{ - insw(addr - PCI_IOBASE, buf, len); -} - -static inline void readsb(const void __iomem *addr, void *buf, int len) -{ - insb(addr - PCI_IOBASE, buf, len); -} - -static inline void writesl(const void __iomem *addr, const void *buf, int len) -{ - outsl(addr - PCI_IOBASE, buf, len); -} - -static inline void writesw(const void __iomem *addr, const void *buf, int len) -{ - outsw(addr - PCI_IOBASE, buf, len); -} - -static inline void writesb(const void __iomem *addr, const void *buf, int len) -{ - outsb(addr - PCI_IOBASE, buf, len); -} - #ifndef CONFIG_GENERIC_IOMAP #define ioread8(addr) readb(addr) #define ioread16(addr) readw(addr) diff --git a/include/asm-generic/mmu.h b/include/asm-generic/mmu.h index 4f4aa56d6b52..0ed3f1cfb854 100644 --- a/include/asm-generic/mmu.h +++ b/include/asm-generic/mmu.h @@ -7,8 +7,12 @@ */ #ifndef __ASSEMBLY__ typedef struct { - struct vm_list_struct *vmlist; unsigned long end_brk; + +#ifdef CONFIG_BINFMT_ELF_FDPIC + unsigned long exec_fdpic_loadmap; + unsigned long interp_fdpic_loadmap; +#endif } mm_context_t; #endif diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2a9a9abc9126..12731a19ef06 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -114,6 +114,7 @@ struct backing_dev_info { int bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); +__printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index a4c2b565c835..0530b9860359 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -112,6 +112,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); +extern int bprm_change_interp(char *interp, struct linux_binprm *bprm); extern int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); @@ -119,8 +120,4 @@ extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); -#ifdef __ARCH_WANT_KERNEL_EXECVE -extern void ret_from_kernel_execve(struct pt_regs *normal) __noreturn; -#endif - #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 6470792b13d3..084d3c622b12 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -43,7 +43,6 @@ struct ceph_options { struct ceph_entity_addr my_addr; int mount_timeout; int osd_idle_ttl; - int osd_timeout; int osd_keepalive_timeout; /* @@ -63,7 +62,6 @@ struct ceph_options { * defaults */ #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 -#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ #define CEPH_OSD_KEEPALIVE_DEFAULT 5 #define CEPH_OSD_IDLE_TTL_DEFAULT 60 diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index e37acbe989a9..10a417f9f76f 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -123,6 +123,7 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); +extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); #endif diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index de91fbdf127e..2c04afeead1c 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -87,6 +87,8 @@ struct ceph_pg { * * lpgp_num -- as above. */ +#define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */ + #define CEPH_PG_TYPE_REP 1 #define CEPH_PG_TYPE_RAID4 2 #define CEPH_PG_POOL_VERSION 2 diff --git a/include/linux/compat.h b/include/linux/compat.h index e4920bd58a47..dec7e2d18875 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -23,6 +23,61 @@ #define COMPAT_USE_64BIT_TIME 0 #endif +#ifndef __SC_DELOUSE +#define __SC_DELOUSE(t,v) ((t)(unsigned long)(v)) +#endif + +#define __SC_CCAST1(t1, a1) __SC_DELOUSE(t1,a1) +#define __SC_CCAST2(t2, a2, ...) __SC_DELOUSE(t2,a2), __SC_CCAST1(__VA_ARGS__) +#define __SC_CCAST3(t3, a3, ...) __SC_DELOUSE(t3,a3), __SC_CCAST2(__VA_ARGS__) +#define __SC_CCAST4(t4, a4, ...) __SC_DELOUSE(t4,a4), __SC_CCAST3(__VA_ARGS__) +#define __SC_CCAST5(t5, a5, ...) __SC_DELOUSE(t5,a5), __SC_CCAST4(__VA_ARGS__) +#define __SC_CCAST6(t6, a6, ...) __SC_DELOUSE(t6,a6), __SC_CCAST5(__VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE1(name, ...) \ + COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE2(name, ...) \ + COMPAT_SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE3(name, ...) \ + COMPAT_SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE4(name, ...) \ + COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE5(name, ...) \ + COMPAT_SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_DEFINE6(name, ...) \ + COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) + +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS + +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__)); \ + static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ + asmlinkage long compat_SyS##name(__SC_LONG##x(__VA_ARGS__)) \ + { \ + return (long) C_SYSC##name(__SC_CCAST##x(__VA_ARGS__)); \ + } \ + SYSCALL_ALIAS(compat_sys##name, compat_SyS##name); \ + static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__)) + +#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ + +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__)) + +#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ + +#ifndef compat_user_stack_pointer +#define compat_user_stack_pointer() current_user_stack_pointer() +#endif +#ifdef CONFIG_GENERIC_SIGALTSTACK +#ifndef compat_sigaltstack /* we'll need that for MIPS */ +typedef struct compat_sigaltstack { + compat_uptr_t ss_sp; + int ss_flags; + compat_size_t ss_size; +} compat_stack_t; +#endif +#endif + #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) @@ -587,6 +642,13 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, compat_size_t count); +#ifdef CONFIG_GENERIC_SIGALTSTACK +asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, + compat_stack_t __user *uoss_ptr); + +int compat_restore_altstack(const compat_stack_t __user *uss); +int __compat_save_altstack(compat_stack_t __user *, unsigned long); +#endif asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 59200795482e..c1754b59ddd3 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -202,7 +202,6 @@ struct dentry_operations { #define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ #define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ #define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */ -#define DCACHE_NEED_LOOKUP 0x80000 /* dentry requires i_op->lookup */ #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) @@ -408,13 +407,6 @@ static inline bool d_mountpoint(struct dentry *dentry) return dentry->d_flags & DCACHE_MOUNTED; } -static inline bool d_need_lookup(struct dentry *dentry) -{ - return dentry->d_flags & DCACHE_NEED_LOOKUP; -} - -extern void d_clear_need_lookup(struct dentry *dentry); - extern int sysctl_vfs_cache_pressure; #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 38d27a10aa5d..bf6afa2fc432 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -23,7 +23,6 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; union map_info { void *ptr; unsigned long long ll; - unsigned target_request_nr; }; /* @@ -46,8 +45,7 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti); * = 1: simple remap complete * = 2: The target wants to push back the io */ -typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, - union map_info *map_context); +typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio); typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, union map_info *map_context); @@ -60,8 +58,7 @@ typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, * 2 : The target wants to push back the io */ typedef int (*dm_endio_fn) (struct dm_target *ti, - struct bio *bio, int error, - union map_info *map_context); + struct bio *bio, int error); typedef int (*dm_request_endio_fn) (struct dm_target *ti, struct request *clone, int error, union map_info *map_context); @@ -193,18 +190,30 @@ struct dm_target { * A number of zero-length barrier requests that will be submitted * to the target for the purpose of flushing cache. * - * The request number will be placed in union map_info->target_request_nr. + * The request number can be accessed with dm_bio_get_target_request_nr. * It is a responsibility of the target driver to remap these requests * to the real underlying devices. */ unsigned num_flush_requests; /* - * The number of discard requests that will be submitted to the - * target. map_info->request_nr is used just like num_flush_requests. + * The number of discard requests that will be submitted to the target. + * The request number can be accessed with dm_bio_get_target_request_nr. */ unsigned num_discard_requests; + /* + * The number of WRITE SAME requests that will be submitted to the target. + * The request number can be accessed with dm_bio_get_target_request_nr. + */ + unsigned num_write_same_requests; + + /* + * The minimum number of extra bytes allocated in each bio for the + * target to use. dm_per_bio_data returns the data location. + */ + unsigned per_bio_data_size; + /* target specific data */ void *private; @@ -241,6 +250,36 @@ struct dm_target_callbacks { int (*congested_fn) (struct dm_target_callbacks *, int); }; +/* + * For bio-based dm. + * One of these is allocated for each bio. + * This structure shouldn't be touched directly by target drivers. + * It is here so that we can inline dm_per_bio_data and + * dm_bio_from_per_bio_data + */ +struct dm_target_io { + struct dm_io *io; + struct dm_target *ti; + union map_info info; + unsigned target_request_nr; + struct bio clone; +}; + +static inline void *dm_per_bio_data(struct bio *bio, size_t data_size) +{ + return (char *)bio - offsetof(struct dm_target_io, clone) - data_size; +} + +static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size) +{ + return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone)); +} + +static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio) +{ + return container_of(bio, struct dm_target_io, clone)->target_request_nr; +} + int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index eb48f3816df9..bd2e52ccc4f2 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -156,7 +156,6 @@ static inline void get_dma_buf(struct dma_buf *dmabuf) get_file(dmabuf->file); } -#ifdef CONFIG_DMA_SHARED_BUFFER struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, struct device *dev); void dma_buf_detach(struct dma_buf *dmabuf, @@ -184,103 +183,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); void *dma_buf_vmap(struct dma_buf *); void dma_buf_vunmap(struct dma_buf *, void *vaddr); -#else - -static inline struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, - struct device *dev) -{ - return ERR_PTR(-ENODEV); -} - -static inline void dma_buf_detach(struct dma_buf *dmabuf, - struct dma_buf_attachment *dmabuf_attach) -{ - return; -} - -static inline struct dma_buf *dma_buf_export(void *priv, - const struct dma_buf_ops *ops, - size_t size, int flags) -{ - return ERR_PTR(-ENODEV); -} - -static inline int dma_buf_fd(struct dma_buf *dmabuf, int flags) -{ - return -ENODEV; -} - -static inline struct dma_buf *dma_buf_get(int fd) -{ - return ERR_PTR(-ENODEV); -} - -static inline void dma_buf_put(struct dma_buf *dmabuf) -{ - return; -} - -static inline struct sg_table *dma_buf_map_attachment( - struct dma_buf_attachment *attach, enum dma_data_direction write) -{ - return ERR_PTR(-ENODEV); -} - -static inline void dma_buf_unmap_attachment(struct dma_buf_attachment *attach, - struct sg_table *sg, enum dma_data_direction dir) -{ - return; -} - -static inline int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, - size_t start, size_t len, - enum dma_data_direction dir) -{ - return -ENODEV; -} - -static inline void dma_buf_end_cpu_access(struct dma_buf *dmabuf, - size_t start, size_t len, - enum dma_data_direction dir) -{ -} - -static inline void *dma_buf_kmap_atomic(struct dma_buf *dmabuf, - unsigned long pnum) -{ - return NULL; -} - -static inline void dma_buf_kunmap_atomic(struct dma_buf *dmabuf, - unsigned long pnum, void *vaddr) -{ -} - -static inline void *dma_buf_kmap(struct dma_buf *dmabuf, unsigned long pnum) -{ - return NULL; -} - -static inline void dma_buf_kunmap(struct dma_buf *dmabuf, - unsigned long pnum, void *vaddr) -{ -} - -static inline int dma_buf_mmap(struct dma_buf *dmabuf, - struct vm_area_struct *vma, - unsigned long pgoff) -{ - return -ENODEV; -} - -static inline void *dma_buf_vmap(struct dma_buf *dmabuf) -{ - return NULL; -} - -static inline void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr) -{ -} -#endif /* CONFIG_DMA_SHARED_BUFFER */ #endif /* __DMA_BUF_H__ */ diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 171ad8aedc83..fc0e34ce038f 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -39,6 +39,8 @@ extern void debug_dma_map_page(struct device *dev, struct page *page, int direction, dma_addr_t dma_addr, bool map_single); +extern void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); + extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction, bool map_single); @@ -105,6 +107,11 @@ static inline void debug_dma_map_page(struct device *dev, struct page *page, { } +static inline void debug_dma_mapping_error(struct device *dev, + dma_addr_t dma_addr) +{ +} + static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction, bool map_single) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index c7e6b6392ab8..5b9b5b317180 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -83,6 +83,11 @@ enum fid_type { * 64 bit parent inode number. */ FILEID_NILFS_WITH_PARENT = 0x62, + + /* + * Filesystems must not use 0xff file ID. + */ + FILEID_INVALID = 0xff, }; struct fid { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h new file mode 100644 index 000000000000..f9a12f6243a5 --- /dev/null +++ b/include/linux/f2fs_fs.h @@ -0,0 +1,413 @@ +/** + * include/linux/f2fs_fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_F2FS_FS_H +#define _LINUX_F2FS_FS_H + +#include <linux/pagemap.h> +#include <linux/types.h> + +#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ +#define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */ +#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ +#define F2FS_BLKSIZE 4096 /* support only 4KB block */ +#define F2FS_MAX_EXTENSION 64 /* # of extension entries */ + +#define NULL_ADDR 0x0U +#define NEW_ADDR -1U + +#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) +#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) +#define F2FS_META_INO(sbi) (sbi->meta_ino_num) + +/* This flag is used by node and meta inodes, and by recovery */ +#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) + +/* + * For further optimization on multi-head logs, on-disk layout supports maximum + * 16 logs by default. The number, 16, is expected to cover all the cases + * enoughly. The implementaion currently uses no more than 6 logs. + * Half the logs are used for nodes, and the other half are used for data. + */ +#define MAX_ACTIVE_LOGS 16 +#define MAX_ACTIVE_NODE_LOGS 8 +#define MAX_ACTIVE_DATA_LOGS 8 + +/* + * For superblock + */ +struct f2fs_super_block { + __le32 magic; /* Magic Number */ + __le16 major_ver; /* Major Version */ + __le16 minor_ver; /* Minor Version */ + __le32 log_sectorsize; /* log2 sector size in bytes */ + __le32 log_sectors_per_block; /* log2 # of sectors per block */ + __le32 log_blocksize; /* log2 block size in bytes */ + __le32 log_blocks_per_seg; /* log2 # of blocks per segment */ + __le32 segs_per_sec; /* # of segments per section */ + __le32 secs_per_zone; /* # of sections per zone */ + __le32 checksum_offset; /* checksum offset inside super block */ + __le64 block_count; /* total # of user blocks */ + __le32 section_count; /* total # of sections */ + __le32 segment_count; /* total # of segments */ + __le32 segment_count_ckpt; /* # of segments for checkpoint */ + __le32 segment_count_sit; /* # of segments for SIT */ + __le32 segment_count_nat; /* # of segments for NAT */ + __le32 segment_count_ssa; /* # of segments for SSA */ + __le32 segment_count_main; /* # of segments for main area */ + __le32 segment0_blkaddr; /* start block address of segment 0 */ + __le32 cp_blkaddr; /* start block address of checkpoint */ + __le32 sit_blkaddr; /* start block address of SIT */ + __le32 nat_blkaddr; /* start block address of NAT */ + __le32 ssa_blkaddr; /* start block address of SSA */ + __le32 main_blkaddr; /* start block address of main area */ + __le32 root_ino; /* root inode number */ + __le32 node_ino; /* node inode number */ + __le32 meta_ino; /* meta inode number */ + __u8 uuid[16]; /* 128-bit uuid for volume */ + __le16 volume_name[512]; /* volume name */ + __le32 extension_count; /* # of extensions below */ + __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ +} __packed; + +/* + * For checkpoint + */ +#define CP_ERROR_FLAG 0x00000008 +#define CP_COMPACT_SUM_FLAG 0x00000004 +#define CP_ORPHAN_PRESENT_FLAG 0x00000002 +#define CP_UMOUNT_FLAG 0x00000001 + +struct f2fs_checkpoint { + __le64 checkpoint_ver; /* checkpoint block version number */ + __le64 user_block_count; /* # of user blocks */ + __le64 valid_block_count; /* # of valid blocks in main area */ + __le32 rsvd_segment_count; /* # of reserved segments for gc */ + __le32 overprov_segment_count; /* # of overprovision segments */ + __le32 free_segment_count; /* # of free segments in main area */ + + /* information of current node segments */ + __le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS]; + __le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS]; + /* information of current data segments */ + __le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS]; + __le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS]; + __le32 ckpt_flags; /* Flags : umount and journal_present */ + __le32 cp_pack_total_block_count; /* total # of one cp pack */ + __le32 cp_pack_start_sum; /* start block number of data summary */ + __le32 valid_node_count; /* Total number of valid nodes */ + __le32 valid_inode_count; /* Total number of valid inodes */ + __le32 next_free_nid; /* Next free node number */ + __le32 sit_ver_bitmap_bytesize; /* Default value 64 */ + __le32 nat_ver_bitmap_bytesize; /* Default value 256 */ + __le32 checksum_offset; /* checksum offset inside cp block */ + __le64 elapsed_time; /* mounted time */ + /* allocation type of current segment */ + unsigned char alloc_type[MAX_ACTIVE_LOGS]; + + /* SIT and NAT version bitmap */ + unsigned char sit_nat_version_bitmap[1]; +} __packed; + +/* + * For orphan inode management + */ +#define F2FS_ORPHANS_PER_BLOCK 1020 + +struct f2fs_orphan_block { + __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */ + __le32 reserved; /* reserved */ + __le16 blk_addr; /* block index in current CP */ + __le16 blk_count; /* Number of orphan inode blocks in CP */ + __le32 entry_count; /* Total number of orphan nodes in current CP */ + __le32 check_sum; /* CRC32 for orphan inode block */ +} __packed; + +/* + * For NODE structure + */ +struct f2fs_extent { + __le32 fofs; /* start file offset of the extent */ + __le32 blk_addr; /* start block address of the extent */ + __le32 len; /* lengh of the extent */ +} __packed; + +#define F2FS_MAX_NAME_LEN 256 +#define ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ + +struct f2fs_inode { + __le16 i_mode; /* file mode */ + __u8 i_advise; /* file hints */ + __u8 i_reserved; /* reserved */ + __le32 i_uid; /* user ID */ + __le32 i_gid; /* group ID */ + __le32 i_links; /* links count */ + __le64 i_size; /* file size in bytes */ + __le64 i_blocks; /* file size in blocks */ + __le64 i_atime; /* access time */ + __le64 i_ctime; /* change time */ + __le64 i_mtime; /* modification time */ + __le32 i_atime_nsec; /* access time in nano scale */ + __le32 i_ctime_nsec; /* change time in nano scale */ + __le32 i_mtime_nsec; /* modification time in nano scale */ + __le32 i_generation; /* file version (for NFS) */ + __le32 i_current_depth; /* only for directory depth */ + __le32 i_xattr_nid; /* nid to save xattr */ + __le32 i_flags; /* file attributes */ + __le32 i_pino; /* parent inode number */ + __le32 i_namelen; /* file name length */ + __u8 i_name[F2FS_MAX_NAME_LEN]; /* file name for SPOR */ + + struct f2fs_extent i_ext; /* caching a largest extent */ + + __le32 i_addr[ADDRS_PER_INODE]; /* Pointers to data blocks */ + + __le32 i_nid[5]; /* direct(2), indirect(2), + double_indirect(1) node id */ +} __packed; + +struct direct_node { + __le32 addr[ADDRS_PER_BLOCK]; /* array of data block address */ +} __packed; + +struct indirect_node { + __le32 nid[NIDS_PER_BLOCK]; /* array of data block address */ +} __packed; + +enum { + COLD_BIT_SHIFT = 0, + FSYNC_BIT_SHIFT, + DENT_BIT_SHIFT, + OFFSET_BIT_SHIFT +}; + +struct node_footer { + __le32 nid; /* node id */ + __le32 ino; /* inode nunmber */ + __le32 flag; /* include cold/fsync/dentry marks and offset */ + __le64 cp_ver; /* checkpoint version */ + __le32 next_blkaddr; /* next node page block address */ +} __packed; + +struct f2fs_node { + /* can be one of three types: inode, direct, and indirect types */ + union { + struct f2fs_inode i; + struct direct_node dn; + struct indirect_node in; + }; + struct node_footer footer; +} __packed; + +/* + * For NAT entries + */ +#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry)) + +struct f2fs_nat_entry { + __u8 version; /* latest version of cached nat entry */ + __le32 ino; /* inode number */ + __le32 block_addr; /* block address */ +} __packed; + +struct f2fs_nat_block { + struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For SIT entries + * + * Each segment is 2MB in size by default so that a bitmap for validity of + * there-in blocks should occupy 64 bytes, 512 bits. + * Not allow to change this. + */ +#define SIT_VBLOCK_MAP_SIZE 64 +#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) + +/* + * Note that f2fs_sit_entry->vblocks has the following bit-field information. + * [15:10] : allocation type such as CURSEG_XXXX_TYPE + * [9:0] : valid block count + */ +#define SIT_VBLOCKS_SHIFT 10 +#define SIT_VBLOCKS_MASK ((1 << SIT_VBLOCKS_SHIFT) - 1) +#define GET_SIT_VBLOCKS(raw_sit) \ + (le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK) +#define GET_SIT_TYPE(raw_sit) \ + ((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK) \ + >> SIT_VBLOCKS_SHIFT) + +struct f2fs_sit_entry { + __le16 vblocks; /* reference above */ + __u8 valid_map[SIT_VBLOCK_MAP_SIZE]; /* bitmap for valid blocks */ + __le64 mtime; /* segment age for cleaning */ +} __packed; + +struct f2fs_sit_block { + struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK]; +} __packed; + +/* + * For segment summary + * + * One summary block contains exactly 512 summary entries, which represents + * exactly 2MB segment by default. Not allow to change the basic units. + * + * NOTE: For initializing fields, you must use set_summary + * + * - If data page, nid represents dnode's nid + * - If node page, nid represents the node page's nid. + * + * The ofs_in_node is used by only data page. It represents offset + * from node's page's beginning to get a data block address. + * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) + */ +#define ENTRIES_IN_SUM 512 +#define SUMMARY_SIZE (7) /* sizeof(struct summary) */ +#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ +#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) + +/* a summary entry for a 4KB-sized block in a segment */ +struct f2fs_summary { + __le32 nid; /* parent node id */ + union { + __u8 reserved[3]; + struct { + __u8 version; /* node version number */ + __le16 ofs_in_node; /* block index in parent node */ + } __packed; + }; +} __packed; + +/* summary block type, node or data, is stored to the summary_footer */ +#define SUM_TYPE_NODE (1) +#define SUM_TYPE_DATA (0) + +struct summary_footer { + unsigned char entry_type; /* SUM_TYPE_XXX */ + __u32 check_sum; /* summary checksum */ +} __packed; + +#define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\ + SUM_ENTRY_SIZE) +#define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct nat_journal_entry)) +#define NAT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct nat_journal_entry)) +#define SIT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\ + sizeof(struct sit_journal_entry)) +#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\ + sizeof(struct sit_journal_entry)) +/* + * frequently updated NAT/SIT entries can be stored in the spare area in + * summary blocks + */ +enum { + NAT_JOURNAL = 0, + SIT_JOURNAL +}; + +struct nat_journal_entry { + __le32 nid; + struct f2fs_nat_entry ne; +} __packed; + +struct nat_journal { + struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES]; + __u8 reserved[NAT_JOURNAL_RESERVED]; +} __packed; + +struct sit_journal_entry { + __le32 segno; + struct f2fs_sit_entry se; +} __packed; + +struct sit_journal { + struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES]; + __u8 reserved[SIT_JOURNAL_RESERVED]; +} __packed; + +/* 4KB-sized summary block structure */ +struct f2fs_summary_block { + struct f2fs_summary entries[ENTRIES_IN_SUM]; + union { + __le16 n_nats; + __le16 n_sits; + }; + /* spare area is used by NAT or SIT journals */ + union { + struct nat_journal nat_j; + struct sit_journal sit_j; + }; + struct summary_footer footer; +} __packed; + +/* + * For directory operations + */ +#define F2FS_DOT_HASH 0 +#define F2FS_DDOT_HASH F2FS_DOT_HASH +#define F2FS_MAX_HASH (~((0x3ULL) << 62)) +#define F2FS_HASH_COL_BIT ((0x1ULL) << 63) + +typedef __le32 f2fs_hash_t; + +/* One directory entry slot covers 8bytes-long file name */ +#define F2FS_NAME_LEN 8 +#define F2FS_NAME_LEN_BITS 3 + +#define GET_DENTRY_SLOTS(x) ((x + F2FS_NAME_LEN - 1) >> F2FS_NAME_LEN_BITS) + +/* the number of dentry in a block */ +#define NR_DENTRY_IN_BLOCK 214 + +/* MAX level for dir lookup */ +#define MAX_DIR_HASH_DEPTH 63 + +#define SIZE_OF_DIR_ENTRY 11 /* by byte */ +#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ + BITS_PER_BYTE) +#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ + F2FS_NAME_LEN) * \ + NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) + +/* One directory entry slot representing F2FS_NAME_LEN-sized file name */ +struct f2fs_dir_entry { + __le32 hash_code; /* hash code of file name */ + __le32 ino; /* inode number */ + __le16 name_len; /* lengh of file name */ + __u8 file_type; /* file type */ +} __packed; + +/* 4KB-sized directory entry block */ +struct f2fs_dentry_block { + /* validity bitmap for directory entries in each block */ + __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; + __u8 reserved[SIZE_OF_RESERVED]; + struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK]; + __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_NAME_LEN]; +} __packed; + +/* file types used in inode_info->flags */ +enum { + F2FS_FT_UNKNOWN, + F2FS_FT_REG_FILE, + F2FS_FT_DIR, + F2FS_FT_CHRDEV, + F2FS_FT_BLKDEV, + F2FS_FT_FIFO, + F2FS_FT_SOCK, + F2FS_FT_SYMLINK, + F2FS_FT_MAX +}; + +#endif /* _LINUX_F2FS_FS_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index a823d4be38e7..7617ee04f066 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1445,10 +1445,6 @@ static inline void sb_start_intwrite(struct super_block *sb) extern bool inode_owner_or_capable(const struct inode *inode); -/* not quite ready to be deprecated, but... */ -extern void lock_super(struct super_block *); -extern void unlock_super(struct super_block *); - /* * VFS helper functions.. */ @@ -1565,7 +1561,6 @@ struct inode_operations { int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); - void (*truncate) (struct inode *); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -1999,6 +1994,7 @@ struct filename { bool separate; /* should "name" be freed? */ }; +extern long vfs_truncate(struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int do_fallocate(struct file *file, int mode, loff_t offset, diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index ce31408b1e47..5dfa0aa216b6 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -75,6 +75,16 @@ extern wait_queue_head_t fscache_cache_cleared_wq; typedef void (*fscache_operation_release_t)(struct fscache_operation *op); typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); +enum fscache_operation_state { + FSCACHE_OP_ST_BLANK, /* Op is not yet submitted */ + FSCACHE_OP_ST_INITIALISED, /* Op is initialised */ + FSCACHE_OP_ST_PENDING, /* Op is blocked from running */ + FSCACHE_OP_ST_IN_PROGRESS, /* Op is in progress */ + FSCACHE_OP_ST_COMPLETE, /* Op is complete */ + FSCACHE_OP_ST_CANCELLED, /* Op has been cancelled */ + FSCACHE_OP_ST_DEAD /* Op is now dead */ +}; + struct fscache_operation { struct work_struct work; /* record for async ops */ struct list_head pend_link; /* link in object->pending_ops */ @@ -86,10 +96,10 @@ struct fscache_operation { #define FSCACHE_OP_MYTHREAD 0x0002 /* - processing is done be issuing thread, not pool */ #define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ #define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ -#define FSCACHE_OP_DEAD 6 /* op is now dead */ -#define FSCACHE_OP_DEC_READ_CNT 7 /* decrement object->n_reads on destruction */ -#define FSCACHE_OP_KEEP_FLAGS 0xc0 /* flags to keep when repurposing an op */ +#define FSCACHE_OP_DEC_READ_CNT 6 /* decrement object->n_reads on destruction */ +#define FSCACHE_OP_KEEP_FLAGS 0x0070 /* flags to keep when repurposing an op */ + enum fscache_operation_state state; atomic_t usage; unsigned debug_id; /* debugging ID */ @@ -106,6 +116,7 @@ extern atomic_t fscache_op_debug_id; extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); +extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); /** @@ -122,6 +133,7 @@ static inline void fscache_operation_init(struct fscache_operation *op, { INIT_WORK(&op->work, fscache_op_work_func); atomic_set(&op->usage, 1); + op->state = FSCACHE_OP_ST_INITIALISED; op->debug_id = atomic_inc_return(&fscache_op_debug_id); op->processor = processor; op->release = release; @@ -138,6 +150,7 @@ struct fscache_retrieval { void *context; /* netfs read context (pinned) */ struct list_head to_do; /* list of things to be done by the backend */ unsigned long start_time; /* time at which retrieval started */ + unsigned n_pages; /* number of pages to be retrieved */ }; typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op, @@ -174,8 +187,22 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) } /** + * fscache_retrieval_complete - Record (partial) completion of a retrieval + * @op: The retrieval operation affected + * @n_pages: The number of pages to account for + */ +static inline void fscache_retrieval_complete(struct fscache_retrieval *op, + int n_pages) +{ + op->n_pages -= n_pages; + if (op->n_pages <= 0) + fscache_op_complete(&op->op, true); +} + +/** * fscache_put_retrieval - Drop a reference to a retrieval operation * @op: The retrieval operation affected + * @n_pages: The number of pages to account for * * Drop a reference to a retrieval operation. */ @@ -227,6 +254,9 @@ struct fscache_cache_ops { /* store the updated auxiliary data on an object */ void (*update_object)(struct fscache_object *object); + /* Invalidate an object */ + void (*invalidate_object)(struct fscache_operation *op); + /* discard the resources pinned by an object and effect retirement if * necessary */ void (*drop_object)(struct fscache_object *object); @@ -301,11 +331,30 @@ struct fscache_cookie { #define FSCACHE_COOKIE_PENDING_FILL 3 /* T if pending initial fill on object */ #define FSCACHE_COOKIE_FILLING 4 /* T if filling object incrementally */ #define FSCACHE_COOKIE_UNAVAILABLE 5 /* T if cookie is unavailable (error, etc) */ +#define FSCACHE_COOKIE_WAITING_ON_READS 6 /* T if cookie is waiting on reads */ +#define FSCACHE_COOKIE_INVALIDATING 7 /* T if cookie is being invalidated */ }; extern struct fscache_cookie fscache_fsdef_index; /* + * Event list for fscache_object::{event_mask,events} + */ +enum { + FSCACHE_OBJECT_EV_REQUEUE, /* T if object should be requeued */ + FSCACHE_OBJECT_EV_UPDATE, /* T if object should be updated */ + FSCACHE_OBJECT_EV_INVALIDATE, /* T if cache requested object invalidation */ + FSCACHE_OBJECT_EV_CLEARED, /* T if accessors all gone */ + FSCACHE_OBJECT_EV_ERROR, /* T if fatal error occurred during processing */ + FSCACHE_OBJECT_EV_RELEASE, /* T if netfs requested object release */ + FSCACHE_OBJECT_EV_RETIRE, /* T if netfs requested object retirement */ + FSCACHE_OBJECT_EV_WITHDRAW, /* T if cache requested object withdrawal */ + NR_FSCACHE_OBJECT_EVENTS +}; + +#define FSCACHE_OBJECT_EVENTS_MASK ((1UL << NR_FSCACHE_OBJECT_EVENTS) - 1) + +/* * on-disk cache file or index handle */ struct fscache_object { @@ -317,6 +366,7 @@ struct fscache_object { /* active states */ FSCACHE_OBJECT_AVAILABLE, /* cleaning up object after creation */ FSCACHE_OBJECT_ACTIVE, /* object is usable */ + FSCACHE_OBJECT_INVALIDATING, /* object is invalidating */ FSCACHE_OBJECT_UPDATING, /* object is updating */ /* terminal states */ @@ -332,10 +382,10 @@ struct fscache_object { int debug_id; /* debugging ID */ int n_children; /* number of child objects */ - int n_ops; /* number of ops outstanding on object */ + int n_ops; /* number of extant ops on object */ int n_obj_ops; /* number of object ops outstanding on object */ int n_in_progress; /* number of ops in progress */ - int n_exclusive; /* number of exclusive ops queued */ + int n_exclusive; /* number of exclusive ops queued or in progress */ atomic_t n_reads; /* number of read ops in progress */ spinlock_t lock; /* state and operations lock */ @@ -343,14 +393,6 @@ struct fscache_object { unsigned long event_mask; /* events this object is interested in */ unsigned long events; /* events to be processed by this object * (order is important - using fls) */ -#define FSCACHE_OBJECT_EV_REQUEUE 0 /* T if object should be requeued */ -#define FSCACHE_OBJECT_EV_UPDATE 1 /* T if object should be updated */ -#define FSCACHE_OBJECT_EV_CLEARED 2 /* T if accessors all gone */ -#define FSCACHE_OBJECT_EV_ERROR 3 /* T if fatal error occurred during processing */ -#define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ -#define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ -#define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ -#define FSCACHE_OBJECT_EVENTS_MASK 0x7f /* mask of all events*/ unsigned long flags; #define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ @@ -504,6 +546,9 @@ extern void fscache_withdraw_cache(struct fscache_cache *cache); extern void fscache_io_error(struct fscache_cache *cache); +extern void fscache_mark_page_cached(struct fscache_retrieval *op, + struct page *page); + extern void fscache_mark_pages_cached(struct fscache_retrieval *op, struct pagevec *pagevec); diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9ec20dec3353..7a086235da4b 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -135,14 +135,14 @@ struct fscache_cookie_def { */ void (*put_context)(void *cookie_netfs_data, void *context); - /* indicate pages that now have cache metadata retained - * - this function should mark the specified pages as now being cached - * - the pages will have been marked with PG_fscache before this is + /* indicate page that now have cache metadata retained + * - this function should mark the specified page as now being cached + * - the page will have been marked with PG_fscache before this is * called, so this is optional */ - void (*mark_pages_cached)(void *cookie_netfs_data, - struct address_space *mapping, - struct pagevec *cached_pvec); + void (*mark_page_cached)(void *cookie_netfs_data, + struct address_space *mapping, + struct page *page); /* indicate the cookie is no longer cached * - this function is called when the backing store currently caching @@ -185,6 +185,8 @@ extern struct fscache_cookie *__fscache_acquire_cookie( extern void __fscache_relinquish_cookie(struct fscache_cookie *, int); extern void __fscache_update_cookie(struct fscache_cookie *); extern int __fscache_attr_changed(struct fscache_cookie *); +extern void __fscache_invalidate(struct fscache_cookie *); +extern void __fscache_wait_on_invalidate(struct fscache_cookie *); extern int __fscache_read_or_alloc_page(struct fscache_cookie *, struct page *, fscache_rw_complete_t, @@ -390,6 +392,42 @@ int fscache_attr_changed(struct fscache_cookie *cookie) } /** + * fscache_invalidate - Notify cache that an object needs invalidation + * @cookie: The cookie representing the cache object + * + * Notify the cache that an object is needs to be invalidated and that it + * should abort any retrievals or stores it is doing on the cache. The object + * is then marked non-caching until such time as the invalidation is complete. + * + * This can be called with spinlocks held. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_invalidate(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + __fscache_invalidate(cookie); +} + +/** + * fscache_wait_on_invalidate - Wait for invalidation to complete + * @cookie: The cookie representing the cache object + * + * Wait for the invalidation of an object to complete. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_wait_on_invalidate(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + __fscache_wait_on_invalidate(cookie); +} + +/** * fscache_reserve_space - Reserve data space for a cached object * @cookie: The cookie representing the cache object * @i_size: The amount of space to be reserved diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 63d966d5c2ea..d5b0910d4961 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -88,9 +88,10 @@ struct fsnotify_event_private_data; * if the group is interested in this event. * handle_event - main call for a group to handle an fs event * free_group_priv - called when a group refcnt hits 0 to clean up the private union - * freeing-mark - this means that a mark has been flagged to die when everything - * finishes using it. The function is supplied with what must be a - * valid group and inode to use to clean up. + * freeing_mark - called when a mark is being destroyed for some reason. The group + * MUST be holding a reference on each mark and that reference must be + * dropped in this function. inotify uses this function to send + * userspace messages that marks have been removed. */ struct fsnotify_ops { bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, @@ -141,12 +142,14 @@ struct fsnotify_group { unsigned int priority; /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ - spinlock_t mark_lock; /* protect marks_list */ + struct mutex mark_mutex; /* protect marks_list */ atomic_t num_marks; /* 1 for each mark and 1 for not being * past the point of no return when freeing * a group */ struct list_head marks_list; /* all inode marks for this group */ + struct fasync_struct *fsn_fa; /* async notification */ + /* groups can define private fields here or use the void *private */ union { void *private; @@ -155,7 +158,6 @@ struct fsnotify_group { spinlock_t idr_lock; struct idr idr; u32 last_wd; - struct fasync_struct *fa; /* async notification */ struct user_struct *user; } inotify_data; #endif @@ -287,7 +289,6 @@ struct fsnotify_mark { struct fsnotify_inode_mark i; struct fsnotify_vfsmount_mark m; }; - struct list_head free_g_list; /* tmp list used when freeing this mark */ __u32 ignored_mask; /* events types to ignore */ #define FSNOTIFY_MARK_FLAG_INODE 0x01 #define FSNOTIFY_MARK_FLAG_VFSMOUNT 0x02 @@ -360,11 +361,16 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode /* called from fsnotify listeners, such as fanotify or dnotify */ -/* get a reference to an existing or create a new group */ +/* create a new group */ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); +/* get reference to a group */ +extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); - +/* destroy group */ +extern void fsnotify_destroy_group(struct fsnotify_group *group); +/* fasync handler function */ +extern int fsnotify_fasync(int fd, struct file *file, int on); /* take a reference to an event */ extern void fsnotify_get_event(struct fsnotify_event *event); extern void fsnotify_put_event(struct fsnotify_event *event); @@ -405,8 +411,13 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask /* attach the mark to both the group and the inode */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, struct vfsmount *mnt, int allow_dups); -/* given a mark, flag it to be freed when all references are dropped */ -extern void fsnotify_destroy_mark(struct fsnotify_mark *mark); +extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_group *group, + struct inode *inode, struct vfsmount *mnt, int allow_dups); +/* given a group and a mark, flag mark to be freed when all references are dropped */ +extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group); +extern void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, + struct fsnotify_group *group); /* run all the marks in a group, and clear all of the vfsmount marks */ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the inode marks */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d140e8fb075f..c566927efcbd 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -77,13 +77,15 @@ /* * Divide positive or negative dividend by positive divisor and round - * to closest integer. Result is undefined for negative divisors. + * to closest integer. Result is undefined for negative divisors and + * for negative dividends if the divisor variable type is unsigned. */ #define DIV_ROUND_CLOSEST(x, divisor)( \ { \ typeof(x) __x = x; \ typeof(divisor) __d = divisor; \ - (((typeof(x))-1) > 0 || (__x) > 0) ? \ + (((typeof(x))-1) > 0 || \ + ((typeof(divisor))-1) > 0 || (__x) > 0) ? \ (((__x) + ((__d) / 2)) / (__d)) : \ (((__x) - ((__d) / 2)) / (__d)); \ } \ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 21821da2abfd..20ea939c22a6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -625,6 +625,7 @@ struct mlx4_dev { u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; int num_vfs; + int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; }; diff --git a/include/linux/mm.h b/include/linux/mm.h index 7f4f906190bd..63204078f72b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1007,7 +1007,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); -extern int vmtruncate(struct inode *inode, loff_t offset); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); diff --git a/include/linux/namei.h b/include/linux/namei.h index 4bf19d8174ed..e998c030061d 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -65,8 +65,8 @@ extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, extern int kern_path(const char *, unsigned, struct path *); -extern struct dentry *kern_path_create(int, const char *, struct path *, int); -extern struct dentry *user_path_create(int, const char __user *, struct path *, int); +extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); +extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, @@ -98,4 +98,20 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) ((char *) name)[min(len, maxlen)] = '\0'; } +/** + * retry_estale - determine whether the caller should retry an operation + * @error: the error that would currently be returned + * @flags: flags being used for next lookup attempt + * + * Check to see if the error code was -ESTALE, and then determine whether + * to retry the call based on whether "flags" already has LOOKUP_REVAL set. + * + * Returns true if the caller should try the operation again. + */ +static inline bool +retry_estale(const long error, const unsigned int flags) +{ + return error == -ESTALE && !(flags & LOOKUP_REVAL); +} + #endif /* _LINUX_NAMEI_H */ diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index b47d2040c9f2..3863a4dbdf18 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -100,6 +100,7 @@ extern int of_platform_populate(struct device_node *root, #if !defined(CONFIG_OF_ADDRESS) struct of_dev_auxdata; +struct device; static inline int of_platform_populate(struct device_node *root, const struct of_device_id *matches, const struct of_dev_auxdata *lookup, diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index c677b9f2fefa..5b429c43a297 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */ +#include <linux/platform_device.h> + #define MMU_REG_SIZE 256 /** @@ -42,8 +44,11 @@ struct omap_mmu_dev_attr { struct iommu_platform_data { const char *name; - const char *clk_name; - const int nr_tlb_entries; + const char *reset_name; + int nr_tlb_entries; u32 da_start; u32 da_end; + + int (*assert_reset)(struct platform_device *pdev, const char *name); + int (*deassert_reset)(struct platform_device *pdev, const char *name); }; diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/include/linux/platform_data/serial-omap.h index ff9b0aab5281..ff9b0aab5281 100644 --- a/arch/arm/plat-omap/include/plat/omap-serial.h +++ b/include/linux/platform_data/serial-omap.h diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h index 8570bcfe6311..ef65b67c56c3 100644 --- a/include/linux/platform_data/usb-omap.h +++ b/include/linux/platform_data/usb-omap.h @@ -59,6 +59,9 @@ struct usbhs_omap_platform_data { struct ehci_hcd_omap_platform_data *ehci_data; struct ohci_hcd_omap_platform_data *ohci_data; + + /* OMAP3 <= ES2.1 have a single ulpi bypass control bit */ + unsigned single_ulpi_bypass:1; }; /*-------------------------------------------------------------------------*/ diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index addfbe7c180e..1693775ecfe8 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -344,6 +344,10 @@ static inline void user_single_step_siginfo(struct task_struct *tsk, #define signal_pt_regs() task_pt_regs(current) #endif +#ifndef current_user_stack_pointer +#define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) +#endif + extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); diff --git a/include/linux/sched.h b/include/linux/sched.h index f712465b05c5..206bb089c06b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2353,9 +2353,7 @@ extern int do_execve(const char *, const char __user * const __user *); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); -#ifdef CONFIG_GENERIC_KERNEL_THREAD extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -#endif extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); diff --git a/include/linux/signal.h b/include/linux/signal.h index e19a011b43b7..0a89ffc48466 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -385,4 +385,7 @@ int unhandled_signal(struct task_struct *tsk, int sig); void signals_init(void); +int restore_altstack(const stack_t __user *); +int __save_altstack(stack_t __user *, unsigned long); + #endif /* _LINUX_SIGNAL_H */ diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index f792794f6634..5dc9ee4d616e 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -217,6 +217,8 @@ extern int qword_get(char **bpp, char *dest, int bufsize); static inline int get_int(char **bpp, int *anint) { char buf[50]; + char *ep; + int rv; int len = qword_get(bpp, buf, sizeof(buf)); if (len < 0) @@ -224,9 +226,11 @@ static inline int get_int(char **bpp, int *anint) if (len == 0) return -ENOENT; - if (kstrtoint(buf, 0, anint)) + rv = simple_strtol(buf, &ep, 0); + if (*ep) return -EINVAL; + *anint = rv; return 0; } diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d83db800fe02..676ddf53b3ee 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -243,6 +243,7 @@ struct svc_rqst { struct page * rq_pages[RPCSVC_MAXPAGES]; struct page * *rq_respages; /* points into rq_pages */ int rq_resused; /* number of pages used for result */ + struct page * *rq_next_page; /* next reply page to use */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ @@ -338,9 +339,8 @@ xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p) static inline void svc_free_res_pages(struct svc_rqst *rqstp) { - while (rqstp->rq_resused) { - struct page **pp = (rqstp->rq_respages + - --rqstp->rq_resused); + while (rqstp->rq_next_page != rqstp->rq_respages) { + struct page **pp = --rqstp->rq_next_page; if (*pp) { put_page(*pp); *pp = NULL; diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 92ad02f0dcc0..62fd1b756e99 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -26,11 +26,28 @@ struct svc_sock { void (*sk_owspace)(struct sock *); /* private TCP part */ - u32 sk_reclen; /* length of record */ - u32 sk_tcplen; /* current read length */ + /* On-the-wire fragment header: */ + __be32 sk_reclen; + /* As we receive a record, this includes the length received so + * far (including the fragment header): */ + u32 sk_tcplen; + /* Total length of the data (not including fragment headers) + * received so far in the fragments making up this rpc: */ + u32 sk_datalen; + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; +static inline u32 svc_sock_reclen(struct svc_sock *svsk) +{ + return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK; +} + +static inline u32 svc_sock_final_rec(struct svc_sock *svsk) +{ + return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT; +} + /* * Function prototypes. */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6caee34bf8a2..45e2db270255 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -63,6 +63,7 @@ struct getcpu_cache; struct old_linux_dirent; struct perf_event_attr; struct file_handle; +struct sigaltstack; #include <linux/types.h> #include <linux/aio_abi.h> @@ -299,6 +300,11 @@ asmlinkage long sys_personality(unsigned int personality); asmlinkage long sys_sigpending(old_sigset_t __user *set); asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, old_sigset_t __user *oset); +#ifdef CONFIG_GENERIC_SIGALTSTACK +asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss, + struct sigaltstack __user *uoss); +#endif + asmlinkage long sys_getitimer(int which, struct itimerval __user *value); asmlinkage long sys_setitimer(int which, struct itimerval __user *value, @@ -827,15 +833,6 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, const char __user *pathname); asmlinkage long sys_syncfs(int fd); -#ifndef CONFIG_GENERIC_KERNEL_EXECVE -int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]); -#else -#define kernel_execve(filename, argv, envp) \ - do_execve(filename, \ - (const char __user *const __user *)argv, \ - (const char __user *const __user *)envp) -#endif - asmlinkage long sys_fork(void); asmlinkage long sys_vfork(void); #ifdef CONFIG_CLONE_BACKWARDS diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 9bbeabf66c54..bd45eb7bedc8 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -69,6 +69,7 @@ struct usbnet { # define EVENT_DEV_ASLEEP 6 # define EVENT_DEV_OPEN 7 # define EVENT_DEVICE_REPORT_IDLE 8 +# define EVENT_NO_RUNTIME_PM 9 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) @@ -240,4 +241,6 @@ extern void usbnet_set_msglevel(struct net_device *, u32); extern void usbnet_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); extern int usbnet_nway_reset(struct net_device *net); +extern int usbnet_manage_power(struct usbnet *, int); + #endif /* __LINUX_USB_USBNET_H */ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 533b1157f22e..cf8adb1f5b2c 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -16,12 +16,20 @@ * @name: the name of this virtqueue (mainly for debugging) * @vdev: the virtio device this queue was created for. * @priv: a pointer for the virtqueue implementation to use. + * @index: the zero-based ordinal number for this queue. + * @num_free: number of elements we expect to be able to fit. + * + * A note on @num_free: with indirect buffers, each buffer needs one + * element in the queue, otherwise a buffer will need one element per + * sg element. */ struct virtqueue { struct list_head list; void (*callback)(struct virtqueue *vq); const char *name; struct virtio_device *vdev; + unsigned int index; + unsigned int num_free; void *priv; }; @@ -50,7 +58,11 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq); unsigned int virtqueue_get_vring_size(struct virtqueue *vq); -int virtqueue_get_queue_index(struct virtqueue *vq); +/* FIXME: Obsolete accessor, but required for virtio_net merge. */ +static inline unsigned int virtqueue_get_queue_index(struct virtqueue *vq) +{ + return vq->index; +} /** * virtio_device - representation of a device using virtio @@ -73,7 +85,11 @@ struct virtio_device { void *priv; }; -#define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev) +static inline struct virtio_device *dev_to_virtio(struct device *_dev) +{ + return container_of(_dev, struct virtio_device, dev); +} + int register_virtio_device(struct virtio_device *dev); void unregister_virtio_device(struct virtio_device *dev); @@ -103,6 +119,11 @@ struct virtio_driver { #endif }; +static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv) +{ + return container_of(drv, struct virtio_driver, driver); +} + int register_virtio_driver(struct virtio_driver *drv); void unregister_virtio_driver(struct virtio_driver *drv); #endif /* _LINUX_VIRTIO_H */ diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h index d6b4440387b7..4195b97a3def 100644 --- a/include/linux/virtio_scsi.h +++ b/include/linux/virtio_scsi.h @@ -1,7 +1,31 @@ +/* + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + #ifndef _LINUX_VIRTIO_SCSI_H #define _LINUX_VIRTIO_SCSI_H -/* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ #define VIRTIO_SCSI_CDB_SIZE 32 #define VIRTIO_SCSI_SENSE_SIZE 96 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ba1d3615acbb..183292722f6e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -318,6 +318,7 @@ extern void inet_csk_reqsk_queue_prune(struct sock *parent, const unsigned long max_rto); extern void inet_csk_destroy_sock(struct sock *sk); +extern void inet_csk_prepare_forced_close(struct sock *sk); /* * LISTEN is a special case for poll.. diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 7af1ea893038..23b3a7c58783 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -78,6 +78,13 @@ struct ra_msg { __be32 retrans_timer; }; +struct rd_msg { + struct icmp6hdr icmph; + struct in6_addr target; + struct in6_addr dest; + __u8 opt[0]; +}; + struct nd_opt_hdr { __u8 nd_opt_type; __u8 nd_opt_len; diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 628db7bca4fd..3953cea0ecfb 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -242,7 +242,6 @@ struct snd_soc_dai { unsigned int symmetric_rates:1; struct snd_pcm_runtime *runtime; unsigned int active; - unsigned char pop_wait:1; unsigned char probed:1; struct snd_soc_dapm_widget *playback_widget; diff --git a/include/sound/soc.h b/include/sound/soc.h index 91244a096c19..769e27c774a3 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1039,6 +1039,7 @@ struct snd_soc_pcm_runtime { struct snd_soc_dpcm_runtime dpcm[2]; long pmdown_time; + unsigned char pop_wait:1; /* runtime devices */ struct snd_pcm *pcm; diff --git a/include/uapi/asm-generic/signal.h b/include/uapi/asm-generic/signal.h index 0a78028984de..6fae30fd16ab 100644 --- a/include/uapi/asm-generic/signal.h +++ b/include/uapi/asm-generic/signal.h @@ -80,12 +80,6 @@ * SA_RESTORER 0x04000000 */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 91e3a360f611..539b179b349c 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -268,8 +268,8 @@ enum { #define DM_VERSION_MAJOR 4 #define DM_VERSION_MINOR 23 -#define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2012-07-25)" +#define DM_VERSION_PATCHLEVEL 1 +#define DM_VERSION_EXTRA "-ioctl (2012-12-18)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index afbb18a0227c..5db297514aec 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -163,6 +163,9 @@ struct br_port_msg { struct br_mdb_entry { __u32 ifindex; +#define MDB_TEMPORARY 0 +#define MDB_PERMANENT 1 + __u8 state; struct { union { __be32 ip4; diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 12f68c7ceba6..873e086ce3a1 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -23,6 +23,7 @@ #define EXT4_SUPER_MAGIC 0xEF53 #define BTRFS_SUPER_MAGIC 0x9123683E #define NILFS_SUPER_MAGIC 0x3434 +#define F2FS_SUPER_MAGIC 0xF2F52010 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 diff --git a/include/uapi/linux/signal.h b/include/uapi/linux/signal.h index dff452ed6d00..e1bd50c29ded 100644 --- a/include/uapi/linux/signal.h +++ b/include/uapi/linux/signal.h @@ -4,5 +4,7 @@ #include <asm/signal.h> #include <asm/siginfo.h> +#define SS_ONSTACK 1 +#define SS_DISABLE 2 #endif /* _UAPI_LINUX_SIGNAL_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 270fb22c5811..a7630d04029f 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -37,5 +37,6 @@ #define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ #define VIRTIO_ID_SCSI 8 /* virtio scsi */ #define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/video/omap-panel-tfp410.h b/include/video/omap-panel-tfp410.h index 68c31d79c571..aef35e48bc7e 100644 --- a/include/video/omap-panel-tfp410.h +++ b/include/video/omap-panel-tfp410.h @@ -28,7 +28,7 @@ struct omap_dss_device; * @power_down_gpio: gpio number for PD pin (or -1 if not available) */ struct tfp410_platform_data { - u16 i2c_bus_num; + int i2c_bus_num; int power_down_gpio; }; diff --git a/init/main.c b/init/main.c index baf1f0f5c461..85d69dffe864 100644 --- a/init/main.c +++ b/init/main.c @@ -797,7 +797,9 @@ static void __init do_pre_smp_initcalls(void) static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; - return kernel_execve(init_filename, argv_init, envp_init); + return do_execve(init_filename, + (const char __user *const __user *)argv_init, + (const char __user *const __user *)envp_init); } static void __init kernel_init_freeable(void); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index ed206fd88cca..e81175ef25f8 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -249,7 +249,7 @@ static void untag_chunk(struct node *p) list_del_rcu(&chunk->hash); spin_unlock(&hash_lock); spin_unlock(&entry->lock); - fsnotify_destroy_mark(entry); + fsnotify_destroy_mark(entry, audit_tree_group); goto out; } @@ -291,7 +291,7 @@ static void untag_chunk(struct node *p) owner->root = new; spin_unlock(&hash_lock); spin_unlock(&entry->lock); - fsnotify_destroy_mark(entry); + fsnotify_destroy_mark(entry, audit_tree_group); fsnotify_put_mark(&new->mark); /* drop initial reference */ goto out; @@ -331,7 +331,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&hash_lock); chunk->dead = 1; spin_unlock(&entry->lock); - fsnotify_destroy_mark(entry); + fsnotify_destroy_mark(entry, audit_tree_group); fsnotify_put_mark(entry); return 0; } @@ -412,7 +412,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&chunk_entry->lock); spin_unlock(&old_entry->lock); - fsnotify_destroy_mark(chunk_entry); + fsnotify_destroy_mark(chunk_entry, audit_tree_group); fsnotify_put_mark(chunk_entry); fsnotify_put_mark(old_entry); @@ -443,7 +443,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&hash_lock); spin_unlock(&chunk_entry->lock); spin_unlock(&old_entry->lock); - fsnotify_destroy_mark(old_entry); + fsnotify_destroy_mark(old_entry, audit_tree_group); fsnotify_put_mark(chunk_entry); /* drop initial reference */ fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */ return 0; diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 9a9ae6e3d290..4a599f699adc 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -350,7 +350,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent) } mutex_unlock(&audit_filter_mutex); - fsnotify_destroy_mark(&parent->mark); + fsnotify_destroy_mark(&parent->mark, audit_watch_group); } /* Get path information necessary for adding watches. */ @@ -457,7 +457,7 @@ void audit_remove_watch_rule(struct audit_krule *krule) if (list_empty(&parent->watches)) { audit_get_parent(parent); - fsnotify_destroy_mark(&parent->mark); + fsnotify_destroy_mark(&parent->mark, audit_watch_group); audit_put_parent(parent); } } diff --git a/kernel/fork.c b/kernel/fork.c index 85f6d536608d..a31b823b3c2d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1613,7 +1613,6 @@ long do_fork(unsigned long clone_flags, return nr; } -#ifdef CONFIG_GENERIC_KERNEL_THREAD /* * Create a kernel thread. */ @@ -1622,7 +1621,6 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, (unsigned long)arg, NULL, NULL); } -#endif #ifdef __ARCH_WANT_SYS_FORK SYSCALL_DEFINE0(fork) diff --git a/kernel/kcmp.c b/kernel/kcmp.c index 30b7b225306c..e30ac0fe61c3 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -4,6 +4,7 @@ #include <linux/string.h> #include <linux/random.h> #include <linux/module.h> +#include <linux/ptrace.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/cache.h> diff --git a/kernel/kmod.c b/kernel/kmod.c index 1c317e386831..0023a87e8de6 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -219,9 +219,9 @@ static int ____call_usermodehelper(void *data) commit_creds(new); - retval = kernel_execve(sub_info->path, - (const char *const *)sub_info->argv, - (const char *const *)sub_info->envp); + retval = do_execve(sub_info->path, + (const char __user *const __user *)sub_info->argv, + (const char __user *const __user *)sub_info->envp); if (!retval) return 0; diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c index 045504fffbb2..2b6e69909c39 100644 --- a/kernel/modsign_pubkey.c +++ b/kernel/modsign_pubkey.c @@ -34,18 +34,15 @@ static __init int module_verify_init(void) { pr_notice("Initialise module verification\n"); - modsign_keyring = key_alloc(&key_type_keyring, ".module_sign", - KUIDT_INIT(0), KGIDT_INIT(0), - current_cred(), - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA); + modsign_keyring = keyring_alloc(".module_sign", + KUIDT_INIT(0), KGIDT_INIT(0), + current_cred(), + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(modsign_keyring)) panic("Can't allocate module signing keyring\n"); - if (key_instantiate_and_link(modsign_keyring, NULL, 0, NULL, NULL) < 0) - panic("Can't instantiate module signing keyring\n"); - return 0; } diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index d73840271dce..a278cad1d5d6 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -9,6 +9,7 @@ #include <asm/uaccess.h> #include <linux/kernel_stat.h> #include <trace/events/timer.h> +#include <linux/random.h> /* * Called after updating RLIMIT_CPU to run cpu timer and update @@ -470,6 +471,8 @@ static void cleanup_timers(struct list_head *head, */ void posix_cpu_timers_exit(struct task_struct *tsk) { + add_device_randomness((const void*) &tsk->se.sum_exec_runtime, + sizeof(unsigned long long)); cleanup_timers(tsk->cpu_timers, tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4603d6cb9e25..5eea8707234a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -793,8 +793,11 @@ unsigned int sysctl_numa_balancing_scan_delay = 1000; static void task_numa_placement(struct task_struct *p) { - int seq = ACCESS_ONCE(p->mm->numa_scan_seq); + int seq; + if (!p->mm) /* for example, ksmd faulting in a user's mm */ + return; + seq = ACCESS_ONCE(p->mm->numa_scan_seq); if (p->numa_scan_seq == seq) return; p->numa_scan_seq = seq; diff --git a/kernel/signal.c b/kernel/signal.c index 580a91e63471..7aaa51d8e5b8 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -31,6 +31,7 @@ #include <linux/nsproxy.h> #include <linux/user_namespace.h> #include <linux/uprobes.h> +#include <linux/compat.h> #define CREATE_TRACE_POINTS #include <trace/events/signal.h> @@ -3094,6 +3095,79 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s out: return error; } +#ifdef CONFIG_GENERIC_SIGALTSTACK +SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) +{ + return do_sigaltstack(uss, uoss, current_user_stack_pointer()); +} +#endif + +int restore_altstack(const stack_t __user *uss) +{ + int err = do_sigaltstack(uss, NULL, current_user_stack_pointer()); + /* squash all but EFAULT for now */ + return err == -EFAULT ? err : 0; +} + +int __save_altstack(stack_t __user *uss, unsigned long sp) +{ + struct task_struct *t = current; + return __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) | + __put_user(sas_ss_flags(sp), &uss->ss_flags) | + __put_user(t->sas_ss_size, &uss->ss_size); +} + +#ifdef CONFIG_COMPAT +#ifdef CONFIG_GENERIC_SIGALTSTACK +asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, + compat_stack_t __user *uoss_ptr) +{ + stack_t uss, uoss; + int ret; + mm_segment_t seg; + + if (uss_ptr) { + compat_stack_t uss32; + + memset(&uss, 0, sizeof(stack_t)); + if (copy_from_user(&uss32, uss_ptr, sizeof(compat_stack_t))) + return -EFAULT; + uss.ss_sp = compat_ptr(uss32.ss_sp); + uss.ss_flags = uss32.ss_flags; + uss.ss_size = uss32.ss_size; + } + seg = get_fs(); + set_fs(KERNEL_DS); + ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL), + (stack_t __force __user *) &uoss, + compat_user_stack_pointer()); + set_fs(seg); + if (ret >= 0 && uoss_ptr) { + if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(compat_stack_t)) || + __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || + __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || + __put_user(uoss.ss_size, &uoss_ptr->ss_size)) + ret = -EFAULT; + } + return ret; +} + +int compat_restore_altstack(const compat_stack_t __user *uss) +{ + int err = compat_sys_sigaltstack(uss, NULL); + /* squash all but -EFAULT for now */ + return err == -EFAULT ? err : 0; +} + +int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) +{ + struct task_struct *t = current; + return __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) | + __put_user(sas_ss_flags(sp), &uss->ss_flags) | + __put_user(t->sas_ss_size, &uss->ss_size); +} +#endif +#endif #ifdef __ARCH_WANT_SYS_SIGPENDING diff --git a/lib/atomic64.c b/lib/atomic64.c index 978537809d84..08a4f068e61e 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -31,7 +31,11 @@ static union { raw_spinlock_t lock; char pad[L1_CACHE_BYTES]; -} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp; +} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = { + [0 ... (NR_LOCKS - 1)] = { + .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock), + }, +}; static inline raw_spinlock_t *lock_addr(const atomic64_t *v) { @@ -173,14 +177,3 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u) return ret; } EXPORT_SYMBOL(atomic64_add_unless); - -static int init_atomic64_lock(void) -{ - int i; - - for (i = 0; i < NR_LOCKS; ++i) - raw_spin_lock_init(&atomic64_lock[i].lock); - return 0; -} - -pure_initcall(init_atomic64_lock); diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d84beb994f36..5e396accd3d0 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -45,6 +45,12 @@ enum { dma_debug_coherent, }; +enum map_err_types { + MAP_ERR_CHECK_NOT_APPLICABLE, + MAP_ERR_NOT_CHECKED, + MAP_ERR_CHECKED, +}; + #define DMA_DEBUG_STACKTRACE_ENTRIES 5 struct dma_debug_entry { @@ -57,6 +63,7 @@ struct dma_debug_entry { int direction; int sg_call_ents; int sg_mapped_ents; + enum map_err_types map_err_type; #ifdef CONFIG_STACKTRACE struct stack_trace stacktrace; unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; @@ -114,6 +121,12 @@ static struct device_driver *current_driver __read_mostly; static DEFINE_RWLOCK(driver_name_lock); +static const char *const maperr2str[] = { + [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable", + [MAP_ERR_NOT_CHECKED] = "dma map error not checked", + [MAP_ERR_CHECKED] = "dma map error checked", +}; + static const char *type2name[4] = { "single", "page", "scather-gather", "coherent" }; @@ -376,11 +389,12 @@ void debug_dma_dump_mappings(struct device *dev) list_for_each_entry(entry, &bucket->list, list) { if (!dev || dev == entry->dev) { dev_info(entry->dev, - "%s idx %d P=%Lx D=%Lx L=%Lx %s\n", + "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n", type2name[entry->type], idx, (unsigned long long)entry->paddr, entry->dev_addr, entry->size, - dir2name[entry->direction]); + dir2name[entry->direction], + maperr2str[entry->map_err_type]); } } @@ -844,16 +858,16 @@ static void check_unmap(struct dma_debug_entry *ref) struct hash_bucket *bucket; unsigned long flags; - if (dma_mapping_error(ref->dev, ref->dev_addr)) { - err_printk(ref->dev, NULL, "DMA-API: device driver tries " - "to free an invalid DMA memory address\n"); - return; - } - bucket = get_hash_bucket(ref, &flags); entry = bucket_find_exact(bucket, ref); if (!entry) { + if (dma_mapping_error(ref->dev, ref->dev_addr)) { + err_printk(ref->dev, NULL, + "DMA-API: device driver tries " + "to free an invalid DMA memory address\n"); + return; + } err_printk(ref->dev, NULL, "DMA-API: device driver tries " "to free DMA memory it has not allocated " "[device address=0x%016llx] [size=%llu bytes]\n", @@ -910,6 +924,15 @@ static void check_unmap(struct dma_debug_entry *ref) dir2name[ref->direction]); } + if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { + err_printk(ref->dev, entry, + "DMA-API: device driver failed to check map error" + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped as %s]", + ref->dev_addr, ref->size, + type2name[entry->type]); + } + hash_bucket_del(entry); dma_entry_free(entry); @@ -1017,7 +1040,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, if (unlikely(global_disable)) return; - if (unlikely(dma_mapping_error(dev, dma_addr))) + if (dma_mapping_error(dev, dma_addr)) return; entry = dma_entry_alloc(); @@ -1030,6 +1053,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; + entry->map_err_type = MAP_ERR_NOT_CHECKED; if (map_single) entry->type = dma_debug_single; @@ -1045,6 +1069,30 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, } EXPORT_SYMBOL(debug_dma_map_page); +void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + struct dma_debug_entry ref; + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + if (unlikely(global_disable)) + return; + + ref.dev = dev; + ref.dev_addr = dma_addr; + bucket = get_hash_bucket(&ref, &flags); + entry = bucket_find_exact(bucket, &ref); + + if (!entry) + goto out; + + entry->map_err_type = MAP_ERR_CHECKED; +out: + put_hash_bucket(bucket, &flags); +} +EXPORT_SYMBOL(debug_dma_mapping_error); + void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction, bool map_single) { diff --git a/mm/compaction.c b/mm/compaction.c index 5ad7f4f4d6f7..6b807e466497 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -17,6 +17,21 @@ #include <linux/balloon_compaction.h> #include "internal.h" +#ifdef CONFIG_COMPACTION +static inline void count_compact_event(enum vm_event_item item) +{ + count_vm_event(item); +} + +static inline void count_compact_events(enum vm_event_item item, long delta) +{ + count_vm_events(item, delta); +} +#else +#define count_compact_event(item) do { } while (0) +#define count_compact_events(item, delta) do { } while (0) +#endif + #if defined CONFIG_COMPACTION || defined CONFIG_CMA #define CREATE_TRACE_POINTS @@ -303,10 +318,9 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, if (blockpfn == end_pfn) update_pageblock_skip(cc, valid_page, total_isolated, false); - count_vm_events(COMPACTFREE_SCANNED, nr_scanned); + count_compact_events(COMPACTFREE_SCANNED, nr_scanned); if (total_isolated) - count_vm_events(COMPACTISOLATED, total_isolated); - + count_compact_events(COMPACTISOLATED, total_isolated); return total_isolated; } @@ -613,9 +627,9 @@ next_pageblock: trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); - count_vm_events(COMPACTMIGRATE_SCANNED, nr_scanned); + count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned); if (nr_isolated) - count_vm_events(COMPACTISOLATED, nr_isolated); + count_compact_events(COMPACTISOLATED, nr_isolated); return low_pfn; } @@ -1110,7 +1124,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, if (!order || !may_enter_fs || !may_perform_io) return rc; - count_vm_event(COMPACTSTALL); + count_compact_event(COMPACTSTALL); #ifdef CONFIG_CMA if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) diff --git a/mm/highmem.c b/mm/highmem.c index d999077431df..b32b70cdaed6 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -105,6 +105,7 @@ struct page *kmap_to_page(void *vaddr) return virt_to_page(addr); } +EXPORT_SYMBOL(kmap_to_page); static void flush_all_zero_pkmaps(void) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 32754eece63e..9e894edc7811 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -574,19 +574,19 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj) *hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj); if (unlikely(!*hugepage_kobj)) { - printk(KERN_ERR "hugepage: failed kobject create\n"); + printk(KERN_ERR "hugepage: failed to create transparent hugepage kobject\n"); return -ENOMEM; } err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group); if (err) { - printk(KERN_ERR "hugepage: failed register hugeage group\n"); + printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n"); goto delete_obj; } err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group); if (err) { - printk(KERN_ERR "hugepage: failed register hugeage group\n"); + printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n"); goto remove_hp_group; } @@ -1624,7 +1624,7 @@ again: struct anon_vma_chain *vmac; struct vm_area_struct *vma; - anon_vma_lock_write(anon_vma); + anon_vma_lock_read(anon_vma); anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { vma = vmac->vma; @@ -1648,7 +1648,7 @@ again: if (!search_new_forks || !mapcount) break; } - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); if (!mapcount) goto out; } @@ -1678,7 +1678,7 @@ again: struct anon_vma_chain *vmac; struct vm_area_struct *vma; - anon_vma_lock_write(anon_vma); + anon_vma_lock_read(anon_vma); anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { vma = vmac->vma; @@ -1697,11 +1697,11 @@ again: ret = try_to_unmap_one(page, vma, rmap_item->address, flags); if (ret != SWAP_AGAIN || !page_mapped(page)) { - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); goto out; } } - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); } if (!search_new_forks++) goto again; @@ -1731,7 +1731,7 @@ again: struct anon_vma_chain *vmac; struct vm_area_struct *vma; - anon_vma_lock_write(anon_vma); + anon_vma_lock_read(anon_vma); anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { vma = vmac->vma; @@ -1749,11 +1749,11 @@ again: ret = rmap_one(page, vma, rmap_item->address, arg); if (ret != SWAP_AGAIN) { - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); goto out; } } - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); } if (!search_new_forks++) goto again; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f3009b4bae51..09255ec8159c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6090,7 +6090,6 @@ mem_cgroup_css_alloc(struct cgroup *cont) &per_cpu(memcg_stock, cpu); INIT_WORK(&stock->work, drain_local_stock); } - hotcpu_notifier(memcg_cpu_hotplug_callback, 0); } else { parent = mem_cgroup_from_cont(cont->parent); memcg->use_hierarchy = parent->use_hierarchy; @@ -6756,6 +6755,19 @@ struct cgroup_subsys mem_cgroup_subsys = { .use_id = 1, }; +/* + * The rest of init is performed during ->css_alloc() for root css which + * happens before initcalls. hotcpu_notifier() can't be done together as + * it would introduce circular locking by adding cgroup_lock -> cpu hotplug + * dependency. Do it from a subsys_initcall(). + */ +static int __init mem_cgroup_init(void) +{ + hotcpu_notifier(memcg_cpu_hotplug_callback, 0); + return 0; +} +subsys_initcall(mem_cgroup_init); + #ifdef CONFIG_MEMCG_SWAP static int __init enable_swap_account(char *s) { diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6f4271224493..0713bfbf0954 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -201,6 +201,18 @@ static unsigned long highmem_dirtyable_memory(unsigned long total) zone_reclaimable_pages(z) - z->dirty_balance_reserve; } /* + * Unreclaimable memory (kernel memory or anonymous memory + * without swap) can bring down the dirtyable pages below + * the zone's dirty balance reserve and the above calculation + * will underflow. However we still want to add in nodes + * which are below threshold (negative values) to get a more + * accurate calculation but make sure that the total never + * underflows. + */ + if ((long)x < 0) + x = 0; + + /* * Make sure that the number of highmem pages is never larger * than the number of the total dirtyable memory. This can only * occur in very strange VM situations but we want to make sure @@ -222,8 +234,8 @@ static unsigned long global_dirtyable_memory(void) { unsigned long x; - x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() - - dirty_balance_reserve; + x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages(); + x -= min(x, dirty_balance_reserve); if (!vm_highmem_is_dirtyable) x -= highmem_dirtyable_memory(x); @@ -290,9 +302,12 @@ static unsigned long zone_dirtyable_memory(struct zone *zone) * highmem zone can hold its share of dirty pages, so we don't * care about vm_highmem_is_dirtyable here. */ - return zone_page_state(zone, NR_FREE_PAGES) + - zone_reclaimable_pages(zone) - - zone->dirty_balance_reserve; + unsigned long nr_pages = zone_page_state(zone, NR_FREE_PAGES) + + zone_reclaimable_pages(zone); + + /* don't allow this to underflow */ + nr_pages -= min(nr_pages, zone->dirty_balance_reserve); + return nr_pages; } /** diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2ad2ad168efe..4ba5e37127fc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5978,8 +5978,15 @@ done: void free_contig_range(unsigned long pfn, unsigned nr_pages) { - for (; nr_pages--; ++pfn) - __free_page(pfn_to_page(pfn)); + unsigned int count = 0; + + for (; nr_pages--; pfn++) { + struct page *page = pfn_to_page(pfn); + + count += page_count(page) != 1; + __free_page(page); + } + WARN(count != 0, "%d pages are still in use!\n", count); } #endif diff --git a/mm/truncate.c b/mm/truncate.c index d51ce92d6e83..c75b736e54b7 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -577,29 +577,6 @@ void truncate_setsize(struct inode *inode, loff_t newsize) EXPORT_SYMBOL(truncate_setsize); /** - * vmtruncate - unmap mappings "freed" by truncate() syscall - * @inode: inode of the file used - * @newsize: file offset to start truncating - * - * This function is deprecated and truncate_setsize or truncate_pagecache - * should be used instead, together with filesystem specific block truncation. - */ -int vmtruncate(struct inode *inode, loff_t newsize) -{ - int error; - - error = inode_newsize_ok(inode, newsize); - if (error) - return error; - - truncate_setsize(inode, newsize); - if (inode->i_op->truncate) - inode->i_op->truncate(inode); - return 0; -} -EXPORT_SYMBOL(vmtruncate); - -/** * truncate_pagecache_range - unmap and remove pagecache that is hole-punched * @inode: inode * @lstart: offset of beginning of hole diff --git a/mm/vmscan.c b/mm/vmscan.c index 828530e2794a..adc7e9058181 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2570,7 +2570,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, static unsigned long balance_pgdat(pg_data_t *pgdat, int order, int *classzone_idx) { - int all_zones_ok; + struct zone *unbalanced_zone; unsigned long balanced; int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ @@ -2604,7 +2604,7 @@ loop_again: unsigned long lru_pages = 0; int has_under_min_watermark_zone = 0; - all_zones_ok = 1; + unbalanced_zone = NULL; balanced = 0; /* @@ -2743,7 +2743,7 @@ loop_again: } if (!zone_balanced(zone, testorder, 0, end_zone)) { - all_zones_ok = 0; + unbalanced_zone = zone; /* * We are still under min water mark. This * means that we have a GFP_ATOMIC allocation @@ -2776,7 +2776,7 @@ loop_again: pfmemalloc_watermark_ok(pgdat)) wake_up(&pgdat->pfmemalloc_wait); - if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx))) + if (!unbalanced_zone || (order && pgdat_balanced(pgdat, balanced, *classzone_idx))) break; /* kswapd: all done */ /* * OK, kswapd is getting into trouble. Take a nap, then take @@ -2786,7 +2786,7 @@ loop_again: if (has_under_min_watermark_zone) count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); else - congestion_wait(BLK_RW_ASYNC, HZ/10); + wait_iff_congested(unbalanced_zone, BLK_RW_ASYNC, HZ/10); } /* @@ -2805,7 +2805,7 @@ out: * high-order: Balanced zones must make up at least 25% of the node * for the node to be balanced */ - if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))) { + if (unbalanced_zone && (!order || !pgdat_balanced(pgdat, balanced, *classzone_idx))) { cond_resched(); try_to_freeze(); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 35b8911b1c8e..fd05c81cb348 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -39,6 +39,7 @@ #include <linux/inet.h> #include <linux/idr.h> #include <linux/file.h> +#include <linux/highmem.h> #include <linux/slab.h> #include <net/9p/9p.h> #include <linux/parser.h> @@ -325,7 +326,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, int count = nr_pages; while (nr_pages) { s = rest_of_page(data); - pages[index++] = virt_to_page(data); + pages[index++] = kmap_to_page(data); data += s; nr_pages--; } diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index f49da5814bc3..350bf62b2ae3 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -14,49 +14,45 @@ static ssize_t show_type(struct device *cdev, struct device_attribute *attr, char *buf) { struct atm_dev *adev = to_atm_dev(cdev); - return sprintf(buf, "%s\n", adev->type); + + return scnprintf(buf, PAGE_SIZE, "%s\n", adev->type); } static ssize_t show_address(struct device *cdev, struct device_attribute *attr, char *buf) { - char *pos = buf; struct atm_dev *adev = to_atm_dev(cdev); - int i; - - for (i = 0; i < (ESI_LEN - 1); i++) - pos += sprintf(pos, "%02x:", adev->esi[i]); - pos += sprintf(pos, "%02x\n", adev->esi[i]); - return pos - buf; + return scnprintf(buf, PAGE_SIZE, "%pM\n", adev->esi); } static ssize_t show_atmaddress(struct device *cdev, struct device_attribute *attr, char *buf) { unsigned long flags; - char *pos = buf; struct atm_dev *adev = to_atm_dev(cdev); struct atm_dev_addr *aaddr; int bin[] = { 1, 2, 10, 6, 1 }, *fmt = bin; - int i, j; + int i, j, count = 0; spin_lock_irqsave(&adev->lock, flags); list_for_each_entry(aaddr, &adev->local, entry) { for (i = 0, j = 0; i < ATM_ESA_LEN; ++i, ++j) { if (j == *fmt) { - pos += sprintf(pos, "."); + count += scnprintf(buf + count, + PAGE_SIZE - count, "."); ++fmt; j = 0; } - pos += sprintf(pos, "%02x", - aaddr->addr.sas_addr.prv[i]); + count += scnprintf(buf + count, + PAGE_SIZE - count, "%02x", + aaddr->addr.sas_addr.prv[i]); } - pos += sprintf(pos, "\n"); + count += scnprintf(buf + count, PAGE_SIZE - count, "\n"); } spin_unlock_irqrestore(&adev->lock, flags); - return pos - buf; + return count; } static ssize_t show_atmindex(struct device *cdev, @@ -64,25 +60,21 @@ static ssize_t show_atmindex(struct device *cdev, { struct atm_dev *adev = to_atm_dev(cdev); - return sprintf(buf, "%d\n", adev->number); + return scnprintf(buf, PAGE_SIZE, "%d\n", adev->number); } static ssize_t show_carrier(struct device *cdev, struct device_attribute *attr, char *buf) { - char *pos = buf; struct atm_dev *adev = to_atm_dev(cdev); - pos += sprintf(pos, "%d\n", - adev->signal == ATM_PHY_SIG_LOST ? 0 : 1); - - return pos - buf; + return scnprintf(buf, PAGE_SIZE, "%d\n", + adev->signal == ATM_PHY_SIG_LOST ? 0 : 1); } static ssize_t show_link_rate(struct device *cdev, struct device_attribute *attr, char *buf) { - char *pos = buf; struct atm_dev *adev = to_atm_dev(cdev); int link_rate; @@ -100,9 +92,7 @@ static ssize_t show_link_rate(struct device *cdev, default: link_rate = adev->link_rate * 8 * 53; } - pos += sprintf(pos, "%d\n", link_rate); - - return pos - buf; + return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate); } static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 6f0a2eebcb27..acc9f4cc18f7 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -83,9 +83,12 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (port) { struct br_mdb_entry e; e.ifindex = port->dev->ifindex; - e.addr.u.ip4 = p->addr.u.ip4; + e.state = p->state; + if (p->addr.proto == htons(ETH_P_IP)) + e.addr.u.ip4 = p->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - e.addr.u.ip6 = p->addr.u.ip6; + if (p->addr.proto == htons(ETH_P_IPV6)) + e.addr.u.ip6 = p->addr.u.ip6; #endif e.addr.proto = p->addr.proto; if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) { @@ -253,6 +256,8 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry) #endif } else return false; + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) + return false; return true; } @@ -310,7 +315,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, } static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group) + struct br_ip *group, unsigned char state) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@ -336,7 +341,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, break; } - p = br_multicast_new_port_group(port, group, *pp); + p = br_multicast_new_port_group(port, group, *pp, state); if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); @@ -373,7 +378,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, #endif spin_lock_bh(&br->multicast_lock); - ret = br_mdb_add_group(br, p, &ip); + ret = br_mdb_add_group(br, p, &ip, entry->state); spin_unlock_bh(&br->multicast_lock); return ret; } @@ -479,3 +484,10 @@ void br_mdb_init(void) rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL); } + +void br_mdb_uninit(void) +{ + rtnl_unregister(PF_BRIDGE, RTM_GETMDB); + rtnl_unregister(PF_BRIDGE, RTM_NEWMDB); + rtnl_unregister(PF_BRIDGE, RTM_DELMDB); +} diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 1093c89095d8..5391ca43336a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -279,7 +279,7 @@ static void br_multicast_port_group_expired(unsigned long data) spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || timer_pending(&pg->timer) || - hlist_unhashed(&pg->mglist)) + hlist_unhashed(&pg->mglist) || pg->state & MDB_PERMANENT) goto out; br_multicast_del_pg(br, pg); @@ -622,7 +622,8 @@ out: struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, - struct net_bridge_port_group __rcu *next) + struct net_bridge_port_group __rcu *next, + unsigned char state) { struct net_bridge_port_group *p; @@ -632,6 +633,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->addr = *group; p->port = port; + p->state = state; rcu_assign_pointer(p->next, next); hlist_add_head(&p->mglist, &port->mglist); setup_timer(&p->timer, br_multicast_port_group_expired, @@ -674,7 +676,7 @@ static int br_multicast_add_group(struct net_bridge *br, break; } - p = br_multicast_new_port_group(port, group, *pp); + p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY); if (unlikely(!p)) goto err; rcu_assign_pointer(*pp, p); @@ -1165,7 +1167,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (max_delay) group = &mld->mld_mca; } else if (skb->len >= sizeof(*mld2q)) { - u16 mrc; if (!pskb_may_pull(skb, sizeof(*mld2q))) { err = -EINVAL; goto out; @@ -1173,8 +1174,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; - mrc = ntohs(mld2q->mld2q_mrc); - max_delay = mrc ? MLDV2_MRC(mrc) : 1; + max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; } if (!group) @@ -1633,6 +1633,7 @@ void br_multicast_stop(struct net_bridge *br) del_timer_sync(&br->multicast_querier_timer); del_timer_sync(&br->multicast_query_timer); + br_mdb_uninit(); spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); if (!mdb) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index dead9dfe865b..97ba0189c6f7 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -305,5 +305,4 @@ int __init br_netlink_init(void) void __exit br_netlink_fini(void) { rtnl_link_unregister(&br_link_ops); - rtnl_unregister_all(PF_BRIDGE); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f21a739a6186..8d83be5ffedc 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -83,6 +83,7 @@ struct net_bridge_port_group { struct rcu_head rcu; struct timer_list timer; struct br_ip addr; + unsigned char state; }; struct net_bridge_mdb_entry @@ -443,8 +444,10 @@ extern void br_multicast_free_pg(struct rcu_head *head); extern struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, - struct net_bridge_port_group *next); + struct net_bridge_port_group *next, + unsigned char state); extern void br_mdb_init(void); +extern void br_mdb_uninit(void); extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_ip *group, int type); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index a8020293f342..ee71ea26777a 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -305,7 +305,6 @@ ceph_parse_options(char *options, const char *dev_name, /* start with defaults */ opt->flags = CEPH_OPT_DEFAULT; - opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ @@ -391,7 +390,7 @@ ceph_parse_options(char *options, const char *dev_name, /* misc */ case Opt_osdtimeout: - opt->osd_timeout = intval; + pr_warning("ignoring deprecated osdtimeout option\n"); break; case Opt_osdkeepalivetimeout: opt->osd_keepalive_timeout = intval; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 3ef1759403b4..4d111fd2b492 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2244,22 +2244,62 @@ bad_tag: /* - * Atomically queue work on a connection. Bump @con reference to - * avoid races with connection teardown. + * Atomically queue work on a connection after the specified delay. + * Bump @con reference to avoid races with connection teardown. + * Returns 0 if work was queued, or an error code otherwise. */ -static void queue_con(struct ceph_connection *con) +static int queue_con_delay(struct ceph_connection *con, unsigned long delay) { if (!con->ops->get(con)) { - dout("queue_con %p ref count 0\n", con); - return; + dout("%s %p ref count 0\n", __func__, con); + + return -ENOENT; } - if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) { - dout("queue_con %p - already queued\n", con); + if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { + dout("%s %p - already queued\n", __func__, con); con->ops->put(con); - } else { - dout("queue_con %p\n", con); + + return -EBUSY; } + + dout("%s %p %lu\n", __func__, con, delay); + + return 0; +} + +static void queue_con(struct ceph_connection *con) +{ + (void) queue_con_delay(con, 0); +} + +static bool con_sock_closed(struct ceph_connection *con) +{ + if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) + return false; + +#define CASE(x) \ + case CON_STATE_ ## x: \ + con->error_msg = "socket closed (con state " #x ")"; \ + break; + + switch (con->state) { + CASE(CLOSED); + CASE(PREOPEN); + CASE(CONNECTING); + CASE(NEGOTIATING); + CASE(OPEN); + CASE(STANDBY); + default: + pr_warning("%s con %p unrecognized state %lu\n", + __func__, con, con->state); + con->error_msg = "unrecognized con state"; + BUG(); + break; + } +#undef CASE + + return true; } /* @@ -2273,35 +2313,16 @@ static void con_work(struct work_struct *work) mutex_lock(&con->mutex); restart: - if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) { - switch (con->state) { - case CON_STATE_CONNECTING: - con->error_msg = "connection failed"; - break; - case CON_STATE_NEGOTIATING: - con->error_msg = "negotiation failed"; - break; - case CON_STATE_OPEN: - con->error_msg = "socket closed"; - break; - default: - dout("unrecognized con state %d\n", (int)con->state); - con->error_msg = "unrecognized con state"; - BUG(); - } + if (con_sock_closed(con)) goto fault; - } if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { dout("con_work %p backing off\n", con); - if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay))) { - dout("con_work %p backoff %lu\n", con, con->delay); - mutex_unlock(&con->mutex); - return; - } else { + ret = queue_con_delay(con, round_jiffies_relative(con->delay)); + if (ret) { dout("con_work %p FAILED to back off %lu\n", con, con->delay); + BUG_ON(ret == -ENOENT); set_bit(CON_FLAG_BACKOFF, &con->flags); } goto done; @@ -2356,7 +2377,7 @@ fault: static void ceph_fault(struct ceph_connection *con) __releases(con->mutex) { - pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), + pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); dout("fault %p state %lu to peer %s\n", con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); @@ -2398,24 +2419,8 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - con->ops->get(con); - if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay))) { - dout("fault queued %p delay %lu\n", con, con->delay); - } else { - con->ops->put(con); - dout("fault failed to queue %p delay %lu, backoff\n", - con, con->delay); - /* - * In many cases we see a socket state change - * while con_work is running and end up - * queuing (non-delayed) work, such that we - * can't backoff with a delay. Set a flag so - * that when con_work restarts we schedule the - * delay then. - */ - set_bit(CON_FLAG_BACKOFF, &con->flags); - } + set_bit(CON_FLAG_BACKOFF, &con->flags); + queue_con(con); } out_unlock: diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index c1d756cc7448..780caf6b0491 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -221,6 +221,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, kref_init(&req->r_kref); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); + RB_CLEAR_NODE(&req->r_node); INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); @@ -580,7 +581,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, dout("__kick_osd_requests osd%d\n", osd->o_osd); err = __reset_osd(osdc, osd); - if (err == -EAGAIN) + if (err) return; list_for_each_entry(req, &osd->o_requests, r_osd_item) { @@ -607,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, } } -static void kick_osd_requests(struct ceph_osd_client *osdc, - struct ceph_osd *kickosd) -{ - mutex_lock(&osdc->request_mutex); - __kick_osd_requests(osdc, kickosd); - mutex_unlock(&osdc->request_mutex); -} - /* * If the osd connection drops, we need to resubmit all requests. */ @@ -628,7 +621,9 @@ static void osd_reset(struct ceph_connection *con) dout("osd_reset osd%d\n", osd->o_osd); osdc = osd->o_osdc; down_read(&osdc->map_sem); - kick_osd_requests(osdc, osd); + mutex_lock(&osdc->request_mutex); + __kick_osd_requests(osdc, osd); + mutex_unlock(&osdc->request_mutex); send_queued(osdc); up_read(&osdc->map_sem); } @@ -647,6 +642,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) atomic_set(&osd->o_ref, 1); osd->o_osdc = osdc; osd->o_osd = onum; + RB_CLEAR_NODE(&osd->o_node); INIT_LIST_HEAD(&osd->o_requests); INIT_LIST_HEAD(&osd->o_linger_requests); INIT_LIST_HEAD(&osd->o_osd_lru); @@ -750,6 +746,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) if (list_empty(&osd->o_requests) && list_empty(&osd->o_linger_requests)) { __remove_osd(osdc, osd); + ret = -ENODEV; } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], &osd->o_con.peer_addr, sizeof(osd->o_con.peer_addr)) == 0 && @@ -876,9 +873,9 @@ static void __unregister_request(struct ceph_osd_client *osdc, req->r_osd = NULL; } + list_del_init(&req->r_req_lru_item); ceph_osdc_put_request(req); - list_del_init(&req->r_req_lru_item); if (osdc->num_requests == 0) { dout(" no requests, canceling timeout\n"); __cancel_osd_timeout(osdc); @@ -910,8 +907,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { dout("__unregister_linger_request %p\n", req); + list_del_init(&req->r_linger_item); if (req->r_osd) { - list_del_init(&req->r_linger_item); list_del_init(&req->r_linger_osd); if (list_empty(&req->r_osd->o_requests) && @@ -1090,12 +1087,10 @@ static void handle_timeout(struct work_struct *work) { struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, timeout_work.work); - struct ceph_osd_request *req, *last_req = NULL; + struct ceph_osd_request *req; struct ceph_osd *osd; - unsigned long timeout = osdc->client->options->osd_timeout * HZ; unsigned long keepalive = osdc->client->options->osd_keepalive_timeout * HZ; - unsigned long last_stamp = 0; struct list_head slow_osds; dout("timeout\n"); down_read(&osdc->map_sem); @@ -1105,37 +1100,6 @@ static void handle_timeout(struct work_struct *work) mutex_lock(&osdc->request_mutex); /* - * reset osds that appear to be _really_ unresponsive. this - * is a failsafe measure.. we really shouldn't be getting to - * this point if the system is working properly. the monitors - * should mark the osd as failed and we should find out about - * it from an updated osd map. - */ - while (timeout && !list_empty(&osdc->req_lru)) { - req = list_entry(osdc->req_lru.next, struct ceph_osd_request, - r_req_lru_item); - - /* hasn't been long enough since we sent it? */ - if (time_before(jiffies, req->r_stamp + timeout)) - break; - - /* hasn't been long enough since it was acked? */ - if (req->r_request->ack_stamp == 0 || - time_before(jiffies, req->r_request->ack_stamp + timeout)) - break; - - BUG_ON(req == last_req && req->r_stamp == last_stamp); - last_req = req; - last_stamp = req->r_stamp; - - osd = req->r_osd; - BUG_ON(!osd); - pr_warning(" tid %llu timed out on osd%d, will reset osd\n", - req->r_tid, osd->o_osd); - __kick_osd_requests(osdc, osd); - } - - /* * ping osds that are a bit slow. this ensures that if there * is a break in the TCP connection we will notice, and reopen * a connection with that osd (from the fault callback). @@ -1364,8 +1328,8 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); - __unregister_linger_request(osdc, req); __register_request(osdc, req); + __unregister_linger_request(osdc, req); } mutex_unlock(&osdc->request_mutex); @@ -1599,6 +1563,7 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, event->data = data; event->osdc = osdc; INIT_LIST_HEAD(&event->osd_node); + RB_CLEAR_NODE(&event->node); kref_init(&event->kref); /* one ref for us */ kref_get(&event->kref); /* one ref for the caller */ init_completion(&event->completion); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 5433fb0eb3c6..de73214b5d26 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -469,6 +469,22 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) return NULL; } +const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id) +{ + struct ceph_pg_pool_info *pi; + + if (id == CEPH_NOPOOL) + return NULL; + + if (WARN_ON_ONCE(id > (u64) INT_MAX)) + return NULL; + + pi = __lookup_pg_pool(&map->pg_pools, (int) id); + + return pi ? pi->name : NULL; +} +EXPORT_SYMBOL(ceph_pg_pool_name_by_id); + int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) { struct rb_node *rbp; @@ -645,10 +661,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_32_safe(p, end, max, bad); while (max--) { ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); + err = -ENOMEM; pi = kzalloc(sizeof(*pi), GFP_NOFS); if (!pi) goto bad; pi->id = ceph_decode_32(p); + err = -EINVAL; ev = ceph_decode_8(p); /* encoding version */ if (ev > CEPH_PG_POOL_VERSION) { pr_warning("got unknown v %d > %d of ceph_pg_pool\n", @@ -664,8 +682,13 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) __insert_pg_pool(&map->pg_pools, pi); } - if (version >= 5 && __decode_pool_names(p, end, map) < 0) - goto bad; + if (version >= 5) { + err = __decode_pool_names(p, end, map); + if (err < 0) { + dout("fail to decode pool names"); + goto bad; + } + } ceph_decode_32_safe(p, end, map->pool_max, bad); @@ -745,7 +768,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) return map; bad: - dout("osdmap_decode fail\n"); + dout("osdmap_decode fail err %d\n", err); ceph_osdmap_destroy(map); return ERR_PTR(err); } @@ -839,6 +862,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, if (ev > CEPH_PG_POOL_VERSION) { pr_warning("got unknown v %d > %d of ceph_pg_pool\n", ev, CEPH_PG_POOL_VERSION); + err = -EINVAL; goto bad; } pi = __lookup_pg_pool(&map->pg_pools, pool); @@ -855,8 +879,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, if (err < 0) goto bad; } - if (version >= 5 && __decode_pool_names(p, end, map) < 0) - goto bad; + if (version >= 5) { + err = __decode_pool_names(p, end, map); + if (err < 0) + goto bad; + } /* old_pool */ ceph_decode_32_safe(p, end, len, bad); @@ -932,15 +959,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, (void) __remove_pg_mapping(&map->pg_temp, pgid); /* insert */ - if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) { - err = -EINVAL; + err = -EINVAL; + if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) goto bad; - } + err = -ENOMEM; pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); - if (!pg) { - err = -ENOMEM; + if (!pg) goto bad; - } pg->pgid = pgid; pg->len = pglen; for (j = 0; j < pglen; j++) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 176ecdba4a22..4f9f5eb478f1 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -439,8 +439,8 @@ exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: - bh_unlock_sock(newsk); - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); goto exit; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 56840b249f3b..6e05981f271e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -585,7 +585,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, newinet->inet_rcv_saddr = LOOPBACK4_IPV6; if (__inet_inherit_port(sk, newsk) < 0) { - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); goto out; } __inet6_hash(newsk, NULL); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 2026542d6836..d0670f00d524 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -710,6 +710,22 @@ void inet_csk_destroy_sock(struct sock *sk) } EXPORT_SYMBOL(inet_csk_destroy_sock); +/* This function allows to force a closure of a socket after the call to + * tcp/dccp_create_openreq_child(). + */ +void inet_csk_prepare_forced_close(struct sock *sk) +{ + /* sk_clone_lock locked the socket and set refcnt to 2 */ + bh_unlock_sock(sk); + sock_put(sk); + + /* The below has to be done to allow calling inet_csk_destroy_sock */ + sock_set_flag(sk, SOCK_DEAD); + percpu_counter_inc(sk->sk_prot->orphan_count); + inet_sk(sk)->inet_num = 0; +} +EXPORT_SYMBOL(inet_csk_prepare_forced_close); + int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1ed230716d51..54139fa514e6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1767,10 +1767,8 @@ exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: - tcp_clear_xmit_timers(newsk); - tcp_cleanup_congestion_control(newsk); - bh_unlock_sock(newsk); - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); goto exit; } EXPORT_SYMBOL(tcp_v4_syn_recv_sock); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2068ac4fbdad..4ea244891b58 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -41,6 +41,6 @@ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-y += addrconf_core.o exthdrs_core.o -obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6_offload) +obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6fca01f136ad..408cac4ae00a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -534,8 +534,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC); return; errout: - if (err < 0) - rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); } static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f2a007b7bde3..6574175795df 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1314,6 +1314,12 @@ out: static void ndisc_redirect_rcv(struct sk_buff *skb) { + u8 *hdr; + struct ndisc_options ndopts; + struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); + u32 ndoptlen = skb->tail - (skb->transport_header + + offsetof(struct rd_msg, opt)); + #ifdef CONFIG_IPV6_NDISC_NODETYPE switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: @@ -1330,6 +1336,17 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } + if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) + return; + + if (!ndopts.nd_opts_rh) + return; + + hdr = (u8 *)ndopts.nd_opts_rh; + hdr += 8; + if (!pskb_pull(skb, hdr - skb_transport_header(skb))) + return; + icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6565cf55eb1e..93825dd3a7c0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1288,7 +1288,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #endif if (__inet_inherit_port(sk, newsk) < 0) { - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); goto out; } __inet6_hash(newsk, NULL); diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index e748aed290aa..b7c7f815deae 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -224,9 +224,9 @@ void ieee802154_free_device(struct ieee802154_dev *hw) BUG_ON(!list_empty(&priv->slaves)); - wpan_phy_free(priv->phy); - mutex_destroy(&priv->slaves_mtx); + + wpan_phy_free(priv->phy); } EXPORT_SYMBOL(ieee802154_free_device); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c8a1eb6eca2d..c0353d55d56f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -669,6 +669,9 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; + if (addr_len < sizeof(struct sockaddr_nl)) + return -EINVAL; + if (nladdr->nl_family != AF_NETLINK) return -EINVAL; @@ -2059,7 +2062,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) struct sock *s = v; struct netlink_sock *nlk = nlk_sk(s); - seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n", + seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n", s, s->sk_protocol, nlk->portid, diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index a9edd2e205f4..c26210618e14 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -66,12 +66,36 @@ config SCTP_DBG_OBJCNT 'cat /proc/net/sctp/sctp_dbg_objcnt' If unsure, say N +choice + prompt "Default SCTP cookie HMAC encoding" + default SCTP_COOKIE_HMAC_MD5 + help + This option sets the default sctp cookie hmac algorithm + when in doubt select 'md5' + +config SCTP_DEFAULT_COOKIE_HMAC_MD5 + bool "Enable optional MD5 hmac cookie generation" + help + Enable optional MD5 hmac based SCTP cookie generation + select SCTP_COOKIE_HMAC_MD5 + +config SCTP_DEFAULT_COOKIE_HMAC_SHA1 + bool "Enable optional SHA1 hmac cookie generation" + help + Enable optional SHA1 hmac based SCTP cookie generation + select SCTP_COOKIE_HMAC_SHA1 + +config SCTP_DEFAULT_COOKIE_HMAC_NONE + bool "Use no hmac alg in SCTP cookie generation" + help + Use no hmac algorithm in SCTP cookie generation + +endchoice config SCTP_COOKIE_HMAC_MD5 bool "Enable optional MD5 hmac cookie generation" help Enable optional MD5 hmac based SCTP cookie generation - default y select CRYPTO_HMAC if SCTP_COOKIE_HMAC_MD5 select CRYPTO_MD5 if SCTP_COOKIE_HMAC_MD5 @@ -79,7 +103,6 @@ config SCTP_COOKIE_HMAC_SHA1 bool "Enable optional SHA1 hmac cookie generation" help Enable optional SHA1 hmac based SCTP cookie generation - default y select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1 select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1 diff --git a/net/sctp/probe.c b/net/sctp/probe.c index bc6cd75cc1dc..5f7518de2fd1 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -122,7 +122,8 @@ static const struct file_operations sctpprobe_fops = { .llseek = noop_llseek, }; -sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep, +sctp_disposition_t jsctp_sf_eat_sack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 2c7785bacf74..f898b1c58bd2 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1191,9 +1191,9 @@ static int __net_init sctp_net_init(struct net *net) net->sctp.cookie_preserve_enable = 1; /* Default sctp sockets to use md5 as their hmac alg */ -#if defined (CONFIG_CRYPTO_MD5) +#if defined (CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5) net->sctp.sctp_hmac_alg = "md5"; -#elif defined (CONFIG_CRYPTO_SHA1) +#elif defined (CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1) net->sctp.sctp_hmac_alg = "sha1"; #else net->sctp.sctp_hmac_alg = NULL; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 411f332de0b3..795a0f4e920b 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -23,7 +23,6 @@ #include <linux/errno.h> #include <linux/mutex.h> #include <linux/slab.h> -#include <linux/nsproxy.h> #include <net/ipv6.h> #include <linux/sunrpc/clnt.h> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index dfa4ba69ff45..dbf12ac5ecb7 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -20,7 +20,6 @@ #include <linux/module.h> #include <linux/kthread.h> #include <linux/slab.h> -#include <linux/nsproxy.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/xdr.h> @@ -1041,7 +1040,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) } /* - * Printk the given error with the address of the client that caused it. + * dprintk the given error with the address of the client that caused it. */ static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) @@ -1055,8 +1054,7 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - net_warn_ratelimited("svc: %s: %pV", - svc_print_addr(rqstp, buf, sizeof(buf)), &vaf); + dprintk("svc: %s: %pV", svc_print_addr(rqstp, buf, sizeof(buf)), &vaf); va_end(args); } @@ -1305,7 +1303,7 @@ svc_process(struct svc_rqst *rqstp) * Setup response xdr_buf. * Initially it has just one page */ - rqstp->rq_resused = 1; + rqstp->rq_next_page = &rqstp->rq_respages[1]; resv->iov_base = page_address(rqstp->rq_respages[0]); resv->iov_len = 0; rqstp->rq_res.pages = rqstp->rq_respages + 1; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cc3020d16789..0a148c9d2a5c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -605,6 +605,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_respages = rqstp->rq_pages + 1 + DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE); } + rqstp->rq_next_page = rqstp->rq_respages+1; if (serv->sv_stats) serv->sv_stats->netudpcnt++; @@ -878,9 +879,9 @@ static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) return 0; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { if (rqstp->rq_pages[i] != NULL) @@ -897,9 +898,9 @@ static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) return; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { svsk->sk_pages[i] = rqstp->rq_pages[i]; @@ -911,9 +912,9 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) goto out; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { BUG_ON(svsk->sk_pages[i] == NULL); @@ -922,13 +923,12 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) } out: svsk->sk_tcplen = 0; + svsk->sk_datalen = 0; } /* - * Receive data. + * Receive fragment record header. * If we haven't gotten the record length yet, get the next four bytes. - * Otherwise try to gobble up as much as possible up to the complete - * record length. */ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) { @@ -954,32 +954,16 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return -EAGAIN; } - svsk->sk_reclen = ntohl(svsk->sk_reclen); - if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) { - /* FIXME: technically, a record can be fragmented, - * and non-terminal fragments will not have the top - * bit set in the fragment length header. - * But apparently no known nfs clients send fragmented - * records. */ - net_notice_ratelimited("RPC: multiple fragments per record not supported\n"); - goto err_delete; - } - - svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; - dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); - if (svsk->sk_reclen > serv->sv_max_mesg) { - net_notice_ratelimited("RPC: fragment too large: 0x%08lx\n", - (unsigned long)svsk->sk_reclen); + dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk)); + if (svc_sock_reclen(svsk) + svsk->sk_datalen > + serv->sv_max_mesg) { + net_notice_ratelimited("RPC: fragment too large: %d\n", + svc_sock_reclen(svsk)); goto err_delete; } } - if (svsk->sk_reclen < 8) - goto err_delete; /* client is nuts. */ - - len = svsk->sk_reclen; - - return len; + return svc_sock_reclen(svsk); error: dprintk("RPC: TCP recv_record got %d\n", len); return len; @@ -1023,7 +1007,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) if (dst->iov_len < src->iov_len) return -EAGAIN; /* whatever; just giving up. */ memcpy(dst->iov_base, src->iov_base, src->iov_len); - xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); rqstp->rq_arg.len = 0; return 0; } @@ -1042,6 +1026,17 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) return i; } +static void svc_tcp_fragment_received(struct svc_sock *svsk) +{ + /* If we have more data, signal svc_xprt_enqueue() to try again */ + if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + dprintk("svc: TCP %s record (%d bytes)\n", + svc_sock_final_rec(svsk) ? "final" : "nonfinal", + svc_sock_reclen(svsk)); + svsk->sk_tcplen = 0; + svsk->sk_reclen = 0; +} /* * Receive data from a TCP socket. @@ -1068,29 +1063,39 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto error; base = svc_tcp_restore_pages(svsk, rqstp); - want = svsk->sk_reclen - base; + want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); vec = rqstp->rq_vec; pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], - svsk->sk_reclen); + svsk->sk_datalen + want); rqstp->rq_respages = &rqstp->rq_pages[pnum]; + rqstp->rq_next_page = rqstp->rq_respages + 1; /* Now receive data */ len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); - if (len >= 0) + if (len >= 0) { svsk->sk_tcplen += len; - if (len != want) { + svsk->sk_datalen += len; + } + if (len != want || !svc_sock_final_rec(svsk)) { svc_tcp_save_pages(svsk, rqstp); if (len < 0 && len != -EAGAIN) - goto err_other; - dprintk("svc: incomplete TCP record (%d of %d)\n", - svsk->sk_tcplen, svsk->sk_reclen); + goto err_delete; + if (len == want) + svc_tcp_fragment_received(svsk); + else + dprintk("svc: incomplete TCP record (%d of %d)\n", + (int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)), + svc_sock_reclen(svsk)); goto err_noclose; } - rqstp->rq_arg.len = svsk->sk_reclen; + if (svc_sock_reclen(svsk) < 8) + goto err_delete; /* client is nuts. */ + + rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; @@ -1107,11 +1112,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) len = receive_cb_reply(svsk, rqstp); /* Reset TCP read info */ - svsk->sk_reclen = 0; - svsk->sk_tcplen = 0; - /* If we have more data, signal svc_xprt_enqueue() to try again */ - if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + svsk->sk_datalen = 0; + svc_tcp_fragment_received(svsk); if (len < 0) goto error; @@ -1120,15 +1122,14 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->nettcpcnt++; - dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); return rqstp->rq_arg.len; error: if (len != -EAGAIN) - goto err_other; + goto err_delete; dprintk("RPC: TCP recvfrom got EAGAIN\n"); return 0; -err_other: +err_delete: printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", svsk->sk_xprt.xpt_server->sv_name, -len); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1305,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 41cb63b623df..0ce75524ed21 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -521,11 +521,11 @@ next_sge: rqstp->rq_pages[ch_no] = NULL; /* - * Detach res pages. svc_release must see a resused count of - * zero or it will attempt to put them. + * Detach res pages. If svc_release sees any it will attempt to + * put them. */ - while (rqstp->rq_resused) - rqstp->rq_respages[--rqstp->rq_resused] = NULL; + while (rqstp->rq_next_page != rqstp->rq_respages) + *(--rqstp->rq_next_page) = NULL; return err; } @@ -550,7 +550,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp, /* rq_respages starts after the last arg page */ rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; - rqstp->rq_resused = 0; + rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no]; /* Rebuild rq_arg head and tail. */ rqstp->rq_arg.head[0] = head->arg.head[0]; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 42eb7ba0b903..c1d124dc772b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -548,6 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma, int sge_no; int sge_bytes; int page_no; + int pages; int ret; /* Post a recv buffer to handle another request. */ @@ -611,7 +612,8 @@ static int send_reply(struct svcxprt_rdma *rdma, * respages array. They are our pages until the I/O * completes. */ - for (page_no = 0; page_no < rqstp->rq_resused; page_no++) { + pages = rqstp->rq_next_page - rqstp->rq_respages; + for (page_no = 0; page_no < pages; page_no++) { ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; ctxt->count++; rqstp->rq_respages[page_no] = NULL; diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 1d6e4c541370..4d2c7dfdaabd 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2226,8 +2226,11 @@ sub process { my $path = $1; if ($path =~ m{//}) { ERROR("MALFORMED_INCLUDE", - "malformed #include filename\n" . - $herecurr); + "malformed #include filename\n" . $herecurr); + } + if ($path =~ "^uapi/" && $realfile =~ m@\binclude/uapi/@) { + ERROR("UAPI_INCLUDE", + "No #include in ...include/uapi/... should use a uapi/ path prefix\n" . $herecurr); } } diff --git a/scripts/coccinelle/misc/warn.cocci b/scripts/coccinelle/misc/warn.cocci new file mode 100644 index 000000000000..fda8c3558e4f --- /dev/null +++ b/scripts/coccinelle/misc/warn.cocci @@ -0,0 +1,109 @@ +/// Use WARN(1,...) rather than printk followed by WARN_ON(1) +/// +// Confidence: High +// Copyright: (C) 2012 Julia Lawall, INRIA/LIP6. GPLv2. +// Copyright: (C) 2012 Gilles Muller, INRIA/LiP6. GPLv2. +// URL: http://coccinelle.lip6.fr/ +// Comments: +// Options: -no_includes -include_headers + +virtual patch +virtual context +virtual org +virtual report + +@bad1@ +position p; +@@ + +printk(...); +printk@p(...); +WARN_ON(1); + +@r1 depends on context || report || org@ +position p != bad1.p; +@@ + + printk@p(...); +*WARN_ON(1); + +@script:python depends on org@ +p << r1.p; +@@ + +cocci.print_main("printk + WARN_ON can be just WARN",p) + +@script:python depends on report@ +p << r1.p; +@@ + +msg = "SUGGESTION: printk + WARN_ON can be just WARN" +coccilib.report.print_report(p[0],msg) + +@ok1 depends on patch@ +expression list es; +position p != bad1.p; +@@ + +-printk@p( ++WARN(1, + es); +-WARN_ON(1); + +@depends on patch@ +expression list ok1.es; +@@ + +if (...) +- { + WARN(1,es); +- } + +// -------------------------------------------------------------------- + +@bad2@ +position p; +@@ + +printk(...); +printk@p(...); +WARN_ON_ONCE(1); + +@r2 depends on context || report || org@ +position p != bad1.p; +@@ + + printk@p(...); +*WARN_ON_ONCE(1); + +@script:python depends on org@ +p << r2.p; +@@ + +cocci.print_main("printk + WARN_ON_ONCE can be just WARN_ONCE",p) + +@script:python depends on report@ +p << r2.p; +@@ + +msg = "SUGGESTION: printk + WARN_ON_ONCE can be just WARN_ONCE" +coccilib.report.print_report(p[0],msg) + +@ok2 depends on patch@ +expression list es; +position p != bad2.p; +@@ + +-printk@p( ++WARN_ONCE(1, + es); +-WARN_ON_ONCE(1); + +@depends on patch@ +expression list ok2.es; +@@ + +if (...) +- { + WARN_ONCE(1,es); +- } diff --git a/scripts/config b/scripts/config index ee355394f4ef..bb4d3deb6d1c 100755 --- a/scripts/config +++ b/scripts/config @@ -101,7 +101,6 @@ while [ "$1" != "" ] ; do case "$CMD" in --keep-case|-k) MUNGE_CASE=no - shift continue ;; --refresh) diff --git a/scripts/pnmtologo.c b/scripts/pnmtologo.c index 5c113123ed9f..68bb4efc5af4 100644 --- a/scripts/pnmtologo.c +++ b/scripts/pnmtologo.c @@ -74,6 +74,7 @@ static unsigned int logo_height; static struct color **logo_data; static struct color logo_clut[MAX_LINUX_LOGO_COLORS]; static unsigned int logo_clutsize; +static int is_plain_pbm = 0; static void die(const char *fmt, ...) __attribute__ ((noreturn)) __attribute ((format (printf, 1, 2))); @@ -103,6 +104,11 @@ static unsigned int get_number(FILE *fp) val = 0; while (isdigit(c)) { val = 10*val+c-'0'; + /* some PBM are 'broken'; GiMP for example exports a PBM without space + * between the digits. This is Ok cause we know a PBM can only have a '1' + * or a '0' for the digit. */ + if (is_plain_pbm) + break; c = fgetc(fp); if (c == EOF) die("%s: end of file\n", filename); @@ -167,6 +173,7 @@ static void read_image(void) switch (magic) { case '1': /* Plain PBM */ + is_plain_pbm = 1; for (i = 0; i < logo_height; i++) for (j = 0; j < logo_width; j++) logo_data[i][j].red = logo_data[i][j].green = diff --git a/scripts/tags.sh b/scripts/tags.sh index 79fdafb0d263..08f06c00745e 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -48,13 +48,14 @@ find_arch_sources() for i in $archincludedir; do prune="$prune -wholename $i -prune -o" done - find ${tree}arch/$1 $ignore $prune -name "$2" -print; + find ${tree}arch/$1 $ignore $subarchprune $prune -name "$2" -print; } # find sources in arch/$1/include find_arch_include_sources() { - include=$(find ${tree}arch/$1/ -name include -type d); + include=$(find ${tree}arch/$1/ $subarchprune \ + -name include -type d -print); if [ -n "$include" ]; then archincludedir="$archincludedir $include" find $include $ignore -name "$2" -print; @@ -95,6 +96,32 @@ all_sources() find_other_sources '*.[chS]' } +all_compiled_sources() +{ + for i in $(all_sources); do + case "$i" in + *.[cS]) + j=${i/\.[cS]/\.o} + if [ -e $j ]; then + echo $i + fi + ;; + *) + echo $i + ;; + esac + done +} + +all_target_sources() +{ + if [ -n "$COMPILED_SOURCE" ]; then + all_compiled_sources + else + all_sources + fi +} + all_kconfigs() { for arch in $ALLSOURCE_ARCHS; do @@ -110,18 +137,18 @@ all_defconfigs() docscope() { - (echo \-k; echo \-q; all_sources) > cscope.files + (echo \-k; echo \-q; all_target_sources) > cscope.files cscope -b -f cscope.out } dogtags() { - all_sources | gtags -i -f - + all_target_sources | gtags -i -f - } exuberant() { - all_sources | xargs $1 -a \ + all_target_sources | xargs $1 -a \ -I __initdata,__exitdata,__acquires,__releases \ -I __read_mostly,____cacheline_aligned \ -I ____cacheline_aligned_in_smp \ @@ -173,7 +200,7 @@ exuberant() emacs() { - all_sources | xargs $1 -a \ + all_target_sources | xargs $1 -a \ --regex='/^(ENTRY|_GLOBAL)(\([^)]*\)).*/\2/' \ --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' \ --regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/' \ @@ -220,11 +247,10 @@ xtags() elif $1 --version 2>&1 | grep -iq emacs; then emacs $1 else - all_sources | xargs $1 -a + all_target_sources | xargs $1 -a fi } - # Support um (which uses SUBARCH) if [ "${ARCH}" = "um" ]; then if [ "$SUBARCH" = "i386" ]; then @@ -234,6 +260,21 @@ if [ "${ARCH}" = "um" ]; then else archinclude=${SUBARCH} fi +elif [ "${SRCARCH}" = "arm" -a "${SUBARCH}" != "" ]; then + subarchdir=$(find ${tree}arch/$SRCARCH/ -name "mach-*" -type d -o \ + -name "plat-*" -type d); + for i in $subarchdir; do + case "$i" in + *"mach-"${SUBARCH}) + ;; + *"plat-"${SUBARCH}) + ;; + *) + subarchprune="$subarchprune \ + -wholename $i -prune -o" + ;; + esac + done fi remove_structs= diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 58dfe0890947..20e4bf57aec8 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -367,8 +367,6 @@ key_ref_t search_my_process_keyrings(struct key_type *type, switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ - if (ret) - break; case -ENOKEY: /* negative key */ ret = key_ref; break; diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 370a6468b3ba..855e464e92ef 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -69,6 +69,8 @@ static struct nlmsg_perm nlmsg_route_perms[] = { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_READ }, + { RTM_NEWMDB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, + { RTM_DELMDB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETMDB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, }; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 0f3d3db0df71..cca87277baf0 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2876,7 +2876,7 @@ static int azx_free(struct azx *chip) azx_notifier_unregister(chip); chip->init_failed = 1; /* to be sure */ - complete(&chip->probe_wait); + complete_all(&chip->probe_wait); if (use_vga_switcheroo(chip)) { if (chip->disabled && chip->bus) @@ -3504,7 +3504,7 @@ static int azx_probe(struct pci_dev *pci, pm_runtime_put_noidle(&pci->dev); dev++; - complete(&chip->probe_wait); + complete_all(&chip->probe_wait); return 0; out_free: diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 0fcfa6f406b8..b6c21ea187ca 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -431,9 +431,11 @@ static void hdmi_init_pin(struct hda_codec *codec, hda_nid_t pin_nid) if (get_wcaps(codec, pin_nid) & AC_WCAP_OUT_AMP) snd_hda_codec_write(codec, pin_nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); - /* Disable pin out until stream is active*/ + /* Enable pin out: some machines with GM965 gets broken output when + * the pin is disabled or changed while using with HDMI + */ snd_hda_codec_write(codec, pin_nid, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, 0); + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); } static int hdmi_get_channel_count(struct hda_codec *codec, hda_nid_t cvt_nid) @@ -1341,7 +1343,6 @@ static int generic_hdmi_playback_pcm_prepare(struct hda_pcm_stream *hinfo, struct hdmi_spec *spec = codec->spec; int pin_idx = hinfo_to_pin_index(spec, hinfo); hda_nid_t pin_nid = spec->pins[pin_idx].pin_nid; - int pinctl; bool non_pcm; non_pcm = check_non_pcm_per_cvt(codec, cvt_nid); @@ -1350,11 +1351,6 @@ static int generic_hdmi_playback_pcm_prepare(struct hda_pcm_stream *hinfo, hdmi_setup_audio_infoframe(codec, pin_idx, non_pcm, substream); - pinctl = snd_hda_codec_read(codec, pin_nid, 0, - AC_VERB_GET_PIN_WIDGET_CONTROL, 0); - snd_hda_codec_write(codec, pin_nid, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, pinctl | PIN_OUT); - return hdmi_setup_stream(codec, cvt_nid, pin_nid, stream_tag, format); } @@ -1374,7 +1370,6 @@ static int hdmi_pcm_close(struct hda_pcm_stream *hinfo, int cvt_idx, pin_idx; struct hdmi_spec_per_cvt *per_cvt; struct hdmi_spec_per_pin *per_pin; - int pinctl; if (hinfo->nid) { cvt_idx = cvt_nid_to_cvt_index(spec, hinfo->nid); @@ -1391,11 +1386,6 @@ static int hdmi_pcm_close(struct hda_pcm_stream *hinfo, return -EINVAL; per_pin = &spec->pins[pin_idx]; - pinctl = snd_hda_codec_read(codec, per_pin->pin_nid, 0, - AC_VERB_GET_PIN_WIDGET_CONTROL, 0); - snd_hda_codec_write(codec, per_pin->pin_nid, 0, - AC_VERB_SET_PIN_WIDGET_CONTROL, - pinctl & ~PIN_OUT); snd_hda_spdif_ctls_unassign(codec, pin_idx); per_pin->chmap_set = false; memset(per_pin->chmap, 0, sizeof(per_pin->chmap)); @@ -1691,6 +1681,30 @@ static const struct hda_codec_ops generic_hdmi_patch_ops = { .unsol_event = hdmi_unsol_event, }; +static void intel_haswell_fixup_connect_list(struct hda_codec *codec) +{ + unsigned int vendor_param; + hda_nid_t list[3] = {0x2, 0x3, 0x4}; + + vendor_param = snd_hda_codec_read(codec, 0x08, 0, 0xf81, 0); + if (vendor_param == -1 || vendor_param & 0x02) + return; + + /* enable DP1.2 mode */ + vendor_param |= 0x02; + snd_hda_codec_read(codec, 0x08, 0, 0x781, vendor_param); + + vendor_param = snd_hda_codec_read(codec, 0x08, 0, 0xf81, 0); + if (vendor_param == -1 || !(vendor_param & 0x02)) + return; + + /* override 3 pins connection list */ + snd_hda_override_conn_list(codec, 0x05, 3, list); + snd_hda_override_conn_list(codec, 0x06, 3, list); + snd_hda_override_conn_list(codec, 0x07, 3, list); +} + + static int patch_generic_hdmi(struct hda_codec *codec) { struct hdmi_spec *spec; @@ -1700,6 +1714,10 @@ static int patch_generic_hdmi(struct hda_codec *codec) return -ENOMEM; codec->spec = spec; + + if (codec->vendor_id == 0x80862807) + intel_haswell_fixup_connect_list(codec); + if (hdmi_parse_codec(codec) < 0) { codec->spec = NULL; kfree(spec); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7743775f6abb..6ee34593774a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4373,6 +4373,7 @@ static int alc_alloc_spec(struct hda_codec *codec, hda_nid_t mixer_nid) if (!spec) return -ENOMEM; codec->spec = spec; + codec->single_adc_amp = 1; spec->mixer_nid = mixer_nid; snd_hda_gen_init(&spec->gen); snd_array_init(&spec->kctls, sizeof(struct snd_kcontrol_new), 32); @@ -6569,8 +6570,8 @@ static void alc861vd_fixup_dallas(struct hda_codec *codec, const struct alc_fixup *fix, int action) { if (action == ALC_FIXUP_ACT_PRE_PROBE) { - snd_hda_override_pin_caps(codec, 0x18, 0x00001714); - snd_hda_override_pin_caps(codec, 0x19, 0x0000171c); + snd_hda_override_pin_caps(codec, 0x18, 0x00000734); + snd_hda_override_pin_caps(codec, 0x19, 0x0000073c); } } diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index df13c0f84899..a86547ca17c8 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1725,7 +1725,7 @@ static const struct snd_pci_quirk stac92hd83xxx_cfg_tbl[] = { SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1658, "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1659, - "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + "HP Pavilion dv7", STAC_HP_DV7_4000), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165A, "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165B, diff --git a/sound/soc/codecs/cs42l73.c b/sound/soc/codecs/cs42l73.c index a0791ecf6d95..6361dab48bd1 100644 --- a/sound/soc/codecs/cs42l73.c +++ b/sound/soc/codecs/cs42l73.c @@ -40,6 +40,7 @@ struct cs42l73_private { u32 sysclk; u8 mclksel; u32 mclk; + int shutdwn_delay; }; static const struct reg_default cs42l73_reg_defaults[] = { @@ -588,7 +589,60 @@ static const struct snd_kcontrol_new cs42l73_snd_controls[] = { SOC_ENUM("XSPOUT Mono/Stereo Select", xsp_output_mux_enum), }; +static int cs42l73_spklo_spk_amp_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = w->codec; + struct cs42l73_private *priv = snd_soc_codec_get_drvdata(codec); + switch (event) { + case SND_SOC_DAPM_POST_PMD: + /* 150 ms delay between setting PDN and MCLKDIS */ + priv->shutdwn_delay = 150; + break; + default: + pr_err("Invalid event = 0x%x\n", event); + } + return 0; +} + +static int cs42l73_ear_amp_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = w->codec; + struct cs42l73_private *priv = snd_soc_codec_get_drvdata(codec); + switch (event) { + case SND_SOC_DAPM_POST_PMD: + /* 50 ms delay between setting PDN and MCLKDIS */ + if (priv->shutdwn_delay < 50) + priv->shutdwn_delay = 50; + break; + default: + pr_err("Invalid event = 0x%x\n", event); + } + return 0; +} + + +static int cs42l73_hp_amp_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = w->codec; + struct cs42l73_private *priv = snd_soc_codec_get_drvdata(codec); + switch (event) { + case SND_SOC_DAPM_POST_PMD: + /* 30 ms delay between setting PDN and MCLKDIS */ + if (priv->shutdwn_delay < 30) + priv->shutdwn_delay = 30; + break; + default: + pr_err("Invalid event = 0x%x\n", event); + } + return 0; +} + static const struct snd_soc_dapm_widget cs42l73_dapm_widgets[] = { + SND_SOC_DAPM_INPUT("DMICA"), + SND_SOC_DAPM_INPUT("DMICB"), SND_SOC_DAPM_INPUT("LINEINA"), SND_SOC_DAPM_INPUT("LINEINB"), SND_SOC_DAPM_INPUT("MIC1"), @@ -604,9 +658,7 @@ static const struct snd_soc_dapm_widget cs42l73_dapm_widgets[] = { CS42L73_PWRCTL2, 3, 1), SND_SOC_DAPM_AIF_OUT("ASPOUTR", NULL, 0, CS42L73_PWRCTL2, 3, 1), - SND_SOC_DAPM_AIF_OUT("VSPOUTL", NULL, 0, - CS42L73_PWRCTL2, 4, 1), - SND_SOC_DAPM_AIF_OUT("VSPOUTR", NULL, 0, + SND_SOC_DAPM_AIF_OUT("VSPINOUT", NULL, 0, CS42L73_PWRCTL2, 4, 1), SND_SOC_DAPM_PGA("PGA Left", SND_SOC_NOPM, 0, 0, NULL, 0), @@ -632,8 +684,7 @@ static const struct snd_soc_dapm_widget cs42l73_dapm_widgets[] = { SND_SOC_DAPM_MIXER("ASPR Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_MIXER("XSPL Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_MIXER("XSPR Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_MIXER("VSPL Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_MIXER("VSPR Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), + SND_SOC_DAPM_MIXER("VSP Output Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_AIF_IN("XSPINL", NULL, 0, CS42L73_PWRCTL2, 0, 1), @@ -649,7 +700,7 @@ static const struct snd_soc_dapm_widget cs42l73_dapm_widgets[] = { SND_SOC_DAPM_AIF_IN("ASPINM", NULL, 0, CS42L73_PWRCTL2, 2, 1), - SND_SOC_DAPM_AIF_IN("VSPIN", NULL, 0, + SND_SOC_DAPM_AIF_IN("VSPINOUT", NULL, 0, CS42L73_PWRCTL2, 4, 1), SND_SOC_DAPM_MIXER("HL Left Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), @@ -674,16 +725,20 @@ static const struct snd_soc_dapm_widget cs42l73_dapm_widgets[] = { SND_SOC_DAPM_PGA("SPK DAC", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_PGA("ESL DAC", SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_SWITCH("HP Amp", CS42L73_PWRCTL3, 0, 1, - &hp_amp_ctl), + SND_SOC_DAPM_SWITCH_E("HP Amp", CS42L73_PWRCTL3, 0, 1, + &hp_amp_ctl, cs42l73_hp_amp_event, + SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_SWITCH("LO Amp", CS42L73_PWRCTL3, 1, 1, &lo_amp_ctl), - SND_SOC_DAPM_SWITCH("SPK Amp", CS42L73_PWRCTL3, 2, 1, - &spk_amp_ctl), - SND_SOC_DAPM_SWITCH("EAR Amp", CS42L73_PWRCTL3, 3, 1, - &ear_amp_ctl), - SND_SOC_DAPM_SWITCH("SPKLO Amp", CS42L73_PWRCTL3, 4, 1, - &spklo_amp_ctl), + SND_SOC_DAPM_SWITCH_E("SPK Amp", CS42L73_PWRCTL3, 2, 1, + &spk_amp_ctl, cs42l73_spklo_spk_amp_event, + SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SWITCH_E("EAR Amp", CS42L73_PWRCTL3, 3, 1, + &ear_amp_ctl, cs42l73_ear_amp_event, + SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_SWITCH_E("SPKLO Amp", CS42L73_PWRCTL3, 4, 1, + &spklo_amp_ctl, cs42l73_spklo_spk_amp_event, + SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_OUTPUT("HPOUTA"), SND_SOC_DAPM_OUTPUT("HPOUTB"), @@ -705,7 +760,7 @@ static const struct snd_soc_dapm_route cs42l73_audio_map[] = { {"ESL DAC", "ESL-ASP Mono Volume", "ESL Mixer"}, {"ESL DAC", "ESL-XSP Mono Volume", "ESL Mixer"}, - {"ESL DAC", "ESL-VSP Mono Volume", "VSPIN"}, + {"ESL DAC", "ESL-VSP Mono Volume", "VSPINOUT"}, /* Loopback */ {"ESL DAC", "ESL-IP Mono Volume", "Input Left Capture"}, {"ESL DAC", "ESL-IP Mono Volume", "Input Right Capture"}, @@ -727,7 +782,7 @@ static const struct snd_soc_dapm_route cs42l73_audio_map[] = { {"SPK DAC", "SPK-ASP Mono Volume", "SPK Mixer"}, {"SPK DAC", "SPK-XSP Mono Volume", "SPK Mixer"}, - {"SPK DAC", "SPK-VSP Mono Volume", "VSPIN"}, + {"SPK DAC", "SPK-VSP Mono Volume", "VSPINOUT"}, /* Loopback */ {"SPK DAC", "SPK-IP Mono Volume", "Input Left Capture"}, {"SPK DAC", "SPK-IP Mono Volume", "Input Right Capture"}, @@ -770,8 +825,8 @@ static const struct snd_soc_dapm_route cs42l73_audio_map[] = { {"HL Right Mixer", NULL, "ASPINR"}, {"HL Left Mixer", NULL, "XSPINL"}, {"HL Right Mixer", NULL, "XSPINR"}, - {"HL Left Mixer", NULL, "VSPIN"}, - {"HL Right Mixer", NULL, "VSPIN"}, + {"HL Left Mixer", NULL, "VSPINOUT"}, + {"HL Right Mixer", NULL, "VSPINOUT"}, {"ASPINL", NULL, "ASP Playback"}, {"ASPINM", NULL, "ASP Playback"}, @@ -779,7 +834,7 @@ static const struct snd_soc_dapm_route cs42l73_audio_map[] = { {"XSPINL", NULL, "XSP Playback"}, {"XSPINM", NULL, "XSP Playback"}, {"XSPINR", NULL, "XSP Playback"}, - {"VSPIN", NULL, "VSP Playback"}, + {"VSPINOUT", NULL, "VSP Playback"}, /* Capture Paths */ {"MIC1", NULL, "MIC1 Bias"}, @@ -795,6 +850,8 @@ static const struct snd_soc_dapm_route cs42l73_audio_map[] = { {"ADC Left", NULL, "PGA Left"}, {"ADC Right", NULL, "PGA Right"}, + {"DMIC Left", NULL, "DMICA"}, + {"DMIC Right", NULL, "DMICB"}, {"Input Left Capture", "ADC Left Input", "ADC Left"}, {"Input Right Capture", "ADC Right Input", "ADC Right"}, @@ -819,21 +876,18 @@ static const struct snd_soc_dapm_route cs42l73_audio_map[] = { {"XSPOUTR", NULL, "XSPR Output Mixer"}, /* Voice Capture */ - {"VSPL Output Mixer", NULL, "Input Left Capture"}, - {"VSPR Output Mixer", NULL, "Input Left Capture"}, + {"VSP Output Mixer", NULL, "Input Left Capture"}, + {"VSP Output Mixer", NULL, "Input Right Capture"}, - {"VSPOUTL", "VSP-IP Volume", "VSPL Output Mixer"}, - {"VSPOUTR", "VSP-IP Volume", "VSPR Output Mixer"}, + {"VSPINOUT", "VSP-IP Volume", "VSP Output Mixer"}, - {"VSPOUTL", NULL, "VSPL Output Mixer"}, - {"VSPOUTR", NULL, "VSPR Output Mixer"}, + {"VSPINOUT", NULL, "VSP Output Mixer"}, {"ASP Capture", NULL, "ASPOUTL"}, {"ASP Capture", NULL, "ASPOUTR"}, {"XSP Capture", NULL, "XSPOUTL"}, {"XSP Capture", NULL, "XSPOUTR"}, - {"VSP Capture", NULL, "VSPOUTL"}, - {"VSP Capture", NULL, "VSPOUTR"}, + {"VSP Capture", NULL, "VSPINOUT"}, }; struct cs42l73_mclk_div { @@ -1167,6 +1221,14 @@ static int cs42l73_set_bias_level(struct snd_soc_codec *codec, case SND_SOC_BIAS_OFF: snd_soc_update_bits(codec, CS42L73_PWRCTL1, PDN, 1); + if (cs42l73->shutdwn_delay > 0) { + mdelay(cs42l73->shutdwn_delay); + cs42l73->shutdwn_delay = 0; + } else { + mdelay(15); /* Min amount of time requred to power + * down. + */ + } snd_soc_update_bits(codec, CS42L73_DMMCC, MCLKDIS, 1); break; } diff --git a/sound/soc/codecs/sigmadsp.c b/sound/soc/codecs/sigmadsp.c index 5be42bf56996..4068f2491232 100644 --- a/sound/soc/codecs/sigmadsp.c +++ b/sound/soc/codecs/sigmadsp.c @@ -225,7 +225,7 @@ EXPORT_SYMBOL(process_sigma_firmware); static int sigma_action_write_regmap(void *control_data, const struct sigma_action *sa, size_t len) { - return regmap_raw_write(control_data, le16_to_cpu(sa->addr), + return regmap_raw_write(control_data, be16_to_cpu(sa->addr), sa->payload, len - 2); } diff --git a/sound/soc/codecs/tpa6130a2.c b/sound/soc/codecs/tpa6130a2.c index 8d75aa152c8c..c58bee8346ce 100644 --- a/sound/soc/codecs/tpa6130a2.c +++ b/sound/soc/codecs/tpa6130a2.c @@ -398,7 +398,8 @@ static int tpa6130a2_probe(struct i2c_client *client, TPA6130A2_MUTE_L; if (data->power_gpio >= 0) { - ret = gpio_request(data->power_gpio, "tpa6130a2 enable"); + ret = devm_gpio_request(dev, data->power_gpio, + "tpa6130a2 enable"); if (ret < 0) { dev_err(dev, "Failed to request power GPIO (%d)\n", data->power_gpio); @@ -419,16 +420,16 @@ static int tpa6130a2_probe(struct i2c_client *client, break; } - data->supply = regulator_get(dev, regulator); + data->supply = devm_regulator_get(dev, regulator); if (IS_ERR(data->supply)) { ret = PTR_ERR(data->supply); dev_err(dev, "Failed to request supply: %d\n", ret); - goto err_regulator; + goto err_gpio; } ret = tpa6130a2_power(1); if (ret != 0) - goto err_power; + goto err_gpio; /* Read version */ @@ -440,15 +441,10 @@ static int tpa6130a2_probe(struct i2c_client *client, /* Disable the chip */ ret = tpa6130a2_power(0); if (ret != 0) - goto err_power; + goto err_gpio; return 0; -err_power: - regulator_put(data->supply); -err_regulator: - if (data->power_gpio >= 0) - gpio_free(data->power_gpio); err_gpio: tpa6130a2_client = NULL; @@ -457,14 +453,7 @@ err_gpio: static int tpa6130a2_remove(struct i2c_client *client) { - struct tpa6130a2_data *data = i2c_get_clientdata(client); - tpa6130a2_power(0); - - if (data->power_gpio >= 0) - gpio_free(data->power_gpio); - - regulator_put(data->supply); tpa6130a2_client = NULL; return 0; diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index 967d0e173e1b..5fbfb06e8083 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -113,7 +113,7 @@ static int soc_compr_free(struct snd_compr_stream *cstream) SNDRV_PCM_STREAM_PLAYBACK, SND_SOC_DAPM_STREAM_STOP); } else - codec_dai->pop_wait = 1; + rtd->pop_wait = 1; schedule_delayed_work(&rtd->delayed_work, msecs_to_jiffies(rtd->pmdown_time)); } else { diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 9c768bcb98a6..91d592ff67b7 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -4155,9 +4155,9 @@ int snd_soc_of_parse_audio_routing(struct snd_soc_card *card, ret = of_property_read_string_index(np, propname, 2 * i, &routes[i].sink); if (ret) { - dev_err(card->dev, "ASoC: Property '%s' index %d" - " could not be read: %d\n", propname, 2 * i, - ret); + dev_err(card->dev, + "ASoC: Property '%s' index %d could not be read: %d\n", + propname, 2 * i, ret); kfree(routes); return -EINVAL; } @@ -4165,8 +4165,8 @@ int snd_soc_of_parse_audio_routing(struct snd_soc_card *card, (2 * i) + 1, &routes[i].source); if (ret) { dev_err(card->dev, - "ASoC: Property '%s' index %d could not be" - " read: %d\n", propname, (2 * i) + 1, ret); + "ASoC: Property '%s' index %d could not be read: %d\n", + propname, (2 * i) + 1, ret); kfree(routes); return -EINVAL; } diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 5c3ca2a34661..d7711fce119b 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -334,11 +334,11 @@ static void close_delayed_work(struct work_struct *work) dev_dbg(rtd->dev, "ASoC: pop wq checking: %s status: %s waiting: %s\n", codec_dai->driver->playback.stream_name, codec_dai->playback_active ? "active" : "inactive", - codec_dai->pop_wait ? "yes" : "no"); + rtd->pop_wait ? "yes" : "no"); /* are we waiting on this codec DAI stream */ - if (codec_dai->pop_wait == 1) { - codec_dai->pop_wait = 0; + if (rtd->pop_wait == 1) { + rtd->pop_wait = 0; snd_soc_dapm_stream_event(rtd, SNDRV_PCM_STREAM_PLAYBACK, SND_SOC_DAPM_STREAM_STOP); } @@ -408,7 +408,7 @@ static int soc_pcm_close(struct snd_pcm_substream *substream) SND_SOC_DAPM_STREAM_STOP); } else { /* start delayed pop wq here for playback streams */ - codec_dai->pop_wait = 1; + rtd->pop_wait = 1; schedule_delayed_work(&rtd->delayed_work, msecs_to_jiffies(rtd->pmdown_time)); } @@ -480,8 +480,8 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream) /* cancel any delayed stream shutdown that is pending */ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && - codec_dai->pop_wait) { - codec_dai->pop_wait = 0; + rtd->pop_wait) { + rtd->pop_wait = 0; cancel_delayed_work(&rtd->delayed_work); } diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 34b9bb7fe87c..c183d34842ac 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -2181,6 +2181,10 @@ int snd_usbmidi_create(struct snd_card *card, umidi->usb_protocol_ops = &snd_usbmidi_novation_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; + case QUIRK_MIDI_MBOX2: + umidi->usb_protocol_ops = &snd_usbmidi_midiman_ops; + err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); + break; case QUIRK_MIDI_RAW_BYTES: umidi->usb_protocol_ops = &snd_usbmidi_raw_ops; /* diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 49f9af995d7a..cdcf6b45e8a8 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -99,6 +99,42 @@ }, /* + * HP Wireless Audio + * When not ignored, causes instability issues for some users, forcing them to + * blacklist the entire module. + */ +{ + USB_DEVICE(0x0424, 0xb832), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "Standard Microsystems Corp.", + .product_name = "HP Wireless Audio", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + /* Mixer */ + { + .ifnum = 0, + .type = QUIRK_IGNORE_INTERFACE, + }, + /* Playback */ + { + .ifnum = 1, + .type = QUIRK_IGNORE_INTERFACE, + }, + /* Capture */ + { + .ifnum = 2, + .type = QUIRK_IGNORE_INTERFACE, + }, + /* HID Device, .ifnum = 3 */ + { + .ifnum = -1, + } + } + } +}, + +/* * Logitech QuickCam: bDeviceClass is vendor-specific, so generic interface * class matches do not take effect without an explicit ID match. */ @@ -2885,6 +2921,93 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, + +/* DIGIDESIGN MBOX 2 */ +{ + USB_DEVICE(0x0dba, 0x3000), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "Digidesign", + .product_name = "Mbox 2", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_IGNORE_INTERFACE + }, + { + .ifnum = 1, + .type = QUIRK_IGNORE_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S24_3BE, + .channels = 2, + .iface = 2, + .altsetting = 2, + .altset_idx = 1, + .attributes = 0x00, + .endpoint = 0x03, + .ep_attr = USB_ENDPOINT_SYNC_ASYNC, + .maxpacksize = 0x128, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { + 48000 + } + } + }, + { + .ifnum = 3, + .type = QUIRK_IGNORE_INTERFACE + }, + { + .ifnum = 4, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S24_3BE, + .channels = 2, + .iface = 4, + .altsetting = 2, + .altset_idx = 1, + .attributes = UAC_EP_CS_ATTR_SAMPLE_RATE, + .endpoint = 0x85, + .ep_attr = USB_ENDPOINT_SYNC_SYNC, + .maxpacksize = 0x128, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { + 48000 + } + } + }, + { + .ifnum = 5, + .type = QUIRK_IGNORE_INTERFACE + }, + { + .ifnum = 6, + .type = QUIRK_MIDI_MBOX2, + .data = &(const struct snd_usb_midi_endpoint_info) { + .out_ep = 0x02, + .out_cables = 0x0001, + .in_ep = 0x81, + .in_interval = 0x01, + .in_cables = 0x0001 + } + }, + { + .ifnum = -1 + } + } + } +}, { /* Tascam US122 MKII - playback-only support */ .match_flags = USB_DEVICE_ID_MATCH_DEVICE, diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 007fcecdf5cd..f104c68fe1e0 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -306,6 +306,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip, [QUIRK_MIDI_YAMAHA] = create_any_midi_quirk, [QUIRK_MIDI_MIDIMAN] = create_any_midi_quirk, [QUIRK_MIDI_NOVATION] = create_any_midi_quirk, + [QUIRK_MIDI_MBOX2] = create_any_midi_quirk, [QUIRK_MIDI_RAW_BYTES] = create_any_midi_quirk, [QUIRK_MIDI_EMAGIC] = create_any_midi_quirk, [QUIRK_MIDI_CME] = create_any_midi_quirk, @@ -497,6 +498,92 @@ static int snd_usb_nativeinstruments_boot_quirk(struct usb_device *dev) return -EAGAIN; } +static void mbox2_setup_48_24_magic(struct usb_device *dev) +{ + u8 srate[3]; + u8 temp[12]; + + /* Choose 48000Hz permanently */ + srate[0] = 0x80; + srate[1] = 0xbb; + srate[2] = 0x00; + + /* Send the magic! */ + snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), + 0x01, 0x22, 0x0100, 0x0085, &temp, 0x0003); + snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), + 0x81, 0xa2, 0x0100, 0x0085, &srate, 0x0003); + snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), + 0x81, 0xa2, 0x0100, 0x0086, &srate, 0x0003); + snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), + 0x81, 0xa2, 0x0100, 0x0003, &srate, 0x0003); + return; +} + +/* Digidesign Mbox 2 needs to load firmware onboard + * and driver must wait a few seconds for initialisation. + */ + +#define MBOX2_FIRMWARE_SIZE 646 +#define MBOX2_BOOT_LOADING 0x01 /* Hard coded into the device */ +#define MBOX2_BOOT_READY 0x02 /* Hard coded into the device */ + +int snd_usb_mbox2_boot_quirk(struct usb_device *dev) +{ + struct usb_host_config *config = dev->actconfig; + int err; + u8 bootresponse; + int fwsize; + int count; + + fwsize = le16_to_cpu(get_cfg_desc(config)->wTotalLength); + + if (fwsize != MBOX2_FIRMWARE_SIZE) { + snd_printk(KERN_ERR "usb-audio: Invalid firmware size=%d.\n", fwsize); + return -ENODEV; + } + + snd_printd("usb-audio: Sending Digidesign Mbox 2 boot sequence...\n"); + + count = 0; + bootresponse = MBOX2_BOOT_LOADING; + while ((bootresponse == MBOX2_BOOT_LOADING) && (count < 10)) { + msleep(500); /* 0.5 second delay */ + snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), + /* Control magic - load onboard firmware */ + 0x85, 0xc0, 0x0001, 0x0000, &bootresponse, 0x0012); + if (bootresponse == MBOX2_BOOT_READY) + break; + snd_printd("usb-audio: device not ready, resending boot sequence...\n"); + count++; + } + + if (bootresponse != MBOX2_BOOT_READY) { + snd_printk(KERN_ERR "usb-audio: Unknown bootresponse=%d, or timed out, ignoring device.\n", bootresponse); + return -ENODEV; + } + + snd_printdd("usb-audio: device initialised!\n"); + + err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, + &dev->descriptor, sizeof(dev->descriptor)); + config = dev->actconfig; + if (err < 0) + snd_printd("error usb_get_descriptor: %d\n", err); + + err = usb_reset_configuration(dev); + if (err < 0) + snd_printd("error usb_reset_configuration: %d\n", err); + snd_printdd("mbox2_boot: new boot length = %d\n", + le16_to_cpu(get_cfg_desc(config)->wTotalLength)); + + mbox2_setup_48_24_magic(dev); + + snd_printk(KERN_INFO "usb-audio: Digidesign Mbox 2: 24bit 48kHz"); + + return 0; /* Successful boot */ +} + /* * Setup quirks */ @@ -655,6 +742,10 @@ int snd_usb_apply_boot_quirk(struct usb_device *dev, case USB_ID(0x0ccd, 0x00b1): /* Terratec Aureon 7.1 USB */ return snd_usb_cm6206_boot_quirk(dev); + case USB_ID(0x0dba, 0x3000): + /* Digidesign Mbox 2 */ + return snd_usb_mbox2_boot_quirk(dev); + case USB_ID(0x133e, 0x0815): /* Access Music VirusTI Desktop */ return snd_usb_accessmusic_boot_quirk(dev); diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 1ac3fd9cc5a6..a8172c119796 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -76,6 +76,7 @@ enum quirk_type { QUIRK_MIDI_YAMAHA, QUIRK_MIDI_MIDIMAN, QUIRK_MIDI_NOVATION, + QUIRK_MIDI_MBOX2, QUIRK_MIDI_RAW_BYTES, QUIRK_MIDI_EMAGIC, QUIRK_MIDI_CME, diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index fd2f9221b241..07a03452c227 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -179,29 +179,6 @@ static struct termios orig_term; #define wmb() __asm__ __volatile__("" : : : "memory") #define mb() __asm__ __volatile__("" : : : "memory") -/* - * Convert an iovec element to the given type. - * - * This is a fairly ugly trick: we need to know the size of the type and - * alignment requirement to check the pointer is kosher. It's also nice to - * have the name of the type in case we report failure. - * - * Typing those three things all the time is cumbersome and error prone, so we - * have a macro which sets them all up and passes to the real function. - */ -#define convert(iov, type) \ - ((type *)_convert((iov), sizeof(type), __alignof__(type), #type)) - -static void *_convert(struct iovec *iov, size_t size, size_t align, - const char *name) -{ - if (iov->iov_len != size) - errx(1, "Bad iovec size %zu for %s", iov->iov_len, name); - if ((unsigned long)iov->iov_base % align != 0) - errx(1, "Bad alignment %p for %s", iov->iov_base, name); - return iov->iov_base; -} - /* Wrapper for the last available index. Makes it easier to change. */ #define lg_last_avail(vq) ((vq)->last_avail_idx) @@ -228,7 +205,8 @@ static bool iov_empty(const struct iovec iov[], unsigned int num_iov) } /* Take len bytes from the front of this iovec. */ -static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len) +static void iov_consume(struct iovec iov[], unsigned num_iov, + void *dest, unsigned len) { unsigned int i; @@ -236,11 +214,16 @@ static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len) unsigned int used; used = iov[i].iov_len < len ? iov[i].iov_len : len; + if (dest) { + memcpy(dest, iov[i].iov_base, used); + dest += used; + } iov[i].iov_base += used; iov[i].iov_len -= used; len -= used; } - assert(len == 0); + if (len != 0) + errx(1, "iovec too short!"); } /* The device virtqueue descriptors are followed by feature bitmasks. */ @@ -864,7 +847,7 @@ static void console_output(struct virtqueue *vq) warn("Write to stdout gave %i (%d)", len, errno); break; } - iov_consume(iov, out, len); + iov_consume(iov, out, NULL, len); } /* @@ -1591,9 +1574,9 @@ static void blk_request(struct virtqueue *vq) { struct vblk_info *vblk = vq->dev->priv; unsigned int head, out_num, in_num, wlen; - int ret; + int ret, i; u8 *in; - struct virtio_blk_outhdr *out; + struct virtio_blk_outhdr out; struct iovec iov[vq->vring.num]; off64_t off; @@ -1603,32 +1586,36 @@ static void blk_request(struct virtqueue *vq) */ head = wait_for_vq_desc(vq, iov, &out_num, &in_num); - /* - * Every block request should contain at least one output buffer - * (detailing the location on disk and the type of request) and one - * input buffer (to hold the result). - */ - if (out_num == 0 || in_num == 0) - errx(1, "Bad virtblk cmd %u out=%u in=%u", - head, out_num, in_num); + /* Copy the output header from the front of the iov (adjusts iov) */ + iov_consume(iov, out_num, &out, sizeof(out)); + + /* Find and trim end of iov input array, for our status byte. */ + in = NULL; + for (i = out_num + in_num - 1; i >= out_num; i--) { + if (iov[i].iov_len > 0) { + in = iov[i].iov_base + iov[i].iov_len - 1; + iov[i].iov_len--; + break; + } + } + if (!in) + errx(1, "Bad virtblk cmd with no room for status"); - out = convert(&iov[0], struct virtio_blk_outhdr); - in = convert(&iov[out_num+in_num-1], u8); /* * For historical reasons, block operations are expressed in 512 byte * "sectors". */ - off = out->sector * 512; + off = out.sector * 512; /* * In general the virtio block driver is allowed to try SCSI commands. * It'd be nice if we supported eject, for example, but we don't. */ - if (out->type & VIRTIO_BLK_T_SCSI_CMD) { + if (out.type & VIRTIO_BLK_T_SCSI_CMD) { fprintf(stderr, "Scsi commands unsupported\n"); *in = VIRTIO_BLK_S_UNSUPP; wlen = sizeof(*in); - } else if (out->type & VIRTIO_BLK_T_OUT) { + } else if (out.type & VIRTIO_BLK_T_OUT) { /* * Write * @@ -1636,10 +1623,10 @@ static void blk_request(struct virtqueue *vq) * if they try to write past end. */ if (lseek64(vblk->fd, off, SEEK_SET) != off) - err(1, "Bad seek to sector %llu", out->sector); + err(1, "Bad seek to sector %llu", out.sector); - ret = writev(vblk->fd, iov+1, out_num-1); - verbose("WRITE to sector %llu: %i\n", out->sector, ret); + ret = writev(vblk->fd, iov, out_num); + verbose("WRITE to sector %llu: %i\n", out.sector, ret); /* * Grr... Now we know how long the descriptor they sent was, we @@ -1655,7 +1642,7 @@ static void blk_request(struct virtqueue *vq) wlen = sizeof(*in); *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); - } else if (out->type & VIRTIO_BLK_T_FLUSH) { + } else if (out.type & VIRTIO_BLK_T_FLUSH) { /* Flush */ ret = fdatasync(vblk->fd); verbose("FLUSH fdatasync: %i\n", ret); @@ -1669,10 +1656,9 @@ static void blk_request(struct virtqueue *vq) * if they try to read past end. */ if (lseek64(vblk->fd, off, SEEK_SET) != off) - err(1, "Bad seek to sector %llu", out->sector); + err(1, "Bad seek to sector %llu", out.sector); - ret = readv(vblk->fd, iov+1, in_num-1); - verbose("READ from sector %llu: %i\n", out->sector, ret); + ret = readv(vblk->fd, iov + out_num, in_num); if (ret >= 0) { wlen = sizeof(*in) + ret; *in = VIRTIO_BLK_S_OK; @@ -1758,7 +1744,7 @@ static void rng_input(struct virtqueue *vq) len = readv(rng_info->rfd, iov, in_num); if (len <= 0) err(1, "Read from /dev/random gave %i", len); - iov_consume(iov, in_num, len); + iov_consume(iov, in_num, NULL, len); totlen += len; } diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index 6d25dcd2e97a..fcc9aa25fd08 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -164,7 +164,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, r = virtqueue_add_buf(vq->vq, &sl, 1, 0, dev->buf + started, GFP_ATOMIC); - if (likely(r >= 0)) { + if (likely(r == 0)) { ++started; virtqueue_kick(vq->vq); } @@ -177,7 +177,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, r = 0; } - } while (r >= 0); + } while (r == 0); if (completed == completed_before) ++spurious; assert(completed <= bufs); |