From 159ef31e81edd75225c6a844115faf999de67b38 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 26 Feb 2019 15:32:29 +0100 Subject: device.h: reorganize struct device struct device is big, around 760 bytes on x86_64. It's not a critical structure, but it is embedded everywhere, so making it smaller is always a good thing. With a recent patch that moved a field from struct device to the private structure, some benchmarks showed a very odd regression, despite this structure having nothing to do with those benchmarks. That caused me to look into the layout of the structure. Using 'pahole', it showed a number of holes and ways that the structure could be reordered in order to align some cachelines better, as well as reduce the size of the overall structure. Move 'struct kobj' to the start of the structure, to keep that access in the first cacheline, and try to organize things a bit more compactly where possible By doing these few moves, the result removes at least 8 bytes from 'struct device' on a 64bit system. Given we know there are systems with at least 30k devices in memory at once, every little byte counts, and this change could be a savings of 240k of kernel memory for them. On "normal" systems the overall memory savings would be much less. Cc: "Rafael J. Wysocki" Cc: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 4e6987e11f68..4457e560bc2b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -976,18 +976,14 @@ struct dev_links_info { * a higher-level representation of the device. */ struct device { + struct kobject kobj; struct device *parent; struct device_private *p; - struct kobject kobj; const char *init_name; /* initial name of the device */ const struct device_type *type; - struct mutex mutex; /* mutex to synchronize calls to - * its driver. - */ - struct bus_type *bus; /* type of bus device is on */ struct device_driver *driver; /* which driver has allocated this device */ @@ -995,6 +991,10 @@ struct device { core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ + struct mutex mutex; /* mutex to synchronize calls to + * its driver. + */ + struct dev_links_info links; struct dev_pm_info power; struct dev_pm_domain *pm_domain; @@ -1009,9 +1009,6 @@ struct device { struct list_head msi_list; #endif -#ifdef CONFIG_NUMA - int numa_node; /* NUMA node this device is close to */ -#endif const struct dma_map_ops *dma_ops; u64 *dma_mask; /* dma mask (if dma'able device) */ u64 coherent_dma_mask;/* Like dma_mask, but for @@ -1040,6 +1037,9 @@ struct device { struct device_node *of_node; /* associated device tree node */ struct fwnode_handle *fwnode; /* firmware device node */ +#ifdef CONFIG_NUMA + int numa_node; /* NUMA node this device is close to */ +#endif dev_t devt; /* dev_t, creates the sysfs "dev" */ u32 id; /* device instance */ -- cgit v1.2.3 From 60574d1e05b094d222162260dd9cac49f4d0996a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 11 Mar 2019 14:55:57 -0600 Subject: acpi: Create subtable parsing infrastructure Parsing entries in an ACPI table had assumed a generic header structure. There is no standard ACPI header, though, so less common layouts with different field sizes required custom parsers to go through their subtable entry list. Create the infrastructure for adding different table types so parsing the entries array may be more reused for all ACPI system tables and the common code doesn't need to be duplicated. Reviewed-by: Rafael J. Wysocki Acked-by: Jonathan Cameron Tested-by: Jonathan Cameron Signed-off-by: Keith Busch Tested-by: Brice Goglin Signed-off-by: Greg Kroah-Hartman --- include/linux/acpi.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d5dcebd7aad3..9494d42bf507 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -141,10 +141,13 @@ enum acpi_address_range_id { /* Table Handlers */ +union acpi_subtable_headers { + struct acpi_subtable_header common; +}; typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); -typedef int (*acpi_tbl_entry_handler)(struct acpi_subtable_header *header, +typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, const unsigned long end); /* Debugger support */ -- cgit v1.2.3 From 3bc0e8eb179deebf1c06f5c4261d362c24b26ce1 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 11 Mar 2019 14:55:58 -0600 Subject: acpi: Add HMAT to generic parsing tables The Heterogeneous Memory Attribute Table (HMAT) header has different field lengths than the existing parsing uses. Add the HMAT type to the parsing rules so it may be generically parsed. Reviewed-by: Rafael J. Wysocki Acked-by: Jonathan Cameron Tested-by: Jonathan Cameron Signed-off-by: Keith Busch Tested-by: Brice Goglin Signed-off-by: Greg Kroah-Hartman --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9494d42bf507..7c7515b0767e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -143,6 +143,7 @@ enum acpi_address_range_id { /* Table Handlers */ union acpi_subtable_headers { struct acpi_subtable_header common; + struct acpi_hmat_structure hmat; }; typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); -- cgit v1.2.3 From 08d9dbe72b1f899468b2b34f9309e88a84f440f2 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 11 Mar 2019 14:56:00 -0600 Subject: node: Link memory nodes to their compute nodes Systems may be constructed with various specialized nodes. Some nodes may provide memory, some provide compute devices that access and use that memory, and others may provide both. Nodes that provide memory are referred to as memory targets, and nodes that can initiate memory access are referred to as memory initiators. Memory targets will often have varying access characteristics from different initiators, and platforms may have ways to express those relationships. In preparation for these systems, provide interfaces for the kernel to export the memory relationship among different nodes memory targets and their initiators with symlinks to each other. If a system provides access locality for each initiator-target pair, nodes may be grouped into ranked access classes relative to other nodes. The new interface allows a subsystem to register relationships of varying classes if available and desired to be exported. A memory initiator may have multiple memory targets in the same access class. The target memory's initiators in a given class indicate the nodes access characteristics share the same performance relative to other linked initiator nodes. Each target within an initiator's access class, though, do not necessarily perform the same as each other. A memory target node may have multiple memory initiators. All linked initiators in a target's class have the same access characteristics to that target. The following example show the nodes' new sysfs hierarchy for a memory target node 'Y' with access class 0 from initiator node 'X': # symlinks -v /sys/devices/system/node/nodeX/access0/ relative: /sys/devices/system/node/nodeX/access0/targets/nodeY -> ../../nodeY # symlinks -v /sys/devices/system/node/nodeY/access0/ relative: /sys/devices/system/node/nodeY/access0/initiators/nodeX -> ../../nodeX The new attributes are added to the sysfs stable documentation. Reviewed-by: Jonathan Cameron Signed-off-by: Keith Busch Reviewed-by: Rafael J. Wysocki Tested-by: Brice Goglin Signed-off-by: Greg Kroah-Hartman --- include/linux/node.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/node.h b/include/linux/node.h index 257bb3d6d014..bb288817ed33 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -17,10 +17,12 @@ #include #include +#include #include struct node { struct device dev; + struct list_head access_list; #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) struct work_struct node_work; @@ -75,6 +77,10 @@ extern int register_mem_sect_under_node(struct memory_block *mem_blk, extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, unsigned long phys_index); +extern int register_memory_node_under_compute_node(unsigned int mem_nid, + unsigned int cpu_nid, + unsigned access); + #ifdef CONFIG_HUGETLBFS extern void register_hugetlbfs_with_node(node_registration_func_t doregister, node_registration_func_t unregister); -- cgit v1.2.3 From e1cf33aafb8462c7d0a0e6349925870316f040ee Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 11 Mar 2019 14:56:01 -0600 Subject: node: Add heterogenous memory access attributes Heterogeneous memory systems provide memory nodes with different latency and bandwidth performance attributes. Provide a new kernel interface for subsystems to register the attributes under the memory target node's initiator access class. If the system provides this information, applications may query these attributes when deciding which node to request memory. The following example shows the new sysfs hierarchy for a node exporting performance attributes: # tree -P "read*|write*"/sys/devices/system/node/nodeY/accessZ/initiators/ /sys/devices/system/node/nodeY/accessZ/initiators/ |-- read_bandwidth |-- read_latency |-- write_bandwidth `-- write_latency The bandwidth is exported as MB/s and latency is reported in nanoseconds. The values are taken from the platform as reported by the manufacturer. Memory accesses from an initiator node that is not one of the memory's access "Z" initiator nodes linked in the same directory may observe different performance than reported here. When a subsystem makes use of this interface, initiators of a different access number may not have the same performance relative to initiators in other access numbers, or omitted from the any access class' initiators. Descriptions for memory access initiator performance access attributes are added to sysfs stable documentation. Acked-by: Jonathan Cameron Tested-by: Jonathan Cameron Signed-off-by: Keith Busch Reviewed-by: Rafael J. Wysocki Tested-by: Brice Goglin Signed-off-by: Greg Kroah-Hartman --- include/linux/node.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/node.h b/include/linux/node.h index bb288817ed33..4139d728f8b3 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -20,6 +20,32 @@ #include #include +/** + * struct node_hmem_attrs - heterogeneous memory performance attributes + * + * @read_bandwidth: Read bandwidth in MB/s + * @write_bandwidth: Write bandwidth in MB/s + * @read_latency: Read latency in nanoseconds + * @write_latency: Write latency in nanoseconds + */ +struct node_hmem_attrs { + unsigned int read_bandwidth; + unsigned int write_bandwidth; + unsigned int read_latency; + unsigned int write_latency; +}; + +#ifdef CONFIG_HMEM_REPORTING +void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, + unsigned access); +#else +static inline void node_set_perf_attrs(unsigned int nid, + struct node_hmem_attrs *hmem_attrs, + unsigned access) +{ +} +#endif + struct node { struct device dev; struct list_head access_list; -- cgit v1.2.3 From acc02a109b0497e917c83f986a89c51e47d0022c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 11 Mar 2019 14:56:02 -0600 Subject: node: Add memory-side caching attributes System memory may have caches to help improve access speed to frequently requested address ranges. While the system provided cache is transparent to the software accessing these memory ranges, applications can optimize their own access based on cache attributes. Provide a new API for the kernel to register these memory-side caches under the memory node that provides it. The new sysfs representation is modeled from the existing cpu cacheinfo attributes, as seen from /sys/devices/system/cpu//cache/. Unlike CPU cacheinfo though, the node cache level is reported from the view of the memory. A higher level number is nearer to the CPU, while lower levels are closer to the last level memory. The exported attributes are the cache size, the line size, associativity indexing, and write back policy, and add the attributes for the system memory caches to sysfs stable documentation. Signed-off-by: Keith Busch Reviewed-by: Rafael J. Wysocki Reviewed-by: Brice Goglin Tested-by: Brice Goglin Signed-off-by: Greg Kroah-Hartman --- include/linux/node.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/node.h b/include/linux/node.h index 4139d728f8b3..1a557c589ecb 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -35,10 +35,45 @@ struct node_hmem_attrs { unsigned int write_latency; }; +enum cache_indexing { + NODE_CACHE_DIRECT_MAP, + NODE_CACHE_INDEXED, + NODE_CACHE_OTHER, +}; + +enum cache_write_policy { + NODE_CACHE_WRITE_BACK, + NODE_CACHE_WRITE_THROUGH, + NODE_CACHE_WRITE_OTHER, +}; + +/** + * struct node_cache_attrs - system memory caching attributes + * + * @indexing: The ways memory blocks may be placed in cache + * @write_policy: Write back or write through policy + * @size: Total size of cache in bytes + * @line_size: Number of bytes fetched on a cache miss + * @level: The cache hierarchy level + */ +struct node_cache_attrs { + enum cache_indexing indexing; + enum cache_write_policy write_policy; + u64 size; + u16 line_size; + u8 level; +}; + #ifdef CONFIG_HMEM_REPORTING +void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, unsigned access); #else +static inline void node_add_cache(unsigned int nid, + struct node_cache_attrs *cache_attrs) +{ +} + static inline void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, unsigned access) @@ -53,6 +88,10 @@ struct node { #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) struct work_struct node_work; #endif +#ifdef CONFIG_HMEM_REPORTING + struct list_head cache_attrs; + struct device *cache_dev; +#endif }; struct memory_block; -- cgit v1.2.3 From 0d1a393d61e44e2755eeff2e62fc8e91f8b296b6 Mon Sep 17 00:00:00 2001 From: Christina Quast Date: Tue, 2 Apr 2019 16:01:46 +0200 Subject: fs: kernfs: Corrected spelling mistake flies => files Signed-off-by: Christina Quast Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index c8893f663470..e446ab97ee0c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -64,7 +64,7 @@ enum kernfs_root_flag { KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001, /* - * For regular flies, if the opener has CAP_DAC_OVERRIDE, open(2) + * For regular files, if the opener has CAP_DAC_OVERRIDE, open(2) * succeeds regardless of the RW permissions. sysfs had an extra * layer of enforcement where open(2) fails with -EACCES regardless * of CAP_DAC_OVERRIDE if the permission doesn't have the -- cgit v1.2.3 From aa30f47cf666111f6bbfd15f290a27e8a7b9d854 Mon Sep 17 00:00:00 2001 From: Kimberly Brown Date: Mon, 1 Apr 2019 22:51:18 -0400 Subject: kobject: Add support for default attribute groups to kobj_type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kobj_type currently uses a list of individual attributes to store default attributes. Attribute groups are more flexible than a list of attributes because groups provide support for attribute visibility. So, add support for default attribute groups to kobj_type. In future patches, the existing uses of kobj_type’s attribute list will be converted to attribute groups. When that is complete, kobj_type’s attribute list, “default_attrs”, will be removed. Signed-off-by: Kimberly Brown Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 1ab0d624fb36..e2ca0a292e21 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -139,7 +139,8 @@ static inline bool kobject_has_children(struct kobject *kobj) struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; - struct attribute **default_attrs; + struct attribute **default_attrs; /* use default_groups instead */ + const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); const void *(*namespace)(struct kobject *kobj); void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid); -- cgit v1.2.3