From eca2040972b411ec27483bf75dc8b84e730e88ff Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:34 +0200 Subject: scsi: block: ioprio: Clean up interface definition The I/O priority user interface defines the 16-bits ioprio values as the combination of the upper 3-bits for an I/O priority class and the lower 13-bits as priority data. However, the kernel only uses the lower 3-bits of the priority data to define priority levels for the RT and BE priority classes. The data part of an ioprio value is completely ignored for the IDLE and NONE classes. This is enforced by checks done in ioprio_check_cap(), which is called for all paths that allow defining an I/O priority for I/Os: the per-context ioprio_set() system call, aio interface and io_uring interface. Clarify this fact in the uapi ioprio.h header file and introduce the IOPRIO_PRIO_LEVEL_MASK and IOPRIO_PRIO_LEVEL() macros for users to define and get priority levels in an ioprio value. The coarser macro IOPRIO_PRIO_DATA() is retained for backward compatibility with old applications already using it. There is no functional change introduced with this. In-kernel users of the IOPRIO_PRIO_DATA() macro which are explicitly handling I/O priority data as a priority level are modified to use the new IOPRIO_PRIO_LEVEL() macro without any functional change. Since f2fs is the only user of this macro not explicitly using that value as a priority level, it is left unchanged. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-2-nks@flawful.org Signed-off-by: Martin K. Petersen --- include/uapi/linux/ioprio.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index f70f2596a6bf..4444b4e4fdad 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -17,7 +17,7 @@ ((data) & IOPRIO_PRIO_MASK)) /* - * These are the io priority groups as implemented by the BFQ and mq-deadline + * These are the io priority classes as implemented by the BFQ and mq-deadline * schedulers. RT is the realtime class, it always gets premium service. For * ATA disks supporting NCQ IO priority, RT class IOs will be processed using * high priority NCQ commands. BE is the best-effort scheduling class, the @@ -32,11 +32,20 @@ enum { }; /* - * The RT and BE priority classes both support up to 8 priority levels. + * The RT and BE priority classes both support up to 8 priority levels that + * can be specified using the lower 3-bits of the priority data. */ -#define IOPRIO_NR_LEVELS 8 -#define IOPRIO_BE_NR IOPRIO_NR_LEVELS +#define IOPRIO_LEVEL_NR_BITS 3 +#define IOPRIO_NR_LEVELS (1 << IOPRIO_LEVEL_NR_BITS) +#define IOPRIO_LEVEL_MASK (IOPRIO_NR_LEVELS - 1) +#define IOPRIO_PRIO_LEVEL(ioprio) ((ioprio) & IOPRIO_LEVEL_MASK) +#define IOPRIO_BE_NR IOPRIO_NR_LEVELS + +/* + * Possible values for the "which" argument of the ioprio_get() and + * ioprio_set() system calls (see "man ioprio_set"). + */ enum { IOPRIO_WHO_PROCESS = 1, IOPRIO_WHO_PGRP, @@ -44,7 +53,7 @@ enum { }; /* - * Fallback BE priority level. + * Fallback BE class priority level. */ #define IOPRIO_NORM 4 #define IOPRIO_BE_NORM IOPRIO_NORM -- cgit v1.2.3 From 6c913257226a25879bfd6226e0ee265e98904ce6 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 May 2023 03:13:35 +0200 Subject: scsi: block: Introduce ioprio hints I/O priorities currently only use 6-bits of the 16-bits ioprio value: the 3-upper bits are used to define up to 8 priority classes (4 of which are valid) and the 3 lower bits of the value are used to define a priority level for the real-time and best-effort class. The remaining 10-bits between the I/O priority class and level are unused, and in fact, cannot be used by the user as doing so would either result in the value being completely ignored, or in an error returned by ioprio_check_cap(). Use these 10-bits of an ioprio value to allow a user to specify I/O hints. An I/O hint is defined as a 10-bitsvalue, allowing up to 1023 different hints to be specified, with the value 0 being reserved as the "no hint" case. An I/O hint can apply to any I/O that specifies a valid priority class other than NONE, regardless of the I/O priority level specified. To do so, the macros IOPRIO_PRIO_HINT() and IOPRIO_PRIO_VALUE_HINT() are introduced in include/uapi/linux/ioprio.h to respectively allow a user to get and set a hint in an ioprio value. To support the ATA and SCSI command duration limits feature, 7 hints are defined: IOPRIO_HINT_DEV_DURATION_LIMIT_1 to IOPRIO_HINT_DEV_DURATION_LIMIT_7, allowing a user to specify which command duration limit descriptor should be applied to the commands serving an I/O. Specifying these hints has for now no effect whatsoever if the target block devices do not support the command duration limits feature. However, in the future, block I/O schedulers can be modified to optimize I/O issuing order based on these hints, even for devices that do not support the command duration limits feature. Given that the 7 duration limits hints defined have no effect on any block layer component, the actual definition of the duration limits implied by these hints remains at the device level. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20230511011356.227789-3-nks@flawful.org Signed-off-by: Martin K. Petersen --- include/uapi/linux/ioprio.h | 49 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 4444b4e4fdad..4c4806e8230b 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -58,4 +58,53 @@ enum { #define IOPRIO_NORM 4 #define IOPRIO_BE_NORM IOPRIO_NORM +/* + * The 10 bits between the priority class and the priority level are used to + * optionally define I/O hints for any combination of I/O priority class and + * level. Depending on the kernel configuration, I/O scheduler being used and + * the target I/O device being used, hints can influence how I/Os are processed + * without affecting the I/O scheduling ordering defined by the I/O priority + * class and level. + */ +#define IOPRIO_HINT_SHIFT IOPRIO_LEVEL_NR_BITS +#define IOPRIO_HINT_NR_BITS 10 +#define IOPRIO_NR_HINTS (1 << IOPRIO_HINT_NR_BITS) +#define IOPRIO_HINT_MASK (IOPRIO_NR_HINTS - 1) +#define IOPRIO_PRIO_HINT(ioprio) \ + (((ioprio) >> IOPRIO_HINT_SHIFT) & IOPRIO_HINT_MASK) + +/* + * Alternate macro for IOPRIO_PRIO_VALUE() to define an I/O priority with + * a class, level and hint. + */ +#define IOPRIO_PRIO_VALUE_HINT(class, level, hint) \ + ((((class) & IOPRIO_CLASS_MASK) << IOPRIO_CLASS_SHIFT) | \ + (((hint) & IOPRIO_HINT_MASK) << IOPRIO_HINT_SHIFT) | \ + ((level) & IOPRIO_LEVEL_MASK)) + +/* + * I/O hints. + */ +enum { + /* No hint */ + IOPRIO_HINT_NONE = 0, + + /* + * Device command duration limits: indicate to the device a desired + * duration limit for the commands that will be used to process an I/O. + * These will currently only be effective for SCSI and ATA devices that + * support the command duration limits feature. If this feature is + * enabled, then the commands issued to the device to process an I/O with + * one of these hints set will have the duration limit index (dld field) + * set to the value of the hint. + */ + IOPRIO_HINT_DEV_DURATION_LIMIT_1 = 1, + IOPRIO_HINT_DEV_DURATION_LIMIT_2 = 2, + IOPRIO_HINT_DEV_DURATION_LIMIT_3 = 3, + IOPRIO_HINT_DEV_DURATION_LIMIT_4 = 4, + IOPRIO_HINT_DEV_DURATION_LIMIT_5 = 5, + IOPRIO_HINT_DEV_DURATION_LIMIT_6 = 6, + IOPRIO_HINT_DEV_DURATION_LIMIT_7 = 7, +}; + #endif /* _UAPI_LINUX_IOPRIO_H */ -- cgit v1.2.3 From 01584c1e233740519d0e11aa20daa323d26bf598 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 8 Jun 2023 18:55:56 +0900 Subject: scsi: block: Improve ioprio value validity checks The introduction of the macro IOPRIO_PRIO_LEVEL() in commit eca2040972b4 ("scsi: block: ioprio: Clean up interface definition") results in an iopriority level to always be masked using the macro IOPRIO_LEVEL_MASK, and thus to the kernel always seeing an acceptable value for an I/O priority level when checked in ioprio_check_cap(). Before this patch, this function would return an error for some (but not all) invalid values for a level valid range of [0..7]. Restore and improve the detection of invalid priority levels by introducing the inline function ioprio_value() to check an ioprio class, level and hint value before combining these fields into a single value to be used with ioprio_set() or AIOs. If an invalid value for the class, level or hint of an ioprio is detected, ioprio_value() returns an ioprio using the class IOPRIO_CLASS_INVALID, indicating an invalid value and causing ioprio_check_cap() to return -EINVAL. Fixes: 6c913257226a ("scsi: block: Introduce ioprio hints") Fixes: eca2040972b4 ("scsi: block: ioprio: Clean up interface definition") Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230608095556.124001-1-dlemoal@kernel.org Reviewed-by: Niklas Cassel Reviewed-by: Linus Walleij Signed-off-by: Martin K. Petersen --- include/uapi/linux/ioprio.h | 50 ++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h index 4c4806e8230b..99440b2e8c35 100644 --- a/include/uapi/linux/ioprio.h +++ b/include/uapi/linux/ioprio.h @@ -2,19 +2,20 @@ #ifndef _UAPI_LINUX_IOPRIO_H #define _UAPI_LINUX_IOPRIO_H +#include +#include + /* * Gives us 8 prio classes with 13-bits of data for each class */ #define IOPRIO_CLASS_SHIFT 13 -#define IOPRIO_CLASS_MASK 0x07 +#define IOPRIO_NR_CLASSES 8 +#define IOPRIO_CLASS_MASK (IOPRIO_NR_CLASSES - 1) #define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) #define IOPRIO_PRIO_CLASS(ioprio) \ (((ioprio) >> IOPRIO_CLASS_SHIFT) & IOPRIO_CLASS_MASK) #define IOPRIO_PRIO_DATA(ioprio) ((ioprio) & IOPRIO_PRIO_MASK) -#define IOPRIO_PRIO_VALUE(class, data) \ - ((((class) & IOPRIO_CLASS_MASK) << IOPRIO_CLASS_SHIFT) | \ - ((data) & IOPRIO_PRIO_MASK)) /* * These are the io priority classes as implemented by the BFQ and mq-deadline @@ -25,10 +26,13 @@ * served when no one else is using the disk. */ enum { - IOPRIO_CLASS_NONE, - IOPRIO_CLASS_RT, - IOPRIO_CLASS_BE, - IOPRIO_CLASS_IDLE, + IOPRIO_CLASS_NONE = 0, + IOPRIO_CLASS_RT = 1, + IOPRIO_CLASS_BE = 2, + IOPRIO_CLASS_IDLE = 3, + + /* Special class to indicate an invalid ioprio value */ + IOPRIO_CLASS_INVALID = 7, }; /* @@ -73,15 +77,6 @@ enum { #define IOPRIO_PRIO_HINT(ioprio) \ (((ioprio) >> IOPRIO_HINT_SHIFT) & IOPRIO_HINT_MASK) -/* - * Alternate macro for IOPRIO_PRIO_VALUE() to define an I/O priority with - * a class, level and hint. - */ -#define IOPRIO_PRIO_VALUE_HINT(class, level, hint) \ - ((((class) & IOPRIO_CLASS_MASK) << IOPRIO_CLASS_SHIFT) | \ - (((hint) & IOPRIO_HINT_MASK) << IOPRIO_HINT_SHIFT) | \ - ((level) & IOPRIO_LEVEL_MASK)) - /* * I/O hints. */ @@ -107,4 +102,25 @@ enum { IOPRIO_HINT_DEV_DURATION_LIMIT_7 = 7, }; +#define IOPRIO_BAD_VALUE(val, max) ((val) < 0 || (val) >= (max)) + +/* + * Return an I/O priority value based on a class, a level and a hint. + */ +static __always_inline __u16 ioprio_value(int class, int level, int hint) +{ + if (IOPRIO_BAD_VALUE(class, IOPRIO_NR_CLASSES) || + IOPRIO_BAD_VALUE(level, IOPRIO_NR_LEVELS) || + IOPRIO_BAD_VALUE(hint, IOPRIO_NR_HINTS)) + return IOPRIO_CLASS_INVALID << IOPRIO_CLASS_SHIFT; + + return (class << IOPRIO_CLASS_SHIFT) | + (hint << IOPRIO_HINT_SHIFT) | level; +} + +#define IOPRIO_PRIO_VALUE(class, level) \ + ioprio_value(class, level, IOPRIO_HINT_NONE) +#define IOPRIO_PRIO_VALUE_HINT(class, level, hint) \ + ioprio_value(class, level, hint) + #endif /* _UAPI_LINUX_IOPRIO_H */ -- cgit v1.2.3