diff options
author | Tom Rini <trini@konsulko.com> | 2025-05-29 08:27:13 -0600 |
---|---|---|
committer | Tom Rini <trini@konsulko.com> | 2025-05-29 08:27:13 -0600 |
commit | 23be77e18d19ddb9c2ecdf71638bacfbc369fd13 (patch) | |
tree | 73d82f75a865be88af0e376baa0fc2980f1b4b48 | |
parent | 2f3766949bbea7aa5a472157561d387fd94205d2 (diff) | |
parent | 6990cc5257283a631a286a4321a3515c7537f636 (diff) |
Merge patch series "regex patches"
Rasmus Villemoes <ravi@prevas.dk> says:
This started as a rather simple patch, 1/12, adding the ability to
more conveniently do regex matching in shell.
But with that, it became very easy to see what the slre library can
and especially what it cannot do, and that way I found both outright
bugs and a "wow, doesn't it support that syntax" gotcha. I couldn't
find any tests ('git grep slre -- test/' was empty), so I added a
small test suite and tweaked slre.c.
Link: https://lore.kernel.org/r/20250513084034.654865-1-ravi@prevas.dk
-rw-r--r-- | MAINTAINERS | 7 | ||||
-rw-r--r-- | cmd/test.c | 19 | ||||
-rw-r--r-- | doc/usage/cmd/setexpr.rst | 5 | ||||
-rw-r--r-- | doc/usage/cmd/test.rst | 102 | ||||
-rw-r--r-- | doc/usage/index.rst | 1 | ||||
-rw-r--r-- | include/slre.h | 1 | ||||
-rw-r--r-- | lib/slre.c | 78 | ||||
-rw-r--r-- | test/lib/Makefile | 1 | ||||
-rw-r--r-- | test/lib/slre.c | 58 |
9 files changed, 244 insertions, 28 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index d62dd35a385..111e2767917 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1628,6 +1628,13 @@ F: drivers/gpio/sl28cpld-gpio.c F: drivers/misc/sl28cpld.c F: drivers/watchdog/sl28cpld-wdt.c +SLRE +M: Rasmus Villemoes <ravi@prevas.dk> +S: Maintained +F: include/slre.h +F: lib/slre.c +F: test/lib/slre.c + SMCCC TRNG M: Etienne Carriere <etienne.carriere@linaro.org> S: Maintained diff --git a/cmd/test.c b/cmd/test.c index b4c3eabf9f6..a42a523d33d 100644 --- a/cmd/test.c +++ b/cmd/test.c @@ -7,6 +7,7 @@ #include <command.h> #include <fs.h> #include <log.h> +#include <slre.h> #include <vsprintf.h> #define OP_INVALID 0 @@ -26,6 +27,7 @@ #define OP_INT_GT 14 #define OP_INT_GE 15 #define OP_FILE_EXISTS 16 +#define OP_REGEX 17 const struct { int arg; @@ -49,6 +51,9 @@ const struct { {0, "-z", OP_STR_EMPTY, 2}, {0, "-n", OP_STR_NEMPTY, 2}, {0, "-e", OP_FILE_EXISTS, 4}, +#ifdef CONFIG_REGEX + {1, "=~", OP_REGEX, 3}, +#endif }; static int do_test(struct cmd_tbl *cmdtp, int flag, int argc, @@ -141,6 +146,20 @@ static int do_test(struct cmd_tbl *cmdtp, int flag, int argc, case OP_FILE_EXISTS: expr = file_exists(ap[1], ap[2], ap[3], FS_TYPE_ANY); break; +#ifdef CONFIG_REGEX + case OP_REGEX: { + struct slre slre; + + if (slre_compile(&slre, ap[2]) == 0) { + printf("Error compiling regex: %s\n", slre.err_str); + expr = 0; + break; + } + + expr = slre_match(&slre, ap[0], strlen(ap[0]), NULL); + break; + } +#endif } switch (op) { diff --git a/doc/usage/cmd/setexpr.rst b/doc/usage/cmd/setexpr.rst index 593a0ea91e1..5bc37ae50fc 100644 --- a/doc/usage/cmd/setexpr.rst +++ b/doc/usage/cmd/setexpr.rst @@ -144,8 +144,9 @@ Configuration * The *setexpr* command is only available if CMD_SETEXPR=y. * The *setexpr fmt* sub-command is only available if CMD_SETEXPR_FMT=y. -* The *setexpr gsub* and *setexpr sub* sub-commands are only available if - CONFIG_REGEX=y. +* The *setexpr gsub* and *setexpr sub* sub-commands are only available + if CONFIG_REGEX=y. For an overview of the supported regex syntax, + see :doc:`test`. Return value ------------ diff --git a/doc/usage/cmd/test.rst b/doc/usage/cmd/test.rst new file mode 100644 index 00000000000..d1379117fca --- /dev/null +++ b/doc/usage/cmd/test.rst @@ -0,0 +1,102 @@ +.. SPDX-License-Identifier: GPL-2.0-or-later + +.. index:: + single: test (command) + +test command +============ + +Synopsis +-------- + +:: + + test <str-op> <s> + test <s1> <str-cmp> <s2> + test <n1> <num-cmp> <n2> + test ! <expr> + test <expr1> -o <expr2> + test <expr1> -a <expr2> + test -e <interface> <dev[:part]> <path> + test <s> =~ <re> + +Description +----------- + +The ``test`` command is similar to the ordinary shell built-in by the +same name. Unlike in ordinary shells, it cannot be spelled ``[``. + +Strings +~~~~~~~ + +The string tests ``-n`` and ``-z``, and string comparison operators +``=``, ``!=``, ``<`` and ``>``, work exactly as in ordinary shells. + +Numbers +~~~~~~~ + +The number comparison operators ``-lt``, ``-le``, ``-gt``, ``-gt``, +``-eq`` and ``-ne`` work as in ordinary shells. + +.. note:: + Numbers are parsed with ``simple_strtol(, 0)``, meaning that they + are treated as decimal unless there is a `0x` prefix, any errors in + parsing are ignored, and parsing stops as soon as a non-digit (for + the selected base) is encountered. And most U-Boot commands that + generate "numeric" environment variables store them as hexadecimal + *without* a `0x` prefix. + +For example, this is not a correct way of testing whether a given file +has a size less than 4KiB:: + + # Assuming readme.txt exists, sets 'filesize' environment variable + $ size mmc 0:1 readme.txt + $ if test "$filesize" -lt 4096 ; then ... + +If the file size is actually 8000 (decimal), its hexadecimal +representation, and thus the value of ``$filesize``, is ``1f40``, so +the comparison that is done ends up being "1 < 4096". + +Logic +~~~~~ + +The ``!`` operator negates the sense of the test of the expression +``<expr>``. + +The ``-o`` and ``-a`` operators perform logical OR and logical AND, +respectively, of the two expressions. + +File existence +~~~~~~~~~~~~~~ + +Like ordinary shells, the ``-e`` operator can be used to test for +existence of a file. However, the U-Boot version takes three +arguments: + +- The interface (e.g. ``mmc``). +- The device number, possibly including a partition specification. +- The usual path argument, which is interpreted relative to the root + of the filesystem. + +Regular expressions +~~~~~~~~~~~~~~~~~~~ + +When ``CONFIG_REGEX`` is enabled, an additional operator ``=~`` is +available. This is similar to the same operator available with bash's +extended test command ``[[ ]]``. The left operand is a string which is +matched against the regular expression described by the right operand. + +The regular expression engine supports these features: + +- Anchoring ``^`` and ``$``, matching at the beginning/end of the + string. +- Matching any single character (including whitespace) using ``.``. +- Character classes ``[ ]``, including ranges ``[0-9]`` and negation + ``[^ /.]``. +- Grouping ``( )``. +- Alternation ``|``. +- Postfix qualifiers ``*``, ``+`` and ``?`` and their non-greedy + variants ``*?``, ``+?`` and ``??`` + +For extracting the parts matching a capture group and/or performing +substitutions, including back references, see :doc:`setexpr`. diff --git a/doc/usage/index.rst b/doc/usage/index.rst index 372ef56c967..c5b45fd9290 100644 --- a/doc/usage/index.rst +++ b/doc/usage/index.rst @@ -123,6 +123,7 @@ Shell commands cmd/source cmd/tcpm cmd/temperature + cmd/test cmd/tftpput cmd/trace cmd/true diff --git a/include/slre.h b/include/slre.h index 4b41a4b276f..af5b1302d9c 100644 --- a/include/slre.h +++ b/include/slre.h @@ -63,7 +63,6 @@ struct slre { int code_size; int data_size; int num_caps; /* Number of bracket pairs */ - int anchored; /* Must match from string start */ const char *err_str; /* Error string */ }; diff --git a/lib/slre.c b/lib/slre.c index 277a59a03a7..117815a6d60 100644 --- a/lib/slre.c +++ b/lib/slre.c @@ -30,7 +30,7 @@ #include <slre.h> enum {END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL, - STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT}; + STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT, RANGE}; #ifdef SLRE_TEST static struct { @@ -55,7 +55,8 @@ static struct { {"QUEST", 1, "o"}, /* Match zero or one time, "?" */ {"SPACE", 0, ""}, /* Match whitespace, "\s" */ {"NONSPACE", 0, ""}, /* Match non-space, "\S" */ - {"DIGIT", 0, ""} /* Match digit, "\d" */ + {"DIGIT", 0, ""}, /* Match digit, "\d" */ + {"RANGE", 0, ""}, /* Range separator - */ }; #endif /* SLRE_TEST */ @@ -260,6 +261,15 @@ anyof(struct slre *r, const char **re) return; /* NOTREACHED */ break; + case '-': + if (r->data_size == old_data_size || **re == ']') { + /* First or last character, just match - itself. */ + store_char_in_data(r, '-'); + break; + } + store_char_in_data(r, 0); + store_char_in_data(r, RANGE); + break; case '\\': esc = get_escape_char(re); if ((esc & 0xff) == 0) { @@ -413,10 +423,7 @@ int slre_compile(struct slre *r, const char *re) { r->err_str = NULL; - r->code_size = r->data_size = r->num_caps = r->anchored = 0; - - if (*re == '^') - r->anchored++; + r->code_size = r->data_size = r->num_caps = 0; emit(r, OPEN); /* This will capture what matches full RE */ emit(r, 0); @@ -475,29 +482,54 @@ is_any_of(const unsigned char *p, int len, const char *s, int *ofs) ch = s[*ofs]; - for (i = 0; i < len; i++) - if (p[i] == ch) { - (*ofs)++; - return 1; + for (i = 0; i < len; i++) { + if (p[i] == '\0') { + switch (p[++i]) { + case NONSPACE: + if (!isspace(ch)) + goto match; + break; + case SPACE: + if (isspace(ch)) + goto match; + break; + case DIGIT: + if (isdigit(ch)) + goto match; + break; + case RANGE: + /* + * a-z is represented in the data array as {'a', \0, RANGE, 'z'} + */ + ++i; + if (p[i - 3] <= (unsigned char)ch && (unsigned char)ch <= p[i]) + goto match; + break; + } + continue; } + if (p[i] == ch) + goto match; + } return 0; + +match: + (*ofs)++; + return 1; } static int is_any_but(const unsigned char *p, int len, const char *s, int *ofs) { - int i, ch; - - ch = s[*ofs]; + int dummy = *ofs; - for (i = 0; i < len; i++) { - if (p[i] == ch) - return 0; + if (is_any_of(p, len, s, &dummy)) { + return 0; + } else { + (*ofs)++; + return 1; } - - (*ofs)++; - return 1; } static int @@ -650,13 +682,9 @@ slre_match(const struct slre *r, const char *buf, int len, { int i, ofs = 0, res = 0; - if (r->anchored) { + for (i = 0; i <= len && res == 0; i++) { + ofs = i; res = match(r, 0, buf, len, &ofs, caps); - } else { - for (i = 0; i < len && res == 0; i++) { - ofs = i; - res = match(r, 0, buf, len, &ofs, caps); - } } return res; diff --git a/test/lib/Makefile b/test/lib/Makefile index d620510f998..ff4ff63270d 100644 --- a/test/lib/Makefile +++ b/test/lib/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_SHA256) += test_sha256_hmac.o obj-$(CONFIG_HKDF_MBEDTLS) += test_sha256_hkdf.o obj-$(CONFIG_GETOPT) += getopt.o obj-$(CONFIG_CRC8) += test_crc8.o +obj-$(CONFIG_REGEX) += slre.o obj-$(CONFIG_UT_LIB_CRYPT) += test_crypt.o obj-$(CONFIG_UT_TIME) += time.o obj-$(CONFIG_$(PHASE_)UT_UNICODE) += unicode.o diff --git a/test/lib/slre.c b/test/lib/slre.c new file mode 100644 index 00000000000..ff2386d614a --- /dev/null +++ b/test/lib/slre.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +#include <test/lib.h> +#include <test/ut.h> +#include <slre.h> + +struct re_test { + const char *str; + const char *re; + int match; +}; + +static const struct re_test re_test[] = { + { "123", "^\\d+$", 1}, + { "x23", "^\\d+$", 0}, + { "banana", "^([bn]a)*$", 1}, + { "panama", "^([bn]a)*$", 0}, + { "xby", "^a|b", 1}, + { "xby", "b|^a", 1}, + { "xby", "b|c$", 1}, + { "xby", "c$|b", 1}, + { "", "x*$", 1}, + { "", "^x*$", 1}, + { "yy", "x*$", 1}, + { "yy", "^x*$", 0}, + { "Gadsby", "^[^eE]*$", 1}, + { "Ernest", "^[^eE]*$", 0}, + { "6d41f0a39d6", "^[0123456789abcdef]*$", 1 }, + /* DIGIT is 17 */ + { "##\x11%%\x11", "^[#%\\d]*$", 0 }, + { "##23%%45", "^[#%\\d]*$", 1 }, + { "U-Boot", "^[B-Uo-t]*$", 0 }, + { "U-Boot", "^[A-Zm-v-]*$", 1 }, + { "U-Boot", "^[-A-Za-z]*$", 1 }, + /* The range --C covers both - and B. */ + { "U-Boot", "^[--CUot]*$", 1 }, + { "U-Boot", "^[^0-9]*$", 1 }, + { "U-Boot", "^[^0-9<->]*$", 1 }, + { "U-Boot", "^[^0-9<\\->]*$", 0 }, + {} +}; + +static int lib_slre(struct unit_test_state *uts) +{ + const struct re_test *t; + + for (t = re_test; t->str; t++) { + struct slre slre; + + ut_assert(slre_compile(&slre, t->re)); + ut_assertf(!!slre_match(&slre, t->str, strlen(t->str), NULL) == t->match, + "'%s' unexpectedly %s '%s'\n", t->str, + t->match ? "didn't match" : "matched", t->re); + } + + return 0; +} +LIB_TEST(lib_slre, 0); |