diff options
Diffstat (limited to 'tools')
718 files changed, 21516 insertions, 6150 deletions
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 1334214546d7..100ad3dc091a 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -192,60 +192,77 @@ static int get_family_id(int sd) } #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1)) +#define delay_ms(t) (t / 1000000ULL) static void print_delayacct(struct taskstats *t) { - printf("\n\nCPU %15s%15s%15s%15s%15s\n" - " %15llu%15llu%15llu%15llu%15.3fms\n" - "IO %15s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "SWAP %15s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "RECLAIM %12s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "THRASHING%12s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "COMPACT %12s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "WPCOPY %12s%15s%15s\n" - " %15llu%15llu%15.3fms\n" - "IRQ %15s%15s%15s\n" - " %15llu%15llu%15.3fms\n", + printf("\n\nCPU %15s%15s%15s%15s%15s%15s\n" + " %15llu%15llu%15llu%15llu%15.3fms%13.6fms\n" + "IO %15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "SWAP %15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "RECLAIM %12s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "THRASHING%12s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "COMPACT %12s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "WPCOPY %12s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n" + "IRQ %15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms\n", "count", "real total", "virtual total", - "delay total", "delay average", + "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->cpu_count, (unsigned long long)t->cpu_run_real_total, (unsigned long long)t->cpu_run_virtual_total, (unsigned long long)t->cpu_delay_total, average_ms((double)t->cpu_delay_total, t->cpu_count), - "count", "delay total", "delay average", + delay_ms((double)t->cpu_delay_max), + delay_ms((double)t->cpu_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->blkio_count, (unsigned long long)t->blkio_delay_total, average_ms((double)t->blkio_delay_total, t->blkio_count), - "count", "delay total", "delay average", + delay_ms((double)t->blkio_delay_max), + delay_ms((double)t->blkio_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->swapin_count, (unsigned long long)t->swapin_delay_total, average_ms((double)t->swapin_delay_total, t->swapin_count), - "count", "delay total", "delay average", + delay_ms((double)t->swapin_delay_max), + delay_ms((double)t->swapin_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->freepages_count, (unsigned long long)t->freepages_delay_total, average_ms((double)t->freepages_delay_total, t->freepages_count), - "count", "delay total", "delay average", + delay_ms((double)t->freepages_delay_max), + delay_ms((double)t->freepages_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->thrashing_count, (unsigned long long)t->thrashing_delay_total, average_ms((double)t->thrashing_delay_total, t->thrashing_count), - "count", "delay total", "delay average", + delay_ms((double)t->thrashing_delay_max), + delay_ms((double)t->thrashing_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->compact_count, (unsigned long long)t->compact_delay_total, average_ms((double)t->compact_delay_total, t->compact_count), - "count", "delay total", "delay average", + delay_ms((double)t->compact_delay_max), + delay_ms((double)t->compact_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->wpcopy_count, (unsigned long long)t->wpcopy_delay_total, average_ms((double)t->wpcopy_delay_total, t->wpcopy_count), - "count", "delay total", "delay average", + delay_ms((double)t->wpcopy_delay_max), + delay_ms((double)t->wpcopy_delay_min), + "count", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->irq_count, (unsigned long long)t->irq_delay_total, - average_ms((double)t->irq_delay_total, t->irq_count)); + average_ms((double)t->irq_delay_total, t->irq_count), + delay_ms((double)t->irq_delay_max), + delay_ms((double)t->irq_delay_min)); } static void task_context_switch_counts(struct taskstats *t) diff --git a/tools/accounting/procacct.c b/tools/accounting/procacct.c index 90c4a37f53d9..e8dee05a6264 100644 --- a/tools/accounting/procacct.c +++ b/tools/accounting/procacct.c @@ -274,12 +274,11 @@ int main(int argc, char *argv[]) int maskset = 0; char *logfile = NULL; int cfd = 0; - int forking = 0; struct msgtemplate msg; - while (!forking) { - c = getopt(argc, argv, "m:vr:"); + while (1) { + c = getopt(argc, argv, "m:vr:w:"); if (c < 0) break; diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h index cd8420e8c3ad..150416682e2c 100644 --- a/tools/arch/arm64/include/asm/sysreg.h +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -11,6 +11,7 @@ #include <linux/bits.h> #include <linux/stringify.h> +#include <linux/kasan-tags.h> #include <asm/gpr-num.h> @@ -108,6 +109,9 @@ #define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x)) #define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x)) +/* Register-based PAN access, for save/restore purposes */ +#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3) + #define __SYS_BARRIER_INSN(CRm, op2, Rt) \ __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) @@ -123,6 +127,37 @@ #define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4) #define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6) +#define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0) +#define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0) +#define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1) + +#define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1) +#define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3) +#define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5) + +#define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1) +#define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3) +#define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5) + +#define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1) + +#define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1) +#define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3) +#define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5) + +#define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1) +#define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3) +#define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5) + +#define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1) +#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3) +#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5) + +/* Data cache zero operations */ +#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1) +#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3) +#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4) + /* * Automatically generated definitions for system registers, the * manual encodings below are in the process of being converted to @@ -162,6 +197,84 @@ #define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0) #define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) +#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0)) +#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0) +#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1)) +#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1) +#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2)) +#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2) +#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2) + +#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0) +#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1) +#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0) + +#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3) +#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3))) +#define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3))) +#define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6) +#define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0) +#define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0) +#define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0) +#define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2) +#define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2) +#define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0) +#define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6) +#define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6) +#define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5) +#define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5) +#define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5) +#define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0) +#define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6) +#define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7) +#define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0) +#define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0) +#define SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4) +#define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7) +#define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6) +#define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6) +#define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6) +#define SYS_TRCIDR13 sys_reg(2, 1, 0, 5, 6) +#define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7) +#define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7) +#define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7) +#define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7) +#define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7) +#define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7) +#define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7) +#define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6) +#define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6) +#define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7) +#define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1) +#define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4) +#define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0) +#define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1) +#define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4))) +#define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0) +#define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4) +#define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4) +#define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4) +#define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2) +#define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2) +#define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3) +#define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0) +#define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0) +#define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0) +#define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1) +#define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0) +#define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2) +#define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2) +#define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2) +#define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2) +#define SYS_TRCVMIDCCTLR0 sys_reg(2, 1, 3, 2, 2) +#define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2) +#define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1) + +/* ETM */ +#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4) + +#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0) + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -170,8 +283,6 @@ #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) #define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) -#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) - #define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) #define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0) @@ -202,15 +313,38 @@ #define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1) #define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2) #define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3) +#define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4) +#define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5) +#define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6) #define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0) #define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1) +#define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2) +#define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3) #define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0) #define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1) #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) #define SYS_PAR_EL1_F BIT(0) +/* When PAR_EL1.F == 1 */ #define SYS_PAR_EL1_FST GENMASK(6, 1) +#define SYS_PAR_EL1_PTW BIT(8) +#define SYS_PAR_EL1_S BIT(9) +#define SYS_PAR_EL1_AssuredOnly BIT(12) +#define SYS_PAR_EL1_TopLevel BIT(13) +#define SYS_PAR_EL1_Overlay BIT(14) +#define SYS_PAR_EL1_DirtyBit BIT(15) +#define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48) +#define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16)) +#define SYS_PAR_EL1_RES1 BIT(11) +/* When PAR_EL1.F == 0 */ +#define SYS_PAR_EL1_SH GENMASK_ULL(8, 7) +#define SYS_PAR_EL1_NS BIT(9) +#define SYS_PAR_EL1_F0_IMPDEF BIT(10) +#define SYS_PAR_EL1_NSE BIT(11) +#define SYS_PAR_EL1_PA GENMASK_ULL(51, 12) +#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56) +#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52)) /*** Statistical Profiling Extension ***/ #define PMSEVFR_EL1_RES0_IMP \ @@ -274,6 +408,8 @@ #define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) #define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) +#define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5) + #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) @@ -286,7 +422,6 @@ #define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) #define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3) #define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4) -#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5) #define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6) #define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7) #define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0) @@ -369,6 +504,7 @@ #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) #define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1) +#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3) #define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0) #define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1) #define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2) @@ -381,13 +517,15 @@ #define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0) #define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) -#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) -#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) -#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) +#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) #define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0) +#define SYS_SPSR_irq sys_reg(3, 4, 4, 3, 0) +#define SYS_SPSR_abt sys_reg(3, 4, 4, 3, 1) +#define SYS_SPSR_und sys_reg(3, 4, 4, 3, 2) +#define SYS_SPSR_fiq sys_reg(3, 4, 4, 3, 3) #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) #define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0) #define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1) @@ -449,24 +587,49 @@ #define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1) #define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2) +#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7) + +#define __AMEV_op2(m) (m & 0x7) +#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3)) +#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m)) +#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m) +#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m)) +#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m) #define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3) #define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0) +#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0) +#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1) +#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2) +#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0) +#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1) +#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2) /* VHE encodings for architectural EL0/1 system registers */ +#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) +#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) +#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3) +#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1) +#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) +#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3) #define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) #define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1) #define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) #define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) #define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) #define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) +#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) +#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0) #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) #define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) +#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1) +#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7) #define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) #define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) #define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1) @@ -477,6 +640,183 @@ #define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0) +/* AT instructions */ +#define AT_Op0 1 +#define AT_CRn 7 + +#define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0) +#define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1) +#define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2) +#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3) +#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0) +#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1) +#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2) +#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0) +#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1) +#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4) +#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5) +#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6) +#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7) +#define OP_AT_S1E2A sys_insn(AT_Op0, 4, AT_CRn, 9, 2) + +/* TLBI instructions */ +#define TLBI_Op0 1 + +#define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */ +#define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */ + +#define TLBI_CRn_XS 8 /* Extra Slow (the common one) */ +#define TLBI_CRn_nXS 9 /* not Extra Slow (which nobody uses)*/ + +#define TLBI_CRm_IPAIS 0 /* S2 Inner-Shareable */ +#define TLBI_CRm_nROS 1 /* non-Range, Outer-Sharable */ +#define TLBI_CRm_RIS 2 /* Range, Inner-Sharable */ +#define TLBI_CRm_nRIS 3 /* non-Range, Inner-Sharable */ +#define TLBI_CRm_IPAONS 4 /* S2 Outer and Non-Shareable */ +#define TLBI_CRm_ROS 5 /* Range, Outer-Sharable */ +#define TLBI_CRm_RNS 6 /* Range, Non-Sharable */ +#define TLBI_CRm_nRNS 7 /* non-Range, Non-Sharable */ + +#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0) +#define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1) +#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2) +#define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3) +#define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5) +#define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7) +#define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1) +#define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3) +#define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5) +#define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7) +#define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0) +#define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1) +#define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2) +#define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3) +#define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5) +#define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7) +#define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1) +#define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3) +#define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5) +#define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7) +#define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1) +#define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3) +#define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5) +#define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7) +#define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0) +#define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1) +#define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2) +#define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3) +#define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5) +#define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7) +#define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0) +#define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1) +#define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2) +#define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3) +#define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5) +#define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7) +#define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1) +#define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3) +#define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5) +#define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7) +#define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0) +#define OP_TLBI_VAE1ISNXS sys_insn(1, 0, 9, 3, 1) +#define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2) +#define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3) +#define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5) +#define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7) +#define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1) +#define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3) +#define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5) +#define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7) +#define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1) +#define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3) +#define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5) +#define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7) +#define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0) +#define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1) +#define OP_TLBI_ASIDE1NXS sys_insn(1, 0, 9, 7, 2) +#define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3) +#define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5) +#define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7) +#define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1) +#define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2) +#define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5) +#define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6) +#define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0) +#define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1) +#define OP_TLBI_ALLE1OS sys_insn(1, 4, 8, 1, 4) +#define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5) +#define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6) +#define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1) +#define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5) +#define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0) +#define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1) +#define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4) +#define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5) +#define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6) +#define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0) +#define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1) +#define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2) +#define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3) +#define OP_TLBI_IPAS2LE1OS sys_insn(1, 4, 8, 4, 4) +#define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5) +#define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6) +#define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7) +#define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1) +#define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5) +#define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1) +#define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5) +#define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0) +#define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1) +#define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4) +#define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5) +#define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 6) +#define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1) +#define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2) +#define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5) +#define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6) +#define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0) +#define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1) +#define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4) +#define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5) +#define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6) +#define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1) +#define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5) +#define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0) +#define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1) +#define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4) +#define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5) +#define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6) +#define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0) +#define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1) +#define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2) +#define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3) +#define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4) +#define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5) +#define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6) +#define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7) +#define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1) +#define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5) +#define OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1) +#define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5) +#define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0) +#define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1) +#define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4) +#define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5) +#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6) + +/* Misc instructions */ +#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4) +#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5) +#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6) +#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0) + +#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4) +#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5) +#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4) +#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5) +#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6) +#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7) + /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_ENTP2 (BIT(60)) #define SCTLR_ELx_DSSBS (BIT(44)) @@ -555,16 +895,14 @@ /* Position the attr at the correct index */ #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) -/* id_aa64pfr0 */ -#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1 -#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2 - /* id_aa64mmfr0 */ #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1 +#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf #define ARM64_MIN_PARANGE_BITS 32 @@ -572,6 +910,7 @@ #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7 #ifdef CONFIG_ARM64_PA_BITS_52 @@ -582,11 +921,13 @@ #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT #elif defined(CONFIG_ARM64_16K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT @@ -610,6 +951,19 @@ #define SYS_GCR_EL1_RRND (BIT(16)) #define SYS_GCR_EL1_EXCL_MASK 0xffffUL +#ifdef CONFIG_KASAN_HW_TAGS +/* + * KASAN always uses a whole byte for its tags. With CONFIG_KASAN_HW_TAGS it + * only uses tags in the range 0xF0-0xFF, which we map to MTE tags 0x0-0xF. + */ +#define __MTE_TAG_MIN (KASAN_TAG_MIN & 0xf) +#define __MTE_TAG_MAX (KASAN_TAG_MAX & 0xf) +#define __MTE_TAG_INCL GENMASK(__MTE_TAG_MAX, __MTE_TAG_MIN) +#define KERNEL_GCR_EL1_EXCL (SYS_GCR_EL1_EXCL_MASK & ~__MTE_TAG_INCL) +#else +#define KERNEL_GCR_EL1_EXCL SYS_GCR_EL1_EXCL_MASK +#endif + #define KERNEL_GCR_EL1 (SYS_GCR_EL1_RRND | KERNEL_GCR_EL1_EXCL) /* RGSR_EL1 Definitions */ @@ -626,15 +980,6 @@ /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (BIT(31)) -#define TRFCR_ELx_TS_SHIFT 5 -#define TRFCR_ELx_TS_MASK ((0x3UL) << TRFCR_ELx_TS_SHIFT) -#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT) -#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT) -#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT) -#define TRFCR_EL2_CX BIT(3) -#define TRFCR_ELx_ExTRE BIT(1) -#define TRFCR_ELx_E0TRE BIT(0) - /* GIC Hypervisor interface registers */ /* ICH_MISR_EL2 bit definitions */ #define ICH_MISR_EOI (1 << 0) @@ -716,6 +1061,22 @@ #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) +/* + * Permission Overlay Extension (POE) permission encodings. + */ +#define POE_NONE UL(0x0) +#define POE_R UL(0x1) +#define POE_X UL(0x2) +#define POE_RX UL(0x3) +#define POE_W UL(0x4) +#define POE_RW UL(0x5) +#define POE_XW UL(0x6) +#define POE_RXW UL(0x7) +#define POE_MASK UL(0xf) + +/* Initial value for Permission Overlay Extension for EL0 */ +#define POR_EL0_INIT POE_RXW + #define ARM64_FEATURE_FIELD_BITS 4 /* Defined for compatibility only, do not add new users. */ @@ -789,15 +1150,21 @@ /* * For registers without architectural names, or simply unsupported by * GAS. + * + * __check_r forces warnings to be generated by the compiler when + * evaluating r which wouldn't normally happen due to being passed to + * the assembler via __stringify(r). */ #define read_sysreg_s(r) ({ \ u64 __val; \ + u32 __maybe_unused __check_r = (u32)(r); \ asm volatile(__mrs_s("%0", r) : "=r" (__val)); \ __val; \ }) #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ + u32 __maybe_unused __check_r = (u32)(r); \ asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ } while (0) @@ -827,6 +1194,8 @@ par; \ }) +#define SYS_FIELD_VALUE(reg, field, val) reg##_##field##_##val + #define SYS_FIELD_GET(reg, field, val) \ FIELD_GET(reg##_##field##_MASK, val) @@ -834,7 +1203,8 @@ FIELD_PREP(reg##_##field##_MASK, val) #define SYS_FIELD_PREP_ENUM(reg, field, val) \ - FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val) + FIELD_PREP(reg##_##field##_MASK, \ + SYS_FIELD_VALUE(reg, field, val)) #endif diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 156b62a163c5..8a48cc2536f5 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -226,7 +226,7 @@ static int load_xbc_from_initrd(int fd, char **buf) /* Wrong Checksum */ rcsum = xbc_calc_checksum(*buf, size); if (csum != rcsum) { - pr_err("checksum error: %d != %d\n", csum, rcsum); + pr_err("checksum error: %u != %u\n", csum, rcsum); return -EINVAL; } @@ -395,7 +395,7 @@ static int apply_xbc(const char *path, const char *xbc_path) xbc_get_info(&ret, NULL); printf("\tNumber of nodes: %d\n", ret); printf("\tSize: %u bytes\n", (unsigned int)size); - printf("\tChecksum: %d\n", (unsigned int)csum); + printf("\tChecksum: %u\n", (unsigned int)csum); /* TODO: Check the options by schema */ xbc_exit(); diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 3f6bca03ad2e..d47dddc2b4ee 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -24,7 +24,7 @@ BTF COMMANDS ============= | **bpftool** **btf** { **show** | **list** } [**id** *BTF_ID*] -| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*] +| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*] [**root_id** *ROOT_ID*] | **bpftool** **btf help** | | *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* } @@ -43,7 +43,7 @@ bpftool btf { show | list } [id *BTF_ID*] that hold open file descriptors (FDs) against BTF objects. On such kernels bpftool will automatically emit this information as well. -bpftool btf dump *BTF_SRC* +bpftool btf dump *BTF_SRC* [format *FORMAT*] [root_id *ROOT_ID*] Dump BTF entries from a given *BTF_SRC*. When **id** is specified, BTF object with that ID will be loaded and all @@ -67,6 +67,11 @@ bpftool btf dump *BTF_SRC* formatting, the output is sorted by default. Use the **unsorted** option to avoid sorting the output. + **root_id** option can be used to filter a dump to a single type and all + its dependent types. It cannot be used with any other types of filtering + (such as the "key", "value", or "kv" arguments when dumping BTF for a map). + It can be passed multiple times to dump multiple types. + bpftool btf help Print short help message. diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 0c541498c301..1ce409a6cbd9 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -930,19 +930,24 @@ _bpftool() format) COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) ) ;; + root_id) + return 0; + ;; c) - COMPREPLY=( $( compgen -W "unsorted" -- "$cur" ) ) + COMPREPLY=( $( compgen -W "unsorted root_id" -- "$cur" ) ) ;; *) # emit extra options case ${words[3]} in id|file) + COMPREPLY=( $( compgen -W "root_id" -- "$cur" ) ) _bpftool_once_attr 'format' ;; map|prog) if [[ ${words[3]} == "map" ]] && [[ $cword == 6 ]]; then COMPREPLY+=( $( compgen -W "key value kv all" -- "$cur" ) ) fi + COMPREPLY=( $( compgen -W "root_id" -- "$cur" ) ) _bpftool_once_attr 'format' ;; *) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index d005e4fd6128..2636655ac180 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -27,6 +27,8 @@ #define KFUNC_DECL_TAG "bpf_kfunc" #define FASTCALL_DECL_TAG "bpf_fastcall" +#define MAX_ROOT_IDS 16 + static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_UNKN] = "UNKNOWN", [BTF_KIND_INT] = "INT", @@ -880,12 +882,14 @@ static int do_dump(int argc, char **argv) { bool dump_c = false, sort_dump_c = true; struct btf *btf = NULL, *base = NULL; - __u32 root_type_ids[2]; + __u32 root_type_ids[MAX_ROOT_IDS]; + bool have_id_filtering; int root_type_cnt = 0; __u32 btf_id = -1; const char *src; int fd = -1; int err = 0; + int i; if (!REQ_ARGS(2)) { usage(); @@ -973,6 +977,8 @@ static int do_dump(int argc, char **argv) goto done; } + have_id_filtering = !!root_type_cnt; + while (argc) { if (is_prefix(*argv, "format")) { NEXT_ARG(); @@ -992,6 +998,36 @@ static int do_dump(int argc, char **argv) goto done; } NEXT_ARG(); + } else if (is_prefix(*argv, "root_id")) { + __u32 root_id; + char *end; + + if (have_id_filtering) { + p_err("cannot use root_id with other type filtering"); + err = -EINVAL; + goto done; + } else if (root_type_cnt == MAX_ROOT_IDS) { + p_err("only %d root_id are supported", MAX_ROOT_IDS); + err = -E2BIG; + goto done; + } + + NEXT_ARG(); + root_id = strtoul(*argv, &end, 0); + if (*end) { + err = -1; + p_err("can't parse %s as root ID", *argv); + goto done; + } + for (i = 0; i < root_type_cnt; i++) { + if (root_type_ids[i] == root_id) { + err = -EINVAL; + p_err("duplicate root_id %d supplied", root_id); + goto done; + } + } + root_type_ids[root_type_cnt++] = root_id; + NEXT_ARG(); } else if (is_prefix(*argv, "unsorted")) { sort_dump_c = false; NEXT_ARG(); @@ -1017,6 +1053,17 @@ static int do_dump(int argc, char **argv) } } + /* Invalid root IDs causes half emitted boilerplate and then unclean + * exit. It's an ugly user experience, so handle common error here. + */ + for (i = 0; i < root_type_cnt; i++) { + if (root_type_ids[i] >= btf__type_cnt(btf)) { + err = -EINVAL; + p_err("invalid root ID: %u", root_type_ids[i]); + goto done; + } + } + if (dump_c) { if (json_output) { p_err("JSON output for C-syntax dump is not supported"); @@ -1391,7 +1438,7 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %1$s %2$s { show | list } [id BTF_ID]\n" - " %1$s %2$s dump BTF_SRC [format FORMAT]\n" + " %1$s %2$s dump BTF_SRC [format FORMAT] [root_id ROOT_ID]\n" " %1$s %2$s help\n" "\n" " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n" diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c index eec437cca2ea..e3785f9a697d 100644 --- a/tools/bpf/bpftool/cfg.c +++ b/tools/bpf/bpftool/cfg.c @@ -302,6 +302,7 @@ static bool func_add_bb_edges(struct func_node *func) insn = bb->tail; if (!is_jmp_insn(insn->code) || + BPF_OP(insn->code) == BPF_CALL || BPF_OP(insn->code) == BPF_EXIT) { e->dst = bb_next(bb); e->flags |= EDGE_FLAG_FALLTHROUGH; diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 4dbc4fcdf473..24fecdf8e430 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -885,6 +885,28 @@ probe_v3_isa_extension(const char *define_prefix, __u32 ifindex) "V3_ISA_EXTENSION"); } +/* + * Probe for the v4 instruction set extension introduced in commit 1f9a1ea821ff + * ("bpf: Support new sign-extension load insns"). + */ +static void +probe_v4_isa_extension(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[5] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 1, 1), + BPF_JMP32_A(1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_v4_isa_extension", + "ISA extension v4", + "V4_ISA_EXTENSION"); +} + static void section_system_config(enum probe_component target, const char *define_prefix) { @@ -1029,6 +1051,7 @@ static void section_misc(const char *define_prefix, __u32 ifindex) probe_bounded_loops(define_prefix, ifindex); probe_v2_isa_extension(define_prefix, ifindex); probe_v3_isa_extension(define_prefix, ifindex); + probe_v4_isa_extension(define_prefix, ifindex); print_end_section(); } diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index bd9f960bce3d..d47191c6e55e 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -141,6 +141,7 @@ struct object { }; static int verbose; +static int warnings; static int eprintf(int level, int var, const char *fmt, ...) { @@ -604,6 +605,7 @@ static int symbols_resolve(struct object *obj) if (id->id) { pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n", str, id->id, type_id, id->id); + warnings++; } else { id->id = type_id; (*nr)--; @@ -625,8 +627,10 @@ static int id_patch(struct object *obj, struct btf_id *id) int i; /* For set, set8, id->id may be 0 */ - if (!id->id && !id->is_set && !id->is_set8) + if (!id->id && !id->is_set && !id->is_set8) { pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); + warnings++; + } for (i = 0; i < id->addr_cnt; i++) { unsigned long addr = id->addr[i]; @@ -782,6 +786,7 @@ int main(int argc, const char **argv) .funcs = RB_ROOT, .sets = RB_ROOT, }; + bool fatal_warnings = false; struct option btfid_options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show errors, etc)"), @@ -789,6 +794,8 @@ int main(int argc, const char **argv) "BTF data"), OPT_STRING('b', "btf_base", &obj.base_btf_path, "file", "path of file providing base BTF"), + OPT_BOOLEAN(0, "fatal_warnings", &fatal_warnings, + "turn warnings into errors"), OPT_END() }; int err = -1; @@ -823,7 +830,8 @@ int main(int argc, const char **argv) if (symbols_patch(&obj)) goto out; - err = 0; + if (!(fatal_warnings && warnings)) + err = 0; out: if (obj.efile.elf) { elf_end(obj.efile.elf); diff --git a/tools/hv/.gitignore b/tools/hv/.gitignore new file mode 100644 index 000000000000..0c5bc15d602f --- /dev/null +++ b/tools/hv/.gitignore @@ -0,0 +1,3 @@ +hv_fcopy_uio_daemon +hv_kvp_daemon +hv_vss_daemon diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c index 7a00f3066a98..0198321d14a2 100644 --- a/tools/hv/hv_fcopy_uio_daemon.c +++ b/tools/hv/hv_fcopy_uio_daemon.c @@ -35,8 +35,6 @@ #define WIN8_SRV_MINOR 1 #define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) -#define MAX_FOLDER_NAME 15 -#define MAX_PATH_LEN 15 #define FCOPY_UIO "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/uio" #define FCOPY_VER_COUNT 1 @@ -51,7 +49,7 @@ static const int fw_versions[] = { #define HV_RING_SIZE 0x4000 /* 16KB ring buffer size */ -unsigned char desc[HV_RING_SIZE]; +static unsigned char desc[HV_RING_SIZE]; static int target_fd; static char target_fname[PATH_MAX]; @@ -409,8 +407,8 @@ int main(int argc, char *argv[]) struct vmbus_br txbr, rxbr; void *ring; uint32_t len = HV_RING_SIZE; - char uio_name[MAX_FOLDER_NAME] = {0}; - char uio_dev_path[MAX_PATH_LEN] = {0}; + char uio_name[NAME_MAX] = {0}; + char uio_dev_path[PATH_MAX] = {0}; static struct option long_options[] = { {"help", no_argument, 0, 'h' }, @@ -468,8 +466,10 @@ int main(int argc, char *argv[]) */ ret = pread(fcopy_fd, &tmp, sizeof(int), 0); if (ret < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; syslog(LOG_ERR, "pread failed: %s", strerror(errno)); - continue; + goto close; } len = HV_RING_SIZE; diff --git a/tools/hv/hv_get_dns_info.sh b/tools/hv/hv_get_dns_info.sh index 058c17b46ffc..268521234d4b 100755 --- a/tools/hv/hv_get_dns_info.sh +++ b/tools/hv/hv_get_dns_info.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # This example script parses /etc/resolv.conf to retrive DNS information. # In the interest of keeping the KVP daemon code free of distro specific @@ -10,4 +10,4 @@ # this script can be based on the Network Manager APIs for retrieving DNS # entries. -cat /etc/resolv.conf 2>/dev/null | awk '/^nameserver/ { print $2 }' +exec awk '/^nameserver/ { print $2 }' /etc/resolv.conf 2>/dev/null diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index ae57bf69ad4a..04ba035d67e9 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -725,7 +725,7 @@ static void kvp_get_ipconfig_info(char *if_name, * . */ - sprintf(cmd, KVP_SCRIPTS_PATH "%s", "hv_get_dns_info"); + sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dns_info", if_name); /* * Execute the command to gather DNS info. @@ -742,7 +742,7 @@ static void kvp_get_ipconfig_info(char *if_name, * Enabled: DHCP enabled. */ - sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dhcp_info", if_name); + sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dhcp_info", if_name); file = popen(cmd, "r"); if (file == NULL) @@ -1606,8 +1606,9 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * invoke the external script to do its magic. */ - str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s %s", - "hv_set_ifconfig", if_filename, nm_filename); + str_len = snprintf(cmd, sizeof(cmd), "exec %s %s %s", + KVP_SCRIPTS_PATH "hv_set_ifconfig", + if_filename, nm_filename); /* * This is a little overcautious, but it's necessary to suppress some * false warnings from gcc 8.0.1. diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh index 440a91b35823..2f8baed2b8f7 100755 --- a/tools/hv/hv_set_ifconfig.sh +++ b/tools/hv/hv_set_ifconfig.sh @@ -81,7 +81,7 @@ echo "ONBOOT=yes" >> $1 cp $1 /etc/sysconfig/network-scripts/ -chmod 600 $2 +umask 0177 interface=$(echo $2 | awk -F - '{ print $2 }') filename="${2##*/}" diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 65aa8ce142e5..bcc6df79301a 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -273,6 +273,16 @@ .off = OFF, \ .imm = 0 }) +/* Unconditional jumps, gotol pc + imm32 */ + +#define BPF_JMP32_A(IMM) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ diff --git a/tools/include/linux/kasan-tags.h b/tools/include/linux/kasan-tags.h new file mode 100644 index 000000000000..4f85f562512c --- /dev/null +++ b/tools/include/linux/kasan-tags.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KASAN_TAGS_H +#define _LINUX_KASAN_TAGS_H + +#define KASAN_TAG_KERNEL 0xFF /* native kernel pointers tag */ +#define KASAN_TAG_INVALID 0xFE /* inaccessible memory tag */ +#define KASAN_TAG_MAX 0xFD /* maximum value for random tags */ + +#ifdef CONFIG_KASAN_HW_TAGS +#define KASAN_TAG_MIN 0xF0 /* minimum value for random tags */ +#else +#define KASAN_TAG_MIN 0x00 /* minimum value for random tags */ +#endif + +#endif /* LINUX_KASAN_TAGS_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 453a4f4ef39d..df5d9fa84dba 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -54,4 +54,16 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_SAVE 6 #define UNWIND_HINT_TYPE_RESTORE 7 +/* + * Annotate types + */ +#define ANNOTYPE_NOENDBR 1 +#define ANNOTYPE_RETPOLINE_SAFE 2 +#define ANNOTYPE_INSTR_BEGIN 3 +#define ANNOTYPE_INSTR_END 4 +#define ANNOTYPE_UNRET_BEGIN 5 +#define ANNOTYPE_IGNORE_ALTS 6 +#define ANNOTYPE_INTRA_FUNCTION_CALL 7 +#define ANNOTYPE_REACHABLE 8 + #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 7b82bc3cf107..d4a5c2399a66 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -23,6 +23,7 @@ #include <linux/prctl.h> #include <linux/resource.h> #include <linux/utsname.h> +#include <linux/signal.h> #include "arch.h" #include "errno.h" @@ -1226,6 +1227,23 @@ pid_t waitpid(pid_t pid, int *status, int options) /* + * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); + */ + +static __attribute__((unused)) +int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) +{ + return my_syscall5(__NR_waitid, which, pid, infop, options, rusage); +} + +static __attribute__((unused)) +int waitid(int which, pid_t pid, siginfo_t *infop, int options) +{ + return __sysret(sys_waitid(which, pid, infop, options, NULL)); +} + + +/* * ssize_t write(int fd, const void *buf, size_t count); */ diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 281df9139d2b..ffff554a5230 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -126,6 +126,8 @@ #define SCM_TS_OPT_ID 78 +#define SO_RCVPRIORITY 79 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4162afc6b5d0..2acf9b336371 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1573,6 +1573,16 @@ union bpf_attr { * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. */ __s32 prog_token_fd; + /* The fd_array_cnt can be used to pass the length of the + * fd_array array. In this case all the [map] file descriptors + * passed in this array will be bound to the program, even if + * the maps are not referenced directly. The functionality is + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be + * used by the verifier during the program load. If provided, + * then the fd_array[0,...,fd_array_cnt-1] is expected to be + * continuous. + */ + __u32 fd_array_cnt; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 8516c1ccd57a..7e46ca4cd31b 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -1315,6 +1315,8 @@ enum { IFLA_NETKIT_MODE, IFLA_NETKIT_SCRUB, IFLA_NETKIT_PEER_SCRUB, + IFLA_NETKIT_HEADROOM, + IFLA_NETKIT_TAILROOM, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 2f082b01ff22..42ec5ddaab8d 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -117,12 +117,12 @@ struct xdp_options { ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) /* Request transmit timestamp. Upon completion, put it into tx_timestamp - * field of union xsk_tx_metadata. + * field of struct xsk_tx_metadata. */ #define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0) /* Request transmit checksum offload. Checksum start position and offset - * are communicated via csum_start and csum_offset fields of union + * are communicated via csum_start and csum_offset fields of struct * xsk_tx_metadata. */ #define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h index bb6ea517efb5..c53cde425406 100644 --- a/tools/include/uapi/linux/stddef.h +++ b/tools/include/uapi/linux/stddef.h @@ -8,6 +8,13 @@ #define __always_inline __inline__ #endif +/* Not all C++ standards support type declarations inside an anonymous union */ +#ifndef __cplusplus +#define __struct_group_tag(TAG) TAG +#else +#define __struct_group_tag(TAG) +#endif + /** * __struct_group() - Create a mirrored named and anonyomous struct * @@ -20,14 +27,14 @@ * and size: one anonymous and one named. The former's members can be used * normally without sub-struct naming, and the latter can be used to * reason about the start, end, and size of the group of struct members. - * The named struct can also be explicitly tagged for layer reuse, as well - * as both having struct attributes appended. + * The named struct can also be explicitly tagged for layer reuse (C only), + * as well as both having struct attributes appended. */ #define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ union { \ struct { MEMBERS } ATTRS; \ - struct TAG { MEMBERS } ATTRS NAME; \ - } + struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ + } ATTRS /** * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index becdfa701c75..359f73ead613 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -238,7 +238,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -311,6 +311,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0); attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); + attr.fd_array_cnt = OPTS_GET(opts, fd_array_cnt, 0); if (log_level) { attr.log_buf = ptr_to_u64(log_buf); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index a4a7b1ad1b63..435da95d2058 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -107,9 +107,12 @@ struct bpf_prog_load_opts { */ __u32 log_true_size; __u32 token_fd; + + /* if set, provides the length of fd_array */ + __u32 fd_array_cnt; size_t :0; }; -#define bpf_prog_load_opts__last_field token_fd +#define bpf_prog_load_opts__last_field fd_array_cnt LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 12468ae0d573..48c66f3a9200 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -283,7 +283,7 @@ static int btf_parse_str_sec(struct btf *btf) return -EINVAL; } if (!btf->base_btf && start[0]) { - pr_debug("Invalid BTF string section\n"); + pr_debug("Malformed BTF string section, did you forget to provide base BTF?\n"); return -EINVAL; } return 0; @@ -1186,6 +1186,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, elf = elf_begin(fd, ELF_C_READ, NULL); if (!elf) { + err = -LIBBPF_ERRNO__FORMAT; pr_warn("failed to open %s as ELF file\n", path); goto done; } diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c index b72f83e15156..53d1f3541bce 100644 --- a/tools/lib/bpf/btf_relocate.c +++ b/tools/lib/bpf/btf_relocate.c @@ -212,7 +212,7 @@ static int btf_relocate_map_distilled_base(struct btf_relocate *r) * need to match both name and size, otherwise embedding the base * struct/union in the split type is invalid. */ - for (id = r->nr_dist_base_types; id < r->nr_split_types; id++) { + for (id = r->nr_dist_base_types; id < r->nr_dist_base_types + r->nr_split_types; id++) { err = btf_mark_embedded_composite_type_ids(r, id); if (err) goto done; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 66173ddb5a2d..194809da5172 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1731,12 +1731,24 @@ static int sys_memfd_create(const char *name, unsigned flags) #ifndef MFD_CLOEXEC #define MFD_CLOEXEC 0x0001U #endif +#ifndef MFD_NOEXEC_SEAL +#define MFD_NOEXEC_SEAL 0x0008U +#endif static int create_placeholder_fd(void) { + unsigned int flags = MFD_CLOEXEC | MFD_NOEXEC_SEAL; + const char *name = "libbpf-placeholder-fd"; int fd; - fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); + fd = ensure_good_fd(sys_memfd_create(name, flags)); + if (fd >= 0) + return fd; + else if (errno != EINVAL) + return -errno; + + /* Possibly running on kernel without MFD_NOEXEC_SEAL */ + fd = ensure_good_fd(sys_memfd_create(name, flags & ~MFD_NOEXEC_SEAL)); if (fd < 0) return -errno; return fd; @@ -11375,9 +11387,33 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, struct kprobe_multi_resolve *res = data->res; int err; - if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) + if (!glob_match(sym_name, res->pattern)) return 0; + if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) { + /* Some versions of kernel strip out .llvm.<hash> suffix from + * function names reported in available_filter_functions, but + * don't do so for kallsyms. While this is clearly a kernel + * bug (fixed by [0]) we try to accommodate that in libbpf to + * make multi-kprobe usability a bit better: if no match is + * found, we will strip .llvm. suffix and try one more time. + * + * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG") + */ + char sym_trim[256], *psym_trim = sym_trim, *sym_sfx; + + if (!(sym_sfx = strstr(sym_name, ".llvm."))) + return 0; + + /* psym_trim vs sym_trim dance is done to avoid pointer vs array + * coercion differences and get proper `const char **` pointer + * which avail_func_cmp() expects + */ + snprintf(sym_trim, sizeof(sym_trim), "%.*s", (int)(sym_sfx - sym_name), sym_name); + if (!bsearch(&psym_trim, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) + return 0; + } + err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1); if (err) return err; @@ -11522,7 +11558,7 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, struct bpf_link *link = NULL; const unsigned long *addrs; int err, link_fd, prog_fd; - bool retprobe, session; + bool retprobe, session, unique_match; const __u64 *cookies; const char **syms; size_t cnt; @@ -11541,6 +11577,7 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, addrs = OPTS_GET(opts, addrs, false); cnt = OPTS_GET(opts, cnt, false); cookies = OPTS_GET(opts, cookies, false); + unique_match = OPTS_GET(opts, unique_match, false); if (!pattern && !addrs && !syms) return libbpf_err_ptr(-EINVAL); @@ -11548,6 +11585,8 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, return libbpf_err_ptr(-EINVAL); if (!pattern && !cnt) return libbpf_err_ptr(-EINVAL); + if (!pattern && unique_match) + return libbpf_err_ptr(-EINVAL); if (addrs && syms) return libbpf_err_ptr(-EINVAL); @@ -11558,6 +11597,14 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, err = libbpf_available_kallsyms_parse(&res); if (err) goto error; + + if (unique_match && res.cnt != 1) { + pr_warn("prog '%s': failed to find a unique match for '%s' (%zu matches)\n", + prog->name, pattern, res.cnt); + err = -EINVAL; + goto error; + } + addrs = res.addrs; cnt = res.cnt; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index b2ce3a72b11d..3020ee45303a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -552,10 +552,12 @@ struct bpf_kprobe_multi_opts { bool retprobe; /* create session kprobes */ bool session; + /* enforce unique match */ + bool unique_match; size_t :0; }; -#define bpf_kprobe_multi_opts__last_field session +#define bpf_kprobe_multi_opts__last_field unique_match LIBBPF_API struct bpf_link * bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, @@ -1796,9 +1798,14 @@ struct bpf_linker_file_opts { struct bpf_linker; LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts); +LIBBPF_API struct bpf_linker *bpf_linker__new_fd(int fd, struct bpf_linker_opts *opts); LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts); +LIBBPF_API int bpf_linker__add_fd(struct bpf_linker *linker, int fd, + const struct bpf_linker_file_opts *opts); +LIBBPF_API int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, + const struct bpf_linker_file_opts *opts); LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 54b6f312cfa8..a8b2936a1646 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -432,4 +432,8 @@ LIBBPF_1.5.0 { } LIBBPF_1.4.0; LIBBPF_1.6.0 { + global: + bpf_linker__add_buf; + bpf_linker__add_fd; + bpf_linker__new_fd; } LIBBPF_1.5.0; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index cf71d149fe26..b52f71c59616 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -4,6 +4,10 @@ * * Copyright (c) 2021 Facebook */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + #include <stdbool.h> #include <stddef.h> #include <stdio.h> @@ -16,6 +20,7 @@ #include <elf.h> #include <libelf.h> #include <fcntl.h> +#include <sys/mman.h> #include "libbpf.h" #include "btf.h" #include "libbpf_internal.h" @@ -152,15 +157,19 @@ struct bpf_linker { /* global (including extern) ELF symbols */ int glob_sym_cnt; struct glob_sym *glob_syms; + + bool fd_is_owned; }; #define pr_warn_elf(fmt, ...) \ libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1)) -static int init_output_elf(struct bpf_linker *linker, const char *file); +static int init_output_elf(struct bpf_linker *linker); -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts, +static int bpf_linker_add_file(struct bpf_linker *linker, int fd, + const char *filename); + +static int linker_load_obj_file(struct bpf_linker *linker, struct src_obj *obj); static int linker_sanity_check_elf(struct src_obj *obj); static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec); @@ -191,7 +200,7 @@ void bpf_linker__free(struct bpf_linker *linker) if (linker->elf) elf_end(linker->elf); - if (linker->fd >= 0) + if (linker->fd >= 0 && linker->fd_is_owned) close(linker->fd); strset__free(linker->strtab_strs); @@ -233,9 +242,63 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts if (!linker) return errno = ENOMEM, NULL; - linker->fd = -1; + linker->filename = strdup(filename); + if (!linker->filename) { + err = -ENOMEM; + goto err_out; + } + + linker->fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); + if (linker->fd < 0) { + err = -errno; + pr_warn("failed to create '%s': %d\n", filename, err); + goto err_out; + } + linker->fd_is_owned = true; + + err = init_output_elf(linker); + if (err) + goto err_out; + + return linker; + +err_out: + bpf_linker__free(linker); + return errno = -err, NULL; +} + +struct bpf_linker *bpf_linker__new_fd(int fd, struct bpf_linker_opts *opts) +{ + struct bpf_linker *linker; + char filename[32]; + int err; + + if (fd < 0) + return errno = EINVAL, NULL; + + if (!OPTS_VALID(opts, bpf_linker_opts)) + return errno = EINVAL, NULL; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn_elf("libelf initialization failed"); + return errno = EINVAL, NULL; + } - err = init_output_elf(linker, filename); + linker = calloc(1, sizeof(*linker)); + if (!linker) + return errno = ENOMEM, NULL; + + snprintf(filename, sizeof(filename), "fd:%d", fd); + linker->filename = strdup(filename); + if (!linker->filename) { + err = -ENOMEM; + goto err_out; + } + + linker->fd = fd; + linker->fd_is_owned = false; + + err = init_output_elf(linker); if (err) goto err_out; @@ -294,23 +357,12 @@ static Elf64_Sym *add_new_sym(struct bpf_linker *linker, size_t *sym_idx) return sym; } -static int init_output_elf(struct bpf_linker *linker, const char *file) +static int init_output_elf(struct bpf_linker *linker) { int err, str_off; Elf64_Sym *init_sym; struct dst_sec *sec; - linker->filename = strdup(file); - if (!linker->filename) - return -ENOMEM; - - linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); - if (linker->fd < 0) { - err = -errno; - pr_warn("failed to create '%s': %s\n", file, errstr(err)); - return err; - } - linker->elf = elf_begin(linker->fd, ELF_C_WRITE, NULL); if (!linker->elf) { pr_warn_elf("failed to create ELF object"); @@ -436,19 +488,16 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) return 0; } -int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts) +static int bpf_linker_add_file(struct bpf_linker *linker, int fd, + const char *filename) { struct src_obj obj = {}; int err = 0; - if (!OPTS_VALID(opts, bpf_linker_file_opts)) - return libbpf_err(-EINVAL); - - if (!linker->elf) - return libbpf_err(-EINVAL); + obj.filename = filename; + obj.fd = fd; - err = err ?: linker_load_obj_file(linker, filename, opts, &obj); + err = err ?: linker_load_obj_file(linker, &obj); err = err ?: linker_append_sec_data(linker, &obj); err = err ?: linker_append_elf_syms(linker, &obj); err = err ?: linker_append_elf_relos(linker, &obj); @@ -463,12 +512,91 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, free(obj.sym_map); if (obj.elf) elf_end(obj.elf); - if (obj.fd >= 0) - close(obj.fd); + return err; +} + +int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, + const struct bpf_linker_file_opts *opts) +{ + int fd, err; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + fd = open(filename, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = -errno; + pr_warn("failed to open file '%s': %s\n", filename, errstr(err)); + return libbpf_err(err); + } + + err = bpf_linker_add_file(linker, fd, filename); + close(fd); return libbpf_err(err); } +int bpf_linker__add_fd(struct bpf_linker *linker, int fd, + const struct bpf_linker_file_opts *opts) +{ + char filename[32]; + int err; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + if (fd < 0) + return libbpf_err(-EINVAL); + + snprintf(filename, sizeof(filename), "fd:%d", fd); + err = bpf_linker_add_file(linker, fd, filename); + return libbpf_err(err); +} + +int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, + const struct bpf_linker_file_opts *opts) +{ + char filename[32]; + int fd, written, ret; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz); + + fd = memfd_create(filename, 0); + if (fd < 0) { + ret = -errno; + pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret)); + return libbpf_err(ret); + } + + written = 0; + while (written < buf_sz) { + ret = write(fd, buf, buf_sz); + if (ret < 0) { + ret = -errno; + pr_warn("failed to write '%s': %s\n", filename, errstr(ret)); + goto err_out; + } + written += ret; + } + + ret = bpf_linker_add_file(linker, fd, filename); +err_out: + close(fd); + return libbpf_err(ret); +} + static bool is_dwarf_sec_name(const char *name) { /* approximation, but the actual list is too long */ @@ -534,8 +662,7 @@ static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name) return sec; } -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts, +static int linker_load_obj_file(struct bpf_linker *linker, struct src_obj *obj) { int err = 0; @@ -554,36 +681,26 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, #error "Unknown __BYTE_ORDER__" #endif - pr_debug("linker: adding object file '%s'...\n", filename); - - obj->filename = filename; + pr_debug("linker: adding object file '%s'...\n", obj->filename); - obj->fd = open(filename, O_RDONLY | O_CLOEXEC); - if (obj->fd < 0) { - err = -errno; - pr_warn("failed to open file '%s': %s\n", filename, errstr(err)); - return err; - } obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL); if (!obj->elf) { - err = -errno; - pr_warn_elf("failed to parse ELF file '%s'", filename); - return err; + pr_warn_elf("failed to parse ELF file '%s'", obj->filename); + return -EINVAL; } /* Sanity check ELF file high-level properties */ ehdr = elf64_getehdr(obj->elf); if (!ehdr) { - err = -errno; - pr_warn_elf("failed to get ELF header for %s", filename); - return err; + pr_warn_elf("failed to get ELF header for %s", obj->filename); + return -EINVAL; } /* Linker output endianness set by first input object */ obj_byteorder = ehdr->e_ident[EI_DATA]; if (obj_byteorder != ELFDATA2LSB && obj_byteorder != ELFDATA2MSB) { err = -EOPNOTSUPP; - pr_warn("unknown byte order of ELF file %s\n", filename); + pr_warn("unknown byte order of ELF file %s\n", obj->filename); return err; } if (link_byteorder == ELFDATANONE) { @@ -593,7 +710,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj_byteorder == ELFDATA2MSB ? "big" : "little"); } else if (link_byteorder != obj_byteorder) { err = -EOPNOTSUPP; - pr_warn("byte order mismatch with ELF file %s\n", filename); + pr_warn("byte order mismatch with ELF file %s\n", obj->filename); return err; } @@ -601,14 +718,13 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, || ehdr->e_machine != EM_BPF || ehdr->e_ident[EI_CLASS] != ELFCLASS64) { err = -EOPNOTSUPP; - pr_warn_elf("unsupported kind of ELF file %s", filename); + pr_warn_elf("unsupported kind of ELF file %s", obj->filename); return err; } if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) { - err = -errno; - pr_warn_elf("failed to get SHSTRTAB section index for %s", filename); - return err; + pr_warn_elf("failed to get SHSTRTAB section index for %s", obj->filename); + return -EINVAL; } scn = NULL; @@ -618,26 +734,23 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, shdr = elf64_getshdr(scn); if (!shdr) { - err = -errno; pr_warn_elf("failed to get section #%zu header for %s", - sec_idx, filename); - return err; + sec_idx, obj->filename); + return -EINVAL; } sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name); if (!sec_name) { - err = -errno; pr_warn_elf("failed to get section #%zu name for %s", - sec_idx, filename); - return err; + sec_idx, obj->filename); + return -EINVAL; } data = elf_getdata(scn, 0); if (!data) { - err = -errno; pr_warn_elf("failed to get section #%zu (%s) data from %s", - sec_idx, sec_name, filename); - return err; + sec_idx, sec_name, obj->filename); + return -EINVAL; } sec = add_src_sec(obj, sec_name); @@ -672,7 +785,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, err = libbpf_get_error(obj->btf); if (err) { pr_warn("failed to parse .BTF from %s: %s\n", - filename, errstr(err)); + obj->filename, errstr(err)); return err; } sec->skipped = true; @@ -683,7 +796,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, err = libbpf_get_error(obj->btf_ext); if (err) { pr_warn("failed to parse .BTF.ext from '%s': %s\n", - filename, errstr(err)); + obj->filename, errstr(err)); return err; } sec->skipped = true; @@ -700,7 +813,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, break; default: pr_warn("unrecognized section #%zu (%s) in %s\n", - sec_idx, sec_name, filename); + sec_idx, sec_name, obj->filename); err = -EINVAL; return err; } @@ -2680,22 +2793,23 @@ int bpf_linker__finalize(struct bpf_linker *linker) /* Finalize ELF layout */ if (elf_update(linker->elf, ELF_C_NULL) < 0) { - err = -errno; + err = -EINVAL; pr_warn_elf("failed to finalize ELF layout"); return libbpf_err(err); } /* Write out final ELF contents */ if (elf_update(linker->elf, ELF_C_WRITE) < 0) { - err = -errno; + err = -EINVAL; pr_warn_elf("failed to write ELF contents"); return libbpf_err(err); } elf_end(linker->elf); - close(linker->fd); - linker->elf = NULL; + + if (linker->fd_is_owned) + close(linker->fd); linker->fd = -1; return 0; diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 5f085736c6c4..4e4a52742b01 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -661,7 +661,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation */ usdt_abs_ip = note.loc_addr; - if (base_addr) + if (base_addr && note.base_addr) usdt_abs_ip += base_addr - note.base_addr; /* When attaching uprobes (which is what USDTs basically are) diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index d1cdf2a8f826..211df5a93ad9 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -1,5 +1,17 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.arch + +INSTALL ?= install +prefix ?= /usr +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif +libdir ?= $(prefix)/$(libdir_relative) +includedir ?= $(prefix)/include + SUBDIRS = lib generated samples all: $(SUBDIRS) libynl.a @@ -21,5 +33,20 @@ clean distclean: fi \ done rm -f libynl.a + rm -rf pyynl/__pycache__ + rm -rf pyynl/lib/__pycache__ + rm -rf pyynl.egg-info + rm -rf build + +install: libynl.a lib/*.h + @echo -e "\tINSTALL libynl.a" + @$(INSTALL) -d $(DESTDIR)$(libdir) + @$(INSTALL) -m 0644 libynl.a $(DESTDIR)$(libdir)/libynl.a + @echo -e "\tINSTALL libynl headers" + @$(INSTALL) -d $(DESTDIR)$(includedir)/ynl + @$(INSTALL) -m 0644 lib/*.h $(DESTDIR)$(includedir)/ynl/ + @echo -e "\tINSTALL pyynl" + @pip install --prefix=$(DESTDIR)$(prefix) . + @make -C generated install -.PHONY: all clean distclean $(SUBDIRS) +.PHONY: all clean distclean install $(SUBDIRS) diff --git a/tools/net/ynl/generated/.gitignore b/tools/net/ynl/generated/.gitignore index ade488626d26..859a6fb446e1 100644 --- a/tools/net/ynl/generated/.gitignore +++ b/tools/net/ynl/generated/.gitignore @@ -1,2 +1,3 @@ *-user.c *-user.h +*.rst diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 7db5240de58a..21f9e299dc75 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -7,32 +7,44 @@ ifeq ("$(DEBUG)","1") CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan endif +INSTALL ?= install +prefix ?= /usr +datarootdir ?= $(prefix)/share +docdir ?= $(datarootdir)/doc +includedir ?= $(prefix)/include + include ../Makefile.deps YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \ --exclude-op stats-get -TOOL:=../ynl-gen-c.py +TOOL:=../pyynl/ynl_gen_c.py +TOOL_RST:=../pyynl/ynl_gen_rst.py +SPECS_DIR:=../../../../Documentation/netlink/specs GENS_PATHS=$(shell grep -nrI --files-without-match \ 'protocol: netlink' \ - ../../../../Documentation/netlink/specs/) -GENS=$(patsubst ../../../../Documentation/netlink/specs/%.yaml,%,${GENS_PATHS}) + $(SPECS_DIR)) +GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS}) SRCS=$(patsubst %,%-user.c,${GENS}) HDRS=$(patsubst %,%-user.h,${GENS}) OBJS=$(patsubst %,%-user.o,${GENS}) -all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI) +SPECS_PATHS=$(wildcard $(SPECS_DIR)/*.yaml) +SPECS=$(patsubst $(SPECS_DIR)/%.yaml,%,${SPECS_PATHS}) +RSTS=$(patsubst %,%.rst,${SPECS}) + +all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI) $(RSTS) protos.a: $(OBJS) @echo -e "\tAR $@" @ar rcs $@ $(OBJS) -%-user.h: ../../../../Documentation/netlink/specs/%.yaml $(TOOL) +%-user.h: $(SPECS_DIR)/%.yaml $(TOOL) @echo -e "\tGEN $@" @$(TOOL) --mode user --header --spec $< -o $@ $(YNL_GEN_ARG_$*) -%-user.c: ../../../../Documentation/netlink/specs/%.yaml $(TOOL) +%-user.c: $(SPECS_DIR)/%.yaml $(TOOL) @echo -e "\tGEN $@" @$(TOOL) --mode user --source --spec $< -o $@ $(YNL_GEN_ARG_$*) @@ -40,14 +52,37 @@ protos.a: $(OBJS) @echo -e "\tCC $@" @$(COMPILE.c) $(CFLAGS_$*) -o $@ $< +%.rst: $(SPECS_DIR)/%.yaml $(TOOL_RST) + @echo -e "\tGEN_RST $@" + @$(TOOL_RST) -o $@ -i $< + clean: rm -f *.o distclean: clean - rm -f *.c *.h *.a + rm -f *.c *.h *.a *.rst regen: @../ynl-regen.sh -.PHONY: all clean distclean regen +install-headers: $(HDRS) + @echo -e "\tINSTALL generated headers" + @$(INSTALL) -d $(DESTDIR)$(includedir)/ynl + @$(INSTALL) -m 0644 *.h $(DESTDIR)$(includedir)/ynl/ + +install-rsts: $(RSTS) + @echo -e "\tINSTALL generated docs" + @$(INSTALL) -d $(DESTDIR)$(docdir)/ynl + @$(INSTALL) -m 0644 $(RSTS) $(DESTDIR)$(docdir)/ynl/ + +install-specs: + @echo -e "\tINSTALL specs" + @$(INSTALL) -d $(DESTDIR)$(datarootdir)/ynl + @$(INSTALL) -m 0644 ../../../../Documentation/netlink/*.yaml $(DESTDIR)$(datarootdir)/ynl/ + @$(INSTALL) -d $(DESTDIR)$(datarootdir)/ynl/specs + @$(INSTALL) -m 0644 $(SPECS_DIR)/*.yaml $(DESTDIR)$(datarootdir)/ynl/specs/ + +install: install-headers install-rsts install-specs + +.PHONY: all clean distclean regen install install-headers install-rsts install-specs .DEFAULT_GOAL: all diff --git a/tools/net/ynl/lib/.gitignore b/tools/net/ynl/lib/.gitignore index 296c4035dbf2..a4383358ec72 100644 --- a/tools/net/ynl/lib/.gitignore +++ b/tools/net/ynl/lib/.gitignore @@ -1,2 +1 @@ -__pycache__/ *.d diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile index 94c49cca3dca..4b2b98704ff9 100644 --- a/tools/net/ynl/lib/Makefile +++ b/tools/net/ynl/lib/Makefile @@ -19,7 +19,6 @@ ynl.a: $(OBJS) clean: rm -f *.o *.d *~ - rm -rf __pycache__ distclean: clean rm -f *.a diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index e16cef160bc2..ce32cb35007d 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -95,7 +95,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, ynl_attr_for_each_payload(start, data_len, attr) { astart_off = (char *)attr - (char *)start; - aend_off = astart_off + ynl_attr_data_len(attr); + aend_off = (char *)ynl_attr_data_end(attr) - (char *)start; if (aend_off <= off) continue; diff --git a/tools/net/ynl/pyproject.toml b/tools/net/ynl/pyproject.toml new file mode 100644 index 000000000000..a81d8779b0e0 --- /dev/null +++ b/tools/net/ynl/pyproject.toml @@ -0,0 +1,24 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "pyynl" +authors = [ + {name = "Donald Hunter", email = "donald.hunter@gmail.com"}, + {name = "Jakub Kicinski", email = "kuba@kernel.org"}, +] +description = "yaml netlink (ynl)" +version = "0.0.1" +requires-python = ">=3.9" +dependencies = [ + "pyyaml==6.*", + "jsonschema==4.*" +] + +[tool.setuptools.packages.find] +include = ["pyynl", "pyynl.lib"] + +[project.scripts] +ynl = "pyynl.cli:main" +ynl-ethtool = "pyynl.ethtool:main" diff --git a/tools/net/ynl/pyynl/.gitignore b/tools/net/ynl/pyynl/.gitignore new file mode 100644 index 000000000000..b801cd2d016e --- /dev/null +++ b/tools/net/ynl/pyynl/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +lib/__pycache__/ diff --git a/tools/net/ynl/pyynl/__init__.py b/tools/net/ynl/pyynl/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/net/ynl/pyynl/__init__.py diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/pyynl/cli.py index 41d9fa5c818d..794e3c7dcc65 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/pyynl/cli.py @@ -3,6 +3,7 @@ import argparse import json +import os import pathlib import pprint import sys @@ -10,6 +11,24 @@ import sys sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import YnlFamily, Netlink, NlError +sys_schema_dir='/usr/share/ynl' +relative_schema_dir='../../../../Documentation/netlink' + +def schema_dir(): + script_dir = os.path.dirname(os.path.abspath(__file__)) + schema_dir = os.path.abspath(f"{script_dir}/{relative_schema_dir}") + if not os.path.isdir(schema_dir): + schema_dir = sys_schema_dir + if not os.path.isdir(schema_dir): + raise Exception(f"Schema directory {schema_dir} does not exist") + return schema_dir + +def spec_dir(): + spec_dir = schema_dir() + '/specs' + if not os.path.isdir(spec_dir): + raise Exception(f"Spec directory {spec_dir} does not exist") + return spec_dir + class YnlEncoder(json.JSONEncoder): def default(self, obj): @@ -32,7 +51,14 @@ def main(): parser = argparse.ArgumentParser(description=description, epilog=epilog) - parser.add_argument('--spec', dest='spec', type=str, required=True) + spec_group = parser.add_mutually_exclusive_group(required=True) + spec_group.add_argument('--family', dest='family', type=str, + help='name of the netlink FAMILY') + spec_group.add_argument('--list-families', action='store_true', + help='list all netlink families supported by YNL (has spec)') + spec_group.add_argument('--spec', dest='spec', type=str, + help='choose the family by SPEC file path') + parser.add_argument('--schema', dest='schema', type=str) parser.add_argument('--no-schema', action='store_true') parser.add_argument('--json', dest='json_text', type=str) @@ -70,6 +96,12 @@ def main(): else: pprint.PrettyPrinter().pprint(msg) + if args.list_families: + for filename in sorted(os.listdir(spec_dir())): + if filename.endswith('.yaml'): + print(filename.removesuffix('.yaml')) + return + if args.no_schema: args.schema = '' @@ -77,7 +109,16 @@ def main(): if args.json_text: attrs = json.loads(args.json_text) - ynl = YnlFamily(args.spec, args.schema, args.process_unknown, + if args.family: + spec = f"{spec_dir()}/{args.family}.yaml" + if args.schema is None and spec.startswith(sys_schema_dir): + args.schema = '' # disable schema validation when installed + else: + spec = args.spec + if not os.path.isfile(spec): + raise Exception(f"Spec file {spec} does not exist") + + ynl = YnlFamily(spec, args.schema, args.process_unknown, recv_size=args.dbg_small_recv) if args.dbg_small_recv: ynl.set_recv_dbg(True) diff --git a/tools/net/ynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py index ebb0a11f67bf..af7fddd7b085 100755 --- a/tools/net/ynl/ethtool.py +++ b/tools/net/ynl/pyynl/ethtool.py @@ -11,6 +11,7 @@ import os sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import YnlFamily +from cli import schema_dir, spec_dir def args_to_req(ynl, op_name, args, req): """ @@ -156,10 +157,8 @@ def main(): args = parser.parse_args() script_abs_dir = os.path.dirname(os.path.abspath(sys.argv[0])) - spec = os.path.join(script_abs_dir, - '../../../Documentation/netlink/specs/ethtool.yaml') - schema = os.path.join(script_abs_dir, - '../../../Documentation/netlink/genetlink-legacy.yaml') + spec = os.path.join(spec_dir(), 'ethtool.yaml') + schema = os.path.join(schema_dir(), 'genetlink-legacy.yaml') ynl = YnlFamily(spec, schema) diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py index 9137b83e580a..9137b83e580a 100644 --- a/tools/net/ynl/lib/__init__.py +++ b/tools/net/ynl/pyynl/lib/__init__.py diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/pyynl/lib/nlspec.py index a745739655ad..314ec8007496 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/pyynl/lib/nlspec.py @@ -219,7 +219,10 @@ class SpecAttrSet(SpecElement): else: real_set = family.attr_sets[self.subset_of] for elem in self.yaml['attributes']: - attr = real_set[elem['name']] + real_attr = real_set[elem['name']] + combined_elem = real_attr.yaml | elem + attr = self.new_attr(combined_elem, real_attr.value) + self.attrs[attr.name] = attr self.attrs_by_val[attr.value] = attr diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 01ec01a90e76..08f8bf89cfc2 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -556,10 +556,10 @@ class YnlFamily(SpecFamily): if attr["type"] == 'nest': nl_type |= Netlink.NLA_F_NESTED attr_payload = b'' - sub_attrs = SpaceAttrs(self.attr_sets[space], value, search_attrs) + sub_space = attr['nested-attributes'] + sub_attrs = SpaceAttrs(self.attr_sets[sub_space], value, search_attrs) for subname, subvalue in value.items(): - attr_payload += self._add_attr(attr['nested-attributes'], - subname, subvalue, sub_attrs) + attr_payload += self._add_attr(sub_space, subname, subvalue, sub_attrs) elif attr["type"] == 'flag': if not value: # If value is absent or false then skip attribute creation. @@ -733,41 +733,45 @@ class YnlFamily(SpecFamily): self._rsp_add(rsp, attr_name, None, self._decode_unknown(attr)) continue - if attr_spec["type"] == 'nest': - subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs) - decoded = subdict - elif attr_spec["type"] == 'string': - decoded = attr.as_strz() - elif attr_spec["type"] == 'binary': - decoded = self._decode_binary(attr, attr_spec) - elif attr_spec["type"] == 'flag': - decoded = True - elif attr_spec.is_auto_scalar: - decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order) - elif attr_spec["type"] in NlAttr.type_formats: - decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order) - if 'enum' in attr_spec: - decoded = self._decode_enum(decoded, attr_spec) - elif attr_spec.display_hint: - decoded = self._formatted_string(decoded, attr_spec.display_hint) - elif attr_spec["type"] == 'indexed-array': - decoded = self._decode_array_attr(attr, attr_spec) - elif attr_spec["type"] == 'bitfield32': - value, selector = struct.unpack("II", attr.raw) - if 'enum' in attr_spec: - value = self._decode_enum(value, attr_spec) - selector = self._decode_enum(selector, attr_spec) - decoded = {"value": value, "selector": selector} - elif attr_spec["type"] == 'sub-message': - decoded = self._decode_sub_msg(attr, attr_spec, search_attrs) - elif attr_spec["type"] == 'nest-type-value': - decoded = self._decode_nest_type_value(attr, attr_spec) - else: - if not self.process_unknown: - raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') - decoded = self._decode_unknown(attr) - - self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded) + try: + if attr_spec["type"] == 'nest': + subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs) + decoded = subdict + elif attr_spec["type"] == 'string': + decoded = attr.as_strz() + elif attr_spec["type"] == 'binary': + decoded = self._decode_binary(attr, attr_spec) + elif attr_spec["type"] == 'flag': + decoded = True + elif attr_spec.is_auto_scalar: + decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order) + elif attr_spec["type"] in NlAttr.type_formats: + decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order) + if 'enum' in attr_spec: + decoded = self._decode_enum(decoded, attr_spec) + elif attr_spec.display_hint: + decoded = self._formatted_string(decoded, attr_spec.display_hint) + elif attr_spec["type"] == 'indexed-array': + decoded = self._decode_array_attr(attr, attr_spec) + elif attr_spec["type"] == 'bitfield32': + value, selector = struct.unpack("II", attr.raw) + if 'enum' in attr_spec: + value = self._decode_enum(value, attr_spec) + selector = self._decode_enum(selector, attr_spec) + decoded = {"value": value, "selector": selector} + elif attr_spec["type"] == 'sub-message': + decoded = self._decode_sub_msg(attr, attr_spec, search_attrs) + elif attr_spec["type"] == 'nest-type-value': + decoded = self._decode_nest_type_value(attr, attr_spec) + else: + if not self.process_unknown: + raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') + decoded = self._decode_unknown(attr) + + self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded) + except: + print(f"Error decoding '{attr_spec.name}' from '{space}'") + raise return rsp diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index d8201c4b1520..c2eabc90dce8 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -79,6 +79,20 @@ class Type(SpecAttr): self.enum_name = None delattr(self, "enum_name") + def _get_real_attr(self): + # if the attr is for a subset return the "real" attr (just one down, does not recurse) + return self.family.attr_sets[self.attr_set.subset_of][self.name] + + def set_request(self): + self.request = True + if self.attr_set.subset_of: + self._get_real_attr().set_request() + + def set_reply(self): + self.reply = True + if self.attr_set.subset_of: + self._get_real_attr().set_reply() + def get_limit(self, limit, default=None): value = self.checks.get(limit, default) if value is None: @@ -106,6 +120,10 @@ class Type(SpecAttr): enum_name = f"{self.attr_set.name_prefix}{self.name}" self.enum_name = c_upper(enum_name) + if self.attr_set.subset_of: + if self.checks != self._get_real_attr().checks: + raise Exception("Overriding checks not supported by codegen, yet") + def is_multi_val(self): return None @@ -801,6 +819,8 @@ class EnumSet(SpecEnumSet): self.user_type = 'int' self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-") + self.header = yaml.get('header', None) + self.enum_cnt_name = yaml.get('enum-cnt-name', None) super().__init__(family, yaml) @@ -1117,17 +1137,17 @@ class Family(SpecFamily): for _, struct in self.pure_nested_structs.items(): if struct.request: for _, arg in struct.member_list(): - arg.request = True + arg.set_request() if struct.reply: for _, arg in struct.member_list(): - arg.reply = True + arg.set_reply() for root_set, rs_members in self.root_sets.items(): for attr, spec in self.attr_sets[root_set].items(): if attr in rs_members['request']: - spec.request = True + spec.set_request() if attr in rs_members['reply']: - spec.reply = True + spec.set_reply() def _load_global_policy(self): global_set = set() @@ -1763,7 +1783,14 @@ def parse_rsp_nested(ri, struct): f'{struct.ptr_name}dst = yarg->data;'] init_lines = [] - _multi_parse(ri, struct, init_lines, local_vars) + if struct.member_list(): + _multi_parse(ri, struct, init_lines, local_vars) + else: + # Empty nest + ri.cw.block_start() + ri.cw.p('return 0;') + ri.cw.block_end() + ri.cw.nl() def parse_rsp_msg(ri, deref=False): @@ -2384,6 +2411,17 @@ def print_kernel_family_struct_src(family, cw): if not kernel_can_gen_family_struct(family): return + if 'sock-priv' in family.kernel_family: + # Generate "trampolines" to make CFI happy + cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_init", + [f"{family.c_name}_nl_sock_priv_init(priv);"], + ["void *priv"]) + cw.nl() + cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_destroy", + [f"{family.c_name}_nl_sock_priv_destroy(priv);"], + ["void *priv"]) + cw.nl() + cw.block_start(f"struct genl_family {family.ident_name}_nl_family __ro_after_init =") cw.p('.name\t\t= ' + family.fam_key + ',') cw.p('.version\t= ' + family.ver_key + ',') @@ -2401,9 +2439,8 @@ def print_kernel_family_struct_src(family, cw): cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.c_name}_nl_mcgrps),') if 'sock-priv' in family.kernel_family: cw.p(f'.sock_priv_size\t= sizeof({family.kernel_family["sock-priv"]}),') - # Force cast here, actual helpers take pointer to the real type. - cw.p(f'.sock_priv_init\t= (void *){family.c_name}_nl_sock_priv_init,') - cw.p(f'.sock_priv_destroy = (void *){family.c_name}_nl_sock_priv_destroy,') + cw.p(f'.sock_priv_init\t= __{family.c_name}_nl_sock_priv_init,') + cw.p(f'.sock_priv_destroy = __{family.c_name}_nl_sock_priv_destroy,') cw.block_end(';') @@ -2417,6 +2454,87 @@ def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'): cw.block_start(line=start_line) +def render_uapi_unified(family, cw, max_by_define, separate_ntf): + max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX")) + cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX")) + max_value = f"({cnt_name} - 1)" + + uapi_enum_start(family, cw, family['operations'], 'enum-name') + val = 0 + for op in family.msgs.values(): + if separate_ntf and ('notify' in op or 'event' in op): + continue + + suffix = ',' + if op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(op.enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + +def render_uapi_directional(family, cw, max_by_define): + max_name = f"{family.op_prefix}USER_MAX" + cnt_name = f"__{family.op_prefix}USER_CNT" + max_value = f"({cnt_name} - 1)" + + cw.block_start(line='enum') + cw.p(c_upper(f'{family.name}_MSG_USER_NONE = 0,')) + val = 0 + for op in family.msgs.values(): + if 'do' in op and 'event' not in op: + suffix = ',' + if op.value and op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(op.enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + max_name = f"{family.op_prefix}KERNEL_MAX" + cnt_name = f"__{family.op_prefix}KERNEL_CNT" + max_value = f"({cnt_name} - 1)" + + cw.block_start(line='enum') + cw.p(c_upper(f'{family.name}_MSG_KERNEL_NONE = 0,')) + val = 0 + for op in family.msgs.values(): + if ('do' in op and 'reply' in op['do']) or 'notify' in op or 'event' in op: + enum_name = op.enum_name + if 'event' not in op and 'notify' not in op: + enum_name = f'{enum_name}_REPLY' + + suffix = ',' + if op.value and op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + def render_uapi(family, cw): hdr_prot = f"_UAPI_LINUX_{c_upper(family.uapi_header_name)}_H" hdr_prot = hdr_prot.replace('/', '_') @@ -2440,6 +2558,9 @@ def render_uapi(family, cw): if const['type'] == 'enum' or const['type'] == 'flags': enum = family.consts[const['name']] + if enum.header: + continue + if enum.has_doc(): if enum.has_entry_doc(): cw.p('/**') @@ -2472,9 +2593,12 @@ def render_uapi(family, cw): max_val = f' = {enum.get_mask()},' cw.p(max_name + max_val) else: + cnt_name = enum.enum_cnt_name max_name = c_upper(name_pfx + 'max') - cw.p('__' + max_name + ',') - cw.p(max_name + ' = (__' + max_name + ' - 1)') + if not cnt_name: + cnt_name = '__' + name_pfx + 'max' + cw.p(c_upper(cnt_name) + ',') + cw.p(max_name + ' = (' + c_upper(cnt_name) + ' - 1)') cw.block_end(line=';') cw.nl() elif const['type'] == 'const': @@ -2503,7 +2627,8 @@ def render_uapi(family, cw): val = attr.value val += 1 cw.p(attr.enum_name + suffix) - cw.nl() + if attr_set.items(): + cw.nl() cw.p(attr_set.cnt_name + ('' if max_by_define else ',')) if not max_by_define: cw.p(f"{attr_set.max_name} = {max_value}") @@ -2515,30 +2640,12 @@ def render_uapi(family, cw): # Commands separate_ntf = 'async-prefix' in family['operations'] - max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX")) - cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX")) - max_value = f"({cnt_name} - 1)" - - uapi_enum_start(family, cw, family['operations'], 'enum-name') - val = 0 - for op in family.msgs.values(): - if separate_ntf and ('notify' in op or 'event' in op): - continue - - suffix = ',' - if op.value != val: - suffix = f" = {op.value}," - val = op.value - cw.p(op.enum_name + suffix) - val += 1 - cw.nl() - cw.p(cnt_name + ('' if max_by_define else ',')) - if not max_by_define: - cw.p(f"{max_name} = {max_value}") - cw.block_end(line=';') - if max_by_define: - cw.p(f"#define {max_name} {max_value}") - cw.nl() + if family.msg_id_model == 'unified': + render_uapi_unified(family, cw, max_by_define, separate_ntf) + elif family.msg_id_model == 'directional': + render_uapi_directional(family, cw, max_by_define) + else: + raise Exception(f'Unsupported message enum-model {family.msg_id_model}') if separate_ntf: uapi_enum_start(family, cw, family['operations'], enum_name='async-enum') @@ -2635,7 +2742,8 @@ def find_kernel_root(full_path): def main(): parser = argparse.ArgumentParser(description='Netlink simple parsing generator') - parser.add_argument('--mode', dest='mode', type=str, required=True) + parser.add_argument('--mode', dest='mode', type=str, required=True, + choices=('user', 'kernel', 'uapi')) parser.add_argument('--spec', dest='spec', type=str, required=True) parser.add_argument('--header', dest='header', action='store_true', default=None) parser.add_argument('--source', dest='header', action='store_false') @@ -2662,13 +2770,6 @@ def main(): os.sys.exit(1) return - supported_models = ['unified'] - if args.mode in ['user', 'kernel']: - supported_models += ['directional'] - if parsed.msg_id_model not in supported_models: - print(f'Message enum-model {parsed.msg_id_model} not supported for {args.mode} generation') - os.sys.exit(1) - cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=(not args.cmp_out)) _, spec_kernel = find_kernel_root(args.spec) @@ -2696,7 +2797,10 @@ def main(): cw.p('#define ' + hdr_prot) cw.nl() - hdr_file=os.path.basename(args.out_file[:-2]) + ".h" + if args.out_file: + hdr_file = os.path.basename(args.out_file[:-2]) + ".h" + else: + hdr_file = "generated_header_file.h" if args.mode == 'kernel': cw.p('#include <net/netlink.h>') @@ -2718,12 +2822,17 @@ def main(): else: cw.p(f'#include "{hdr_file}"') cw.p('#include "ynl.h"') - headers = [parsed.uapi_header] + headers = [] for definition in parsed['definitions']: if 'header' in definition: headers.append(definition['header']) + if args.mode == 'user': + headers.append(parsed.uapi_header) + seen_header = [] for one in headers: - cw.p(f"#include <{one}>") + if one not in seen_header: + cw.p(f"#include <{one}>") + seen_header.append(one) cw.nl() if args.mode == "user": diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py index 6c56d0d726b4..6c56d0d726b4 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/pyynl/ynl_gen_rst.py diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh index a37304dcc88e..81b4ecd89100 100755 --- a/tools/net/ynl/ynl-regen.sh +++ b/tools/net/ynl/ynl-regen.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause -TOOL=$(dirname $(realpath $0))/ynl-gen-c.py +TOOL=$(dirname $(realpath $0))/pyynl/ynl_gen_c.py force= search= diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c index 9bba1e9318e0..87230ed570fd 100644 --- a/tools/objtool/arch/loongarch/special.c +++ b/tools/objtool/arch/loongarch/special.c @@ -9,7 +9,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, } struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn) + struct instruction *insn, + unsigned long *table_size) { return NULL; } diff --git a/tools/objtool/arch/powerpc/special.c b/tools/objtool/arch/powerpc/special.c index d33868147196..51610689abf7 100644 --- a/tools/objtool/arch/powerpc/special.c +++ b/tools/objtool/arch/powerpc/special.c @@ -13,7 +13,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, } struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn) + struct instruction *insn, + unsigned long *table_size) { exit(-1); } diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 4ea0f9815fda..9c1c9df09aaa 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -109,7 +109,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, * NOTE: MITIGATION_RETPOLINE made it harder still to decode dynamic jumps. */ struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn) + struct instruction *insn, + unsigned long *table_size) { struct reloc *text_reloc, *rodata_reloc; struct section *table_sec; @@ -158,5 +159,6 @@ struct reloc *arch_find_switch_table(struct objtool_file *file, if (reloc_type(text_reloc) == R_X86_64_PC32) file->ignore_unreachables = true; + *table_size = 0; return rodata_reloc; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4ce176ad411f..753dbc4f8198 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -150,6 +150,15 @@ static inline struct reloc *insn_jump_table(struct instruction *insn) return NULL; } +static inline unsigned long insn_jump_table_size(struct instruction *insn) +{ + if (insn->type == INSN_JUMP_DYNAMIC || + insn->type == INSN_CALL_DYNAMIC) + return insn->_jump_table_size; + + return 0; +} + static bool is_jump_table_jump(struct instruction *insn) { struct alt_group *alt_group = insn->alt_group; @@ -614,108 +623,6 @@ static int init_pv_ops(struct objtool_file *file) return 0; } -static struct instruction *find_last_insn(struct objtool_file *file, - struct section *sec) -{ - struct instruction *insn = NULL; - unsigned int offset; - unsigned int end = (sec->sh.sh_size > 10) ? sec->sh.sh_size - 10 : 0; - - for (offset = sec->sh.sh_size - 1; offset >= end && !insn; offset--) - insn = find_insn(file, sec, offset); - - return insn; -} - -/* - * Mark "ud2" instructions and manually annotated dead ends. - */ -static int add_dead_ends(struct objtool_file *file) -{ - struct section *rsec; - struct reloc *reloc; - struct instruction *insn; - uint64_t offset; - - /* - * Check for manually annotated dead ends. - */ - rsec = find_section_by_name(file->elf, ".rela.discard.unreachable"); - if (!rsec) - goto reachable; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type == STT_SECTION) { - offset = reloc_addend(reloc); - } else if (reloc->sym->local_label) { - offset = reloc->sym->offset; - } else { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, offset); - if (insn) - insn = prev_insn_same_sec(file, insn); - else if (offset == reloc->sym->sec->sh.sh_size) { - insn = find_last_insn(file, reloc->sym->sec); - if (!insn) { - WARN("can't find unreachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - } else { - WARN("can't find unreachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - - insn->dead_end = true; - } - -reachable: - /* - * These manually annotated reachable checks are needed for GCC 4.4, - * where the Linux unreachable() macro isn't supported. In that case - * GCC doesn't know the "ud2" is fatal, so it generates code as if it's - * not a dead end. - */ - rsec = find_section_by_name(file->elf, ".rela.discard.reachable"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type == STT_SECTION) { - offset = reloc_addend(reloc); - } else if (reloc->sym->local_label) { - offset = reloc->sym->offset; - } else { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, offset); - if (insn) - insn = prev_insn_same_sec(file, insn); - else if (offset == reloc->sym->sec->sh.sh_size) { - insn = find_last_insn(file, reloc->sym->sec); - if (!insn) { - WARN("can't find reachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - } else { - WARN("can't find reachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - - insn->dead_end = false; - } - - return 0; -} - static int create_static_call_sections(struct objtool_file *file) { struct static_call_site *site; @@ -1310,40 +1217,6 @@ static void add_uaccess_safe(struct objtool_file *file) } /* - * FIXME: For now, just ignore any alternatives which add retpolines. This is - * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. - * But it at least allows objtool to understand the control flow *around* the - * retpoline. - */ -static int add_ignore_alternatives(struct objtool_file *file) -{ - struct section *rsec; - struct reloc *reloc; - struct instruction *insn; - - rsec = find_section_by_name(file->elf, ".rela.discard.ignore_alts"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.ignore_alts entry"); - return -1; - } - - insn->ignore_alts = true; - } - - return 0; -} - -/* * Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol * will be added to the .retpoline_sites section. */ @@ -2073,6 +1946,7 @@ out: static int add_jump_table(struct objtool_file *file, struct instruction *insn, struct reloc *next_table) { + unsigned long table_size = insn_jump_table_size(insn); struct symbol *pfunc = insn_func(insn)->pfunc; struct reloc *table = insn_jump_table(insn); struct instruction *dest_insn; @@ -2087,6 +1961,8 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, for_each_reloc_from(table->sec, reloc) { /* Check for the end of the table: */ + if (table_size && reloc_offset(reloc) - reloc_offset(table) >= table_size) + break; if (reloc != table && reloc == next_table) break; @@ -2131,12 +2007,12 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, * find_jump_table() - Given a dynamic jump, find the switch jump table * associated with it. */ -static struct reloc *find_jump_table(struct objtool_file *file, - struct symbol *func, - struct instruction *insn) +static void find_jump_table(struct objtool_file *file, struct symbol *func, + struct instruction *insn) { struct reloc *table_reloc; struct instruction *dest_insn, *orig_insn = insn; + unsigned long table_size; /* * Backward search using the @first_jump_src links, these help avoid @@ -2157,17 +2033,17 @@ static struct reloc *find_jump_table(struct objtool_file *file, insn->jump_dest->offset > orig_insn->offset)) break; - table_reloc = arch_find_switch_table(file, insn); + table_reloc = arch_find_switch_table(file, insn, &table_size); if (!table_reloc) continue; dest_insn = find_insn(file, table_reloc->sym->sec, reloc_addend(table_reloc)); if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func) continue; - return table_reloc; + orig_insn->_jump_table = table_reloc; + orig_insn->_jump_table_size = table_size; + break; } - - return NULL; } /* @@ -2178,7 +2054,6 @@ static void mark_func_jump_tables(struct objtool_file *file, struct symbol *func) { struct instruction *insn, *last = NULL; - struct reloc *reloc; func_for_each_insn(file, func, insn) { if (!last) @@ -2201,9 +2076,7 @@ static void mark_func_jump_tables(struct objtool_file *file, if (insn->type != INSN_JUMP_DYNAMIC) continue; - reloc = find_jump_table(file, func, insn); - if (reloc) - insn->_jump_table = reloc; + find_jump_table(file, func, insn); } } @@ -2373,185 +2246,147 @@ static int read_unwind_hints(struct objtool_file *file) return 0; } -static int read_noendbr_hints(struct objtool_file *file) +static int read_annotate(struct objtool_file *file, + int (*func)(struct objtool_file *file, int type, struct instruction *insn)) { + struct section *sec; struct instruction *insn; - struct section *rsec; struct reloc *reloc; + uint64_t offset; + int type, ret; - rsec = find_section_by_name(file->elf, ".rela.discard.noendbr"); - if (!rsec) + sec = find_section_by_name(file->elf, ".discard.annotate_insn"); + if (!sec) return 0; - for_each_reloc(rsec, reloc) { - insn = find_insn(file, reloc->sym->sec, - reloc->sym->offset + reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.noendbr entry"); - return -1; - } + if (!sec->rsec) + return 0; - insn->noendbr = 1; + if (sec->sh.sh_entsize != 8) { + static bool warned = false; + if (!warned) { + WARN("%s: dodgy linker, sh_entsize != 8", sec->name); + warned = true; + } + sec->sh.sh_entsize = 8; } - return 0; -} - -static int read_retpoline_hints(struct objtool_file *file) -{ - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe"); - if (!rsec) - return 0; + for_each_reloc(sec->rsec, reloc) { + type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4); - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } + offset = reloc->sym->offset + reloc_addend(reloc); + insn = find_insn(file, reloc->sym->sec, offset); - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); if (!insn) { - WARN("bad .discard.retpoline_safe entry"); + WARN("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type); return -1; } - if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC && - insn->type != INSN_RETURN && - insn->type != INSN_NOP) { - WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); - return -1; - } - - insn->retpoline_safe = true; + ret = func(file, type, insn); + if (ret < 0) + return ret; } return 0; } -static int read_instr_hints(struct objtool_file *file) +static int __annotate_early(struct objtool_file *file, int type, struct instruction *insn) { - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; + switch (type) { + case ANNOTYPE_IGNORE_ALTS: + insn->ignore_alts = true; + break; - rsec = find_section_by_name(file->elf, ".rela.discard.instr_end"); - if (!rsec) - return 0; + /* + * Must be before read_unwind_hints() since that needs insn->noendbr. + */ + case ANNOTYPE_NOENDBR: + insn->noendbr = 1; + break; - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } + default: + break; + } - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_end entry"); - return -1; - } + return 0; +} - insn->instr--; - } +static int __annotate_ifc(struct objtool_file *file, int type, struct instruction *insn) +{ + unsigned long dest_off; - rsec = find_section_by_name(file->elf, ".rela.discard.instr_begin"); - if (!rsec) + if (type != ANNOTYPE_INTRA_FUNCTION_CALL) return 0; - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } + if (insn->type != INSN_CALL) { + WARN_INSN(insn, "intra_function_call not a direct call"); + return -1; + } - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_begin entry"); - return -1; - } + /* + * Treat intra-function CALLs as JMPs, but with a stack_op. + * See add_call_destinations(), which strips stack_ops from + * normal CALLs. + */ + insn->type = INSN_JUMP_UNCONDITIONAL; - insn->instr++; + dest_off = arch_jump_destination(insn); + insn->jump_dest = find_insn(file, insn->sec, dest_off); + if (!insn->jump_dest) { + WARN_INSN(insn, "can't find call dest at %s+0x%lx", + insn->sec->name, dest_off); + return -1; } return 0; } -static int read_validate_unret_hints(struct objtool_file *file) +static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn) { - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.validate_unret"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } + switch (type) { + case ANNOTYPE_NOENDBR: + /* early */ + break; - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_end entry"); + case ANNOTYPE_RETPOLINE_SAFE: + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN && + insn->type != INSN_NOP) { + WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); return -1; } - insn->unret = 1; - } - - return 0; -} - -static int read_intra_function_calls(struct objtool_file *file) -{ - struct instruction *insn; - struct section *rsec; - struct reloc *reloc; + insn->retpoline_safe = true; + break; - rsec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls"); - if (!rsec) - return 0; + case ANNOTYPE_INSTR_BEGIN: + insn->instr++; + break; - for_each_reloc(rsec, reloc) { - unsigned long dest_off; + case ANNOTYPE_INSTR_END: + insn->instr--; + break; - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", - rsec->name); - return -1; - } + case ANNOTYPE_UNRET_BEGIN: + insn->unret = 1; + break; - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.intra_function_call entry"); - return -1; - } + case ANNOTYPE_IGNORE_ALTS: + /* early */ + break; - if (insn->type != INSN_CALL) { - WARN_INSN(insn, "intra_function_call not a direct call"); - return -1; - } + case ANNOTYPE_INTRA_FUNCTION_CALL: + /* ifc */ + break; - /* - * Treat intra-function CALLs as JMPs, but with a stack_op. - * See add_call_destinations(), which strips stack_ops from - * normal CALLs. - */ - insn->type = INSN_JUMP_UNCONDITIONAL; + case ANNOTYPE_REACHABLE: + insn->dead_end = false; + break; - dest_off = arch_jump_destination(insn); - insn->jump_dest = find_insn(file, insn->sec, dest_off); - if (!insn->jump_dest) { - WARN_INSN(insn, "can't find call dest at %s+0x%lx", - insn->sec->name, dest_off); - return -1; - } + default: + WARN_INSN(insn, "Unknown annotation type: %d", type); + break; } return 0; @@ -2666,14 +2501,7 @@ static int decode_sections(struct objtool_file *file) add_ignores(file); add_uaccess_safe(file); - ret = add_ignore_alternatives(file); - if (ret) - return ret; - - /* - * Must be before read_unwind_hints() since that needs insn->noendbr. - */ - ret = read_noendbr_hints(file); + ret = read_annotate(file, __annotate_early); if (ret) return ret; @@ -2695,7 +2523,7 @@ static int decode_sections(struct objtool_file *file) * Must be before add_call_destination(); it changes INSN_CALL to * INSN_JUMP. */ - ret = read_intra_function_calls(file); + ret = read_annotate(file, __annotate_ifc); if (ret) return ret; @@ -2703,14 +2531,6 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - /* - * Must be after add_call_destinations() such that it can override - * dead_end_function() marks. - */ - ret = add_dead_ends(file); - if (ret) - return ret; - ret = add_jump_table_alts(file); if (ret) return ret; @@ -2719,15 +2539,11 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_retpoline_hints(file); - if (ret) - return ret; - - ret = read_instr_hints(file); - if (ret) - return ret; - - ret = read_validate_unret_hints(file); + /* + * Must be after add_call_destinations() such that it can override + * dead_end_function() marks. + */ + ret = read_annotate(file, __annotate_late); if (ret) return ret; @@ -3820,9 +3636,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_CONTEXT_SWITCH: - if (func && (!next_insn || !next_insn->hint)) { - WARN_INSN(insn, "unsupported instruction in callable function"); - return 1; + if (func) { + if (!next_insn || !next_insn->hint) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; + } + break; } return 0; diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index daa46f1f0965..e1cd13cd28a3 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -71,7 +71,10 @@ struct instruction { struct instruction *first_jump_src; union { struct symbol *_call_dest; - struct reloc *_jump_table; + struct { + struct reloc *_jump_table; + unsigned long _jump_table_size; + }; }; struct alternative *alts; struct symbol *sym; diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h index 86d4af9c5aa9..e7ee7ffccefd 100644 --- a/tools/objtool/include/objtool/special.h +++ b/tools/objtool/include/objtool/special.h @@ -38,5 +38,6 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, struct instruction *insn, struct reloc *reloc); struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn); + struct instruction *insn, + unsigned long *table_size); #endif /* _SPECIAL_H */ diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index f37614cc2c1b..b2174894f9f7 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -19,6 +19,7 @@ NORETURN(__x64_sys_exit_group) NORETURN(arch_cpu_idle_dead) NORETURN(bch2_trans_in_restart_error) NORETURN(bch2_trans_restart_error) +NORETURN(bch2_trans_unlocked_error) NORETURN(cpu_bringup_and_idle) NORETURN(cpu_startup_entry) NORETURN(do_exit) diff --git a/tools/pci/Build b/tools/pci/Build deleted file mode 100644 index c375aea21790..000000000000 --- a/tools/pci/Build +++ /dev/null @@ -1 +0,0 @@ -pcitest-y += pcitest.o diff --git a/tools/pci/Makefile b/tools/pci/Makefile deleted file mode 100644 index 62d41f1a1e2c..000000000000 --- a/tools/pci/Makefile +++ /dev/null @@ -1,58 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -include ../scripts/Makefile.include - -bindir ?= /usr/bin - -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(CURDIR))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -endif - -# Do not use make's built-in rules -# (this improves performance and avoids hard-to-debug behaviour); -MAKEFLAGS += -r - -CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include - -ALL_TARGETS := pcitest -ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) - -SCRIPTS := pcitest.sh - -all: $(ALL_PROGRAMS) - -export srctree OUTPUT CC LD CFLAGS -include $(srctree)/tools/build/Makefile.include - -# -# We need the following to be outside of kernel tree -# -$(OUTPUT)include/linux/: ../../include/uapi/linux/ - mkdir -p $(OUTPUT)include/linux/ 2>&1 || true - ln -sf $(CURDIR)/../../include/uapi/linux/pcitest.h $@ - -prepare: $(OUTPUT)include/linux/ - -PCITEST_IN := $(OUTPUT)pcitest-in.o -$(PCITEST_IN): prepare FORCE - $(Q)$(MAKE) $(build)=pcitest -$(OUTPUT)pcitest: $(PCITEST_IN) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ - -clean: - rm -f $(ALL_PROGRAMS) - rm -rf $(OUTPUT)include/ - find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete - -install: $(ALL_PROGRAMS) - install -d -m 755 $(DESTDIR)$(bindir); \ - for program in $(ALL_PROGRAMS); do \ - install $$program $(DESTDIR)$(bindir); \ - done; \ - for script in $(SCRIPTS); do \ - install $$script $(DESTDIR)$(bindir); \ - done - -FORCE: - -.PHONY: all install clean FORCE prepare diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c deleted file mode 100644 index 7b530d838d40..000000000000 --- a/tools/pci/pcitest.c +++ /dev/null @@ -1,250 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/** - * Userspace PCI Endpoint Test Module - * - * Copyright (C) 2017 Texas Instruments - * Author: Kishon Vijay Abraham I <kishon@ti.com> - */ - -#include <errno.h> -#include <fcntl.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <sys/ioctl.h> -#include <unistd.h> - -#include <linux/pcitest.h> - -static char *result[] = { "NOT OKAY", "OKAY" }; -static char *irq[] = { "LEGACY", "MSI", "MSI-X" }; - -struct pci_test { - char *device; - char barnum; - bool legacyirq; - unsigned int msinum; - unsigned int msixnum; - int irqtype; - bool set_irqtype; - bool get_irqtype; - bool clear_irq; - bool read; - bool write; - bool copy; - unsigned long size; - bool use_dma; -}; - -static int run_test(struct pci_test *test) -{ - struct pci_endpoint_test_xfer_param param = {}; - int ret = -EINVAL; - int fd; - - fd = open(test->device, O_RDWR); - if (fd < 0) { - perror("can't open PCI Endpoint Test device"); - return -ENODEV; - } - - if (test->barnum >= 0 && test->barnum <= 5) { - ret = ioctl(fd, PCITEST_BAR, test->barnum); - fprintf(stdout, "BAR%d:\t\t", test->barnum); - if (ret < 0) - fprintf(stdout, "TEST FAILED\n"); - else - fprintf(stdout, "%s\n", result[ret]); - } - - if (test->set_irqtype) { - ret = ioctl(fd, PCITEST_SET_IRQTYPE, test->irqtype); - fprintf(stdout, "SET IRQ TYPE TO %s:\t\t", irq[test->irqtype]); - if (ret < 0) - fprintf(stdout, "FAILED\n"); - else - fprintf(stdout, "%s\n", result[ret]); - } - - if (test->get_irqtype) { - ret = ioctl(fd, PCITEST_GET_IRQTYPE); - fprintf(stdout, "GET IRQ TYPE:\t\t"); - if (ret < 0) - fprintf(stdout, "FAILED\n"); - else - fprintf(stdout, "%s\n", irq[ret]); - } - - if (test->clear_irq) { - ret = ioctl(fd, PCITEST_CLEAR_IRQ); - fprintf(stdout, "CLEAR IRQ:\t\t"); - if (ret < 0) - fprintf(stdout, "FAILED\n"); - else - fprintf(stdout, "%s\n", result[ret]); - } - - if (test->legacyirq) { - ret = ioctl(fd, PCITEST_LEGACY_IRQ, 0); - fprintf(stdout, "LEGACY IRQ:\t"); - if (ret < 0) - fprintf(stdout, "TEST FAILED\n"); - else - fprintf(stdout, "%s\n", result[ret]); - } - - if (test->msinum > 0 && test->msinum <= 32) { - ret = ioctl(fd, PCITEST_MSI, test->msinum); - fprintf(stdout, "MSI%u:\t\t", test->msinum); - if (ret < 0) - fprintf(stdout, "TEST FAILED\n"); - else - fprintf(stdout, "%s\n", result[ret]); - } - - if (test->msixnum > 0 && test->msixnum <= 2048) { - ret = ioctl(fd, PCITEST_MSIX, test->msixnum); - fprintf(stdout, "MSI-X%u:\t\t", test->msixnum); - if (ret < 0) - fprintf(stdout, "TEST FAILED\n"); - else - fprintf(stdout, "%s\n", result[ret]); - } - - if (test->write) { - param.size = test->size; - if (test->use_dma) - param.flags = PCITEST_FLAGS_USE_DMA; - ret = ioctl(fd, PCITEST_WRITE, ¶m); - fprintf(stdout, "WRITE (%7lu bytes):\t\t", test->size); - if (ret < 0) - fprintf(stdout, "TEST FAILED\n"); - else - fprintf(stdout, "%s\n", result[ret]); - } - - if (test->read) { - param.size = test->size; - if (test->use_dma) - param.flags = PCITEST_FLAGS_USE_DMA; - ret = ioctl(fd, PCITEST_READ, ¶m); - fprintf(stdout, "READ (%7lu bytes):\t\t", test->size); - if (ret < 0) - fprintf(stdout, "TEST FAILED\n"); - else - fprintf(stdout, "%s\n", result[ret]); - } - - if (test->copy) { - param.size = test->size; - if (test->use_dma) - param.flags = PCITEST_FLAGS_USE_DMA; - ret = ioctl(fd, PCITEST_COPY, ¶m); - fprintf(stdout, "COPY (%7lu bytes):\t\t", test->size); - if (ret < 0) - fprintf(stdout, "TEST FAILED\n"); - else - fprintf(stdout, "%s\n", result[ret]); - } - - fflush(stdout); - close(fd); - return (ret < 0) ? ret : 1 - ret; /* return 0 if test succeeded */ -} - -int main(int argc, char **argv) -{ - int c; - struct pci_test *test; - - test = calloc(1, sizeof(*test)); - if (!test) { - perror("Fail to allocate memory for pci_test\n"); - return -ENOMEM; - } - - /* since '0' is a valid BAR number, initialize it to -1 */ - test->barnum = -1; - - /* set default size as 100KB */ - test->size = 0x19000; - - /* set default endpoint device */ - test->device = "/dev/pci-endpoint-test.0"; - - while ((c = getopt(argc, argv, "D:b:m:x:i:deIlhrwcs:")) != EOF) - switch (c) { - case 'D': - test->device = optarg; - continue; - case 'b': - test->barnum = atoi(optarg); - if (test->barnum < 0 || test->barnum > 5) - goto usage; - continue; - case 'l': - test->legacyirq = true; - continue; - case 'm': - test->msinum = atoi(optarg); - if (test->msinum < 1 || test->msinum > 32) - goto usage; - continue; - case 'x': - test->msixnum = atoi(optarg); - if (test->msixnum < 1 || test->msixnum > 2048) - goto usage; - continue; - case 'i': - test->irqtype = atoi(optarg); - if (test->irqtype < 0 || test->irqtype > 2) - goto usage; - test->set_irqtype = true; - continue; - case 'I': - test->get_irqtype = true; - continue; - case 'r': - test->read = true; - continue; - case 'w': - test->write = true; - continue; - case 'c': - test->copy = true; - continue; - case 'e': - test->clear_irq = true; - continue; - case 's': - test->size = strtoul(optarg, NULL, 0); - continue; - case 'd': - test->use_dma = true; - continue; - case 'h': - default: -usage: - fprintf(stderr, - "usage: %s [options]\n" - "Options:\n" - "\t-D <dev> PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n" - "\t-b <bar num> BAR test (bar number between 0..5)\n" - "\t-m <msi num> MSI test (msi number between 1..32)\n" - "\t-x <msix num> \tMSI-X test (msix number between 1..2048)\n" - "\t-i <irq type> \tSet IRQ type (0 - Legacy, 1 - MSI, 2 - MSI-X)\n" - "\t-e Clear IRQ\n" - "\t-I Get current IRQ type configured\n" - "\t-d Use DMA\n" - "\t-l Legacy IRQ test\n" - "\t-r Read buffer test\n" - "\t-w Write buffer test\n" - "\t-c Copy buffer test\n" - "\t-s <size> Size of buffer {default: 100KB}\n" - "\t-h Print this help message\n", - argv[0]); - return -EINVAL; - } - - return run_test(test); -} diff --git a/tools/pci/pcitest.sh b/tools/pci/pcitest.sh deleted file mode 100644 index 75ed48ff2990..000000000000 --- a/tools/pci/pcitest.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -echo "BAR tests" -echo - -bar=0 - -while [ $bar -lt 6 ] -do - pcitest -b $bar - bar=`expr $bar + 1` -done -echo - -echo "Interrupt tests" -echo - -pcitest -i 0 -pcitest -l - -pcitest -i 1 -msi=1 - -while [ $msi -lt 33 ] -do - pcitest -m $msi - msi=`expr $msi + 1` -done -echo - -pcitest -i 2 -msix=1 - -while [ $msix -lt 2049 ] -do - pcitest -x $msix - msix=`expr $msix + 1` -done -echo - -echo "Read Tests" -echo - -pcitest -i 1 - -pcitest -r -s 1 -pcitest -r -s 1024 -pcitest -r -s 1025 -pcitest -r -s 1024000 -pcitest -r -s 1024001 -echo - -echo "Write Tests" -echo - -pcitest -w -s 1 -pcitest -w -s 1024 -pcitest -w -s 1025 -pcitest -w -s 1024000 -pcitest -w -s 1024001 -echo - -echo "Copy Tests" -echo - -pcitest -c -s 1 -pcitest -c -s 1024 -pcitest -c -s 1025 -pcitest -c -s 1024000 -pcitest -c -s 1024001 -echo diff --git a/tools/perf/Documentation/perf-arm-spe.txt b/tools/perf/Documentation/perf-arm-spe.txt index de2b0b479249..37afade4f1b2 100644 --- a/tools/perf/Documentation/perf-arm-spe.txt +++ b/tools/perf/Documentation/perf-arm-spe.txt @@ -150,6 +150,7 @@ arm_spe/load_filter=1,min_latency=10/' pct_enable=1 - collect physical timestamp instead of virtual timestamp (PMSCR.PCT) - requires privilege store_filter=1 - collect stores only (PMSFCR.ST) ts_enable=1 - enable timestamping with value of generic timer (PMSCR.TS) + discard=1 - enable SPE PMU events but don't collect sample data - see 'Discard mode' (PMBLIMITR.FM = DISCARD) +++*+++ Latency is the total latency from the point at which sampling started on that instruction, rather than only the execution latency. @@ -220,6 +221,31 @@ Common errors Increase sampling interval (see above) +PMU events +~~~~~~~~~~ + +SPE has events that can be counted on core PMUs. These are prefixed with +SAMPLE_, for example SAMPLE_POP, SAMPLE_FEED, SAMPLE_COLLISION and +SAMPLE_FEED_BR. + +These events will only count when an SPE event is running on the same core that +the PMU event is opened on, otherwise they read as 0. There are various ways to +ensure that the PMU event and SPE event are scheduled together depending on the +way the event is opened. For example opening both events as per-process events +on the same process, although it's not guaranteed that the PMU event is enabled +first when context switching. For that reason it may be better to open the PMU +event as a systemwide event and then open SPE on the process of interest. + +Discard mode +~~~~~~~~~~~~ + +SPE related (SAMPLE_* etc) core PMU events can be used without the overhead of +collecting sample data if discard mode is supported (optional from Armv8.6). +First run a system wide SPE session (or on the core of interest) using options +to minimize output. Then run perf stat: + + perf record -e arm_spe/discard/ -a -N -B --no-bpf-event -o - > /dev/null & + perf stat -e SAMPLE_FEED_LD SEE ALSO -------- diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ac97632f13dc..06356217adeb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2107,8 +2107,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) return PTR_ERR(sc->tp_format); } + /* + * The tracepoint format contains __syscall_nr field, so it's one more + * than the actual number of syscall arguments. + */ if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? - RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) + RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1)) return -ENOMEM; sc->args = sc->tp_format->format.fields; diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh index 5a3b8a5a9b5c..8d1e6bbeac90 100755 --- a/tools/perf/tests/shell/trace_btf_enum.sh +++ b/tools/perf/tests/shell/trace_btf_enum.sh @@ -26,8 +26,12 @@ check_vmlinux() { trace_landlock() { echo "Tracing syscall ${syscall}" - # test flight just to see if landlock_add_rule and libbpf are available - $TESTPROG + # test flight just to see if landlock_add_rule is available + if ! perf trace $TESTPROG 2>&1 | grep -q landlock + then + echo "No landlock system call found, skipping to non-syscall tracing." + return + fi if perf trace -e $syscall $TESTPROG 2>&1 | \ grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*" diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0d2ea22bd9e4..31bb326b07a6 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2100,6 +2100,57 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, return 0; } +const char * const perf_disassembler__strs[] = { + [PERF_DISASM_UNKNOWN] = "unknown", + [PERF_DISASM_LLVM] = "llvm", + [PERF_DISASM_CAPSTONE] = "capstone", + [PERF_DISASM_OBJDUMP] = "objdump", +}; + + +static void annotation_options__add_disassembler(struct annotation_options *options, + enum perf_disassembler dis) +{ + for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers); i++) { + if (options->disassemblers[i] == dis) { + /* Disassembler is already present then don't add again. */ + return; + } + if (options->disassemblers[i] == PERF_DISASM_UNKNOWN) { + /* Found a free slot. */ + options->disassemblers[i] = dis; + return; + } + } + pr_err("Failed to add disassembler %d\n", dis); +} + +static int annotation_options__add_disassemblers_str(struct annotation_options *options, + const char *str) +{ + while (str && *str != '\0') { + const char *comma = strchr(str, ','); + int len = comma ? comma - str : (int)strlen(str); + bool match = false; + + for (u8 i = 0; i < ARRAY_SIZE(perf_disassembler__strs); i++) { + const char *dis_str = perf_disassembler__strs[i]; + + if (len == (int)strlen(dis_str) && !strncmp(str, dis_str, len)) { + annotation_options__add_disassembler(options, i); + match = true; + break; + } + } + if (!match) { + pr_err("Invalid disassembler '%.*s'\n", len, str); + return -1; + } + str = comma ? comma + 1 : NULL; + } + return 0; +} + static int annotation__config(const char *var, const char *value, void *data) { struct annotation_options *opt = data; @@ -2115,11 +2166,10 @@ static int annotation__config(const char *var, const char *value, void *data) else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL) opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; } else if (!strcmp(var, "annotate.disassemblers")) { - opt->disassemblers_str = strdup(value); - if (!opt->disassemblers_str) { - pr_err("Not enough memory for annotate.disassemblers\n"); - return -1; - } + int err = annotation_options__add_disassemblers_str(opt, value); + + if (err) + return err; } else if (!strcmp(var, "annotate.hide_src_code")) { opt->hide_src_code = perf_config_bool("hide_src_code", value); } else if (!strcmp(var, "annotate.jump_arrows")) { @@ -2185,9 +2235,25 @@ void annotation_options__exit(void) zfree(&annotate_opts.objdump_path); } +static void annotation_options__default_init_disassemblers(struct annotation_options *options) +{ + if (options->disassemblers[0] != PERF_DISASM_UNKNOWN) { + /* Already initialized. */ + return; + } +#ifdef HAVE_LIBLLVM_SUPPORT + annotation_options__add_disassembler(options, PERF_DISASM_LLVM); +#endif +#ifdef HAVE_LIBCAPSTONE_SUPPORT + annotation_options__add_disassembler(options, PERF_DISASM_CAPSTONE); +#endif + annotation_options__add_disassembler(options, PERF_DISASM_OBJDUMP); +} + void annotation_config__init(void) { perf_config(annotation__config, &annotate_opts); + annotation_options__default_init_disassemblers(&annotate_opts); } static unsigned int parse_percent_type(char *str1, char *str2) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0ba5846dad4d..98db1b88daf4 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -34,8 +34,13 @@ struct annotated_data_type; #define ANNOTATION__BR_CNTR_WIDTH 30 #define ANNOTATION_DUMMY_LEN 256 -// llvm, capstone, objdump -#define MAX_DISASSEMBLERS 3 +enum perf_disassembler { + PERF_DISASM_UNKNOWN = 0, + PERF_DISASM_LLVM, + PERF_DISASM_CAPSTONE, + PERF_DISASM_OBJDUMP, +}; +#define MAX_DISASSEMBLERS (PERF_DISASM_OBJDUMP + 1) struct annotation_options { bool hide_src_code, @@ -52,14 +57,12 @@ struct annotation_options { annotate_src, full_addr; u8 offset_level; - u8 nr_disassemblers; + u8 disassemblers[MAX_DISASSEMBLERS]; int min_pcnt; int max_lines; int context; char *objdump_path; char *disassembler_style; - const char *disassemblers_str; - const char *disassemblers[MAX_DISASSEMBLERS]; const char *prefix; const char *prefix_strip; unsigned int percent_type; @@ -134,6 +137,8 @@ struct disasm_line { struct annotation_line al; }; +extern const char * const perf_disassembler__strs[]; + void annotation_line__add(struct annotation_line *al, struct list_head *head); static inline double annotation_data__percent(struct annotation_data *data, diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 4a62ed593e84..e4352881e3fa 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -431,9 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) { bool augmented, do_output = false; - int zero = 0, size, aug_size, index, - value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); + int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */ + s64 aug_size, size; unsigned int nr, *beauty_map; struct beauty_payload_enter *payload; void *arg, *payload_offset; @@ -484,14 +484,11 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) } else if (size > 0 && size <= value_size) { /* struct */ if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg)) augmented = true; - } else if (size < 0 && size >= -6) { /* buffer */ + } else if ((int)size < 0 && size >= -6) { /* buffer */ index = -(size + 1); barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick. index &= 7; // Satisfy the bounds checking with the verifier in some kernels. - aug_size = args->args[index]; - - if (aug_size > TRACE_AUG_MAX_BUF) - aug_size = TRACE_AUG_MAX_BUF; + aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index]; if (aug_size > 0) { if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg)) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 27094211edd8..5c329ad614e9 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -293,7 +293,7 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) die = cpu__get_die_id(cpu); /* There is no die_id on legacy system. */ - if (die == -1) + if (die < 0) die = 0; /* @@ -322,7 +322,7 @@ struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data) struct aggr_cpu_id id; /* There is no cluster_id on legacy system. */ - if (cluster == -1) + if (cluster < 0) cluster = 0; id = aggr_cpu_id__die(cpu, data); diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index b7de4d9fd004..50c5c206b70e 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -2216,56 +2216,6 @@ out_free_command: return err; } -static int annotation_options__init_disassemblers(struct annotation_options *options) -{ - char *disassembler; - - if (options->disassemblers_str == NULL) { - const char *default_disassemblers_str = -#ifdef HAVE_LIBLLVM_SUPPORT - "llvm," -#endif -#ifdef HAVE_LIBCAPSTONE_SUPPORT - "capstone," -#endif - "objdump"; - - options->disassemblers_str = strdup(default_disassemblers_str); - if (!options->disassemblers_str) - goto out_enomem; - } - - disassembler = strdup(options->disassemblers_str); - if (disassembler == NULL) - goto out_enomem; - - while (1) { - char *comma = strchr(disassembler, ','); - - if (comma != NULL) - *comma = '\0'; - - options->disassemblers[options->nr_disassemblers++] = strim(disassembler); - - if (comma == NULL) - break; - - disassembler = comma + 1; - - if (options->nr_disassemblers >= MAX_DISASSEMBLERS) { - pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n", - MAX_DISASSEMBLERS, disassembler); - break; - } - } - - return 0; - -out_enomem: - pr_err("Not enough memory for annotate.disassemblers\n"); - return -1; -} - int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct annotation_options *options = args->options; @@ -2274,7 +2224,6 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) char symfs_filename[PATH_MAX]; bool delete_extract = false; struct kcore_extract kce; - const char *disassembler; bool decomp = false; int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); @@ -2334,28 +2283,26 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) } } - err = annotation_options__init_disassemblers(options); - if (err) - goto out_remove_tmp; - err = -1; + for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) { + enum perf_disassembler dis = options->disassemblers[i]; - for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) { - disassembler = options->disassemblers[i]; - - if (!strcmp(disassembler, "llvm")) + switch (dis) { + case PERF_DISASM_LLVM: err = symbol__disassemble_llvm(symfs_filename, sym, args); - else if (!strcmp(disassembler, "capstone")) + break; + case PERF_DISASM_CAPSTONE: err = symbol__disassemble_capstone(symfs_filename, sym, args); - else if (!strcmp(disassembler, "objdump")) + break; + case PERF_DISASM_OBJDUMP: err = symbol__disassemble_objdump(symfs_filename, sym, args); - else - pr_debug("Unknown disassembler %s, skipping...\n", disassembler); - } - - if (err == 0) { - pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n", - disassembler, options->disassemblers_str); + break; + case PERF_DISASM_UNKNOWN: /* End of disassemblers. */ + default: + goto out_remove_tmp; + } + if (err == 0) + pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]); } out_remove_tmp: if (decomp) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 175004ce44b2..51a95239fe06 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -87,11 +87,19 @@ INSTALL_SCRIPT = ${INSTALL} -m 644 # to something more interesting, like "arm-linux-". If you want # to compile vs uClibc, that can be done here as well. CROSS ?= #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- +ifneq ($(CROSS), ) +CC = $(CROSS)gcc +LD = $(CROSS)gcc +AR = $(CROSS)ar +STRIP = $(CROSS)strip +RANLIB = $(CROSS)ranlib +else CC ?= $(CROSS)gcc LD ?= $(CROSS)gcc AR ?= $(CROSS)ar STRIP ?= $(CROSS)strip RANLIB ?= $(CROSS)ranlib +endif HOSTCC = gcc MKDIR = mkdir diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile index e1ebb1d60cd4..741f21477432 100644 --- a/tools/power/cpupower/bindings/python/Makefile +++ b/tools/power/cpupower/bindings/python/Makefile @@ -11,6 +11,7 @@ HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; el LIB_DIR := ../../lib PY_INCLUDE = $(firstword $(shell python-config --includes)) OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o) +INSTALL_DIR = $(shell python3 -c "import site; print(site.getsitepackages()[0])") all: _raw_pylibcpupower.so @@ -28,6 +29,15 @@ else ifeq ($(HAVE_PYCONFIG),0) endif swig -python raw_pylibcpupower.swg +# Only installs the Python bindings +install: _raw_pylibcpupower.so + install -D _raw_pylibcpupower.so $(INSTALL_DIR)/_raw_pylibcpupower.so + install -D raw_pylibcpupower.py $(INSTALL_DIR)/raw_pylibcpupower.py + +uninstall: + rm -f $(INSTALL_DIR)/_raw_pylibcpupower.so + rm -f $(INSTALL_DIR)/raw_pylibcpupower.py + # Will only clean the bindings folder; will not clean the actual cpupower folder clean: rm -f raw_pylibcpupower.py raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.o _raw_pylibcpupower.so diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README index 0a4bb2581e8a..952e2e02fd32 100644 --- a/tools/power/cpupower/bindings/python/README +++ b/tools/power/cpupower/bindings/python/README @@ -48,6 +48,31 @@ To run the test script: $ python test_raw_pylibcpupower.py +developing/using the bindings directly +-------------------------------------- + +You need to add the Python bindings directory to your $PYTHONPATH. + +You would set the path in the Bash terminal or in the Bash profile: + +PYTHONPATH=~/linux/tools/power/cpupower/bindings/python:$PYTHONPATH + +This allows you to set a specific repo of the bindings to use. + + +installing/uninstalling +----------------------- + +Python uses a system specific site-packages folder to look up modules to import +by default. You do not need to install cpupower to use the SWIG bindings. + +You can install and uninstall the bindings to the site-packages with: + +sudo make install + +sudo make uninstall + + credits ------- diff --git a/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg b/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg index 96556d87a745..d82af6fa93c3 100644 --- a/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg +++ b/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg @@ -134,6 +134,9 @@ void cpufreq_put_stats(struct cpufreq_stats *stats); unsigned long cpufreq_get_transitions(unsigned int cpu); +char *cpufreq_get_energy_performance_preference(unsigned int cpu); +void cpufreq_put_energy_performance_preference(char *ptr); + int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy); int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq); @@ -160,6 +163,8 @@ int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate, unsigned int disable); unsigned long cpuidle_state_latency(unsigned int cpu, unsigned int idlestate); +unsigned long cpuidle_state_residency(unsigned int cpu, + unsigned int idlestate); unsigned long cpuidle_state_usage(unsigned int cpu, unsigned int idlestate); unsigned long long cpuidle_state_time(unsigned int cpu, diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index 1516d23c17c9..8dda3db2dff0 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -102,6 +102,10 @@ unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, if (len == 0) return 0; + if (!strcmp(linebuf, "enabled\n")) + return 1; + if (!strcmp(linebuf, "disabled\n")) + return 0; value = strtoul(linebuf, &endp, 0); if (endp == linebuf || errno == ERANGE) @@ -123,12 +127,14 @@ static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, enum cpufreq_string { SCALING_DRIVER, SCALING_GOVERNOR, + ENERGY_PERFORMANCE_PREFERENCE, MAX_CPUFREQ_STRING_FILES }; static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = { [SCALING_DRIVER] = "scaling_driver", [SCALING_GOVERNOR] = "scaling_governor", + [ENERGY_PERFORMANCE_PREFERENCE] = "energy_performance_preference", }; @@ -203,6 +209,18 @@ unsigned long cpufreq_get_transition_latency(unsigned int cpu) return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY); } +char *cpufreq_get_energy_performance_preference(unsigned int cpu) +{ + return sysfs_cpufreq_get_one_string(cpu, ENERGY_PERFORMANCE_PREFERENCE); +} + +void cpufreq_put_energy_performance_preference(char *ptr) +{ + if (!ptr) + return; + free(ptr); +} + int cpufreq_get_hardware_limits(unsigned int cpu, unsigned long *min, unsigned long *max) diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h index 2f3c84035806..bfc617311ebd 100644 --- a/tools/power/cpupower/lib/cpufreq.h +++ b/tools/power/cpupower/lib/cpufreq.h @@ -68,6 +68,14 @@ unsigned long cpufreq_get_freq_hardware(unsigned int cpu); unsigned long cpufreq_get_transition_latency(unsigned int cpu); +/* determine energy performance preference + * + * returns NULL on failure, else the string that represents the energy performance + * preference requested. + */ +char *cpufreq_get_energy_performance_preference(unsigned int cpu); +void cpufreq_put_energy_performance_preference(char *ptr); + /* determine hardware CPU frequency limits * * These may be limited further by thermal, energy or other diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index c96b77365c63..fc750e127404 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -120,7 +120,6 @@ static void print_duration(unsigned long duration) } else printf("%lu ns", duration); } - return; } static int get_boost_mode_x86(unsigned int cpu) @@ -255,7 +254,12 @@ static int get_freq_kernel(unsigned int cpu, unsigned int human) static int get_freq_hardware(unsigned int cpu, unsigned int human) { - unsigned long freq = cpufreq_get_freq_hardware(cpu); + unsigned long freq; + + if (cpupower_cpu_info.caps & CPUPOWER_CAP_APERF) + return -EINVAL; + + freq = cpufreq_get_freq_hardware(cpu); printf(_(" current CPU frequency: ")); if (!freq) { printf("Unable to call hardware\n"); @@ -418,12 +422,32 @@ static int get_freq_stats(unsigned int cpu, unsigned int human) return 0; } +/* --epp / -z */ + +static int get_epp(unsigned int cpu, bool interactive) +{ + char *epp; + + epp = cpufreq_get_energy_performance_preference(cpu); + if (!epp) + return -EINVAL; + if (interactive) + printf(_(" energy performance preference: %s\n"), epp); + + cpufreq_put_energy_performance_preference(epp); + + return 0; +} + /* --latency / -y */ static int get_latency(unsigned int cpu, unsigned int human) { unsigned long latency = cpufreq_get_transition_latency(cpu); + if (!get_epp(cpu, false)) + return -EINVAL; + printf(_(" maximum transition latency: ")); if (!latency || latency == UINT_MAX) { printf(_(" Cannot determine or is not supported.\n")); @@ -457,6 +481,7 @@ static void debug_output_one(unsigned int cpu) get_related_cpus(cpu); get_affected_cpus(cpu); get_latency(cpu, 1); + get_epp(cpu, true); get_hardware_limits(cpu, 1); freqs = cpufreq_get_available_frequencies(cpu); @@ -497,6 +522,7 @@ static struct option info_opts[] = { {"human", no_argument, NULL, 'm'}, {"no-rounding", no_argument, NULL, 'n'}, {"performance", no_argument, NULL, 'c'}, + {"epp", no_argument, NULL, 'z'}, { }, }; @@ -510,7 +536,7 @@ int cmd_freq_info(int argc, char **argv) int output_param = 0; do { - ret = getopt_long(argc, argv, "oefwldpgrasmybnc", info_opts, + ret = getopt_long(argc, argv, "oefwldpgrasmybncz", info_opts, NULL); switch (ret) { case '?': @@ -534,6 +560,7 @@ int cmd_freq_info(int argc, char **argv) case 's': case 'y': case 'c': + case 'z': if (output_param) { output_param = -1; cont = 0; @@ -643,6 +670,9 @@ int cmd_freq_info(int argc, char **argv) case 'c': ret = get_perf_cap(cpu); break; + case 'z': + ret = get_epp(cpu, true); + break; } if (ret) return ret; diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 0a56e22240fc..795562e879de 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -177,6 +177,8 @@ enum amd_pstate_value { AMD_PSTATE_HIGHEST_PERF, AMD_PSTATE_MAX_FREQ, AMD_PSTATE_LOWEST_NONLINEAR_FREQ, + AMD_PSTATE_HW_PREFCORE, + AMD_PSTATE_PREFCORE_RANKING, MAX_AMD_PSTATE_VALUE_READ_FILES, }; @@ -184,6 +186,8 @@ static const char *amd_pstate_value_files[MAX_AMD_PSTATE_VALUE_READ_FILES] = { [AMD_PSTATE_HIGHEST_PERF] = "amd_pstate_highest_perf", [AMD_PSTATE_MAX_FREQ] = "amd_pstate_max_freq", [AMD_PSTATE_LOWEST_NONLINEAR_FREQ] = "amd_pstate_lowest_nonlinear_freq", + [AMD_PSTATE_HW_PREFCORE] = "amd_pstate_hw_prefcore", + [AMD_PSTATE_PREFCORE_RANKING] = "amd_pstate_prefcore_ranking", }; static unsigned long amd_pstate_get_data(unsigned int cpu, @@ -215,7 +219,9 @@ void amd_pstate_boost_init(unsigned int cpu, int *support, int *active) void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding) { - printf(_(" AMD PSTATE Highest Performance: %lu. Maximum Frequency: "), + + printf(_(" amd-pstate limits:\n")); + printf(_(" Highest Performance: %lu. Maximum Frequency: "), amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF)); /* * If boost isn't active, the cpuinfo_max doesn't indicate real max @@ -224,22 +230,26 @@ void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding) print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ), no_rounding); printf(".\n"); - printf(_(" AMD PSTATE Nominal Performance: %lu. Nominal Frequency: "), + printf(_(" Nominal Performance: %lu. Nominal Frequency: "), acpi_cppc_get_data(cpu, NOMINAL_PERF)); print_speed(acpi_cppc_get_data(cpu, NOMINAL_FREQ) * 1000, no_rounding); printf(".\n"); - printf(_(" AMD PSTATE Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "), + printf(_(" Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "), acpi_cppc_get_data(cpu, LOWEST_NONLINEAR_PERF)); print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_FREQ), no_rounding); printf(".\n"); - printf(_(" AMD PSTATE Lowest Performance: %lu. Lowest Frequency: "), + printf(_(" Lowest Performance: %lu. Lowest Frequency: "), acpi_cppc_get_data(cpu, LOWEST_PERF)); print_speed(acpi_cppc_get_data(cpu, LOWEST_FREQ) * 1000, no_rounding); printf(".\n"); + + printf(_(" Preferred Core Support: %lu. Preferred Core Ranking: %lu.\n"), + amd_pstate_get_data(cpu, AMD_PSTATE_HW_PREFCORE), + amd_pstate_get_data(cpu, AMD_PSTATE_PREFCORE_RANKING)); } /* AMD P-State Helper Functions ************************************/ diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c index 55e55b6b42f9..f5a2a326b1b7 100644 --- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c @@ -117,7 +117,7 @@ static int hsw_ext_start(void) for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { - hsw_ext_get_count(num, &val, cpu); + is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu); previous_count[num][cpu] = val; } } @@ -134,7 +134,7 @@ static int hsw_ext_stop(void) for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { - is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu); + is_valid[cpu] |= !hsw_ext_get_count(num, &val, cpu); current_count[num][cpu] = val; } } diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index ae6af354a81d..73b6b10cbdd2 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -33,7 +33,7 @@ static int mperf_get_count_percent(unsigned int self_id, double *percent, unsigned int cpu); static int mperf_get_count_freq(unsigned int id, unsigned long long *count, unsigned int cpu); -static struct timespec time_start, time_end; +static struct timespec *time_start, *time_end; static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { { @@ -148,7 +148,7 @@ static int mperf_measure_stats(unsigned int cpu) ret = get_aperf_mperf(cpu, &aval, &mval); aperf_current_count[cpu] = aval; mperf_current_count[cpu] = mval; - is_valid[cpu] = !ret; + is_valid[cpu] |= !ret; return 0; } @@ -174,7 +174,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent, dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n", mperf_cstates[id].name, mperf_diff, tsc_diff); } else if (max_freq_mode == MAX_FREQ_SYSFS) { - timediff = max_frequency * timespec_diff_us(time_start, time_end); + timediff = max_frequency * timespec_diff_us(time_start[cpu], time_end[cpu]); *percent = 100.0 * mperf_diff / timediff; dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n", mperf_cstates[id].name, mperf_diff, timediff); @@ -207,7 +207,7 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count, if (max_freq_mode == MAX_FREQ_TSC_REF) { /* Calculate max_freq from TSC count */ tsc_diff = tsc_at_measure_end[cpu] - tsc_at_measure_start[cpu]; - time_diff = timespec_diff_us(time_start, time_end); + time_diff = timespec_diff_us(time_start[cpu], time_end[cpu]); max_frequency = tsc_diff / time_diff; } @@ -226,9 +226,8 @@ static int mperf_start(void) { int cpu; - clock_gettime(CLOCK_REALTIME, &time_start); - for (cpu = 0; cpu < cpu_count; cpu++) { + clock_gettime(CLOCK_REALTIME, &time_start[cpu]); mperf_get_tsc(&tsc_at_measure_start[cpu]); mperf_init_stats(cpu); } @@ -243,9 +242,9 @@ static int mperf_stop(void) for (cpu = 0; cpu < cpu_count; cpu++) { mperf_measure_stats(cpu); mperf_get_tsc(&tsc_at_measure_end[cpu]); + clock_gettime(CLOCK_REALTIME, &time_end[cpu]); } - clock_gettime(CLOCK_REALTIME, &time_end); return 0; } @@ -349,6 +348,8 @@ struct cpuidle_monitor *mperf_register(void) aperf_current_count = calloc(cpu_count, sizeof(unsigned long long)); tsc_at_measure_start = calloc(cpu_count, sizeof(unsigned long long)); tsc_at_measure_end = calloc(cpu_count, sizeof(unsigned long long)); + time_start = calloc(cpu_count, sizeof(struct timespec)); + time_end = calloc(cpu_count, sizeof(struct timespec)); mperf_monitor.name_len = strlen(mperf_monitor.name); return &mperf_monitor; } @@ -361,6 +362,8 @@ void mperf_unregister(void) free(aperf_current_count); free(tsc_at_measure_start); free(tsc_at_measure_end); + free(time_start); + free(time_end); free(is_valid); } diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c index 16eaf006f61f..6b1733782ffa 100644 --- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c @@ -151,7 +151,7 @@ static int nhm_stop(void) for (num = 0; num < NHM_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { - is_valid[cpu] = !nhm_get_count(num, &val, cpu); + is_valid[cpu] |= !nhm_get_count(num, &val, cpu); current_count[num][cpu] = val; } } diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c index 811d63ab17a7..5969b88a85b4 100644 --- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c @@ -115,7 +115,7 @@ static int snb_start(void) for (num = 0; num < SNB_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { - snb_get_count(num, &val, cpu); + is_valid[cpu] = !snb_get_count(num, &val, cpu); previous_count[num][cpu] = val; } } @@ -132,7 +132,7 @@ static int snb_stop(void) for (num = 0; num < SNB_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { - is_valid[cpu] = !snb_get_count(num, &val, cpu); + is_valid[cpu] |= !snb_get_count(num, &val, cpu); current_count[num][cpu] = val; } } diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 5127be34869e..fadfb02b8611 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -16,7 +16,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.20"; +static const char *version_str = "v1.21"; static const int supported_api_ver = 3; static struct isst_if_platform_info isst_platform_info; diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c index 32ea70c7dbd8..da53aaa27fc9 100644 --- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c +++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c @@ -329,7 +329,7 @@ static int tpmi_get_get_trls(struct isst_id *id, int config_index, return 0; } -static int tpmi_get_get_trl(struct isst_id *id, int level, int config_index, +static int tpmi_get_get_trl(struct isst_id *id, int config_index, int level, int *trl) { struct isst_pkg_ctdp_level_info ctdp_level; diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index a7f7ed01421c..99bf905ade81 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -136,7 +136,7 @@ displays the statistics gathered since it was forked. The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system. .SH COLUMN DESCRIPTIONS .PP -\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus. +\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to snapshot the procfs/sysfs and collect the counters on all cpus. .PP \fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU. .PP @@ -190,7 +190,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. .PP -\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). Disabled by default. Enable with --enable SysWatt. +\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). .PP \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary. .PP @@ -516,14 +516,40 @@ that they count at TSC rate, which is true on all processors tested to date. Volume 3B: System Programming Guide" https://www.intel.com/products/processor/manuals/ +.SH RUN THE LATEST VERSION +If turbostat complains that it doesn't recognize your processor, +please try the latest version. + +The latest version of turbostat does not require the latest version of the Linux kernel. +However, some features, such as perf(1) counters, do require kernel support. + +The latest turbostat release is available in the upstream Linux Kernel source tree. +eg. "git pull https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git" +and run make in tools/power/x86/turbostat/. + +n.b. "make install" will update your system manually, but a distro update may subsequently downgrade your turbostat to an older version. +For this reason, manually installing to /usr/local/bin may be what you want. + +Note that turbostat/Makefile has a "make snapshot" target, which will create a tar file +that can build without a local kernel source tree. + +If the upstream version isn't new enough, the development tree can be found here: +"git pull https://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat" + +If the development tree doesn't work, please contact the author via chat, +or via email with the word "turbostat" on the Subject line. + .SH FILES .ta .nf +/sys/bus/event_source/devices/ /dev/cpu/*/msr +/sys/class/intel_pmt/ +/sys/devices/system/cpu/ .fi .SH "SEE ALSO" -msr(4), vmstat(8) +perf(1), msr(4), vmstat(8) .PP .SH AUTHOR .nf diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 58a487c225a7..8d5011a0bf60 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3,7 +3,7 @@ * turbostat -- show CPU frequency and C-state residency * on modern Intel and AMD processors. * - * Copyright (c) 2024 Intel Corporation. + * Copyright (c) 2025 Intel Corporation. * Len Brown <len.brown@intel.com> */ @@ -95,6 +95,8 @@ #define INTEL_ECORE_TYPE 0x20 #define INTEL_PCORE_TYPE 0x40 +#define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL)) + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; @@ -202,6 +204,8 @@ struct msr_counter bic[] = { { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -266,14 +270,16 @@ struct msr_counter bic[] = { #define BIC_Diec6 (1ULL << 58) #define BIC_SysWatt (1ULL << 59) #define BIC_Sys_J (1ULL << 60) +#define BIC_NMI (1ULL << 61) +#define BIC_CPU_c1e (1ULL << 62) -#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) -#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) +#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) +#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) -#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) +#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; @@ -326,6 +332,7 @@ unsigned int rapl_joules; unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; +unsigned int force_load; unsigned int has_aperf; unsigned int has_aperf_access; unsigned int has_epb; @@ -353,7 +360,7 @@ unsigned long long cpuidle_cur_sys_lpi_us; unsigned int tj_max; unsigned int tj_max_override; double rapl_power_units, rapl_time_units; -double rapl_dram_energy_units, rapl_energy_units; +double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units; double rapl_joule_counter_range; unsigned int crystal_hz; unsigned long long tsc_hz; @@ -365,6 +372,9 @@ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ unsigned int first_counter_read = 1; + +static struct timeval procsysfs_tv_begin; + int ignore_stdin; bool no_msr; bool no_perf; @@ -419,6 +429,7 @@ struct platform_features { bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */ bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ + bool has_fixed_rapl_psys_unit; /* Fixed Energy Unit used for PSYS RAPL Domain */ int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */ int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */ bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */ @@ -819,6 +830,7 @@ static const struct platform_features spr_features = { .has_msr_core_c1_res = 1, .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, + .has_fixed_rapl_psys_unit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; @@ -1024,6 +1036,7 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_ARROWLAKE_U, &adl_features }, { INTEL_ARROWLAKE, &adl_features }, { INTEL_LUNARLAKE_M, &lnl_features }, + { INTEL_PANTHERLAKE_L, &lnl_features }, { INTEL_ATOM_SILVERMONT, &slv_features }, { INTEL_ATOM_SILVERMONT_D, &slvd_features }, { INTEL_ATOM_AIRMONT, &amt_features }, @@ -1036,6 +1049,7 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_ATOM_GRACEMONT, &adl_features }, { INTEL_ATOM_CRESTMONT_X, &srf_features }, { INTEL_ATOM_CRESTMONT, &grr_features }, + { INTEL_ATOM_DARKMONT_X, &srf_features }, { INTEL_XEON_PHI_KNL, &knl_features }, { INTEL_XEON_PHI_KNM, &knl_features }, /* @@ -1054,9 +1068,10 @@ void probe_platform_features(unsigned int family, unsigned int model) { int i; - platform = &default_features; if (authentic_amd || hygon_genuine) { + /* fallback to default features on unsupported models */ + force_load++; if (max_extended_level >= 0x80000007) { unsigned int eax, ebx, ecx, edx; @@ -1065,11 +1080,11 @@ void probe_platform_features(unsigned int family, unsigned int model) if ((edx & (1 << 14)) && family >= 0x17) platform = &amd_features_with_rapl; } - return; + goto end; } if (!genuine_intel) - return; + goto end; for (i = 0; turbostat_pdata[i].features; i++) { if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) { @@ -1077,6 +1092,19 @@ void probe_platform_features(unsigned int family, unsigned int model) return; } } + +end: + if (force_load && !platform) { + fprintf(outf, "Forced to run on unsupported platform!\n"); + platform = &default_features; + } + + if (platform) + return; + + fprintf(stderr, "Unsupported platform detected.\n" + "\tSee RUN THE LATEST VERSION on turbostat(8)\n"); + exit(1); } /* Model specific support End */ @@ -1094,8 +1122,8 @@ int backwards_count; char *progname; #define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ -cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; -size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; +cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; +size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_THREAD_COUNTERS 24 #define MAX_ADDED_CORE_COUNTERS 8 #define MAX_ADDED_PACKAGE_COUNTERS 16 @@ -1271,7 +1299,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr = MSR_PLATFORM_ENERGY_STATUS, .msr_mask = 0x00000000FFFFFFFF, .msr_shift = 0, - .platform_rapl_msr_scale = &rapl_energy_units, + .platform_rapl_msr_scale = &rapl_psys_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, .bic = BIC_SysWatt | BIC_Sys_J, .compat_scale = 1.0, @@ -1510,6 +1538,17 @@ static struct msr_counter_arch_info msr_counter_arch_infos[] = { #define PMT_COUNTER_MTL_DC6_LSB 0 #define PMT_COUNTER_MTL_DC6_MSB 63 #define PMT_MTL_DC6_GUID 0x1a067102 +#define PMT_MTL_DC6_SEQ 0 + +#define PMT_COUNTER_CWF_MC1E_OFFSET_BASE 20936 +#define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT 24 +#define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE 12 +#define PMT_COUNTER_CWF_CPUS_PER_MODULE 4 +#define PMT_COUNTER_CWF_MC1E_LSB 0 +#define PMT_COUNTER_CWF_MC1E_MSB 63 +#define PMT_CWF_MC1E_GUID 0x14421519 + +unsigned long long tcore_clock_freq_hz = 800000000; #define PMT_COUNTER_NAME_SIZE_BYTES 16 #define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32 @@ -1533,6 +1572,7 @@ struct pmt_mmio { enum pmt_datatype { PMT_TYPE_RAW, PMT_TYPE_XTAL_TIME, + PMT_TYPE_TCORE_CLOCK, }; struct pmt_domain_info { @@ -1562,6 +1602,93 @@ struct pmt_counter { struct pmt_domain_info *domains; }; +/* + * PMT telemetry directory iterator. + * Used to iterate telemetry files in sysfs in correct order. + */ +struct pmt_diriter_t { + DIR *dir; + struct dirent **namelist; + unsigned int num_names; + unsigned int current_name_idx; +}; + +int pmt_telemdir_filter(const struct dirent *e) +{ + unsigned int dummy; + + return sscanf(e->d_name, "telem%u", &dummy); +} + +int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b) +{ + unsigned int aidx = 0, bidx = 0; + + sscanf((*a)->d_name, "telem%u", &aidx); + sscanf((*b)->d_name, "telem%u", &bidx); + + return aidx >= bidx; +} + +const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter) +{ + const struct dirent *ret = NULL; + + if (!iter->dir) + return NULL; + + if (iter->current_name_idx >= iter->num_names) + return NULL; + + ret = iter->namelist[iter->current_name_idx]; + ++iter->current_name_idx; + + return ret; +} + +const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path) +{ + int num_names = iter->num_names; + + if (!iter->dir) { + iter->dir = opendir(pmt_root_path); + if (iter->dir == NULL) + return NULL; + + num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort); + if (num_names == -1) + return NULL; + } + + iter->current_name_idx = 0; + iter->num_names = num_names; + + return pmt_diriter_next(iter); +} + +void pmt_diriter_init(struct pmt_diriter_t *iter) +{ + memset(iter, 0, sizeof(*iter)); +} + +void pmt_diriter_remove(struct pmt_diriter_t *iter) +{ + if (iter->namelist) { + for (unsigned int i = 0; i < iter->num_names; i++) { + free(iter->namelist[i]); + iter->namelist[i] = NULL; + } + } + + free(iter->namelist); + iter->namelist = NULL; + iter->num_names = 0; + iter->current_name_idx = 0; + + closedir(iter->dir); + iter->dir = NULL; +} + unsigned int pmt_counter_get_width(const struct pmt_counter *p) { return (p->msb - p->lsb) + 1; @@ -1611,6 +1738,7 @@ struct thread_data { unsigned long long c1; unsigned long long instr_count; unsigned long long irq_count; + unsigned long long nmi_count; unsigned int smi_count; unsigned int cpu_id; unsigned int apic_id; @@ -1917,6 +2045,7 @@ struct timeval tv_even, tv_odd, tv_delta; int *irq_column_2_cpu; /* /proc/interrupts column numbers */ int *irqs_per_cpu; /* indexed by cpu_num */ +int *nmi_per_cpu; /* indexed by cpu_num */ void setup_all_buffers(bool startup); @@ -1944,6 +2073,8 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk { int retval, pkg_no, core_no, thread_no, node_no; + retval = 0; + for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { @@ -1959,14 +2090,12 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk c = GET_CORE(core_base, core_no, node_no, pkg_no); p = GET_PKG(pkg_base, pkg_no); - retval = func(t, c, p); - if (retval) - return retval; + retval |= func(t, c, p); } } } } - return 0; + return retval; } int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) @@ -2085,13 +2214,17 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) int probe_msr(int cpu, off_t offset) { ssize_t retval; - unsigned long long dummy; + unsigned long long value; assert(!no_msr); - retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset); + retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset); - if (retval != sizeof(dummy)) + /* + * Expect MSRs to accumulate some non-zero value since the system was powered on. + * Treat zero as a read failure. + */ + if (retval != sizeof(value) || value == 0) return 1; return 0; @@ -2135,41 +2268,54 @@ void help(void) "when COMMAND completes.\n" "If no COMMAND is specified, turbostat wakes every 5-seconds\n" "to print statistics, until interrupted.\n" - " -a, --add add a counter\n" + " -a, --add counter\n" + " add a counter\n" " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" " eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n" - " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" + " -c, --cpu cpu-set\n" + " limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" - " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" + " -d, --debug\n" + " displays usec, Time_Of_Day_Seconds and more debugging\n" " debug messages are printed to stderr\n" - " -D, --Dump displays the raw counter values\n" - " -e, --enable [all | column]\n" + " -D, --Dump\n" + " displays the raw counter values\n" + " -e, --enable [all | column]\n" " shows all or the specified disabled column\n" - " -H, --hide [column|column,column,...]\n" + " -f, --force\n" + " force load turbostat with minimum default features on unsupported platforms.\n" + " -H, --hide [column | column,column,...]\n" " hide the specified column(s)\n" " -i, --interval sec.subsec\n" - " Override default 5-second measurement interval\n" - " -J, --Joules displays energy in Joules instead of Watts\n" - " -l, --list list column headers only\n" - " -M, --no-msr Disable all uses of the MSR driver\n" - " -P, --no-perf Disable all uses of the perf API\n" + " override default 5-second measurement interval\n" + " -J, --Joules\n" + " displays energy in Joules instead of Watts\n" + " -l, --list\n" + " list column headers only\n" + " -M, --no-msr\n" + " disable all uses of the MSR driver\n" + " -P, --no-perf\n" + " disable all uses of the perf API\n" " -n, --num_iterations num\n" " number of the measurement iterations\n" " -N, --header_iterations num\n" " print header every num iterations\n" " -o, --out file\n" " create or truncate \"file\" for all output\n" - " -q, --quiet skip decoding system configuration header\n" - " -s, --show [column|column,column,...]\n" + " -q, --quiet\n" + " skip decoding system configuration header\n" + " -s, --show [column | column,column,...]\n" " show only the specified column(s)\n" " -S, --Summary\n" " limits output to 1-line system summary per interval\n" " -T, --TCC temperature\n" " sets the Thermal Control Circuit temperature in\n" " degrees Celsius\n" - " -h, --help print this help message\n" - " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); + " -h, --help\n" + " print this help message\n" + " -v, --version\n" + " print version information\n\nFor more help, run \"man turbostat\"\n"); } /* @@ -2289,6 +2435,12 @@ void print_header(char *delim) else outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); } + if (DO_BIC(BIC_NMI)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "%s NMI", (printed++ ? delim : "")); + else + outp += sprintf(outp, "%sNMI", (printed++ ? delim : "")); + } if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); @@ -2335,6 +2487,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: + case PMT_TYPE_TCORE_CLOCK: outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2409,6 +2562,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: + case PMT_TYPE_TCORE_CLOCK: outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2540,6 +2694,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: + case PMT_TYPE_TCORE_CLOCK: outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2575,6 +2730,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); + if (DO_BIC(BIC_NMI)) + outp += sprintf(outp, "IRQ: %lld\n", t->nmi_count); if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "SMI: %d\n", t->smi_count); @@ -2794,6 +2951,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); } + /* NMI */ + if (DO_BIC(BIC_NMI)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count); + } + /* SMI */ if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); @@ -2848,7 +3013,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = t->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -2860,8 +3025,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: + value_converted = 100.0 * value_raw / crystal_hz / interval_float; outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; + + case PMT_TYPE_TCORE_CLOCK: + value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); } } @@ -2928,7 +3098,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = c->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -2940,8 +3110,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: + value_converted = 100.0 * value_raw / crystal_hz / interval_float; outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; + + case PMT_TYPE_TCORE_CLOCK: + value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); } } @@ -3126,7 +3301,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = p->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -3138,8 +3313,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: + value_converted = 100.0 * value_raw / crystal_hz / interval_float; outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; + + case PMT_TYPE_TCORE_CLOCK: + value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); } } @@ -3409,6 +3589,9 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d if (DO_BIC(BIC_IRQ)) old->irq_count = new->irq_count - old->irq_count; + if (DO_BIC(BIC_NMI)) + old->nmi_count = new->nmi_count - old->nmi_count; + if (DO_BIC(BIC_SMI)) old->smi_count = new->smi_count - old->smi_count; @@ -3447,12 +3630,10 @@ int delta_cpu(struct thread_data *t, struct core_data *c, /* always calculate thread delta */ retval = delta_thread(t, t2, c2); /* c2 is core delta */ - if (retval) - return retval; /* calculate package delta only for 1st core in package */ if (is_cpu_first_core_in_package(t, c, p)) - retval = delta_package(p, p2); + retval |= delta_package(p, p2); return retval; } @@ -3489,6 +3670,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->instr_count = 0; t->irq_count = 0; + t->nmi_count = 0; t->smi_count = 0; c->c3 = 0; @@ -3580,7 +3762,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) /* remember first tv_begin */ if (average.threads.tv_begin.tv_sec == 0) - average.threads.tv_begin = t->tv_begin; + average.threads.tv_begin = procsysfs_tv_begin; /* remember last tv_end */ average.threads.tv_end = t->tv_end; @@ -3593,6 +3775,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.threads.instr_count += t->instr_count; average.threads.irq_count += t->irq_count; + average.threads.nmi_count += t->nmi_count; average.threads.smi_count += t->smi_count; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { @@ -3734,6 +3917,9 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data if (average.threads.irq_count > 9999999) sums_need_wide_columns = 1; + if (average.threads.nmi_count > 9999999) + sums_need_wide_columns = 1; + average.cores.c3 /= topo.allowed_cores; average.cores.c6 /= topo.allowed_cores; @@ -4546,7 +4732,8 @@ unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb) unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) { - assert(domain_id < ppmt->num_domains); + if (domain_id >= ppmt->num_domains) + return 0; const unsigned long *pmmio = ppmt->domains[domain_id].pcounter; const unsigned long value = pmmio ? *pmmio : 0; @@ -4590,6 +4777,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (DO_BIC(BIC_IRQ)) t->irq_count = irqs_per_cpu[cpu]; + if (DO_BIC(BIC_NMI)) + t->nmi_count = nmi_per_cpu[cpu]; get_cstate_counters(cpu, t, c, p); @@ -5335,6 +5524,7 @@ void free_all_buffers(void) free(irq_column_2_cpu); free(irqs_per_cpu); + free(nmi_per_cpu); for (i = 0; i <= topo.max_cpu_num; ++i) { if (cpus[i].put_ids) @@ -5566,6 +5756,8 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, { int retval, pkg_no, node_no, core_no, thread_no; + retval = 0; + for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { @@ -5587,14 +5779,12 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, p = GET_PKG(pkg_base, pkg_no); p2 = GET_PKG(pkg_base2, pkg_no); - retval = func(t, c, p, t2, c2, p2); - if (retval) - return retval; + retval |= func(t, c, p, t2, c2, p2); } } } } - return 0; + return retval; } /* @@ -5791,31 +5981,37 @@ int snapshot_proc_interrupts(void) irq_column_2_cpu[column] = cpu_number; irqs_per_cpu[cpu_number] = 0; + nmi_per_cpu[cpu_number] = 0; } /* read /proc/interrupt count lines and sum up irqs per cpu */ while (1) { int column; char buf[64]; + int this_row_is_nmi = 0; - retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */ + retval = fscanf(fp, " %s:", buf); /* irq# "N:" */ if (retval != 1) break; + if (strncmp(buf, "NMI", strlen("NMI")) == 0) + this_row_is_nmi = 1; + /* read the count per cpu */ for (column = 0; column < topo.num_cpus; ++column) { int cpu_number, irq_count; retval = fscanf(fp, " %d", &irq_count); + if (retval != 1) break; cpu_number = irq_column_2_cpu[column]; irqs_per_cpu[cpu_number] += irq_count; - + if (this_row_is_nmi) + nmi_per_cpu[cpu_number] += irq_count; } - while (getc(fp) != '\n') ; /* flush interrupt description */ } @@ -5912,7 +6108,9 @@ int snapshot_sys_lpi_us(void) */ int snapshot_proc_sysfs_files(void) { - if (DO_BIC(BIC_IRQ)) + gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL); + + if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI)) if (snapshot_proc_interrupts()) return 1; @@ -7043,6 +7241,11 @@ void rapl_probe_intel(void) else rapl_dram_energy_units = rapl_energy_units; + if (platform->has_fixed_rapl_psys_unit) + rapl_psys_energy_units = 1.0; + else + rapl_psys_energy_units = rapl_energy_units; + time_unit = msr >> 16 & 0xF; if (time_unit == 0) time_unit = 0xA; @@ -8233,6 +8436,7 @@ void process_cpuid() aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1; BIC_PRESENT(BIC_IRQ); + BIC_PRESENT(BIC_NMI); BIC_PRESENT(BIC_TSC_MHz); } @@ -8292,6 +8496,33 @@ int dir_filter(const struct dirent *dirp) return 0; } +char *possible_file = "/sys/devices/system/cpu/possible"; +char possible_buf[1024]; + +int initialize_cpu_possible_set(void) +{ + FILE *fp; + + fp = fopen(possible_file, "r"); + if (!fp) { + warn("open %s", possible_file); + return -1; + } + if (fread(possible_buf, sizeof(char), 1024, fp) == 0) { + warn("read %s", possible_file); + goto err; + } + if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) { + warnx("%s: cpu str malformat %s\n", possible_file, cpu_effective_str); + goto err; + } + return 0; + +err: + fclose(fp); + return -1; +} + void topology_probe(bool startup) { int i; @@ -8324,6 +8555,16 @@ void topology_probe(bool startup) for_all_proc_cpus(mark_cpu_present); /* + * Allocate and initialize cpu_possible_set + */ + cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1)); + if (cpu_possible_set == NULL) + err(3, "CPU_ALLOC"); + cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set); + initialize_cpu_possible_set(); + + /* * Allocate and initialize cpu_effective_set */ cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1)); @@ -8583,7 +8824,11 @@ void allocate_irq_buffers(void) irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); if (irqs_per_cpu == NULL) - err(-1, "calloc %d", topo.max_cpu_num + 1); + err(-1, "calloc %d IRQ", topo.max_cpu_num + 1); + + nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); + if (nmi_per_cpu == NULL) + err(-1, "calloc %d NMI", topo.max_cpu_num + 1); } int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) @@ -8854,49 +9099,36 @@ int parse_telem_info_file(int fd_dir, const char *info_filename, const char *for struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) { - DIR *dirp; - struct dirent *entry; + struct pmt_diriter_t pmt_iter; + const struct dirent *entry; struct stat st; - unsigned int telem_idx; int fd_telem_dir, fd_pmt; unsigned long guid, size, offset; size_t mmap_size; void *mmio; - struct pmt_mmio *ret = NULL; + struct pmt_mmio *head = NULL, *last = NULL; + struct pmt_mmio *new_pmt = NULL; if (stat(SYSFS_TELEM_PATH, &st) == -1) return NULL; - dirp = opendir(SYSFS_TELEM_PATH); - if (dirp == NULL) + pmt_diriter_init(&pmt_iter); + entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); + if (!entry) { + pmt_diriter_remove(&pmt_iter); return NULL; + } - for (;;) { - entry = readdir(dirp); - - if (entry == NULL) - break; - - if (strcmp(entry->d_name, ".") == 0) - continue; - - if (strcmp(entry->d_name, "..") == 0) - continue; - - if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1) - continue; - - if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) { + for ( ; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) { + if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1) break; - } if (!S_ISDIR(st.st_mode)) continue; - fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY); - if (fd_telem_dir == -1) { + fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY); + if (fd_telem_dir == -1) break; - } if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { close(fd_telem_dir); @@ -8924,38 +9156,54 @@ struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) if (fd_pmt == -1) goto loop_cleanup_and_break; - mmap_size = (size + 0x1000UL) & (~0x1000UL); + mmap_size = ROUND_UP_TO_PAGE_SIZE(size); mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); if (mmio != MAP_FAILED) { - if (debug) fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio); - ret = calloc(1, sizeof(*ret)); + new_pmt = calloc(1, sizeof(*new_pmt)); - if (!ret) { + if (!new_pmt) { fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__); exit(1); } - ret->guid = guid; - ret->mmio_base = mmio; - ret->pmt_offset = offset; - ret->size = size; + /* + * Create linked list of mmaped regions, + * but preserve the ordering from sysfs. + * Ordering is important for the user to + * use the seq=%u parameter when adding a counter. + */ + new_pmt->guid = guid; + new_pmt->mmio_base = mmio; + new_pmt->pmt_offset = offset; + new_pmt->size = size; + new_pmt->next = pmt_mmios; + + if (last) + last->next = new_pmt; + else + head = new_pmt; - ret->next = pmt_mmios; - pmt_mmios = ret; + last = new_pmt; } loop_cleanup_and_break: close(fd_pmt); close(fd_telem_dir); - break; } - closedir(dirp); + pmt_diriter_remove(&pmt_iter); - return ret; + /* + * If we found something, stick just + * created linked list to the front. + */ + if (head) + pmt_mmios = head; + + return head; } struct pmt_mmio *pmt_mmio_find(unsigned int guid) @@ -8992,7 +9240,7 @@ void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offs return ret; } -struct pmt_mmio *pmt_add_guid(unsigned int guid) +struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq) { struct pmt_mmio *ret; @@ -9000,6 +9248,11 @@ struct pmt_mmio *pmt_add_guid(unsigned int guid) if (!ret) ret = pmt_mmio_open(guid); + while (ret && seq) { + ret = ret->next; + --seq; + } + return ret; } @@ -9046,7 +9299,7 @@ void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, pcounter->domains[domain_id].pcounter = pmmio; } -int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, +int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type, unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) { @@ -9066,10 +9319,10 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, exit(1); } - mmio = pmt_add_guid(guid); + mmio = pmt_add_guid(guid, seq); if (!mmio) { if (mode != PMT_OPEN_TRY) { - fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid); + fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq); exit(1); } @@ -9124,10 +9377,68 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, void pmt_init(void) { + int cpu_num; + unsigned long seq, offset, mod_num; + if (BIC_IS_ENABLED(BIC_Diec6)) { - pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB, - PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA, - 0, PMT_OPEN_TRY); + pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME, + PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, + SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY); + } + + if (BIC_IS_ENABLED(BIC_CPU_c1e)) { + seq = 0; + offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE; + mod_num = 0; /* Relative module number for current PMT file. */ + + /* Open the counter for each CPU. */ + for (cpu_num = 0; cpu_num < topo.max_cpu_num;) { + + if (cpu_is_not_allowed(cpu_num)) + goto next_loop_iter; + + /* + * Set the scope to CPU, even though CWF report the counter per module. + * CPUs inside the same module will read from the same location, instead of reporting zeros. + * + * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK. + */ + pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK, + PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU, + FORMAT_DELTA, cpu_num, PMT_OPEN_TRY); + + /* + * Rather complex logic for each time we go to the next loop iteration, + * so keep it as a label. + */ +next_loop_iter: + /* + * Advance the cpu number and check if we should also advance offset to + * the next counter inside the PMT file. + * + * On Clearwater Forest platform, the counter is reported per module, + * so open the same counter for all of the CPUs inside the module. + * That way, reported table show the correct value for all of the CPUs inside the module, + * instead of zeros. + */ + ++cpu_num; + if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) { + offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT; + ++mod_num; + } + + /* + * There are PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE in each PMT file. + * + * If that number is reached, seq must be incremented to advance to the next file in a sequence. + * Offset inside that file and a module counter has to be reset. + */ + if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) { + ++seq; + offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE; + mod_num = 0; + } + } } } @@ -9163,6 +9474,18 @@ void turbostat_init() } } +void affinitize_child(void) +{ + /* Prefer cpu_possible_set, if available */ + if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) { + warn("sched_setaffinity cpu_possible_set"); + + /* Otherwise, allow child to run on same cpu set as turbostat */ + if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set)) + warn("sched_setaffinity cpu_allowed_set"); + } +} + int fork_it(char **argv) { pid_t child_pid; @@ -9178,6 +9501,7 @@ int fork_it(char **argv) child_pid = fork(); if (!child_pid) { /* child */ + affinitize_child(); execvp(argv[0], argv); err(errno, "exec %s", argv[0]); } else { @@ -9204,10 +9528,9 @@ int fork_it(char **argv) timersub(&tv_odd, &tv_even, &tv_delta); if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) fprintf(outf, "%s: Counter reset detected\n", progname); - else { - compute_average(EVEN_COUNTERS); - format_all_counters(EVEN_COUNTERS); - } + + compute_average(EVEN_COUNTERS); + format_all_counters(EVEN_COUNTERS); fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0); @@ -9236,7 +9559,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2024.11.30 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2025.02.02 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -9561,7 +9884,7 @@ next: } if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) { - fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n"); + fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n"); fail++; } @@ -9599,15 +9922,100 @@ bool starts_with(const char *str, const char *prefix) return strncmp(prefix, str, strlen(prefix)) == 0; } +int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq) +{ + struct pmt_diriter_t pmt_iter; + const struct dirent *dirname; + struct stat stat, target_stat; + int fd_telem_dir = -1; + int fd_target_dir; + unsigned int seq = 0; + unsigned long guid, target_guid; + int ret = -1; + + fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY); + if (fd_target_dir == -1) { + return -1; + } + + if (fstat(fd_target_dir, &target_stat) == -1) { + fprintf(stderr, "%s: Failed to stat the target: %s", __func__, strerror(errno)); + exit(1); + } + + if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) { + fprintf(stderr, "%s: Failed to parse the target guid file: %s", __func__, strerror(errno)); + exit(1); + } + + close(fd_target_dir); + + pmt_diriter_init(&pmt_iter); + + for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL; + dirname = pmt_diriter_next(&pmt_iter)) { + + fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY); + if (fd_telem_dir == -1) + continue; + + if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { + fprintf(stderr, "%s: Failed to parse the guid file: %s", __func__, strerror(errno)); + continue; + } + + if (fstat(fd_telem_dir, &stat) == -1) { + fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__, + dirname->d_name, strerror(errno)); + continue; + } + + /* + * If reached the same directory as target, exit the loop. + * Seq has the correct value now. + */ + if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) { + ret = 0; + break; + } + + /* + * If reached directory with the same guid, + * but it's not the target directory yet, + * increment seq and continue the search. + */ + if (guid == target_guid) + ++seq; + + close(fd_telem_dir); + fd_telem_dir = -1; + } + + pmt_diriter_remove(&pmt_iter); + + if (fd_telem_dir != -1) + close(fd_telem_dir); + + if (!ret) { + *out_guid = target_guid; + *out_seq = seq; + } + + return ret; +} + void parse_add_command_pmt(char *add_command) { char *name = NULL; char *type_name = NULL; char *format_name = NULL; + char *direct_path = NULL; + static const char direct_path_prefix[] = "path="; unsigned int offset; unsigned int lsb; unsigned int msb; unsigned int guid; + unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */ unsigned int domain_id; enum counter_scope scope = 0; enum pmt_datatype type = PMT_TYPE_RAW; @@ -9687,6 +10095,13 @@ void parse_add_command_pmt(char *add_command) goto next; } + if (sscanf(add_command, "seq=%x", &seq) == 1) + goto next; + + if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) { + direct_path = add_command + strlen(direct_path_prefix); + goto next; + } next: add_command = strchr(add_command, ','); if (add_command) { @@ -9737,6 +10152,11 @@ next: has_type = true; } + if (strcmp("tcore_clock", type_name) == 0) { + type = PMT_TYPE_TCORE_CLOCK; + has_type = true; + } + if (!has_type) { printf("%s: invalid %s: %s\n", __func__, "type", type_name); exit(1); @@ -9758,8 +10178,24 @@ next: exit(1); } + if (direct_path && has_guid) { + printf("%s: path and guid+seq parameters are mutually exclusive\n" + "notice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path); + exit(1); + } + + if (direct_path) { + if (pmt_parse_from_path(direct_path, &guid, &seq)) { + printf("%s: failed to parse PMT file from %s\n", __func__, direct_path); + exit(1); + } + + /* GUID was just infered from the direct path. */ + has_guid = true; + } + if (!has_guid) { - printf("%s: missing %s\n", __func__, "guid"); + printf("%s: missing %s\n", __func__, "guid or path"); exit(1); } @@ -9773,7 +10209,7 @@ next: exit(1); } - pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED); + pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED); } void parse_add_command(char *add_command) @@ -9921,6 +10357,7 @@ void cmdline(int argc, char **argv) { "Dump", no_argument, 0, 'D' }, { "debug", no_argument, 0, 'd' }, /* internal, not documented */ { "enable", required_argument, 0, 'e' }, + { "force", no_argument, 0, 'f' }, { "interval", required_argument, 0, 'i' }, { "IPC", no_argument, 0, 'I' }, { "num_iterations", required_argument, 0, 'n' }, @@ -9981,6 +10418,9 @@ void cmdline(int argc, char **argv) /* --enable specified counter */ bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); break; + case 'f': + force_load++; + break; case 'd': debug++; ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 2f36b7b6418d..f3e15e9efa76 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -9,7 +9,7 @@ #ifdef LSP #define __bpf__ -#include "../vmlinux/vmlinux.h" +#include "../vmlinux.h" #else #include "vmlinux.h" #endif @@ -24,6 +24,10 @@ #define PF_EXITING 0x00000004 #define CLOCK_MONOTONIC 1 +extern int LINUX_KERNEL_VERSION __kconfig; +extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak; +extern const char CONFIG_LOCALVERSION[64] __kconfig __weak; + /* * Earlier versions of clang/pahole lost upper 32bits in 64bit enums which can * lead to really confusing misbehaviors. Let's trigger a build failure. @@ -40,9 +44,9 @@ void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_fl void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak; u32 scx_bpf_dispatch_nr_slots(void) __ksym; void scx_bpf_dispatch_cancel(void) __ksym; -bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym; -void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym; -void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym; +bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym __weak; +void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak; +void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak; bool scx_bpf_dsq_move(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; bool scx_bpf_dsq_move_vtime(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; u32 scx_bpf_reenqueue_local(void) __ksym; @@ -72,6 +76,7 @@ bool scx_bpf_task_running(const struct task_struct *p) __ksym; s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym; struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym; struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak; +u64 scx_bpf_now(void) __ksym __weak; /* * Use the following as @it__iter when calling scx_bpf_dsq_move[_vtime]() from @@ -98,7 +103,7 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {} _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ ___bpf_fill(___param, args); \ - _Pragma("GCC diagnostic pop") \ + _Pragma("GCC diagnostic pop") /* * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments @@ -136,6 +141,20 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {} ___scx_bpf_bstr_format_checker(fmt, ##args); \ }) +/* + * scx_bpf_dump_header() is a wrapper around scx_bpf_dump that adds a header + * of system information for debugging. + */ +#define scx_bpf_dump_header() \ +({ \ + scx_bpf_dump("kernel: %d.%d.%d %s\ncc: %s\n", \ + LINUX_KERNEL_VERSION >> 16, \ + LINUX_KERNEL_VERSION >> 8 & 0xFF, \ + LINUX_KERNEL_VERSION & 0xFF, \ + CONFIG_LOCALVERSION, \ + CONFIG_CC_VERSION_TEXT); \ +}) + #define BPF_STRUCT_OPS(name, args...) \ SEC("struct_ops/"#name) \ BPF_PROG(name, ##args) @@ -317,6 +336,66 @@ u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; +int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words) __ksym; +int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym; +void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym; + +#define def_iter_struct(name) \ +struct bpf_iter_##name { \ + struct bpf_iter_bits it; \ + const struct cpumask *bitmap; \ +}; + +#define def_iter_new(name) \ +static inline int bpf_iter_##name##_new( \ + struct bpf_iter_##name *it, const u64 *unsafe_ptr__ign, u32 nr_words) \ +{ \ + it->bitmap = scx_bpf_get_##name##_cpumask(); \ + return bpf_iter_bits_new(&it->it, (const u64 *)it->bitmap, \ + sizeof(struct cpumask) / 8); \ +} + +#define def_iter_next(name) \ +static inline int *bpf_iter_##name##_next(struct bpf_iter_##name *it) { \ + return bpf_iter_bits_next(&it->it); \ +} + +#define def_iter_destroy(name) \ +static inline void bpf_iter_##name##_destroy(struct bpf_iter_##name *it) { \ + scx_bpf_put_cpumask(it->bitmap); \ + bpf_iter_bits_destroy(&it->it); \ +} +#define def_for_each_cpu(cpu, name) for_each_##name##_cpu(cpu) + +/// Provides iterator for possible and online cpus. +/// +/// # Example +/// +/// ``` +/// static inline void example_use() { +/// int *cpu; +/// +/// for_each_possible_cpu(cpu){ +/// bpf_printk("CPU %d is possible", *cpu); +/// } +/// +/// for_each_online_cpu(cpu){ +/// bpf_printk("CPU %d is online", *cpu); +/// } +/// } +/// ``` +def_iter_struct(possible); +def_iter_new(possible); +def_iter_next(possible); +def_iter_destroy(possible); +#define for_each_possible_cpu(cpu) bpf_for_each(possible, cpu, NULL, 0) + +def_iter_struct(online); +def_iter_new(online); +def_iter_next(online); +def_iter_destroy(online); +#define for_each_online_cpu(cpu) bpf_for_each(online, cpu, NULL, 0) + /* * Access a cpumask in read-only mode (typically to check bits). */ @@ -329,6 +408,100 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask) void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; +/* + * Time helpers, most of which are from jiffies.h. + */ + +/** + * time_delta - Calculate the delta between new and old time stamp + * @after: first comparable as u64 + * @before: second comparable as u64 + * + * Return: the time difference, which is >= 0 + */ +static inline s64 time_delta(u64 after, u64 before) +{ + return (s64)(after - before) > 0 ? : 0; +} + +/** + * time_after - returns true if the time a is after time b. + * @a: first comparable as u64 + * @b: second comparable as u64 + * + * Do this with "<0" and ">=0" to only test the sign of the result. A + * good compiler would generate better code (and a really good compiler + * wouldn't care). Gcc is currently neither. + * + * Return: %true is time a is after time b, otherwise %false. + */ +static inline bool time_after(u64 a, u64 b) +{ + return (s64)(b - a) < 0; +} + +/** + * time_before - returns true if the time a is before time b. + * @a: first comparable as u64 + * @b: second comparable as u64 + * + * Return: %true is time a is before time b, otherwise %false. + */ +static inline bool time_before(u64 a, u64 b) +{ + return time_after(b, a); +} + +/** + * time_after_eq - returns true if the time a is after or the same as time b. + * @a: first comparable as u64 + * @b: second comparable as u64 + * + * Return: %true is time a is after or the same as time b, otherwise %false. + */ +static inline bool time_after_eq(u64 a, u64 b) +{ + return (s64)(a - b) >= 0; +} + +/** + * time_before_eq - returns true if the time a is before or the same as time b. + * @a: first comparable as u64 + * @b: second comparable as u64 + * + * Return: %true is time a is before or the same as time b, otherwise %false. + */ +static inline bool time_before_eq(u64 a, u64 b) +{ + return time_after_eq(b, a); +} + +/** + * time_in_range - Calculate whether a is in the range of [b, c]. + * @a: time to test + * @b: beginning of the range + * @c: end of the range + * + * Return: %true is time a is in the range [b, c], otherwise %false. + */ +static inline bool time_in_range(u64 a, u64 b, u64 c) +{ + return time_after_eq(a, b) && time_before_eq(a, c); +} + +/** + * time_in_range_open - Calculate whether a is in the range of [b, c). + * @a: time to test + * @b: beginning of the range + * @c: end of the range + * + * Return: %true is time a is in the range [b, c), otherwise %false. + */ +static inline bool time_in_range_open(u64 a, u64 b, u64 c) +{ + return time_after_eq(a, b) && time_before(a, c); +} + /* * Other helpers @@ -423,5 +596,6 @@ static inline u32 log2_u64(u64 v) } #include "compat.bpf.h" +#include "enums.bpf.h" #endif /* __SCX_COMMON_BPF_H */ diff --git a/tools/sched_ext/include/scx/common.h b/tools/sched_ext/include/scx/common.h index 5b0f90152152..dc18b99e55cd 100644 --- a/tools/sched_ext/include/scx/common.h +++ b/tools/sched_ext/include/scx/common.h @@ -71,5 +71,11 @@ typedef int64_t s64; #include "user_exit_info.h" #include "compat.h" +#include "enums.h" + +/* not available when building kernel tools/sched_ext */ +#if __has_include(<lib/sdt_task.h>) +#include <lib/sdt_task.h> +#endif #endif /* __SCHED_EXT_COMMON_H */ diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h index d56520100a26..50e1499ae093 100644 --- a/tools/sched_ext/include/scx/compat.bpf.h +++ b/tools/sched_ext/include/scx/compat.bpf.h @@ -125,6 +125,11 @@ bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, false; \ }) +#define scx_bpf_now() \ + (bpf_ksym_exists(scx_bpf_now) ? \ + scx_bpf_now() : \ + bpf_ktime_get_ns()) + /* * Define sched_ext_ops. This may be expanded to define multiple variants for * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH(). diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h index cc56ff9aa252..b50280e2ba2b 100644 --- a/tools/sched_ext/include/scx/compat.h +++ b/tools/sched_ext/include/scx/compat.h @@ -149,6 +149,7 @@ static inline long scx_hotplug_seq(void) __skel = __scx_name##__open(); \ SCX_BUG_ON(!__skel, "Could not open " #__scx_name); \ __skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq(); \ + SCX_ENUM_INIT(__skel); \ __skel; \ }) diff --git a/tools/sched_ext/include/scx/enums.autogen.bpf.h b/tools/sched_ext/include/scx/enums.autogen.bpf.h new file mode 100644 index 000000000000..0e941a0d6f88 --- /dev/null +++ b/tools/sched_ext/include/scx/enums.autogen.bpf.h @@ -0,0 +1,105 @@ +/* + * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would + * like to access an enum that is currently missing, add it to the script + * and run it from the root directory to update this file. + */ + +const volatile u64 __SCX_OPS_NAME_LEN __weak; +#define SCX_OPS_NAME_LEN __SCX_OPS_NAME_LEN + +const volatile u64 __SCX_SLICE_DFL __weak; +#define SCX_SLICE_DFL __SCX_SLICE_DFL + +const volatile u64 __SCX_SLICE_INF __weak; +#define SCX_SLICE_INF __SCX_SLICE_INF + +const volatile u64 __SCX_DSQ_FLAG_BUILTIN __weak; +#define SCX_DSQ_FLAG_BUILTIN __SCX_DSQ_FLAG_BUILTIN + +const volatile u64 __SCX_DSQ_FLAG_LOCAL_ON __weak; +#define SCX_DSQ_FLAG_LOCAL_ON __SCX_DSQ_FLAG_LOCAL_ON + +const volatile u64 __SCX_DSQ_INVALID __weak; +#define SCX_DSQ_INVALID __SCX_DSQ_INVALID + +const volatile u64 __SCX_DSQ_GLOBAL __weak; +#define SCX_DSQ_GLOBAL __SCX_DSQ_GLOBAL + +const volatile u64 __SCX_DSQ_LOCAL __weak; +#define SCX_DSQ_LOCAL __SCX_DSQ_LOCAL + +const volatile u64 __SCX_DSQ_LOCAL_ON __weak; +#define SCX_DSQ_LOCAL_ON __SCX_DSQ_LOCAL_ON + +const volatile u64 __SCX_DSQ_LOCAL_CPU_MASK __weak; +#define SCX_DSQ_LOCAL_CPU_MASK __SCX_DSQ_LOCAL_CPU_MASK + +const volatile u64 __SCX_TASK_QUEUED __weak; +#define SCX_TASK_QUEUED __SCX_TASK_QUEUED + +const volatile u64 __SCX_TASK_RESET_RUNNABLE_AT __weak; +#define SCX_TASK_RESET_RUNNABLE_AT __SCX_TASK_RESET_RUNNABLE_AT + +const volatile u64 __SCX_TASK_DEQD_FOR_SLEEP __weak; +#define SCX_TASK_DEQD_FOR_SLEEP __SCX_TASK_DEQD_FOR_SLEEP + +const volatile u64 __SCX_TASK_STATE_SHIFT __weak; +#define SCX_TASK_STATE_SHIFT __SCX_TASK_STATE_SHIFT + +const volatile u64 __SCX_TASK_STATE_BITS __weak; +#define SCX_TASK_STATE_BITS __SCX_TASK_STATE_BITS + +const volatile u64 __SCX_TASK_STATE_MASK __weak; +#define SCX_TASK_STATE_MASK __SCX_TASK_STATE_MASK + +const volatile u64 __SCX_TASK_CURSOR __weak; +#define SCX_TASK_CURSOR __SCX_TASK_CURSOR + +const volatile u64 __SCX_TASK_NONE __weak; +#define SCX_TASK_NONE __SCX_TASK_NONE + +const volatile u64 __SCX_TASK_INIT __weak; +#define SCX_TASK_INIT __SCX_TASK_INIT + +const volatile u64 __SCX_TASK_READY __weak; +#define SCX_TASK_READY __SCX_TASK_READY + +const volatile u64 __SCX_TASK_ENABLED __weak; +#define SCX_TASK_ENABLED __SCX_TASK_ENABLED + +const volatile u64 __SCX_TASK_NR_STATES __weak; +#define SCX_TASK_NR_STATES __SCX_TASK_NR_STATES + +const volatile u64 __SCX_TASK_DSQ_ON_PRIQ __weak; +#define SCX_TASK_DSQ_ON_PRIQ __SCX_TASK_DSQ_ON_PRIQ + +const volatile u64 __SCX_KICK_IDLE __weak; +#define SCX_KICK_IDLE __SCX_KICK_IDLE + +const volatile u64 __SCX_KICK_PREEMPT __weak; +#define SCX_KICK_PREEMPT __SCX_KICK_PREEMPT + +const volatile u64 __SCX_KICK_WAIT __weak; +#define SCX_KICK_WAIT __SCX_KICK_WAIT + +const volatile u64 __SCX_ENQ_WAKEUP __weak; +#define SCX_ENQ_WAKEUP __SCX_ENQ_WAKEUP + +const volatile u64 __SCX_ENQ_HEAD __weak; +#define SCX_ENQ_HEAD __SCX_ENQ_HEAD + +const volatile u64 __SCX_ENQ_PREEMPT __weak; +#define SCX_ENQ_PREEMPT __SCX_ENQ_PREEMPT + +const volatile u64 __SCX_ENQ_REENQ __weak; +#define SCX_ENQ_REENQ __SCX_ENQ_REENQ + +const volatile u64 __SCX_ENQ_LAST __weak; +#define SCX_ENQ_LAST __SCX_ENQ_LAST + +const volatile u64 __SCX_ENQ_CLEAR_OPSS __weak; +#define SCX_ENQ_CLEAR_OPSS __SCX_ENQ_CLEAR_OPSS + +const volatile u64 __SCX_ENQ_DSQ_PRIQ __weak; +#define SCX_ENQ_DSQ_PRIQ __SCX_ENQ_DSQ_PRIQ + diff --git a/tools/sched_ext/include/scx/enums.autogen.h b/tools/sched_ext/include/scx/enums.autogen.h new file mode 100644 index 000000000000..88137a140e72 --- /dev/null +++ b/tools/sched_ext/include/scx/enums.autogen.h @@ -0,0 +1,41 @@ +/* + * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would + * like to access an enum that is currently missing, add it to the script + * and run it from the root directory to update this file. + */ + +#define SCX_ENUM_INIT(skel) do { \ + SCX_ENUM_SET(skel, scx_public_consts, SCX_OPS_NAME_LEN); \ + SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_DFL); \ + SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_INF); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_BUILTIN); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_LOCAL_ON); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_INVALID); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_GLOBAL); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_ON); \ + SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_CPU_MASK); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_QUEUED); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_RESET_RUNNABLE_AT); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_DEQD_FOR_SLEEP); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_SHIFT); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_BITS); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_MASK); \ + SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_CURSOR); \ + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NONE); \ + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_INIT); \ + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_READY); \ + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_ENABLED); \ + SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NR_STATES); \ + SCX_ENUM_SET(skel, scx_ent_dsq_flags, SCX_TASK_DSQ_ON_PRIQ); \ + SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_IDLE); \ + SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_PREEMPT); \ + SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_WAIT); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_WAKEUP); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_HEAD); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_PREEMPT); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_REENQ); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_LAST); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_CLEAR_OPSS); \ + SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_DSQ_PRIQ); \ +} while (0) diff --git a/tools/sched_ext/include/scx/enums.bpf.h b/tools/sched_ext/include/scx/enums.bpf.h new file mode 100644 index 000000000000..af704c5d6334 --- /dev/null +++ b/tools/sched_ext/include/scx/enums.bpf.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Convenience macros for getting/setting struct scx_enums instances. + * + * Copyright (c) 2024 Meta Platforms, Inc. and affiliates. + */ +#ifndef __SCX_ENUMS_BPF_H +#define __SCX_ENUMS_BPF_H + +#include "enums.autogen.bpf.h" + +#endif /* __SCX_ENUMS_BPF_H */ diff --git a/tools/sched_ext/include/scx/enums.h b/tools/sched_ext/include/scx/enums.h new file mode 100644 index 000000000000..34cbebe974b7 --- /dev/null +++ b/tools/sched_ext/include/scx/enums.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Define struct scx_enums that stores the load-time values of enums + * used by the BPF program. + * + * Copyright (c) 2024 Meta Platforms, Inc. and affiliates. + */ + +#ifndef __SCX_ENUMS_H +#define __SCX_ENUMS_H + +static inline void __ENUM_set(u64 *val, char *type, char *name) +{ + bool res; + + res = __COMPAT_read_enum(type, name, val); + SCX_BUG_ON(!res, "enum not found(%s)", name); +} + +#define SCX_ENUM_SET(skel, type, name) do { \ + __ENUM_set(&skel->rodata->__##name, #type, #name); \ + } while (0) + + +#include "enums.autogen.h" + +#endif /* __SCX_ENUMS_H */ diff --git a/tools/sched_ext/include/scx/user_exit_info.h b/tools/sched_ext/include/scx/user_exit_info.h index 8ce2734402e1..66f856640ee7 100644 --- a/tools/sched_ext/include/scx/user_exit_info.h +++ b/tools/sched_ext/include/scx/user_exit_info.h @@ -10,6 +10,11 @@ #ifndef __USER_EXIT_INFO_H #define __USER_EXIT_INFO_H +#ifdef LSP +#define __bpf__ +#include "../vmlinux.h" +#endif + enum uei_sizes { UEI_REASON_LEN = 128, UEI_MSG_LEN = 1024, @@ -25,9 +30,7 @@ struct user_exit_info { #ifdef __bpf__ -#ifdef LSP -#include "../vmlinux/vmlinux.h" -#else +#ifndef LSP #include "vmlinux.h" #endif #include <bpf/bpf_core_read.h> diff --git a/tools/sched_ext/scx_central.bpf.c b/tools/sched_ext/scx_central.bpf.c index e6fad6211f6c..50bc1737c167 100644 --- a/tools/sched_ext/scx_central.bpf.c +++ b/tools/sched_ext/scx_central.bpf.c @@ -57,7 +57,7 @@ enum { const volatile s32 central_cpu; const volatile u32 nr_cpu_ids = 1; /* !0 for veristat, set during init */ -const volatile u64 slice_ns = SCX_SLICE_DFL; +const volatile u64 slice_ns; bool timer_pinned = true; u64 nr_total, nr_locals, nr_queued, nr_lost_pids; @@ -87,11 +87,6 @@ struct { __type(value, struct central_timer); } central_timer SEC(".maps"); -static bool vtime_before(u64 a, u64 b) -{ - return (s64)(a - b) < 0; -} - s32 BPF_STRUCT_OPS(central_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) { @@ -245,7 +240,7 @@ void BPF_STRUCT_OPS(central_running, struct task_struct *p) s32 cpu = scx_bpf_task_cpu(p); u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids); if (started_at) - *started_at = bpf_ktime_get_ns() ?: 1; /* 0 indicates idle */ + *started_at = scx_bpf_now() ?: 1; /* 0 indicates idle */ } void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable) @@ -258,7 +253,7 @@ void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable) static int central_timerfn(void *map, int *key, struct bpf_timer *timer) { - u64 now = bpf_ktime_get_ns(); + u64 now = scx_bpf_now(); u64 nr_to_kick = nr_queued; s32 i, curr_cpu; @@ -279,7 +274,7 @@ static int central_timerfn(void *map, int *key, struct bpf_timer *timer) /* kick iff the current one exhausted its slice */ started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids); if (started_at && *started_at && - vtime_before(now, *started_at + slice_ns)) + time_before(now, *started_at + slice_ns)) continue; /* and there's something pending */ diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index 21deea320bd7..1e9f74525d8f 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c @@ -58,6 +58,7 @@ restart: skel->rodata->central_cpu = 0; skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus(); + skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL"); while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) { switch (opt) { @@ -97,7 +98,7 @@ restart: SCX_BUG_ON(!cpuset, "Failed to allocate cpuset"); CPU_ZERO(cpuset); CPU_SET(skel->rodata->central_cpu, cpuset); - SCX_BUG_ON(sched_setaffinity(0, sizeof(cpuset), cpuset), + SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset), "Failed to affinitize to central CPU %d (max %d)", skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1); CPU_FREE(cpuset); diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 4e3afcd260bf..2c720e3ecad5 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -57,7 +57,7 @@ enum { char _license[] SEC("license") = "GPL"; const volatile u32 nr_cpus = 32; /* !0 for veristat, set during init */ -const volatile u64 cgrp_slice_ns = SCX_SLICE_DFL; +const volatile u64 cgrp_slice_ns; const volatile bool fifo_sched; u64 cvtime_now; @@ -137,11 +137,6 @@ static u64 div_round_up(u64 dividend, u64 divisor) return (dividend + divisor - 1) / divisor; } -static bool vtime_before(u64 a, u64 b) -{ - return (s64)(a - b) < 0; -} - static bool cgv_node_less(struct bpf_rb_node *a, const struct bpf_rb_node *b) { struct cgv_node *cgc_a, *cgc_b; @@ -271,7 +266,7 @@ static void cgrp_cap_budget(struct cgv_node *cgv_node, struct fcg_cgrp_ctx *cgc) */ max_budget = (cgrp_slice_ns * nr_cpus * cgc->hweight) / (2 * FCG_HWEIGHT_ONE); - if (vtime_before(cvtime, cvtime_now - max_budget)) + if (time_before(cvtime, cvtime_now - max_budget)) cvtime = cvtime_now - max_budget; cgv_node->cvtime = cvtime; @@ -401,7 +396,7 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags) * Limit the amount of budget that an idling task can accumulate * to one slice. */ - if (vtime_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL)) + if (time_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL)) tvtime = cgc->tvtime_now - SCX_SLICE_DFL; scx_bpf_dsq_insert_vtime(p, cgrp->kn->id, SCX_SLICE_DFL, @@ -535,7 +530,7 @@ void BPF_STRUCT_OPS(fcg_running, struct task_struct *p) * from multiple CPUs and thus racy. Any error should be * contained and temporary. Let's just live with it. */ - if (vtime_before(cgc->tvtime_now, p->scx.dsq_vtime)) + if (time_before(cgc->tvtime_now, p->scx.dsq_vtime)) cgc->tvtime_now = p->scx.dsq_vtime; } bpf_cgroup_release(cgrp); @@ -645,7 +640,7 @@ static bool try_pick_next_cgroup(u64 *cgidp) cgv_node = container_of(rb_node, struct cgv_node, rb_node); cgid = cgv_node->cgid; - if (vtime_before(cvtime_now, cgv_node->cvtime)) + if (time_before(cvtime_now, cgv_node->cvtime)) cvtime_now = cgv_node->cvtime; /* @@ -734,7 +729,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev) struct fcg_cpu_ctx *cpuc; struct fcg_cgrp_ctx *cgc; struct cgroup *cgrp; - u64 now = bpf_ktime_get_ns(); + u64 now = scx_bpf_now(); bool picked_next = false; cpuc = find_cpu_ctx(); @@ -744,7 +739,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev) if (!cpuc->cur_cgid) goto pick_next_cgroup; - if (vtime_before(now, cpuc->cur_at + cgrp_slice_ns)) { + if (time_before(now, cpuc->cur_at + cgrp_slice_ns)) { if (scx_bpf_dsq_move_to_local(cpuc->cur_cgid)) { stat_inc(FCG_STAT_CNS_KEEP); return; @@ -920,14 +915,14 @@ void BPF_STRUCT_OPS(fcg_cgroup_move, struct task_struct *p, struct cgroup *from, struct cgroup *to) { struct fcg_cgrp_ctx *from_cgc, *to_cgc; - s64 vtime_delta; + s64 delta; /* find_cgrp_ctx() triggers scx_ops_error() on lookup failures */ if (!(from_cgc = find_cgrp_ctx(from)) || !(to_cgc = find_cgrp_ctx(to))) return; - vtime_delta = p->scx.dsq_vtime - from_cgc->tvtime_now; - p->scx.dsq_vtime = to_cgc->tvtime_now + vtime_delta; + delta = time_delta(p->scx.dsq_vtime, from_cgc->tvtime_now); + p->scx.dsq_vtime = to_cgc->tvtime_now + delta; } s32 BPF_STRUCT_OPS_SLEEPABLE(fcg_init) diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c index 5d24ca9c29d9..6dd423eeb4ff 100644 --- a/tools/sched_ext/scx_flatcg.c +++ b/tools/sched_ext/scx_flatcg.c @@ -137,6 +137,7 @@ restart: skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); + skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL"); while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) { double v; diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index ee264947e0c3..3a20bb0c014a 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -33,7 +33,7 @@ enum consts { char _license[] SEC("license") = "GPL"; -const volatile u64 slice_ns = SCX_SLICE_DFL; +const volatile u64 slice_ns; const volatile u32 stall_user_nth; const volatile u32 stall_kernel_nth; const volatile u32 dsp_inf_loop_after; diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index ac45a02b4055..c4912ab2e76f 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -64,6 +64,8 @@ int main(int argc, char **argv) skel = SCX_OPS_OPEN(qmap_ops, scx_qmap); + skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL"); + while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PHd:D:Spvh")) != -1) { switch (opt) { case 's': diff --git a/tools/sched_ext/scx_simple.bpf.c b/tools/sched_ext/scx_simple.bpf.c index 31f915b286c6..e6de99dba7db 100644 --- a/tools/sched_ext/scx_simple.bpf.c +++ b/tools/sched_ext/scx_simple.bpf.c @@ -52,11 +52,6 @@ static void stat_inc(u32 idx) (*cnt_p)++; } -static inline bool vtime_before(u64 a, u64 b) -{ - return (s64)(a - b) < 0; -} - s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) { bool is_idle = false; @@ -84,7 +79,7 @@ void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags) * Limit the amount of budget that an idling task can accumulate * to one slice. */ - if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL)) + if (time_before(vtime, vtime_now - SCX_SLICE_DFL)) vtime = vtime_now - SCX_SLICE_DFL; scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime, @@ -108,7 +103,7 @@ void BPF_STRUCT_OPS(simple_running, struct task_struct *p) * thus racy. Any error should be contained and temporary. Let's just * live with it. */ - if (vtime_before(vtime_now, p->scx.dsq_vtime)) + if (time_before(vtime_now, p->scx.dsq_vtime)) vtime_now = p->scx.dsq_vtime; } diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index d0337c11f9ee..cc8948f49117 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -725,7 +725,7 @@ static void default_mock_decoder(struct cxl_decoder *cxld) cxld->reset = mock_decoder_reset; } -static int first_decoder(struct device *dev, void *data) +static int first_decoder(struct device *dev, const void *data) { struct cxl_decoder *cxld; diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 347c1e7b37bd..8d731bd63988 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -401,6 +401,10 @@ struct cxl_test_gen_media gen_media = { .channel = 1, .rank = 30, }, + .component_id = { 0x3, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d }, + .cme_threshold_ev_flags = 3, + .cme_count = { 33, 0, 0 }, + .sub_type = 0x2, }, }; @@ -429,6 +433,11 @@ struct cxl_test_dram dram = { .bank_group = 5, .bank = 2, .column = {0xDE, 0xAD}, + .component_id = { 0x1, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d }, + .sub_channel = 8, + .cme_threshold_ev_flags = 2, + .cvme_count = { 14, 0, 0 }, + .sub_type = 0x5, }, }; @@ -456,7 +465,10 @@ struct cxl_test_mem_module mem_module = { .dirty_shutdown_cnt = { 0xde, 0xad, 0xbe, 0xef }, .cor_vol_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, .cor_per_err_cnt = { 0xde, 0xad, 0xbe, 0xef }, - } + }, + /* .validity_flags = <set below> */ + .component_id = { 0x2, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d }, + .event_sub_type = 0x3, }, }; @@ -478,13 +490,18 @@ static int mock_set_timestamp(struct cxl_dev_state *cxlds, static void cxl_mock_add_event_logs(struct mock_event_store *mes) { - put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK, + put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK | + CXL_GMER_VALID_COMPONENT | CXL_GMER_VALID_COMPONENT_ID_FORMAT, &gen_media.rec.media_hdr.validity_flags); put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP | - CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN, + CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN | CXL_DER_VALID_SUB_CHANNEL | + CXL_DER_VALID_COMPONENT | CXL_DER_VALID_COMPONENT_ID_FORMAT, &dram.rec.media_hdr.validity_flags); + put_unaligned_le16(CXL_MMER_VALID_COMPONENT | CXL_MMER_VALID_COMPONENT_ID_FORMAT, + &mem_module.rec.validity_flags); + mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed); mes_add_event(mes, CXL_EVENT_TYPE_INFO, (struct cxl_event_record_raw *)&gen_media); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 450c7566c33f..af2594e4f35d 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -228,16 +228,16 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL"); -int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, +int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info) { int rc = 0, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - if (ops && ops->is_mock_dev(dev)) + if (ops && ops->is_mock_dev(cxlds->dev)) rc = 0; else - rc = cxl_dvsec_rr_decode(dev, port, info); + rc = cxl_dvsec_rr_decode(cxlds, info); put_cxl_mock_ops(index); return rc; diff --git a/tools/testing/ktest/examples/include/defaults.conf b/tools/testing/ktest/examples/include/defaults.conf index 63a1a83f4f0b..f6d8517a471e 100644 --- a/tools/testing/ktest/examples/include/defaults.conf +++ b/tools/testing/ktest/examples/include/defaults.conf @@ -46,7 +46,7 @@ CLEAR_LOG = 1 SSH_USER = root -# For accesing the machine, we will ssh to root@machine. +# For accessing the machine, we will ssh to root@machine. SSH := ssh ${SSH_USER}@${MACHINE} # Update this. The default here is ktest will ssh to the target box diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index dacad94e2be4..8c8da966c641 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1245,7 +1245,7 @@ sub __read_config { # Config variables are only active while reading the # config and can be defined anywhere. They also ignore # TEST_START and DEFAULTS, but are skipped if they are in - # on of these sections that have SKIP defined. + # one of these sections that have SKIP defined. # The save variable can be # defined multiple times and the new one simply overrides # the previous one. @@ -2419,6 +2419,11 @@ sub get_version { return if ($have_version); doprint "$make kernelrelease ... "; $version = `$make -s kernelrelease | tail -1`; + if (!length($version)) { + run_command "$make allnoconfig" or return 0; + doprint "$make kernelrelease ... "; + $version = `$make -s kernelrelease | tail -1`; + } chomp($version); doprint "$version\n"; $have_version = 1; @@ -2960,8 +2965,6 @@ sub run_bisect_test { my $failed = 0; my $result; - my $output; - my $ret; $in_bisect = 1; diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index b3b00269a52a..b0049be00c70 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -38,9 +38,6 @@ CONFIG_IWLWIFI=y CONFIG_DAMON=y CONFIG_DAMON_VADDR=y CONFIG_DAMON_PADDR=y -CONFIG_DEBUG_FS=y -CONFIG_DAMON_DBGFS=y -CONFIG_DAMON_DBGFS_DEPRECATED=y CONFIG_REGMAP_BUILD=y diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 676fa99a8b19..7f9ae55fd6d5 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -312,7 +312,16 @@ def massage_argv(argv: Sequence[str]) -> Sequence[str]: return list(map(massage_arg, argv)) def get_default_jobs() -> int: - return len(os.sched_getaffinity(0)) + if sys.version_info >= (3, 13): + if (ncpu := os.process_cpu_count()) is not None: + return ncpu + raise RuntimeError("os.process_cpu_count() returned None") + # See https://github.com/python/cpython/blob/b61fece/Lib/os.py#L1175-L1186. + if sys.platform != "darwin": + return len(os.sched_getaffinity(0)) + if (ncpu := os.cpu_count()) is not None: + return ncpu + raise RuntimeError("os.cpu_count() returned None") def add_common_opts(parser: argparse.ArgumentParser) -> None: parser.add_argument('--build_dir', diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index e76d7894b6c5..d30f90eae9a4 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -125,6 +125,9 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): '-append', ' '.join(params + [self._kernel_command_line]), '-no-reboot', '-nographic', + '-accel', 'kvm', + '-accel', 'hvf', + '-accel', 'tcg', '-serial', self._serial] + self._extra_qemu_params # Note: shlex.join() does what we want, but requires python 3.8+. print('Running tests with:\n$', ' '.join(shlex.quote(arg) for arg in qemu_command)) diff --git a/tools/testing/kunit/qemu_configs/arm64.py b/tools/testing/kunit/qemu_configs/arm64.py index d3ff27024755..5c44d3a87e6d 100644 --- a/tools/testing/kunit/qemu_configs/arm64.py +++ b/tools/testing/kunit/qemu_configs/arm64.py @@ -9,4 +9,4 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''', qemu_arch='aarch64', kernel_path='arch/arm64/boot/Image.gz', kernel_command_line='console=ttyAMA0', - extra_qemu_params=['-machine', 'virt', '-cpu', 'max,pauth-impdef=on']) + extra_qemu_params=['-machine', 'virt', '-cpu', 'max']) diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index cffaf2245d4f..eaff1b036989 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -227,6 +227,7 @@ static void *load_creator(void *ptr) unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) - (1 << order); item_insert_order(tree, index, order); + xa_set_mark(tree, index, XA_MARK_1); item_delete_rcu(tree, index); } } @@ -242,8 +243,11 @@ static void *load_worker(void *ptr) rcu_register_thread(); while (!stop_iteration) { + unsigned long find_index = (2 << RADIX_TREE_MAP_SHIFT) + 1; struct item *item = xa_load(ptr, index); assert(!xa_is_internal(item)); + item = xa_find(ptr, &find_index, index, XA_MARK_1); + assert(!xa_is_internal(item)); } rcu_unregister_thread(); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 2401e973c359..8daac70c2f9d 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -18,6 +18,7 @@ TARGETS += devices/error_logs TARGETS += devices/probe TARGETS += dmabuf-heaps TARGETS += drivers/dma-buf +TARGETS += drivers/ntsync TARGETS += drivers/s390x/uvdevice TARGETS += drivers/net TARGETS += drivers/net/bonding @@ -72,6 +73,7 @@ TARGETS += net/packetdrill TARGETS += net/rds TARGETS += net/tcp_ao TARGETS += nsfs +TARGETS += pci_endpoint TARGETS += pcie_bwctrl TARGETS += perf_events TARGETS += pidfd diff --git a/tools/testing/selftests/acct/acct_syscall.c b/tools/testing/selftests/acct/acct_syscall.c index e44e8fe1f4a3..87c044fb9293 100644 --- a/tools/testing/selftests/acct/acct_syscall.c +++ b/tools/testing/selftests/acct/acct_syscall.c @@ -24,7 +24,7 @@ int main(void) // Check if test is run a root if (geteuid()) { - ksft_test_result_skip("This test needs root to run!\n"); + ksft_exit_skip("This test needs root to run!\n"); return 1; } diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile index 944279160fed..8dab90ad22bb 100644 --- a/tools/testing/selftests/alsa/Makefile +++ b/tools/testing/selftests/alsa/Makefile @@ -27,5 +27,5 @@ include ../lib.mk $(OUTPUT)/libatest.so: conf.c alsa-local.h $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@ -$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) alsa-local.h +$(OUTPUT)/%: %.c $(OUTPUT)/libatest.so alsa-local.h $(CC) $(CFLAGS) $< $(LDLIBS) -latest -o $@ diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 0029ed9c5c9a..35f521e5f41c 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -46,6 +46,12 @@ static void atomics_sigill(void) asm volatile(".inst 0xb82003ff" : : : ); } +static void cmpbr_sigill(void) +{ + /* Not implemented, too complicated and unreliable anyway */ +} + + static void crc32_sigill(void) { /* CRC32W W0, W0, W1 */ @@ -82,6 +88,18 @@ static void f8fma_sigill(void) asm volatile(".inst 0xec0fc00"); } +static void f8mm4_sigill(void) +{ + /* FMMLA V0.4SH, V0.16B, V0.16B */ + asm volatile(".inst 0x6e00ec00"); +} + +static void f8mm8_sigill(void) +{ + /* FMMLA V0.4S, V0.16B, V0.16B */ + asm volatile(".inst 0x6e80ec00"); +} + static void faminmax_sigill(void) { /* FAMIN V0.4H, V0.4H, V0.4H */ @@ -98,6 +116,12 @@ static void fpmr_sigill(void) asm volatile("mrs x0, S3_3_C4_C4_2" : : : "x0"); } +static void fprcvt_sigill(void) +{ + /* FCVTAS S0, H0 */ + asm volatile(".inst 0x1efa0000"); +} + static void gcs_sigill(void) { unsigned long *gcspr; @@ -226,6 +250,42 @@ static void sme2p1_sigill(void) asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); } +static void sme2p2_sigill(void) +{ + /* SMSTART SM */ + asm volatile("msr S0_3_C4_C3_3, xzr" : : : ); + + /* UXTB Z0.D, P0/Z, Z0.D */ + asm volatile(".inst 0x4c1a000" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void sme_aes_sigill(void) +{ + /* SMSTART SM */ + asm volatile("msr S0_3_C4_C3_3, xzr" : : : ); + + /* AESD z0.b, z0.b, z0.b */ + asm volatile(".inst 0x4522e400" : : : "z0"); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void sme_sbitperm_sigill(void) +{ + /* SMSTART SM */ + asm volatile("msr S0_3_C4_C3_3, xzr" : : : ); + + /* BDEP Z0.B, Z0.B, Z0.B */ + asm volatile(".inst 0x4500b400" : : : "z0"); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + static void smei16i32_sigill(void) { /* SMSTART */ @@ -339,8 +399,44 @@ static void smesf8fma_sigill(void) /* SMSTART */ asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); - /* FMLALB V0.8H, V0.16B, V0.16B */ - asm volatile(".inst 0xec0fc00"); + /* FMLALB Z0.8H, Z0.B, Z0.B */ + asm volatile(".inst 0x64205000"); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smesfexpa_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* FEXPA Z0.D, Z0.D */ + asm volatile(".inst 0x04e0b800"); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smesmop4_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* SMOP4A ZA0.S, Z0.B, { Z0.B - Z1.B } */ + asm volatile(".inst 0x80108000"); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smestmop_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* STMOPA ZA0.S, { Z0.H - Z1.H }, Z0.H, Z20[0] */ + asm volatile(".inst 0x80408008"); /* SMSTOP */ asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); @@ -364,18 +460,42 @@ static void sve2p1_sigill(void) asm volatile(".inst 0x65000000" : : : "z0"); } +static void sve2p2_sigill(void) +{ + /* NOT Z0.D, P0/Z, Z0.D */ + asm volatile(".inst 0x4cea000" : : : "z0"); +} + static void sveaes_sigill(void) { /* AESD z0.b, z0.b, z0.b */ asm volatile(".inst 0x4522e400" : : : "z0"); } +static void sveaes2_sigill(void) +{ + /* AESD {Z0.B - Z1.B }, { Z0.B - Z1.B }, Z0.Q */ + asm volatile(".inst 0x4522ec00" : : : "z0"); +} + static void sveb16b16_sigill(void) { /* BFADD Z0.H, Z0.H, Z0.H */ asm volatile(".inst 0x65000000" : : : ); } +static void svebfscale_sigill(void) +{ + /* BFSCALE Z0.H, P0/M, Z0.H, Z0.H */ + asm volatile(".inst 0x65098000" : : : "z0"); +} + +static void svef16mm_sigill(void) +{ + /* FMMLA Z0.S, Z0.H, Z0.H */ + asm volatile(".inst 0x6420e400"); +} + static void svepmull_sigill(void) { /* PMULLB Z0.Q, Z0.D, Z0.D */ @@ -394,6 +514,12 @@ static void svesha3_sigill(void) asm volatile(".inst 0x4203800" : : : "z0"); } +static void sveeltperm_sigill(void) +{ + /* COMPACT Z0.B, P0, Z0.B */ + asm volatile(".inst 0x5218000" : : : "x0"); +} + static void svesm4_sigill(void) { /* SM4E Z0.S, Z0.S, Z0.S */ @@ -470,6 +596,13 @@ static const struct hwcap_data { .sigill_fn = aes_sigill, }, { + .name = "CMPBR", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_CMPBR, + .cpuinfo = "cmpbr", + .sigill_fn = cmpbr_sigill, + }, + { .name = "CRC32", .at_hwcap = AT_HWCAP, .hwcap_bit = HWCAP_CRC32, @@ -524,6 +657,20 @@ static const struct hwcap_data { .sigill_fn = f8fma_sigill, }, { + .name = "F8MM8", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_F8MM8, + .cpuinfo = "f8mm8", + .sigill_fn = f8mm8_sigill, + }, + { + .name = "F8MM4", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_F8MM4, + .cpuinfo = "f8mm4", + .sigill_fn = f8mm4_sigill, + }, + { .name = "FAMINMAX", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_FAMINMAX, @@ -546,6 +693,13 @@ static const struct hwcap_data { .sigill_reliable = true, }, { + .name = "FPRCVT", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_FPRCVT, + .cpuinfo = "fprcvt", + .sigill_fn = fprcvt_sigill, + }, + { .name = "GCS", .at_hwcap = AT_HWCAP, .hwcap_bit = HWCAP_GCS, @@ -692,6 +846,20 @@ static const struct hwcap_data { .sigill_fn = sme2p1_sigill, }, { + .name = "SME 2.2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME2P2, + .cpuinfo = "sme2p2", + .sigill_fn = sme2p2_sigill, + }, + { + .name = "SME AES", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME_AES, + .cpuinfo = "smeaes", + .sigill_fn = sme_aes_sigill, + }, + { .name = "SME I16I32", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SME_I16I32, @@ -741,6 +909,13 @@ static const struct hwcap_data { .sigill_fn = smelutv2_sigill, }, { + .name = "SME SBITPERM", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME_SBITPERM, + .cpuinfo = "smesbitperm", + .sigill_fn = sme_sbitperm_sigill, + }, + { .name = "SME SF8FMA", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SME_SF8FMA, @@ -762,6 +937,27 @@ static const struct hwcap_data { .sigill_fn = smesf8dp4_sigill, }, { + .name = "SME SFEXPA", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME_SFEXPA, + .cpuinfo = "smesfexpa", + .sigill_fn = smesfexpa_sigill, + }, + { + .name = "SME SMOP4", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME_SMOP4, + .cpuinfo = "smesmop4", + .sigill_fn = smesmop4_sigill, + }, + { + .name = "SME STMOP", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME_STMOP, + .cpuinfo = "smestmop", + .sigill_fn = smestmop_sigill, + }, + { .name = "SVE", .at_hwcap = AT_HWCAP, .hwcap_bit = HWCAP_SVE, @@ -784,6 +980,13 @@ static const struct hwcap_data { .sigill_fn = sve2p1_sigill, }, { + .name = "SVE 2.2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE2P2, + .cpuinfo = "sve2p2", + .sigill_fn = sve2p2_sigill, + }, + { .name = "SVE AES", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SVEAES, @@ -791,6 +994,34 @@ static const struct hwcap_data { .sigill_fn = sveaes_sigill, }, { + .name = "SVE AES2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE_AES2, + .cpuinfo = "sveaes2", + .sigill_fn = sveaes2_sigill, + }, + { + .name = "SVE BFSCALE", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE_BFSCALE, + .cpuinfo = "svebfscale", + .sigill_fn = svebfscale_sigill, + }, + { + .name = "SVE ELTPERM", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE_ELTPERM, + .cpuinfo = "sveeltperm", + .sigill_fn = sveeltperm_sigill, + }, + { + .name = "SVE F16MM", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE_F16MM, + .cpuinfo = "svef16mm", + .sigill_fn = svef16mm_sigill, + }, + { .name = "SVE2 B16B16", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SVE_B16B16, diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S index df3230fdac39..66ab2e0bae5f 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -81,32 +81,31 @@ do_syscall: stp x27, x28, [sp, #96] // Set SVCR if we're doing SME - cbz x1, 1f + cbz x1, load_gpr adrp x2, svcr_in ldr x2, [x2, :lo12:svcr_in] msr S3_3_C4_C2_2, x2 -1: // Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR - tbz x2, #SVCR_ZA_SHIFT, 1f + tbz x2, #SVCR_ZA_SHIFT, load_gpr mov w12, #0 ldr x2, =za_in -2: _ldr_za 12, 2 +1: _ldr_za 12, 2 add x2, x2, x1 add x12, x12, #1 cmp x1, x12 - bne 2b + bne 1b // ZT0 mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ #ID_AA64SMFR0_EL1_SMEver_WIDTH - cbz x2, 1f + cbz x2, load_gpr adrp x2, zt_in add x2, x2, :lo12:zt_in _ldr_zt 2 -1: +load_gpr: // Load GPRs x8-x28, and save our SP/FP for later comparison ldr x2, =gpr_in add x2, x2, #64 @@ -125,9 +124,9 @@ do_syscall: str x30, [x2], #8 // LR // Load FPRs if we're not doing neither SVE nor streaming SVE - cbnz x0, 1f + cbnz x0, check_sve_in ldr x2, =svcr_in - tbnz x2, #SVCR_SM_SHIFT, 1f + tbnz x2, #SVCR_SM_SHIFT, check_sve_in ldr x2, =fpr_in ldp q0, q1, [x2] @@ -148,8 +147,8 @@ do_syscall: ldp q30, q31, [x2, #16 * 30] b 2f -1: +check_sve_in: // Load the SVE registers if we're doing SVE/SME ldr x2, =z_in @@ -256,32 +255,31 @@ do_syscall: stp q30, q31, [x2, #16 * 30] // Save SVCR if we're doing SME - cbz x1, 1f + cbz x1, check_sve_out mrs x2, S3_3_C4_C2_2 adrp x3, svcr_out str x2, [x3, :lo12:svcr_out] -1: // Save ZA if it's enabled - uses x12 as scratch due to SME STR - tbz x2, #SVCR_ZA_SHIFT, 1f + tbz x2, #SVCR_ZA_SHIFT, check_sve_out mov w12, #0 ldr x2, =za_out -2: _str_za 12, 2 +1: _str_za 12, 2 add x2, x2, x1 add x12, x12, #1 cmp x1, x12 - bne 2b + bne 1b // ZT0 mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ #ID_AA64SMFR0_EL1_SMEver_WIDTH - cbz x2, 1f + cbz x2, check_sve_out adrp x2, zt_out add x2, x2, :lo12:zt_out _str_zt 2 -1: +check_sve_out: // Save the SVE state if we have some cbz x0, 1f diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c index 859345379044..348e8bef62c7 100644 --- a/tools/testing/selftests/arm64/fp/kernel-test.c +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -46,8 +46,7 @@ static void handle_kick_signal(int sig, siginfo_t *info, void *context) } static char *drivers[] = { - "crct10dif-arm64-ce", - /* "crct10dif-arm64-neon", - Same priority as generic */ + "crct10dif-arm64", "sha1-ce", "sha224-arm64", "sha224-arm64-neon", diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index e9c377001f93..e2a2c46c008b 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -18,7 +18,6 @@ feature urandom_read test_sockmap test_lirc_mode2_user -test_flow_dissector flow_dissector_load test_tcpnotify_user test_libbpf diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 7eeb3cbe18c7..87551628e112 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -41,7 +41,7 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif -CFLAGS += -g $(OPT_FLAGS) -rdynamic \ +CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11 \ -Wall -Werror -fno-omit-frame-pointer \ $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ @@ -54,21 +54,6 @@ PCAP_LIBS := $(shell $(PKG_CONFIG) --libs libpcap 2>/dev/null) LDLIBS += $(PCAP_LIBS) CFLAGS += $(PCAP_CFLAGS) -# The following tests perform type punning and they may break strict -# aliasing rules, which are exploited by both GCC and clang by default -# while optimizing. This can lead to broken programs. -progs/bind4_prog.c-CFLAGS := -fno-strict-aliasing -progs/bind6_prog.c-CFLAGS := -fno-strict-aliasing -progs/dynptr_fail.c-CFLAGS := -fno-strict-aliasing -progs/linked_list_fail.c-CFLAGS := -fno-strict-aliasing -progs/map_kptr_fail.c-CFLAGS := -fno-strict-aliasing -progs/syscall.c-CFLAGS := -fno-strict-aliasing -progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing -progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing -progs/timer_crash.c-CFLAGS := -fno-strict-aliasing -progs/test_global_func9.c-CFLAGS := -fno-strict-aliasing -progs/verifier_nocsr.c-CFLAGS := -fno-strict-aliasing - # Some utility functions use LLVM libraries jit_disasm_helpers.c-CFLAGS = $(LLVM_CFLAGS) @@ -103,18 +88,6 @@ progs/btf_dump_test_case_packing.c-bpf_gcc-CFLAGS := -Wno-error progs/btf_dump_test_case_padding.c-bpf_gcc-CFLAGS := -Wno-error progs/btf_dump_test_case_syntax.c-bpf_gcc-CFLAGS := -Wno-error -# The following tests do type-punning, via the __imm_insn macro, from -# `struct bpf_insn' to long and then uses the value. This triggers an -# "is used uninitialized" warning in GCC due to strict-aliasing -# rules. -progs/verifier_ref_tracking.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_unpriv.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_cgroup_storage.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_ld_ind.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_map_ret_val.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_spill_fill.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_subprog_precision.c-bpf_gcc-CFLAGS := -fno-strict-aliasing -progs/verifier_uninit.c-bpf_gcc-CFLAGS := -fno-strict-aliasing endif ifneq ($(CLANG_CPUV4),) @@ -127,13 +100,10 @@ TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ - test_xdp_redirect.sh \ test_xdp_redirect_multi.sh \ - test_xdp_meta.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ - test_flow_dissector.sh \ test_xdp_vlan_mode_generic.sh \ test_xdp_vlan_mode_native.sh \ test_lwt_ip_encap.sh \ @@ -151,17 +121,16 @@ TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh ima_setup.sh verify_sig_setup.sh \ test_xdp_vlan.sh test_bpftool.py +TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ + bpf_test_modorder_y.ko +TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS)) + # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = \ bench \ - bpf_testmod.ko \ - bpf_test_modorder_x.ko \ - bpf_test_modorder_y.ko \ - bpf_test_no_cfi.ko \ flow_dissector_load \ runqslower \ test_cpp \ - test_flow_dissector \ test_lirc_mode2_user \ veristat \ xdp_features \ @@ -184,8 +153,9 @@ override define CLEAN $(Q)$(RM) -r $(TEST_GEN_PROGS) $(Q)$(RM) -r $(TEST_GEN_PROGS_EXTENDED) $(Q)$(RM) -r $(TEST_GEN_FILES) + $(Q)$(RM) -r $(TEST_KMODS) $(Q)$(RM) -r $(EXTRA_CLEAN) - $(Q)$(MAKE) -C bpf_testmod clean + $(Q)$(MAKE) -C test_kmods clean $(Q)$(MAKE) docs-clean endef @@ -203,9 +173,9 @@ ifeq ($(shell expr $(MAKE_VERSION) \>= 4.4), 1) $(let OUTPUT,$(OUTPUT)/,\ $(eval include ../../../build/Makefile.feature)) else -OUTPUT := $(OUTPUT)/ +override OUTPUT := $(OUTPUT)/ $(eval include ../../../build/Makefile.feature) -OUTPUT := $(patsubst %/,%,$(OUTPUT)) +override OUTPUT := $(patsubst %/,%,$(OUTPUT)) endif endif @@ -251,7 +221,7 @@ endif # to build individual tests. # NOTE: Semicolon at the end is critical to override lib.mk's default static # rule for binaries. -$(notdir $(TEST_GEN_PROGS) \ +$(notdir $(TEST_GEN_PROGS) $(TEST_KMODS) \ $(TEST_GEN_PROGS_EXTENDED)): %: $(OUTPUT)/% ; # sort removes libbpf duplicates when not cross-building @@ -305,37 +275,19 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $< -o $@ \ $(shell $(PKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) -$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) - $(call msg,MOD,,$@) - $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_testmod \ - RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ - EXTRA_CFLAGS='' EXTRA_LDFLAGS='' - $(Q)cp bpf_testmod/bpf_testmod.ko $@ - -$(OUTPUT)/bpf_test_no_cfi.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_no_cfi/Makefile bpf_test_no_cfi/*.[ch]) - $(call msg,MOD,,$@) - $(Q)$(RM) bpf_test_no_cfi/bpf_test_no_cfi.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_test_no_cfi \ - RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ +# This should really be a grouped target, but make versions before 4.3 don't +# support that for regular rules. However, pattern matching rules are implicitly +# treated as grouped even with older versions of make, so as a workaround, the +# subst() turns the rule into a pattern matching rule +$(addprefix test_kmods/,$(subst .ko,%ko,$(TEST_KMODS))): $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard test_kmods/Makefile test_kmods/*.[ch]) + $(Q)$(RM) test_kmods/*.ko test_kmods/*.mod.o # force re-compilation + $(Q)$(MAKE) $(submake_extras) -C test_kmods \ + RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ EXTRA_CFLAGS='' EXTRA_LDFLAGS='' - $(Q)cp bpf_test_no_cfi/bpf_test_no_cfi.ko $@ -$(OUTPUT)/bpf_test_modorder_x.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_x/Makefile bpf_test_modorder_x/*.[ch]) +$(TEST_KMOD_TARGETS): $(addprefix test_kmods/,$(TEST_KMODS)) $(call msg,MOD,,$@) - $(Q)$(RM) bpf_test_modorder_x/bpf_test_modorder_x.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_x \ - RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ - EXTRA_CFLAGS='' EXTRA_LDFLAGS='' - $(Q)cp bpf_test_modorder_x/bpf_test_modorder_x.ko $@ - -$(OUTPUT)/bpf_test_modorder_y.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_y/Makefile bpf_test_modorder_y/*.[ch]) - $(call msg,MOD,,$@) - $(Q)$(RM) bpf_test_modorder_y/bpf_test_modorder_y.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_y \ - RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ - EXTRA_CFLAGS='' EXTRA_LDFLAGS='' - $(Q)cp bpf_test_modorder_y/bpf_test_modorder_y.ko $@ + $(Q)cp test_kmods/$(@F) $@ DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool @@ -480,10 +432,10 @@ $(shell $(1) $(2) -dM -E - </dev/null | grep -E 'MIPS(EL|EB)|_MIPS_SZ(PTR|LONG) endef # Determine target endianness. -IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ +IS_LITTLE_ENDIAN := $(shell $(CC) -dM -E - </dev/null | \ grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__') -MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) -BPF_TARGET_ENDIAN=$(if $(IS_LITTLE_ENDIAN),--target=bpfel,--target=bpfeb) +MENDIAN:=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) +BPF_TARGET_ENDIAN:=$(if $(IS_LITTLE_ENDIAN),--target=bpfel,--target=bpfeb) ifneq ($(CROSS_COMPILE),) CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) @@ -493,6 +445,8 @@ CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) \ + -std=gnu11 \ + -fno-strict-aliasing \ -Wno-compare-distinct-pointer-types # TODO: enable me -Wsign-compare @@ -760,14 +714,12 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ json_writer.c \ flow_dissector_load.h \ ip_check_defrag_frags.h -TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ - $(OUTPUT)/bpf_test_no_cfi.ko \ - $(OUTPUT)/bpf_test_modorder_x.ko \ - $(OUTPUT)/bpf_test_modorder_y.ko \ +TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ $(OUTPUT)/sign-file \ $(OUTPUT)/uprobe_multi \ + $(TEST_KMOD_TARGETS) \ ima_setup.sh \ verify_sig_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) \ @@ -834,9 +786,12 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ # Make sure we are able to include and link libbpf against c++. +CXXFLAGS += $(CFLAGS) +CXXFLAGS := $(subst -D_GNU_SOURCE=,,$(CXXFLAGS)) +CXXFLAGS := $(subst -std=gnu11,-std=gnu++11,$(CXXFLAGS)) $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ + $(Q)$(CXX) $(CXXFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) @@ -894,12 +849,9 @@ $(OUTPUT)/uprobe_multi: uprobe_multi.c uprobe_multi.ld EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ - feature bpftool \ + feature bpftool $(TEST_KMOD_TARGETS) \ $(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \ - no_alu32 cpuv4 bpf_gcc bpf_testmod.ko \ - bpf_test_no_cfi.ko \ - bpf_test_modorder_x.ko \ - bpf_test_modorder_y.ko \ + no_alu32 cpuv4 bpf_gcc \ liburandom_read.so) \ $(OUTPUT)/FEATURE-DUMP.selftests diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile deleted file mode 100644 index 40b25b98ad1b..000000000000 --- a/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) -KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..) - -ifeq ($(V),1) -Q = -else -Q = @ -endif - -MODULES = bpf_test_modorder_x.ko - -obj-m += bpf_test_modorder_x.o - -all: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules - -clean: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean - diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile deleted file mode 100644 index 52c3ab9d84e2..000000000000 --- a/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) -KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..) - -ifeq ($(V),1) -Q = -else -Q = @ -endif - -MODULES = bpf_test_modorder_y.ko - -obj-m += bpf_test_modorder_y.o - -all: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules - -clean: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean - diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile deleted file mode 100644 index ed5143b79edf..000000000000 --- a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -BPF_TEST_NO_CFI_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) -KDIR ?= $(abspath $(BPF_TEST_NO_CFI_DIR)/../../../../..) - -ifeq ($(V),1) -Q = -else -Q = @ -endif - -MODULES = bpf_test_no_cfi.ko - -obj-m += bpf_test_no_cfi.o - -all: - +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) modules - -clean: - +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) clean - diff --git a/tools/testing/selftests/bpf/bpf_testmod/Makefile b/tools/testing/selftests/bpf/bpf_testmod/Makefile deleted file mode 100644 index 15cb36c4483a..000000000000 --- a/tools/testing/selftests/bpf/bpf_testmod/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) -KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..) - -ifeq ($(V),1) -Q = -else -Q = @ -endif - -MODULES = bpf_testmod.ko - -obj-m += bpf_testmod.o -CFLAGS_bpf_testmod.o = -I$(src) - -all: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules - -clean: - +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean - diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 4ca84c8d9116..c378d5d07e02 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -58,6 +58,7 @@ CONFIG_MPLS=y CONFIG_MPLS_IPTUNNEL=y CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y +CONFIG_NET_ACT_GACT=y CONFIG_NET_ACT_SKBMOD=y CONFIG_NET_CLS=y CONFIG_NET_CLS_ACT=y diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 27784946b01b..80844a5fb1fe 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -21,7 +21,7 @@ #include <linux/limits.h> #include <linux/ip.h> -#include <linux/udp.h> +#include <netinet/udp.h> #include <netinet/tcp.h> #include <net/if.h> diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 5764155b6d25..ebec8a8d6f81 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -14,6 +14,7 @@ typedef __u16 __sum16; #include <linux/sockios.h> #include <linux/err.h> #include <netinet/tcp.h> +#include <netinet/udp.h> #include <bpf/bpf_endian.h> #include <net/if.h> @@ -105,6 +106,45 @@ static __u16 csum_fold(__u32 csum) return (__u16)~csum; } +static __wsum csum_partial(const void *buf, int len, __wsum sum) +{ + __u16 *p = (__u16 *)buf; + int num_u16 = len >> 1; + int i; + + for (i = 0; i < num_u16; i++) + sum += p[i]; + + return sum; +} + +static inline __sum16 build_ip_csum(struct iphdr *iph) +{ + __u32 sum = 0; + __u16 *p; + + iph->check = 0; + p = (void *)iph; + sum = csum_partial(p, iph->ihl << 2, 0); + + return csum_fold(sum); +} + +/** + * csum_tcpudp_magic - compute IP pseudo-header checksum + * + * Compute the IPv4 pseudo header checksum. The helper can take a + * accumulated sum from the transport layer to accumulate it and directly + * return the transport layer + * + * @saddr: IP source address + * @daddr: IP dest address + * @len: IP data size + * @proto: transport layer protocol + * @csum: The accumulated partial sum to add to the computation + * + * Returns the folded sum + */ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum csum) @@ -120,6 +160,21 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, return csum_fold((__u32)s); } +/** + * csum_ipv6_magic - compute IPv6 pseudo-header checksum + * + * Compute the ipv6 pseudo header checksum. The helper can take a + * accumulated sum from the transport layer to accumulate it and directly + * return the transport layer + * + * @saddr: IPv6 source address + * @daddr: IPv6 dest address + * @len: IPv6 data size + * @proto: transport layer protocol + * @csum: The accumulated partial sum to add to the computation + * + * Returns the folded sum + */ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, __u32 len, __u8 proto, @@ -139,6 +194,47 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, return csum_fold((__u32)s); } +/** + * build_udp_v4_csum - compute UDP checksum for UDP over IPv4 + * + * Compute the checksum to embed in UDP header, composed of the sum of IP + * pseudo-header checksum, UDP header checksum and UDP data checksum + * @iph IP header + * @udph UDP header, which must be immediately followed by UDP data + * + * Returns the total checksum + */ + +static inline __sum16 build_udp_v4_csum(const struct iphdr *iph, + const struct udphdr *udph) +{ + unsigned long sum; + + sum = csum_partial(udph, ntohs(udph->len), 0); + return csum_tcpudp_magic(iph->saddr, iph->daddr, ntohs(udph->len), + IPPROTO_UDP, sum); +} + +/** + * build_udp_v6_csum - compute UDP checksum for UDP over IPv6 + * + * Compute the checksum to embed in UDP header, composed of the sum of IPv6 + * pseudo-header checksum, UDP header checksum and UDP data checksum + * @ip6h IPv6 header + * @udph UDP header, which must be immediately followed by UDP data + * + * Returns the total checksum + */ +static inline __sum16 build_udp_v6_csum(const struct ipv6hdr *ip6h, + const struct udphdr *udph) +{ + unsigned long sum; + + sum = csum_partial(udph, ntohs(udph->len), 0); + return csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ntohs(udph->len), + IPPROTO_UDP, sum); +} + struct tmonitor_ctx; #ifdef TRAFFIC_MONITOR diff --git a/tools/testing/selftests/bpf/prog_tests/btf_distill.c b/tools/testing/selftests/bpf/prog_tests/btf_distill.c index ca84726d5ac1..fb67ae195a73 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_distill.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_distill.c @@ -385,7 +385,7 @@ static void test_distilled_base_missing_err(void) "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED"); btf5 = btf__new_empty(); if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf")) - return; + goto cleanup; btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */ VALIDATE_RAW_BTF( btf5, @@ -478,7 +478,7 @@ static void test_distilled_base_multi_err2(void) "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); btf5 = btf__new_empty(); if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf")) - return; + goto cleanup; btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [2] int */ VALIDATE_RAW_BTF( @@ -601,6 +601,76 @@ cleanup: btf__free(base); } +/* If a needed composite type, which is the member of composite type + * in the split BTF, has a different size in the base BTF we wish to + * relocate with, btf__relocate() should error out. + */ +static void test_distilled_base_embedded_err(void) +{ + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_struct(btf1, "s1", 4); /* [2] struct s1 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] STRUCT 's1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0"); + + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + btf__add_struct(btf2, "with_embedded", 8); /* [3] struct with_embedded { */ + btf__add_field(btf2, "e1", 2, 0, 0); /* struct s1 e1; */ + /* } */ + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] STRUCT 's1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[3] STRUCT 'with_embedded' size=8 vlen=1\n" + "\t'e1' type_id=2 bits_offset=0"); + + if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4), + "distilled_base") || + !ASSERT_OK_PTR(btf3, "distilled_base") || + !ASSERT_OK_PTR(btf4, "distilled_split") || + !ASSERT_EQ(2, btf__type_cnt(btf3), "distilled_base_type_cnt")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf4, + "[1] STRUCT 's1' size=4 vlen=0", + "[2] STRUCT 'with_embedded' size=8 vlen=1\n" + "\t'e1' type_id=1 bits_offset=0"); + + btf5 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf")) + goto cleanup; + + btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */ + /* struct with the same name but different size */ + btf__add_struct(btf5, "s1", 8); /* [2] struct s1 { */ + btf__add_field(btf5, "f1", 1, 0, 0); /* int f1; */ + /* } */ + + ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split"); +cleanup: + btf__free(btf5); + btf__free(btf4); + btf__free(btf3); + btf__free(btf2); + btf__free(btf1); +} + void test_btf_distill(void) { if (test__start_subtest("distilled_base")) @@ -613,6 +683,8 @@ void test_btf_distill(void) test_distilled_base_multi_err(); if (test__start_subtest("distilled_base_multi_err2")) test_distilled_base_multi_err2(); + if (test__start_subtest("distilled_base_embedded_err")) + test_distilled_base_embedded_err(); if (test__start_subtest("distilled_base_vmlinux")) test_distilled_base_vmlinux(); if (test__start_subtest("distilled_endianness")) diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c new file mode 100644 index 000000000000..e1a90c10db8c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "cgroup_skb_direct_packet_access.skel.h" + +void test_cgroup_skb_prog_run_direct_packet_access(void) +{ + int err; + struct cgroup_skb_direct_packet_access *skel; + char test_skb[64] = {}; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = test_skb, + .data_size_in = sizeof(test_skb), + ); + + skel = cgroup_skb_direct_packet_access__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_skb_direct_packet_access__open_and_load")) + return; + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.direct_packet_access), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts err"); + ASSERT_EQ(topts.retval, 1, "retval"); + + ASSERT_NEQ(skel->bss->data_end, 0, "data_end"); + + cgroup_skb_direct_packet_access__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c new file mode 100644 index 000000000000..7526de379081 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bpf/libbpf.h" +#include "changes_pkt_data_freplace.skel.h" +#include "changes_pkt_data.skel.h" +#include <test_progs.h> + +static void print_verifier_log(const char *log) +{ + if (env.verbosity >= VERBOSE_VERY) + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); +} + +static void test_aux(const char *main_prog_name, + const char *to_be_replaced, + const char *replacement, + bool expect_load) +{ + struct changes_pkt_data_freplace *freplace = NULL; + struct bpf_program *freplace_prog = NULL; + struct bpf_program *main_prog = NULL; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct changes_pkt_data *main = NULL; + char log[16*1024]; + int err; + + opts.kernel_log_buf = log; + opts.kernel_log_size = sizeof(log); + if (env.verbosity >= VERBOSE_SUPER) + opts.kernel_log_level = 1 | 2 | 4; + main = changes_pkt_data__open_opts(&opts); + if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) + goto out; + main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); + if (!ASSERT_OK_PTR(main_prog, "main_prog")) + goto out; + bpf_program__set_autoload(main_prog, true); + err = changes_pkt_data__load(main); + print_verifier_log(log); + if (!ASSERT_OK(err, "changes_pkt_data__load")) + goto out; + freplace = changes_pkt_data_freplace__open_opts(&opts); + if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) + goto out; + freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); + if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) + goto out; + bpf_program__set_autoload(freplace_prog, true); + bpf_program__set_autoattach(freplace_prog, true); + bpf_program__set_attach_target(freplace_prog, + bpf_program__fd(main_prog), + to_be_replaced); + err = changes_pkt_data_freplace__load(freplace); + print_verifier_log(log); + if (expect_load) { + ASSERT_OK(err, "changes_pkt_data_freplace__load"); + } else { + ASSERT_ERR(err, "changes_pkt_data_freplace__load"); + ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log"); + } + +out: + changes_pkt_data_freplace__destroy(freplace); + changes_pkt_data__destroy(main); +} + +/* There are two global subprograms in both changes_pkt_data.skel.h: + * - one changes packet data; + * - another does not. + * It is ok to freplace subprograms that change packet data with those + * that either do or do not. It is only ok to freplace subprograms + * that do not change packet data with those that do not as well. + * The below tests check outcomes for each combination of such freplace. + * Also test a case when main subprogram itself is replaced and is a single + * subprogram in a program. + */ +void test_changes_pkt_data_freplace(void) +{ + struct { + const char *main; + const char *to_be_replaced; + bool changes; + } mains[] = { + { "main_with_subprogs", "changes_pkt_data", true }, + { "main_with_subprogs", "does_not_change_pkt_data", false }, + { "main_changes", "main_changes", true }, + { "main_does_not_change", "main_does_not_change", false }, + }; + struct { + const char *func; + bool changes; + } replacements[] = { + { "changes_pkt_data", true }, + { "does_not_change_pkt_data", false } + }; + char buf[64]; + + for (int i = 0; i < ARRAY_SIZE(mains); ++i) { + for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { + snprintf(buf, sizeof(buf), "%s_with_%s", + mains[i].to_be_replaced, replacements[j].func); + if (!test__start_subtest(buf)) + continue; + test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func, + mains[i].changes || !replacements[j].changes); + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 1c682550e0e7..e10ea92c3fe2 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -2,7 +2,7 @@ #define _GNU_SOURCE #include <test_progs.h> #include "progs/core_reloc_types.h" -#include "bpf_testmod/bpf_testmod.h" +#include "test_kmods/bpf_testmod.h" #include <linux/limits.h> #include <sys/mman.h> #include <sys/syscall.h> diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c new file mode 100644 index 000000000000..a1d52e73fb16 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -0,0 +1,441 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include <linux/btf.h> +#include <bpf/bpf.h> + +#include "../test_btf.h" + +static inline int new_map(void) +{ + const char *name = NULL; + __u32 max_entries = 1; + __u32 value_size = 8; + __u32 key_size = 4; + + return bpf_map_create(BPF_MAP_TYPE_ARRAY, name, + key_size, value_size, + max_entries, NULL); +} + +static int new_btf(void) +{ + struct btf_blob { + struct btf_header btf_hdr; + __u32 types[8]; + __u32 str; + } raw_btf = { + .btf_hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(raw_btf.types), + .str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types), + .str_len = sizeof(raw_btf.str), + }, + .types = { + /* long */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */ + /* unsigned long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + }, + }; + + return bpf_btf_load(&raw_btf, sizeof(raw_btf), NULL); +} + +#define Close(FD) do { \ + if ((FD) >= 0) { \ + close(FD); \ + FD = -1; \ + } \ +} while(0) + +static bool map_exists(__u32 id) +{ + int fd; + + fd = bpf_map_get_fd_by_id(id); + if (fd >= 0) { + close(fd); + return true; + } + return false; +} + +static bool btf_exists(__u32 id) +{ + int fd; + + fd = bpf_btf_get_fd_by_id(id); + if (fd >= 0) { + close(fd); + return true; + } + return false; +} + +static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *map_ids) +{ + __u32 len = sizeof(struct bpf_prog_info); + struct bpf_prog_info info; + int err; + + memset(&info, 0, len); + info.nr_map_ids = *nr_map_ids, + info.map_ids = ptr_to_u64(map_ids), + + err = bpf_prog_get_info_by_fd(prog_fd, &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + return -1; + + *nr_map_ids = info.nr_map_ids; + + return 0; +} + +static int __load_test_prog(int map_fd, const int *fd_array, int fd_array_cnt) +{ + /* A trivial program which uses one map */ + struct bpf_insn insns[] = { + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + + opts.fd_array = fd_array; + opts.fd_array_cnt = fd_array_cnt; + + return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, ARRAY_SIZE(insns), &opts); +} + +static int load_test_prog(const int *fd_array, int fd_array_cnt) +{ + int map_fd; + int ret; + + map_fd = new_map(); + if (!ASSERT_GE(map_fd, 0, "new_map")) + return map_fd; + + ret = __load_test_prog(map_fd, fd_array, fd_array_cnt); + close(map_fd); + return ret; +} + +static bool check_expected_map_ids(int prog_fd, int expected, __u32 *map_ids, __u32 *nr_map_ids) +{ + int err; + + err = bpf_prog_get_map_ids(prog_fd, nr_map_ids, map_ids); + if (!ASSERT_OK(err, "bpf_prog_get_map_ids")) + return false; + if (!ASSERT_EQ(*nr_map_ids, expected, "unexpected nr_map_ids")) + return false; + + return true; +} + +/* + * Load a program, which uses one map. No fd_array maps are present. + * On return only one map is expected to be bound to prog. + */ +static void check_fd_array_cnt__no_fd_array(void) +{ + __u32 map_ids[16]; + __u32 nr_map_ids; + int prog_fd = -1; + + prog_fd = load_test_prog(NULL, 0); + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) + return; + nr_map_ids = ARRAY_SIZE(map_ids); + check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids); + close(prog_fd); +} + +/* + * Load a program, which uses one map, and pass two extra, non-equal, maps in + * fd_array with fd_array_cnt=2. On return three maps are expected to be bound + * to the program. + */ +static void check_fd_array_cnt__fd_array_ok(void) +{ + int extra_fds[2] = { -1, -1 }; + __u32 map_ids[16]; + __u32 nr_map_ids; + int prog_fd = -1; + + extra_fds[0] = new_map(); + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) + goto cleanup; + extra_fds[1] = new_map(); + if (!ASSERT_GE(extra_fds[1], 0, "new_map")) + goto cleanup; + prog_fd = load_test_prog(extra_fds, 2); + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) + goto cleanup; + nr_map_ids = ARRAY_SIZE(map_ids); + if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids)) + goto cleanup; + + /* maps should still exist when original file descriptors are closed */ + Close(extra_fds[0]); + Close(extra_fds[1]); + if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map_ids[0] should exist")) + goto cleanup; + if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map_ids[1] should exist")) + goto cleanup; + + /* some fds might be invalid, so ignore return codes */ +cleanup: + Close(extra_fds[1]); + Close(extra_fds[0]); + Close(prog_fd); +} + +/* + * Load a program with a few extra maps duplicated in the fd_array. + * After the load maps should only be referenced once. + */ +static void check_fd_array_cnt__duplicated_maps(void) +{ + int extra_fds[4] = { -1, -1, -1, -1 }; + __u32 map_ids[16]; + __u32 nr_map_ids; + int prog_fd = -1; + + extra_fds[0] = extra_fds[2] = new_map(); + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) + goto cleanup; + extra_fds[1] = extra_fds[3] = new_map(); + if (!ASSERT_GE(extra_fds[1], 0, "new_map")) + goto cleanup; + prog_fd = load_test_prog(extra_fds, 4); + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) + goto cleanup; + nr_map_ids = ARRAY_SIZE(map_ids); + if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids)) + goto cleanup; + + /* maps should still exist when original file descriptors are closed */ + Close(extra_fds[0]); + Close(extra_fds[1]); + if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist")) + goto cleanup; + if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map should exist")) + goto cleanup; + + /* some fds might be invalid, so ignore return codes */ +cleanup: + Close(extra_fds[1]); + Close(extra_fds[0]); + Close(prog_fd); +} + +/* + * Check that if maps which are referenced by a program are + * passed in fd_array, then they will be referenced only once + */ +static void check_fd_array_cnt__referenced_maps_in_fd_array(void) +{ + int extra_fds[1] = { -1 }; + __u32 map_ids[16]; + __u32 nr_map_ids; + int prog_fd = -1; + + extra_fds[0] = new_map(); + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) + goto cleanup; + prog_fd = __load_test_prog(extra_fds[0], extra_fds, 1); + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) + goto cleanup; + nr_map_ids = ARRAY_SIZE(map_ids); + if (!check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids)) + goto cleanup; + + /* map should still exist when original file descriptor is closed */ + Close(extra_fds[0]); + if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist")) + goto cleanup; + + /* some fds might be invalid, so ignore return codes */ +cleanup: + Close(extra_fds[0]); + Close(prog_fd); +} + +static int get_btf_id_by_fd(int btf_fd, __u32 *id) +{ + struct bpf_btf_info info; + __u32 info_len = sizeof(info); + int err; + + memset(&info, 0, info_len); + err = bpf_btf_get_info_by_fd(btf_fd, &info, &info_len); + if (err) + return err; + if (id) + *id = info.id; + return 0; +} + +/* + * Check that fd_array operates properly for btfs. Namely, to check that + * passing a btf fd in fd_array increases its reference count, do the + * following: + * 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1 + * 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2 + * 3) Close the btf_fd, now refcnt=1 + * Wait and check that BTF stil exists. + */ +static void check_fd_array_cnt__referenced_btfs(void) +{ + int extra_fds[1] = { -1 }; + int prog_fd = -1; + __u32 btf_id; + int tries; + int err; + + extra_fds[0] = new_btf(); + if (!ASSERT_GE(extra_fds[0], 0, "new_btf")) + goto cleanup; + prog_fd = load_test_prog(extra_fds, 1); + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) + goto cleanup; + + /* btf should still exist when original file descriptor is closed */ + err = get_btf_id_by_fd(extra_fds[0], &btf_id); + if (!ASSERT_GE(err, 0, "get_btf_id_by_fd")) + goto cleanup; + + Close(extra_fds[0]); + + if (!ASSERT_GE(kern_sync_rcu(), 0, "kern_sync_rcu 1")) + goto cleanup; + + if (!ASSERT_EQ(btf_exists(btf_id), true, "btf should exist")) + goto cleanup; + + Close(prog_fd); + + /* The program is freed by a workqueue, so no reliable + * way to sync, so just wait a bit (max ~1 second). */ + for (tries = 100; tries >= 0; tries--) { + usleep(1000); + + if (!btf_exists(btf_id)) + break; + + if (tries) + continue; + + PRINT_FAIL("btf should have been freed"); + } + + /* some fds might be invalid, so ignore return codes */ +cleanup: + Close(extra_fds[0]); + Close(prog_fd); +} + +/* + * Test that a program with trash in fd_array can't be loaded: + * only map and BTF file descriptors should be accepted. + */ +static void check_fd_array_cnt__fd_array_with_trash(void) +{ + int extra_fds[3] = { -1, -1, -1 }; + int prog_fd = -1; + + extra_fds[0] = new_map(); + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) + goto cleanup; + extra_fds[1] = new_btf(); + if (!ASSERT_GE(extra_fds[1], 0, "new_btf")) + goto cleanup; + + /* trash 1: not a file descriptor */ + extra_fds[2] = 0xbeef; + prog_fd = load_test_prog(extra_fds, 3); + if (!ASSERT_EQ(prog_fd, -EBADF, "prog should have been rejected with -EBADF")) + goto cleanup; + + /* trash 2: not a map or btf */ + extra_fds[2] = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(extra_fds[2], 0, "socket")) + goto cleanup; + + prog_fd = load_test_prog(extra_fds, 3); + if (!ASSERT_EQ(prog_fd, -EINVAL, "prog should have been rejected with -EINVAL")) + goto cleanup; + + /* Validate that the prog is ok if trash is removed */ + Close(extra_fds[2]); + extra_fds[2] = new_btf(); + if (!ASSERT_GE(extra_fds[2], 0, "new_btf")) + goto cleanup; + + prog_fd = load_test_prog(extra_fds, 3); + if (!ASSERT_GE(prog_fd, 0, "prog should have been loaded")) + goto cleanup; + + /* some fds might be invalid, so ignore return codes */ +cleanup: + Close(extra_fds[2]); + Close(extra_fds[1]); + Close(extra_fds[0]); +} + +/* + * Test that a program with too big fd_array can't be loaded. + */ +static void check_fd_array_cnt__fd_array_too_big(void) +{ + int extra_fds[65]; + int prog_fd = -1; + int i; + + for (i = 0; i < 65; i++) { + extra_fds[i] = new_map(); + if (!ASSERT_GE(extra_fds[i], 0, "new_map")) + goto cleanup_fds; + } + + prog_fd = load_test_prog(extra_fds, 65); + ASSERT_EQ(prog_fd, -E2BIG, "prog should have been rejected with -E2BIG"); + +cleanup_fds: + while (i > 0) + Close(extra_fds[--i]); +} + +void test_fd_array_cnt(void) +{ + if (test__start_subtest("no-fd-array")) + check_fd_array_cnt__no_fd_array(); + + if (test__start_subtest("fd-array-ok")) + check_fd_array_cnt__fd_array_ok(); + + if (test__start_subtest("fd-array-dup-input")) + check_fd_array_cnt__duplicated_maps(); + + if (test__start_subtest("fd-array-ref-maps-in-array")) + check_fd_array_cnt__referenced_maps_in_fd_array(); + + if (test__start_subtest("fd-array-ref-btfs")) + check_fd_array_cnt__referenced_btfs(); + + if (test__start_subtest("fd-array-trash-input")) + check_fd_array_cnt__fd_array_with_trash(); + + if (test__start_subtest("fd-array-2big")) + check_fd_array_cnt__fd_array_too_big(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index d50cbd8040d4..e59af2aa6601 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -171,6 +171,10 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, /* See also arch_adjust_kprobe_addr(). */ if (skel->kconfig->CONFIG_X86_KERNEL_IBT) entry_offset = 4; + if (skel->kconfig->CONFIG_PPC64 && + skel->kconfig->CONFIG_KPROBES_ON_FTRACE && + !skel->kconfig->CONFIG_PPC_FTRACE_OUT_OF_LINE) + entry_offset = 4; err = verify_perf_link_info(link_fd, type, kprobe_addr, 0, entry_offset); ASSERT_OK(err, "verify_perf_link_info"); } else { diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index cfcc90cb7ffb..08bae13248c4 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -7,39 +7,14 @@ #include "bpf_flow.skel.h" +#define TEST_NS "flow_dissector_ns" #define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */ +#define TEST_NAME_MAX_LEN 64 #ifndef IP_MF #define IP_MF 0x2000 #endif -#define CHECK_FLOW_KEYS(desc, got, expected) \ - _CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \ - desc, \ - topts.duration, \ - "nhoff=%u/%u " \ - "thoff=%u/%u " \ - "addr_proto=0x%x/0x%x " \ - "is_frag=%u/%u " \ - "is_first_frag=%u/%u " \ - "is_encap=%u/%u " \ - "ip_proto=0x%x/0x%x " \ - "n_proto=0x%x/0x%x " \ - "flow_label=0x%x/0x%x " \ - "sport=%u/%u " \ - "dport=%u/%u\n", \ - got.nhoff, expected.nhoff, \ - got.thoff, expected.thoff, \ - got.addr_proto, expected.addr_proto, \ - got.is_frag, expected.is_frag, \ - got.is_first_frag, expected.is_first_frag, \ - got.is_encap, expected.is_encap, \ - got.ip_proto, expected.ip_proto, \ - got.n_proto, expected.n_proto, \ - got.flow_label, expected.flow_label, \ - got.sport, expected.sport, \ - got.dport, expected.dport) - struct ipv4_pkt { struct ethhdr eth; struct iphdr iph; @@ -89,6 +64,19 @@ struct dvlan_ipv6_pkt { struct tcphdr tcp; } __packed; +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +} gre_base_hdr; + +struct gre_minimal_pkt { + struct ethhdr eth; + struct iphdr iph; + struct gre_base_hdr gre_hdr; + struct iphdr iph_inner; + struct tcphdr tcp; +} __packed; + struct test { const char *name; union { @@ -98,6 +86,7 @@ struct test { struct ipv6_pkt ipv6; struct ipv6_frag_pkt ipv6_frag; struct dvlan_ipv6_pkt dvlan_ipv6; + struct gre_minimal_pkt gre_minimal; } pkt; struct bpf_flow_keys keys; __u32 flags; @@ -106,7 +95,6 @@ struct test { #define VLAN_HLEN 4 -static __u32 duration; struct test tests[] = { { .name = "ipv4", @@ -444,8 +432,137 @@ struct test tests[] = { }, .retval = BPF_FLOW_DISSECTOR_CONTINUE, }, + { + .name = "ip-gre", + .pkt.gre_minimal = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_GRE, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .gre_hdr = { + .flags = 0, + .protocol = __bpf_constant_htons(ETH_P_IP), + }, + .iph_inner.ihl = 5, + .iph_inner.protocol = IPPROTO_TCP, + .iph_inner.tot_len = + __bpf_constant_htons(MAGIC_BYTES - + sizeof(struct iphdr)), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr) * 2 + + sizeof(struct gre_base_hdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_encap = true, + .sport = 80, + .dport = 8080, + }, + .retval = BPF_OK, + }, + { + .name = "ip-gre-no-encap", + .pkt.ipip = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_GRE, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .iph_inner.ihl = 5, + .iph_inner.protocol = IPPROTO_TCP, + .iph_inner.tot_len = + __bpf_constant_htons(MAGIC_BYTES - + sizeof(struct iphdr)), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr) + + sizeof(struct gre_base_hdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_GRE, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_encap = true, + }, + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP, + .retval = BPF_OK, + }, }; +void serial_test_flow_dissector_namespace(void) +{ + struct bpf_flow *skel; + struct nstoken *ns; + int err, prog_fd; + + skel = bpf_flow__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open/load skeleton")) + return; + + prog_fd = bpf_program__fd(skel->progs._dissect); + if (!ASSERT_OK_FD(prog_fd, "get dissector fd")) + goto out_destroy_skel; + + /* We must be able to attach a flow dissector to root namespace */ + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + if (!ASSERT_OK(err, "attach on root namespace ok")) + goto out_destroy_skel; + + err = make_netns(TEST_NS); + if (!ASSERT_OK(err, "create non-root net namespace")) + goto out_destroy_skel; + + /* We must not be able to additionally attach a flow dissector to a + * non-root net namespace + */ + ns = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(ns, "enter non-root net namespace")) + goto out_clean_ns; + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + if (!ASSERT_ERR(err, + "refuse new flow dissector in non-root net namespace")) + bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); + else + ASSERT_EQ(errno, EEXIST, + "refused because of already attached prog"); + close_netns(ns); + + /* If no flow dissector is attached to the root namespace, we must + * be able to attach one to a non-root net namespace + */ + bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); + ns = open_netns(TEST_NS); + ASSERT_OK_PTR(ns, "enter non-root net namespace"); + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + close_netns(ns); + ASSERT_OK(err, "accept new flow dissector in non-root net namespace"); + + /* If a flow dissector is attached to non-root net namespace, attaching + * a flow dissector to root namespace must fail + */ + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + if (!ASSERT_ERR(err, "refuse new flow dissector on root namespace")) + bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); + else + ASSERT_EQ(errno, EEXIST, + "refused because of already attached prog"); + + ns = open_netns(TEST_NS); + bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); + close_netns(ns); +out_clean_ns: + remove_netns(TEST_NS); +out_destroy_skel: + bpf_flow__destroy(skel); +} + static int create_tap(const char *ifname) { struct ifreq ifr = { @@ -533,22 +650,27 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array) return 0; } -static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) +static void run_tests_skb_less(int tap_fd, struct bpf_map *keys, + char *test_suffix) { + char test_name[TEST_NAME_MAX_LEN]; int i, err, keys_fd; keys_fd = bpf_map__fd(keys); - if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd)) + if (!ASSERT_OK_FD(keys_fd, "bpf_map__fd")) return; for (i = 0; i < ARRAY_SIZE(tests); i++) { /* Keep in sync with 'flags' from eth_get_headlen. */ __u32 eth_get_headlen_flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG; - LIBBPF_OPTS(bpf_test_run_opts, topts); struct bpf_flow_keys flow_keys = {}; __u32 key = (__u32)(tests[i].keys.sport) << 16 | tests[i].keys.dport; + snprintf(test_name, TEST_NAME_MAX_LEN, "%s-%s", tests[i].name, + test_suffix); + if (!test__start_subtest(test_name)) + continue; /* For skb-less case we can't pass input flags; run * only the tests that have a matching set of flags. @@ -558,78 +680,139 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) continue; err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); - CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno); + if (!ASSERT_EQ(err, sizeof(tests[i].pkt), "tx_tap")) + continue; /* check the stored flow_keys only if BPF_OK expected */ if (tests[i].retval != BPF_OK) continue; err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); - ASSERT_OK(err, "bpf_map_lookup_elem"); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + continue; - CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); + ASSERT_MEMEQ(&flow_keys, &tests[i].keys, + sizeof(struct bpf_flow_keys), + "returned flow keys"); err = bpf_map_delete_elem(keys_fd, &key); ASSERT_OK(err, "bpf_map_delete_elem"); } } -static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd) +void test_flow_dissector_skb_less_direct_attach(void) { - int err, prog_fd; + int err, prog_fd, tap_fd; + struct bpf_flow *skel; + struct netns_obj *ns; - prog_fd = bpf_program__fd(skel->progs._dissect); - if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd)) + ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true); + if (!ASSERT_OK_PTR(ns, "create and open netns")) return; + skel = bpf_flow__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open/load skeleton")) + goto out_clean_ns; + + err = init_prog_array(skel->obj, skel->maps.jmp_table); + if (!ASSERT_OK(err, "init_prog_array")) + goto out_destroy_skel; + + prog_fd = bpf_program__fd(skel->progs._dissect); + if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd")) + goto out_destroy_skel; + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); - if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno)) - return; + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out_destroy_skel; + + tap_fd = create_tap("tap0"); + if (!ASSERT_OK_FD(tap_fd, "create_tap")) + goto out_destroy_skel; + err = ifup("tap0"); + if (!ASSERT_OK(err, "ifup")) + goto out_close_tap; - run_tests_skb_less(tap_fd, skel->maps.last_dissection); + run_tests_skb_less(tap_fd, skel->maps.last_dissection, + "non-skb-direct-attach"); err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); - CHECK(err, "bpf_prog_detach2", "err %d errno %d\n", err, errno); + ASSERT_OK(err, "bpf_prog_detach2"); + +out_close_tap: + close(tap_fd); +out_destroy_skel: + bpf_flow__destroy(skel); +out_clean_ns: + netns_free(ns); } -static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd) +void test_flow_dissector_skb_less_indirect_attach(void) { + int err, net_fd, tap_fd; + struct bpf_flow *skel; struct bpf_link *link; - int err, net_fd; + struct netns_obj *ns; - net_fd = open("/proc/self/ns/net", O_RDONLY); - if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno)) + ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true); + if (!ASSERT_OK_PTR(ns, "create and open netns")) return; + skel = bpf_flow__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open/load skeleton")) + goto out_clean_ns; + + net_fd = open("/proc/self/ns/net", O_RDONLY); + if (!ASSERT_OK_FD(net_fd, "open(/proc/self/ns/net")) + goto out_destroy_skel; + + err = init_prog_array(skel->obj, skel->maps.jmp_table); + if (!ASSERT_OK(err, "init_prog_array")) + goto out_destroy_skel; + + tap_fd = create_tap("tap0"); + if (!ASSERT_OK_FD(tap_fd, "create_tap")) + goto out_close_ns; + err = ifup("tap0"); + if (!ASSERT_OK(err, "ifup")) + goto out_close_tap; + link = bpf_program__attach_netns(skel->progs._dissect, net_fd); if (!ASSERT_OK_PTR(link, "attach_netns")) - goto out_close; + goto out_close_tap; - run_tests_skb_less(tap_fd, skel->maps.last_dissection); + run_tests_skb_less(tap_fd, skel->maps.last_dissection, + "non-skb-indirect-attach"); err = bpf_link__destroy(link); - CHECK(err, "bpf_link__destroy", "err %d\n", err); -out_close: + ASSERT_OK(err, "bpf_link__destroy"); + +out_close_tap: + close(tap_fd); +out_close_ns: close(net_fd); +out_destroy_skel: + bpf_flow__destroy(skel); +out_clean_ns: + netns_free(ns); } -void test_flow_dissector(void) +void test_flow_dissector_skb(void) { - int i, err, prog_fd, keys_fd = -1, tap_fd; + char test_name[TEST_NAME_MAX_LEN]; struct bpf_flow *skel; + int i, err, prog_fd; skel = bpf_flow__open_and_load(); - if (CHECK(!skel, "skel", "failed to open/load skeleton\n")) + if (!ASSERT_OK_PTR(skel, "open/load skeleton")) return; - prog_fd = bpf_program__fd(skel->progs._dissect); - if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd)) - goto out_destroy_skel; - keys_fd = bpf_map__fd(skel->maps.last_dissection); - if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd)) - goto out_destroy_skel; err = init_prog_array(skel->obj, skel->maps.jmp_table); - if (CHECK(err, "init_prog_array", "err %d\n", err)) + if (!ASSERT_OK(err, "init_prog_array")) + goto out_destroy_skel; + + prog_fd = bpf_program__fd(skel->progs._dissect); + if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd")) goto out_destroy_skel; for (i = 0; i < ARRAY_SIZE(tests); i++) { @@ -641,6 +824,10 @@ void test_flow_dissector(void) ); static struct bpf_flow_keys ctx = {}; + snprintf(test_name, TEST_NAME_MAX_LEN, "%s-skb", tests[i].name); + if (!test__start_subtest(test_name)) + continue; + if (tests[i].flags) { topts.ctx_in = &ctx; topts.ctx_size_in = sizeof(ctx); @@ -656,26 +843,12 @@ void test_flow_dissector(void) continue; ASSERT_EQ(topts.data_size_out, sizeof(flow_keys), "test_run data_size_out"); - CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); + ASSERT_MEMEQ(&flow_keys, &tests[i].keys, + sizeof(struct bpf_flow_keys), + "returned flow keys"); } - /* Do the same tests but for skb-less flow dissector. - * We use a known path in the net/tun driver that calls - * eth_get_headlen and we manually export bpf_flow_keys - * via BPF map in this case. - */ - - tap_fd = create_tap("tap0"); - CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno); - err = ifup("tap0"); - CHECK(err, "ifup", "err %d errno %d\n", err, errno); - - /* Test direct prog attachment */ - test_skb_less_prog_attach(skel, tap_fd); - /* Test indirect prog attachment via link */ - test_skb_less_link_create(skel, tap_fd); - - close(tap_fd); out_destroy_skel: bpf_flow__destroy(skel); } + diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c new file mode 100644 index 000000000000..3729fbfd3084 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c @@ -0,0 +1,792 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <bpf/bpf.h> +#include <linux/bpf.h> +#include <bpf/libbpf.h> +#include <arpa/inet.h> +#include <asm/byteorder.h> +#include <netinet/udp.h> +#include <poll.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <unistd.h> +#include "test_progs.h" +#include "network_helpers.h" +#include "bpf_util.h" +#include "bpf_flow.skel.h" + +#define CFG_PORT_INNER 8000 +#define CFG_PORT_GUE 6080 +#define SUBTEST_NAME_MAX_LEN 32 +#define TEST_NAME_MAX_LEN (32 + SUBTEST_NAME_MAX_LEN) +#define MAX_SOURCE_PORTS 3 +#define TEST_PACKETS_COUNT 10 +#define TEST_PACKET_LEN 100 +#define TEST_PACKET_PATTERN 'a' +#define TEST_IPV4 "192.168.0.1/32" +#define TEST_IPV6 "100::a/128" +#define TEST_TUNNEL_REMOTE "127.0.0.2" +#define TEST_TUNNEL_LOCAL "127.0.0.1" + +#define INIT_ADDR4(addr4, port) \ + { \ + .sin_family = AF_INET, \ + .sin_port = __constant_htons(port), \ + .sin_addr.s_addr = __constant_htonl(addr4), \ + } + +#define INIT_ADDR6(addr6, port) \ + { \ + .sin6_family = AF_INET6, \ + .sin6_port = __constant_htons(port), \ + .sin6_addr = addr6, \ + } +#define TEST_IN4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 2, 0) +#define TEST_IN4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, CFG_PORT_INNER) +#define TEST_OUT4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 1, 0) +#define TEST_OUT4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, 0) + +#define TEST_IN6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0) +#define TEST_IN6_DST_ADDR_DEFAULT \ + INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER) +#define TEST_OUT6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0) +#define TEST_OUT6_DST_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0) + +#define TEST_IN4_SRC_ADDR_DISSECT_CONTINUE INIT_ADDR4(INADDR_LOOPBACK + 126, 0) +#define TEST_IN4_SRC_ADDR_IPIP INIT_ADDR4((in_addr_t)0x01010101, 0) +#define TEST_IN4_DST_ADDR_IPIP INIT_ADDR4((in_addr_t)0xC0A80001, CFG_PORT_INNER) + +struct grehdr { + uint16_t unused; + uint16_t protocol; +} __packed; + +struct guehdr { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 hlen : 5, control : 1, version : 2; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 version : 2, control : 1, hlen : 5; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __u8 proto_ctype; + __be16 flags; + }; + __be32 word; + }; +}; + +static char buf[ETH_DATA_LEN]; + +struct test_configuration { + char name[SUBTEST_NAME_MAX_LEN]; + int (*test_setup)(void); + void (*test_teardown)(void); + int source_ports[MAX_SOURCE_PORTS]; + int cfg_l3_inner; + struct sockaddr_in in_saddr4; + struct sockaddr_in in_daddr4; + struct sockaddr_in6 in_saddr6; + struct sockaddr_in6 in_daddr6; + int cfg_l3_outer; + struct sockaddr_in out_saddr4; + struct sockaddr_in out_daddr4; + struct sockaddr_in6 out_saddr6; + struct sockaddr_in6 out_daddr6; + int cfg_encap_proto; + uint8_t cfg_dsfield_inner; + uint8_t cfg_dsfield_outer; + int cfg_l3_extra; + struct sockaddr_in extra_saddr4; + struct sockaddr_in extra_daddr4; + struct sockaddr_in6 extra_saddr6; + struct sockaddr_in6 extra_daddr6; +}; + +static unsigned long util_gettime(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void build_ipv4_header(void *header, uint8_t proto, uint32_t src, + uint32_t dst, int payload_len, uint8_t tos) +{ + struct iphdr *iph = header; + + iph->ihl = 5; + iph->version = 4; + iph->tos = tos; + iph->ttl = 8; + iph->tot_len = htons(sizeof(*iph) + payload_len); + iph->id = htons(1337); + iph->protocol = proto; + iph->saddr = src; + iph->daddr = dst; + iph->check = build_ip_csum((void *)iph); +} + +static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield) +{ + uint16_t val, *ptr = (uint16_t *)ip6h; + + val = ntohs(*ptr); + val &= 0xF00F; + val |= ((uint16_t)dsfield) << 4; + *ptr = htons(val); +} + +static void build_ipv6_header(void *header, uint8_t proto, + const struct sockaddr_in6 *src, + const struct sockaddr_in6 *dst, int payload_len, + uint8_t dsfield) +{ + struct ipv6hdr *ip6h = header; + + ip6h->version = 6; + ip6h->payload_len = htons(payload_len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 8; + ipv6_set_dsfield(ip6h, dsfield); + + memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr)); + memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr)); +} + +static void build_udp_header(void *header, int payload_len, uint16_t sport, + uint16_t dport, int family) +{ + struct udphdr *udph = header; + int len = sizeof(*udph) + payload_len; + + udph->source = htons(sport); + udph->dest = htons(dport); + udph->len = htons(len); + udph->check = 0; + if (family == AF_INET) + udph->check = build_udp_v4_csum(header - sizeof(struct iphdr), + udph); + else + udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr), + udph); +} + +static void build_gue_header(void *header, uint8_t proto) +{ + struct guehdr *gueh = header; + + gueh->proto_ctype = proto; +} + +static void build_gre_header(void *header, uint16_t proto) +{ + struct grehdr *greh = header; + + greh->protocol = htons(proto); +} + +static int l3_length(int family) +{ + if (family == AF_INET) + return sizeof(struct iphdr); + else + return sizeof(struct ipv6hdr); +} + +static int build_packet(const struct test_configuration *test, uint16_t sport) +{ + int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0; + int el3_len = 0, packet_len; + + memset(buf, 0, ETH_DATA_LEN); + + if (test->cfg_l3_extra) + el3_len = l3_length(test->cfg_l3_extra); + + /* calculate header offsets */ + if (test->cfg_encap_proto) { + ol3_len = l3_length(test->cfg_l3_outer); + + if (test->cfg_encap_proto == IPPROTO_GRE) + ol4_len = sizeof(struct grehdr); + else if (test->cfg_encap_proto == IPPROTO_UDP) + ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr); + } + + il3_len = l3_length(test->cfg_l3_inner); + il4_len = sizeof(struct udphdr); + + packet_len = el3_len + ol3_len + ol4_len + il3_len + il4_len + + TEST_PACKET_LEN; + if (!ASSERT_LE(packet_len, sizeof(buf), "check packet size")) + return -1; + + /* + * Fill packet from inside out, to calculate correct checksums. + * But create ip before udp headers, as udp uses ip for pseudo-sum. + */ + memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len, + TEST_PACKET_PATTERN, TEST_PACKET_LEN); + + /* add zero byte for udp csum padding */ + buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + TEST_PACKET_LEN] = + 0; + + switch (test->cfg_l3_inner) { + case PF_INET: + build_ipv4_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, test->in_saddr4.sin_addr.s_addr, + test->in_daddr4.sin_addr.s_addr, + il4_len + TEST_PACKET_LEN, + test->cfg_dsfield_inner); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, &test->in_saddr6, + &test->in_daddr6, il4_len + TEST_PACKET_LEN, + test->cfg_dsfield_inner); + break; + } + + build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len, + TEST_PACKET_LEN, sport, CFG_PORT_INNER, + test->cfg_l3_inner); + + if (!test->cfg_encap_proto) + return il3_len + il4_len + TEST_PACKET_LEN; + + switch (test->cfg_l3_outer) { + case PF_INET: + build_ipv4_header(buf + el3_len, test->cfg_encap_proto, + test->out_saddr4.sin_addr.s_addr, + test->out_daddr4.sin_addr.s_addr, + ol4_len + il3_len + il4_len + TEST_PACKET_LEN, + test->cfg_dsfield_outer); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len, test->cfg_encap_proto, + &test->out_saddr6, &test->out_daddr6, + ol4_len + il3_len + il4_len + TEST_PACKET_LEN, + test->cfg_dsfield_outer); + break; + } + + switch (test->cfg_encap_proto) { + case IPPROTO_UDP: + build_gue_header(buf + el3_len + ol3_len + ol4_len - + sizeof(struct guehdr), + test->cfg_l3_inner == PF_INET ? IPPROTO_IPIP : + IPPROTO_IPV6); + build_udp_header(buf + el3_len + ol3_len, + sizeof(struct guehdr) + il3_len + il4_len + + TEST_PACKET_LEN, + sport, CFG_PORT_GUE, test->cfg_l3_outer); + break; + case IPPROTO_GRE: + build_gre_header(buf + el3_len + ol3_len, + test->cfg_l3_inner == PF_INET ? ETH_P_IP : + ETH_P_IPV6); + break; + } + + switch (test->cfg_l3_extra) { + case PF_INET: + build_ipv4_header(buf, + test->cfg_l3_outer == PF_INET ? IPPROTO_IPIP : + IPPROTO_IPV6, + test->extra_saddr4.sin_addr.s_addr, + test->extra_daddr4.sin_addr.s_addr, + ol3_len + ol4_len + il3_len + il4_len + + TEST_PACKET_LEN, + 0); + break; + case PF_INET6: + build_ipv6_header(buf, + test->cfg_l3_outer == PF_INET ? IPPROTO_IPIP : + IPPROTO_IPV6, + &test->extra_saddr6, &test->extra_daddr6, + ol3_len + ol4_len + il3_len + il4_len + + TEST_PACKET_LEN, + 0); + break; + } + + return el3_len + ol3_len + ol4_len + il3_len + il4_len + + TEST_PACKET_LEN; +} + +/* sender transmits encapsulated over RAW or unencap'd over UDP */ +static int setup_tx(const struct test_configuration *test) +{ + int family, fd, ret; + + if (test->cfg_l3_extra) + family = test->cfg_l3_extra; + else if (test->cfg_l3_outer) + family = test->cfg_l3_outer; + else + family = test->cfg_l3_inner; + + fd = socket(family, SOCK_RAW, IPPROTO_RAW); + if (!ASSERT_OK_FD(fd, "setup tx socket")) + return fd; + + if (test->cfg_l3_extra) { + if (test->cfg_l3_extra == PF_INET) + ret = connect(fd, (void *)&test->extra_daddr4, + sizeof(test->extra_daddr4)); + else + ret = connect(fd, (void *)&test->extra_daddr6, + sizeof(test->extra_daddr6)); + if (!ASSERT_OK(ret, "connect")) { + close(fd); + return ret; + } + } else if (test->cfg_l3_outer) { + /* connect to destination if not encapsulated */ + if (test->cfg_l3_outer == PF_INET) + ret = connect(fd, (void *)&test->out_daddr4, + sizeof(test->out_daddr4)); + else + ret = connect(fd, (void *)&test->out_daddr6, + sizeof(test->out_daddr6)); + if (!ASSERT_OK(ret, "connect")) { + close(fd); + return ret; + } + } else { + /* otherwise using loopback */ + if (test->cfg_l3_inner == PF_INET) + ret = connect(fd, (void *)&test->in_daddr4, + sizeof(test->in_daddr4)); + else + ret = connect(fd, (void *)&test->in_daddr6, + sizeof(test->in_daddr6)); + if (!ASSERT_OK(ret, "connect")) { + close(fd); + return ret; + } + } + + return fd; +} + +/* receiver reads unencapsulated UDP */ +static int setup_rx(const struct test_configuration *test) +{ + int fd, ret; + + fd = socket(test->cfg_l3_inner, SOCK_DGRAM, 0); + if (!ASSERT_OK_FD(fd, "socket rx")) + return fd; + + if (test->cfg_l3_inner == PF_INET) + ret = bind(fd, (void *)&test->in_daddr4, + sizeof(test->in_daddr4)); + else + ret = bind(fd, (void *)&test->in_daddr6, + sizeof(test->in_daddr6)); + if (!ASSERT_OK(ret, "bind rx")) { + close(fd); + return ret; + } + + return fd; +} + +static int do_tx(int fd, const char *pkt, int len) +{ + int ret; + + ret = write(fd, pkt, len); + return ret != len; +} + +static int do_poll(int fd, short events, int timeout) +{ + struct pollfd pfd; + int ret; + + pfd.fd = fd; + pfd.events = events; + + ret = poll(&pfd, 1, timeout); + return ret; +} + +static int do_rx(int fd) +{ + char rbuf; + int ret, num = 0; + + while (1) { + ret = recv(fd, &rbuf, 1, MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret < 0) + return -1; + if (!ASSERT_EQ(rbuf, TEST_PACKET_PATTERN, "check pkt pattern")) + return -1; + num++; + } + + return num; +} + +static int run_test(const struct test_configuration *test, + int source_port_index) +{ + int fdt = -1, fdr = -1, len, tx = 0, rx = 0, err; + unsigned long tstop, tcur; + + fdr = setup_rx(test); + fdt = setup_tx(test); + if (!ASSERT_OK_FD(fdr, "setup rx") || !ASSERT_OK_FD(fdt, "setup tx")) { + err = -1; + goto out_close_sockets; + } + + len = build_packet(test, + (uint16_t)test->source_ports[source_port_index]); + if (!ASSERT_GT(len, 0, "build test packet")) + return -1; + + tcur = util_gettime(); + tstop = tcur; + + while (tx < TEST_PACKETS_COUNT) { + if (!ASSERT_OK(do_tx(fdt, buf, len), "do_tx")) + break; + tx++; + err = do_rx(fdr); + if (!ASSERT_GE(err, 0, "do_rx")) + break; + rx += err; + } + + /* read straggler packets, if any */ + if (rx < tx) { + tstop = util_gettime() + 100; + while (rx < tx) { + tcur = util_gettime(); + if (tcur >= tstop) + break; + + err = do_poll(fdr, POLLIN, tstop - tcur); + if (err < 0) + break; + err = do_rx(fdr); + if (err >= 0) + rx += err; + } + } + +out_close_sockets: + close(fdt); + close(fdr); + return rx; +} + +static int attach_and_configure_program(struct bpf_flow *skel) +{ + struct bpf_map *prog_array = skel->maps.jmp_table; + int main_prog_fd, sub_prog_fd, map_fd, i, err; + struct bpf_program *prog; + char prog_name[32]; + + main_prog_fd = bpf_program__fd(skel->progs._dissect); + if (main_prog_fd < 0) + return main_prog_fd; + + err = bpf_prog_attach(main_prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + if (err) + return err; + + map_fd = bpf_map__fd(prog_array); + if (map_fd < 0) + return map_fd; + + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { + snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i); + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!prog) + return -1; + + sub_prog_fd = bpf_program__fd(prog); + if (sub_prog_fd < 0) + return -1; + + err = bpf_map_update_elem(map_fd, &i, &sub_prog_fd, BPF_ANY); + if (err) + return -1; + } + + return main_prog_fd; +} + +static void detach_program(struct bpf_flow *skel, int prog_fd) +{ + bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); +} + +static int set_port_drop(int pf, bool multi_port) +{ + SYS(fail, "tc qdisc add dev lo ingress"); + SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s", + "dev lo", + "parent FFFF:", + "protocol", pf == PF_INET6 ? "ipv6" : "ip", + "pref 1337", + "flower", + "ip_proto udp", + "src_port", multi_port ? "8-10" : "9", + "action drop"); + return 0; + +fail_delete_qdisc: + SYS_NOFAIL("tc qdisc del dev lo ingress"); +fail: + return 1; +} + +static void remove_filter(void) +{ + SYS_NOFAIL("tc filter del dev lo ingress"); + SYS_NOFAIL("tc qdisc del dev lo ingress"); +} + +static int ipv4_setup(void) +{ + return set_port_drop(PF_INET, false); +} + +static int ipv6_setup(void) +{ + return set_port_drop(PF_INET6, false); +} + +static int port_range_setup(void) +{ + return set_port_drop(PF_INET, true); +} + +static int set_addresses(void) +{ + SYS(out, "ip -4 addr add %s dev lo", TEST_IPV4); + SYS(out_remove_ipv4, "ip -6 addr add %s dev lo", TEST_IPV6); + return 0; +out_remove_ipv4: + SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4); +out: + return -1; +} + +static void unset_addresses(void) +{ + SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4); + SYS_NOFAIL("ip -6 addr del %s dev lo", TEST_IPV6); +} + +static int ipip_setup(void) +{ + if (!ASSERT_OK(set_addresses(), "configure addresses")) + return -1; + if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter")) + goto out_unset_addresses; + SYS(out_remove_filter, + "ip link add ipip_test type ipip remote %s local %s dev lo", + TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL); + SYS(out_clean_netif, "ip link set ipip_test up"); + return 0; + +out_clean_netif: + SYS_NOFAIL("ip link del ipip_test"); +out_remove_filter: + remove_filter(); +out_unset_addresses: + unset_addresses(); + return -1; +} + +static void ipip_shutdown(void) +{ + SYS_NOFAIL("ip link del ipip_test"); + remove_filter(); + unset_addresses(); +} + +static int gre_setup(void) +{ + if (!ASSERT_OK(set_addresses(), "configure addresses")) + return -1; + if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter")) + goto out_unset_addresses; + SYS(out_remove_filter, + "ip link add gre_test type gre remote %s local %s dev lo", + TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL); + SYS(out_clean_netif, "ip link set gre_test up"); + return 0; + +out_clean_netif: + SYS_NOFAIL("ip link del ipip_test"); +out_remove_filter: + remove_filter(); +out_unset_addresses: + unset_addresses(); + return -1; +} + +static void gre_shutdown(void) +{ + SYS_NOFAIL("ip link del gre_test"); + remove_filter(); + unset_addresses(); +} + +static const struct test_configuration tests_input[] = { + { + .name = "ipv4", + .test_setup = ipv4_setup, + .test_teardown = remove_filter, + .source_ports = { 8, 9, 10 }, + .cfg_l3_inner = PF_INET, + .in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT, + .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT + }, + { + .name = "ipv4_continue_dissect", + .test_setup = ipv4_setup, + .test_teardown = remove_filter, + .source_ports = { 8, 9, 10 }, + .cfg_l3_inner = PF_INET, + .in_saddr4 = TEST_IN4_SRC_ADDR_DISSECT_CONTINUE, + .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT }, + { + .name = "ipip", + .test_setup = ipip_setup, + .test_teardown = ipip_shutdown, + .source_ports = { 8, 9, 10 }, + .cfg_l3_inner = PF_INET, + .in_saddr4 = TEST_IN4_SRC_ADDR_IPIP, + .in_daddr4 = TEST_IN4_DST_ADDR_IPIP, + .out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT, + .out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT, + .cfg_l3_outer = PF_INET, + .cfg_encap_proto = IPPROTO_IPIP, + + }, + { + .name = "gre", + .test_setup = gre_setup, + .test_teardown = gre_shutdown, + .source_ports = { 8, 9, 10 }, + .cfg_l3_inner = PF_INET, + .in_saddr4 = TEST_IN4_SRC_ADDR_IPIP, + .in_daddr4 = TEST_IN4_DST_ADDR_IPIP, + .out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT, + .out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT, + .cfg_l3_outer = PF_INET, + .cfg_encap_proto = IPPROTO_GRE, + }, + { + .name = "port_range", + .test_setup = port_range_setup, + .test_teardown = remove_filter, + .source_ports = { 7, 9, 11 }, + .cfg_l3_inner = PF_INET, + .in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT, + .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT }, + { + .name = "ipv6", + .test_setup = ipv6_setup, + .test_teardown = remove_filter, + .source_ports = { 8, 9, 10 }, + .cfg_l3_inner = PF_INET6, + .in_saddr6 = TEST_IN6_SRC_ADDR_DEFAULT, + .in_daddr6 = TEST_IN6_DST_ADDR_DEFAULT + }, +}; + +struct test_ctx { + struct bpf_flow *skel; + struct netns_obj *ns; + int prog_fd; +}; + +static int test_global_init(struct test_ctx *ctx) +{ + int err; + + ctx->skel = bpf_flow__open_and_load(); + if (!ASSERT_OK_PTR(ctx->skel, "open and load flow_dissector")) + return -1; + + ctx->ns = netns_new("flow_dissector_classification", true); + if (!ASSERT_OK_PTR(ctx->ns, "switch ns")) + goto out_destroy_skel; + + err = write_sysctl("/proc/sys/net/ipv4/conf/default/rp_filter", "0"); + err |= write_sysctl("/proc/sys/net/ipv4/conf/all/rp_filter", "0"); + err |= write_sysctl("/proc/sys/net/ipv4/conf/lo/rp_filter", "0"); + if (!ASSERT_OK(err, "configure net tunables")) + goto out_clean_ns; + + ctx->prog_fd = attach_and_configure_program(ctx->skel); + if (!ASSERT_OK_FD(ctx->prog_fd, "attach and configure program")) + goto out_clean_ns; + return 0; +out_clean_ns: + netns_free(ctx->ns); +out_destroy_skel: + bpf_flow__destroy(ctx->skel); + return -1; +} + +static void test_global_shutdown(struct test_ctx *ctx) +{ + detach_program(ctx->skel, ctx->prog_fd); + netns_free(ctx->ns); + bpf_flow__destroy(ctx->skel); +} + +void test_flow_dissector_classification(void) +{ + struct test_ctx ctx; + const struct test_configuration *test; + int i; + + if (test_global_init(&ctx)) + return; + + for (i = 0; i < ARRAY_SIZE(tests_input); i++) { + if (!test__start_subtest(tests_input[i].name)) + continue; + test = &tests_input[i]; + /* All tests are expected to have one rx-ok port first, + * then a non-working rx port, and finally a rx-ok port + */ + if (test->test_setup && + !ASSERT_OK(test->test_setup(), "init filter")) + continue; + + ASSERT_EQ(run_test(test, 0), TEST_PACKETS_COUNT, + "test first port"); + ASSERT_EQ(run_test(test, 1), 0, "test second port"); + ASSERT_EQ(run_test(test, 2), TEST_PACKETS_COUNT, + "test third port"); + if (test->test_teardown) + test->test_teardown(); + } + test_global_shutdown(&ctx); +} diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c new file mode 100644 index 000000000000..b7b77a6b2979 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <test_progs.h> + +#include "free_timer.skel.h" + +struct run_ctx { + struct bpf_program *start_prog; + struct bpf_program *overwrite_prog; + pthread_barrier_t notify; + int loop; + bool start; + bool stop; +}; + +static void start_threads(struct run_ctx *ctx) +{ + ctx->start = true; +} + +static void stop_threads(struct run_ctx *ctx) +{ + ctx->stop = true; + /* Guarantee the order between ->stop and ->start */ + __atomic_store_n(&ctx->start, true, __ATOMIC_RELEASE); +} + +static int wait_for_start(struct run_ctx *ctx) +{ + while (!__atomic_load_n(&ctx->start, __ATOMIC_ACQUIRE)) + usleep(10); + + return ctx->stop; +} + +static void *overwrite_timer_fn(void *arg) +{ + struct run_ctx *ctx = arg; + int loop, fd, err; + cpu_set_t cpuset; + long ret = 0; + + /* Pin on CPU 0 */ + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); + + /* Is the thread being stopped ? */ + err = wait_for_start(ctx); + if (err) + return NULL; + + fd = bpf_program__fd(ctx->overwrite_prog); + loop = ctx->loop; + while (loop-- > 0) { + LIBBPF_OPTS(bpf_test_run_opts, opts); + + /* Wait for start thread to complete */ + pthread_barrier_wait(&ctx->notify); + + /* Overwrite timers */ + err = bpf_prog_test_run_opts(fd, &opts); + if (err) + ret |= 1; + else if (opts.retval) + ret |= 2; + + /* Notify start thread to start timers */ + pthread_barrier_wait(&ctx->notify); + } + + return (void *)ret; +} + +static void *start_timer_fn(void *arg) +{ + struct run_ctx *ctx = arg; + int loop, fd, err; + cpu_set_t cpuset; + long ret = 0; + + /* Pin on CPU 1 */ + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); + + /* Is the thread being stopped ? */ + err = wait_for_start(ctx); + if (err) + return NULL; + + fd = bpf_program__fd(ctx->start_prog); + loop = ctx->loop; + while (loop-- > 0) { + LIBBPF_OPTS(bpf_test_run_opts, opts); + + /* Run the prog to start timer */ + err = bpf_prog_test_run_opts(fd, &opts); + if (err) + ret |= 4; + else if (opts.retval) + ret |= 8; + + /* Notify overwrite thread to do overwrite */ + pthread_barrier_wait(&ctx->notify); + + /* Wait for overwrite thread to complete */ + pthread_barrier_wait(&ctx->notify); + } + + return (void *)ret; +} + +void test_free_timer(void) +{ + struct free_timer *skel; + struct bpf_program *prog; + struct run_ctx ctx; + pthread_t tid[2]; + void *ret; + int err; + + skel = free_timer__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load")) + return; + + memset(&ctx, 0, sizeof(ctx)); + + prog = bpf_object__find_program_by_name(skel->obj, "start_timer"); + if (!ASSERT_OK_PTR(prog, "find start prog")) + goto out; + ctx.start_prog = prog; + + prog = bpf_object__find_program_by_name(skel->obj, "overwrite_timer"); + if (!ASSERT_OK_PTR(prog, "find overwrite prog")) + goto out; + ctx.overwrite_prog = prog; + + pthread_barrier_init(&ctx.notify, NULL, 2); + ctx.loop = 10; + + err = pthread_create(&tid[0], NULL, start_timer_fn, &ctx); + if (!ASSERT_OK(err, "create start_timer")) + goto out; + + err = pthread_create(&tid[1], NULL, overwrite_timer_fn, &ctx); + if (!ASSERT_OK(err, "create overwrite_timer")) { + stop_threads(&ctx); + goto out; + } + + start_threads(&ctx); + + ret = NULL; + err = pthread_join(tid[0], &ret); + ASSERT_EQ(err | (long)ret, 0, "start_timer"); + ret = NULL; + err = pthread_join(tid[1], &ret); + ASSERT_EQ(err | (long)ret, 0, "overwrite_timer"); +out: + free_timer__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 66ab1cae923e..e19ef509ebf8 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -397,6 +397,31 @@ cleanup: kprobe_multi_session_cookie__destroy(skel); } +static void test_unique_match(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + struct kprobe_multi *skel = NULL; + struct bpf_link *link = NULL; + + skel = kprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi__open_and_load")) + return; + + opts.unique_match = true; + skel->bss->pid = getpid(); + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, + "bpf_fentry_test*", &opts); + if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_multi_opts")) + bpf_link__destroy(link); + + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, + "bpf_fentry_test8*", &opts); + if (ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts")) + bpf_link__destroy(link); + + kprobe_multi__destroy(skel); +} + static size_t symbol_hash(long key, void *ctx __maybe_unused) { return str_hash((const char *) key); @@ -765,5 +790,7 @@ void test_kprobe_multi_test(void) test_session_skel_api(); if (test__start_subtest("session_cookie")) test_session_cookie_skel_api(); + if (test__start_subtest("unique_match")) + test_unique_match(); RUN_TESTS(kprobe_multi_verifier); } diff --git a/tools/testing/selftests/bpf/prog_tests/missed.c b/tools/testing/selftests/bpf/prog_tests/missed.c index 70d90c43537c..ed8857ae914a 100644 --- a/tools/testing/selftests/bpf/prog_tests/missed.c +++ b/tools/testing/selftests/bpf/prog_tests/missed.c @@ -85,6 +85,7 @@ static void test_missed_kprobe_recursion(void) ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses"); ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses"); ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test5)), 1, "test5_recursion_misses"); + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test6)), 1, "test6_recursion_misses"); cleanup: missed_kprobe_recursion__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c index 6fa19449297e..43676a9922dc 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c @@ -3,11 +3,14 @@ #include <test_progs.h> #include "raw_tp_null.skel.h" +#include "raw_tp_null_fail.skel.h" void test_raw_tp_null(void) { struct raw_tp_null *skel; + RUN_TESTS(raw_tp_null_fail); + skel = raw_tp_null__open_and_load(); if (!ASSERT_OK_PTR(skel, "raw_tp_null__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h new file mode 100644 index 000000000000..1bdfb79ef009 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h @@ -0,0 +1,394 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SOCKET_HELPERS__ +#define __SOCKET_HELPERS__ + +#include <linux/vm_sockets.h> + +/* include/linux/net.h */ +#define SOCK_TYPE_MASK 0xf + +#define IO_TIMEOUT_SEC 30 +#define MAX_STRERR_LEN 256 + +/* workaround for older vm_sockets.h */ +#ifndef VMADDR_CID_LOCAL +#define VMADDR_CID_LOCAL 1 +#endif + +/* include/linux/cleanup.h */ +#define __get_and_null(p, nullvalue) \ + ({ \ + __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = nullvalue; \ + __val; \ + }) + +#define take_fd(fd) __get_and_null(fd, -EBADF) + +/* Wrappers that fail the test on error and report it. */ + +#define _FAIL(errnum, fmt...) \ + ({ \ + error_at_line(0, (errnum), __func__, __LINE__, fmt); \ + CHECK_FAIL(true); \ + }) +#define FAIL(fmt...) _FAIL(0, fmt) +#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) +#define FAIL_LIBBPF(err, msg) \ + ({ \ + char __buf[MAX_STRERR_LEN]; \ + libbpf_strerror((err), __buf, sizeof(__buf)); \ + FAIL("%s: %s", (msg), __buf); \ + }) + + +#define xaccept_nonblock(fd, addr, len) \ + ({ \ + int __ret = \ + accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("accept"); \ + __ret; \ + }) + +#define xbind(fd, addr, len) \ + ({ \ + int __ret = bind((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("bind"); \ + __ret; \ + }) + +#define xclose(fd) \ + ({ \ + int __ret = close((fd)); \ + if (__ret == -1) \ + FAIL_ERRNO("close"); \ + __ret; \ + }) + +#define xconnect(fd, addr, len) \ + ({ \ + int __ret = connect((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("connect"); \ + __ret; \ + }) + +#define xgetsockname(fd, addr, len) \ + ({ \ + int __ret = getsockname((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockname"); \ + __ret; \ + }) + +#define xgetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = getsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockopt(" #name ")"); \ + __ret; \ + }) + +#define xlisten(fd, backlog) \ + ({ \ + int __ret = listen((fd), (backlog)); \ + if (__ret == -1) \ + FAIL_ERRNO("listen"); \ + __ret; \ + }) + +#define xsetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = setsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("setsockopt(" #name ")"); \ + __ret; \ + }) + +#define xsend(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = send((fd), (buf), (len), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("send"); \ + __ret; \ + }) + +#define xrecv_nonblock(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ + IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("recv"); \ + __ret; \ + }) + +#define xsocket(family, sotype, flags) \ + ({ \ + int __ret = socket(family, sotype, flags); \ + if (__ret == -1) \ + FAIL_ERRNO("socket"); \ + __ret; \ + }) + +static inline void close_fd(int *fd) +{ + if (*fd >= 0) + xclose(*fd); +} + +#define __close_fd __attribute__((cleanup(close_fd))) + +static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) +{ + return (struct sockaddr *)ss; +} + +static inline void init_addr_loopback4(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); + + addr4->sin_family = AF_INET; + addr4->sin_port = 0; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + *len = sizeof(*addr4); +} + +static inline void init_addr_loopback6(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); + + addr6->sin6_family = AF_INET6; + addr6->sin6_port = 0; + addr6->sin6_addr = in6addr_loopback; + *len = sizeof(*addr6); +} + +static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); + + addr->svm_family = AF_VSOCK; + addr->svm_port = VMADDR_PORT_ANY; + addr->svm_cid = VMADDR_CID_LOCAL; + *len = sizeof(*addr); +} + +static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, + socklen_t *len) +{ + switch (family) { + case AF_INET: + init_addr_loopback4(ss, len); + return; + case AF_INET6: + init_addr_loopback6(ss, len); + return; + case AF_VSOCK: + init_addr_loopback_vsock(ss, len); + return; + default: + FAIL("unsupported address family %d", family); + } +} + +static inline int enable_reuseport(int s, int progfd) +{ + int err, one = 1; + + err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + if (err) + return -1; + err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, + sizeof(progfd)); + if (err) + return -1; + + return 0; +} + +static inline int socket_loopback_reuseport(int family, int sotype, int progfd) +{ + struct sockaddr_storage addr; + socklen_t len = 0; + int err, s; + + init_addr_loopback(family, &addr, &len); + + s = xsocket(family, sotype, 0); + if (s == -1) + return -1; + + if (progfd >= 0) + enable_reuseport(s, progfd); + + err = xbind(s, sockaddr(&addr), len); + if (err) + goto close; + + if (sotype & SOCK_DGRAM) + return s; + + err = xlisten(s, SOMAXCONN); + if (err) + goto close; + + return s; +close: + xclose(s); + return -1; +} + +static inline int socket_loopback(int family, int sotype) +{ + return socket_loopback_reuseport(family, sotype, -1); +} + +static inline int poll_connect(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set wfds; + int r, eval; + socklen_t esize = sizeof(eval); + + FD_ZERO(&wfds); + FD_SET(fd, &wfds); + + r = select(fd + 1, NULL, &wfds, NULL, &timeout); + if (r == 0) + errno = ETIME; + if (r != 1) + return -1; + + if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0) + return -1; + if (eval != 0) { + errno = eval; + return -1; + } + + return 0; +} + +static inline int poll_read(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set rfds; + int r; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + + r = select(fd + 1, &rfds, NULL, NULL, &timeout); + if (r == 0) + errno = ETIME; + + return r == 1 ? 0 : -1; +} + +static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return accept(fd, addr, len); +} + +static inline int recv_timeout(int fd, void *buf, size_t len, int flags, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return recv(fd, buf, len, flags); +} + + +static inline int create_pair(int family, int sotype, int *p0, int *p1) +{ + __close_fd int s, c = -1, p = -1; + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int err; + + s = socket_loopback(family, sotype); + if (s < 0) + return s; + + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + return err; + + c = xsocket(family, sotype, 0); + if (c < 0) + return c; + + err = connect(c, sockaddr(&addr), len); + if (err) { + if (errno != EINPROGRESS) { + FAIL_ERRNO("connect"); + return err; + } + + err = poll_connect(c, IO_TIMEOUT_SEC); + if (err) { + FAIL_ERRNO("poll_connect"); + return err; + } + } + + switch (sotype & SOCK_TYPE_MASK) { + case SOCK_DGRAM: + err = xgetsockname(c, sockaddr(&addr), &len); + if (err) + return err; + + err = xconnect(s, sockaddr(&addr), len); + if (err) + return err; + + *p0 = take_fd(s); + break; + case SOCK_STREAM: + case SOCK_SEQPACKET: + p = xaccept_nonblock(s, NULL, NULL); + if (p < 0) + return p; + + *p0 = take_fd(p); + break; + default: + FAIL("Unsupported socket type %#x", sotype); + return -EOPNOTSUPP; + } + + *p1 = take_fd(c); + return 0; +} + +static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, + int *p0, int *p1) +{ + int err; + + err = create_pair(family, sotype, c0, p0); + if (err) + return err; + + err = create_pair(family, sotype, c1, p1); + if (err) { + close(*c0); + close(*p0); + } + + return err; +} + +#endif // __SOCKET_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index fdff0652d7ef..884ad87783d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -12,6 +12,7 @@ #include "test_sockmap_progs_query.skel.h" #include "test_sockmap_pass_prog.skel.h" #include "test_sockmap_drop_prog.skel.h" +#include "test_sockmap_change_tail.skel.h" #include "bpf_iter_sockmap.skel.h" #include "sockmap_helpers.h" @@ -643,6 +644,54 @@ out: test_sockmap_drop_prog__destroy(drop); } +static void test_sockmap_skb_verdict_change_tail(void) +{ + struct test_sockmap_change_tail *skel; + int err, map, verdict; + int c1, p1, sent, recvd; + int zero = 0; + char buf[2]; + + skel = test_sockmap_change_tail__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + map = bpf_map__fd(skel->maps.sock_map_rx); + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c1)")) + goto out_close; + sent = xsend(p1, "Tr", 2, 0); + ASSERT_EQ(sent, 2, "xsend(p1)"); + recvd = recv(c1, buf, 2, 0); + ASSERT_EQ(recvd, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + sent = xsend(p1, "G", 1, 0); + ASSERT_EQ(sent, 1, "xsend(p1)"); + recvd = recv(c1, buf, 2, 0); + ASSERT_EQ(recvd, 2, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + sent = xsend(p1, "E", 1, 0); + ASSERT_EQ(sent, 1, "xsend(p1)"); + recvd = recv(c1, buf, 1, 0); + ASSERT_EQ(recvd, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + +out_close: + close(c1); + close(p1); +out: + test_sockmap_change_tail__destroy(skel); +} + static void test_sockmap_skb_verdict_peek_helper(int map) { int err, c1, p1, zero = 0, sent, recvd, avail; @@ -934,8 +983,10 @@ static void test_sockmap_same_sock(void) err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); - if (err) + if (err) { + close(tcp); goto out; + } for (i = 0; i < 2; i++) { err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY); @@ -954,14 +1005,14 @@ static void test_sockmap_same_sock(void) ASSERT_OK(err, "bpf_map_update_elem(tcp)"); } + close(tcp); err = bpf_map_delete_elem(map, &zero); - ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + ASSERT_ERR(err, "bpf_map_delete_elem(entry)"); close(stream[0]); close(stream[1]); out: close(dgram); - close(tcp); close(udp); test_sockmap_pass_prog__destroy(skel); } @@ -1056,6 +1107,8 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(true); if (test__start_subtest("sockmap skb_verdict fionread on drop")) test_sockmap_skb_verdict_fionread(false); + if (test__start_subtest("sockmap skb_verdict change tail")) + test_sockmap_skb_verdict_change_tail(); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); if (test__start_subtest("sockmap skb_verdict msg_f_peek with link")) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index 38e35c72bdaa..3e5571dd578d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -1,139 +1,12 @@ #ifndef __SOCKMAP_HELPERS__ #define __SOCKMAP_HELPERS__ -#include <linux/vm_sockets.h> +#include "socket_helpers.h" -/* include/linux/net.h */ -#define SOCK_TYPE_MASK 0xf - -#define IO_TIMEOUT_SEC 30 -#define MAX_STRERR_LEN 256 #define MAX_TEST_NAME 80 -/* workaround for older vm_sockets.h */ -#ifndef VMADDR_CID_LOCAL -#define VMADDR_CID_LOCAL 1 -#endif - #define __always_unused __attribute__((__unused__)) -/* include/linux/cleanup.h */ -#define __get_and_null(p, nullvalue) \ - ({ \ - __auto_type __ptr = &(p); \ - __auto_type __val = *__ptr; \ - *__ptr = nullvalue; \ - __val; \ - }) - -#define take_fd(fd) __get_and_null(fd, -EBADF) - -#define _FAIL(errnum, fmt...) \ - ({ \ - error_at_line(0, (errnum), __func__, __LINE__, fmt); \ - CHECK_FAIL(true); \ - }) -#define FAIL(fmt...) _FAIL(0, fmt) -#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) -#define FAIL_LIBBPF(err, msg) \ - ({ \ - char __buf[MAX_STRERR_LEN]; \ - libbpf_strerror((err), __buf, sizeof(__buf)); \ - FAIL("%s: %s", (msg), __buf); \ - }) - -/* Wrappers that fail the test on error and report it. */ - -#define xaccept_nonblock(fd, addr, len) \ - ({ \ - int __ret = \ - accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ - if (__ret == -1) \ - FAIL_ERRNO("accept"); \ - __ret; \ - }) - -#define xbind(fd, addr, len) \ - ({ \ - int __ret = bind((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("bind"); \ - __ret; \ - }) - -#define xclose(fd) \ - ({ \ - int __ret = close((fd)); \ - if (__ret == -1) \ - FAIL_ERRNO("close"); \ - __ret; \ - }) - -#define xconnect(fd, addr, len) \ - ({ \ - int __ret = connect((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("connect"); \ - __ret; \ - }) - -#define xgetsockname(fd, addr, len) \ - ({ \ - int __ret = getsockname((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("getsockname"); \ - __ret; \ - }) - -#define xgetsockopt(fd, level, name, val, len) \ - ({ \ - int __ret = getsockopt((fd), (level), (name), (val), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("getsockopt(" #name ")"); \ - __ret; \ - }) - -#define xlisten(fd, backlog) \ - ({ \ - int __ret = listen((fd), (backlog)); \ - if (__ret == -1) \ - FAIL_ERRNO("listen"); \ - __ret; \ - }) - -#define xsetsockopt(fd, level, name, val, len) \ - ({ \ - int __ret = setsockopt((fd), (level), (name), (val), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("setsockopt(" #name ")"); \ - __ret; \ - }) - -#define xsend(fd, buf, len, flags) \ - ({ \ - ssize_t __ret = send((fd), (buf), (len), (flags)); \ - if (__ret == -1) \ - FAIL_ERRNO("send"); \ - __ret; \ - }) - -#define xrecv_nonblock(fd, buf, len, flags) \ - ({ \ - ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ - IO_TIMEOUT_SEC); \ - if (__ret == -1) \ - FAIL_ERRNO("recv"); \ - __ret; \ - }) - -#define xsocket(family, sotype, flags) \ - ({ \ - int __ret = socket(family, sotype, flags); \ - if (__ret == -1) \ - FAIL_ERRNO("socket"); \ - __ret; \ - }) - #define xbpf_map_delete_elem(fd, key) \ ({ \ int __ret = bpf_map_delete_elem((fd), (key)); \ @@ -193,130 +66,6 @@ __ret; \ }) -static inline void close_fd(int *fd) -{ - if (*fd >= 0) - xclose(*fd); -} - -#define __close_fd __attribute__((cleanup(close_fd))) - -static inline int poll_connect(int fd, unsigned int timeout_sec) -{ - struct timeval timeout = { .tv_sec = timeout_sec }; - fd_set wfds; - int r, eval; - socklen_t esize = sizeof(eval); - - FD_ZERO(&wfds); - FD_SET(fd, &wfds); - - r = select(fd + 1, NULL, &wfds, NULL, &timeout); - if (r == 0) - errno = ETIME; - if (r != 1) - return -1; - - if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0) - return -1; - if (eval != 0) { - errno = eval; - return -1; - } - - return 0; -} - -static inline int poll_read(int fd, unsigned int timeout_sec) -{ - struct timeval timeout = { .tv_sec = timeout_sec }; - fd_set rfds; - int r; - - FD_ZERO(&rfds); - FD_SET(fd, &rfds); - - r = select(fd + 1, &rfds, NULL, NULL, &timeout); - if (r == 0) - errno = ETIME; - - return r == 1 ? 0 : -1; -} - -static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, - unsigned int timeout_sec) -{ - if (poll_read(fd, timeout_sec)) - return -1; - - return accept(fd, addr, len); -} - -static inline int recv_timeout(int fd, void *buf, size_t len, int flags, - unsigned int timeout_sec) -{ - if (poll_read(fd, timeout_sec)) - return -1; - - return recv(fd, buf, len, flags); -} - -static inline void init_addr_loopback4(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); - - addr4->sin_family = AF_INET; - addr4->sin_port = 0; - addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); - *len = sizeof(*addr4); -} - -static inline void init_addr_loopback6(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); - - addr6->sin6_family = AF_INET6; - addr6->sin6_port = 0; - addr6->sin6_addr = in6addr_loopback; - *len = sizeof(*addr6); -} - -static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); - - addr->svm_family = AF_VSOCK; - addr->svm_port = VMADDR_PORT_ANY; - addr->svm_cid = VMADDR_CID_LOCAL; - *len = sizeof(*addr); -} - -static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, - socklen_t *len) -{ - switch (family) { - case AF_INET: - init_addr_loopback4(ss, len); - return; - case AF_INET6: - init_addr_loopback6(ss, len); - return; - case AF_VSOCK: - init_addr_loopback_vsock(ss, len); - return; - default: - FAIL("unsupported address family %d", family); - } -} - -static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) -{ - return (struct sockaddr *)ss; -} - static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) { u64 value; @@ -334,136 +83,4 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); } -static inline int enable_reuseport(int s, int progfd) -{ - int err, one = 1; - - err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); - if (err) - return -1; - err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, - sizeof(progfd)); - if (err) - return -1; - - return 0; -} - -static inline int socket_loopback_reuseport(int family, int sotype, int progfd) -{ - struct sockaddr_storage addr; - socklen_t len = 0; - int err, s; - - init_addr_loopback(family, &addr, &len); - - s = xsocket(family, sotype, 0); - if (s == -1) - return -1; - - if (progfd >= 0) - enable_reuseport(s, progfd); - - err = xbind(s, sockaddr(&addr), len); - if (err) - goto close; - - if (sotype & SOCK_DGRAM) - return s; - - err = xlisten(s, SOMAXCONN); - if (err) - goto close; - - return s; -close: - xclose(s); - return -1; -} - -static inline int socket_loopback(int family, int sotype) -{ - return socket_loopback_reuseport(family, sotype, -1); -} - -static inline int create_pair(int family, int sotype, int *p0, int *p1) -{ - __close_fd int s, c = -1, p = -1; - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int err; - - s = socket_loopback(family, sotype); - if (s < 0) - return s; - - err = xgetsockname(s, sockaddr(&addr), &len); - if (err) - return err; - - c = xsocket(family, sotype, 0); - if (c < 0) - return c; - - err = connect(c, sockaddr(&addr), len); - if (err) { - if (errno != EINPROGRESS) { - FAIL_ERRNO("connect"); - return err; - } - - err = poll_connect(c, IO_TIMEOUT_SEC); - if (err) { - FAIL_ERRNO("poll_connect"); - return err; - } - } - - switch (sotype & SOCK_TYPE_MASK) { - case SOCK_DGRAM: - err = xgetsockname(c, sockaddr(&addr), &len); - if (err) - return err; - - err = xconnect(s, sockaddr(&addr), len); - if (err) - return err; - - *p0 = take_fd(s); - break; - case SOCK_STREAM: - case SOCK_SEQPACKET: - p = xaccept_nonblock(s, NULL, NULL); - if (p < 0) - return p; - - *p0 = take_fd(p); - break; - default: - FAIL("Unsupported socket type %#x", sotype); - return -EOPNOTSUPP; - } - - *p1 = take_fd(c); - return 0; -} - -static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, - int *p0, int *p1) -{ - int err; - - err = create_pair(family, sotype, c0, p0); - if (err) - return err; - - err = create_pair(family, sotype, c1, p1); - if (err) { - close(*c0); - close(*p0); - } - - return err; -} - #endif // __SOCKMAP_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 05d0e07da394..ba6b3ec1156a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -2,7 +2,7 @@ #include <test_progs.h> #include "cgroup_helpers.h" -#include <linux/tcp.h> +#include <netinet/tcp.h> #include <linux/netlink.h> #include "sockopt_sk.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c new file mode 100644 index 000000000000..74752233e779 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <test_progs.h> +#include <linux/pkt_cls.h> + +#include "test_tc_change_tail.skel.h" +#include "socket_helpers.h" + +#define LO_IFINDEX 1 + +void test_tc_change_tail(void) +{ + LIBBPF_OPTS(bpf_tcx_opts, tcx_opts); + struct test_tc_change_tail *skel = NULL; + struct bpf_link *link; + int c1, p1; + char buf[2]; + int ret; + + skel = test_tc_change_tail__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tc_change_tail__open_and_load")) + return; + + link = bpf_program__attach_tcx(skel->progs.change_tail, LO_IFINDEX, + &tcx_opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_tcx")) + goto destroy; + + skel->links.change_tail = link; + ret = create_pair(AF_INET, SOCK_DGRAM, &c1, &p1); + if (!ASSERT_OK(ret, "create_pair")) + goto destroy; + + ret = xsend(p1, "Tr", 2, 0); + ASSERT_EQ(ret, 2, "xsend(p1)"); + ret = recv(c1, buf, 2, 0); + ASSERT_EQ(ret, 2, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + ret = xsend(p1, "G", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 2, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + ret = xsend(p1, "E", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 1, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + + ret = xsend(p1, "Z", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 1, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + + close(c1); + close(p1); +destroy: + test_tc_change_tail__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c index 151a4210028f..2461d183dee5 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c @@ -14,10 +14,16 @@ #include "netlink_helpers.h" #include "tc_helpers.h" +#define NETKIT_HEADROOM 32 +#define NETKIT_TAILROOM 8 + #define MARK 42 #define PRIO 0xeb9f #define ICMP_ECHO 8 +#define FLAG_ADJUST_ROOM (1 << 0) +#define FLAG_SAME_NETNS (1 << 1) + struct icmphdr { __u8 type; __u8 code; @@ -35,7 +41,7 @@ struct iplink_req { }; static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, - bool same_netns, int scrub, int peer_scrub) + int scrub, int peer_scrub, __u32 flags) { struct rtnl_handle rth = { .fd = -1 }; struct iplink_req req = {}; @@ -63,6 +69,10 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub); addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub); addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); + if (flags & FLAG_ADJUST_ROOM) { + addattr16(&req.n, sizeof(req), IFLA_NETKIT_HEADROOM, NETKIT_HEADROOM); + addattr16(&req.n, sizeof(req), IFLA_NETKIT_TAILROOM, NETKIT_TAILROOM); + } addattr_nest_end(&req.n, data); addattr_nest_end(&req.n, linkinfo); @@ -87,7 +97,7 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, " addr ee:ff:bb:cc:aa:dd"), "set hwaddress"); } - if (same_netns) { + if (flags & FLAG_SAME_NETNS) { ASSERT_OK(system("ip link set dev " netkit_peer " up"), "up peer"); ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"), @@ -184,8 +194,8 @@ void serial_test_tc_netkit_basic(void) int err, ifindex; err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -299,8 +309,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -428,8 +438,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -543,8 +553,8 @@ void serial_test_tc_netkit_device(void) int err, ifindex, ifindex2; err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS, - &ifindex, true, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS); if (err) return; @@ -655,8 +665,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -733,8 +743,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode) struct bpf_link *link; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, true, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS); if (err) return; @@ -799,7 +809,7 @@ void serial_test_tc_netkit_pkt_type(void) serial_test_tc_netkit_pkt_type_mode(NETKIT_L3); } -static void serial_test_tc_netkit_scrub_type(int scrub) +static void serial_test_tc_netkit_scrub_type(int scrub, bool room) { LIBBPF_OPTS(bpf_netkit_opts, optl); struct test_tc_link *skel; @@ -807,7 +817,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub) int err, ifindex; err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, scrub, scrub); + &ifindex, scrub, scrub, + room ? FLAG_ADJUST_ROOM : 0); if (err) return; @@ -842,6 +853,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub) ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8"); ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark"); ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio"); + ASSERT_EQ(skel->bss->headroom, room ? NETKIT_HEADROOM : 0, "headroom"); + ASSERT_EQ(skel->bss->tailroom, room ? NETKIT_TAILROOM : 0, "tailroom"); cleanup: test_tc_link__destroy(skel); @@ -852,6 +865,6 @@ cleanup: void serial_test_tc_netkit_scrub(void) { - serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT); - serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE); + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT, false); + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE, true); } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 3ee40ee9413a..8a0e1ff8a2dc 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -52,6 +52,8 @@ #include "verifier_map_ptr_mixing.skel.h" #include "verifier_map_ret_val.skel.h" #include "verifier_masking.skel.h" +#include "verifier_may_goto_1.skel.h" +#include "verifier_may_goto_2.skel.h" #include "verifier_meta_access.skel.h" #include "verifier_movsx.skel.h" #include "verifier_mtu.skel.h" @@ -98,6 +100,7 @@ #include "verifier_xdp_direct_packet_access.skel.h" #include "verifier_bits_iter.skel.h" #include "verifier_lsm.skel.h" +#include "irq.skel.h" #define MAX_ENTRIES 11 @@ -181,6 +184,8 @@ void test_verifier_map_ptr(void) { RUN(verifier_map_ptr); } void test_verifier_map_ptr_mixing(void) { RUN(verifier_map_ptr_mixing); } void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); } void test_verifier_masking(void) { RUN(verifier_masking); } +void test_verifier_may_goto_1(void) { RUN(verifier_may_goto_1); } +void test_verifier_may_goto_2(void) { RUN(verifier_may_goto_2); } void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_movsx(void) { RUN(verifier_movsx); } void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } @@ -225,6 +230,7 @@ void test_verifier_xdp(void) { RUN(verifier_xdp); } void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); } void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } void test_verifier_lsm(void) { RUN(verifier_lsm); } +void test_irq(void) { RUN(irq); } void test_verifier_mtu(void) { RUN(verifier_mtu); } static int init_test_val_map(struct bpf_object *obj, char *map_name) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index 53d6ad8c2257..b2b2d85dbb1b 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -82,6 +82,8 @@ static void test_xdp_adjust_tail_grow2(void) /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */ #if defined(__s390x__) int tailroom = 512; +#elif defined(__powerpc__) + int tailroom = 384; #else int tailroom = 320; #endif diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index 6d8b54124cb3..fb952703653e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -17,7 +17,7 @@ #include "network_helpers.h" #include <linux/if_bonding.h> #include <linux/limits.h> -#include <linux/udp.h> +#include <netinet/udp.h> #include <uapi/linux/netdev.h> #include "xdp_dummy.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index e6a783c7f5db..937da9b7532a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -2,6 +2,14 @@ #include <test_progs.h> #include <network_helpers.h> #include "test_xdp_context_test_run.skel.h" +#include "test_xdp_meta.skel.h" + +#define TX_ADDR "10.0.0.1" +#define RX_ADDR "10.0.0.2" +#define RX_NAME "veth0" +#define TX_NAME "veth1" +#define TX_NETNS "xdp_context_tx" +#define RX_NETNS "xdp_context_rx" void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts, __u32 data_meta, __u32 data, __u32 data_end, @@ -103,3 +111,82 @@ void test_xdp_context_test_run(void) test_xdp_context_test_run__destroy(skel); } + +void test_xdp_context_functional(void) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct netns_obj *rx_ns = NULL, *tx_ns = NULL; + struct bpf_program *tc_prog, *xdp_prog; + struct test_xdp_meta *skel = NULL; + struct nstoken *nstoken = NULL; + int rx_ifindex; + int ret; + + tx_ns = netns_new(TX_NETNS, false); + if (!ASSERT_OK_PTR(tx_ns, "create tx_ns")) + return; + + rx_ns = netns_new(RX_NETNS, false); + if (!ASSERT_OK_PTR(rx_ns, "create rx_ns")) + goto close; + + SYS(close, "ip link add " RX_NAME " netns " RX_NETNS + " type veth peer name " TX_NAME " netns " TX_NETNS); + + nstoken = open_netns(RX_NETNS); + if (!ASSERT_OK_PTR(nstoken, "setns rx_ns")) + goto close; + + SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME); + SYS(close, "ip link set dev " RX_NAME " up"); + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + goto close; + + rx_ifindex = if_nametoindex(RX_NAME); + if (!ASSERT_GE(rx_ifindex, 0, "if_nametoindex rx")) + goto close; + + tc_hook.ifindex = rx_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_prog = bpf_object__find_program_by_name(skel->obj, "ing_cls"); + if (!ASSERT_OK_PTR(tc_prog, "open ing_cls prog")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(tc_prog); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + xdp_prog = bpf_object__find_program_by_name(skel->obj, "ing_xdp"); + if (!ASSERT_OK_PTR(xdp_prog, "open ing_xdp prog")) + goto close; + + ret = bpf_xdp_attach(rx_ifindex, + bpf_program__fd(xdp_prog), + 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + close_netns(nstoken); + + nstoken = open_netns(TX_NETNS); + if (!ASSERT_OK_PTR(nstoken, "setns tx_ns")) + goto close; + + SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME); + SYS(close, "ip link set dev " TX_NAME " up"); + ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping"); + +close: + close_netns(nstoken); + test_xdp_meta__destroy(skel); + netns_free(rx_ns); + netns_free(tx_ns); +} + diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index bad0ea167be7..7dac044664ac 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -7,10 +7,11 @@ #include <linux/if_link.h> #include <linux/ipv6.h> #include <linux/in6.h> -#include <linux/udp.h> +#include <netinet/udp.h> #include <bpf/bpf_endian.h> #include <uapi/linux/netdev.h> #include "test_xdp_do_redirect.skel.h" +#include "xdp_dummy.skel.h" struct udp_packet { struct ethhdr eth; @@ -246,3 +247,166 @@ out: SYS_NOFAIL("ip netns del testns"); test_xdp_do_redirect__destroy(skel); } + +#define NS_NB 3 +#define NS0 "NS0" +#define NS1 "NS1" +#define NS2 "NS2" +#define IPV4_NETWORK "10.1.1" +#define VETH1_INDEX 111 +#define VETH2_INDEX 222 + +struct test_data { + struct netns_obj *ns[NS_NB]; + u32 xdp_flags; +}; + +static void cleanup(struct test_data *data) +{ + int i; + + for (i = 0; i < NS_NB; i++) + netns_free(data->ns[i]); +} + +/** + * ping_setup - + * Create two veth peers and forward packets in-between using XDP + * + * ------------ ------------ + * | NS1 | | NS2 | + * | veth0 | | veth0 | + * | 10.1.1.1 | | 10.1.1.2 | + * -----|------ ------|----- + * | | + * | | + * -----|-----------------------|------- + * | veth1 veth2 | + * | (id:111) (id:222) | + * | | | | + * | ----- xdp forwarding ----- | + * | | + * | NS0 | + * ------------------------------------- + */ +static int ping_setup(struct test_data *data) +{ + int i; + + data->ns[0] = netns_new(NS0, false); + if (!ASSERT_OK_PTR(data->ns[0], "create ns")) + return -1; + + for (i = 1; i < NS_NB; i++) { + char ns_name[4] = {}; + + snprintf(ns_name, 4, "NS%d", i); + data->ns[i] = netns_new(ns_name, false); + if (!ASSERT_OK_PTR(data->ns[i], "create ns")) + goto fail; + + SYS(fail, + "ip -n %s link add veth%d index %d%d%d type veth peer name veth0 netns %s", + NS0, i, i, i, i, ns_name); + SYS(fail, "ip -n %s link set veth%d up", NS0, i); + + SYS(fail, "ip -n %s addr add %s.%d/24 dev veth0", ns_name, IPV4_NETWORK, i); + SYS(fail, "ip -n %s link set veth0 up", ns_name); + } + + return 0; + +fail: + cleanup(data); + return -1; +} + +static void ping_test(struct test_data *data) +{ + struct test_xdp_do_redirect *skel = NULL; + struct xdp_dummy *skel_dummy = NULL; + struct nstoken *nstoken = NULL; + int i, ret; + + skel_dummy = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(skel_dummy, "open and load xdp_dummy skeleton")) + goto close; + + for (i = 1; i < NS_NB; i++) { + char ns_name[4] = {}; + + snprintf(ns_name, 4, "NS%d", i); + nstoken = open_netns(ns_name); + if (!ASSERT_OK_PTR(nstoken, "open ns")) + goto close; + + ret = bpf_xdp_attach(if_nametoindex("veth0"), + bpf_program__fd(skel_dummy->progs.xdp_dummy_prog), + data->xdp_flags, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach dummy_prog")) + goto close; + + close_netns(nstoken); + nstoken = NULL; + } + + skel = test_xdp_do_redirect__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + goto close; + + nstoken = open_netns(NS0); + if (!ASSERT_OK_PTR(nstoken, "open NS0")) + goto close; + + ret = bpf_xdp_attach(VETH2_INDEX, + bpf_program__fd(skel->progs.xdp_redirect_to_111), + data->xdp_flags, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + ret = bpf_xdp_attach(VETH1_INDEX, + bpf_program__fd(skel->progs.xdp_redirect_to_222), + data->xdp_flags, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + close_netns(nstoken); + nstoken = NULL; + + nstoken = open_netns(NS1); + if (!ASSERT_OK_PTR(nstoken, "open NS1")) + goto close; + + SYS(close, "ping -c 1 %s.2 > /dev/null", IPV4_NETWORK); + +close: + close_netns(nstoken); + xdp_dummy__destroy(skel_dummy); + test_xdp_do_redirect__destroy(skel); +} + + +static void xdp_redirect_ping(u32 xdp_flags) +{ + struct test_data data = {}; + + if (ping_setup(&data) < 0) + return; + + data.xdp_flags = xdp_flags; + ping_test(&data); + cleanup(&data); +} + +void test_xdp_index_redirect(void) +{ + if (test__start_subtest("noflag")) + xdp_redirect_ping(0); + + if (test__start_subtest("drvflag")) + xdp_redirect_ping(XDP_FLAGS_DRV_MODE); + + if (test__start_subtest("skbflag")) + xdp_redirect_ping(XDP_FLAGS_SKB_MODE); +} + diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c index e1bf141d3401..3f9146d83d79 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c @@ -3,7 +3,7 @@ #include <network_helpers.h> #include <bpf/btf.h> #include <linux/if_link.h> -#include <linux/udp.h> +#include <netinet/udp.h> #include <net/if.h> #include <unistd.h> diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index c87ee2bf558c..3d47878ef6bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -10,7 +10,7 @@ #include <linux/errqueue.h> #include <linux/if_link.h> #include <linux/net_tstamp.h> -#include <linux/udp.h> +#include <netinet/udp.h> #include <sys/mman.h> #include <net/if.h> #include <poll.h> @@ -133,23 +133,6 @@ static void close_xsk(struct xsk *xsk) munmap(xsk->umem_area, UMEM_SIZE); } -static void ip_csum(struct iphdr *iph) -{ - __u32 sum = 0; - __u16 *p; - int i; - - iph->check = 0; - p = (void *)iph; - for (i = 0; i < sizeof(*iph) / sizeof(*p); i++) - sum += p[i]; - - while (sum >> 16) - sum = (sum & 0xffff) + (sum >> 16); - - iph->check = ~sum; -} - static int generate_packet(struct xsk *xsk, __u16 dst_port) { struct xsk_tx_metadata *meta; @@ -192,7 +175,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) iph->protocol = IPPROTO_UDP; ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)"); ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)"); - ip_csum(iph); + iph->check = build_ip_csum(iph); udph->source = htons(UDP_SOURCE_PORT); udph->dest = htons(dst_port); diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops.c b/tools/testing/selftests/bpf/progs/bad_struct_ops.c index b7e175cd0af0..b3f77b4561c8 100644 --- a/tools/testing/selftests/bpf/progs/bad_struct_ops.c +++ b/tools/testing/selftests/bpf/progs/bad_struct_ops.c @@ -3,7 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 56c764df8196..5d6fc7f01ebb 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -2,7 +2,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" struct map_value { struct prog_test_ref_kfunc __kptr *ptr; diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c b/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c new file mode 100644 index 000000000000..e32b07d802bb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +__u32 data_end; + +SEC("cgroup_skb/ingress") +int direct_packet_access(struct __sk_buff *skb) +{ + data_end = skb->data_end; + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c new file mode 100644 index 000000000000..43cada48b28a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__noinline +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +__noinline __weak +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +SEC("?tc") +int main_with_subprogs(struct __sk_buff *sk) +{ + changes_pkt_data(sk); + does_not_change_pkt_data(sk); + return 0; +} + +SEC("?tc") +int main_changes(struct __sk_buff *sk) +{ + bpf_skb_pull_data(sk, 0); + return 0; +} + +SEC("?tc") +int main_does_not_change(struct __sk_buff *sk) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c new file mode 100644 index 000000000000..f9a622705f1b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("?freplace") +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +SEC("?freplace") +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index dfd817d0348c..bd8f15229f5c 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -192,7 +192,7 @@ done: /* Can't add a dynptr to a map */ SEC("?raw_tp") -__failure __msg("invalid indirect read from stack") +__failure __msg("invalid read from stack") int add_dynptr_to_map1(void *ctx) { struct bpf_dynptr ptr; @@ -210,7 +210,7 @@ int add_dynptr_to_map1(void *ctx) /* Can't add a struct with an embedded dynptr to a map */ SEC("?raw_tp") -__failure __msg("invalid indirect read from stack") +__failure __msg("invalid read from stack") int add_dynptr_to_map2(void *ctx) { struct test_info x; @@ -398,7 +398,7 @@ int data_slice_missing_null_check2(void *ctx) * dynptr argument */ SEC("?raw_tp") -__failure __msg("invalid indirect read from stack") +__failure __msg("invalid read from stack") int invalid_helper1(void *ctx) { struct bpf_dynptr ptr; diff --git a/tools/testing/selftests/bpf/progs/epilogue_exit.c b/tools/testing/selftests/bpf/progs/epilogue_exit.c index 33d3a57bee90..35fec7c75bef 100644 --- a/tools/testing/selftests/bpf/progs/epilogue_exit.c +++ b/tools/testing/selftests/bpf/progs/epilogue_exit.c @@ -4,8 +4,8 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/epilogue_tailcall.c b/tools/testing/selftests/bpf/progs/epilogue_tailcall.c index 7275dd594de0..153514691ba4 100644 --- a/tools/testing/selftests/bpf/progs/epilogue_tailcall.c +++ b/tools/testing/selftests/bpf/progs/epilogue_tailcall.c @@ -4,8 +4,8 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index fe0f3fa5aab6..8a0fdff89927 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -131,7 +131,7 @@ int reject_subprog_with_lock(void *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region") int reject_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); @@ -147,7 +147,7 @@ __noinline static int throwing_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region") int reject_subprog_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); diff --git a/tools/testing/selftests/bpf/progs/find_vma.c b/tools/testing/selftests/bpf/progs/find_vma.c index 38034fb82530..02b82774469c 100644 --- a/tools/testing/selftests/bpf/progs/find_vma.c +++ b/tools/testing/selftests/bpf/progs/find_vma.c @@ -25,7 +25,7 @@ static long check_vma(struct task_struct *task, struct vm_area_struct *vma, { if (vma->vm_file) bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1, - vma->vm_file->f_path.dentry->d_iname); + vma->vm_file->f_path.dentry->d_shortname.string); /* check for VM_EXEC */ if (vma->vm_flags & VM_EXEC) diff --git a/tools/testing/selftests/bpf/progs/free_timer.c b/tools/testing/selftests/bpf/progs/free_timer.c new file mode 100644 index 000000000000..4501ae8fc414 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/free_timer.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025. Huawei Technologies Co., Ltd */ +#include <linux/bpf.h> +#include <time.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#define MAX_ENTRIES 8 + +struct map_value { + struct bpf_timer timer; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, MAX_ENTRIES); +} map SEC(".maps"); + +static int timer_cb(void *map, void *key, struct map_value *value) +{ + volatile int sum = 0; + int i; + + bpf_for(i, 0, 1024 * 1024) sum += i; + + return 0; +} + +static int start_cb(int key) +{ + struct map_value *value; + + value = bpf_map_lookup_elem(&map, (void *)&key); + if (!value) + return 0; + + bpf_timer_init(&value->timer, &map, CLOCK_MONOTONIC); + bpf_timer_set_callback(&value->timer, timer_cb); + /* Hope 100us will be enough to wake-up and run the overwrite thread */ + bpf_timer_start(&value->timer, 100000, BPF_F_TIMER_CPU_PIN); + + return 0; +} + +static int overwrite_cb(int key) +{ + struct map_value zero = {}; + + /* Free the timer which may run on other CPU */ + bpf_map_update_elem(&map, (void *)&key, &zero, BPF_ANY); + + return 0; +} + +SEC("syscall") +int BPF_PROG(start_timer) +{ + bpf_loop(MAX_ENTRIES, start_cb, NULL, 0); + return 0; +} + +SEC("syscall") +int BPF_PROG(overwrite_timer) +{ + bpf_loop(MAX_ENTRIES, overwrite_cb, NULL, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c new file mode 100644 index 000000000000..b0b53d980964 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/irq.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +unsigned long global_flags; + +extern void bpf_local_irq_save(unsigned long *) __weak __ksym; +extern void bpf_local_irq_restore(unsigned long *) __weak __ksym; +extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym; + +SEC("?tc") +__failure __msg("arg#0 doesn't point to an irq flag on stack") +int irq_save_bad_arg(struct __sk_buff *ctx) +{ + bpf_local_irq_save(&global_flags); + return 0; +} + +SEC("?tc") +__failure __msg("arg#0 doesn't point to an irq flag on stack") +int irq_restore_bad_arg(struct __sk_buff *ctx) +{ + bpf_local_irq_restore(&global_flags); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_2(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_save(&flags3); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3_minus_2(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_save(&flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_restore(&flags2); + return 0; +} + +static __noinline void local_irq_save(unsigned long *flags) +{ + bpf_local_irq_save(flags); +} + +static __noinline void local_irq_restore(unsigned long *flags) +{ + bpf_local_irq_restore(flags); +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_1_subprog(struct __sk_buff *ctx) +{ + unsigned long flags; + + local_irq_save(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_2_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + + local_irq_save(&flags1); + local_irq_save(&flags2); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + return 0; +} + +SEC("?tc") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region") +int irq_restore_missing_3_minus_2_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + local_irq_restore(&flags3); + local_irq_restore(&flags2); + return 0; +} + +SEC("?tc") +__success +int irq_balance(struct __sk_buff *ctx) +{ + unsigned long flags; + + local_irq_save(&flags); + local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__success +int irq_balance_n(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + local_irq_restore(&flags3); + local_irq_restore(&flags2); + local_irq_restore(&flags1); + return 0; +} + +static __noinline void local_irq_balance(void) +{ + unsigned long flags; + + local_irq_save(&flags); + local_irq_restore(&flags); +} + +static __noinline void local_irq_balance_n(void) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save(&flags1); + local_irq_save(&flags2); + local_irq_save(&flags3); + local_irq_restore(&flags3); + local_irq_restore(&flags2); + local_irq_restore(&flags1); +} + +SEC("?tc") +__success +int irq_balance_subprog(struct __sk_buff *ctx) +{ + local_irq_balance(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("sleepable helper bpf_copy_from_user#") +int irq_sleepable_helper(void *ctx) +{ + unsigned long flags; + u32 data; + + local_irq_save(&flags); + bpf_copy_from_user(&data, sizeof(data), NULL); + local_irq_restore(&flags); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("kernel func bpf_copy_from_user_str is sleepable within IRQ-disabled region") +int irq_sleepable_kfunc(void *ctx) +{ + unsigned long flags; + u32 data; + + local_irq_save(&flags); + bpf_copy_from_user_str(&data, sizeof(data), NULL, 0); + local_irq_restore(&flags); + return 0; +} + +int __noinline global_local_irq_balance(void) +{ + local_irq_balance_n(); + return 0; +} + +SEC("?tc") +__failure __msg("global function calls are not allowed with IRQs disabled") +int irq_global_subprog(struct __sk_buff *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + global_local_irq_balance(); + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_restore_ooo(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_restore(&flags1); + bpf_local_irq_restore(&flags2); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_restore_ooo_3(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags2); + bpf_local_irq_restore(&flags2); + bpf_local_irq_save(&flags3); + bpf_local_irq_restore(&flags1); + bpf_local_irq_restore(&flags3); + return 0; +} + +static __noinline void local_irq_save_3(unsigned long *flags1, unsigned long *flags2, + unsigned long *flags3) +{ + local_irq_save(flags1); + local_irq_save(flags2); + local_irq_save(flags3); +} + +SEC("?tc") +__success +int irq_restore_3_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save_3(&flags1, &flags2, &flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_restore(&flags2); + bpf_local_irq_restore(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_restore_4_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + unsigned long flags4; + + local_irq_save_3(&flags1, &flags2, &flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_save(&flags4); + bpf_local_irq_restore(&flags4); + bpf_local_irq_restore(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_restore_ooo_3_subprog(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags2; + unsigned long flags3; + + local_irq_save_3(&flags1, &flags2, &flags3); + bpf_local_irq_restore(&flags3); + bpf_local_irq_restore(&flags2); + bpf_local_irq_save(&flags3); + bpf_local_irq_restore(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_restore_invalid(struct __sk_buff *ctx) +{ + unsigned long flags1; + unsigned long flags = 0xfaceb00c; + + bpf_local_irq_save(&flags1); + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("expected uninitialized") +int irq_save_invalid(struct __sk_buff *ctx) +{ + unsigned long flags1; + + bpf_local_irq_save(&flags1); + bpf_local_irq_save(&flags1); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_restore_iter(struct __sk_buff *ctx) +{ + struct bpf_iter_num it; + + bpf_iter_num_new(&it, 0, 42); + bpf_local_irq_restore((unsigned long *)&it); + return 0; +} + +SEC("?tc") +__failure __msg("Unreleased reference id=1") +int irq_save_iter(struct __sk_buff *ctx) +{ + struct bpf_iter_num it; + + /* Ensure same sized slot has st->ref_obj_id set, so we reject based on + * slot_type != STACK_IRQ_FLAG... + */ + _Static_assert(sizeof(it) == sizeof(unsigned long), "broken iterator size"); + + bpf_iter_num_new(&it, 0, 42); + bpf_local_irq_save((unsigned long *)&it); + bpf_local_irq_restore((unsigned long *)&it); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_flag_overwrite(struct __sk_buff *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + flags = 0xdeadbeef; + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("expected an initialized") +int irq_flag_overwrite_partial(struct __sk_buff *ctx) +{ + unsigned long flags; + + bpf_local_irq_save(&flags); + *(((char *)&flags) + 1) = 0xff; + bpf_local_irq_restore(&flags); + return 0; +} + +SEC("?tc") +__failure __msg("cannot restore irq state out of order") +int irq_ooo_refs_array(struct __sk_buff *ctx) +{ + unsigned long flags[4]; + struct { int i; } *p; + + /* refs=1 */ + bpf_local_irq_save(&flags[0]); + + /* refs=1,2 */ + p = bpf_obj_new(typeof(*p)); + if (!p) { + bpf_local_irq_restore(&flags[0]); + return 0; + } + + /* refs=1,2,3 */ + bpf_local_irq_save(&flags[1]); + + /* refs=1,2,3,4 */ + bpf_local_irq_save(&flags[2]); + + /* Now when we remove ref=2, the verifier must not break the ordering in + * the refs array between 1,3,4. With an older implementation, the + * verifier would swap the last element with the removed element, but to + * maintain the stack property we need to use memmove. + */ + bpf_obj_drop(p); + + /* Save and restore to reset active_irq_id to 3, as the ordering is now + * refs=1,4,3. When restoring the linear scan will find prev_id in order + * as 3 instead of 4. + */ + bpf_local_irq_save(&flags[3]); + bpf_local_irq_restore(&flags[3]); + + /* With the incorrect implementation, we can release flags[1], flags[2], + * and flags[0], i.e. in the wrong order. + */ + bpf_local_irq_restore(&flags[1]); + bpf_local_irq_restore(&flags[2]); + bpf_local_irq_restore(&flags[0]); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 7c969c127573..190822b2f08b 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -524,11 +524,11 @@ int iter_subprog_iters(const void *ctx) } struct { - __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); __type(value, int); __uint(max_entries, 1000); -} arr_map SEC(".maps"); +} hash_map SEC(".maps"); SEC("?raw_tp") __failure __msg("invalid mem access 'scalar'") @@ -539,7 +539,7 @@ int iter_err_too_permissive1(const void *ctx) MY_PID_GUARD(); - map_val = bpf_map_lookup_elem(&arr_map, &key); + map_val = bpf_map_lookup_elem(&hash_map, &key); if (!map_val) return 0; @@ -561,12 +561,12 @@ int iter_err_too_permissive2(const void *ctx) MY_PID_GUARD(); - map_val = bpf_map_lookup_elem(&arr_map, &key); + map_val = bpf_map_lookup_elem(&hash_map, &key); if (!map_val) return 0; bpf_repeat(1000000) { - map_val = bpf_map_lookup_elem(&arr_map, &key); + map_val = bpf_map_lookup_elem(&hash_map, &key); } *map_val = 123; @@ -585,7 +585,7 @@ int iter_err_too_permissive3(const void *ctx) MY_PID_GUARD(); bpf_repeat(1000000) { - map_val = bpf_map_lookup_elem(&arr_map, &key); + map_val = bpf_map_lookup_elem(&hash_map, &key); found = true; } @@ -606,7 +606,7 @@ int iter_tricky_but_fine(const void *ctx) MY_PID_GUARD(); bpf_repeat(1000000) { - map_val = bpf_map_lookup_elem(&arr_map, &key); + map_val = bpf_map_lookup_elem(&hash_map, &key); if (map_val) { found = true; break; diff --git a/tools/testing/selftests/bpf/progs/iters_testmod.c b/tools/testing/selftests/bpf/progs/iters_testmod.c index df1d3db60b1b..9e4b45201e69 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod.c @@ -4,7 +4,7 @@ #include "bpf_experimental.h" #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c index f9789e668297..82190d79de37 100644 --- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c +++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c @@ -3,7 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" static struct prog_test_ref_kfunc __kptr *v; long total_sum = -1; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c index 7632d9ecb253..b9670e9a6e3d 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <vmlinux.h> #include <bpf/bpf_helpers.h> -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" SEC("tc") int kfunc_destructive_test(void) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c index 08fae306539c..a1963497f0bf 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c @@ -2,7 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" struct syscall_test_args { __u8 data[16]; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c index d532af07decf..48f64827cd93 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_race.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <vmlinux.h> #include <bpf/bpf_helpers.h> -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" SEC("tc") int kfunc_call_fail(struct __sk_buff *ctx) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index f502f755f567..8b86113a0126 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -2,7 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" SEC("tc") int kfunc_call_test4(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c index 2380c75e74ce..8e150e85b50d 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" extern const int bpf_prog_active __ksym; int active_res = -1; diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index b092a72b2c9d..d736506a4c80 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -6,7 +6,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> #include "../bpf_experimental.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" struct plain_local; diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index ab0ce1d01a4a..edaba481db9d 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -2,7 +2,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" struct map_value { struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index 450bb373b179..4c0ff01f1a96 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -4,7 +4,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" struct map_value { char buf[8]; @@ -345,7 +345,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("Unreleased reference id=5 alloc_insn=") +__failure __msg("Unreleased reference id=4 alloc_insn=") int kptr_xchg_ref_state(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe.c b/tools/testing/selftests/bpf/progs/missed_kprobe.c index 7f9ef701f5de..51a4fe64c917 100644 --- a/tools/testing/selftests/bpf/progs/missed_kprobe.c +++ b/tools/testing/selftests/bpf/progs/missed_kprobe.c @@ -2,7 +2,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c index 8ea71cbd6c45..29c18d869ec1 100644 --- a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c +++ b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c @@ -2,7 +2,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; @@ -46,3 +46,9 @@ int test5(struct pt_regs *ctx) { return 0; } + +SEC("kprobe.session/bpf_kfunc_common_test") +int test6(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/nested_acquire.c b/tools/testing/selftests/bpf/progs/nested_acquire.c index 8e521a21d995..49ad7b9adf56 100644 --- a/tools/testing/selftests/bpf/progs/nested_acquire.c +++ b/tools/testing/selftests/bpf/progs/nested_acquire.c @@ -4,7 +4,7 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c index 885377e83607..6c5797bf0ead 100644 --- a/tools/testing/selftests/bpf/progs/preempt_lock.c +++ b/tools/testing/selftests/bpf/progs/preempt_lock.c @@ -5,8 +5,10 @@ #include "bpf_misc.h" #include "bpf_experimental.h" +extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym; + SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_1(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -14,7 +16,7 @@ int preempt_lock_missing_1(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -23,7 +25,7 @@ int preempt_lock_missing_2(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_3(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -33,7 +35,7 @@ int preempt_lock_missing_3(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -55,7 +57,7 @@ static __noinline void preempt_enable(void) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_1_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -63,7 +65,7 @@ int preempt_lock_missing_1_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -72,7 +74,7 @@ int preempt_lock_missing_2_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -113,6 +115,18 @@ int preempt_sleepable_helper(void *ctx) return 0; } +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("kernel func bpf_copy_from_user_str is sleepable within non-preemptible region") +int preempt_sleepable_kfunc(void *ctx) +{ + u32 data; + + bpf_preempt_disable(); + bpf_copy_from_user_str(&data, sizeof(data), NULL, 0); + bpf_preempt_enable(); + return 0; +} + int __noinline preempt_global_subprog(void) { preempt_balance_subprog(); diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue.c b/tools/testing/selftests/bpf/progs/pro_epilogue.c index 44bc3f06b4b6..d97d6e07ef5c 100644 --- a/tools/testing/selftests/bpf/progs/pro_epilogue.c +++ b/tools/testing/selftests/bpf/progs/pro_epilogue.c @@ -4,8 +4,8 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c b/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c index 3529e53be355..6048d79be48b 100644 --- a/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c +++ b/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c @@ -4,8 +4,8 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c index 457f34c151e3..5927054b6dd9 100644 --- a/tools/testing/selftests/bpf/progs/raw_tp_null.c +++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c @@ -3,6 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -17,16 +18,14 @@ int BPF_PROG(test_raw_tp_null, struct sk_buff *skb) if (task->pid != tid) return 0; - i = i + skb->mark + 1; - /* The compiler may move the NULL check before this deref, which causes - * the load to fail as deref of scalar. Prevent that by using a barrier. + /* If dead code elimination kicks in, the increment +=2 will be + * removed. For raw_tp programs attaching to tracepoints in kernel + * modules, we mark input arguments as PTR_MAYBE_NULL, so branch + * prediction should never kick in. */ - barrier(); - /* If dead code elimination kicks in, the increment below will - * be removed. For raw_tp programs, we mark input arguments as - * PTR_MAYBE_NULL, so branch prediction should never kick in. - */ - if (!skb) - i += 2; + asm volatile ("%[i] += 1; if %[ctx] != 0 goto +1; %[i] += 2;" + : [i]"+r"(i) + : [ctx]"r"(skb) + : "memory"); return 0; } diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c new file mode 100644 index 000000000000..38d669957bf1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +/* Ensure module parameter has PTR_MAYBE_NULL */ +SEC("tp_btf/bpf_testmod_test_raw_tp_null") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) { + asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); + return 0; +} + +/* Check NULL marking */ +SEC("tp_btf/sched_pi_setprio") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int test_raw_tp_null_sched_pi_setprio_arg_2(void *ctx) { + asm volatile("r1 = *(u64 *)(r1 +8); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/sock_addr_kern.c b/tools/testing/selftests/bpf/progs/sock_addr_kern.c index 8386bb15ccdc..84ad515eafd6 100644 --- a/tools/testing/selftests/bpf/progs/sock_addr_kern.c +++ b/tools/testing/selftests/bpf/progs/sock_addr_kern.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Google LLC */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" SEC("syscall") int init_sock(struct init_sock_args *args) diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c index d7fdcabe7d90..284a5b008e0c 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_detach.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c index 3c822103bd40..d8cc99f5c2e2 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ #include <vmlinux.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c index b450f72e744a..ccab3935aa42 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ #include <vmlinux.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c index 6283099ec383..8b5515f4f724 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ #include <vmlinux.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c index 4c56d4a9d9f4..71c420c3a5a6 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_module.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c @@ -3,7 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c index 9efcc6e4d356..5b23ea817f1f 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c @@ -3,7 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c index fa2021388485..5d0937fa07be 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c @@ -2,7 +2,7 @@ /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ #include <vmlinux.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c index 8ea57e5348ab..0e4d2ff63ab8 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c @@ -3,7 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c index 1f55ec4cee37..58d5d8dc2235 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c @@ -3,7 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c index f2f300d50988..31e58389bb8b 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c @@ -3,7 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c index 0f4dfb770c32..b698cc62a371 100644 --- a/tools/testing/selftests/bpf/progs/syscall.c +++ b/tools/testing/selftests/bpf/progs/syscall.c @@ -76,9 +76,9 @@ static int btf_load(void) .magic = BTF_MAGIC, .version = BTF_VERSION, .hdr_len = sizeof(struct btf_header), - .type_len = sizeof(__u32) * 8, - .str_off = sizeof(__u32) * 8, - .str_len = sizeof(__u32), + .type_len = sizeof(raw_btf.types), + .str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types), + .str_len = sizeof(raw_btf.str), }, .types = { /* long */ diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c index d1a57f7d09bd..fe6249d99b31 100644 --- a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c @@ -11,6 +11,8 @@ int subprog_tc(struct __sk_buff *skb) __sink(skb); __sink(ret); + /* let verifier know that 'subprog_tc' can change pointers to skb->data */ + bpf_skb_change_proto(skb, 0, 0); return ret; } diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index 683c8aaa63da..f344c6835e84 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -15,7 +15,7 @@ #include <linux/ipv6.h> #include <linux/pkt_cls.h> #include <linux/tcp.h> -#include <linux/udp.h> +#include <netinet/udp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h index 233b089d1fba..eb55cb8a3dbd 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.h +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h @@ -10,7 +10,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/udp.h> +#include <netinet/udp.h> /* offsetof() is used in static asserts, and the libbpf-redefined CO-RE * friendly version breaks compilation for older clang versions <= 15 diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c index 464515b824b9..d0f7670351e5 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c @@ -15,7 +15,7 @@ #include <linux/ipv6.h> #include <linux/pkt_cls.h> #include <linux/tcp.h> -#include <linux/udp.h> +#include <netinet/udp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c index 6afa834756e9..fac33a14f200 100644 --- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c +++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c @@ -6,13 +6,20 @@ #include <stdbool.h> extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak; +extern bool CONFIG_PPC_FTRACE_OUT_OF_LINE __kconfig __weak; +extern bool CONFIG_KPROBES_ON_FTRACE __kconfig __weak; +extern bool CONFIG_PPC64 __kconfig __weak; -/* This function is here to have CONFIG_X86_KERNEL_IBT - * used and added to object BTF. +/* This function is here to have CONFIG_X86_KERNEL_IBT, + * CONFIG_PPC_FTRACE_OUT_OF_LINE, CONFIG_KPROBES_ON_FTRACE, + * CONFIG_PPC6 used and added to object BTF. */ int unused(void) { - return CONFIG_X86_KERNEL_IBT ? 0 : 1; + return CONFIG_X86_KERNEL_IBT || + CONFIG_PPC_FTRACE_OUT_OF_LINE || + CONFIG_KPROBES_ON_FTRACE || + CONFIG_PPC64 ? 0 : 1; } SEC("kprobe") diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c index 5da001ca57a5..09d027bd3ea8 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func10.c +++ b/tools/testing/selftests/bpf/progs/test_global_func10.c @@ -26,7 +26,7 @@ __noinline int foo(const struct Big *big) } SEC("cgroup_skb/ingress") -__failure __msg("invalid indirect access to stack") +__failure __msg("invalid read from stack") int global_func10(struct __sk_buff *skb) { const struct Small small = {.x = skb->len }; diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c index 7ac7e1de34d8..0ad1bf1ede8d 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c @@ -4,7 +4,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" #include "bpf_kfuncs.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" SEC("tc") int kfunc_dynptr_nullable_test1(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index cc1a012d038f..fb07f5773888 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -5,7 +5,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" __u32 raw_tp_read_sz = 0; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c new file mode 100644 index 000000000000..2796dd8545eb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 ByteDance */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sock_map_rx SEC(".maps"); + +long change_tail_ret = 1; + +SEC("sk_skb") +int prog_skb_verdict(struct __sk_buff *skb) +{ + char *data, *data_end; + + bpf_skb_pull_data(skb, 1); + data = (char *)(unsigned long)skb->data; + data_end = (char *)(unsigned long)skb->data_end; + + if (data + 1 > data_end) + return SK_PASS; + + if (data[0] == 'T') { /* Trim the packet */ + change_tail_ret = bpf_skb_change_tail(skb, skb->len - 1, 0); + return SK_PASS; + } else if (data[0] == 'G') { /* Grow the packet */ + change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0); + return SK_PASS; + } else if (data[0] == 'E') { /* Error */ + change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + return SK_PASS; + } + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c new file mode 100644 index 000000000000..28edafe803f0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/pkt_cls.h> + +long change_tail_ret = 1; + +static __always_inline struct iphdr *parse_ip_header(struct __sk_buff *skb, int *ip_proto) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = data; + struct iphdr *iph; + + /* Verify Ethernet header */ + if ((void *)(data + sizeof(*eth)) > data_end) + return NULL; + + /* Skip Ethernet header to get to IP header */ + iph = (void *)(data + sizeof(struct ethhdr)); + + /* Verify IP header */ + if ((void *)(data + sizeof(struct ethhdr) + sizeof(*iph)) > data_end) + return NULL; + + /* Basic IP header validation */ + if (iph->version != 4) /* Only support IPv4 */ + return NULL; + + if (iph->ihl < 5) /* Minimum IP header length */ + return NULL; + + *ip_proto = iph->protocol; + return iph; +} + +static __always_inline struct udphdr *parse_udp_header(struct __sk_buff *skb, struct iphdr *iph) +{ + void *data_end = (void *)(long)skb->data_end; + void *hdr = (void *)iph; + struct udphdr *udp; + + /* Calculate UDP header position */ + udp = hdr + (iph->ihl * 4); + hdr = (void *)udp; + + /* Verify UDP header bounds */ + if ((void *)(hdr + sizeof(*udp)) > data_end) + return NULL; + + return udp; +} + +SEC("tc/ingress") +int change_tail(struct __sk_buff *skb) +{ + int len = skb->len; + struct udphdr *udp; + struct iphdr *iph; + void *data_end; + char *payload; + int ip_proto; + + bpf_skb_pull_data(skb, len); + + data_end = (void *)(long)skb->data_end; + iph = parse_ip_header(skb, &ip_proto); + if (!iph) + return TCX_PASS; + + if (ip_proto != IPPROTO_UDP) + return TCX_PASS; + + udp = parse_udp_header(skb, iph); + if (!udp) + return TCX_PASS; + + payload = (char *)udp + (sizeof(struct udphdr)); + if (payload + 1 > (char *)data_end) + return TCX_PASS; + + if (payload[0] == 'T') { /* Trim the packet */ + change_tail_ret = bpf_skb_change_tail(skb, len - 1, 0); + if (!change_tail_ret) + bpf_skb_change_tail(skb, len, 0); + return TCX_PASS; + } else if (payload[0] == 'G') { /* Grow the packet */ + change_tail_ret = bpf_skb_change_tail(skb, len + 1, 0); + if (!change_tail_ret) + bpf_skb_change_tail(skb, len, 0); + return TCX_PASS; + } else if (payload[0] == 'E') { /* Error */ + change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + return TCX_PASS; + } else if (payload[0] == 'Z') { /* Zero */ + change_tail_ret = bpf_skb_change_tail(skb, 0, 0); + return TCX_PASS; + } + return TCX_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c index 10d825928499..630f12e51b07 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_link.c +++ b/tools/testing/selftests/bpf/progs/test_tc_link.c @@ -8,6 +8,7 @@ #include <linux/if_packet.h> #include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char LICENSE[] SEC("license") = "GPL"; @@ -27,6 +28,7 @@ bool seen_host; bool seen_mcast; int mark, prio; +unsigned short headroom, tailroom; SEC("tc/ingress") int tc1(struct __sk_buff *skb) @@ -104,11 +106,24 @@ out: return TCX_PASS; } +struct sk_buff { + struct net_device *dev; +}; + +struct net_device { + unsigned short needed_headroom; + unsigned short needed_tailroom; +}; + SEC("tc/egress") int tc8(struct __sk_buff *skb) { + struct net_device *dev = BPF_CORE_READ((struct sk_buff *)skb, dev); + seen_tc8 = true; mark = skb->mark; prio = skb->priority; + headroom = BPF_CORE_READ(dev, needed_headroom); + tailroom = BPF_CORE_READ(dev, needed_tailroom); return TCX_PASS; } diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c index 5aaf2b065f86..39ff06f2c834 100644 --- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c +++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c @@ -3,15 +3,11 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" #include "bpf_misc.h" SEC("tp_btf/bpf_testmod_test_nullable_bare") -/* This used to be a failure test, but raw_tp nullable arguments can now - * directly be dereferenced, whether they have nullable annotation or not, - * and don't need to be explicitly checked. - */ -__success +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx) { return nullable_ctx->len; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index 81bb38d72ced..dc74d8cf9e3f 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -10,6 +10,8 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */ #if defined(__TARGET_ARCH_s390) int tailroom = 512; +#elif defined(__TARGET_ARCH_powerpc) + int tailroom = 384; #else int tailroom = 320; #endif diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c index 3abf068b8446..5928ed0911ca 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c @@ -98,6 +98,18 @@ int xdp_count_pkts(struct xdp_md *xdp) return XDP_DROP; } +SEC("xdp") +int xdp_redirect_to_111(struct xdp_md *xdp) +{ + return bpf_redirect(111, 0); +} + +SEC("xdp") +int xdp_redirect_to_222(struct xdp_md *xdp) +{ + return bpf_redirect(222, 0); +} + SEC("tc") int tc_count_pkts(struct __sk_buff *skb) { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index a7c4a7d49fe6..fe2d71ae0e71 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -8,7 +8,7 @@ #define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem -SEC("t") +SEC("tc") int ing_cls(struct __sk_buff *ctx) { __u8 *data, *data_meta, *data_end; @@ -28,7 +28,7 @@ int ing_cls(struct __sk_buff *ctx) return diff ? TC_ACT_SHOT : TC_ACT_OK; } -SEC("x") +SEC("xdp") int ing_xdp(struct xdp_md *ctx) { __u8 *data, *data_meta, *data_end; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c deleted file mode 100644 index b778cad45485..000000000000 --- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c +++ /dev/null @@ -1,26 +0,0 @@ -/* Copyright (c) 2017 VMware - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> - -SEC("redirect_to_111") -int xdp_redirect_to_111(struct xdp_md *xdp) -{ - return bpf_redirect(111, 0); -} -SEC("redirect_to_222") -int xdp_redirect_to_222(struct xdp_md *xdp) -{ - return bpf_redirect(222, 0); -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c index 8a403470e557..046a204c8fc6 100644 --- a/tools/testing/selftests/bpf/progs/uninit_stack.c +++ b/tools/testing/selftests/bpf/progs/uninit_stack.c @@ -70,7 +70,8 @@ __naked int helper_uninit_to_misc(void *ctx) r1 = r10; \ r1 += -128; \ r2 = 32; \ - call %[bpf_trace_printk]; \ + r3 = 0; \ + call %[bpf_probe_read_user]; \ /* Call to dummy() forces print_verifier_state(..., true), \ * thus showing the stack state, matched by __msg(). \ */ \ @@ -79,7 +80,7 @@ __naked int helper_uninit_to_misc(void *ctx) exit; \ " : - : __imm(bpf_trace_printk), + : __imm(bpf_probe_read_user), __imm(dummy) : __clobber_all); } diff --git a/tools/testing/selftests/bpf/progs/unsupported_ops.c b/tools/testing/selftests/bpf/progs/unsupported_ops.c index 9180365a3568..8aa2e0dd624e 100644 --- a/tools/testing/selftests/bpf/progs/unsupported_ops.c +++ b/tools/testing/selftests/bpf/progs/unsupported_ops.c @@ -4,7 +4,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod.h" +#include "../test_kmods/bpf_testmod.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c index 4195aa824ba5..29eb9568633f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_array_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c @@ -29,6 +29,20 @@ struct { } map_array_wo SEC(".maps"); struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, struct test_val); +} map_array_pcpu SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, struct test_val); +} map_array SEC(".maps"); + +struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 1); __type(key, long long); @@ -525,4 +539,178 @@ l0_%=: exit; \ : __clobber_all); } +SEC("socket") +__description("valid map access into an array using constant without nullness") +__success __retval(4) __log_level(2) +__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}") +unsigned int an_array_with_a_constant_no_nullness(void) +{ + /* Need 8-byte alignment for spill tracking */ + __u32 __attribute__((aligned(8))) key = 1; + struct test_val *val; + + val = bpf_map_lookup_elem(&map_array, &key); + val->index = offsetof(struct test_val, foo); + + return val->index; +} + +SEC("socket") +__description("valid multiple map access into an array using constant without nullness") +__success __retval(8) __log_level(2) +__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -16) = {{(0|r[0-9])}}") +__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}") +unsigned int multiple_array_with_a_constant_no_nullness(void) +{ + __u32 __attribute__((aligned(8))) key = 1; + __u32 __attribute__((aligned(8))) key2 = 0; + struct test_val *val, *val2; + + val = bpf_map_lookup_elem(&map_array, &key); + val->index = offsetof(struct test_val, foo); + + val2 = bpf_map_lookup_elem(&map_array, &key2); + val2->index = offsetof(struct test_val, foo); + + return val->index + val2->index; +} + +SEC("socket") +__description("valid map access into an array using natural aligned 32-bit constant 0 without nullness") +__success __retval(4) +unsigned int an_array_with_a_32bit_constant_0_no_nullness(void) +{ + /* Unlike the above tests, 32-bit zeroing is precisely tracked even + * if writes are not aligned to BPF_REG_SIZE. This tests that our + * STACK_ZERO handling functions. + */ + struct test_val *val; + __u32 key = 0; + + val = bpf_map_lookup_elem(&map_array, &key); + val->index = offsetof(struct test_val, foo); + + return val->index; +} + +SEC("socket") +__description("valid map access into a pcpu array using constant without nullness") +__success __retval(4) __log_level(2) +__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}") +unsigned int a_pcpu_array_with_a_constant_no_nullness(void) +{ + __u32 __attribute__((aligned(8))) key = 1; + struct test_val *val; + + val = bpf_map_lookup_elem(&map_array_pcpu, &key); + val->index = offsetof(struct test_val, foo); + + return val->index; +} + +SEC("socket") +__description("invalid map access into an array using constant without nullness") +__failure __msg("R0 invalid mem access 'map_value_or_null'") +unsigned int an_array_with_a_constant_no_nullness_out_of_bounds(void) +{ + /* Out of bounds */ + __u32 __attribute__((aligned(8))) key = 3; + struct test_val *val; + + val = bpf_map_lookup_elem(&map_array, &key); + val->index = offsetof(struct test_val, foo); + + return val->index; +} + +SEC("socket") +__description("invalid map access into an array using constant smaller than key_size") +__failure __msg("R0 invalid mem access 'map_value_or_null'") +unsigned int an_array_with_a_constant_too_small(void) +{ + __u32 __attribute__((aligned(8))) key; + struct test_val *val; + + /* Mark entire key as STACK_MISC */ + bpf_probe_read_user(&key, sizeof(key), NULL); + + /* Spilling only the bottom byte results in a tnum const of 1. + * We want to check that the verifier rejects it, as the spill is < 4B. + */ + *(__u8 *)&key = 1; + val = bpf_map_lookup_elem(&map_array, &key); + + /* Should fail, as verifier cannot prove in-bound lookup */ + val->index = offsetof(struct test_val, foo); + + return val->index; +} + +SEC("socket") +__description("invalid map access into an array using constant larger than key_size") +__failure __msg("R0 invalid mem access 'map_value_or_null'") +unsigned int an_array_with_a_constant_too_big(void) +{ + struct test_val *val; + __u64 key = 1; + + /* Even if the constant value is < max_entries, if the spill size is + * larger than the key size, the set bits may not be where we expect them + * to be on different endian architectures. + */ + val = bpf_map_lookup_elem(&map_array, &key); + val->index = offsetof(struct test_val, foo); + + return val->index; +} + +SEC("socket") +__description("invalid elided lookup using const and non-const key") +__failure __msg("R0 invalid mem access 'map_value_or_null'") +unsigned int mixed_const_and_non_const_key_lookup(void) +{ + __u32 __attribute__((aligned(8))) key; + struct test_val *val; + __u32 rand; + + rand = bpf_get_prandom_u32(); + key = rand > 42 ? 1 : rand; + val = bpf_map_lookup_elem(&map_array, &key); + + return val->index; +} + +SEC("socket") +__failure __msg("invalid read from stack R2 off=4096 size=4") +__naked void key_lookup_at_invalid_fp(void) +{ + asm volatile (" \ + r1 = %[map_array] ll; \ + r2 = r10; \ + r2 += 4096; \ + call %[bpf_map_lookup_elem]; \ + r0 = *(u64*)(r0 + 0); \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_array) + : __clobber_all); +} + +volatile __u32 __attribute__((aligned(8))) global_key; + +SEC("socket") +__description("invalid elided lookup using non-stack key") +__failure __msg("R0 invalid mem access 'map_value_or_null'") +unsigned int non_stack_key_lookup(void) +{ + struct test_val *val; + + global_key = 1; + val = bpf_map_lookup_elem(&map_array, (void *)&global_key); + val->index = offsetof(struct test_val, foo); + + return val->index; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c index 8d77cc5323d3..fb62e09f2114 100644 --- a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c @@ -28,7 +28,7 @@ __naked void stack_out_of_bounds(void) SEC("socket") __description("uninitialized stack1") __success __log_level(4) __msg("stack depth 8") -__failure_unpriv __msg_unpriv("invalid indirect read from stack") +__failure_unpriv __msg_unpriv("invalid read from stack") __naked void uninitialized_stack1(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index a0bb7fb40ea5..0eb33bb801b5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1200,4 +1200,138 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("tc") +__description("multiply mixed sign bounds. test 1") +__success __log_level(2) +__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))") +__naked void mult_mixed0_sign(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= 0xf;" + "r6 -= 1000000000;" + "r7 &= 0xf;" + "r7 -= 2000000000;" + "r6 *= r7;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("multiply mixed sign bounds. test 2") +__success __log_level(2) +__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=smin32=-100,smax=smax32=200)") +__naked void mult_mixed1_sign(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= 0xf;" + "r6 -= 0xa;" + "r7 &= 0xf;" + "r7 -= 0x14;" + "r6 *= r7;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("multiply negative bounds") +__success __log_level(2) +__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))") +__naked void mult_sign_bounds(void) +{ + asm volatile ( + "r8 = 0x7fff;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= 0xa;" + "r6 -= r8;" + "r7 &= 0xf;" + "r7 -= r8;" + "r6 *= r7;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("multiply bounds that don't cross signed boundary") +__success __log_level(2) +__msg("r8 *= r6 {{.*}}; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8_w=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))") +__naked void mult_no_sign_crossing(void) +{ + asm volatile ( + "r6 = 0xb;" + "r8 = 0xb3c3f8c99262687 ll;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= r7;" + "r8 *= r6;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("multiplication overflow, result in unbounded reg. test 1") +__success __log_level(2) +__msg("r6 *= r7 {{.*}}; R6_w=scalar()") +__naked void mult_unsign_ovf(void) +{ + asm volatile ( + "r8 = 0x7ffffffffff ll;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= 0x7fffffff;" + "r7 &= r8;" + "r6 *= r7;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("multiplication overflow, result in unbounded reg. test 2") +__success __log_level(2) +__msg("r6 *= r7 {{.*}}; R6_w=scalar()") +__naked void mult_sign_ovf(void) +{ + asm volatile ( + "r8 = 0x7ffffffff ll;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "call %[bpf_get_prandom_u32];" + "r7 = r0;" + "r6 &= 0xa;" + "r6 -= r8;" + "r7 &= 0x7fffffff;" + "r6 *= r7;" + "exit" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c index a570e48b917a..28b939572cda 100644 --- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c @@ -11,7 +11,7 @@ __success __retval(0) __naked void btf_ctx_access_accept(void) { asm volatile (" \ - r2 = *(u32*)(r1 + 8); /* load 2nd argument value (int pointer) */\ + r2 = *(u64 *)(r1 + 8); /* load 2nd argument value (int pointer) */\ r0 = 0; \ exit; \ " ::: __clobber_all); @@ -23,7 +23,43 @@ __success __retval(0) __naked void ctx_access_u32_pointer_accept(void) { asm volatile (" \ - r2 = *(u32*)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ + r2 = *(u64 *)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u32") +__failure __msg("size 4 must be 8") +__naked void ctx_access_u32_pointer_reject_32(void) +{ + asm volatile (" \ + r2 = *(u32 *)(r1 + 0); /* load 1st argument with narrow load */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u16") +__failure __msg("size 2 must be 8") +__naked void ctx_access_u32_pointer_reject_16(void) +{ + asm volatile (" \ + r2 = *(u16 *)(r1 + 0); /* load 1st argument with narrow load */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u8") +__failure __msg("size 1 must be 8") +__naked void ctx_access_u32_pointer_reject_8(void) +{ + asm volatile (" \ + r2 = *(u8 *)(r1 + 0); /* load 1st argument with narrow load */\ r0 = 0; \ exit; \ " ::: __clobber_all); diff --git a/tools/testing/selftests/bpf/progs/verifier_const_or.c b/tools/testing/selftests/bpf/progs/verifier_const_or.c index ba8922b2eebd..68c568c3c3a0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_const_or.c +++ b/tools/testing/selftests/bpf/progs/verifier_const_or.c @@ -25,7 +25,7 @@ __naked void constant_should_keep_constant_type(void) SEC("tracepoint") __description("constant register |= constant should not bypass stack boundary checks") -__failure __msg("invalid indirect access to stack R1 off=-48 size=58") +__failure __msg("invalid write to stack R1 off=-48 size=58") __naked void not_bypass_stack_boundary_checks_1(void) { asm volatile (" \ @@ -62,7 +62,7 @@ __naked void register_should_keep_constant_type(void) SEC("tracepoint") __description("constant register |= constant register should not bypass stack boundary checks") -__failure __msg("invalid indirect access to stack R1 off=-48 size=58") +__failure __msg("invalid write to stack R1 off=-48 size=58") __naked void not_bypass_stack_boundary_checks_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_d_path.c b/tools/testing/selftests/bpf/progs/verifier_d_path.c index ec79cbcfde91..87e51a215558 100644 --- a/tools/testing/selftests/bpf/progs/verifier_d_path.c +++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c @@ -11,7 +11,7 @@ __success __retval(0) __naked void d_path_accept(void) { asm volatile (" \ - r1 = *(u32*)(r1 + 0); \ + r1 = *(u64 *)(r1 + 0); \ r2 = r10; \ r2 += -8; \ r6 = 0; \ @@ -31,7 +31,7 @@ __failure __msg("helper call is not allowed in probe") __naked void d_path_reject(void) { asm volatile (" \ - r1 = *(u32*)(r1 + 0); \ + r1 = *(u64 *)(r1 + 0); \ r2 = r10; \ r2 += -8; \ r6 = 0; \ diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c index 50c6b22606f6..f2c54e4d89eb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c +++ b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c @@ -67,7 +67,7 @@ SEC("socket") __description("helper access to variable memory: stack, bitwise AND, zero included") /* in privileged mode reads from uninitialized stack locations are permitted */ __success __failure_unpriv -__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64") +__msg_unpriv("invalid read from stack R2 off -64+0 size 64") __retval(0) __naked void stack_bitwise_and_zero_included(void) { @@ -100,7 +100,7 @@ __naked void stack_bitwise_and_zero_included(void) SEC("tracepoint") __description("helper access to variable memory: stack, bitwise AND + JMP, wrong max") -__failure __msg("invalid indirect access to stack R1 off=-64 size=65") +__failure __msg("invalid write to stack R1 off=-64 size=65") __naked void bitwise_and_jmp_wrong_max(void) { asm volatile (" \ @@ -187,7 +187,7 @@ l0_%=: r0 = 0; \ SEC("tracepoint") __description("helper access to variable memory: stack, JMP, bounds + offset") -__failure __msg("invalid indirect access to stack R1 off=-64 size=65") +__failure __msg("invalid write to stack R1 off=-64 size=65") __naked void memory_stack_jmp_bounds_offset(void) { asm volatile (" \ @@ -211,7 +211,7 @@ l0_%=: r0 = 0; \ SEC("tracepoint") __description("helper access to variable memory: stack, JMP, wrong max") -__failure __msg("invalid indirect access to stack R1 off=-64 size=65") +__failure __msg("invalid write to stack R1 off=-64 size=65") __naked void memory_stack_jmp_wrong_max(void) { asm volatile (" \ @@ -260,7 +260,7 @@ SEC("socket") __description("helper access to variable memory: stack, JMP, no min check") /* in privileged mode reads from uninitialized stack locations are permitted */ __success __failure_unpriv -__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64") +__msg_unpriv("invalid read from stack R2 off -64+0 size 64") __retval(0) __naked void stack_jmp_no_min_check(void) { @@ -750,7 +750,7 @@ SEC("socket") __description("helper access to variable memory: 8 bytes leak") /* in privileged mode reads from uninitialized stack locations are permitted */ __success __failure_unpriv -__msg_unpriv("invalid indirect read from stack R2 off -64+32 size 64") +__msg_unpriv("invalid read from stack R2 off -64+32 size 64") __retval(0) __naked void variable_memory_8_bytes_leak(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c index 5f2efb895edb..59e34d558654 100644 --- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c @@ -96,7 +96,7 @@ __naked void arg_ptr_to_long_misaligned(void) SEC("cgroup/sysctl") __description("arg pointer to long size < sizeof(long)") -__failure __msg("invalid indirect access to stack R4 off=-4 size=8") +__failure __msg("invalid write to stack R4 off=-4 size=8") __naked void to_long_size_sizeof_long(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c index 4eaab1468eb7..7d088ba99ea5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c @@ -47,7 +47,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("map in map state pruning") -__success __msg("processed 26 insns") +__success __msg("processed 15 insns") __log_level(2) __retval(0) __flag(BPF_F_TEST_STATE_FREQ) __naked void map_in_map_state_pruning(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c new file mode 100644 index 000000000000..e81097c96fe2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "../../../include/linux/filter.h" +#include "bpf_misc.h" + +SEC("raw_tp") +__description("may_goto 0") +__arch_x86_64 +__xlated("0: r0 = 1") +__xlated("1: exit") +__success +__naked void may_goto_simple(void) +{ + asm volatile ( + ".8byte %[may_goto];" + "r0 = 1;" + ".8byte %[may_goto];" + "exit;" + : + : __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0)) + : __clobber_all); +} + +SEC("raw_tp") +__description("batch 2 of may_goto 0") +__arch_x86_64 +__xlated("0: r0 = 1") +__xlated("1: exit") +__success +__naked void may_goto_batch_0(void) +{ + asm volatile ( + ".8byte %[may_goto1];" + ".8byte %[may_goto1];" + "r0 = 1;" + ".8byte %[may_goto1];" + ".8byte %[may_goto1];" + "exit;" + : + : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0)) + : __clobber_all); +} + +SEC("raw_tp") +__description("may_goto batch with offsets 2/1/0") +__arch_x86_64 +__xlated("0: r0 = 1") +__xlated("1: exit") +__success +__naked void may_goto_batch_1(void) +{ + asm volatile ( + ".8byte %[may_goto1];" + ".8byte %[may_goto2];" + ".8byte %[may_goto3];" + "r0 = 1;" + ".8byte %[may_goto1];" + ".8byte %[may_goto2];" + ".8byte %[may_goto3];" + "exit;" + : + : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)), + __imm_insn(may_goto2, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 1 /* offset */, 0)), + __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0)) + : __clobber_all); +} + +SEC("raw_tp") +__description("may_goto batch with offsets 2/0") +__arch_x86_64 +__xlated("0: *(u64 *)(r10 -8) = 8388608") +__xlated("1: r11 = *(u64 *)(r10 -8)") +__xlated("2: if r11 == 0x0 goto pc+3") +__xlated("3: r11 -= 1") +__xlated("4: *(u64 *)(r10 -8) = r11") +__xlated("5: r0 = 1") +__xlated("6: r0 = 2") +__xlated("7: exit") +__success +__naked void may_goto_batch_2(void) +{ + asm volatile ( + ".8byte %[may_goto1];" + ".8byte %[may_goto3];" + "r0 = 1;" + "r0 = 2;" + "exit;" + : + : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)), + __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0)) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c new file mode 100644 index 000000000000..b891faf50660 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +int gvar; + +SEC("raw_tp") +__description("C code with may_goto 0") +__success +int may_goto_c_code(void) +{ + int i, tmp[3]; + + for (i = 0; i < 3 && can_loop; i++) + tmp[i] = 0; + + for (i = 0; i < 3 && can_loop; i++) + tmp[i] = gvar - i; + + for (i = 0; i < 3 && can_loop; i++) + gvar += tmp[i]; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_mtu.c b/tools/testing/selftests/bpf/progs/verifier_mtu.c index 4ccf1ebc42d1..256956ea1ac5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_mtu.c +++ b/tools/testing/selftests/bpf/progs/verifier_mtu.c @@ -8,7 +8,7 @@ SEC("tc/ingress") __description("uninit/mtu: write rejected") __success __caps_unpriv(CAP_BPF|CAP_NET_ADMIN) -__failure_unpriv __msg_unpriv("invalid indirect read from stack") +__failure_unpriv __msg_unpriv("invalid read from stack") int tc_uninit_mtu(struct __sk_buff *ctx) { __u32 mtu; diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c index 7cc83acac727..c689665e07b9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c @@ -236,7 +236,7 @@ __naked void load_bytes_spilled_regs_data(void) SEC("tc") __description("raw_stack: skb_load_bytes, invalid access 1") -__failure __msg("invalid indirect access to stack R3 off=-513 size=8") +__failure __msg("invalid write to stack R3 off=-513 size=8") __naked void load_bytes_invalid_access_1(void) { asm volatile (" \ @@ -255,7 +255,7 @@ __naked void load_bytes_invalid_access_1(void) SEC("tc") __description("raw_stack: skb_load_bytes, invalid access 2") -__failure __msg("invalid indirect access to stack R3 off=-1 size=8") +__failure __msg("invalid write to stack R3 off=-1 size=8") __naked void load_bytes_invalid_access_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index d3e70e38e442..0d5e56dffabb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -50,6 +50,13 @@ struct { __uint(map_flags, BPF_F_NO_PREALLOC); } sk_storage_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + SEC("cgroup/skb") __description("skb->sk: no NULL check") __failure __msg("invalid mem access 'sock_common_or_null'") @@ -1037,4 +1044,53 @@ __naked void sock_create_read_src_port(void) : __clobber_all); } +__noinline +long skb_pull_data2(struct __sk_buff *sk, __u32 len) +{ + return bpf_skb_pull_data(sk, len); +} + +__noinline +long skb_pull_data1(struct __sk_buff *sk, __u32 len) +{ + return skb_pull_data2(sk, len); +} + +/* global function calls bpf_skb_pull_data(), which invalidates packet + * pointers established before global function call. + */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + skb_pull_data1(sk, 0); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + +__noinline +int tail_call(struct __sk_buff *sk) +{ + bpf_tail_call_static(sk, &jmp_table, 0); + return 0; +} + +/* Tail calls invalidate packet pointers. */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + tail_call(sk); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c index 3f679de73229..d9d7b05cf6d2 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c @@ -187,7 +187,7 @@ l0_%=: r6 = r0; \ SEC("cgroup/skb") __description("spin_lock: test6 missing unlock") -__failure __msg("BPF_EXIT instruction cannot be used inside bpf_spin_lock-ed region") +__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_spin_lock-ed region") __failure_unpriv __msg_unpriv("") __naked void spin_lock_test6_missing_unlock(void) { @@ -530,4 +530,30 @@ l1_%=: exit; \ : __clobber_all); } +SEC("tc") +__description("spin_lock: loop within a locked region") +__success __failure_unpriv __msg_unpriv("") +__retval(0) +int bpf_loop_inside_locked_region(void) +{ + const int zero = 0; + struct val *val; + int i, j = 0; + + val = bpf_map_lookup_elem(&map_spin_lock, &zero); + if (!val) + return -1; + + bpf_spin_lock(&val->l); + bpf_for(i, 0, 10) { + j++; + /* Silence "unused variable" warnings. */ + if (j == 10) + break; + } + bpf_spin_unlock(&val->l); + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c index 7ea535bfbacd..a4a5e2071604 100644 --- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c +++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c @@ -199,7 +199,7 @@ __naked void pass_pointer_to_helper_function(void) SEC("socket") __description("unpriv: indirectly pass pointer on stack to helper function") __success __failure_unpriv -__msg_unpriv("invalid indirect read from stack R2 off -8+0 size 8") +__msg_unpriv("invalid read from stack R2 off -8+0 size 8") __retval(0) __naked void on_stack_to_helper_function(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c index c810f4f6f479..1d36d01b746e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_var_off.c +++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c @@ -203,7 +203,7 @@ __naked void stack_write_clobbers_spilled_regs(void) SEC("sockops") __description("indirect variable-offset stack access, unbounded") -__failure __msg("invalid unbounded variable-offset indirect access to stack R4") +__failure __msg("invalid unbounded variable-offset write to stack R4") __naked void variable_offset_stack_access_unbounded(void) { asm volatile (" \ @@ -236,7 +236,7 @@ l0_%=: r0 = 0; \ SEC("lwt_in") __description("indirect variable-offset stack access, max out of bound") -__failure __msg("invalid variable-offset indirect access to stack R2") +__failure __msg("invalid variable-offset read from stack R2") __naked void access_max_out_of_bound(void) { asm volatile (" \ @@ -269,7 +269,7 @@ __naked void access_max_out_of_bound(void) */ SEC("socket") __description("indirect variable-offset stack access, zero-sized, max out of bound") -__failure __msg("invalid variable-offset indirect access to stack R1") +__failure __msg("invalid variable-offset write to stack R1") __naked void zero_sized_access_max_out_of_bound(void) { asm volatile (" \ @@ -294,7 +294,7 @@ __naked void zero_sized_access_max_out_of_bound(void) SEC("lwt_in") __description("indirect variable-offset stack access, min out of bound") -__failure __msg("invalid variable-offset indirect access to stack R2") +__failure __msg("invalid variable-offset read from stack R2") __naked void access_min_out_of_bound(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c index f8d3ae0c29ae..2f1ba08c293e 100644 --- a/tools/testing/selftests/bpf/progs/wq.c +++ b/tools/testing/selftests/bpf/progs/wq.c @@ -5,7 +5,7 @@ #include "bpf_experimental.h" #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c index 25b51a72fe0f..4240211a1900 100644 --- a/tools/testing/selftests/bpf/progs/wq_failures.c +++ b/tools/testing/selftests/bpf/progs/wq_failures.c @@ -5,7 +5,7 @@ #include "bpf_experimental.h" #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#include "../bpf_testmod/bpf_testmod_kfunc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h index ca0162b4dc57..1fcfa5160231 100644 --- a/tools/testing/selftests/bpf/sdt.h +++ b/tools/testing/selftests/bpf/sdt.h @@ -102,6 +102,8 @@ # define STAP_SDT_ARG_CONSTRAINT nZr # elif defined __arm__ # define STAP_SDT_ARG_CONSTRAINT g +# elif defined __loongarch__ +# define STAP_SDT_ARG_CONSTRAINT nmr # else # define STAP_SDT_ARG_CONSTRAINT nor # endif diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index 0ed67b6b31dd..238121fda5b6 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -66,7 +66,7 @@ class ArrayParser(BlockParser): def __init__(self, reader, array_name): self.array_name = array_name - self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n') + self.start_marker = re.compile(fr'(static )?const bool {self.array_name}\[.*\] = {{\n') super().__init__(reader) def search_block(self): @@ -80,7 +80,7 @@ class ArrayParser(BlockParser): Parse a block and return data as a dictionary. Items to extract must be on separate lines in the file. """ - pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$') + pattern = re.compile(r'\[(BPF_\w*)\]\s*= (true|false),?$') entries = set() while True: line = self.reader.readline() @@ -178,7 +178,7 @@ class FileExtractor(object): @enum_name: name of the enum to parse """ start_marker = re.compile(f'enum {enum_name} {{\n') - pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$') + pattern = re.compile(r'^\s*(BPF_\w+),?(\s+/\*.*\*/)?$') end_marker = re.compile('^};') parser = BlockParser(self.reader) parser.search_block(start_marker) @@ -226,8 +226,8 @@ class FileExtractor(object): @block_name: name of the blog to parse, 'TYPE' in the example """ - start_marker = re.compile(f'\*{block_name}\* := {{') - pattern = re.compile('\*\*([\w/-]+)\*\*') + start_marker = re.compile(fr'\*{block_name}\* := {{') + pattern = re.compile(r'\*\*([\w/-]+)\*\*') end_marker = re.compile('}\n') return self.__get_description_list(start_marker, pattern, end_marker) @@ -245,8 +245,8 @@ class FileExtractor(object): @block_name: name of the blog to parse, 'TYPE' in the example """ - start_marker = re.compile(f'"\s*{block_name} := {{') - pattern = re.compile('([\w/]+) [|}]') + start_marker = re.compile(fr'"\s*{block_name} := {{') + pattern = re.compile(r'([\w/]+) [|}]') end_marker = re.compile('}') return self.__get_description_list(start_marker, pattern, end_marker) @@ -264,8 +264,8 @@ class FileExtractor(object): @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example """ - start_marker = re.compile(f'"\s*{macro}\s*" [|}}]') - pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])') + start_marker = re.compile(fr'"\s*{macro}\s*" [|}}]') + pattern = re.compile(r'([\w-]+) ?(?:\||}[ }\]])') end_marker = re.compile('}\\\\n') return self.__get_description_list(start_marker, pattern, end_marker) @@ -283,8 +283,8 @@ class FileExtractor(object): @block_name: name of the blog to parse, 'TYPE' in the example """ - start_marker = re.compile(f'local {block_name}=\'') - pattern = re.compile('(?:.*=\')?([\w/]+)') + start_marker = re.compile(fr'local {block_name}=\'') + pattern = re.compile(r'(?:.*=\')?([\w/]+)') end_marker = re.compile('\'$') return self.__get_description_list(start_marker, pattern, end_marker) @@ -316,7 +316,7 @@ class MainHeaderFileExtractor(SourceFileExtractor): {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ start_marker = re.compile(f'"OPTIONS :=') - pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])') + pattern = re.compile(r'([\w-]+) ?(?:\||}[ }\]"])') end_marker = re.compile('#define') parser = InlineListParser(self.reader) @@ -338,8 +338,8 @@ class ManSubstitutionsExtractor(SourceFileExtractor): {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ - start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {') - pattern = re.compile('\*\*([\w/-]+)\*\*') + start_marker = re.compile(r'\|COMMON_OPTIONS\| replace:: {') + pattern = re.compile(r'\*\*([\w/-]+)\*\*') end_marker = re.compile('}$') parser = InlineListParser(self.reader) diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c deleted file mode 100644 index 571cc076dd7d..000000000000 --- a/tools/testing/selftests/bpf/test_flow_dissector.c +++ /dev/null @@ -1,780 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Inject packets with all sorts of encapsulation into the kernel. - * - * IPv4/IPv6 outer layer 3 - * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/.. - * IPv4/IPv6 inner layer 3 - */ - -#define _GNU_SOURCE - -#include <stddef.h> -#include <arpa/inet.h> -#include <asm/byteorder.h> -#include <error.h> -#include <errno.h> -#include <linux/if_packet.h> -#include <linux/if_ether.h> -#include <linux/ipv6.h> -#include <netinet/ip.h> -#include <netinet/in.h> -#include <netinet/udp.h> -#include <poll.h> -#include <stdbool.h> -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <sys/types.h> -#include <unistd.h> - -#define CFG_PORT_INNER 8000 - -/* Add some protocol definitions that do not exist in userspace */ - -struct grehdr { - uint16_t unused; - uint16_t protocol; -} __attribute__((packed)); - -struct guehdr { - union { - struct { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 hlen:5, - control:1, - version:2; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u8 version:2, - control:1, - hlen:5; -#else -#error "Please fix <asm/byteorder.h>" -#endif - __u8 proto_ctype; - __be16 flags; - }; - __be32 word; - }; -}; - -static uint8_t cfg_dsfield_inner; -static uint8_t cfg_dsfield_outer; -static uint8_t cfg_encap_proto; -static bool cfg_expect_failure = false; -static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */ -static int cfg_l3_inner = AF_UNSPEC; -static int cfg_l3_outer = AF_UNSPEC; -static int cfg_num_pkt = 10; -static int cfg_num_secs = 0; -static char cfg_payload_char = 'a'; -static int cfg_payload_len = 100; -static int cfg_port_gue = 6080; -static bool cfg_only_rx; -static bool cfg_only_tx; -static int cfg_src_port = 9; - -static char buf[ETH_DATA_LEN]; - -#define INIT_ADDR4(name, addr4, port) \ - static struct sockaddr_in name = { \ - .sin_family = AF_INET, \ - .sin_port = __constant_htons(port), \ - .sin_addr.s_addr = __constant_htonl(addr4), \ - }; - -#define INIT_ADDR6(name, addr6, port) \ - static struct sockaddr_in6 name = { \ - .sin6_family = AF_INET6, \ - .sin6_port = __constant_htons(port), \ - .sin6_addr = addr6, \ - }; - -INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER) -INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0) -INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0) -INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0) -INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0) -INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0) - -INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER) -INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0) -INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0) -INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0) -INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0) -INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0) - -static unsigned long util_gettime(void) -{ - struct timeval tv; - - gettimeofday(&tv, NULL); - return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); -} - -static void util_printaddr(const char *msg, struct sockaddr *addr) -{ - unsigned long off = 0; - char nbuf[INET6_ADDRSTRLEN]; - - switch (addr->sa_family) { - case PF_INET: - off = __builtin_offsetof(struct sockaddr_in, sin_addr); - break; - case PF_INET6: - off = __builtin_offsetof(struct sockaddr_in6, sin6_addr); - break; - default: - error(1, 0, "printaddr: unsupported family %u\n", - addr->sa_family); - } - - if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf, - sizeof(nbuf))) - error(1, errno, "inet_ntop"); - - fprintf(stderr, "%s: %s\n", msg, nbuf); -} - -static unsigned long add_csum_hword(const uint16_t *start, int num_u16) -{ - unsigned long sum = 0; - int i; - - for (i = 0; i < num_u16; i++) - sum += start[i]; - - return sum; -} - -static uint16_t build_ip_csum(const uint16_t *start, int num_u16, - unsigned long sum) -{ - sum += add_csum_hword(start, num_u16); - - while (sum >> 16) - sum = (sum & 0xffff) + (sum >> 16); - - return ~sum; -} - -static void build_ipv4_header(void *header, uint8_t proto, - uint32_t src, uint32_t dst, - int payload_len, uint8_t tos) -{ - struct iphdr *iph = header; - - iph->ihl = 5; - iph->version = 4; - iph->tos = tos; - iph->ttl = 8; - iph->tot_len = htons(sizeof(*iph) + payload_len); - iph->id = htons(1337); - iph->protocol = proto; - iph->saddr = src; - iph->daddr = dst; - iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0); -} - -static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield) -{ - uint16_t val, *ptr = (uint16_t *)ip6h; - - val = ntohs(*ptr); - val &= 0xF00F; - val |= ((uint16_t) dsfield) << 4; - *ptr = htons(val); -} - -static void build_ipv6_header(void *header, uint8_t proto, - struct sockaddr_in6 *src, - struct sockaddr_in6 *dst, - int payload_len, uint8_t dsfield) -{ - struct ipv6hdr *ip6h = header; - - ip6h->version = 6; - ip6h->payload_len = htons(payload_len); - ip6h->nexthdr = proto; - ip6h->hop_limit = 8; - ipv6_set_dsfield(ip6h, dsfield); - - memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr)); - memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr)); -} - -static uint16_t build_udp_v4_csum(const struct iphdr *iph, - const struct udphdr *udph, - int num_words) -{ - unsigned long pseudo_sum; - int num_u16 = sizeof(iph->saddr); /* halfwords: twice byte len */ - - pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16); - pseudo_sum += htons(IPPROTO_UDP); - pseudo_sum += udph->len; - return build_ip_csum((void *) udph, num_words, pseudo_sum); -} - -static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h, - const struct udphdr *udph, - int num_words) -{ - unsigned long pseudo_sum; - int num_u16 = sizeof(ip6h->saddr); /* halfwords: twice byte len */ - - pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16); - pseudo_sum += htons(ip6h->nexthdr); - pseudo_sum += ip6h->payload_len; - return build_ip_csum((void *) udph, num_words, pseudo_sum); -} - -static void build_udp_header(void *header, int payload_len, - uint16_t dport, int family) -{ - struct udphdr *udph = header; - int len = sizeof(*udph) + payload_len; - - udph->source = htons(cfg_src_port); - udph->dest = htons(dport); - udph->len = htons(len); - udph->check = 0; - if (family == AF_INET) - udph->check = build_udp_v4_csum(header - sizeof(struct iphdr), - udph, len >> 1); - else - udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr), - udph, len >> 1); -} - -static void build_gue_header(void *header, uint8_t proto) -{ - struct guehdr *gueh = header; - - gueh->proto_ctype = proto; -} - -static void build_gre_header(void *header, uint16_t proto) -{ - struct grehdr *greh = header; - - greh->protocol = htons(proto); -} - -static int l3_length(int family) -{ - if (family == AF_INET) - return sizeof(struct iphdr); - else - return sizeof(struct ipv6hdr); -} - -static int build_packet(void) -{ - int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0; - int el3_len = 0; - - if (cfg_l3_extra) - el3_len = l3_length(cfg_l3_extra); - - /* calculate header offsets */ - if (cfg_encap_proto) { - ol3_len = l3_length(cfg_l3_outer); - - if (cfg_encap_proto == IPPROTO_GRE) - ol4_len = sizeof(struct grehdr); - else if (cfg_encap_proto == IPPROTO_UDP) - ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr); - } - - il3_len = l3_length(cfg_l3_inner); - il4_len = sizeof(struct udphdr); - - if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >= - sizeof(buf)) - error(1, 0, "packet too large\n"); - - /* - * Fill packet from inside out, to calculate correct checksums. - * But create ip before udp headers, as udp uses ip for pseudo-sum. - */ - memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len, - cfg_payload_char, cfg_payload_len); - - /* add zero byte for udp csum padding */ - buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0; - - switch (cfg_l3_inner) { - case PF_INET: - build_ipv4_header(buf + el3_len + ol3_len + ol4_len, - IPPROTO_UDP, - in_saddr4.sin_addr.s_addr, - in_daddr4.sin_addr.s_addr, - il4_len + cfg_payload_len, - cfg_dsfield_inner); - break; - case PF_INET6: - build_ipv6_header(buf + el3_len + ol3_len + ol4_len, - IPPROTO_UDP, - &in_saddr6, &in_daddr6, - il4_len + cfg_payload_len, - cfg_dsfield_inner); - break; - } - - build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len, - cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner); - - if (!cfg_encap_proto) - return il3_len + il4_len + cfg_payload_len; - - switch (cfg_l3_outer) { - case PF_INET: - build_ipv4_header(buf + el3_len, cfg_encap_proto, - out_saddr4.sin_addr.s_addr, - out_daddr4.sin_addr.s_addr, - ol4_len + il3_len + il4_len + cfg_payload_len, - cfg_dsfield_outer); - break; - case PF_INET6: - build_ipv6_header(buf + el3_len, cfg_encap_proto, - &out_saddr6, &out_daddr6, - ol4_len + il3_len + il4_len + cfg_payload_len, - cfg_dsfield_outer); - break; - } - - switch (cfg_encap_proto) { - case IPPROTO_UDP: - build_gue_header(buf + el3_len + ol3_len + ol4_len - - sizeof(struct guehdr), - cfg_l3_inner == PF_INET ? IPPROTO_IPIP - : IPPROTO_IPV6); - build_udp_header(buf + el3_len + ol3_len, - sizeof(struct guehdr) + il3_len + il4_len + - cfg_payload_len, - cfg_port_gue, cfg_l3_outer); - break; - case IPPROTO_GRE: - build_gre_header(buf + el3_len + ol3_len, - cfg_l3_inner == PF_INET ? ETH_P_IP - : ETH_P_IPV6); - break; - } - - switch (cfg_l3_extra) { - case PF_INET: - build_ipv4_header(buf, - cfg_l3_outer == PF_INET ? IPPROTO_IPIP - : IPPROTO_IPV6, - extra_saddr4.sin_addr.s_addr, - extra_daddr4.sin_addr.s_addr, - ol3_len + ol4_len + il3_len + il4_len + - cfg_payload_len, 0); - break; - case PF_INET6: - build_ipv6_header(buf, - cfg_l3_outer == PF_INET ? IPPROTO_IPIP - : IPPROTO_IPV6, - &extra_saddr6, &extra_daddr6, - ol3_len + ol4_len + il3_len + il4_len + - cfg_payload_len, 0); - break; - } - - return el3_len + ol3_len + ol4_len + il3_len + il4_len + - cfg_payload_len; -} - -/* sender transmits encapsulated over RAW or unencap'd over UDP */ -static int setup_tx(void) -{ - int family, fd, ret; - - if (cfg_l3_extra) - family = cfg_l3_extra; - else if (cfg_l3_outer) - family = cfg_l3_outer; - else - family = cfg_l3_inner; - - fd = socket(family, SOCK_RAW, IPPROTO_RAW); - if (fd == -1) - error(1, errno, "socket tx"); - - if (cfg_l3_extra) { - if (cfg_l3_extra == PF_INET) - ret = connect(fd, (void *) &extra_daddr4, - sizeof(extra_daddr4)); - else - ret = connect(fd, (void *) &extra_daddr6, - sizeof(extra_daddr6)); - if (ret) - error(1, errno, "connect tx"); - } else if (cfg_l3_outer) { - /* connect to destination if not encapsulated */ - if (cfg_l3_outer == PF_INET) - ret = connect(fd, (void *) &out_daddr4, - sizeof(out_daddr4)); - else - ret = connect(fd, (void *) &out_daddr6, - sizeof(out_daddr6)); - if (ret) - error(1, errno, "connect tx"); - } else { - /* otherwise using loopback */ - if (cfg_l3_inner == PF_INET) - ret = connect(fd, (void *) &in_daddr4, - sizeof(in_daddr4)); - else - ret = connect(fd, (void *) &in_daddr6, - sizeof(in_daddr6)); - if (ret) - error(1, errno, "connect tx"); - } - - return fd; -} - -/* receiver reads unencapsulated UDP */ -static int setup_rx(void) -{ - int fd, ret; - - fd = socket(cfg_l3_inner, SOCK_DGRAM, 0); - if (fd == -1) - error(1, errno, "socket rx"); - - if (cfg_l3_inner == PF_INET) - ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4)); - else - ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6)); - if (ret) - error(1, errno, "bind rx"); - - return fd; -} - -static int do_tx(int fd, const char *pkt, int len) -{ - int ret; - - ret = write(fd, pkt, len); - if (ret == -1) - error(1, errno, "send"); - if (ret != len) - error(1, errno, "send: len (%d < %d)\n", ret, len); - - return 1; -} - -static int do_poll(int fd, short events, int timeout) -{ - struct pollfd pfd; - int ret; - - pfd.fd = fd; - pfd.events = events; - - ret = poll(&pfd, 1, timeout); - if (ret == -1) - error(1, errno, "poll"); - if (ret && !(pfd.revents & POLLIN)) - error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents); - - return ret; -} - -static int do_rx(int fd) -{ - char rbuf; - int ret, num = 0; - - while (1) { - ret = recv(fd, &rbuf, 1, MSG_DONTWAIT); - if (ret == -1 && errno == EAGAIN) - break; - if (ret == -1) - error(1, errno, "recv"); - if (rbuf != cfg_payload_char) - error(1, 0, "recv: payload mismatch"); - num++; - } - - return num; -} - -static int do_main(void) -{ - unsigned long tstop, treport, tcur; - int fdt = -1, fdr = -1, len, tx = 0, rx = 0; - - if (!cfg_only_tx) - fdr = setup_rx(); - if (!cfg_only_rx) - fdt = setup_tx(); - - len = build_packet(); - - tcur = util_gettime(); - treport = tcur + 1000; - tstop = tcur + (cfg_num_secs * 1000); - - while (1) { - if (!cfg_only_rx) - tx += do_tx(fdt, buf, len); - - if (!cfg_only_tx) - rx += do_rx(fdr); - - if (cfg_num_secs) { - tcur = util_gettime(); - if (tcur >= tstop) - break; - if (tcur >= treport) { - fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); - tx = 0; - rx = 0; - treport = tcur + 1000; - } - } else { - if (tx == cfg_num_pkt) - break; - } - } - - /* read straggler packets, if any */ - if (rx < tx) { - tstop = util_gettime() + 100; - while (rx < tx) { - tcur = util_gettime(); - if (tcur >= tstop) - break; - - do_poll(fdr, POLLIN, tstop - tcur); - rx += do_rx(fdr); - } - } - - fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); - - if (fdr != -1 && close(fdr)) - error(1, errno, "close rx"); - if (fdt != -1 && close(fdt)) - error(1, errno, "close tx"); - - /* - * success (== 0) only if received all packets - * unless failure is expected, in which case none must arrive. - */ - if (cfg_expect_failure) - return rx != 0; - else - return rx != tx; -} - - -static void __attribute__((noreturn)) usage(const char *filepath) -{ - fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] " - "[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] " - "[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] " - "[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n", - filepath); - exit(1); -} - -static void parse_addr(int family, void *addr, const char *optarg) -{ - int ret; - - ret = inet_pton(family, optarg, addr); - if (ret == -1) - error(1, errno, "inet_pton"); - if (ret == 0) - error(1, 0, "inet_pton: bad string"); -} - -static void parse_addr4(struct sockaddr_in *addr, const char *optarg) -{ - parse_addr(AF_INET, &addr->sin_addr, optarg); -} - -static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg) -{ - parse_addr(AF_INET6, &addr->sin6_addr, optarg); -} - -static int parse_protocol_family(const char *filepath, const char *optarg) -{ - if (!strcmp(optarg, "4")) - return PF_INET; - if (!strcmp(optarg, "6")) - return PF_INET6; - - usage(filepath); -} - -static void parse_opts(int argc, char **argv) -{ - int c; - - while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) { - switch (c) { - case 'd': - if (cfg_l3_outer == AF_UNSPEC) - error(1, 0, "-d must be preceded by -o"); - if (cfg_l3_outer == AF_INET) - parse_addr4(&out_daddr4, optarg); - else - parse_addr6(&out_daddr6, optarg); - break; - case 'D': - if (cfg_l3_inner == AF_UNSPEC) - error(1, 0, "-D must be preceded by -i"); - if (cfg_l3_inner == AF_INET) - parse_addr4(&in_daddr4, optarg); - else - parse_addr6(&in_daddr6, optarg); - break; - case 'e': - if (!strcmp(optarg, "gre")) - cfg_encap_proto = IPPROTO_GRE; - else if (!strcmp(optarg, "gue")) - cfg_encap_proto = IPPROTO_UDP; - else if (!strcmp(optarg, "bare")) - cfg_encap_proto = IPPROTO_IPIP; - else if (!strcmp(optarg, "none")) - cfg_encap_proto = IPPROTO_IP; /* == 0 */ - else - usage(argv[0]); - break; - case 'f': - cfg_src_port = strtol(optarg, NULL, 0); - break; - case 'F': - cfg_expect_failure = true; - break; - case 'h': - usage(argv[0]); - break; - case 'i': - if (!strcmp(optarg, "4")) - cfg_l3_inner = PF_INET; - else if (!strcmp(optarg, "6")) - cfg_l3_inner = PF_INET6; - else - usage(argv[0]); - break; - case 'l': - cfg_payload_len = strtol(optarg, NULL, 0); - break; - case 'n': - cfg_num_pkt = strtol(optarg, NULL, 0); - break; - case 'o': - cfg_l3_outer = parse_protocol_family(argv[0], optarg); - break; - case 'O': - cfg_l3_extra = parse_protocol_family(argv[0], optarg); - break; - case 'R': - cfg_only_rx = true; - break; - case 's': - if (cfg_l3_outer == AF_INET) - parse_addr4(&out_saddr4, optarg); - else - parse_addr6(&out_saddr6, optarg); - break; - case 'S': - if (cfg_l3_inner == AF_INET) - parse_addr4(&in_saddr4, optarg); - else - parse_addr6(&in_saddr6, optarg); - break; - case 't': - cfg_num_secs = strtol(optarg, NULL, 0); - break; - case 'T': - cfg_only_tx = true; - break; - case 'x': - cfg_dsfield_outer = strtol(optarg, NULL, 0); - break; - case 'X': - cfg_dsfield_inner = strtol(optarg, NULL, 0); - break; - } - } - - if (cfg_only_rx && cfg_only_tx) - error(1, 0, "options: cannot combine rx-only and tx-only"); - - if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC) - error(1, 0, "options: must specify outer with encap"); - else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC) - error(1, 0, "options: cannot combine no-encap and outer"); - else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC) - error(1, 0, "options: cannot combine no-encap and extra"); - - if (cfg_l3_inner == AF_UNSPEC) - cfg_l3_inner = AF_INET6; - if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP) - cfg_encap_proto = IPPROTO_IPV6; - - /* RFC 6040 4.2: - * on decap, if outer encountered congestion (CE == 0x3), - * but inner cannot encode ECN (NoECT == 0x0), then drop packet. - */ - if (((cfg_dsfield_outer & 0x3) == 0x3) && - ((cfg_dsfield_inner & 0x3) == 0x0)) - cfg_expect_failure = true; -} - -static void print_opts(void) -{ - if (cfg_l3_inner == PF_INET6) { - util_printaddr("inner.dest6", (void *) &in_daddr6); - util_printaddr("inner.source6", (void *) &in_saddr6); - } else { - util_printaddr("inner.dest4", (void *) &in_daddr4); - util_printaddr("inner.source4", (void *) &in_saddr4); - } - - if (!cfg_l3_outer) - return; - - fprintf(stderr, "encap proto: %u\n", cfg_encap_proto); - - if (cfg_l3_outer == PF_INET6) { - util_printaddr("outer.dest6", (void *) &out_daddr6); - util_printaddr("outer.source6", (void *) &out_saddr6); - } else { - util_printaddr("outer.dest4", (void *) &out_daddr4); - util_printaddr("outer.source4", (void *) &out_saddr4); - } - - if (!cfg_l3_extra) - return; - - if (cfg_l3_outer == PF_INET6) { - util_printaddr("extra.dest6", (void *) &extra_daddr6); - util_printaddr("extra.source6", (void *) &extra_saddr6); - } else { - util_printaddr("extra.dest4", (void *) &extra_daddr4); - util_printaddr("extra.source4", (void *) &extra_saddr4); - } - -} - -int main(int argc, char **argv) -{ - parse_opts(argc, argv); - print_opts(); - return do_main(); -} diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh deleted file mode 100755 index 4b298863797a..000000000000 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ /dev/null @@ -1,178 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Load BPF flow dissector and verify it correctly dissects traffic - -BPF_FILE="bpf_flow.bpf.o" -export TESTNAME=test_flow_dissector -unmount=0 - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -msg="skip all tests:" -if [ $UID != 0 ]; then - echo $msg please run this as root >&2 - exit $ksft_skip -fi - -# This test needs to be run in a network namespace with in_netns.sh. Check if -# this is the case and run it with in_netns.sh if it is being run in the root -# namespace. -if [[ -z $(ip netns identify $$) ]]; then - err=0 - if bpftool="$(which bpftool)"; then - echo "Testing global flow dissector..." - - $bpftool prog loadall $BPF_FILE /sys/fs/bpf/flow \ - type flow_dissector - - if ! unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/_dissect flow_dissector; then - echo "Unexpected unsuccessful attach in namespace" >&2 - err=1 - fi - - $bpftool prog attach pinned /sys/fs/bpf/flow/_dissect \ - flow_dissector - - if unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/_dissect flow_dissector; then - echo "Unexpected successful attach in namespace" >&2 - err=1 - fi - - if ! $bpftool prog detach pinned \ - /sys/fs/bpf/flow/_dissect flow_dissector; then - echo "Failed to detach flow dissector" >&2 - err=1 - fi - - rm -rf /sys/fs/bpf/flow - else - echo "Skipping root flow dissector test, bpftool not found" >&2 - fi - - # Run the rest of the tests in a net namespace. - ../net/in_netns.sh "$0" "$@" - err=$(( $err + $? )) - - if (( $err == 0 )); then - echo "selftests: $TESTNAME [PASS]"; - else - echo "selftests: $TESTNAME [FAILED]"; - fi - - exit $err -fi - -# Determine selftest success via shell exit code -exit_handler() -{ - set +e - - # Cleanup - tc filter del dev lo ingress pref 1337 2> /dev/null - tc qdisc del dev lo ingress 2> /dev/null - ./flow_dissector_load -d 2> /dev/null - if [ $unmount -ne 0 ]; then - umount bpffs 2> /dev/null - fi -} - -# Exit script immediately (well catched by trap handler) if any -# program/thing exits with a non-zero status. -set -e - -# (Use 'trap -l' to list meaning of numbers) -trap exit_handler 0 2 3 6 9 - -# Mount BPF file system -if /bin/mount | grep /sys/fs/bpf > /dev/null; then - echo "bpffs already mounted" -else - echo "bpffs not mounted. Mounting..." - unmount=1 - /bin/mount bpffs /sys/fs/bpf -t bpf -fi - -# Attach BPF program -./flow_dissector_load -p $BPF_FILE -s _dissect - -# Setup -tc qdisc add dev lo ingress -echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter -echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter -echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter - -echo "Testing IPv4..." -# Drops all IP/UDP packets coming from port 9 -tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \ - udp src_port 9 action drop - -# Send 10 IPv4/UDP packets from port 8. Filter should not drop any. -./test_flow_dissector -i 4 -f 8 -# Send 10 IPv4/UDP packets from port 9. Filter should drop all. -./test_flow_dissector -i 4 -f 9 -F -# Send 10 IPv4/UDP packets from port 10. Filter should not drop any. -./test_flow_dissector -i 4 -f 10 - -echo "Testing IPv4 from 127.0.0.127 (fallback to generic dissector)..." -# Send 10 IPv4/UDP packets from port 8. Filter should not drop any. -./test_flow_dissector -i 4 -S 127.0.0.127 -f 8 -# Send 10 IPv4/UDP packets from port 9. Filter should drop all. -./test_flow_dissector -i 4 -S 127.0.0.127 -f 9 -F -# Send 10 IPv4/UDP packets from port 10. Filter should not drop any. -./test_flow_dissector -i 4 -S 127.0.0.127 -f 10 - -echo "Testing IPIP..." -# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any. -./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 8 -# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all. -./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 9 -F -# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any. -./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 10 - -echo "Testing IPv4 + GRE..." -# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any. -./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 8 -# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all. -./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 9 -F -# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any. -./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ - -D 192.168.0.1 -S 1.1.1.1 -f 10 - -tc filter del dev lo ingress pref 1337 - -echo "Testing port range..." -# Drops all IP/UDP packets coming from port 8-10 -tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \ - udp src_port 8-10 action drop - -# Send 10 IPv4/UDP packets from port 7. Filter should not drop any. -./test_flow_dissector -i 4 -f 7 -# Send 10 IPv4/UDP packets from port 9. Filter should drop all. -./test_flow_dissector -i 4 -f 9 -F -# Send 10 IPv4/UDP packets from port 11. Filter should not drop any. -./test_flow_dissector -i 4 -f 11 - -tc filter del dev lo ingress pref 1337 - -echo "Testing IPv6..." -# Drops all IPv6/UDP packets coming from port 9 -tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \ - udp src_port 9 action drop - -# Send 10 IPv6/UDP packets from port 8. Filter should not drop any. -./test_flow_dissector -i 6 -f 8 -# Send 10 IPv6/UDP packets from port 9. Filter should drop all. -./test_flow_dissector -i 6 -f 9 -F -# Send 10 IPv6/UDP packets from port 10. Filter should not drop any. -./test_flow_dissector -i 6 -f 10 - -exit 0 diff --git a/tools/testing/selftests/bpf/bpf_testmod/.gitignore b/tools/testing/selftests/bpf/test_kmods/.gitignore index ded513777281..ded513777281 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/.gitignore +++ b/tools/testing/selftests/bpf/test_kmods/.gitignore diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile b/tools/testing/selftests/bpf/test_kmods/Makefile new file mode 100644 index 000000000000..d4e50c4509c9 --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/Makefile @@ -0,0 +1,21 @@ +TEST_KMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= $(abspath $(TEST_KMOD_DIR)/../../../../..) + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ + bpf_test_modorder_y.ko + +$(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o))) + +CFLAGS_bpf_testmod.o = -I$(src) + +all: + $(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) modules + +clean: + $(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) clean diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c index 0cc747fa912f..0cc747fa912f 100644 --- a/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c index c627ee085d13..c627ee085d13 100644 --- a/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c index 948eb3962732..948eb3962732 100644 --- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h index aeef86b3da74..aeef86b3da74 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index cc9dde507aba..cc9dde507aba 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h index 356803d1c10e..356803d1c10e 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h index b58817938deb..b58817938deb 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 6088d8222d59..c9e745d49493 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1282,6 +1282,21 @@ void crash_handler(int signum) backtrace_symbols_fd(bt, sz, STDERR_FILENO); } +void hexdump(const char *prefix, const void *buf, size_t len) +{ + for (int i = 0; i < len; i++) { + if (!(i % 16)) { + if (i) + fprintf(stdout, "\n"); + fprintf(stdout, "%s", prefix); + } + if (i && !(i % 8) && (i % 16)) + fprintf(stdout, "\t"); + fprintf(stdout, "%02X ", ((uint8_t *)(buf))[i]); + } + fprintf(stdout, "\n"); +} + static void sigint_handler(int signum) { int i; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 74de33ae37e5..404d0d4915d5 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -185,6 +185,7 @@ void test__end_subtest(void); void test__skip(void); void test__fail(void); int test__join_cgroup(const char *path); +void hexdump(const char *prefix, const void *buf, size_t len); #define PRINT_FAIL(format...) \ ({ \ @@ -344,6 +345,20 @@ int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_MEMEQ(actual, expected, len, name) ({ \ + static int duration = 0; \ + const void *__act = actual; \ + const void *__exp = expected; \ + int __len = len; \ + bool ___ok = memcmp(__act, __exp, __len) == 0; \ + CHECK(!___ok, (name), "unexpected memory mismatch\n"); \ + fprintf(stdout, "actual:\n"); \ + hexdump("\t", __act, __len); \ + fprintf(stdout, "expected:\n"); \ + hexdump("\t", __exp, __len); \ + ___ok; \ +}) + #define ASSERT_OK(res, name) ({ \ static int duration = 0; \ long long ___res = (res); \ diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh index 7989ec608454..cb55a908bb0d 100755 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -305,6 +305,7 @@ else client_connect verify_data server_listen + wait_for_port ${port} ${netcat_opt} fi # serverside, use BPF for decap diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh deleted file mode 100755 index 2740322c1878..000000000000 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/sh - -BPF_FILE="test_xdp_meta.bpf.o" -# Kselftest framework requirement - SKIP code is 4. -readonly KSFT_SKIP=4 -readonly NS1="ns1-$(mktemp -u XXXXXX)" -readonly NS2="ns2-$(mktemp -u XXXXXX)" - -cleanup() -{ - if [ "$?" = "0" ]; then - echo "selftests: test_xdp_meta [PASS]"; - else - echo "selftests: test_xdp_meta [FAILED]"; - fi - - set +e - ip link del veth1 2> /dev/null - ip netns del ${NS1} 2> /dev/null - ip netns del ${NS2} 2> /dev/null -} - -ip link set dev lo xdp off 2>/dev/null > /dev/null -if [ $? -ne 0 ];then - echo "selftests: [SKIP] Could not run test without the ip xdp support" - exit $KSFT_SKIP -fi -set -e - -ip netns add ${NS1} -ip netns add ${NS2} - -trap cleanup 0 2 3 6 9 - -ip link add veth1 type veth peer name veth2 - -ip link set veth1 netns ${NS1} -ip link set veth2 netns ${NS2} - -ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1 -ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 - -ip netns exec ${NS1} tc qdisc add dev veth1 clsact -ip netns exec ${NS2} tc qdisc add dev veth2 clsact - -ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t -ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t - -ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x -ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x - -ip netns exec ${NS1} ip link set dev veth1 up -ip netns exec ${NS2} ip link set dev veth2 up - -ip netns exec ${NS1} ping -c 1 10.1.1.22 -ip netns exec ${NS2} ping -c 1 10.1.1.11 - -exit 0 diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh deleted file mode 100755 index 0746a4fde9d3..000000000000 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash -# Create 2 namespaces with two veth peers, and -# forward packets in-between using generic XDP -# -# NS1(veth11) NS2(veth22) -# | | -# | | -# (veth1, ------ (veth2, -# id:111) id:222) -# | xdp forwarding | -# ------------------ - -readonly NS1="ns1-$(mktemp -u XXXXXX)" -readonly NS2="ns2-$(mktemp -u XXXXXX)" -ret=0 - -setup() -{ - - local xdpmode=$1 - - ip netns add ${NS1} - ip netns add ${NS2} - - ip link add veth1 index 111 type veth peer name veth11 netns ${NS1} - ip link add veth2 index 222 type veth peer name veth22 netns ${NS2} - - ip link set veth1 up - ip link set veth2 up - ip -n ${NS1} link set dev veth11 up - ip -n ${NS2} link set dev veth22 up - - ip -n ${NS1} addr add 10.1.1.11/24 dev veth11 - ip -n ${NS2} addr add 10.1.1.22/24 dev veth22 -} - -cleanup() -{ - ip link del veth1 2> /dev/null - ip link del veth2 2> /dev/null - ip netns del ${NS1} 2> /dev/null - ip netns del ${NS2} 2> /dev/null -} - -test_xdp_redirect() -{ - local xdpmode=$1 - - setup - - ip link set dev veth1 $xdpmode off &> /dev/null - if [ $? -ne 0 ];then - echo "selftests: test_xdp_redirect $xdpmode [SKIP]" - return 0 - fi - - ip -n ${NS1} link set veth11 $xdpmode obj xdp_dummy.bpf.o sec xdp &> /dev/null - ip -n ${NS2} link set veth22 $xdpmode obj xdp_dummy.bpf.o sec xdp &> /dev/null - ip link set dev veth1 $xdpmode obj test_xdp_redirect.bpf.o sec redirect_to_222 &> /dev/null - ip link set dev veth2 $xdpmode obj test_xdp_redirect.bpf.o sec redirect_to_111 &> /dev/null - - if ip netns exec ${NS1} ping -c 1 10.1.1.22 &> /dev/null && - ip netns exec ${NS2} ping -c 1 10.1.1.11 &> /dev/null; then - echo "selftests: test_xdp_redirect $xdpmode [PASS]"; - else - ret=1 - echo "selftests: test_xdp_redirect $xdpmode [FAILED]"; - fi - - cleanup -} - -set -e -trap cleanup 2 3 6 9 - -test_xdp_redirect xdpgeneric -test_xdp_redirect xdpdrv - -exit $ret diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 2d742fdac6b9..81943c6254e6 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -293,6 +293,10 @@ static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *st return 0; } #else +# ifndef PROCMAP_QUERY_VMA_EXECUTABLE +# define PROCMAP_QUERY_VMA_EXECUTABLE 0x04 +# endif + static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags) { return -EOPNOTSUPP; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 7afc2619ab14..18596ae0b0c1 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -2252,7 +2252,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 7 }, - .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8", + .errstr_unpriv = "invalid read from stack R2 off -8+0 size 8", .result_unpriv = REJECT, /* in privileged mode reads from uninitialized stack locations are permitted */ .result = ACCEPT, diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c index f420c0312aa0..4b39f8472f9b 100644 --- a/tools/testing/selftests/bpf/verifier/map_kptr.c +++ b/tools/testing/selftests/bpf/verifier/map_kptr.c @@ -373,7 +373,7 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_map_kptr = { 1 }, .result = REJECT, - .errstr = "Unreleased reference id=5 alloc_insn=20", + .errstr = "Unreleased reference id=4 alloc_insn=20", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_acquire", 15 }, } diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index e12ef953fba8..06af5029885b 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -21,11 +21,20 @@ #include <gelf.h> #include <float.h> #include <math.h> +#include <limits.h> #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif +#ifndef max +#define max(a, b) ((a) > (b) ? (a) : (b)) +#endif + +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + enum stat_id { VERDICT, DURATION, @@ -34,6 +43,11 @@ enum stat_id { PEAK_STATES, MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, + SIZE, + JITED_SIZE, + STACK, + PROG_TYPE, + ATTACH_TYPE, FILE_NAME, PROG_NAME, @@ -203,7 +217,8 @@ const char argp_program_doc[] = "\n" "USAGE: veristat <obj-file> [<obj-file>...]\n" " OR: veristat -C <baseline.csv> <comparison.csv>\n" -" OR: veristat -R <results.csv>\n"; +" OR: veristat -R <results.csv>\n" +" OR: veristat -vl2 <to_analyze.bpf.o>\n"; enum { OPT_LOG_FIXED = 1000, @@ -215,7 +230,7 @@ static const struct argp_option opts[] = { { "version", 'V', NULL, 0, "Print version" }, { "verbose", 'v', NULL, 0, "Verbose mode" }, { "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" }, - { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, + { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode, 2 for full verification log)" }, { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" }, { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, { "top-n", 'n', "N", 0, "Emit only up to first N results." }, @@ -640,19 +655,21 @@ cleanup: } static const struct stat_specs default_output_spec = { - .spec_cnt = 7, + .spec_cnt = 8, .ids = { FILE_NAME, PROG_NAME, VERDICT, DURATION, - TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, + TOTAL_INSNS, TOTAL_STATES, SIZE, JITED_SIZE }, }; static const struct stat_specs default_csv_output_spec = { - .spec_cnt = 9, + .spec_cnt = 14, .ids = { FILE_NAME, PROG_NAME, VERDICT, DURATION, TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, + SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE, + STACK, }, }; @@ -688,6 +705,11 @@ static struct stat_def { [PEAK_STATES] = { "Peak states", {"peak_states"}, }, [MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, }, [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, + [SIZE] = { "Program size", {"prog_size"}, }, + [JITED_SIZE] = { "Jited size", {"prog_size_jited"}, }, + [STACK] = {"Stack depth", {"stack_depth", "stack"}, }, + [PROG_TYPE] = { "Program type", {"prog_type"}, }, + [ATTACH_TYPE] = { "Attach type", {"attach_type", }, }, }; static bool parse_stat_id_var(const char *name, size_t len, int *id, @@ -835,7 +857,8 @@ static char verif_log_buf[64 * 1024]; static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s) { const char *cur; - int pos, lines; + int pos, lines, sub_stack, cnt = 0; + char *state = NULL, *token, stack[512]; buf[buf_sz - 1] = '\0'; @@ -853,15 +876,22 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats * if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION])) continue; - if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld", + if (5 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld", &s->stats[TOTAL_INSNS], &s->stats[MAX_STATES_PER_INSN], &s->stats[TOTAL_STATES], &s->stats[PEAK_STATES], &s->stats[MARK_READ_MAX_LEN])) continue; - } + if (1 == sscanf(cur, "stack depth %511s", stack)) + continue; + } + while ((token = strtok_r(cnt++ ? NULL : stack, "+", &state))) { + if (sscanf(token, "%d", &sub_stack) == 0) + break; + s->stats[STACK] += sub_stack; + } return 0; } @@ -884,7 +914,7 @@ static int line_cnt_cmp(const void *a, const void *b) const struct line_cnt *b_cnt = (const struct line_cnt *)b; if (a_cnt->cnt != b_cnt->cnt) - return a_cnt->cnt < b_cnt->cnt ? -1 : 1; + return a_cnt->cnt > b_cnt->cnt ? -1 : 1; return strcmp(a_cnt->line, b_cnt->line); } @@ -1032,6 +1062,41 @@ static int guess_prog_type_by_ctx_name(const char *ctx_name, return -ESRCH; } +/* Make sure only target program is referenced from struct_ops map, + * otherwise libbpf would automatically set autocreate for all + * referenced programs. + * See libbpf.c:bpf_object_adjust_struct_ops_autoload. + */ +static void mask_unrelated_struct_ops_progs(struct bpf_object *obj, + struct bpf_map *map, + struct bpf_program *prog) +{ + struct btf *btf = bpf_object__btf(obj); + const struct btf_type *t, *mt; + struct btf_member *m; + int i, moff; + size_t data_sz, ptr_sz = sizeof(void *); + void *data; + + t = btf__type_by_id(btf, bpf_map__btf_value_type_id(map)); + if (!btf_is_struct(t)) + return; + + data = bpf_map__initial_value(map, &data_sz); + for (i = 0; i < btf_vlen(t); i++) { + m = &btf_members(t)[i]; + mt = btf__type_by_id(btf, m->type); + if (!btf_is_ptr(mt)) + continue; + moff = m->offset / 8; + if (moff + ptr_sz > data_sz) + continue; + if (memcmp(data + moff, &prog, ptr_sz) == 0) + continue; + memset(data + moff, 0, ptr_sz); + } +} + static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename) { struct bpf_map *map; @@ -1047,6 +1112,9 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch case BPF_MAP_TYPE_INODE_STORAGE: case BPF_MAP_TYPE_CGROUP_STORAGE: break; + case BPF_MAP_TYPE_STRUCT_OPS: + mask_unrelated_struct_ops_progs(obj, map, prog); + break; default: if (bpf_map__max_entries(map) == 0) bpf_map__set_max_entries(map, 1); @@ -1146,8 +1214,11 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf char *buf; int buf_sz, log_level; struct verif_stats *stats; + struct bpf_prog_info info; + __u32 info_len = sizeof(info); int err = 0; void *tmp; + int fd; if (!should_process_file_prog(base_filename, bpf_program__name(prog))) { env.progs_skipped++; @@ -1196,6 +1267,15 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats->file_name = strdup(base_filename); stats->prog_name = strdup(bpf_program__name(prog)); stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */ + stats->stats[SIZE] = bpf_program__insn_cnt(prog); + stats->stats[PROG_TYPE] = bpf_program__type(prog); + stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog); + + memset(&info, 0, info_len); + fd = bpf_program__fd(prog); + if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) + stats->stats[JITED_SIZE] = info.jited_prog_len; + parse_verif_log(buf, buf_sz, stats); if (env.verbose) { @@ -1309,6 +1389,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, case PROG_NAME: cmp = strcmp(s1->prog_name, s2->prog_name); break; + case ATTACH_TYPE: + case PROG_TYPE: + case SIZE: + case JITED_SIZE: + case STACK: case VERDICT: case DURATION: case TOTAL_INSNS: @@ -1523,12 +1608,27 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, else *str = s->stats[VERDICT] ? "success" : "failure"; break; + case ATTACH_TYPE: + if (!s) + *str = "N/A"; + else + *str = libbpf_bpf_attach_type_str(s->stats[ATTACH_TYPE]) ?: "N/A"; + break; + case PROG_TYPE: + if (!s) + *str = "N/A"; + else + *str = libbpf_bpf_prog_type_str(s->stats[PROG_TYPE]) ?: "N/A"; + break; case DURATION: case TOTAL_INSNS: case TOTAL_STATES: case PEAK_STATES: case MAX_STATES_PER_INSN: case MARK_READ_MAX_LEN: + case STACK: + case SIZE: + case JITED_SIZE: *val = s ? s->stats[id] : 0; break; default: @@ -1612,7 +1712,10 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats case TOTAL_STATES: case PEAK_STATES: case MAX_STATES_PER_INSN: - case MARK_READ_MAX_LEN: { + case MARK_READ_MAX_LEN: + case SIZE: + case JITED_SIZE: + case STACK: { long val; int err, n; @@ -1625,6 +1728,42 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats st->stats[id] = val; break; } + case PROG_TYPE: { + enum bpf_prog_type prog_type = 0; + const char *type; + + while ((type = libbpf_bpf_prog_type_str(prog_type))) { + if (strcmp(type, str) == 0) { + st->stats[id] = prog_type; + break; + } + prog_type++; + } + + if (!type) { + fprintf(stderr, "Unrecognized prog type %s\n", str); + return -EINVAL; + } + break; + } + case ATTACH_TYPE: { + enum bpf_attach_type attach_type = 0; + const char *type; + + while ((type = libbpf_bpf_attach_type_str(attach_type))) { + if (strcmp(type, str) == 0) { + st->stats[id] = attach_type; + break; + } + attach_type++; + } + + if (!type) { + fprintf(stderr, "Unrecognized attach type %s\n", str); + return -EINVAL; + } + break; + } default: fprintf(stderr, "Unrecognized stat #%d\n", id); return -EINVAL; diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 6f9956eed797..6f7b15d6c6ed 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -27,7 +27,7 @@ #include <linux/errqueue.h> #include <linux/if_link.h> #include <linux/net_tstamp.h> -#include <linux/udp.h> +#include <netinet/udp.h> #include <linux/sockios.h> #include <linux/if_xdp.h> #include <sys/mman.h> @@ -79,7 +79,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XSK_UMEM__DEFAULT_FLAGS, + .flags = XDP_UMEM_TX_METADATA_LEN, .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx = 0; @@ -551,6 +551,7 @@ static void hwtstamp_enable(const char *ifname) { struct hwtstamp_config cfg = { .rx_filter = HWTSTAMP_FILTER_ALL, + .tx_type = HWTSTAMP_TX_ON, }; hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg); diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 03c1bdaed2c3..400a696a0d21 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -86,15 +86,15 @@ echo "" > test/cpuset.cpus # # If isolated CPUs have been reserved at boot time (as shown in -# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7 +# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8 # that will be used by this script for testing purpose. If not, some of -# the tests may fail incorrectly. These isolated CPUs will also be removed -# before being compared with the expected results. +# the tests may fail incorrectly. These pre-isolated CPUs should stay in +# an isolated state throughout the testing process for now. # BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) if [[ -n "$BOOT_ISOLCPUS" ]] then - [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] && + [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] && skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" fi @@ -684,14 +684,18 @@ check_isolcpus() fi # + # Appending pre-isolated CPUs + # Even though CPU #8 isn't used for testing, it can't be pre-isolated + # to make appending those CPUs easier. + # + [[ -n "$BOOT_ISOLCPUS" ]] && { + EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS} + EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS} + } + + # # Check cpuset.cpus.isolated cpumask # - if [[ -z "$BOOT_ISOLCPUS" ]] - then - ISOLCPUS=$(cat $ISCPUS) - else - ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//") - fi [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { # Take a 50ms pause and try again pause 0.05 @@ -731,8 +735,6 @@ check_isolcpus() fi done [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU - [[ -n "BOOT_ISOLCPUS" ]] && - ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//") [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] } @@ -836,8 +838,11 @@ run_state_test() # if available [[ -n "$ICPUS" ]] && { check_isolcpus $ICPUS - [[ $? -ne 0 ]] && test_fail $I "isolated CPU" \ - "Expect $ICPUS, get $ISOLCPUS instead" + [[ $? -ne 0 ]] && { + [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS} + test_fail $I "isolated CPU" \ + "Expect $ICPUS, get $ISOLCPUS instead" + } } reset_cgroup_states # diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile new file mode 100644 index 000000000000..ed210037b29d --- /dev/null +++ b/tools/testing/selftests/coredump/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS = $(KHDR_INCLUDES) + +TEST_GEN_PROGS := stackdump_test +TEST_FILES := stackdump + +include ../lib.mk diff --git a/tools/testing/selftests/coredump/README.rst b/tools/testing/selftests/coredump/README.rst new file mode 100644 index 000000000000..164a7aa181c8 --- /dev/null +++ b/tools/testing/selftests/coredump/README.rst @@ -0,0 +1,50 @@ +coredump selftest +================= + +Background context +------------------ + +`coredump` is a feature which dumps a process's memory space when the process terminates +unexpectedly (e.g. due to segmentation fault), which can be useful for debugging. By default, +`coredump` dumps the memory to the file named `core`, but this behavior can be changed by writing a +different file name to `/proc/sys/kernel/core_pattern`. Furthermore, `coredump` can be piped to a +user-space program by writing the pipe symbol (`|`) followed by the command to be executed to +`/proc/sys/kernel/core_pattern`. For the full description, see `man 5 core`. + +The piped user program may be interested in reading the stack pointers of the crashed process. The +crashed process's stack pointers can be read from `procfs`: it is the `kstkesp` field in +`/proc/$PID/stat`. See `man 5 proc` for all the details. + +The problem +----------- +While a thread is active, the stack pointer is unsafe to read and therefore the `kstkesp` field +reads zero. But when the thread is dead (e.g. during a coredump), this field should have valid +value. + +However, this was broken in the past and `kstkesp` was zero even during coredump: + +* commit 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in /proc/PID/stat") changed kstkesp to + always be zero + +* commit fd7d56270b52 ("fs/proc: Report eip/esp in /prod/PID/stat for coredumping") fixed it for the + coredumping thread. However, other threads in a coredumping process still had the problem. + +* commit cb8f381f1613 ("fs/proc/array.c: allow reporting eip/esp for all coredumping threads") fixed + for all threads in a coredumping process. + +* commit 92307383082d ("coredump: Don't perform any cleanups before dumping core") broke it again + for the other threads in a coredumping process. + +The problem has been fixed now, but considering the history, it may appear again in the future. + +The goal of this test +--------------------- +This test detects problem with reading `kstkesp` during coredump by doing the following: + +#. Tell the kernel to execute the "stackdump" script when a coredump happens. This script + reads the stack pointers of all threads of crashed processes. + +#. Spawn a child process who creates some threads and then crashes. + +#. Read the output from the "stackdump" script, and make sure all stack pointer values are + non-zero. diff --git a/tools/testing/selftests/coredump/stackdump b/tools/testing/selftests/coredump/stackdump new file mode 100755 index 000000000000..96714ce42d12 --- /dev/null +++ b/tools/testing/selftests/coredump/stackdump @@ -0,0 +1,14 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +CRASH_PROGRAM_ID=$1 +STACKDUMP_FILE=$2 + +TMP=$(mktemp) + +for t in /proc/$CRASH_PROGRAM_ID/task/*; do + tid=$(basename $t) + cat /proc/$tid/stat | awk '{print $29}' >> $TMP +done + +mv $TMP $STACKDUMP_FILE diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c new file mode 100644 index 000000000000..137b2364a082 --- /dev/null +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <fcntl.h> +#include <libgen.h> +#include <linux/limits.h> +#include <pthread.h> +#include <string.h> +#include <sys/resource.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#define STACKDUMP_FILE "stack_values" +#define STACKDUMP_SCRIPT "stackdump" +#define NUM_THREAD_SPAWN 128 + +static void *do_nothing(void *) +{ + while (1) + pause(); +} + +static void crashing_child(void) +{ + pthread_t thread; + int i; + + for (i = 0; i < NUM_THREAD_SPAWN; ++i) + pthread_create(&thread, NULL, do_nothing, NULL); + + /* crash on purpose */ + i = *(int *)NULL; +} + +FIXTURE(coredump) +{ + char original_core_pattern[256]; +}; + +FIXTURE_SETUP(coredump) +{ + char buf[PATH_MAX]; + FILE *file; + char *dir; + int ret; + + file = fopen("/proc/sys/kernel/core_pattern", "r"); + ASSERT_NE(NULL, file); + + ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file); + ASSERT_TRUE(ret || feof(file)); + ASSERT_LT(ret, sizeof(self->original_core_pattern)); + + self->original_core_pattern[ret] = '\0'; + + ret = fclose(file); + ASSERT_EQ(0, ret); +} + +FIXTURE_TEARDOWN(coredump) +{ + const char *reason; + FILE *file; + int ret; + + unlink(STACKDUMP_FILE); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + if (!file) { + reason = "Unable to open core_pattern"; + goto fail; + } + + ret = fprintf(file, "%s", self->original_core_pattern); + if (ret < 0) { + reason = "Unable to write to core_pattern"; + goto fail; + } + + ret = fclose(file); + if (ret) { + reason = "Unable to close core_pattern"; + goto fail; + } + + return; +fail: + /* This should never happen */ + fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason); +} + +TEST_F(coredump, stackdump) +{ + struct sigaction action = {}; + unsigned long long stack; + char *test_dir, *line; + size_t line_length; + char buf[PATH_MAX]; + int ret, i; + FILE *file; + pid_t pid; + + /* + * Step 1: Setup core_pattern so that the stackdump script is executed when the child + * process crashes + */ + ret = readlink("/proc/self/exe", buf, sizeof(buf)); + ASSERT_NE(-1, ret); + ASSERT_LT(ret, sizeof(buf)); + buf[ret] = '\0'; + + test_dir = dirname(buf); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(NULL, file); + + ret = fprintf(file, "|%1$s/%2$s %%P %1$s/%3$s", test_dir, STACKDUMP_SCRIPT, STACKDUMP_FILE); + ASSERT_LT(0, ret); + + ret = fclose(file); + ASSERT_EQ(0, ret); + + /* Step 2: Create a process who spawns some threads then crashes */ + pid = fork(); + ASSERT_TRUE(pid >= 0); + if (pid == 0) + crashing_child(); + + /* + * Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file + */ + for (i = 0; i < 10; ++i) { + file = fopen(STACKDUMP_FILE, "r"); + if (file) + break; + sleep(1); + } + ASSERT_NE(file, NULL); + + /* Step 4: Make sure all stack pointer values are non-zero */ + for (i = 0; -1 != getline(&line, &line_length, file); ++i) { + stack = strtoull(line, NULL, 10); + ASSERT_NE(stack, 0); + } + + ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN); + + fclose(file); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/cpufreq/.gitignore b/tools/testing/selftests/cpufreq/.gitignore new file mode 100644 index 000000000000..67604e91e068 --- /dev/null +++ b/tools/testing/selftests/cpufreq/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +cpufreq_selftest.* diff --git a/tools/testing/selftests/cpufreq/Makefile b/tools/testing/selftests/cpufreq/Makefile index c86ca8342222..9b2ccb10b0cf 100644 --- a/tools/testing/selftests/cpufreq/Makefile +++ b/tools/testing/selftests/cpufreq/Makefile @@ -3,6 +3,7 @@ all: TEST_PROGS := main.sh TEST_FILES := cpu.sh cpufreq.sh governor.sh module.sh special-tests.sh +EXTRA_CLEAN := cpufreq_selftest.dmesg_cpufreq.txt cpufreq_selftest.dmesg_full.txt cpufreq_selftest.txt include ../lib.mk diff --git a/tools/testing/selftests/damon/.gitignore b/tools/testing/selftests/damon/.gitignore index 2ab675fecb6b..2f0297657c81 100644 --- a/tools/testing/selftests/damon/.gitignore +++ b/tools/testing/selftests/damon/.gitignore @@ -1,6 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -huge_count_read_write -debugfs_target_ids_read_before_terminate_race -debugfs_target_ids_pid_leak access_memory access_memory_even diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 812f656260fb..ecbf07afc6dd 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -1,15 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for damon selftests -TEST_GEN_FILES += huge_count_read_write -TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race -TEST_GEN_FILES += debugfs_target_ids_pid_leak TEST_GEN_FILES += access_memory access_memory_even -TEST_FILES = _chk_dependency.sh _debugfs_common.sh _damon_sysfs.py +TEST_FILES = _chk_dependency.sh _damon_sysfs.py # functionality tests -TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += sysfs.sh TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py @@ -17,11 +13,6 @@ TEST_PROGS += damos_tried_regions.py damon_nr_regions.py TEST_PROGS += reclaim.sh lru_sort.sh # regression tests (reproducers of previously found bugs) -TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh -TEST_PROGS += debugfs_duplicate_context_creation.sh -TEST_PROGS += debugfs_rm_non_contexts.sh -TEST_PROGS += debugfs_target_ids_read_before_terminate_race.sh -TEST_PROGS += debugfs_target_ids_pid_leak.sh TEST_PROGS += sysfs_update_removed_scheme_dir.sh TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py diff --git a/tools/testing/selftests/damon/config b/tools/testing/selftests/damon/config index 0daf38974eb0..a68a9fead5dc 100644 --- a/tools/testing/selftests/damon/config +++ b/tools/testing/selftests/damon/config @@ -1,6 +1,5 @@ CONFIG_DAMON=y CONFIG_DAMON_SYSFS=y -CONFIG_DAMON_DBGFS=y CONFIG_DAMON_PADDR=y CONFIG_DAMON_VADDR=y CONFIG_DAMON_RECLAIM=y diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh deleted file mode 100755 index 902e312bca89..000000000000 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source _debugfs_common.sh - -# Test attrs file -# =============== - -file="$DBGFS/attrs" -orig_content=$(cat "$file") - -test_write_succ "$file" "1 2 3 4 5" "$orig_content" "valid input" -test_write_fail "$file" "1 2 3 4" "$orig_content" "no enough fields" -test_write_fail "$file" "1 2 3 5 4" "$orig_content" \ - "min_nr_regions > max_nr_regions" -test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written" -echo "$orig_content" > "$file" diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh deleted file mode 100755 index bd6c22d96ead..000000000000 --- a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source _debugfs_common.sh - -# Test duplicated context creation -# ================================ - -if ! echo foo > "$DBGFS/mk_contexts" -then - echo "context creation failed" - exit 1 -fi - -if echo foo > "$DBGFS/mk_contexts" 2> /dev/null -then - echo "duplicate context creation success" - exit 1 -fi - -if ! echo foo > "$DBGFS/rm_contexts" -then - echo "context deletion failed" - exit 1 -fi - -exit 0 diff --git a/tools/testing/selftests/damon/debugfs_empty_targets.sh b/tools/testing/selftests/damon/debugfs_empty_targets.sh deleted file mode 100755 index effbea33dc16..000000000000 --- a/tools/testing/selftests/damon/debugfs_empty_targets.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source _debugfs_common.sh - -# Test empty targets case -# ======================= - -orig_target_ids=$(cat "$DBGFS/target_ids") -echo "" > "$DBGFS/target_ids" - -if [ -f "$DBGFS/monitor_on_DEPRECATED" ] -then - monitor_on_file="$DBGFS/monitor_on_DEPRECATED" -else - monitor_on_file="$DBGFS/monitor_on" -fi - -orig_monitor_on=$(cat "$monitor_on_file") -test_write_fail "$monitor_on_file" "on" "orig_monitor_on" "empty target ids" -echo "$orig_target_ids" > "$DBGFS/target_ids" diff --git a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh deleted file mode 100755 index 922cadac2950..000000000000 --- a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source _debugfs_common.sh - -# Test huge count read write -# ========================== - -dmesg -C - -for file in "$DBGFS/"* -do - ./huge_count_read_write "$file" -done - -if dmesg | grep -q WARNING -then - dmesg - exit 1 -else - exit 0 -fi diff --git a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh deleted file mode 100755 index f3ffeb1343cf..000000000000 --- a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source _debugfs_common.sh - -# Test putting non-ctx files/dirs to rm_contexts file -# =================================================== - -dmesg -C - -for file in "$DBGFS/"* -do - (echo "$(basename "$f")" > "$DBGFS/rm_contexts") &> /dev/null - if dmesg | grep -q BUG - then - dmesg - exit 1 - fi -done diff --git a/tools/testing/selftests/damon/debugfs_schemes.sh b/tools/testing/selftests/damon/debugfs_schemes.sh deleted file mode 100755 index 5b39ab44731c..000000000000 --- a/tools/testing/selftests/damon/debugfs_schemes.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source _debugfs_common.sh - -# Test schemes file -# ================= - -file="$DBGFS/schemes" -orig_content=$(cat "$file") - -test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \ - "$orig_content" "valid input" -test_write_fail "$file" "1 2 -3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines" -test_write_succ "$file" "" "$orig_content" "disabling" -test_write_fail "$file" "2 1 2 1 10 1 3 10 1 1 1 1 1 1 1 1 2 3" \ - "$orig_content" "wrong condition ranges" -echo "$orig_content" > "$file" diff --git a/tools/testing/selftests/damon/debugfs_target_ids.sh b/tools/testing/selftests/damon/debugfs_target_ids.sh deleted file mode 100755 index 49aeabdb0aae..000000000000 --- a/tools/testing/selftests/damon/debugfs_target_ids.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source _debugfs_common.sh - -# Test target_ids file -# ==================== - -file="$DBGFS/target_ids" -orig_content=$(cat "$file") - -test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input" -test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input" -test_content "$file" "$orig_content" "1 2" "non-integer was there" -test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input" -test_content "$file" "$orig_content" "" "wrong input written" -test_write_succ "$file" "" "$orig_content" "empty input" -test_content "$file" "$orig_content" "" "empty input written" -echo "$orig_content" > "$file" diff --git a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c deleted file mode 100644 index 0cc2eef7d142..000000000000 --- a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Author: SeongJae Park <sj@kernel.org> - */ - -#define _GNU_SOURCE - -#include <fcntl.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <sys/time.h> -#include <unistd.h> - -#define DBGFS_TARGET_IDS "/sys/kernel/debug/damon/target_ids" - -static void write_targetid_exit(void) -{ - int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR); - char pid_str[128]; - - snprintf(pid_str, sizeof(pid_str), "%d", getpid()); - write(target_ids_fd, pid_str, sizeof(pid_str)); - close(target_ids_fd); - exit(0); -} - -unsigned long msec_timestamp(void) -{ - struct timeval tv; - - gettimeofday(&tv, NULL); - return tv.tv_sec * 1000UL + tv.tv_usec / 1000; -} - -int main(int argc, char *argv[]) -{ - unsigned long start_ms; - int time_to_run, nr_forks = 0; - - if (argc != 2) { - fprintf(stderr, "Usage: %s <msecs to run>\n", argv[0]); - exit(1); - } - time_to_run = atoi(argv[1]); - - start_ms = msec_timestamp(); - while (true) { - int pid = fork(); - - if (pid < 0) { - fprintf(stderr, "fork() failed\n"); - exit(1); - } - if (pid == 0) - write_targetid_exit(); - wait(NULL); - nr_forks++; - - if (msec_timestamp() - start_ms > time_to_run) - break; - } - printf("%d\n", nr_forks); - return 0; -} diff --git a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh deleted file mode 100755 index 31fe33c2b032..000000000000 --- a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -before=$(grep "^pid " /proc/slabinfo | awk '{print $2}') - -nr_leaks=$(./debugfs_target_ids_pid_leak 1000) -expected_after_max=$((before + nr_leaks / 2)) - -after=$(grep "^pid " /proc/slabinfo | awk '{print $2}') - -echo > /sys/kernel/debug/damon/target_ids - -echo "tried $nr_leaks pid leak" -echo "number of active pid slabs: $before -> $after" -echo "(up to $expected_after_max expected)" -if [ $after -gt $expected_after_max ] -then - echo "maybe pids are leaking" - exit 1 -else - exit 0 -fi diff --git a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c deleted file mode 100644 index b06f52a8ce2d..000000000000 --- a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Author: SeongJae Park <sj@kernel.org> - */ -#define _GNU_SOURCE - -#include <fcntl.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <time.h> -#include <unistd.h> - -#define DBGFS_MONITOR_ON "/sys/kernel/debug/damon/monitor_on_DEPRECATED" -#define DBGFS_TARGET_IDS "/sys/kernel/debug/damon/target_ids" - -static void turn_damon_on_exit(void) -{ - int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR); - int monitor_on_fd = open(DBGFS_MONITOR_ON, O_RDWR); - char pid_str[128]; - - snprintf(pid_str, sizeof(pid_str), "%d", getpid()); - write(target_ids_fd, pid_str, sizeof(pid_str)); - write(monitor_on_fd, "on\n", 3); - close(target_ids_fd); - close(monitor_on_fd); - usleep(1000); - exit(0); -} - -static void try_race(void) -{ - int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR); - int pid = fork(); - int buf[256]; - - if (pid < 0) { - fprintf(stderr, "fork() failed\n"); - exit(1); - } - if (pid == 0) - turn_damon_on_exit(); - while (true) { - int status; - - read(target_ids_fd, buf, sizeof(buf)); - if (waitpid(-1, &status, WNOHANG) == pid) - break; - } - close(target_ids_fd); -} - -static inline uint64_t ts_to_ms(struct timespec *ts) -{ - return (uint64_t)ts->tv_sec * 1000 + (uint64_t)ts->tv_nsec / 1000000; -} - -int main(int argc, char *argv[]) -{ - struct timespec start_time, now; - int runtime_ms; - - if (argc != 2) { - fprintf(stderr, "Usage: %s <runtime in ms>\n", argv[0]); - exit(1); - } - runtime_ms = atoi(argv[1]); - clock_gettime(CLOCK_MONOTONIC, &start_time); - while (true) { - try_race(); - clock_gettime(CLOCK_MONOTONIC, &now); - if (ts_to_ms(&now) - ts_to_ms(&start_time) > runtime_ms) - break; - } - return 0; -} diff --git a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh deleted file mode 100755 index fc793c4c9aea..000000000000 --- a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -dmesg -C - -./debugfs_target_ids_read_before_terminate_race 5000 - -if dmesg | grep -q dbgfs_target_ids_read -then - dmesg - exit 1 -else - exit 0 -fi diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c deleted file mode 100644 index 53e69a669668..000000000000 --- a/tools/testing/selftests/damon/huge_count_read_write.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Author: SeongJae Park <sj@kernel.org> - */ - -#include <fcntl.h> -#include <stdlib.h> -#include <unistd.h> -#include <stdio.h> - -#pragma GCC diagnostic push -#if __GNUC__ >= 11 && __GNUC_MINOR__ >= 1 -/* Ignore read(2) overflow and write(2) overread compile warnings */ -#pragma GCC diagnostic ignored "-Wstringop-overread" -#pragma GCC diagnostic ignored "-Wstringop-overflow" -#endif - -void write_read_with_huge_count(char *file) -{ - int filedesc = open(file, O_RDWR); - char buf[256]; - int ret; - - printf("%s %s\n", __func__, file); - if (filedesc < 0) { - fprintf(stderr, "failed opening %s\n", file); - exit(1); - } - - write(filedesc, "", 0xfffffffful); - ret = read(filedesc, buf, 0xfffffffful); - close(filedesc); -} - -#pragma GCC diagnostic pop - -int main(int argc, char *argv[]) -{ - if (argc != 2) { - fprintf(stderr, "Usage: %s <file>\n", argv[0]); - exit(1); - } - write_read_with_huge_count(argv[1]); - - return 0; -} diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 0fec8f9801ad..137470bdee0c 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -1,15 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 TEST_INCLUDES := $(wildcard lib/py/*.py) \ + $(wildcard lib/sh/*.sh) \ ../../net/net_helper.sh \ ../../net/lib.sh \ TEST_PROGS := \ netcons_basic.sh \ + netcons_overflow.sh \ ping.py \ queues.py \ stats.py \ shaper.py \ + hds.py \ # end of TEST_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 03a089165d3f..2b10854e4b1e 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -10,7 +10,7 @@ TEST_PROGS := \ mode-2-recovery-updelay.sh \ bond_options.sh \ bond-eth-type-change.sh \ - bond_macvlan.sh + bond_macvlan_ipvlan.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh deleted file mode 100755 index b609fb6231f4..000000000000 --- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Test macvlan over balance-alb - -lib_dir=$(dirname "$0") -source ${lib_dir}/bond_topo_2d1c.sh - -m1_ns="m1-$(mktemp -u XXXXXX)" -m2_ns="m1-$(mktemp -u XXXXXX)" -m1_ip4="192.0.2.11" -m1_ip6="2001:db8::11" -m2_ip4="192.0.2.12" -m2_ip6="2001:db8::12" - -cleanup() -{ - ip -n ${m1_ns} link del macv0 - ip netns del ${m1_ns} - ip -n ${m2_ns} link del macv0 - ip netns del ${m2_ns} - - client_destroy - server_destroy - gateway_destroy -} - -check_connection() -{ - local ns=${1} - local target=${2} - local message=${3:-"macvlan_over_bond"} - RET=0 - - - ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null - check_err $? "ping failed" - log_test "$mode: $message" -} - -macvlan_over_bond() -{ - local param="$1" - RET=0 - - # setup new bond mode - bond_reset "${param}" - - ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge - ip -n ${s_ns} link set macv0 netns ${m1_ns} - ip -n ${m1_ns} link set dev macv0 up - ip -n ${m1_ns} addr add ${m1_ip4}/24 dev macv0 - ip -n ${m1_ns} addr add ${m1_ip6}/24 dev macv0 - - ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge - ip -n ${s_ns} link set macv0 netns ${m2_ns} - ip -n ${m2_ns} link set dev macv0 up - ip -n ${m2_ns} addr add ${m2_ip4}/24 dev macv0 - ip -n ${m2_ns} addr add ${m2_ip6}/24 dev macv0 - - sleep 2 - - check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server" - check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server" - check_connection "${c_ns}" "${m1_ip4}" "IPv4: client->macvlan_1" - check_connection "${c_ns}" "${m1_ip6}" "IPv6: client->macvlan_1" - check_connection "${c_ns}" "${m2_ip4}" "IPv4: client->macvlan_2" - check_connection "${c_ns}" "${m2_ip6}" "IPv6: client->macvlan_2" - check_connection "${m1_ns}" "${m2_ip4}" "IPv4: macvlan_1->macvlan_2" - check_connection "${m1_ns}" "${m2_ip6}" "IPv6: macvlan_1->macvlan_2" - - - sleep 5 - - check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client" - check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client" - check_connection "${m1_ns}" "${c_ip4}" "IPv4: macvlan_1->client" - check_connection "${m1_ns}" "${c_ip6}" "IPv6: macvlan_1->client" - check_connection "${m2_ns}" "${c_ip4}" "IPv4: macvlan_2->client" - check_connection "${m2_ns}" "${c_ip6}" "IPv6: macvlan_2->client" - check_connection "${m2_ns}" "${m1_ip4}" "IPv4: macvlan_2->macvlan_2" - check_connection "${m2_ns}" "${m1_ip6}" "IPv6: macvlan_2->macvlan_2" - - ip -n ${c_ns} neigh flush dev eth0 -} - -trap cleanup EXIT - -setup_prepare -ip netns add ${m1_ns} -ip netns add ${m2_ns} - -modes="active-backup balance-tlb balance-alb" - -for mode in $modes; do - macvlan_over_bond "mode $mode" -done - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh new file mode 100755 index 000000000000..c4711272fe45 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test macvlan/ipvlan over bond + +lib_dir=$(dirname "$0") +source ${lib_dir}/bond_topo_2d1c.sh + +xvlan1_ns="xvlan1-$(mktemp -u XXXXXX)" +xvlan2_ns="xvlan2-$(mktemp -u XXXXXX)" +xvlan1_ip4="192.0.2.11" +xvlan1_ip6="2001:db8::11" +xvlan2_ip4="192.0.2.12" +xvlan2_ip6="2001:db8::12" + +cleanup() +{ + client_destroy + server_destroy + gateway_destroy + + ip netns del ${xvlan1_ns} + ip netns del ${xvlan2_ns} +} + +check_connection() +{ + local ns=${1} + local target=${2} + local message=${3} + RET=0 + + ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null + check_err $? "ping failed" + log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}" +} + +xvlan_over_bond() +{ + local param="$1" + local xvlan_type="$2" + local xvlan_mode="$3" + RET=0 + + # setup new bond mode + bond_reset "${param}" + + ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode} + ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan1_ns} + ip -n ${xvlan1_ns} link set dev ${xvlan_type}0 up + ip -n ${xvlan1_ns} addr add ${xvlan1_ip4}/24 dev ${xvlan_type}0 + ip -n ${xvlan1_ns} addr add ${xvlan1_ip6}/24 dev ${xvlan_type}0 + + ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode} + ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan2_ns} + ip -n ${xvlan2_ns} link set dev ${xvlan_type}0 up + ip -n ${xvlan2_ns} addr add ${xvlan2_ip4}/24 dev ${xvlan_type}0 + ip -n ${xvlan2_ns} addr add ${xvlan2_ip6}/24 dev ${xvlan_type}0 + + sleep 2 + + check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server" + check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server" + check_connection "${c_ns}" "${xvlan1_ip4}" "IPv4: client->${xvlan_type}_1" + check_connection "${c_ns}" "${xvlan1_ip6}" "IPv6: client->${xvlan_type}_1" + check_connection "${c_ns}" "${xvlan2_ip4}" "IPv4: client->${xvlan_type}_2" + check_connection "${c_ns}" "${xvlan2_ip6}" "IPv6: client->${xvlan_type}_2" + check_connection "${xvlan1_ns}" "${xvlan2_ip4}" "IPv4: ${xvlan_type}_1->${xvlan_type}_2" + check_connection "${xvlan1_ns}" "${xvlan2_ip6}" "IPv6: ${xvlan_type}_1->${xvlan_type}_2" + + check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client" + check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client" + check_connection "${xvlan1_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_1->client" + check_connection "${xvlan1_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_1->client" + check_connection "${xvlan2_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_2->client" + check_connection "${xvlan2_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_2->client" + check_connection "${xvlan2_ns}" "${xvlan1_ip4}" "IPv4: ${xvlan_type}_2->${xvlan_type}_1" + check_connection "${xvlan2_ns}" "${xvlan1_ip6}" "IPv6: ${xvlan_type}_2->${xvlan_type}_1" + + ip -n ${c_ns} neigh flush dev eth0 +} + +trap cleanup EXIT + +setup_prepare +ip netns add ${xvlan1_ns} +ip netns add ${xvlan2_ns} + +bond_modes="active-backup balance-tlb balance-alb" + +for bond_mode in ${bond_modes}; do + xvlan_over_bond "mode ${bond_mode}" macvlan bridge + xvlan_over_bond "mode ${bond_mode}" ipvlan l2 +done + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 899d7fb6ea8e..dad4e5fda4db 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -3,6 +3,7 @@ CONFIG_BRIDGE=y CONFIG_DUMMY=y CONFIG_IPV6=y CONFIG_MACVLAN=y +CONFIG_IPVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y CONFIG_NET_SCH_INGRESS=y diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py new file mode 100755 index 000000000000..394971b25c0b --- /dev/null +++ b/tools/testing/selftests/drivers/net/hds.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import errno +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx +from lib.py import EthtoolFamily, NlError +from lib.py import NetDrvEnv + +def get_hds(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + +def get_hds_thresh(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + +def set_hds_enable(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("disabling of HDS not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + + ksft_eq('enabled', rings['tcp-data-split']) + +def set_hds_disable(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("disabling of HDS not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + + ksft_eq('disabled', rings['tcp-data-split']) + +def set_hds_thresh_zero(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + + ksft_eq(0, rings['hds-thresh']) + +def set_hds_thresh_max(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': rings['hds-thresh-max']}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(rings['hds-thresh'], rings['hds-thresh-max']) + +def set_hds_thresh_gt(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + hds_gt = rings['hds-thresh-max'] + 1 + with ksft_raises(NlError) as e: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt}) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + +def main() -> None: + with NetDrvEnv(__file__, queue_count=3) as cfg: + ksft_run([get_hds, + get_hds_thresh, + set_hds_disable, + set_hds_enable, + set_hds_thresh_zero, + set_hds_thresh_max, + set_hds_thresh_gt], + args=(cfg, EthtoolFamily())) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 8e502a1f8f9b..19a6969643f4 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -619,9 +619,6 @@ int do_server(struct memory_buffer *mem) fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", page_aligned_frags, non_page_aligned_frags); - fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", - page_aligned_frags, non_page_aligned_frags); - cleanup: free(tmp_mem); diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py index 05b6fbb3fcdd..ad192fef3117 100755 --- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -21,9 +21,9 @@ def _enable_pp_allocation_fail(): if not os.path.exists("/sys/kernel/debug/fail_function"): raise KsftSkipEx("Kernel built without function error injection (or DebugFS)") - if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"): + if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): with open("/sys/kernel/debug/fail_function/inject", "w") as fp: - fp.write("page_pool_alloc_pages\n") + fp.write("page_pool_alloc_netmems\n") _write_fail_config({ "verbose": 0, @@ -37,7 +37,7 @@ def _disable_pp_allocation_fail(): if not os.path.exists("/sys/kernel/debug/fail_function"): return - if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"): + if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): with open("/sys/kernel/debug/fail_function/inject", "w") as fp: fp.write("\n") diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 1ea9bb695e94..987e452d3a45 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -5,7 +5,7 @@ import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx from lib.py import ksft_setup -from lib.py import cmd, ethtool, ip +from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -48,6 +48,7 @@ class NetDrvEnv: else: self._ns = NetdevSimDev(**kwargs) self.dev = self._ns.nsims[0].dev + self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] def __enter__(self): @@ -234,7 +235,12 @@ class NetDrvEpEnv: Good drivers will tell us via ethtool what their sync period is. """ if self._stats_settle_time is None: - data = ethtool("-c " + self.ifname, json=True)[0] + data = {} + try: + data = ethtool("-c " + self.ifname, json=True)[0] + except CmdExitFailure as e: + if "Operation not supported" not in e.cmd.stderr: + raise self._stats_settle_time = 0.025 + \ data.get('stats-block-usecs', 0) / 1000 / 1000 diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh new file mode 100644 index 000000000000..3acaba41ac7b --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -0,0 +1,225 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This file contains functions and helpers to support the netconsole +# selftests +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +SRCIF="" # to be populated later +SRCIP=192.0.2.1 +DSTIF="" # to be populated later +DSTIP=192.0.2.2 + +PORT="6666" +MSG="netconsole selftest" +USERDATA_KEY="key" +USERDATA_VALUE="value" +TARGET=$(mktemp -u netcons_XXXXX) +DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk) +NETCONS_CONFIGFS="/sys/kernel/config/netconsole" +NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" +# NAMESPACE will be populated by setup_ns with a random value +NAMESPACE="" + +# IDs for netdevsim +NSIM_DEV_1_ID=$((256 + RANDOM % 256)) +NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" + +# Used to create and delete namespaces +source "${LIBDIR}"/../../../../net/lib.sh +source "${LIBDIR}"/../../../../net/net_helper.sh + +# Create netdevsim interfaces +create_ifaces() { + + echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" + echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" + udevadm settle 2> /dev/null || true + + local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID" + local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID" + + # These are global variables + SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \ + -path "$NSIM1"/net -exec basename {} \;) + DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \ + -path "$NSIM2"/net -exec basename {} \;) +} + +link_ifaces() { + local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" + local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex) + local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex) + + exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}" + exec {INITNS_FD}</proc/self/ns/net + + # Bind the dst interface to namespace + ip link set "${DSTIF}" netns "${NAMESPACE}" + + # Linking one device to the other one (on the other namespace} + if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK + then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup + exit "${ksft_skip}" + fi +} + +function configure_ip() { + # Configure the IPs for both interfaces + ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}" + ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up + + ip addr add "${SRCIP}"/24 dev "${SRCIF}" + ip link set "${SRCIF}" up +} + +function set_network() { + # setup_ns function is coming from lib.sh + setup_ns NAMESPACE + + # Create both interfaces, and assign the destination to a different + # namespace + create_ifaces + + # Link both interfaces back to back + link_ifaces + + configure_ip +} + +function create_dynamic_target() { + DSTMAC=$(ip netns exec "${NAMESPACE}" \ + ip link show "${DSTIF}" | awk '/ether/ {print $2}') + + # Create a dynamic target + mkdir "${NETCONS_PATH}" + + echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip + echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip + echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac + echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + + echo 1 > "${NETCONS_PATH}"/enabled +} + +function cleanup() { + local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" + + # delete netconsole dynamic reconfiguration + echo 0 > "${NETCONS_PATH}"/enabled + # Remove all the keys that got created during the selftest + find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete + # Remove the configfs entry + rmdir "${NETCONS_PATH}" + + # Delete netdevsim devices + echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" + echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL" + + # this is coming from lib.sh + cleanup_all_ns + + # Restoring printk configurations + echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk +} + +function set_user_data() { + if [[ ! -d "${NETCONS_PATH}""/userdata" ]] + then + echo "Userdata path not available in ${NETCONS_PATH}/userdata" + exit "${ksft_skip}" + fi + + KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}" + mkdir -p "${KEY_PATH}" + VALUE_PATH="${KEY_PATH}""/value" + echo "${USERDATA_VALUE}" > "${VALUE_PATH}" +} + +function listen_port_and_save_to() { + local OUTPUT=${1} + # Just wait for 2 seconds + timeout 2 ip netns exec "${NAMESPACE}" \ + socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" +} + +function validate_result() { + local TMPFILENAME="$1" + + # TMPFILENAME will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # key=value + + # Check if the file exists + if [ ! -f "$TMPFILENAME" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${TMPFILENAME}"; then + echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then + echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi + + # Delete the file once it is validated, otherwise keep it + # for debugging purposes + rm "${TMPFILENAME}" + exit "${ksft_pass}" +} + +function check_for_dependencies() { + if [ "$(id -u)" -ne 0 ]; then + echo "This test must be run as root" >&2 + exit "${ksft_skip}" + fi + + if ! which socat > /dev/null ; then + echo "SKIP: socat(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if ! which ip > /dev/null ; then + echo "SKIP: ip(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if ! which udevadm > /dev/null ; then + echo "SKIP: udevadm(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then + echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2 + exit "${ksft_skip}" + fi + + if [ ! -d "${NETCONS_CONFIGFS}" ]; then + echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2 + exit "${ksft_skip}" + fi + + if ip link show "${DSTIF}" 2> /dev/null; then + echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2 + exit "${ksft_skip}" + fi + + if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then + echo "SKIP: IPs already in use. Skipping it" >&2 + exit "${ksft_skip}" + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh index b79542a4dcc7..4a11bf1d514a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh @@ -12,6 +12,7 @@ ALL_TESTS=" bridge_rif_remaster_port " +REQUIRE_TEAMD="yes" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh index e28f978104f3..b8bbe94f4736 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh @@ -10,6 +10,7 @@ ALL_TESTS=" lag_rif_nomaster_addr " +REQUIRE_TEAMD="yes" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh index 6318cfa6434c..d1a9d379eaf3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh @@ -10,6 +10,7 @@ ALL_TESTS=" lag_rif_nomaster_addr " +REQUIRE_TEAMD="yes" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index b175f4d966e5..fe765da498e8 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -18,224 +18,8 @@ set -euo pipefail SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -# Simple script to test dynamic targets in netconsole -SRCIF="" # to be populated later -SRCIP=192.0.2.1 -DSTIF="" # to be populated later -DSTIP=192.0.2.2 +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh -PORT="6666" -MSG="netconsole selftest" -USERDATA_KEY="key" -USERDATA_VALUE="value" -TARGET=$(mktemp -u netcons_XXXXX) -DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk) -NETCONS_CONFIGFS="/sys/kernel/config/netconsole" -NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" -KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}" -# NAMESPACE will be populated by setup_ns with a random value -NAMESPACE="" - -# IDs for netdevsim -NSIM_DEV_1_ID=$((256 + RANDOM % 256)) -NSIM_DEV_2_ID=$((512 + RANDOM % 256)) -NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" - -# Used to create and delete namespaces -source "${SCRIPTDIR}"/../../net/lib.sh -source "${SCRIPTDIR}"/../../net/net_helper.sh - -# Create netdevsim interfaces -create_ifaces() { - - echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" - echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" - udevadm settle 2> /dev/null || true - - local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID" - local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID" - - # These are global variables - SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \ - -path "$NSIM1"/net -exec basename {} \;) - DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \ - -path "$NSIM2"/net -exec basename {} \;) -} - -link_ifaces() { - local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" - local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex) - local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex) - - exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}" - exec {INITNS_FD}</proc/self/ns/net - - # Bind the dst interface to namespace - ip link set "${DSTIF}" netns "${NAMESPACE}" - - # Linking one device to the other one (on the other namespace} - if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK - then - echo "linking netdevsim1 with netdevsim2 should succeed" - cleanup - exit "${ksft_skip}" - fi -} - -function configure_ip() { - # Configure the IPs for both interfaces - ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}" - ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up - - ip addr add "${SRCIP}"/24 dev "${SRCIF}" - ip link set "${SRCIF}" up -} - -function set_network() { - # setup_ns function is coming from lib.sh - setup_ns NAMESPACE - - # Create both interfaces, and assign the destination to a different - # namespace - create_ifaces - - # Link both interfaces back to back - link_ifaces - - configure_ip -} - -function create_dynamic_target() { - DSTMAC=$(ip netns exec "${NAMESPACE}" \ - ip link show "${DSTIF}" | awk '/ether/ {print $2}') - - # Create a dynamic target - mkdir "${NETCONS_PATH}" - - echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip - echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip - echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac - echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name - - echo 1 > "${NETCONS_PATH}"/enabled -} - -function cleanup() { - local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" - - # delete netconsole dynamic reconfiguration - echo 0 > "${NETCONS_PATH}"/enabled - # Remove key - rmdir "${KEY_PATH}" - # Remove the configfs entry - rmdir "${NETCONS_PATH}" - - # Delete netdevsim devices - echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" - echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL" - - # this is coming from lib.sh - cleanup_all_ns - - # Restoring printk configurations - echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk -} - -function set_user_data() { - if [[ ! -d "${NETCONS_PATH}""/userdata" ]] - then - echo "Userdata path not available in ${NETCONS_PATH}/userdata" - exit "${ksft_skip}" - fi - - mkdir -p "${KEY_PATH}" - VALUE_PATH="${KEY_PATH}""/value" - echo "${USERDATA_VALUE}" > "${VALUE_PATH}" -} - -function listen_port_and_save_to() { - local OUTPUT=${1} - # Just wait for 2 seconds - timeout 2 ip netns exec "${NAMESPACE}" \ - socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" -} - -function validate_result() { - local TMPFILENAME="$1" - - # TMPFILENAME will contain something like: - # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM - # key=value - - # Check if the file exists - if [ ! -f "$TMPFILENAME" ]; then - echo "FAIL: File was not generated." >&2 - exit "${ksft_fail}" - fi - - if ! grep -q "${MSG}" "${TMPFILENAME}"; then - echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2 - cat "${TMPFILENAME}" >&2 - exit "${ksft_fail}" - fi - - if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then - echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 - cat "${TMPFILENAME}" >&2 - exit "${ksft_fail}" - fi - - # Delete the file once it is validated, otherwise keep it - # for debugging purposes - rm "${TMPFILENAME}" - exit "${ksft_pass}" -} - -function check_for_dependencies() { - if [ "$(id -u)" -ne 0 ]; then - echo "This test must be run as root" >&2 - exit "${ksft_skip}" - fi - - if ! which socat > /dev/null ; then - echo "SKIP: socat(1) is not available" >&2 - exit "${ksft_skip}" - fi - - if ! which ip > /dev/null ; then - echo "SKIP: ip(1) is not available" >&2 - exit "${ksft_skip}" - fi - - if ! which udevadm > /dev/null ; then - echo "SKIP: udevadm(1) is not available" >&2 - exit "${ksft_skip}" - fi - - if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then - echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2 - exit "${ksft_skip}" - fi - - if [ ! -d "${NETCONS_CONFIGFS}" ]; then - echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2 - exit "${ksft_skip}" - fi - - if ip link show "${DSTIF}" 2> /dev/null; then - echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2 - exit "${ksft_skip}" - fi - - if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then - echo "SKIP: IPs already in use. Skipping it" >&2 - exit "${ksft_skip}" - fi -} - -# ========== # -# Start here # -# ========== # modprobe netdevsim 2> /dev/null || true modprobe netconsole 2> /dev/null || true diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh new file mode 100755 index 000000000000..29bad56448a2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This test verifies that users can successfully create up to +# MAX_USERDATA_ITEMS userdata entries without encountering any failures. +# +# Additionally, it tests for expected failure when attempting to exceed this +# maximum limit. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh +# This is coming from netconsole code. Check for it in drivers/net/netconsole.c +MAX_USERDATA_ITEMS=16 + +# Function to create userdata entries +function create_userdata_max_entries() { + # All these keys should be created without any error + for i in $(seq $MAX_USERDATA_ITEMS) + do + # USERDATA_KEY is used by set_user_data + USERDATA_KEY="key"${i} + set_user_data + done +} + +# Function to verify the entry limit +function verify_entry_limit() { + # Allowing the test to fail without exiting, since the next command + # will fail + set +e + mkdir "${NETCONS_PATH}/userdata/key_that_will_fail" 2> /dev/null + ret="$?" + set -e + if [ "$ret" -eq 0 ]; + then + echo "Adding more than ${MAX_USERDATA_ITEMS} entries in userdata should fail, but it didn't" >&2 + ls "${NETCONS_PATH}/userdata/" >&2 + exit "${ksft_fail}" + fi +} + +# ========== # +# Start here # +# ========== # + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies + +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target +# populate the maximum number of supported keys in userdata +create_userdata_max_entries +# Verify an additional entry is not allowed +verify_entry_limit +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh index fd13c8cfb7a8..b411fe66510f 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh @@ -58,9 +58,12 @@ for root in mq mqprio; do ethtool -L $NDEV combined 4 n_child_assert 4 "One real queue, rest default" - # Graft some - tcq replace parent 100:1 handle 204: - n_child_assert 3 "Grafted" + # Remove real one + tcq del parent 100:4 handle 204: + + # Replace default with pfifo + tcq replace parent 100:1 handle 205: pfifo limit 1000 + n_child_assert 3 "Deleting real one, replacing default one with pfifo" ethtool -L $NDEV combined 1 n_child_assert 1 "Grafted, one" diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 384cfa3d38a6..92c2f0376c08 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -142,7 +142,7 @@ function pre_ethtool { } function check_table { - local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 local -n expected=$2 local last=$3 @@ -212,7 +212,7 @@ function check_tables { } function print_table { - local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 read -a have < $path tree $NSIM_DEV_DFS/ @@ -641,7 +641,7 @@ for port in 0 1; do NSIM_NETDEV=`get_netdev_name old_netdevs` ip link set dev $NSIM_NETDEV up - echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error msg="1 - create VxLANs v6" exp0=( 0 0 0 0 ) @@ -663,7 +663,7 @@ for port in 0 1; do new_geneve gnv0 20000 msg="2 - destroy GENEVE" - echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error exp1=( `mke 20000 2` 0 0 0 ) del_dev gnv0 @@ -764,7 +764,7 @@ for port in 0 1; do msg="create VxLANs v4" new_vxlan vxlan0 10000 $NSIM_NETDEV - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="NIC device goes down" @@ -775,7 +775,7 @@ for port in 0 1; do fi check_tables - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="NIC device goes up again" @@ -789,7 +789,7 @@ for port in 0 1; do del_dev vxlan0 check_tables - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="destroy NIC" @@ -896,7 +896,7 @@ msg="vacate VxLAN in overflow table" exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) del_dev vxlan2 -echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset +echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="tunnels destroyed 2" diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 30f29096e27c..38303da957ee 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -1,32 +1,37 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 -from lib.py import ksft_run, ksft_exit, ksft_eq, KsftSkipEx -from lib.py import EthtoolFamily, NetdevFamily +from lib.py import ksft_disruptive, ksft_exit, ksft_run +from lib.py import ksft_eq, ksft_raises, KsftSkipEx +from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import NetDrvEnv -from lib.py import cmd +from lib.py import cmd, defer, ip +import errno import glob -def sys_get_queues(ifname) -> int: - folders = glob.glob(f'/sys/class/net/{ifname}/queues/rx-*') +def sys_get_queues(ifname, qtype='rx') -> int: + folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') return len(folders) -def nl_get_queues(cfg, nl): +def nl_get_queues(cfg, nl, qtype='rx'): queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) if queues: - return len([q for q in queues if q['type'] == 'rx']) + return len([q for q in queues if q['type'] == qtype]) return None def get_queues(cfg, nl) -> None: - queues = nl_get_queues(cfg, nl) - if not queues: - raise KsftSkipEx('queue-get not supported by device') + snl = NetdevFamily(recv_size=4096) - expected = sys_get_queues(cfg.dev['ifname']) - ksft_eq(queues, expected) + for qtype in ['rx', 'tx']: + queues = nl_get_queues(cfg, snl, qtype) + if not queues: + raise KsftSkipEx('queue-get not supported by device') + + expected = sys_get_queues(cfg.dev['ifname'], qtype) + ksft_eq(queues, expected) def addremove_queues(cfg, nl) -> None: @@ -56,9 +61,27 @@ def addremove_queues(cfg, nl) -> None: ksft_eq(queues, expected) +@ksft_disruptive +def check_down(cfg, nl) -> None: + # Check the NAPI IDs before interface goes down and hides them + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + + ip(f"link set dev {cfg.dev['ifname']} down") + defer(ip, f"link set dev {cfg.dev['ifname']} up") + + with ksft_raises(NlError) as cm: + nl.queue_get({'ifindex': cfg.ifindex, 'id': 0, 'type': 'rx'}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT) + + if napis: + with ksft_raises(NlError) as cm: + nl.napi_get({'id': napis[0]['id']}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT) + + def main() -> None: - with NetDrvEnv(__file__, queue_count=3) as cfg: - ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily())) + with NetDrvEnv(__file__, queue_count=100) as cfg: + ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 63e3c045a3b2..efcc1e10575b 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -2,12 +2,15 @@ # SPDX-License-Identifier: GPL-2.0 import errno +import subprocess +import time from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx +from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises +from lib.py import KsftSkipEx, KsftXfailEx from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv -from lib.py import ip, defer +from lib.py import cmd, ip, defer ethnl = EthtoolFamily() netfam = NetdevFamily() @@ -110,6 +113,23 @@ def qstat_by_ifindex(cfg) -> None: ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key) ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key) + # Sanity check the dumps + queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True) + # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}} + parsed = {} + for entry in queues: + ifindex = entry["ifindex"] + if ifindex not in parsed: + parsed[ifindex] = {"rx":[], "tx": []} + parsed[ifindex][entry["queue-type"]].append(entry['queue-id']) + # Now, validate + for ifindex, queues in parsed.items(): + for qtype in ['rx', 'tx']: + ksft_eq(len(queues[qtype]), len(set(queues[qtype])), + comment="repeated queue keys") + ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1, + comment="missing queue keys") + # Test invalid dumps # 0 is invalid with ksft_raises(NlError) as cm: @@ -157,10 +177,95 @@ def check_down(cfg) -> None: netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True) +def __run_inf_loop(body): + body = body.strip() + if body[-1] != ';': + body += ';' + + return subprocess.Popen(f"while true; do {body} done", shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + +def __stats_increase_sanely(old, new) -> None: + for k in old.keys(): + ksft_ge(new[k], old[k]) + ksft_lt(new[k] - old[k], 1 << 31, comment="likely wrapping error") + + +def procfs_hammer(cfg) -> None: + """ + Reading stats via procfs only holds the RCU lock, which is not an exclusive + lock, make sure drivers can handle parallel reads of stats. + """ + one = __run_inf_loop("cat /proc/net/dev") + defer(one.kill) + two = __run_inf_loop("cat /proc/net/dev") + defer(two.kill) + + time.sleep(1) + # Make sure the processes are running + ksft_is(one.poll(), None) + ksft_is(two.poll(), None) + + rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + time.sleep(2) + rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + __stats_increase_sanely(rtstat1, rtstat2) + # defers will kill the loops + + +@ksft_disruptive +def procfs_downup_hammer(cfg) -> None: + """ + Reading stats via procfs only holds the RCU lock, drivers often try + to sleep when reading the stats, or don't protect against races. + """ + # Max out the queues, we'll flip between max and 1 + channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + rx_type = 'rx' + else: + rx_type = 'combined' + cur_queue_cnt = channels[f'{rx_type}-count'] + max_queue_cnt = channels[f'{rx_type}-max'] + + cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") + defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") + + # Real test stats + stats = __run_inf_loop("cat /proc/net/dev") + defer(stats.kill) + + ipset = f"ip link set dev {cfg.ifname}" + defer(ip, f"link set dev {cfg.ifname} up") + # The "echo -n 1" lets us count iterations below + updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \ + f"ethtool -L {cfg.ifname} {rx_type} 1; " + \ + f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \ + "echo -n 1" + updown = __run_inf_loop(updown) + kill_updown = defer(updown.kill) + + time.sleep(1) + # Make sure the processes are running + ksft_is(stats.poll(), None) + ksft_is(updown.poll(), None) + + rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + # We're looking for crashes, give it extra time + time.sleep(9) + rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + __stats_increase_sanely(rtstat1, rtstat2) + + kill_updown.exec() + stdout, _ = updown.communicate(timeout=5) + ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8'))) + + def main() -> None: - with NetDrvEnv(__file__) as cfg: + with NetDrvEnv(__file__, queue_count=100) as cfg: ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, - check_down], + check_down, procfs_hammer, procfs_downup_hammer], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/ntsync/.gitignore b/tools/testing/selftests/drivers/ntsync/.gitignore new file mode 100644 index 000000000000..848573a3d3ea --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/.gitignore @@ -0,0 +1 @@ +ntsync diff --git a/tools/testing/selftests/drivers/ntsync/Makefile b/tools/testing/selftests/drivers/ntsync/Makefile new file mode 100644 index 000000000000..dbf2b055c0b2 --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/Makefile @@ -0,0 +1,7 @@ +# SPDX-LICENSE-IDENTIFIER: GPL-2.0-only +TEST_GEN_PROGS := ntsync + +CFLAGS += $(KHDR_INCLUDES) +LDLIBS += -lpthread + +include ../../lib.mk diff --git a/tools/testing/selftests/drivers/ntsync/config b/tools/testing/selftests/drivers/ntsync/config new file mode 100644 index 000000000000..60539c826d06 --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/config @@ -0,0 +1 @@ +CONFIG_WINESYNC=y diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c new file mode 100644 index 000000000000..3aad311574c4 --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/ntsync.c @@ -0,0 +1,1343 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Various unit tests for the "ntsync" synchronization primitive driver. + * + * Copyright (C) 2021-2022 Elizabeth Figura <zfigura@codeweavers.com> + */ + +#define _GNU_SOURCE +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <pthread.h> +#include <linux/ntsync.h> +#include "../../kselftest_harness.h" + +static int read_sem_state(int sem, __u32 *count, __u32 *max) +{ + struct ntsync_sem_args args; + int ret; + + memset(&args, 0xcc, sizeof(args)); + ret = ioctl(sem, NTSYNC_IOC_SEM_READ, &args); + *count = args.count; + *max = args.max; + return ret; +} + +#define check_sem_state(sem, count, max) \ + ({ \ + __u32 __count, __max; \ + int ret = read_sem_state((sem), &__count, &__max); \ + EXPECT_EQ(0, ret); \ + EXPECT_EQ((count), __count); \ + EXPECT_EQ((max), __max); \ + }) + +static int release_sem(int sem, __u32 *count) +{ + return ioctl(sem, NTSYNC_IOC_SEM_RELEASE, count); +} + +static int read_mutex_state(int mutex, __u32 *count, __u32 *owner) +{ + struct ntsync_mutex_args args; + int ret; + + memset(&args, 0xcc, sizeof(args)); + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &args); + *count = args.count; + *owner = args.owner; + return ret; +} + +#define check_mutex_state(mutex, count, owner) \ + ({ \ + __u32 __count, __owner; \ + int ret = read_mutex_state((mutex), &__count, &__owner); \ + EXPECT_EQ(0, ret); \ + EXPECT_EQ((count), __count); \ + EXPECT_EQ((owner), __owner); \ + }) + +static int unlock_mutex(int mutex, __u32 owner, __u32 *count) +{ + struct ntsync_mutex_args args; + int ret; + + args.owner = owner; + args.count = 0xdeadbeef; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_UNLOCK, &args); + *count = args.count; + return ret; +} + +static int read_event_state(int event, __u32 *signaled, __u32 *manual) +{ + struct ntsync_event_args args; + int ret; + + memset(&args, 0xcc, sizeof(args)); + ret = ioctl(event, NTSYNC_IOC_EVENT_READ, &args); + *signaled = args.signaled; + *manual = args.manual; + return ret; +} + +#define check_event_state(event, signaled, manual) \ + ({ \ + __u32 __signaled, __manual; \ + int ret = read_event_state((event), &__signaled, &__manual); \ + EXPECT_EQ(0, ret); \ + EXPECT_EQ((signaled), __signaled); \ + EXPECT_EQ((manual), __manual); \ + }) + +static int wait_objs(int fd, unsigned long request, __u32 count, + const int *objs, __u32 owner, int alert, __u32 *index) +{ + struct ntsync_wait_args args = {0}; + struct timespec timeout; + int ret; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + args.timeout = timeout.tv_sec * 1000000000 + timeout.tv_nsec; + args.count = count; + args.objs = (uintptr_t)objs; + args.owner = owner; + args.index = 0xdeadbeef; + args.alert = alert; + ret = ioctl(fd, request, &args); + *index = args.index; + return ret; +} + +static int wait_any(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index) +{ + return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, count, objs, owner, 0, index); +} + +static int wait_all(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index) +{ + return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, count, objs, owner, 0, index); +} + +static int wait_any_alert(int fd, __u32 count, const int *objs, + __u32 owner, int alert, __u32 *index) +{ + return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, + count, objs, owner, alert, index); +} + +static int wait_all_alert(int fd, __u32 count, const int *objs, + __u32 owner, int alert, __u32 *index) +{ + return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, + count, objs, owner, alert, index); +} + +TEST(semaphore_state) +{ + struct ntsync_sem_args sem_args; + struct timespec timeout; + __u32 count, index; + int fd, ret, sem; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 3; + sem_args.max = 2; + sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(-1, sem); + EXPECT_EQ(EINVAL, errno); + + sem_args.count = 2; + sem_args.max = 2; + sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, sem); + check_sem_state(sem, 2, 2); + + count = 0; + ret = release_sem(sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_sem_state(sem, 2, 2); + + count = 1; + ret = release_sem(sem, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOVERFLOW, errno); + check_sem_state(sem, 2, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(sem, 1, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(sem, 0, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + count = 3; + ret = release_sem(sem, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOVERFLOW, errno); + check_sem_state(sem, 0, 2); + + count = 2; + ret = release_sem(sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(sem, 2, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + + count = 1; + ret = release_sem(sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(sem, 1, 2); + + count = ~0u; + ret = release_sem(sem, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOVERFLOW, errno); + check_sem_state(sem, 1, 2); + + close(sem); + + close(fd); +} + +TEST(mutex_state) +{ + struct ntsync_mutex_args mutex_args; + __u32 owner, count, index; + struct timespec timeout; + int fd, ret, mutex; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + mutex_args.owner = 123; + mutex_args.count = 0; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(-1, mutex); + EXPECT_EQ(EINVAL, errno); + + mutex_args.owner = 0; + mutex_args.count = 2; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(-1, mutex); + EXPECT_EQ(EINVAL, errno); + + mutex_args.owner = 123; + mutex_args.count = 2; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, mutex); + check_mutex_state(mutex, 2, 123); + + ret = unlock_mutex(mutex, 0, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = unlock_mutex(mutex, 456, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + check_mutex_state(mutex, 2, 123); + + ret = unlock_mutex(mutex, 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_mutex_state(mutex, 1, 123); + + ret = unlock_mutex(mutex, 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, count); + check_mutex_state(mutex, 0, 0); + + ret = unlock_mutex(mutex, 123, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + + ret = wait_any(fd, 1, &mutex, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 456); + + ret = wait_any(fd, 1, &mutex, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 2, 456); + + ret = unlock_mutex(mutex, 456, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_mutex_state(mutex, 1, 456); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + owner = 0; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + owner = 123; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + check_mutex_state(mutex, 1, 456); + + owner = 456; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + memset(&mutex_args, 0xcc, sizeof(mutex_args)); + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + memset(&mutex_args, 0xcc, sizeof(mutex_args)); + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 123); + + owner = 123; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + memset(&mutex_args, 0xcc, sizeof(mutex_args)); + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 123); + + close(mutex); + + mutex_args.owner = 0; + mutex_args.count = 0; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, mutex); + check_mutex_state(mutex, 0, 0); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 123); + + close(mutex); + + mutex_args.owner = 123; + mutex_args.count = ~0u; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, mutex); + check_mutex_state(mutex, ~0u, 123); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + close(mutex); + + close(fd); +} + +TEST(manual_event_state) +{ + struct ntsync_event_args event_args; + __u32 index, signaled; + int fd, event, ret; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + event_args.manual = 1; + event_args.signaled = 0; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + check_event_state(event, 0, 1); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 1, 1); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 1, 1); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_event_state(event, 1, 1); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 0, 1); + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 1); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 0, 1); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 1); + + close(event); + + close(fd); +} + +TEST(auto_event_state) +{ + struct ntsync_event_args event_args; + __u32 index, signaled; + int fd, event, ret; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + event_args.manual = 0; + event_args.signaled = 1; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + check_event_state(event, 1, 0); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 1, 0); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_event_state(event, 0, 0); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 0); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 0, 0); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 0); + + close(event); + + close(fd); +} + +TEST(test_wait_any) +{ + int objs[NTSYNC_MAX_WAIT_COUNT + 1], fd, ret; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_sem_args sem_args = {0}; + __u32 owner, index, count, i; + struct timespec timeout; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 0; + mutex_args.count = 0; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 0, 0); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 0, 0); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 1, 123); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 1, 123); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 2, 123); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + owner = 123; + ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(1, index); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + + close(objs[1]); + + /* test waiting on the same object twice */ + + count = 2; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + objs[1] = objs[0]; + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 1, 3); + + ret = wait_any(fd, 0, NULL, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + for (i = 1; i < NTSYNC_MAX_WAIT_COUNT + 1; ++i) + objs[i] = objs[0]; + + ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT + 1, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = wait_any(fd, -1, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + close(objs[0]); + + close(fd); +} + +TEST(test_wait_all) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_sem_args sem_args = {0}; + __u32 owner, index, count; + int objs[2], fd, ret; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 0; + mutex_args.count = 0; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 123); + + ret = wait_all(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 123); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 2, 123); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 2, 123); + + count = 3; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 2, 3); + check_mutex_state(objs[1], 3, 123); + + owner = 123; + ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 123); + + close(objs[1]); + + event_args.manual = true; + event_args.signaled = true; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, objs[1]); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_event_state(objs[1], 1, 1); + + close(objs[1]); + + /* test waiting on the same object twice */ + objs[1] = objs[0]; + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + close(objs[0]); + + close(fd); +} + +struct wake_args { + int fd; + int obj; +}; + +struct wait_args { + int fd; + unsigned long request; + struct ntsync_wait_args *args; + int ret; + int err; +}; + +static void *wait_thread(void *arg) +{ + struct wait_args *args = arg; + + args->ret = ioctl(args->fd, args->request, args->args); + args->err = errno; + return NULL; +} + +static __u64 get_abs_timeout(unsigned int ms) +{ + struct timespec timeout; + clock_gettime(CLOCK_MONOTONIC, &timeout); + return (timeout.tv_sec * 1000000000) + timeout.tv_nsec + (ms * 1000000); +} + +static int wait_for_thread(pthread_t thread, unsigned int ms) +{ + struct timespec timeout; + + clock_gettime(CLOCK_REALTIME, &timeout); + timeout.tv_nsec += ms * 1000000; + timeout.tv_sec += (timeout.tv_nsec / 1000000000); + timeout.tv_nsec %= 1000000000; + return pthread_timedjoin_np(thread, NULL, &timeout); +} + +TEST(wake_any) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + struct wait_args thread_args; + __u32 count, index, signaled; + int objs[2], fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 123; + mutex_args.count = 1; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + /* test waking the semaphore */ + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 456; + wait_args.index = 0xdeadbeef; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ANY; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(objs[0], 0, 3); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(0, wait_args.index); + + /* test waking the mutex */ + + /* first grab it again for owner 123 */ + ret = wait_any(fd, 1, &objs[1], 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.owner = 456; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = unlock_mutex(objs[1], 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + ret = unlock_mutex(objs[1], 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, mutex_args.count); + check_mutex_state(objs[1], 1, 456); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + close(objs[1]); + + /* test waking events */ + + event_args.manual = false; + event_args.signaled = false; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, objs[1]); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 0, 0); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 0, 0); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + close(objs[1]); + + event_args.manual = true; + event_args.signaled = false; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, objs[1]); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 1, 1); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 0, 1); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + /* delete an object while it's being waited on */ + + wait_args.timeout = get_abs_timeout(200); + wait_args.owner = 123; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + close(objs[0]); + close(objs[1]); + + ret = wait_for_thread(thread, 200); + EXPECT_EQ(0, ret); + EXPECT_EQ(-1, thread_args.ret); + EXPECT_EQ(ETIMEDOUT, thread_args.err); + + close(fd); +} + +TEST(wake_all) +{ + struct ntsync_event_args manual_event_args = {0}; + struct ntsync_event_args auto_event_args = {0}; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + struct wait_args thread_args; + __u32 count, index, signaled; + int objs[4], fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 123; + mutex_args.count = 1; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + manual_event_args.manual = true; + manual_event_args.signaled = true; + objs[2] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &manual_event_args); + EXPECT_LE(0, objs[2]); + + auto_event_args.manual = false; + auto_event_args.signaled = true; + objs[3] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &auto_event_args); + EXPECT_EQ(0, objs[3]); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 4; + wait_args.owner = 456; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ALL; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + check_sem_state(objs[0], 1, 3); + + ret = wait_any(fd, 1, &objs[0], 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = unlock_mutex(objs[1], 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + check_mutex_state(objs[1], 0, 0); + + ret = ioctl(objs[2], NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + + count = 2; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(objs[0], 2, 3); + + ret = ioctl(objs[3], NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + + ret = ioctl(objs[2], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + ret = ioctl(objs[3], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 456); + check_event_state(objs[2], 1, 1); + check_event_state(objs[3], 0, 0); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + + /* delete an object while it's being waited on */ + + wait_args.timeout = get_abs_timeout(200); + wait_args.owner = 123; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + close(objs[0]); + close(objs[1]); + close(objs[2]); + close(objs[3]); + + ret = wait_for_thread(thread, 200); + EXPECT_EQ(0, ret); + EXPECT_EQ(-1, thread_args.ret); + EXPECT_EQ(ETIMEDOUT, thread_args.err); + + close(fd); +} + +TEST(alert_any) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + __u32 index, count, signaled; + struct wait_args thread_args; + int objs[2], event, fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 2; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + sem_args.count = 1; + sem_args.max = 2; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[1]); + + event_args.manual = true; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + ret = wait_any_alert(fd, 0, NULL, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 0, NULL, 123, event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + /* test wakeup via alert */ + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 123; + wait_args.index = 0xdeadbeef; + wait_args.alert = event; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ANY; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(2, wait_args.index); + + close(event); + + /* test with an auto-reset event */ + + event_args.manual = false; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + close(event); + + close(objs[0]); + close(objs[1]); + + close(fd); +} + +TEST(alert_all) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + struct wait_args thread_args; + __u32 index, count, signaled; + int objs[2], event, fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 2; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + sem_args.count = 1; + sem_args.max = 2; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[1]); + + event_args.manual = true; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + /* test wakeup via alert */ + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 123; + wait_args.index = 0xdeadbeef; + wait_args.alert = event; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ALL; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(2, wait_args.index); + + close(event); + + /* test with an auto-reset event */ + + event_args.manual = false; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + count = 2; + ret = release_sem(objs[1], &count); + EXPECT_EQ(0, ret); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + close(event); + + close(objs[0]); + close(objs[1]); + + close(fd); +} + +#define STRESS_LOOPS 10000 +#define STRESS_THREADS 4 + +static unsigned int stress_counter; +static int stress_device, stress_start_event, stress_mutex; + +static void *stress_thread(void *arg) +{ + struct ntsync_wait_args wait_args = {0}; + __u32 index, count, i; + int ret; + + wait_args.timeout = UINT64_MAX; + wait_args.count = 1; + wait_args.objs = (uintptr_t)&stress_start_event; + wait_args.owner = gettid(); + wait_args.index = 0xdeadbeef; + + ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args); + + wait_args.objs = (uintptr_t)&stress_mutex; + + for (i = 0; i < STRESS_LOOPS; ++i) { + ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args); + + ++stress_counter; + + unlock_mutex(stress_mutex, wait_args.owner, &count); + } + + return NULL; +} + +TEST(stress_wait) +{ + struct ntsync_event_args event_args; + struct ntsync_mutex_args mutex_args; + pthread_t threads[STRESS_THREADS]; + __u32 signaled, i; + int ret; + + stress_device = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, stress_device); + + mutex_args.owner = 0; + mutex_args.count = 0; + stress_mutex = ioctl(stress_device, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, stress_mutex); + + event_args.manual = 1; + event_args.signaled = 0; + stress_start_event = ioctl(stress_device, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, stress_start_event); + + for (i = 0; i < STRESS_THREADS; ++i) + pthread_create(&threads[i], NULL, stress_thread, NULL); + + ret = ioctl(stress_start_event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + for (i = 0; i < STRESS_THREADS; ++i) { + ret = pthread_join(threads[i], NULL); + EXPECT_EQ(0, ret); + } + + EXPECT_EQ(STRESS_LOOPS * STRESS_THREADS, stress_counter); + + close(stress_start_event); + close(stress_mutex); + close(stress_device); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index d374878cc0ba..c62544b966ae 100755 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh @@ -76,11 +76,11 @@ test_create_empty() : > $file - if [ ! -e $file ]; then - echo "$file can not be created without writing" >&2 + if [ -e $file ]; then + echo "$file can be created without writing" >&2 + file_cleanup $file exit 1 fi - file_cleanup $file } test_create_read() @@ -89,10 +89,13 @@ test_create_read() ./create-read $file if [ $? -ne 0 ]; then echo "create and read $file failed" + exit 1 + fi + if [ -e $file ]; then + echo "file still exists and should not" file_cleanup $file exit 1 fi - file_cleanup $file } test_delete() @@ -202,6 +205,158 @@ test_invalid_filenames() exit $ret } +test_no_set_size() +{ + local attrs='\x07\x00\x00\x00' + local file=$efivarfs_mount/$FUNCNAME-$test_guid + local ret=0 + + printf "$attrs\x00" > $file + [ -e $file -a -s $file ] || exit 1 + chattr -i $file + : > $file + if [ $? != 0 ]; then + echo "variable file failed to accept truncation" + ret=1 + elif [ -e $file -a ! -s $file ]; then + echo "file can be truncated to zero size" + ret=1 + fi + rm $file || exit 1 + + exit $ret +} + +setup_test_multiple() +{ + ## + # we're going to do multi-threaded tests, so create a set of + # pipes for synchronization. We use pipes 1..3 to start the + # stalled shell job and pipes 4..6 as indicators that the job + # has started. If you need more than 3 jobs the two +3's below + # need increasing + ## + + declare -ag p + + # empty is because arrays number from 0 but jobs number from 1 + p[0]="" + + for f in 1 2 3 4 5 6; do + p[$f]=/tmp/efivarfs_pipe${f} + mknod ${p[$f]} p + done + + declare -g var=$efivarfs_mount/test_multiple-$test_guid + + cleanup() { + for f in ${p[@]}; do + rm -f ${f} + done + if [ -e $var ]; then + file_cleanup $var + fi + } + trap cleanup exit + + waitstart() { + cat ${p[$[$1+3]]} > /dev/null + } + + waitpipe() { + echo 1 > ${p[$[$1+3]]} + cat ${p[$1]} > /dev/null + } + + endjob() { + echo 1 > ${p[$1]} + wait -n %$1 + } +} + +test_multiple_zero_size() +{ + ## + # check for remove on last close, set up three threads all + # holding the variable (one write and two reads) and then + # close them sequentially (waiting for completion) and check + # the state of the variable + ## + + { waitpipe 1; echo 1; } > $var 2> /dev/null & + waitstart 1 + # zero length file should exist + [ -e $var ] || exit 1 + # second and third delayed close + { waitpipe 2; } < $var & + waitstart 2 + { waitpipe 3; } < $var & + waitstart 3 + # close first fd + endjob 1 + # var should only be deleted on last close + [ -e $var ] || exit 1 + # close second fd + endjob 2 + [ -e $var ] || exit 1 + # file should go on last close + endjob 3 + [ ! -e $var ] || exit 1 +} + +test_multiple_create() +{ + ## + # set multiple threads to access the variable but delay + # the final write to check the close of 2 and 3. The + # final write should succeed in creating the variable + ## + { waitpipe 1; printf '\x07\x00\x00\x00\x54'; } > $var & + waitstart 1 + [ -e $var -a ! -s $var ] || exit 1 + { waitpipe 2; } < $var & + waitstart 2 + { waitpipe 3; } < $var & + waitstart 3 + # close second and third fds + endjob 2 + # var should only be created (have size) on last close + [ -e $var -a ! -s $var ] || exit 1 + endjob 3 + [ -e $var -a ! -s $var ] || exit 1 + # close first fd + endjob 1 + # variable should still exist + [ -s $var ] || exit 1 + file_cleanup $var +} + +test_multiple_delete_on_write() { + ## + # delete the variable on final write; seqencing similar + # to test_multiple_create() + ## + printf '\x07\x00\x00\x00\x54' > $var + chattr -i $var + { waitpipe 1; printf '\x07\x00\x00\x00'; } > $var & + waitstart 1 + [ -e $var -a -s $var ] || exit 1 + { waitpipe 2; } < $var & + waitstart 2 + { waitpipe 3; } < $var & + waitstart 3 + # close first fd; write should set variable size to zero + endjob 1 + # var should only be deleted on last close + [ -e $var -a ! -s $var ] || exit 1 + endjob 2 + [ -e $var ] || exit 1 + # close last fd + endjob 3 + # variable should now be removed + [ ! -e $var ] || exit 1 +} + check_prereqs rc=0 @@ -214,5 +369,10 @@ run_test test_zero_size_delete run_test test_open_unlink run_test test_valid_filenames run_test test_invalid_filenames +run_test test_no_set_size +setup_test_multiple +run_test test_multiple_zero_size +run_test test_multiple_create +run_test test_multiple_delete_on_write exit $rc diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index a0dc5d4bf733..7f3d1ae762ec 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -9,9 +9,13 @@ execveat.ephemeral execveat.denatured non-regular null-argv +/check-exec +/false +/inc /load_address.* !load_address.c /recursion-depth +/set-exec xxxxxxxx* pipe S_I*.test diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index ba012bc5aab9..45a3cfc435cf 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -1,26 +1,33 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS = -Wall CFLAGS += -Wno-nonnull +CFLAGS += $(KHDR_INCLUDES) + +LDLIBS += -lcap ALIGNS := 0x1000 0x200000 0x1000000 ALIGN_PIES := $(patsubst %,load_address.%,$(ALIGNS)) ALIGN_STATIC_PIES := $(patsubst %,load_address.static.%,$(ALIGNS)) ALIGNMENT_TESTS := $(ALIGN_PIES) $(ALIGN_STATIC_PIES) -TEST_PROGS := binfmt_script.py +TEST_PROGS := binfmt_script.py check-exec-tests.sh TEST_GEN_PROGS := execveat non-regular $(ALIGNMENT_TESTS) +TEST_GEN_PROGS_EXTENDED := false inc set-exec script-exec.inc script-noexec.inc TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile TEST_GEN_PROGS += recursion-depth TEST_GEN_PROGS += null-argv +TEST_GEN_PROGS += check-exec EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \ $(OUTPUT)/S_I*.test include ../lib.mk +CHECK_EXEC_SAMPLES := $(top_srcdir)/samples/check-exec + $(OUTPUT)/subdir: mkdir -p $@ $(OUTPUT)/script: Makefile @@ -38,3 +45,13 @@ $(OUTPUT)/load_address.0x%: load_address.c $(OUTPUT)/load_address.static.0x%: load_address.c $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \ -fPIE -static-pie $< -o $@ +$(OUTPUT)/false: false.c + $(CC) $(CFLAGS) $(LDFLAGS) -static $< -o $@ +$(OUTPUT)/inc: $(CHECK_EXEC_SAMPLES)/inc.c + $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ +$(OUTPUT)/set-exec: $(CHECK_EXEC_SAMPLES)/set-exec.c + $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ +$(OUTPUT)/script-exec.inc: $(CHECK_EXEC_SAMPLES)/script-exec.inc + cp $< $@ +$(OUTPUT)/script-noexec.inc: $(CHECK_EXEC_SAMPLES)/script-noexec.inc + cp $< $@ diff --git a/tools/testing/selftests/exec/check-exec-tests.sh b/tools/testing/selftests/exec/check-exec-tests.sh new file mode 100755 index 000000000000..87102906ae3c --- /dev/null +++ b/tools/testing/selftests/exec/check-exec-tests.sh @@ -0,0 +1,205 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test the "inc" interpreter. +# +# See include/uapi/linux/securebits.h, include/uapi/linux/fcntl.h and +# samples/check-exec/inc.c +# +# Copyright © 2024 Microsoft Corporation + +set -u -e -o pipefail + +EXPECTED_OUTPUT="1" +exec 2>/dev/null + +DIR="$(dirname $(readlink -f "$0"))" +source "${DIR}"/../kselftest/ktap_helpers.sh + +exec_direct() { + local expect="$1" + local script="$2" + shift 2 + local ret=0 + local out + + # Updates PATH for `env` to execute the `inc` interpreter. + out="$(PATH="." "$@" "${script}")" || ret=$? + + if [[ ${ret} -ne ${expect} ]]; then + echo "ERROR: Wrong expectation for direct file execution: ${ret}" + return 1 + fi + if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then + echo "ERROR: Wrong output for direct file execution: ${out}" + return 1 + fi +} + +exec_indirect() { + local expect="$1" + local script="$2" + shift 2 + local ret=0 + local out + + # Script passed as argument. + out="$("$@" ./inc "${script}")" || ret=$? + + if [[ ${ret} -ne ${expect} ]]; then + echo "ERROR: Wrong expectation for indirect file execution: ${ret}" + return 1 + fi + if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then + echo "ERROR: Wrong output for indirect file execution: ${out}" + return 1 + fi +} + +exec_stdin_reg() { + local expect="$1" + local script="$2" + shift 2 + local ret=0 + local out + + # Executing stdin must be allowed if the related file is executable. + out="$("$@" ./inc -i < "${script}")" || ret=$? + + if [[ ${ret} -ne ${expect} ]]; then + echo "ERROR: Wrong expectation for stdin regular file execution: ${ret}" + return 1 + fi + if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then + echo "ERROR: Wrong output for stdin regular file execution: ${out}" + return 1 + fi +} + +exec_stdin_pipe() { + local expect="$1" + shift + local ret=0 + local out + + # A pipe is not executable. + out="$(cat script-exec.inc | "$@" ./inc -i)" || ret=$? + + if [[ ${ret} -ne ${expect} ]]; then + echo "ERROR: Wrong expectation for stdin pipe execution: ${ret}" + return 1 + fi +} + +exec_argument() { + local expect="$1" + local ret=0 + shift + local out + + # Script not coming from a file must not be executed. + out="$("$@" ./inc -c "$(< script-exec.inc)")" || ret=$? + + if [[ ${ret} -ne ${expect} ]]; then + echo "ERROR: Wrong expectation for arbitrary argument execution: ${ret}" + return 1 + fi + if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then + echo "ERROR: Wrong output for arbitrary argument execution: ${out}" + return 1 + fi +} + +exec_interactive() { + exec_stdin_pipe "$@" + exec_argument "$@" +} + +ktap_test() { + ktap_test_result "$*" "$@" +} + +ktap_print_header +ktap_set_plan 28 + +# Without secbit configuration, nothing is changed. + +ktap_print_msg "By default, executable scripts are allowed to be interpreted and executed." +ktap_test exec_direct 0 script-exec.inc +ktap_test exec_indirect 0 script-exec.inc + +ktap_print_msg "By default, executable stdin is allowed to be interpreted." +ktap_test exec_stdin_reg 0 script-exec.inc + +ktap_print_msg "By default, non-executable scripts are allowed to be interpreted, but not directly executed." +# We get 126 because of direct execution by Bash. +ktap_test exec_direct 126 script-noexec.inc +ktap_test exec_indirect 0 script-noexec.inc + +ktap_print_msg "By default, non-executable stdin is allowed to be interpreted." +ktap_test exec_stdin_reg 0 script-noexec.inc + +ktap_print_msg "By default, interactive commands are allowed to be interpreted." +ktap_test exec_interactive 0 + +# With only file restriction: protect non-malicious users from inadvertent errors (e.g. python ~/Downloads/*.py). + +ktap_print_msg "With -f, executable scripts are allowed to be interpreted and executed." +ktap_test exec_direct 0 script-exec.inc ./set-exec -f -- +ktap_test exec_indirect 0 script-exec.inc ./set-exec -f -- + +ktap_print_msg "With -f, executable stdin is allowed to be interpreted." +ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -f -- + +ktap_print_msg "With -f, non-executable scripts are not allowed to be executed nor interpreted." +# Direct execution of non-executable script is alwayse denied by the kernel. +ktap_test exec_direct 1 script-noexec.inc ./set-exec -f -- +ktap_test exec_indirect 1 script-noexec.inc ./set-exec -f -- + +ktap_print_msg "With -f, non-executable stdin is allowed to be interpreted." +ktap_test exec_stdin_reg 0 script-noexec.inc ./set-exec -f -- + +ktap_print_msg "With -f, interactive commands are allowed to be interpreted." +ktap_test exec_interactive 0 ./set-exec -f -- + +# With only denied interactive commands: check or monitor script content (e.g. with LSM). + +ktap_print_msg "With -i, executable scripts are allowed to be interpreted and executed." +ktap_test exec_direct 0 script-exec.inc ./set-exec -i -- +ktap_test exec_indirect 0 script-exec.inc ./set-exec -i -- + +ktap_print_msg "With -i, executable stdin is allowed to be interpreted." +ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -i -- + +ktap_print_msg "With -i, non-executable scripts are allowed to be interpreted, but not directly executed." +# Direct execution of non-executable script is alwayse denied by the kernel. +ktap_test exec_direct 1 script-noexec.inc ./set-exec -i -- +ktap_test exec_indirect 0 script-noexec.inc ./set-exec -i -- + +ktap_print_msg "With -i, non-executable stdin is not allowed to be interpreted." +ktap_test exec_stdin_reg 1 script-noexec.inc ./set-exec -i -- + +ktap_print_msg "With -i, interactive commands are not allowed to be interpreted." +ktap_test exec_interactive 1 ./set-exec -i -- + +# With both file restriction and denied interactive commands: only allow executable scripts. + +ktap_print_msg "With -fi, executable scripts are allowed to be interpreted and executed." +ktap_test exec_direct 0 script-exec.inc ./set-exec -fi -- +ktap_test exec_indirect 0 script-exec.inc ./set-exec -fi -- + +ktap_print_msg "With -fi, executable stdin is allowed to be interpreted." +ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -fi -- + +ktap_print_msg "With -fi, non-executable scripts are not allowed to be interpreted nor executed." +# Direct execution of non-executable script is alwayse denied by the kernel. +ktap_test exec_direct 1 script-noexec.inc ./set-exec -fi -- +ktap_test exec_indirect 1 script-noexec.inc ./set-exec -fi -- + +ktap_print_msg "With -fi, non-executable stdin is not allowed to be interpreted." +ktap_test exec_stdin_reg 1 script-noexec.inc ./set-exec -fi -- + +ktap_print_msg "With -fi, interactive commands are not allowed to be interpreted." +ktap_test exec_interactive 1 ./set-exec -fi -- + +ktap_finished diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c new file mode 100644 index 000000000000..55bce47e56b7 --- /dev/null +++ b/tools/testing/selftests/exec/check-exec.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test execveat(2) with AT_EXECVE_CHECK, and prctl(2) with + * SECBIT_EXEC_RESTRICT_FILE, SECBIT_EXEC_DENY_INTERACTIVE, and their locked + * counterparts. + * + * Copyright © 2018-2020 ANSSI + * Copyright © 2024 Microsoft Corporation + * + * Author: Mickaël Salaün <mic@digikod.net> + */ + +#include <asm-generic/unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/prctl.h> +#include <linux/securebits.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/capability.h> +#include <sys/mount.h> +#include <sys/prctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/sysmacros.h> +#include <unistd.h> + +/* Defines AT_EXECVE_CHECK without type conflicts. */ +#define _ASM_GENERIC_FCNTL_H +#include <linux/fcntl.h> + +#include "../kselftest_harness.h" + +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + +static void drop_privileges(struct __test_metadata *const _metadata) +{ + const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED; + cap_t cap_p; + + if ((cap_get_secbits() & noroot) != noroot) + EXPECT_EQ(0, cap_set_secbits(noroot)); + + cap_p = cap_get_proc(); + EXPECT_NE(NULL, cap_p); + EXPECT_NE(-1, cap_clear(cap_p)); + + /* + * Drops everything, especially CAP_SETPCAP, CAP_DAC_OVERRIDE, and + * CAP_DAC_READ_SEARCH. + */ + EXPECT_NE(-1, cap_set_proc(cap_p)); + EXPECT_NE(-1, cap_free(cap_p)); +} + +static int test_secbits_set(const unsigned int secbits) +{ + int err; + + err = prctl(PR_SET_SECUREBITS, secbits); + if (err) + return errno; + return 0; +} + +FIXTURE(access) +{ + int memfd, pipefd; + int pipe_fds[2], socket_fds[2]; +}; + +FIXTURE_VARIANT(access) +{ + const bool mount_exec; + const bool file_exec; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(access, mount_exec_file_exec) { + /* clang-format on */ + .mount_exec = true, + .file_exec = true, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(access, mount_exec_file_noexec) { + /* clang-format on */ + .mount_exec = true, + .file_exec = false, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(access, mount_noexec_file_exec) { + /* clang-format on */ + .mount_exec = false, + .file_exec = true, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(access, mount_noexec_file_noexec) { + /* clang-format on */ + .mount_exec = false, + .file_exec = false, +}; + +static const char binary_path[] = "./false"; +static const char workdir_path[] = "./test-mount"; +static const char reg_file_path[] = "./test-mount/regular_file"; +static const char dir_path[] = "./test-mount/directory"; +static const char block_dev_path[] = "./test-mount/block_device"; +static const char char_dev_path[] = "./test-mount/character_device"; +static const char fifo_path[] = "./test-mount/fifo"; + +FIXTURE_SETUP(access) +{ + int procfd_path_size; + static const char path_template[] = "/proc/self/fd/%d"; + char procfd_path[sizeof(path_template) + 10]; + + /* Makes sure we are not already restricted nor locked. */ + EXPECT_EQ(0, test_secbits_set(0)); + + /* + * Cleans previous workspace if any error previously happened (don't + * check errors). + */ + umount(workdir_path); + rmdir(workdir_path); + + /* Creates a clean mount point. */ + ASSERT_EQ(0, mkdir(workdir_path, 00700)); + ASSERT_EQ(0, mount("test", workdir_path, "tmpfs", + MS_MGC_VAL | (variant->mount_exec ? 0 : MS_NOEXEC), + "mode=0700,size=9m")); + + /* Creates a regular file. */ + ASSERT_EQ(0, mknod(reg_file_path, + S_IFREG | (variant->file_exec ? 0700 : 0600), 0)); + /* Creates a directory. */ + ASSERT_EQ(0, mkdir(dir_path, variant->file_exec ? 0700 : 0600)); + /* Creates a character device: /dev/null. */ + ASSERT_EQ(0, mknod(char_dev_path, S_IFCHR | 0400, makedev(1, 3))); + /* Creates a block device: /dev/loop0 */ + ASSERT_EQ(0, mknod(block_dev_path, S_IFBLK | 0400, makedev(7, 0))); + /* Creates a fifo. */ + ASSERT_EQ(0, mknod(fifo_path, S_IFIFO | 0600, 0)); + + /* Creates a regular file without user mount point. */ + self->memfd = memfd_create("test-exec-probe", MFD_CLOEXEC); + ASSERT_LE(0, self->memfd); + /* Sets mode, which must be ignored by the exec check. */ + ASSERT_EQ(0, fchmod(self->memfd, variant->file_exec ? 0700 : 0600)); + + /* Creates a pipefs file descriptor. */ + ASSERT_EQ(0, pipe(self->pipe_fds)); + procfd_path_size = snprintf(procfd_path, sizeof(procfd_path), + path_template, self->pipe_fds[0]); + ASSERT_LT(procfd_path_size, sizeof(procfd_path)); + self->pipefd = open(procfd_path, O_RDWR | O_CLOEXEC); + ASSERT_LE(0, self->pipefd); + ASSERT_EQ(0, fchmod(self->pipefd, variant->file_exec ? 0700 : 0600)); + + /* Creates a socket file descriptor. */ + ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, + self->socket_fds)); +} + +FIXTURE_TEARDOWN_PARENT(access) +{ + /* There is no need to unlink the test files. */ + EXPECT_EQ(0, umount(workdir_path)); + EXPECT_EQ(0, rmdir(workdir_path)); +} + +static void fill_exec_fd(struct __test_metadata *_metadata, const int fd_out) +{ + char buf[1024]; + size_t len; + int fd_in; + + fd_in = open(binary_path, O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd_in); + /* Cannot use copy_file_range(2) because of EXDEV. */ + len = read(fd_in, buf, sizeof(buf)); + EXPECT_LE(0, len); + while (len > 0) { + EXPECT_EQ(len, write(fd_out, buf, len)) + { + TH_LOG("Failed to write: %s (%d)", strerror(errno), + errno); + } + len = read(fd_in, buf, sizeof(buf)); + EXPECT_LE(0, len); + } + EXPECT_EQ(0, close(fd_in)); +} + +static void fill_exec_path(struct __test_metadata *_metadata, + const char *const path) +{ + int fd_out; + + fd_out = open(path, O_CLOEXEC | O_WRONLY); + ASSERT_LE(0, fd_out) + { + TH_LOG("Failed to open %s: %s", path, strerror(errno)); + } + fill_exec_fd(_metadata, fd_out); + EXPECT_EQ(0, close(fd_out)); +} + +static void test_exec_fd(struct __test_metadata *_metadata, const int fd, + const int err_code) +{ + char *const argv[] = { "", NULL }; + int access_ret, access_errno; + + /* + * If we really execute fd, filled with the "false" binary, the current + * thread will exits with an error, which will be interpreted by the + * test framework as an error. With AT_EXECVE_CHECK, we only check a + * potential successful execution. + */ + access_ret = sys_execveat(fd, "", argv, NULL, + AT_EMPTY_PATH | AT_EXECVE_CHECK); + access_errno = errno; + if (err_code) { + EXPECT_EQ(-1, access_ret); + EXPECT_EQ(err_code, access_errno) + { + TH_LOG("Wrong error for execveat(2): %s (%d)", + strerror(access_errno), errno); + } + } else { + EXPECT_EQ(0, access_ret) + { + TH_LOG("Access denied: %s", strerror(access_errno)); + } + } +} + +static void test_exec_path(struct __test_metadata *_metadata, + const char *const path, const int err_code) +{ + int flags = O_CLOEXEC; + int fd; + + /* Do not block on pipes. */ + if (path == fifo_path) + flags |= O_NONBLOCK; + + fd = open(path, flags | O_RDONLY); + ASSERT_LE(0, fd) + { + TH_LOG("Failed to open %s: %s", path, strerror(errno)); + } + test_exec_fd(_metadata, fd, err_code); + EXPECT_EQ(0, close(fd)); +} + +/* Tests that we don't get ENOEXEC. */ +TEST_F(access, regular_file_empty) +{ + const int exec = variant->mount_exec && variant->file_exec; + + test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES); + + drop_privileges(_metadata); + test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES); +} + +TEST_F(access, regular_file_elf) +{ + const int exec = variant->mount_exec && variant->file_exec; + + fill_exec_path(_metadata, reg_file_path); + + test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES); + + drop_privileges(_metadata); + test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES); +} + +/* Tests that we don't get ENOEXEC. */ +TEST_F(access, memfd_empty) +{ + const int exec = variant->file_exec; + + test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES); + + drop_privileges(_metadata); + test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES); +} + +TEST_F(access, memfd_elf) +{ + const int exec = variant->file_exec; + + fill_exec_fd(_metadata, self->memfd); + + test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES); + + drop_privileges(_metadata); + test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES); +} + +TEST_F(access, non_regular_files) +{ + test_exec_path(_metadata, dir_path, EACCES); + test_exec_path(_metadata, block_dev_path, EACCES); + test_exec_path(_metadata, char_dev_path, EACCES); + test_exec_path(_metadata, fifo_path, EACCES); + test_exec_fd(_metadata, self->socket_fds[0], EACCES); + test_exec_fd(_metadata, self->pipefd, EACCES); +} + +/* clang-format off */ +FIXTURE(secbits) {}; +/* clang-format on */ + +FIXTURE_VARIANT(secbits) +{ + const bool is_privileged; + const int error; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(secbits, priv) { + /* clang-format on */ + .is_privileged = true, + .error = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(secbits, unpriv) { + /* clang-format on */ + .is_privileged = false, + .error = EPERM, +}; + +FIXTURE_SETUP(secbits) +{ + /* Makes sure no exec bits are set. */ + EXPECT_EQ(0, test_secbits_set(0)); + EXPECT_EQ(0, prctl(PR_GET_SECUREBITS)); + + if (!variant->is_privileged) + drop_privileges(_metadata); +} + +FIXTURE_TEARDOWN(secbits) +{ +} + +TEST_F(secbits, legacy) +{ + EXPECT_EQ(variant->error, test_secbits_set(0)); +} + +#define CHILD(...) \ + do { \ + pid_t child = vfork(); \ + EXPECT_LE(0, child); \ + if (child == 0) { \ + __VA_ARGS__; \ + _exit(0); \ + } \ + } while (0) + +TEST_F(secbits, exec) +{ + unsigned int secbits = prctl(PR_GET_SECUREBITS); + + secbits |= SECBIT_EXEC_RESTRICT_FILE; + EXPECT_EQ(0, test_secbits_set(secbits)); + EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)); + CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS))); + + secbits |= SECBIT_EXEC_DENY_INTERACTIVE; + EXPECT_EQ(0, test_secbits_set(secbits)); + EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)); + CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS))); + + secbits &= ~(SECBIT_EXEC_RESTRICT_FILE | SECBIT_EXEC_DENY_INTERACTIVE); + EXPECT_EQ(0, test_secbits_set(secbits)); + EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)); + CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS))); +} + +TEST_F(secbits, check_locked_set) +{ + unsigned int secbits = prctl(PR_GET_SECUREBITS); + + secbits |= SECBIT_EXEC_RESTRICT_FILE; + EXPECT_EQ(0, test_secbits_set(secbits)); + secbits |= SECBIT_EXEC_RESTRICT_FILE_LOCKED; + EXPECT_EQ(0, test_secbits_set(secbits)); + + /* Checks lock set but unchanged. */ + EXPECT_EQ(variant->error, test_secbits_set(secbits)); + CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits))); + + secbits &= ~SECBIT_EXEC_RESTRICT_FILE; + EXPECT_EQ(EPERM, test_secbits_set(0)); + CHILD(EXPECT_EQ(EPERM, test_secbits_set(0))); +} + +TEST_F(secbits, check_locked_unset) +{ + unsigned int secbits = prctl(PR_GET_SECUREBITS); + + secbits |= SECBIT_EXEC_RESTRICT_FILE_LOCKED; + EXPECT_EQ(0, test_secbits_set(secbits)); + + /* Checks lock unset but unchanged. */ + EXPECT_EQ(variant->error, test_secbits_set(secbits)); + CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits))); + + secbits &= ~SECBIT_EXEC_RESTRICT_FILE; + EXPECT_EQ(EPERM, test_secbits_set(0)); + CHILD(EXPECT_EQ(EPERM, test_secbits_set(0))); +} + +TEST_F(secbits, restrict_locked_set) +{ + unsigned int secbits = prctl(PR_GET_SECUREBITS); + + secbits |= SECBIT_EXEC_DENY_INTERACTIVE; + EXPECT_EQ(0, test_secbits_set(secbits)); + secbits |= SECBIT_EXEC_DENY_INTERACTIVE_LOCKED; + EXPECT_EQ(0, test_secbits_set(secbits)); + + /* Checks lock set but unchanged. */ + EXPECT_EQ(variant->error, test_secbits_set(secbits)); + CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits))); + + secbits &= ~SECBIT_EXEC_DENY_INTERACTIVE; + EXPECT_EQ(EPERM, test_secbits_set(0)); + CHILD(EXPECT_EQ(EPERM, test_secbits_set(0))); +} + +TEST_F(secbits, restrict_locked_unset) +{ + unsigned int secbits = prctl(PR_GET_SECUREBITS); + + secbits |= SECBIT_EXEC_DENY_INTERACTIVE_LOCKED; + EXPECT_EQ(0, test_secbits_set(secbits)); + + /* Checks lock unset but unchanged. */ + EXPECT_EQ(variant->error, test_secbits_set(secbits)); + CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits))); + + secbits &= ~SECBIT_EXEC_DENY_INTERACTIVE; + EXPECT_EQ(EPERM, test_secbits_set(0)); + CHILD(EXPECT_EQ(EPERM, test_secbits_set(0))); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/exec/config b/tools/testing/selftests/exec/config new file mode 100644 index 000000000000..c308079867b3 --- /dev/null +++ b/tools/testing/selftests/exec/config @@ -0,0 +1,2 @@ +CONFIG_BLK_DEV=y +CONFIG_BLK_DEV_LOOP=y diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index 071e03532cba..8fb7395fd35b 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -23,9 +23,11 @@ #include "../kselftest.h" -#define TESTS_EXPECTED 51 +#define TESTS_EXPECTED 54 #define TEST_NAME_LEN (PATH_MAX * 4) +#define CHECK_COMM "CHECK_COMM" + static char longpath[2 * PATH_MAX] = ""; static char *envp[] = { "IN_TEST=yes", NULL, NULL }; static char *argv[] = { "execveat", "99", NULL }; @@ -237,6 +239,29 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script) return fail; } +static int check_execveat_comm(int fd, char *argv0, char *expected) +{ + char buf[128], *old_env, *old_argv0; + int ret; + + snprintf(buf, sizeof(buf), CHECK_COMM "=%s", expected); + + old_env = envp[1]; + envp[1] = buf; + + old_argv0 = argv[0]; + argv[0] = argv0; + + ksft_print_msg("Check execveat(AT_EMPTY_PATH)'s comm is %s\n", + expected); + ret = check_execveat_invoked_rc(fd, "", AT_EMPTY_PATH, 0, 0); + + envp[1] = old_env; + argv[0] = old_argv0; + + return ret; +} + static int run_tests(void) { int fail = 0; @@ -389,6 +414,14 @@ static int run_tests(void) fail += check_execveat_pathmax(root_dfd, "execveat", 0); fail += check_execveat_pathmax(root_dfd, "script", 1); + + /* /proc/pid/comm gives filename by default */ + fail += check_execveat_comm(fd, "sentinel", "execveat"); + /* /proc/pid/comm gives argv[0] when invoked via link */ + fail += check_execveat_comm(fd_symlink, "sentinel", "execveat"); + /* /proc/pid/comm gives filename if NULL is passed */ + fail += check_execveat_comm(fd, NULL, "execveat"); + return fail; } @@ -415,9 +448,13 @@ int main(int argc, char **argv) int ii; int rc; const char *verbose = getenv("VERBOSE"); + const char *check_comm = getenv(CHECK_COMM); - if (argc >= 2) { - /* If we are invoked with an argument, don't run tests. */ + if (argc >= 2 || check_comm) { + /* + * If we are invoked with an argument, or no arguments but a + * command to check, don't run tests. + */ const char *in_test = getenv("IN_TEST"); if (verbose) { @@ -426,6 +463,38 @@ int main(int argc, char **argv) ksft_print_msg("\t[%d]='%s\n'", ii, argv[ii]); } + /* If the tests wanted us to check the command, do so. */ + if (check_comm) { + /* TASK_COMM_LEN == 16 */ + char buf[32]; + int fd, ret; + + fd = open("/proc/self/comm", O_RDONLY); + if (fd < 0) { + ksft_perror("open() comm failed"); + exit(1); + } + + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) { + ksft_perror("read() comm failed"); + close(fd); + exit(1); + } + close(fd); + + // trim off the \n + buf[ret-1] = 0; + + if (strcmp(buf, check_comm)) { + ksft_print_msg("bad comm, got: %s expected: %s\n", + buf, check_comm); + exit(1); + } + + exit(0); + } + /* Check expected environment transferred. */ if (!in_test || strcmp(in_test, "yes") != 0) { ksft_print_msg("no IN_TEST=yes in env\n"); diff --git a/tools/testing/selftests/exec/false.c b/tools/testing/selftests/exec/false.c new file mode 100644 index 000000000000..104383ec3a79 --- /dev/null +++ b/tools/testing/selftests/exec/false.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +int main(void) +{ + return 1; +} diff --git a/tools/testing/selftests/nsfs/.gitignore b/tools/testing/selftests/filesystems/nsfs/.gitignore index ed79ebdf286e..92a8249006d1 100644 --- a/tools/testing/selftests/nsfs/.gitignore +++ b/tools/testing/selftests/filesystems/nsfs/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only owner pidns +iterate_mntns diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/filesystems/nsfs/Makefile index dd9bd50b7b93..231aaa7dfd95 100644 --- a/tools/testing/selftests/nsfs/Makefile +++ b/tools/testing/selftests/filesystems/nsfs/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -TEST_GEN_PROGS := owner pidns +TEST_GEN_PROGS := owner pidns iterate_mntns CFLAGS := -Wall -Werror -include ../lib.mk +include ../../lib.mk diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/filesystems/nsfs/config index 598d0a225fc9..598d0a225fc9 100644 --- a/tools/testing/selftests/nsfs/config +++ b/tools/testing/selftests/filesystems/nsfs/config diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c new file mode 100644 index 000000000000..457cf76f3c5f --- /dev/null +++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> + +#include "../../kselftest_harness.h" + +#define MNT_NS_COUNT 11 +#define MNT_NS_LAST_INDEX 10 + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(0xb7, 10, struct mnt_ns_info) +/* Get next namespace. */ +#define NS_MNT_GET_NEXT _IOR(0xb7, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(0xb7, 12, struct mnt_ns_info) + +FIXTURE(iterate_mount_namespaces) { + int fd_mnt_ns[MNT_NS_COUNT]; + __u64 mnt_ns_id[MNT_NS_COUNT]; +}; + +FIXTURE_SETUP(iterate_mount_namespaces) +{ + for (int i = 0; i < MNT_NS_COUNT; i++) + self->fd_mnt_ns[i] = -EBADF; + + /* + * Creating a new user namespace let's us guarantee that we only see + * mount namespaces that we did actually create. + */ + ASSERT_EQ(unshare(CLONE_NEWUSER), 0); + + for (int i = 0; i < MNT_NS_COUNT; i++) { + struct mnt_ns_info info = {}; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + self->fd_mnt_ns[i] = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC); + ASSERT_GE(self->fd_mnt_ns[i], 0); + ASSERT_EQ(ioctl(self->fd_mnt_ns[i], NS_MNT_GET_INFO, &info), 0); + self->mnt_ns_id[i] = info.mnt_ns_id; + } +} + +FIXTURE_TEARDOWN(iterate_mount_namespaces) +{ + for (int i = 0; i < MNT_NS_COUNT; i++) { + if (self->fd_mnt_ns[i] < 0) + continue; + ASSERT_EQ(close(self->fd_mnt_ns[i]), 0); + } +} + +TEST_F(iterate_mount_namespaces, iterate_all_forward) +{ + int fd_mnt_ns_cur, count = 0; + + fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC); + ASSERT_GE(fd_mnt_ns_cur, 0); + + for (;; count++) { + struct mnt_ns_info info = {}; + int fd_mnt_ns_next; + + fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info); + if (fd_mnt_ns_next < 0 && errno == ENOENT) + break; + ASSERT_GE(fd_mnt_ns_next, 0); + ASSERT_EQ(close(fd_mnt_ns_cur), 0); + fd_mnt_ns_cur = fd_mnt_ns_next; + } + ASSERT_EQ(count, MNT_NS_LAST_INDEX); +} + +TEST_F(iterate_mount_namespaces, iterate_all_backwards) +{ + int fd_mnt_ns_cur, count = 0; + + fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC); + ASSERT_GE(fd_mnt_ns_cur, 0); + + for (;; count++) { + struct mnt_ns_info info = {}; + int fd_mnt_ns_prev; + + fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info); + if (fd_mnt_ns_prev < 0 && errno == ENOENT) + break; + ASSERT_GE(fd_mnt_ns_prev, 0); + ASSERT_EQ(close(fd_mnt_ns_cur), 0); + fd_mnt_ns_cur = fd_mnt_ns_prev; + } + ASSERT_EQ(count, MNT_NS_LAST_INDEX); +} + +TEST_F(iterate_mount_namespaces, iterate_forward) +{ + int fd_mnt_ns_cur; + + ASSERT_EQ(setns(self->fd_mnt_ns[0], CLONE_NEWNS), 0); + + fd_mnt_ns_cur = self->fd_mnt_ns[0]; + for (int i = 1; i < MNT_NS_COUNT; i++) { + struct mnt_ns_info info = {}; + int fd_mnt_ns_next; + + fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info); + ASSERT_GE(fd_mnt_ns_next, 0); + ASSERT_EQ(close(fd_mnt_ns_cur), 0); + fd_mnt_ns_cur = fd_mnt_ns_next; + ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]); + } +} + +TEST_F(iterate_mount_namespaces, iterate_backward) +{ + int fd_mnt_ns_cur; + + ASSERT_EQ(setns(self->fd_mnt_ns[MNT_NS_LAST_INDEX], CLONE_NEWNS), 0); + + fd_mnt_ns_cur = self->fd_mnt_ns[MNT_NS_LAST_INDEX]; + for (int i = MNT_NS_LAST_INDEX - 1; i >= 0; i--) { + struct mnt_ns_info info = {}; + int fd_mnt_ns_prev; + + fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info); + ASSERT_GE(fd_mnt_ns_prev, 0); + ASSERT_EQ(close(fd_mnt_ns_cur), 0); + fd_mnt_ns_cur = fd_mnt_ns_prev; + ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/filesystems/nsfs/owner.c index 96a976c74550..96a976c74550 100644 --- a/tools/testing/selftests/nsfs/owner.c +++ b/tools/testing/selftests/filesystems/nsfs/owner.c diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/filesystems/nsfs/pidns.c index e3c772c6a7c7..e3c772c6a7c7 100644 --- a/tools/testing/selftests/nsfs/pidns.c +++ b/tools/testing/selftests/filesystems/nsfs/pidns.c diff --git a/tools/testing/selftests/filesystems/statmount/.gitignore b/tools/testing/selftests/filesystems/statmount/.gitignore index 82a4846cbc4b..973363ad66a2 100644 --- a/tools/testing/selftests/filesystems/statmount/.gitignore +++ b/tools/testing/selftests/filesystems/statmount/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only +statmount_test_ns /*_test diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile index 3af3136e35a4..14ee91a41650 100644 --- a/tools/testing/selftests/filesystems/statmount/Makefile +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) -TEST_GEN_PROGS := statmount_test statmount_test_ns +TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/listmount_test.c b/tools/testing/selftests/filesystems/statmount/listmount_test.c new file mode 100644 index 000000000000..15f0834f7557 --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/listmount_test.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> + +#include "statmount.h" +#include "../../kselftest_harness.h" + +#ifndef LISTMOUNT_REVERSE +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ +#endif + +#define LISTMNT_BUFFER 10 + +/* Check that all mount ids are in increasing order. */ +TEST(listmount_forward) +{ + uint64_t list[LISTMNT_BUFFER], last_mnt_id = 0; + + for (;;) { + ssize_t nr_mounts; + + nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id, + list, LISTMNT_BUFFER, 0); + ASSERT_GE(nr_mounts, 0); + if (nr_mounts == 0) + break; + + for (size_t cur = 0; cur < nr_mounts; cur++) { + if (cur < nr_mounts - 1) + ASSERT_LT(list[cur], list[cur + 1]); + last_mnt_id = list[cur]; + } + } +} + +/* Check that all mount ids are in decreasing order. */ +TEST(listmount_backward) +{ + uint64_t list[LISTMNT_BUFFER], last_mnt_id = 0; + + for (;;) { + ssize_t nr_mounts; + + nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id, + list, LISTMNT_BUFFER, LISTMOUNT_REVERSE); + ASSERT_GE(nr_mounts, 0); + if (nr_mounts == 0) + break; + + for (size_t cur = 0; cur < nr_mounts; cur++) { + if (cur < nr_mounts - 1) + ASSERT_GT(list[cur], list[cur + 1]); + last_mnt_id = list[cur]; + } + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index a1e955d2de4c..49d96bb16355 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -6,4 +6,6 @@ TEST_PROGS := ftracetest-ktap TEST_FILES := test.d settings EXTRA_CLEAN := $(OUTPUT)/logs/* +TEST_GEN_PROGS = poll + include ../lib.mk diff --git a/tools/testing/selftests/ftrace/poll.c b/tools/testing/selftests/ftrace/poll.c new file mode 100644 index 000000000000..53258f7515e7 --- /dev/null +++ b/tools/testing/selftests/ftrace/poll.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Simple poll on a file. + * + * Copyright (c) 2024 Google LLC. + */ + +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#define BUFSIZE 4096 + +/* + * Usage: + * poll [-I|-P] [-t timeout] FILE + */ +int main(int argc, char *argv[]) +{ + struct pollfd pfd = {.events = POLLIN}; + char buf[BUFSIZE]; + int timeout = -1; + int ret, opt; + + while ((opt = getopt(argc, argv, "IPt:")) != -1) { + switch (opt) { + case 'I': + pfd.events = POLLIN; + break; + case 'P': + pfd.events = POLLPRI; + break; + case 't': + timeout = atoi(optarg); + break; + default: + fprintf(stderr, "Usage: %s [-I|-P] [-t timeout] FILE\n", + argv[0]); + return -1; + } + } + if (optind >= argc) { + fprintf(stderr, "Error: Polling file is not specified\n"); + return -1; + } + + pfd.fd = open(argv[optind], O_RDONLY); + if (pfd.fd < 0) { + fprintf(stderr, "failed to open %s", argv[optind]); + perror("open"); + return -1; + } + + /* Reset poll by read if POLLIN is specified. */ + if (pfd.events & POLLIN) + do {} while (read(pfd.fd, buf, BUFSIZE) == BUFSIZE); + + ret = poll(&pfd, 1, timeout); + if (ret < 0 && errno != EINTR) { + perror("poll"); + return -1; + } + close(pfd.fd); + + /* If timeout happned (ret == 0), exit code is 1 */ + if (ret == 0) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc index 35e8d47d6072..8a7ce647a60d 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc @@ -15,11 +15,11 @@ find_alternate_gid() { tac /etc/group | grep -v ":$original_gid:" | head -1 | cut -d: -f3 } -mount_tracefs_with_options() { +remount_tracefs_with_options() { local mount_point="$1" local options="$2" - mount -t tracefs -o "$options" nodev "$mount_point" + mount -t tracefs -o "remount,$options" nodev "$mount_point" setup } @@ -81,7 +81,7 @@ test_gid_mount_option() { # Unmount existing tracefs instance and mount with new GID unmount_tracefs "$mount_point" - mount_tracefs_with_options "$mount_point" "$new_options" + remount_tracefs_with_options "$mount_point" "$new_options" check_gid "$mount_point" "$other_group" @@ -92,7 +92,7 @@ test_gid_mount_option() { # Unmount and remount with the original GID unmount_tracefs "$mount_point" - mount_tracefs_with_options "$mount_point" "$mount_options" + remount_tracefs_with_options "$mount_point" "$mount_options" check_gid "$mount_point" "$original_group" } diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc new file mode 100644 index 000000000000..b4ad09237e2a --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc @@ -0,0 +1,19 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - Repeating add/remove fprobe events +# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README + +echo 0 > events/enable +echo > dynamic_events + +PLACE=$FUNCTION_FORK +REPEAT_TIMES=64 + +for i in `seq 1 $REPEAT_TIMES`; do + echo "f:myevent $PLACE" >> dynamic_events + grep -q myevent dynamic_events + test -d events/fprobes/myevent + echo > dynamic_events +done + +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc index a275decdc880..86c76679c56e 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc @@ -6,8 +6,10 @@ echo 0 > events/enable echo > dynamic_events +REALBIN=`readlink -f /bin/sh` + echo 'cat /proc/$$/maps' | /bin/sh | \ - grep "r-xp .*/bin/.*sh$" | \ + grep "r-xp .*${REALBIN}$" | \ awk '{printf "p:myevent %s:0x%s\n", $6,$3 }' >> uprobe_events grep -q myevent uprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc index 61877d166451..c9425a34fae3 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc @@ -16,9 +16,7 @@ aarch64) REG=%r0 ;; esac -check_error 'f^100 vfs_read' # MAXACT_NO_KPROBE -check_error 'f^1a111 vfs_read' # BAD_MAXACT -check_error 'f^100000 vfs_read' # MAXACT_TOO_BIG +check_error 'f^100 vfs_read' # BAD_MAXACT check_error 'f ^non_exist_func' # BAD_PROBE_ADDR (enoent) check_error 'f ^vfs_read+10' # BAD_PROBE_ADDR diff --git a/tools/testing/selftests/ftrace/test.d/event/event-mod.tc b/tools/testing/selftests/ftrace/test.d/event/event-mod.tc new file mode 100644 index 000000000000..175243cd9ab7 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/event/event-mod.tc @@ -0,0 +1,191 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event tracing - enable/disable with module event +# requires: set_event "Can enable module events via: :mod:":README +# flags: instance + +rmmod trace-events-sample ||: +if ! modprobe trace-events-sample ; then + echo "No trace-events sample module - please make CONFIG_SAMPLE_TRACE_EVENTS=m" + exit_unresolved; +fi +trap "rmmod trace-events-sample" EXIT + +# Set events for the module +echo ":mod:trace-events-sample" > set_event + +test_all_enabled() { + + # Check if more than one is enabled + grep -q sample-trace:foo_bar set_event + grep -q sample-trace:foo_bar_with_cond set_event + grep -q sample-trace:foo_bar_with_fn set_event + + # All of them should be enabled. Check via the enable file + val=`cat events/sample-trace/enable` + if [ $val -ne 1 ]; then + exit_fail + fi +} + +clear_events() { + echo > set_event + val=`cat events/enable` + if [ "$val" != "0" ]; then + exit_fail + fi + count=`cat set_event | wc -l` + if [ $count -ne 0 ]; then + exit_fail + fi +} + +test_all_enabled + +echo clear all events +echo 0 > events/enable + +echo Confirm the events are disabled +val=`cat events/sample-trace/enable` +if [ $val -ne 0 ]; then + exit_fail +fi + +echo And the set_event file is empty + +cnt=`wc -l set_event` +if [ $cnt -ne 0 ]; then + exit_fail +fi + +echo now enable all events +echo 1 > events/enable + +echo Confirm the events are enabled again +val=`cat events/sample-trace/enable` +if [ $val -ne 1 ]; then + exit_fail +fi + +echo disable just the module events +echo '!:mod:trace-events-sample' >> set_event + +echo Should have mix of events enabled +val=`cat events/enable` +if [ "$val" != "X" ]; then + exit_fail +fi + +echo Confirm the module events are disabled +val=`cat events/sample-trace/enable` +if [ $val -ne 0 ]; then + exit_fail +fi + +echo 0 > events/enable + +echo now enable the system events +echo 'sample-trace:mod:trace-events-sample' > set_event + +test_all_enabled + +echo clear all events +echo 0 > events/enable + +echo Confirm the events are disabled +val=`cat events/sample-trace/enable` +if [ $val -ne 0 ]; then + exit_fail +fi + +echo Test enabling foo_bar only +echo 'foo_bar:mod:trace-events-sample' > set_event + +grep -q sample-trace:foo_bar set_event + +echo make sure nothing is found besides foo_bar +if grep -q -v sample-trace:foo_bar set_event ; then + exit_fail +fi + +echo Append another using the system and event name +echo 'sample-trace:foo_bar_with_cond:mod:trace-events-sample' >> set_event + +grep -q sample-trace:foo_bar set_event +grep -q sample-trace:foo_bar_with_cond set_event + +count=`cat set_event | wc -l` + +if [ $count -ne 2 ]; then + exit_fail +fi + +clear_events + +rmmod trace-events-sample + +echo ':mod:trace-events-sample' > set_event + +echo make sure that the module shows up, and '-' is converted to '_' +grep -q '\*:\*:mod:trace_events_sample' set_event + +modprobe trace-events-sample + +test_all_enabled + +clear_events + +rmmod trace-events-sample + +echo Enable just the system events +echo 'sample-trace:mod:trace-events-sample' > set_event +grep -q 'sample-trace:mod:trace_events_sample' set_event + +modprobe trace-events-sample + +test_all_enabled + +clear_events + +rmmod trace-events-sample + +echo Enable event with just event name +echo 'foo_bar:mod:trace-events-sample' > set_event +grep -q 'foo_bar:mod:trace_events_sample' set_event + +echo Enable another event with both system and event name +echo 'sample-trace:foo_bar_with_cond:mod:trace-events-sample' >> set_event +grep -q 'sample-trace:foo_bar_with_cond:mod:trace_events_sample' set_event +echo Make sure the other event was still there +grep -q 'foo_bar:mod:trace_events_sample' set_event + +modprobe trace-events-sample + +echo There should be no :mod: cached events +if grep -q ':mod:' set_event; then + exit_fail +fi + +echo two events should be enabled +count=`cat set_event | wc -l` +if [ $count -ne 2 ]; then + exit_fail +fi + +echo only two events should be enabled +val=`cat events/sample-trace/enable` +if [ "$val" != "X" ]; then + exit_fail +fi + +val=`cat events/sample-trace/foo_bar/enable` +if [ "$val" != "1" ]; then + exit_fail +fi + +val=`cat events/sample-trace/foo_bar_with_cond/enable` +if [ "$val" != "1" ]; then + exit_fail +fi + +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-poll.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-poll.tc new file mode 100644 index 000000000000..8d275e3238d9 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-poll.tc @@ -0,0 +1,74 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test poll wait on histogram +# requires: set_event events/sched/sched_process_free/trigger events/sched/sched_process_free/hist +# flags: instance + +POLL=${FTRACETEST_ROOT}/poll + +if [ ! -x ${POLL} ]; then + echo "poll program is not compiled!" + exit_unresolved +fi + +EVENT=events/sched/sched_process_free/ + +# Check poll ops is supported. Before implementing poll on hist file, it +# returns soon with POLLIN | POLLOUT, but not POLLPRI. + +# This must wait >1 sec and return 1 (timeout). +set +e +${POLL} -I -t 1000 ${EVENT}/hist +ret=$? +set -e +if [ ${ret} != 1 ]; then + echo "poll on hist file is not supported" + exit_unsupported +fi + +# Test POLLIN +echo > trace +echo 'hist:key=comm if comm =="sleep"' > ${EVENT}/trigger +echo 1 > ${EVENT}/enable + +# This sleep command will exit after 2 seconds. +sleep 2 & +BGPID=$! +# if timeout happens, poll returns 1. +${POLL} -I -t 4000 ${EVENT}/hist +echo 0 > tracing_on + +if [ -d /proc/${BGPID} ]; then + echo "poll exits too soon" + kill -KILL ${BGPID} ||: + exit_fail +fi + +if ! grep -qw "sleep" trace; then + echo "poll exits before event happens" + exit_fail +fi + +# Test POLLPRI +echo > trace +echo 1 > tracing_on + +# This sleep command will exit after 2 seconds. +sleep 2 & +BGPID=$! +# if timeout happens, poll returns 1. +${POLL} -P -t 4000 ${EVENT}/hist +echo 0 > tracing_on + +if [ -d /proc/${BGPID} ]; then + echo "poll exits too soon" + kill -KILL ${BGPID} ||: + exit_fail +fi + +if ! grep -qw "sleep" trace; then + echo "poll exits before event happens" + exit_fail +fi + +exit_pass diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh index 6fb66a687f17..bbc29ed9c60a 100755 --- a/tools/testing/selftests/gpio/gpio-sim.sh +++ b/tools/testing/selftests/gpio/gpio-sim.sh @@ -46,12 +46,6 @@ remove_chip() { rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip" } -configfs_cleanup() { - for CHIP in `ls $CONFIGFS_DIR/`; do - remove_chip $CHIP - done -} - create_chip() { local CHIP=$1 @@ -105,6 +99,13 @@ disable_chip() { echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip" } +configfs_cleanup() { + for CHIP in `ls $CONFIGFS_DIR/`; do + disable_chip $CHIP + remove_chip $CHIP + done +} + configfs_chip_name() { local CHIP=$1 local BANK=$2 @@ -181,6 +182,7 @@ create_chip chip create_bank chip bank enable_chip chip test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work" +disable_chip chip remove_chip chip echo "1.2. chip_name returns 'none' if the chip is still pending" @@ -195,6 +197,7 @@ create_chip chip create_bank chip bank enable_chip chip test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work" +disable_chip chip remove_chip chip echo "2. Creating and configuring simulated chips" @@ -204,6 +207,7 @@ create_chip chip create_bank chip bank enable_chip chip test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1" +disable_chip chip remove_chip chip echo "2.2. Number of lines can be specified" @@ -212,6 +216,7 @@ create_bank chip bank set_num_lines chip bank 16 enable_chip chip test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16" +disable_chip chip remove_chip chip echo "2.3. Label can be set" @@ -220,6 +225,7 @@ create_bank chip bank set_label chip bank foobar enable_chip chip test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect" +disable_chip chip remove_chip chip echo "2.4. Label can be left empty" @@ -227,6 +233,7 @@ create_chip chip create_bank chip bank enable_chip chip test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty" +disable_chip chip remove_chip chip echo "2.5. Line names can be configured" @@ -238,6 +245,7 @@ set_line_name chip bank 2 bar enable_chip chip test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect" test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect" +disable_chip chip remove_chip chip echo "2.6. Line config can remain unused if offset is greater than number of lines" @@ -248,6 +256,7 @@ set_line_name chip bank 5 foobar enable_chip chip test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect" test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect" +disable_chip chip remove_chip chip echo "2.7. Line configfs directory names are sanitized" @@ -267,6 +276,7 @@ for CHIP in $CHIPS; do enable_chip $CHIP done for CHIP in $CHIPS; do + disable_chip $CHIP remove_chip $CHIP done @@ -278,6 +288,7 @@ echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \ fail "Setting label of a live chip should fail" echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \ fail "Setting number of lines of a live chip should fail" +disable_chip chip remove_chip chip echo "2.10. Can't create line items when chip is live" @@ -285,6 +296,7 @@ create_chip chip create_bank chip bank enable_chip chip mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail" +disable_chip chip remove_chip chip echo "2.11. Probe errors are propagated to user-space" @@ -316,6 +328,7 @@ mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog enable_chip chip $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \ fail "Setting the value of a hogged line shouldn't succeed" +disable_chip chip remove_chip chip echo "3. Controlling simulated chips" @@ -331,6 +344,7 @@ test "$?" = "1" || fail "pull set incorrectly" sysfs_set_pull chip bank 0 pull-down $BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1 test "$?" = "0" || fail "pull set incorrectly" +disable_chip chip remove_chip chip echo "3.2. Pull can be read from sysfs" @@ -344,6 +358,7 @@ SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed" sysfs_set_pull chip bank 0 pull-up test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed" +disable_chip chip remove_chip chip echo "3.3. Incorrect input in sysfs is rejected" @@ -355,6 +370,7 @@ DEVNAME=`configfs_dev_name chip` CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull" echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected" +disable_chip chip remove_chip chip echo "3.4. Can't write to value" @@ -365,6 +381,7 @@ DEVNAME=`configfs_dev_name chip` CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly" +disable_chip chip remove_chip chip echo "4. Simulated GPIO chips are functional" @@ -382,6 +399,7 @@ $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 & sleep 0.1 # FIXME Any better way? test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs" kill $! +disable_chip chip remove_chip chip echo "4.2. Bias settings work correctly" @@ -394,6 +412,7 @@ CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" $BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0 test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work" +disable_chip chip remove_chip chip echo "GPIO $MODULE test PASS" diff --git a/tools/testing/selftests/hid/.gitignore b/tools/testing/selftests/hid/.gitignore index 746c62361f77..933f483815b2 100644 --- a/tools/testing/selftests/hid/.gitignore +++ b/tools/testing/selftests/hid/.gitignore @@ -1,5 +1,6 @@ bpftool *.skel.h +/host-tools /tools hid_bpf hidraw diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index c75ea4094870..e9dbb84c100a 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -194,7 +194,7 @@ int fill_msgque(struct msgque_data *msgque) int main(int argc, char **argv) { - int msg, pid, err; + int err; struct msgque_data msgque; if (getuid() != 0) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 29fedf609611..cdf91b0ca40f 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -18,7 +18,8 @@ * ksft_print_msg(fmt, ...); * ksft_perror(msg); * - * and finally report the pass/fail/skip/xfail state of the test with one of: + * and finally report the pass/fail/skip/xfail/xpass state of the test + * with one of: * * ksft_test_result(condition, fmt, ...); * ksft_test_result_report(result, fmt, ...); @@ -26,6 +27,7 @@ * ksft_test_result_fail(fmt, ...); * ksft_test_result_skip(fmt, ...); * ksft_test_result_xfail(fmt, ...); + * ksft_test_result_xpass(fmt, ...); * ksft_test_result_error(fmt, ...); * ksft_test_result_code(exit_code, test_name, fmt, ...); * @@ -147,6 +149,11 @@ static inline void ksft_set_plan(unsigned int plan) static inline void ksft_print_cnts(void) { + if (ksft_cnt.ksft_xskip > 0) + printf( + "# %u skipped test(s) detected. Consider enabling relevant config options to improve coverage.\n", + ksft_cnt.ksft_xskip + ); if (ksft_plan != ksft_test_num()) printf("# Planned tests != run tests (%u != %u)\n", ksft_plan, ksft_test_num()); @@ -227,6 +234,20 @@ static inline __printf(1, 2) void ksft_test_result_xfail(const char *msg, ...) va_end(args); } +static inline __printf(1, 2) void ksft_test_result_xpass(const char *msg, ...) +{ + int saved_errno = errno; + va_list args; + + ksft_cnt.ksft_xpass++; + + va_start(args, msg); + printf("ok %u # XPASS ", ksft_test_num()); + errno = saved_errno; + vprintf(msg, args); + va_end(args); +} + static inline __printf(1, 2) void ksft_test_result_skip(const char *msg, ...) { int saved_errno = errno; @@ -318,6 +339,9 @@ void ksft_test_result_code(int exit_code, const char *test_name, case KSFT_XFAIL: \ ksft_test_result_xfail(fmt, ##__VA_ARGS__); \ break; \ + case KSFT_XPASS: \ + ksft_test_result_xpass(fmt, ##__VA_ARGS__); \ + break; \ case KSFT_SKIP: \ ksft_test_result_skip(fmt, ##__VA_ARGS__); \ break; \ @@ -403,7 +427,7 @@ static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ... */ if (ksft_plan || ksft_test_num()) { ksft_cnt.ksft_xskip++; - printf("ok %d # SKIP ", 1 + ksft_test_num()); + printf("ok %u # SKIP ", 1 + ksft_test_num()); } else { printf("1..0 # SKIP "); } diff --git a/tools/testing/selftests/kselftest/ksft.py b/tools/testing/selftests/kselftest/ksft.py index bf215790a89d..0e030837fc17 100644 --- a/tools/testing/selftests/kselftest/ksft.py +++ b/tools/testing/selftests/kselftest/ksft.py @@ -27,6 +27,9 @@ def set_plan(num_tests): def print_cnts(): + if ksft_cnt['skip'] > 0: + print(f"# {ksft_cnt['skip']} skipped test(s) detected. Consider enabling relevant config options to improve coverage.") + print( f"# Totals: pass:{ksft_cnt['pass']} fail:{ksft_cnt['fail']} xfail:0 xpass:0 skip:{ksft_cnt['skip']} error:0" ) diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh index 79a125eb24c2..32dbfe9da2c4 100644 --- a/tools/testing/selftests/kselftest/ktap_helpers.sh +++ b/tools/testing/selftests/kselftest/ktap_helpers.sh @@ -7,6 +7,7 @@ KTAP_TESTNO=1 KTAP_CNT_PASS=0 KTAP_CNT_FAIL=0 +KTAP_CNT_XFAIL=0 KTAP_CNT_SKIP=0 KSFT_PASS=0 @@ -40,7 +41,7 @@ ktap_skip_all() { __ktap_test() { result="$1" description="$2" - directive="$3" # optional + directive="${3:-}" # optional local directive_str= [ ! -z "$directive" ] && directive_str="# $directive" @@ -69,6 +70,16 @@ ktap_test_skip() { KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1)) } +ktap_test_xfail() { + description="$1" + + result="ok" + directive="XFAIL" + __ktap_test "$result" "$description" "$directive" + + KTAP_CNT_XFAIL=$((KTAP_CNT_XFAIL+1)) +} + ktap_test_fail() { description="$1" @@ -99,7 +110,7 @@ ktap_exit_fail_msg() { ktap_finished() { ktap_print_totals - if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP)) -eq "$KSFT_NUM_TESTS" ]; then + if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP + KTAP_CNT_XFAIL)) -eq "$KSFT_NUM_TESTS" ]; then exit "$KSFT_PASS" else exit "$KSFT_FAIL" @@ -107,5 +118,9 @@ ktap_finished() { } ktap_print_totals() { - echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0" + if [ "$KTAP_CNT_SKIP" -gt 0 ]; then + echo "# $KTAP_CNT_SKIP skipped test(s) detected. " \ + "Consider enabling relevant config options to improve coverage." + fi + echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:$KTAP_CNT_XFAIL xpass:0 skip:$KTAP_CNT_SKIP error:0" } diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index a5a72415e37b..666c9fde76da 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -760,33 +760,33 @@ /* Report with actual signedness to avoid weird output. */ \ switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \ case 0: { \ - unsigned long long __exp_print = (uintptr_t)__exp; \ - unsigned long long __seen_print = (uintptr_t)__seen; \ - __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ + uintmax_t __exp_print = (uintmax_t)__exp; \ + uintmax_t __seen_print = (uintmax_t)__seen; \ + __TH_LOG("Expected %s (%ju) %s %s (%ju)", \ _expected_str, __exp_print, #_t, \ _seen_str, __seen_print); \ break; \ } \ case 1: { \ - unsigned long long __exp_print = (uintptr_t)__exp; \ - long long __seen_print = (intptr_t)__seen; \ - __TH_LOG("Expected %s (%llu) %s %s (%lld)", \ + uintmax_t __exp_print = (uintmax_t)__exp; \ + intmax_t __seen_print = (intmax_t)__seen; \ + __TH_LOG("Expected %s (%ju) %s %s (%jd)", \ _expected_str, __exp_print, #_t, \ _seen_str, __seen_print); \ break; \ } \ case 2: { \ - long long __exp_print = (intptr_t)__exp; \ - unsigned long long __seen_print = (uintptr_t)__seen; \ - __TH_LOG("Expected %s (%lld) %s %s (%llu)", \ + intmax_t __exp_print = (intmax_t)__exp; \ + uintmax_t __seen_print = (uintmax_t)__seen; \ + __TH_LOG("Expected %s (%jd) %s %s (%ju)", \ _expected_str, __exp_print, #_t, \ _seen_str, __seen_print); \ break; \ } \ case 3: { \ - long long __exp_print = (intptr_t)__exp; \ - long long __seen_print = (intptr_t)__seen; \ - __TH_LOG("Expected %s (%lld) %s %s (%lld)", \ + intmax_t __exp_print = (intmax_t)__exp; \ + intmax_t __seen_print = (intmax_t)__seen; \ + __TH_LOG("Expected %s (%jd) %s %s (%jd)", \ _expected_str, __exp_print, #_t, \ _seen_str, __seen_print); \ break; \ diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 7f57abf936e7..1d41a046a7bf 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -9,3 +9,4 @@ !config !settings !Makefile +!Makefile.kvm
\ No newline at end of file diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 41593d2e7de9..20af35a91d6f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -1,347 +1,16 @@ # SPDX-License-Identifier: GPL-2.0-only -include ../../../build/Build.include - -all: - top_srcdir = ../../../.. include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) -ifeq ($(ARCH),x86) - ARCH_DIR := x86_64 -else ifeq ($(ARCH),arm64) - ARCH_DIR := aarch64 -else ifeq ($(ARCH),s390) - ARCH_DIR := s390x -else - ARCH_DIR := $(ARCH) -endif - -LIBKVM += lib/assert.c -LIBKVM += lib/elf.c -LIBKVM += lib/guest_modes.c -LIBKVM += lib/io.c -LIBKVM += lib/kvm_util.c -LIBKVM += lib/memstress.c -LIBKVM += lib/guest_sprintf.c -LIBKVM += lib/rbtree.c -LIBKVM += lib/sparsebit.c -LIBKVM += lib/test_util.c -LIBKVM += lib/ucall_common.c -LIBKVM += lib/userfaultfd_util.c - -LIBKVM_STRING += lib/string_override.c - -LIBKVM_x86_64 += lib/x86_64/apic.c -LIBKVM_x86_64 += lib/x86_64/handlers.S -LIBKVM_x86_64 += lib/x86_64/hyperv.c -LIBKVM_x86_64 += lib/x86_64/memstress.c -LIBKVM_x86_64 += lib/x86_64/pmu.c -LIBKVM_x86_64 += lib/x86_64/processor.c -LIBKVM_x86_64 += lib/x86_64/sev.c -LIBKVM_x86_64 += lib/x86_64/svm.c -LIBKVM_x86_64 += lib/x86_64/ucall.c -LIBKVM_x86_64 += lib/x86_64/vmx.c - -LIBKVM_aarch64 += lib/aarch64/gic.c -LIBKVM_aarch64 += lib/aarch64/gic_v3.c -LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c -LIBKVM_aarch64 += lib/aarch64/handlers.S -LIBKVM_aarch64 += lib/aarch64/processor.c -LIBKVM_aarch64 += lib/aarch64/spinlock.c -LIBKVM_aarch64 += lib/aarch64/ucall.c -LIBKVM_aarch64 += lib/aarch64/vgic.c - -LIBKVM_s390x += lib/s390x/diag318_test_handler.c -LIBKVM_s390x += lib/s390x/processor.c -LIBKVM_s390x += lib/s390x/ucall.c -LIBKVM_s390x += lib/s390x/facility.c - -LIBKVM_riscv += lib/riscv/handlers.S -LIBKVM_riscv += lib/riscv/processor.c -LIBKVM_riscv += lib/riscv/ucall.c - -# Non-compiled test targets -TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh - -# Compiled test targets -TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test -TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test -TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test -TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test -TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush -TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test -TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test -TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test -TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test -TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test -TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test -TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test -TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id -TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test -TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test -TEST_GEN_PROGS_x86_64 += x86_64/smm_test -TEST_GEN_PROGS_x86_64 += x86_64/state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test -TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync -TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test -TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test -TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test -TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/debug_regs -TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test -TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test -TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests -TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests -TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test -TEST_GEN_PROGS_x86_64 += x86_64/amx_test -TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test -TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test -TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test -TEST_GEN_PROGS_x86_64 += access_tracking_perf_test -TEST_GEN_PROGS_x86_64 += coalesced_io_test -TEST_GEN_PROGS_x86_64 += demand_paging_test -TEST_GEN_PROGS_x86_64 += dirty_log_test -TEST_GEN_PROGS_x86_64 += dirty_log_perf_test -TEST_GEN_PROGS_x86_64 += guest_memfd_test -TEST_GEN_PROGS_x86_64 += guest_print_test -TEST_GEN_PROGS_x86_64 += hardware_disable_test -TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86_64 += kvm_page_table_test -TEST_GEN_PROGS_x86_64 += max_guest_memory_test -TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test -TEST_GEN_PROGS_x86_64 += memslot_perf_test -TEST_GEN_PROGS_x86_64 += rseq_test -TEST_GEN_PROGS_x86_64 += set_memory_region_test -TEST_GEN_PROGS_x86_64 += steal_time -TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test -TEST_GEN_PROGS_x86_64 += system_counter_offset_test -TEST_GEN_PROGS_x86_64 += pre_fault_memory_test - -# Compiled outputs used by test targets -TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test - -TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs -TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases -TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions -TEST_GEN_PROGS_aarch64 += aarch64/hypercalls -TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort -TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test -TEST_GEN_PROGS_aarch64 += aarch64/psci_test -TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs -TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter -TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config -TEST_GEN_PROGS_aarch64 += aarch64/vgic_init -TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq -TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress -TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access -TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3 -TEST_GEN_PROGS_aarch64 += access_tracking_perf_test -TEST_GEN_PROGS_aarch64 += arch_timer -TEST_GEN_PROGS_aarch64 += coalesced_io_test -TEST_GEN_PROGS_aarch64 += demand_paging_test -TEST_GEN_PROGS_aarch64 += dirty_log_test -TEST_GEN_PROGS_aarch64 += dirty_log_perf_test -TEST_GEN_PROGS_aarch64 += guest_print_test -TEST_GEN_PROGS_aarch64 += get-reg-list -TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus -TEST_GEN_PROGS_aarch64 += kvm_page_table_test -TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test -TEST_GEN_PROGS_aarch64 += memslot_perf_test -TEST_GEN_PROGS_aarch64 += rseq_test -TEST_GEN_PROGS_aarch64 += set_memory_region_test -TEST_GEN_PROGS_aarch64 += steal_time -TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test - -TEST_GEN_PROGS_s390x = s390x/memop -TEST_GEN_PROGS_s390x += s390x/resets -TEST_GEN_PROGS_s390x += s390x/sync_regs_test -TEST_GEN_PROGS_s390x += s390x/tprot -TEST_GEN_PROGS_s390x += s390x/cmma_test -TEST_GEN_PROGS_s390x += s390x/debug_test -TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test -TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test -TEST_GEN_PROGS_s390x += s390x/ucontrol_test -TEST_GEN_PROGS_s390x += demand_paging_test -TEST_GEN_PROGS_s390x += dirty_log_test -TEST_GEN_PROGS_s390x += guest_print_test -TEST_GEN_PROGS_s390x += kvm_create_max_vcpus -TEST_GEN_PROGS_s390x += kvm_page_table_test -TEST_GEN_PROGS_s390x += rseq_test -TEST_GEN_PROGS_s390x += set_memory_region_test -TEST_GEN_PROGS_s390x += kvm_binary_stats_test - -TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test -TEST_GEN_PROGS_riscv += riscv/ebreak_test -TEST_GEN_PROGS_riscv += arch_timer -TEST_GEN_PROGS_riscv += coalesced_io_test -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test -TEST_GEN_PROGS_riscv += get-reg-list -TEST_GEN_PROGS_riscv += guest_print_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test -TEST_GEN_PROGS_riscv += steal_time - -SPLIT_TESTS += arch_timer -SPLIT_TESTS += get-reg-list - -TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) -LIBKVM += $(LIBKVM_$(ARCH_DIR)) - -OVERRIDE_TARGETS = 1 - -# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most -# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, -# which causes the environment variable to override the makefile). -include ../lib.mk - -INSTALL_HDR_PATH = $(top_srcdir)/usr -LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include +ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64)) +# Top-level selftests allows ARCH=x86_64 :-( ifeq ($(ARCH),x86_64) -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include -else -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include + ARCH := x86 endif -CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ - -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ - -fno-builtin-memcmp -fno-builtin-memcpy \ - -fno-builtin-memset -fno-builtin-strnlen \ - -fno-stack-protector -fno-PIE -fno-strict-aliasing \ - -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ - -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH_DIR) \ - -I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES) -ifeq ($(ARCH),s390) - CFLAGS += -march=z10 -endif -ifeq ($(ARCH),x86) -ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0) - CFLAGS += -march=x86-64-v2 -endif -endif -ifeq ($(ARCH),arm64) -tools_dir := $(top_srcdir)/tools -arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ - -ifneq ($(abs_objdir),) -arm64_hdr_outdir := $(abs_objdir)/tools/ +include Makefile.kvm else -arm64_hdr_outdir := $(tools_dir)/ -endif - -GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/ -CFLAGS += -I$(GEN_HDRS) - -$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) - $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir) +# Empty targets for unsupported architectures +all: +clean: endif - -no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ - $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) - -# On s390, build the testcases KVM-enabled -pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ - $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) - -LDLIBS += -ldl -LDFLAGS += -pthread $(no-pie-option) $(pgste-option) - -LIBKVM_C := $(filter %.c,$(LIBKVM)) -LIBKVM_S := $(filter %.S,$(LIBKVM)) -LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) -LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) -LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) -LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) -SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) -SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH_DIR)/%.o, $(SPLIT_TESTS)) - -TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) -TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) -TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) -TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) -TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ)) --include $(TEST_DEP_FILES) - -$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH_DIR) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) - -$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \ -$(TEST_GEN_PROGS_EXTENDED): %: %.o - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@ -$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH_DIR)/%.o - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ -$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH_DIR)/%.o: $(ARCH_DIR)/%.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -EXTRA_CLEAN += $(GEN_HDRS) \ - $(LIBKVM_OBJS) \ - $(SPLIT_TEST_GEN_OBJ) \ - $(TEST_DEP_FILES) \ - $(TEST_GEN_OBJ) \ - cscope.* - -$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS) - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS) - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -# Compile the string overrides as freestanding to prevent the compiler from -# generating self-referential code, e.g. without "freestanding" the compiler may -# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. -$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ - -$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) -$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS) -$(TEST_GEN_PROGS): $(LIBKVM_OBJS) -$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) -$(TEST_GEN_OBJ): $(GEN_HDRS) - -cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. -cscope: - $(RM) cscope.* - (find $(include_paths) -name '*.h' \ - -exec realpath --relative-base=$(PWD) {} \;; \ - find . -name '*.c' \ - -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files - cscope -b diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm new file mode 100644 index 000000000000..4277b983cace --- /dev/null +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -0,0 +1,330 @@ +# SPDX-License-Identifier: GPL-2.0-only +include ../../../build/Build.include + +all: + +LIBKVM += lib/assert.c +LIBKVM += lib/elf.c +LIBKVM += lib/guest_modes.c +LIBKVM += lib/io.c +LIBKVM += lib/kvm_util.c +LIBKVM += lib/memstress.c +LIBKVM += lib/guest_sprintf.c +LIBKVM += lib/rbtree.c +LIBKVM += lib/sparsebit.c +LIBKVM += lib/test_util.c +LIBKVM += lib/ucall_common.c +LIBKVM += lib/userfaultfd_util.c + +LIBKVM_STRING += lib/string_override.c + +LIBKVM_x86 += lib/x86/apic.c +LIBKVM_x86 += lib/x86/handlers.S +LIBKVM_x86 += lib/x86/hyperv.c +LIBKVM_x86 += lib/x86/memstress.c +LIBKVM_x86 += lib/x86/pmu.c +LIBKVM_x86 += lib/x86/processor.c +LIBKVM_x86 += lib/x86/sev.c +LIBKVM_x86 += lib/x86/svm.c +LIBKVM_x86 += lib/x86/ucall.c +LIBKVM_x86 += lib/x86/vmx.c + +LIBKVM_arm64 += lib/arm64/gic.c +LIBKVM_arm64 += lib/arm64/gic_v3.c +LIBKVM_arm64 += lib/arm64/gic_v3_its.c +LIBKVM_arm64 += lib/arm64/handlers.S +LIBKVM_arm64 += lib/arm64/processor.c +LIBKVM_arm64 += lib/arm64/spinlock.c +LIBKVM_arm64 += lib/arm64/ucall.c +LIBKVM_arm64 += lib/arm64/vgic.c + +LIBKVM_s390 += lib/s390/diag318_test_handler.c +LIBKVM_s390 += lib/s390/processor.c +LIBKVM_s390 += lib/s390/ucall.c +LIBKVM_s390 += lib/s390/facility.c + +LIBKVM_riscv += lib/riscv/handlers.S +LIBKVM_riscv += lib/riscv/processor.c +LIBKVM_riscv += lib/riscv/ucall.c + +# Non-compiled test targets +TEST_PROGS_x86 += x86/nx_huge_pages_test.sh + +# Compiled test targets +TEST_GEN_PROGS_x86 = x86/cpuid_test +TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test +TEST_GEN_PROGS_x86 += x86/feature_msrs_test +TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test +TEST_GEN_PROGS_x86 += x86/fix_hypercall_test +TEST_GEN_PROGS_x86 += x86/hwcr_msr_test +TEST_GEN_PROGS_x86 += x86/hyperv_clock +TEST_GEN_PROGS_x86 += x86/hyperv_cpuid +TEST_GEN_PROGS_x86 += x86/hyperv_evmcs +TEST_GEN_PROGS_x86 += x86/hyperv_extended_hypercalls +TEST_GEN_PROGS_x86 += x86/hyperv_features +TEST_GEN_PROGS_x86 += x86/hyperv_ipi +TEST_GEN_PROGS_x86 += x86/hyperv_svm_test +TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush +TEST_GEN_PROGS_x86 += x86/kvm_clock_test +TEST_GEN_PROGS_x86 += x86/kvm_pv_test +TEST_GEN_PROGS_x86 += x86/monitor_mwait_test +TEST_GEN_PROGS_x86 += x86/nested_exceptions_test +TEST_GEN_PROGS_x86 += x86/platform_info_test +TEST_GEN_PROGS_x86 += x86/pmu_counters_test +TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test +TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test +TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test +TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id +TEST_GEN_PROGS_x86 += x86/set_sregs_test +TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test +TEST_GEN_PROGS_x86 += x86/smm_test +TEST_GEN_PROGS_x86 += x86/state_test +TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test +TEST_GEN_PROGS_x86 += x86/svm_vmcall_test +TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test +TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test +TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test +TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync +TEST_GEN_PROGS_x86 += x86/sync_regs_test +TEST_GEN_PROGS_x86 += x86/ucna_injection_test +TEST_GEN_PROGS_x86 += x86/userspace_io_test +TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test +TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test +TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test +TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test +TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state +TEST_GEN_PROGS_x86 += x86/vmx_msrs_test +TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state +TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test +TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test +TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test +TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test +TEST_GEN_PROGS_x86 += x86/xapic_ipi_test +TEST_GEN_PROGS_x86 += x86/xapic_state_test +TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test +TEST_GEN_PROGS_x86 += x86/xss_msr_test +TEST_GEN_PROGS_x86 += x86/debug_regs +TEST_GEN_PROGS_x86 += x86/tsc_msrs_test +TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test +TEST_GEN_PROGS_x86 += x86/xen_shinfo_test +TEST_GEN_PROGS_x86 += x86/xen_vmcall_test +TEST_GEN_PROGS_x86 += x86/sev_init2_tests +TEST_GEN_PROGS_x86 += x86/sev_migrate_tests +TEST_GEN_PROGS_x86 += x86/sev_smoke_test +TEST_GEN_PROGS_x86 += x86/amx_test +TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test +TEST_GEN_PROGS_x86 += x86/triple_fault_event_test +TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test +TEST_GEN_PROGS_x86 += access_tracking_perf_test +TEST_GEN_PROGS_x86 += coalesced_io_test +TEST_GEN_PROGS_x86 += demand_paging_test +TEST_GEN_PROGS_x86 += dirty_log_test +TEST_GEN_PROGS_x86 += dirty_log_perf_test +TEST_GEN_PROGS_x86 += guest_memfd_test +TEST_GEN_PROGS_x86 += guest_print_test +TEST_GEN_PROGS_x86 += hardware_disable_test +TEST_GEN_PROGS_x86 += kvm_create_max_vcpus +TEST_GEN_PROGS_x86 += kvm_page_table_test +TEST_GEN_PROGS_x86 += memslot_modification_stress_test +TEST_GEN_PROGS_x86 += memslot_perf_test +TEST_GEN_PROGS_x86 += mmu_stress_test +TEST_GEN_PROGS_x86 += rseq_test +TEST_GEN_PROGS_x86 += set_memory_region_test +TEST_GEN_PROGS_x86 += steal_time +TEST_GEN_PROGS_x86 += kvm_binary_stats_test +TEST_GEN_PROGS_x86 += system_counter_offset_test +TEST_GEN_PROGS_x86 += pre_fault_memory_test + +# Compiled outputs used by test targets +TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test + +TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs +TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases +TEST_GEN_PROGS_arm64 += arm64/debug-exceptions +TEST_GEN_PROGS_arm64 += arm64/hypercalls +TEST_GEN_PROGS_arm64 += arm64/mmio_abort +TEST_GEN_PROGS_arm64 += arm64/page_fault_test +TEST_GEN_PROGS_arm64 += arm64/psci_test +TEST_GEN_PROGS_arm64 += arm64/set_id_regs +TEST_GEN_PROGS_arm64 += arm64/smccc_filter +TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config +TEST_GEN_PROGS_arm64 += arm64/vgic_init +TEST_GEN_PROGS_arm64 += arm64/vgic_irq +TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress +TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access +TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 +TEST_GEN_PROGS_arm64 += access_tracking_perf_test +TEST_GEN_PROGS_arm64 += arch_timer +TEST_GEN_PROGS_arm64 += coalesced_io_test +TEST_GEN_PROGS_arm64 += demand_paging_test +TEST_GEN_PROGS_arm64 += dirty_log_test +TEST_GEN_PROGS_arm64 += dirty_log_perf_test +TEST_GEN_PROGS_arm64 += guest_print_test +TEST_GEN_PROGS_arm64 += get-reg-list +TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus +TEST_GEN_PROGS_arm64 += kvm_page_table_test +TEST_GEN_PROGS_arm64 += memslot_modification_stress_test +TEST_GEN_PROGS_arm64 += memslot_perf_test +TEST_GEN_PROGS_arm64 += mmu_stress_test +TEST_GEN_PROGS_arm64 += rseq_test +TEST_GEN_PROGS_arm64 += set_memory_region_test +TEST_GEN_PROGS_arm64 += steal_time +TEST_GEN_PROGS_arm64 += kvm_binary_stats_test + +TEST_GEN_PROGS_s390 = s390/memop +TEST_GEN_PROGS_s390 += s390/resets +TEST_GEN_PROGS_s390 += s390/sync_regs_test +TEST_GEN_PROGS_s390 += s390/tprot +TEST_GEN_PROGS_s390 += s390/cmma_test +TEST_GEN_PROGS_s390 += s390/debug_test +TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test +TEST_GEN_PROGS_s390 += s390/shared_zeropage_test +TEST_GEN_PROGS_s390 += s390/ucontrol_test +TEST_GEN_PROGS_s390 += demand_paging_test +TEST_GEN_PROGS_s390 += dirty_log_test +TEST_GEN_PROGS_s390 += guest_print_test +TEST_GEN_PROGS_s390 += kvm_create_max_vcpus +TEST_GEN_PROGS_s390 += kvm_page_table_test +TEST_GEN_PROGS_s390 += rseq_test +TEST_GEN_PROGS_s390 += set_memory_region_test +TEST_GEN_PROGS_s390 += kvm_binary_stats_test + +TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test +TEST_GEN_PROGS_riscv += riscv/ebreak_test +TEST_GEN_PROGS_riscv += arch_timer +TEST_GEN_PROGS_riscv += coalesced_io_test +TEST_GEN_PROGS_riscv += demand_paging_test +TEST_GEN_PROGS_riscv += dirty_log_test +TEST_GEN_PROGS_riscv += get-reg-list +TEST_GEN_PROGS_riscv += guest_print_test +TEST_GEN_PROGS_riscv += kvm_binary_stats_test +TEST_GEN_PROGS_riscv += kvm_create_max_vcpus +TEST_GEN_PROGS_riscv += kvm_page_table_test +TEST_GEN_PROGS_riscv += set_memory_region_test +TEST_GEN_PROGS_riscv += steal_time + +SPLIT_TESTS += arch_timer +SPLIT_TESTS += get-reg-list + +TEST_PROGS += $(TEST_PROGS_$(ARCH)) +TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH)) +TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH)) +LIBKVM += $(LIBKVM_$(ARCH)) + +OVERRIDE_TARGETS = 1 + +# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most +# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, +# which causes the environment variable to override the makefile). +include ../lib.mk + +INSTALL_HDR_PATH = $(top_srcdir)/usr +LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ +LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include +CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ + -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ + -fno-builtin-memcmp -fno-builtin-memcpy \ + -fno-builtin-memset -fno-builtin-strnlen \ + -fno-stack-protector -fno-PIE -fno-strict-aliasing \ + -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ + -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH) \ + -I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES) +ifeq ($(ARCH),s390) + CFLAGS += -march=z10 +endif +ifeq ($(ARCH),x86) +ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0) + CFLAGS += -march=x86-64-v2 +endif +endif +ifeq ($(ARCH),arm64) +tools_dir := $(top_srcdir)/tools +arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ + +ifneq ($(abs_objdir),) +arm64_hdr_outdir := $(abs_objdir)/tools/ +else +arm64_hdr_outdir := $(tools_dir)/ +endif + +GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/ +CFLAGS += -I$(GEN_HDRS) + +$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) + $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir) +endif + +no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ + $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + +# On s390, build the testcases KVM-enabled +pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ + $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) + +LDLIBS += -ldl +LDFLAGS += -pthread $(no-pie-option) $(pgste-option) + +LIBKVM_C := $(filter %.c,$(LIBKVM)) +LIBKVM_S := $(filter %.S,$(LIBKVM)) +LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) +LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) +LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) +SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) +SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS)) + +TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) +TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) +TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ)) +-include $(TEST_DEP_FILES) + +$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) + +$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \ +$(TEST_GEN_PROGS_EXTENDED): %: %.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@ +$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH)/%.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ +$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH)/%.o: $(ARCH)/%.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +EXTRA_CLEAN += $(GEN_HDRS) \ + $(LIBKVM_OBJS) \ + $(SPLIT_TEST_GEN_OBJ) \ + $(TEST_DEP_FILES) \ + $(TEST_GEN_OBJ) \ + cscope.* + +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +# Compile the string overrides as freestanding to prevent the compiler from +# generating self-referential code, e.g. without "freestanding" the compiler may +# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. +$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ + +$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) +$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS) +$(TEST_GEN_PROGS): $(LIBKVM_OBJS) +$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) +$(TEST_GEN_OBJ): $(GEN_HDRS) + +cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. +cscope: + $(RM) cscope.* + (find $(include_paths) -name '*.h' \ + -exec realpath --relative-base=$(PWD) {} \;; \ + find . -name '*.c' \ + -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files + cscope -b diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c index 8e5bd07a3727..cef8f7323ceb 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c @@ -97,7 +97,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) uint64_t reg_id = raz_wi_reg_ids[i]; uint64_t val; - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); /* @@ -106,7 +106,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) */ vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); } } @@ -126,14 +126,14 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu) uint64_t reg_id = raz_invariant_reg_ids[i]; uint64_t val; - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); TEST_ASSERT(r < 0 && errno == EINVAL, "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); } } @@ -144,10 +144,10 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) { uint64_t val, el0; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); - return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY; + return el0 == ID_AA64PFR0_EL1_EL0_IMP; } int main(void) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c index eeba1cc87ff8..eeba1cc87ff8 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer.c diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index a36a7e2db434..a36a7e2db434 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index ff7a949fc96a..c7fb55c9135b 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -501,7 +501,7 @@ void test_single_step_from_userspace(int test_cnt) TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG"); /* Check if the current pc is expected. */ - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); TEST_ASSERT(!test_pc || pc == test_pc, "Unexpected pc 0x%lx (expected 0x%lx)", pc, test_pc); @@ -583,7 +583,7 @@ int main(int argc, char *argv[]) uint64_t aa64dfr0; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0); + aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); __TEST_REQUIRE(debug_version(aa64dfr0) >= 6, "Armv8 debug architecture not supported."); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index d43fb3f49050..d43fb3f49050 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c index 9d192ce0078d..ec54ec7726e9 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/arm64/hypercalls.c @@ -173,7 +173,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; /* First 'read' should be an upper limit of the features supported */ - vcpu_get_reg(vcpu, reg_info->reg, &val); + val = vcpu_get_reg(vcpu, reg_info->reg); TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); @@ -184,7 +184,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) "Failed to clear all the features of reg: 0x%lx; ret: %d", reg_info->reg, errno); - vcpu_get_reg(vcpu, reg_info->reg, &val); + val = vcpu_get_reg(vcpu, reg_info->reg); TEST_ASSERT(val == 0, "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); @@ -214,7 +214,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) * Before starting the VM, the test clears all the bits. * Check if that's still the case. */ - vcpu_get_reg(vcpu, reg_info->reg, &val); + val = vcpu_get_reg(vcpu, reg_info->reg); TEST_ASSERT(val == 0, "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); diff --git a/tools/testing/selftests/kvm/aarch64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c index 8b7a80a51b1c..8b7a80a51b1c 100644 --- a/tools/testing/selftests/kvm/aarch64/mmio_abort.c +++ b/tools/testing/selftests/kvm/arm64/mmio_abort.c diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c index 58304bbc2036..ebd70430c89d 100644 --- a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c +++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c @@ -164,7 +164,7 @@ int main(int argc, char *argv[]) uint64_t pfr0; vm = vm_create_with_one_vcpu(&vcpu, NULL); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0); + pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), "GICv3 not supported."); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index ec33a8f9c908..ec33a8f9c908 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c index eaa7655fefc1..ab491ee9e5f7 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/arm64/psci_test.c @@ -111,8 +111,8 @@ static void assert_vcpu_reset(struct kvm_vcpu *vcpu) { uint64_t obs_pc, obs_x0; - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &obs_pc); - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]), &obs_x0); + obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); + obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0])); TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR, "unexpected target cpu pc: %lx (expected: %lx)", @@ -152,7 +152,7 @@ static void host_test_cpu_on(void) */ vcpu_power_off(target); - vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr); + target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK); enter_guest(source); @@ -244,7 +244,7 @@ static void host_test_system_off2(void) setup_vm(guest_test_system_off2, &source, &target); - vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION, &psci_version); + psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION); TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3), "Unexpected PSCI version %lu.%lu", diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index a79b7f18452d..217541fe6536 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -152,7 +152,6 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0), REG_FTR_END, }; @@ -346,7 +345,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, uint64_t mask = ftr_bits->mask; uint64_t val, new_val, ftr; - vcpu_get_reg(vcpu, reg, &val); + val = vcpu_get_reg(vcpu, reg); ftr = (val & mask) >> shift; ftr = get_safe_value(ftr_bits, ftr); @@ -356,7 +355,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, val |= ftr; vcpu_set_reg(vcpu, reg, val); - vcpu_get_reg(vcpu, reg, &new_val); + new_val = vcpu_get_reg(vcpu, reg); TEST_ASSERT_EQ(new_val, val); return new_val; @@ -370,7 +369,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, uint64_t val, old_val, ftr; int r; - vcpu_get_reg(vcpu, reg, &val); + val = vcpu_get_reg(vcpu, reg); ftr = (val & mask) >> shift; ftr = get_invalid_value(ftr_bits, ftr); @@ -384,7 +383,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, TEST_ASSERT(r < 0 && errno == EINVAL, "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); - vcpu_get_reg(vcpu, reg, &val); + val = vcpu_get_reg(vcpu, reg); TEST_ASSERT_EQ(val, old_val); } @@ -471,7 +470,7 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) } /* Get the id register value */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); /* Try to set MPAM=0. This should always be possible. */ val &= ~ID_AA64PFR0_EL1_MPAM_MASK; @@ -508,7 +507,7 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) } /* Get the id register value */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); /* Try to set MPAM_frac=0. This should always be possible. */ val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; @@ -576,7 +575,7 @@ static void test_clidr(struct kvm_vcpu *vcpu) uint64_t clidr; int level; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), &clidr); + clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1)); /* find the first empty level in the cache hierarchy */ for (level = 1; level < 7; level++) { @@ -601,7 +600,7 @@ static void test_ctr(struct kvm_vcpu *vcpu) { u64 ctr; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), &ctr); + ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0)); ctr &= ~CTR_EL0_DIC_MASK; if (ctr & CTR_EL0_IminLine_MASK) ctr--; @@ -617,7 +616,7 @@ static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) test_clidr(vcpu); test_ctr(vcpu); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); val++; vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val); @@ -630,7 +629,7 @@ static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encodin size_t idx = encoding_to_range_idx(encoding); uint64_t observed; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding), &observed); + observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding)); TEST_ASSERT_EQ(test_reg_vals[idx], observed); } @@ -665,9 +664,9 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_code); /* Check for AARCH64 only system */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); - aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY); + aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP); ksft_print_header(); diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c index 2d189f3da228..2d189f3da228 100644 --- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c +++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c index 80b74c6f152b..80b74c6f152b 100644 --- a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c +++ b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index b3b5fb0ff0a9..b3b5fb0ff0a9 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index f4ac28d53747..f4ac28d53747 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c diff --git a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index fc4fe52fb6f8..fc4fe52fb6f8 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c index f9c0c86d7e85..f16b3b27e32e 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -440,8 +440,7 @@ static void create_vpmu_vm(void *guest_code) "Failed to create vgic-v3, skipping"); /* Make sure that PMUv3 support is indicated in the ID register */ - vcpu_get_reg(vpmu_vm.vcpu, - KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &dfr0); + dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0); TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, @@ -484,7 +483,7 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) create_vpmu_vm(guest_code); vcpu = vpmu_vm.vcpu; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr_orig); + pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); pmcr = pmcr_orig; /* @@ -493,7 +492,7 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) */ set_pmcr_n(&pmcr, pmcr_n); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr); + pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); if (expect_fail) TEST_ASSERT(pmcr_orig == pmcr, @@ -521,7 +520,7 @@ static void run_access_test(uint64_t pmcr_n) vcpu = vpmu_vm.vcpu; /* Save the initial sp to restore them later to run the guest again */ - vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1), &sp); + sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1)); run_vcpu(vcpu, pmcr_n); @@ -572,12 +571,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) * Test if the 'set' and 'clr' variants of the registers * are initialized based on the number of valid counters. */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); @@ -589,12 +588,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) */ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); @@ -625,7 +624,7 @@ static uint64_t get_pmcr_n_limit(void) uint64_t pmcr; create_vpmu_vm(guest_code); - vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr); + pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); destroy_vpmu_vm(); return get_pmcr_n(pmcr); } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 9f24303acb8c..e79817bd0e29 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -21,7 +21,7 @@ #include "ucall_common.h" #ifdef __aarch64__ -#include "aarch64/vgic.h" +#include "arm64/vgic.h" static int gic_fd; diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h index bf461de34785..bf461de34785 100644 --- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h index 329e4f5079ea..329e4f5079ea 100644 --- a/tools/testing/selftests/kvm/include/aarch64/delay.h +++ b/tools/testing/selftests/kvm/include/arm64/delay.h diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h index baeb3c859389..baeb3c859389 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic.h +++ b/tools/testing/selftests/kvm/include/arm64/gic.h diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/arm64/gic_v3.h index a76615fa39a1..a76615fa39a1 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3.h diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h index 3722ed9c8f96..3722ed9c8f96 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h diff --git a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h index e43a57d99b56..e43a57d99b56 100644 --- a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 1e8d0d531fbd..1e8d0d531fbd 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/arm64/spinlock.h index cf0984106d14..cf0984106d14 100644 --- a/tools/testing/selftests/kvm/include/aarch64/spinlock.h +++ b/tools/testing/selftests/kvm/include/arm64/spinlock.h diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h index 4ec801f37f00..4ec801f37f00 100644 --- a/tools/testing/selftests/kvm/include/aarch64/ucall.h +++ b/tools/testing/selftests/kvm/include/arm64/ucall.h diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h index c481d0c00a5d..c481d0c00a5d 100644 --- a/tools/testing/selftests/kvm/include/aarch64/vgic.h +++ b/tools/testing/selftests/kvm/include/arm64/vgic.h diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bc7c242480d6..4c4e5a847f67 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -702,16 +702,22 @@ static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t va return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); } -static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id) { - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + uint64_t val; + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + + TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); + return val; } static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) { struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); + vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); } diff --git a/tools/testing/selftests/kvm/include/s390x/debug_print.h b/tools/testing/selftests/kvm/include/s390/debug_print.h index 1bf275631cc6..1bf275631cc6 100644 --- a/tools/testing/selftests/kvm/include/s390x/debug_print.h +++ b/tools/testing/selftests/kvm/include/s390/debug_print.h diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h index b0ed71302722..b0ed71302722 100644 --- a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h +++ b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h diff --git a/tools/testing/selftests/kvm/include/s390x/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h index 00a1ced6538b..00a1ced6538b 100644 --- a/tools/testing/selftests/kvm/include/s390x/facility.h +++ b/tools/testing/selftests/kvm/include/s390/facility.h diff --git a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h index e43a57d99b56..e43a57d99b56 100644 --- a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390/processor.h index 33fef6fd9617..33fef6fd9617 100644 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ b/tools/testing/selftests/kvm/include/s390/processor.h diff --git a/tools/testing/selftests/kvm/include/s390x/sie.h b/tools/testing/selftests/kvm/include/s390/sie.h index 160acd4a1db9..160acd4a1db9 100644 --- a/tools/testing/selftests/kvm/include/s390x/sie.h +++ b/tools/testing/selftests/kvm/include/s390/sie.h diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h index 8035a872a351..8035a872a351 100644 --- a/tools/testing/selftests/kvm/include/s390x/ucall.h +++ b/tools/testing/selftests/kvm/include/s390/ucall.h diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h index 51990094effd..80fe9f69b38d 100644 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ b/tools/testing/selftests/kvm/include/x86/apic.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/apic.h - * * Copyright (C) 2021, Google LLC. */ diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h index 901caf0e0939..5a74bb30e2f8 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86/evmcs.h @@ -1,9 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * tools/testing/selftests/kvm/include/x86_64/evmcs.h - * * Copyright (C) 2018, Red Hat, Inc. - * */ #ifndef SELFTEST_KVM_EVMCS_H diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h index 6849e2552f1b..f13e532be240 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86/hyperv.h @@ -1,9 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * tools/testing/selftests/kvm/include/x86_64/hyperv.h - * * Copyright (C) 2021, Red Hat, Inc. - * */ #ifndef SELFTEST_KVM_HYPERV_H diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h index 972bb1c4ab4c..972bb1c4ab4c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86/mce.h index 6119321f3f5d..295f2d554754 100644 --- a/tools/testing/selftests/kvm/include/x86_64/mce.h +++ b/tools/testing/selftests/kvm/include/x86/mce.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/mce.h - * * Copyright (C) 2022, Google LLC. */ diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h index 3c10c4dc0ae8..3c10c4dc0ae8 100644 --- a/tools/testing/selftests/kvm/include/x86_64/pmu.h +++ b/tools/testing/selftests/kvm/include/x86/pmu.h diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 645200e95f89..d60da8966772 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/processor.h - * * Copyright (C) 2018, Google LLC. */ @@ -29,6 +27,8 @@ extern uint64_t guest_tsc_khz; #define MAX_NR_CPUID_ENTRIES 100 #endif +#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull + /* Forced emulation prefix, used to invoke the emulator unconditionally. */ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" @@ -571,6 +571,11 @@ static inline void set_cr4(uint64_t val) __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); } +static inline void set_idt(const struct desc_ptr *idt_desc) +{ + __asm__ __volatile__("lidt %0"::"m"(*idt_desc)); +} + static inline u64 xgetbv(u32 index) { u32 eax, edx; @@ -1012,10 +1017,19 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); +static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) +{ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} + static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function, uint32_t index) { + TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)"); + + vcpu_get_cpuid(vcpu); + return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid, function, index); } @@ -1036,7 +1050,7 @@ static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu) return r; /* On success, refresh the cache to pick up adjustments made by KVM. */ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + vcpu_get_cpuid(vcpu); return 0; } @@ -1046,12 +1060,7 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid); /* Refresh the cache to pick up adjustments made by KVM. */ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); -} - -static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) -{ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + vcpu_get_cpuid(vcpu); } void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h index 82c11c81a956..82c11c81a956 100644 --- a/tools/testing/selftests/kvm/include/x86_64/sev.h +++ b/tools/testing/selftests/kvm/include/x86/sev.h diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h index 4803e1056055..29cffd0a9181 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86/svm.h @@ -1,10 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * tools/testing/selftests/kvm/include/x86_64/svm.h - * This is a copy of arch/x86/include/asm/svm.h - * - */ - #ifndef SELFTEST_KVM_SVM_H #define SELFTEST_KVM_SVM_H diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h index 044f0f872ba9..b74c6dcddcbd 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86/svm_util.h @@ -1,8 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/svm_utils.h - * Header for nested SVM testing - * * Copyright (C) 2020, Red Hat, Inc. */ diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h index d3825dcc3cd9..d3825dcc3cd9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/ucall.h +++ b/tools/testing/selftests/kvm/include/x86/ucall.h diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index 5f0c0a29c556..edb3c391b982 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/vmx.h - * * Copyright (C) 2018, Google LLC. */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c index 7abbf8866512..7abbf8866512 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h index d24e9ecc96c6..d24e9ecc96c6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c index 66d05506f78b..66d05506f78b 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c index 09f270545646..09f270545646 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/arm64/handlers.S index 0e443eadfac6..0e443eadfac6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/handlers.S +++ b/tools/testing/selftests/kvm/lib/arm64/handlers.S diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 698e34f39241..7ba3aa3755f3 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -281,8 +281,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) */ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1); + sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1)); + tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1)); /* Configure base granule size */ switch (vm->mode) { @@ -360,8 +360,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { uint64_t pstate, pc; - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate); - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate)); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n", indent, "", pstate, pc); diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/arm64/spinlock.c index a076e780be5d..a076e780be5d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c +++ b/tools/testing/selftests/kvm/lib/arm64/spinlock.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c index ddab0ce89d4d..ddab0ce89d4d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c index 4427f43f73ea..4427f43f73ea 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 480e3a40d197..33fefeb3ca44 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1648,7 +1648,8 @@ int _vcpu_run(struct kvm_vcpu *vcpu) rc = __vcpu_run(vcpu); } while (rc == -1 && errno == EINTR); - assert_on_unhandled_exception(vcpu); + if (!rc) + assert_on_unhandled_exception(vcpu); return rc; } diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 6ae47b3d6b25..dd663bcf0cc0 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -221,39 +221,39 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { struct kvm_riscv_core core; - vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6); + core.mode = vcpu_get_reg(vcpu, RISCV_CORE_REG(mode)); + core.regs.pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); + core.regs.ra = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra)); + core.regs.sp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp)); + core.regs.gp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp)); + core.regs.tp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp)); + core.regs.t0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0)); + core.regs.t1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1)); + core.regs.t2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2)); + core.regs.s0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0)); + core.regs.s1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1)); + core.regs.a0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0)); + core.regs.a1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1)); + core.regs.a2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2)); + core.regs.a3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3)); + core.regs.a4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4)); + core.regs.a5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5)); + core.regs.a6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6)); + core.regs.a7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7)); + core.regs.s2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2)); + core.regs.s3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3)); + core.regs.s4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4)); + core.regs.s5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5)); + core.regs.s6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6)); + core.regs.s7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7)); + core.regs.s8 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8)); + core.regs.s9 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9)); + core.regs.s10 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10)); + core.regs.s11 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11)); + core.regs.t3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3)); + core.regs.t4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4)); + core.regs.t5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5)); + core.regs.t6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6)); fprintf(stream, " MODE: 0x%lx\n", core.mode); diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c index 2c432fa164f1..2c432fa164f1 100644 --- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c +++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/lib/s390x/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c index d540812d911a..d540812d911a 100644 --- a/tools/testing/selftests/kvm/lib/s390x/facility.c +++ b/tools/testing/selftests/kvm/lib/s390/facility.c diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 20cfe970e3e3..20cfe970e3e3 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390/ucall.c index cca98734653d..cca98734653d 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390/ucall.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c index 89153a333e83..89153a333e83 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/apic.c +++ b/tools/testing/selftests/kvm/lib/x86/apic.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86/handlers.S index 7629819734af..7629819734af 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S +++ b/tools/testing/selftests/kvm/lib/x86/handlers.S diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c index 15bc8cd583aa..15bc8cd583aa 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index d61e623afc8c..7f5d62a65c68 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * x86_64-specific extensions to memstress.c. + * x86-specific extensions to memstress.c. * * Copyright (C) 2022, Google, Inc. */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c index f31f0427c17c..f31f0427c17c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/pmu.c +++ b/tools/testing/selftests/kvm/lib/x86/pmu.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 636b29ba8985..bd5a802fa7a5 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * * Copyright (C) 2018, Google LLC. */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c index e9535ee20b7f..e9535ee20b7f 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/sev.c +++ b/tools/testing/selftests/kvm/lib/x86/sev.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c index 5495a92dfd5a..d239c2097391 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/svm.c * Helpers used for nested SVM testing * Largely inspired from KVM unit test svm.c * diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c index 1265cecc7dd1..1265cecc7dd1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86/ucall.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index d7ac122820bf..d4d1208dd023 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/vmx.c - * * Copyright (C) 2018, Google LLC. */ diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 0b9678858b6d..d9c76b4c0d88 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -15,16 +15,62 @@ #include "test_util.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" + +static bool mprotect_ro_done; static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { uint64_t gpa; + int i; - for (;;) { + for (i = 0; i < 2; i++) { for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - *((volatile uint64_t *)gpa) = gpa; - GUEST_SYNC(0); + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + GUEST_SYNC(i); } + + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + *((volatile uint64_t *)gpa); + GUEST_SYNC(2); + + /* + * Write to the region while mprotect(PROT_READ) is underway. Keep + * looping until the memory is guaranteed to be read-only, otherwise + * vCPUs may complete their writes and advance to the next stage + * prematurely. + * + * For architectures that support skipping the faulting instruction, + * generate the store via inline assembly to ensure the exact length + * of the instruction is known and stable (vcpu_arch_put_guest() on + * fixed-length architectures should work, but the cost of paranoia + * is low in this case). For x86, hand-code the exact opcode so that + * there is no room for variability in the generated instruction. + */ + do { + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) +#ifdef __x86_64__ + asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */ +#elif defined(__aarch64__) + asm volatile("str %0, [%0]" :: "r" (gpa) : "memory"); +#else + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); +#endif + } while (!READ_ONCE(mprotect_ro_done)); + + /* + * Only architectures that write the entire range can explicitly sync, + * as other architectures will be stuck on the write fault. + */ +#if defined(__x86_64__) || defined(__aarch64__) + GUEST_SYNC(3); +#endif + + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + GUEST_SYNC(4); + + GUEST_ASSERT(0); } struct vcpu_info { @@ -51,34 +97,100 @@ static void rendezvous_with_boss(void) } } -static void run_vcpu(struct kvm_vcpu *vcpu) +static void assert_sync_stage(struct kvm_vcpu *vcpu, int stage) +{ + struct ucall uc; + + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); + TEST_ASSERT_EQ(uc.args[1], stage); +} + +static void run_vcpu(struct kvm_vcpu *vcpu, int stage) { vcpu_run(vcpu); - TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + assert_sync_stage(vcpu, stage); } static void *vcpu_worker(void *data) { + struct kvm_sregs __maybe_unused sregs; struct vcpu_info *info = data; struct kvm_vcpu *vcpu = info->vcpu; struct kvm_vm *vm = vcpu->vm; - struct kvm_sregs sregs; + int r; vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size); rendezvous_with_boss(); - run_vcpu(vcpu); + /* Stage 0, write all of guest memory. */ + run_vcpu(vcpu, 0); rendezvous_with_boss(); - vcpu_sregs_get(vcpu, &sregs); #ifdef __x86_64__ + vcpu_sregs_get(vcpu, &sregs); /* Toggle CR0.WP to trigger a MMU context reset. */ sregs.cr0 ^= X86_CR0_WP; -#endif vcpu_sregs_set(vcpu, &sregs); +#endif rendezvous_with_boss(); - run_vcpu(vcpu); + /* Stage 1, re-write all of guest memory. */ + run_vcpu(vcpu, 1); + rendezvous_with_boss(); + + /* Stage 2, read all of guest memory, which is now read-only. */ + run_vcpu(vcpu, 2); + + /* + * Stage 3, write guest memory and verify KVM returns -EFAULT for once + * the mprotect(PROT_READ) lands. Only architectures that support + * validating *all* of guest memory sync for this stage, as vCPUs will + * be stuck on the faulting instruction for other architectures. Go to + * stage 3 without a rendezvous + */ + do { + r = _vcpu_run(vcpu); + } while (!r); + TEST_ASSERT(r == -1 && errno == EFAULT, + "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno); + +#if defined(__x86_64__) || defined(__aarch64__) + /* + * Verify *all* writes from the guest hit EFAULT due to the VMA now + * being read-only. x86 and arm64 only at this time as skipping the + * instruction that hits the EFAULT requires advancing the program + * counter, which is arch specific and relies on inline assembly. + */ +#ifdef __x86_64__ + vcpu->run->kvm_valid_regs = KVM_SYNC_X86_REGS; +#endif + for (;;) { + r = _vcpu_run(vcpu); + if (!r) + break; + TEST_ASSERT_EQ(errno, EFAULT); +#if defined(__x86_64__) + WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS); + vcpu->run->s.regs.regs.rip += 3; +#elif defined(__aarch64__) + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), + vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4); +#endif + + } + assert_sync_stage(vcpu, 3); +#endif /* __x86_64__ || __aarch64__ */ + rendezvous_with_boss(); + + /* + * Stage 4. Run to completion, waiting for mprotect(PROT_WRITE) to + * make the memory writable again. + */ + do { + r = _vcpu_run(vcpu); + } while (r && errno == EFAULT); + TEST_ASSERT_EQ(r, 0); + assert_sync_stage(vcpu, 4); rendezvous_with_boss(); return NULL; @@ -161,7 +273,7 @@ int main(int argc, char *argv[]) const uint64_t start_gpa = SZ_4G; const int first_slot = 1; - struct timespec time_start, time_run1, time_reset, time_run2; + struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw; uint64_t max_gpa, gpa, slot_size, max_mem, i; int max_slots, slot, opt, fd; bool hugepages = false; @@ -209,7 +321,13 @@ int main(int argc, char *argv[]) vcpus = malloc(nr_vcpus * sizeof(*vcpus)); TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); - vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + vm = __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, +#ifdef __x86_64__ + max_mem / SZ_1G, +#else + max_mem / vm_guest_mode_params[VM_MODE_DEFAULT].page_size, +#endif + guest_code, vcpus); max_gpa = vm->max_gfn << vm->page_shift; TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); @@ -259,14 +377,28 @@ int main(int argc, char *argv[]) rendezvous_with_vcpus(&time_reset, "reset"); rendezvous_with_vcpus(&time_run2, "run 2"); + mprotect(mem, slot_size, PROT_READ); + usleep(10); + mprotect_ro_done = true; + sync_global_to_guest(vm, mprotect_ro_done); + + rendezvous_with_vcpus(&time_ro, "mprotect RO"); + mprotect(mem, slot_size, PROT_READ | PROT_WRITE); + rendezvous_with_vcpus(&time_rw, "mprotect RW"); + + time_rw = timespec_sub(time_rw, time_ro); + time_ro = timespec_sub(time_ro, time_run2); time_run2 = timespec_sub(time_run2, time_reset); - time_reset = timespec_sub(time_reset, time_run1); + time_reset = timespec_sub(time_reset, time_run1); time_run1 = timespec_sub(time_run1, time_start); - pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds\n", + pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, " + "ro = %ld.%.9lds, rw = %ld.%.9lds\n", time_run1.tv_sec, time_run1.tv_nsec, time_reset.tv_sec, time_reset.tv_nsec, - time_run2.tv_sec, time_run2.tv_nsec); + time_run2.tv_sec, time_run2.tv_nsec, + time_ro.tv_sec, time_ro.tv_nsec, + time_rw.tv_sec, time_rw.tv_nsec); /* * Delete even numbered slots (arbitrary) and unmap the first half of diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index 2c792228ac0b..9e370800a6a2 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -93,7 +93,7 @@ struct kvm_vm *test_vm_create(void) vcpu_init_vector_tables(vcpus[i]); /* Initialize guest timer frequency. */ - vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency), &timer_freq); + timer_freq = vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency)); sync_global_to_guest(vm, timer_freq); pr_debug("timer_freq: %lu\n", timer_freq); diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index 0e0712854953..cfed6c727bfc 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -60,7 +60,7 @@ int main(void) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc); + pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1)); /* skip sw_bp_1 */ diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 4bc1051848e5..8515921dfdbf 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -52,6 +52,8 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: @@ -71,6 +73,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR: @@ -112,6 +115,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR: @@ -429,6 +433,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), @@ -448,6 +454,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), + KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), KVM_ISA_EXT_ARR(ZICOND), KVM_ISA_EXT_ARR(ZICSR), @@ -535,10 +542,11 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off) KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR), - KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), }; if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) @@ -949,6 +957,7 @@ KVM_SBI_EXT_SUBLIST_CONFIG(base, BASE); KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA); KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU); KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN); +KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP); KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); @@ -964,6 +973,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svadu, SVADU); KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); +KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC); +KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA); KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS); KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); @@ -983,6 +994,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); +KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE); KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR); KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND); KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR); @@ -1017,6 +1029,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_sbi_sta, &config_sbi_pmu, &config_sbi_dbcn, + &config_sbi_susp, &config_aia, &config_fp_f, &config_fp_d, @@ -1031,6 +1044,8 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_svinval, &config_svnapot, &config_svpbmt, + &config_svvptc, + &config_zabha, &config_zacas, &config_zawrs, &config_zba, @@ -1050,6 +1065,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zfhmin, &config_zicbom, &config_zicboz, + &config_ziccrse, &config_zicntr, &config_zicond, &config_zicsr, diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index f299cbfd23ca..f45c0ecc902d 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -608,7 +608,7 @@ static void test_vm_events_overflow(void *guest_code) vcpu_init_vector_tables(vcpu); /* Initialize guest timer frequency. */ - vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq); + timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency)); sync_global_to_guest(vm, timer_freq); run_vcpu(vcpu); diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c index e32dd59703a0..e32dd59703a0 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390/cmma_test.c diff --git a/tools/testing/selftests/kvm/s390x/config b/tools/testing/selftests/kvm/s390/config index 23270f2d679f..23270f2d679f 100644 --- a/tools/testing/selftests/kvm/s390x/config +++ b/tools/testing/selftests/kvm/s390/config diff --git a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c index 27255880dabd..27255880dabd 100644 --- a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c +++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c index ad8095968601..ad8095968601 100644 --- a/tools/testing/selftests/kvm/s390x/debug_test.c +++ b/tools/testing/selftests/kvm/s390/debug_test.c diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390/memop.c index 4374b4cd2a80..4374b4cd2a80 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390/memop.c diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390/resets.c index 357943f2bea8..b58f75b381e5 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390/resets.c @@ -61,7 +61,7 @@ static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value) { uint64_t eval_reg; - vcpu_get_reg(vcpu, id, &eval_reg); + eval_reg = vcpu_get_reg(vcpu, id); TEST_ASSERT(eval_reg == value, "value == 0x%lx", value); } diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c index bba0d9a6dcc8..bba0d9a6dcc8 100644 --- a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c +++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390/sync_regs_test.c index 53def355ccba..53def355ccba 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390/sync_regs_test.c diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c index 12d5e1cb62e3..12d5e1cb62e3 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390/tprot.c diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index 0c112319dab1..135ee22856cf 100644 --- a/tools/testing/selftests/kvm/s390x/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -210,10 +210,13 @@ TEST_F(uc_kvm, uc_attr_mem_limit) struct kvm_device_attr attr = { .group = KVM_S390_VM_MEM_CTRL, .attr = KVM_S390_VM_MEM_LIMIT_SIZE, - .addr = (unsigned long)&limit, + .addr = (u64)&limit, }; int rc; + rc = ioctl(self->vm_fd, KVM_HAS_DEVICE_ATTR, &attr); + EXPECT_EQ(0, rc); + rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr); EXPECT_EQ(0, rc); EXPECT_EQ(~0UL, limit); @@ -635,4 +638,171 @@ TEST_F(uc_kvm, uc_skey) uc_assert_diag44(self); } +static char uc_flic_b[PAGE_SIZE]; +static struct kvm_s390_io_adapter uc_flic_ioa = { .id = 0 }; +static struct kvm_s390_io_adapter_req uc_flic_ioam = { .id = 0 }; +static struct kvm_s390_ais_req uc_flic_asim = { .isc = 0 }; +static struct kvm_s390_ais_all uc_flic_asima = { .simm = 0 }; +static struct uc_flic_attr_test { + char *name; + struct kvm_device_attr a; + int hasrc; + int geterrno; + int seterrno; +} uc_flic_attr_tests[] = { + { + .name = "KVM_DEV_FLIC_GET_ALL_IRQS", + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_GET_ALL_IRQS, + .addr = (u64)&uc_flic_b, + .attr = PAGE_SIZE, + }, + }, + { + .name = "KVM_DEV_FLIC_ENQUEUE", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_ENQUEUE, }, + }, + { + .name = "KVM_DEV_FLIC_CLEAR_IRQS", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_CLEAR_IRQS, }, + }, + { + .name = "KVM_DEV_FLIC_ADAPTER_REGISTER", + .geterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_ADAPTER_REGISTER, + .addr = (u64)&uc_flic_ioa, + }, + }, + { + .name = "KVM_DEV_FLIC_ADAPTER_MODIFY", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_ADAPTER_MODIFY, + .addr = (u64)&uc_flic_ioam, + .attr = sizeof(uc_flic_ioam), + }, + }, + { + .name = "KVM_DEV_FLIC_CLEAR_IO_IRQ", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_CLEAR_IO_IRQ, + .attr = 32, + }, + }, + { + .name = "KVM_DEV_FLIC_AISM", + .geterrno = EINVAL, + .seterrno = ENOTSUP, + .a = { + .group = KVM_DEV_FLIC_AISM, + .addr = (u64)&uc_flic_asim, + }, + }, + { + .name = "KVM_DEV_FLIC_AIRQ_INJECT", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_AIRQ_INJECT, }, + }, + { + .name = "KVM_DEV_FLIC_AISM_ALL", + .geterrno = ENOTSUP, + .seterrno = ENOTSUP, + .a = { + .group = KVM_DEV_FLIC_AISM_ALL, + .addr = (u64)&uc_flic_asima, + .attr = sizeof(uc_flic_asima), + }, + }, + { + .name = "KVM_DEV_FLIC_APF_ENABLE", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_APF_ENABLE, }, + }, + { + .name = "KVM_DEV_FLIC_APF_DISABLE_WAIT", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_APF_DISABLE_WAIT, }, + }, +}; + +TEST_F(uc_kvm, uc_flic_attrs) +{ + struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FLIC }; + struct kvm_device_attr attr; + u64 value; + int rc, i; + + rc = ioctl(self->vm_fd, KVM_CREATE_DEVICE, &cd); + ASSERT_EQ(0, rc) TH_LOG("create device failed with err %s (%i)", + strerror(errno), errno); + + for (i = 0; i < ARRAY_SIZE(uc_flic_attr_tests); i++) { + TH_LOG("test %s", uc_flic_attr_tests[i].name); + attr = (struct kvm_device_attr) { + .group = uc_flic_attr_tests[i].a.group, + .attr = uc_flic_attr_tests[i].a.attr, + .addr = uc_flic_attr_tests[i].a.addr, + }; + if (attr.addr == 0) + attr.addr = (u64)&value; + + rc = ioctl(cd.fd, KVM_HAS_DEVICE_ATTR, &attr); + EXPECT_EQ(uc_flic_attr_tests[i].hasrc, !!rc) + TH_LOG("expected dev attr missing %s", + uc_flic_attr_tests[i].name); + + rc = ioctl(cd.fd, KVM_GET_DEVICE_ATTR, &attr); + EXPECT_EQ(!!uc_flic_attr_tests[i].geterrno, !!rc) + TH_LOG("get dev attr rc not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + if (uc_flic_attr_tests[i].geterrno) + EXPECT_EQ(uc_flic_attr_tests[i].geterrno, errno) + TH_LOG("get dev attr errno not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + + rc = ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr); + EXPECT_EQ(!!uc_flic_attr_tests[i].seterrno, !!rc) + TH_LOG("set sev attr rc not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + if (uc_flic_attr_tests[i].seterrno) + EXPECT_EQ(uc_flic_attr_tests[i].seterrno, errno) + TH_LOG("set dev attr errno not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + } + + close(cd.fd); +} + +TEST_F(uc_kvm, uc_set_gsi_routing) +{ + struct kvm_irq_routing *routing = kvm_gsi_routing_create(); + struct kvm_irq_routing_entry ue = { + .type = KVM_IRQ_ROUTING_S390_ADAPTER, + .gsi = 1, + .u.adapter = (struct kvm_irq_routing_s390_adapter) { + .ind_addr = 0, + }, + }; + int rc; + + routing->entries[0] = ue; + routing->nr = 1; + rc = ioctl(self->vm_fd, KVM_SET_GSI_ROUTING, routing); + ASSERT_EQ(-1, rc) TH_LOG("err %s (%i)", strerror(errno), errno); + ASSERT_EQ(EINVAL, errno) TH_LOG("err %s (%i)", strerror(errno), errno); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index a8267628e9ed..bc440d5aba57 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -17,9 +17,9 @@ #include <processor.h> /* - * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a - * 2MB sized and aligned region so that the initial region corresponds to - * exactly one large page. + * s390 needs at least 1MB alignment, and the x86 MOVE/DELETE tests need a 2MB + * sized and aligned region so that the initial region corresponds to exactly + * one large page. */ #define MEM_REGION_SIZE 0x200000 @@ -235,7 +235,7 @@ static void guest_code_delete_memory_region(void) * in the guest will never succeed, and so isn't an option. */ memset(&idt, 0, sizeof(idt)); - __asm__ __volatile__("lidt %0" :: "m"(idt)); + set_idt(&idt); GUEST_SYNC(0); @@ -553,6 +553,56 @@ static void test_add_overlapping_private_memory_regions(void) close(memfd); kvm_vm_free(vm); } + +static void guest_code_mmio_during_vectoring(void) +{ + const struct desc_ptr idt_desc = { + .address = MEM_REGION_GPA, + .size = 0xFFF, + }; + + set_idt(&idt_desc); + + /* Generate a #GP by dereferencing a non-canonical address */ + *((uint8_t *)NONCANONICAL) = 0x1; + + GUEST_ASSERT(0); +} + +/* + * This test points the IDT descriptor base to an MMIO address. It should cause + * a KVM internal error when an event occurs in the guest. + */ +static void test_mmio_during_vectoring(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + u64 expected_gpa; + + pr_info("Testing MMIO during vectoring error handling\n"); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_mmio_during_vectoring); + virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 1); + + run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); + TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV, + "Unexpected suberror = %d", vcpu->run->internal.suberror); + TEST_ASSERT(run->internal.ndata != 4, "Unexpected internal error data array size = %d", + run->internal.ndata); + + /* The reported GPA should be IDT base + offset of the GP vector */ + expected_gpa = MEM_REGION_GPA + GP_VECTOR * sizeof(struct idt_entry); + + TEST_ASSERT(run->internal.data[3] == expected_gpa, + "Unexpected GPA = %llx (expected %lx)", + vcpu->run->internal.data[3], expected_gpa); + + kvm_vm_free(vm); +} #endif int main(int argc, char *argv[]) @@ -568,6 +618,7 @@ int main(int argc, char *argv[]) * KVM_RUN fails with ENOEXEC or EFAULT. */ test_zero_memory_regions(); + test_mmio_during_vectoring(); #endif test_invalid_memory_region_flags(); diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index a8d3afa0b86b..cce2520af720 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -269,9 +269,8 @@ static void guest_code(int cpu) static bool is_steal_time_supported(struct kvm_vcpu *vcpu) { uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA); - unsigned long enabled; + unsigned long enabled = vcpu_get_reg(vcpu, id); - vcpu_get_reg(vcpu, id, &enabled); TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result"); return enabled; diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c index f4ce5a185a7d..f4ce5a185a7d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86/amx_test.c diff --git a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c index f8916bb34405..f8916bb34405 100644 --- a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c +++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c index 7b3fda6842bc..7b3fda6842bc 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/cpuid_test.c diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c index 28cc66454601..28cc66454601 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c index 2d814c1d1dc4..2d814c1d1dc4 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86/debug_regs.c diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c index 2929c067c207..2929c067c207 100644 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c index 81055476d394..81055476d394 100644 --- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c +++ b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c index a72f13ae2edb..a72f13ae2edb 100644 --- a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c index 762628f7d4ba..762628f7d4ba 100644 --- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h index 37b1a9f52864..37b1a9f52864 100644 --- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h +++ b/tools/testing/selftests/kvm/x86/flds_emulation.h diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c index 10b1b0ba374e..10b1b0ba374e 100644 --- a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c +++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c index e058bc676cd6..e058bc676cd6 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index 4f5881d4ef66..4f5881d4ef66 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c index 74cf19661309..74cf19661309 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c index 949e08e98f31..949e08e98f31 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 068e9c69710d..068e9c69710d 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index 22c0c124582f..22c0c124582f 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c index 0ddb63229bcb..0ddb63229bcb 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index 077cd0ec3040..077cd0ec3040 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c index 5bc12222d87a..5bc12222d87a 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index 78878b3a2725..1b805cbdb47b 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -139,10 +139,12 @@ static void test_pv_unhalt(void) struct kvm_vm *vm; struct kvm_cpuid_entry2 *ent; u32 kvm_sig_old; + int r; - pr_info("testing KVM_FEATURE_PV_UNHALT\n"); + if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT)) + return; - TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS); + pr_info("testing KVM_FEATURE_PV_UNHALT\n"); /* KVM_PV_UNHALT test */ vm = vm_create_with_one_vcpu(&vcpu, guest_main); @@ -151,19 +153,45 @@ static void test_pv_unhalt(void) TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect"); - /* Make sure KVM clears vcpu->arch.kvm_cpuid */ + /* Verify KVM disallows disabling exits after vCPU creation. */ + r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + TEST_ASSERT(r && errno == EINVAL, + "Disabling exits after vCPU creation didn't fail as expected"); + + kvm_vm_free(vm); + + /* Verify that KVM clear PV_UNHALT from guest CPUID. */ + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "vCPU created with PV_UNHALT set by default"); + + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); + + /* + * Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT + * when KVM PV is not present, and DOES clear PV_UNHALT when switching + * back to the correct signature.. + */ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); kvm_sig_old = ent->ebx; ent->ebx = 0xdeadbeef; vcpu_set_cpuid(vcpu); - vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT cleared when using bogus KVM PV signature"); + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); ent->ebx = kvm_sig_old; vcpu_set_cpuid(vcpu); TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), - "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS"); + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */ diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c index 7e2bfb3c3f3b..7e2bfb3c3f3b 100644 --- a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c +++ b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 2b550eff35f1..2b550eff35f1 100644 --- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c index 3eb0313ffa39..3eb0313ffa39 100644 --- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c index e7efb2b35f8b..e7efb2b35f8b 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh index caad084b8bfd..caad084b8bfd 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c index 9cbf283ebc55..9cbf283ebc55 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86/platform_info_test.c diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c index 698cb36989db..698cb36989db 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c index c15513cd74d1..c15513cd74d1 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c index 82a8d88b5338..82a8d88b5338 100644 --- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c index 13e72fcec8dd..13e72fcec8dd 100644 --- a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c index cbc92a862ea9..cbc92a862ea9 100644 --- a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c +++ b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c index 49913784bc82..49913784bc82 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c index c021c0795a96..f4095a3d1278 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c @@ -41,13 +41,15 @@ do { \ TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \ } while (0) +#define KVM_ALWAYS_ALLOWED_CR4 (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \ + X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \ + X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \ + X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT) + static uint64_t calc_supported_cr4_feature_bits(void) { - uint64_t cr4; + uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4; - cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE | - X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE | - X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT; if (kvm_cpu_has(X86_FEATURE_UMIP)) cr4 |= X86_CR4_UMIP; if (kvm_cpu_has(X86_FEATURE_LA57)) @@ -72,36 +74,31 @@ static uint64_t calc_supported_cr4_feature_bits(void) return cr4; } -int main(int argc, char *argv[]) +static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4) { struct kvm_sregs sregs; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t cr4; int rc, i; - /* - * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and - * use it to verify all supported CR4 bits can be set prior to defining - * the vCPU model, i.e. without doing KVM_SET_CPUID2. - */ - vm = vm_create_barebones(); - vcpu = __vm_vcpu_add(vm, 0); - vcpu_sregs_get(vcpu, &sregs); - - sregs.cr0 = 0; - sregs.cr4 |= calc_supported_cr4_feature_bits(); - cr4 = sregs.cr4; - + sregs.cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + sregs.cr4 |= cr4; rc = _vcpu_sregs_set(vcpu, &sregs); TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4); + TEST_ASSERT(!!(sregs.cr4 & X86_CR4_OSXSAVE) == + (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE)), + "KVM didn't %s OSXSAVE in CPUID as expected", + (sregs.cr4 & X86_CR4_OSXSAVE) ? "set" : "clear"); + + TEST_ASSERT(!!(sregs.cr4 & X86_CR4_PKE) == + (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSPKE)), + "KVM didn't %s OSPKE in CPUID as expected", + (sregs.cr4 & X86_CR4_PKE) ? "set" : "clear"); + vcpu_sregs_get(vcpu, &sregs); TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)", sregs.cr4, cr4); - /* Verify all unsupported features are rejected by KVM. */ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP); TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57); TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE); @@ -119,10 +116,28 @@ int main(int argc, char *argv[]) /* NW without CD is illegal, as is PG without PE. */ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW); TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG); +} + +int main(int argc, char *argv[]) +{ + struct kvm_sregs sregs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + /* + * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and + * use it to verify KVM enforces guest CPUID even if *userspace* never + * sets CPUID. + */ + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); + test_cr_bits(vcpu, KVM_ALWAYS_ALLOWED_CR4); kvm_vm_free(vm); - /* Create a "real" VM and verify APIC_BASE can be set. */ + /* Create a "real" VM with a fully populated guest CPUID and verify + * APIC_BASE and all supported CR4 can be set. + */ vm = vm_create_with_one_vcpu(&vcpu, NULL); vcpu_sregs_get(vcpu, &sregs); @@ -135,6 +150,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", sregs.apic_base); + test_cr_bits(vcpu, calc_supported_cr4_feature_bits()); + kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c index 3fb967f40c6a..3fb967f40c6a 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c index 0a6dfba3905b..0a6dfba3905b 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index ae77698e6e97..a1a688e75266 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -155,7 +155,7 @@ static void guest_shutdown_code(void) /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */ memset(&idt, 0, sizeof(idt)); - __asm__ __volatile__("lidt %0" :: "m"(idt)); + set_idt(&idt); __asm__ __volatile__("ud2"); } diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c index fabeeaddfb3a..fabeeaddfb3a 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c index 55c88d664a94..55c88d664a94 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86/smm_test.c diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c index 141b7fc0c965..141b7fc0c965 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86/state_test.c diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c index 916e04248fbb..916e04248fbb 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c index 00135cbba35e..00135cbba35e 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c index 7b6481d6c0d3..7b6481d6c0d3 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c index 8a62cca28cfb..8a62cca28cfb 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c index 8fa3948b0170..8fa3948b0170 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c index 56306a19144a..56306a19144a 100644 --- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c +++ b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c index 12b0964f4f13..12b0964f4f13 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c index 59c7304f805e..59c7304f805e 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c index 57f157c06b39..57f157c06b39 100644 --- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c index 9481cbcf284f..9481cbcf284f 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index 32b2794b78fe..32b2794b78fe 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c index a81a24761aac..a81a24761aac 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c index dad988351493..dad988351493 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c index fa512d033205..fa512d033205 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c index 3fd6eceab46f..3fd6eceab46f 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c index a100ee5f0009..a100ee5f0009 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c index 90720b6205f4..90720b6205f4 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c index 1759fa5cb3f2..1759fa5cb3f2 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c index a1f5ff45d518..a1f5ff45d518 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c index 00dd2ac07a61..00dd2ac07a61 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c index 67a62a5a8895..67a62a5a8895 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c index 2ceb5c78c442..2ceb5c78c442 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index a76078a08ff8..a76078a08ff8 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c index 88bcca188799..88bcca188799 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c index c8a5c5e51661..c8a5c5e51661 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index a59b3c799bb2..a59b3c799bb2 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c index 2585087cdf5c..2585087cdf5c 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c index f331a4e9bae3..f331a4e9bae3 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile index 348e2dbdb4e0..5cb0828f0514 100644 --- a/tools/testing/selftests/landlock/Makefile +++ b/tools/testing/selftests/landlock/Makefile @@ -10,14 +10,14 @@ src_test := $(wildcard *_test.c) TEST_GEN_PROGS := $(src_test:.c=) -TEST_GEN_PROGS_EXTENDED := true +TEST_GEN_PROGS_EXTENDED := true sandbox-and-launch wait-pipe # Short targets: -$(TEST_GEN_PROGS): LDLIBS += -lcap +$(TEST_GEN_PROGS): LDLIBS += -lcap -lpthread $(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static include ../lib.mk # Targets with $(OUTPUT)/ prefix: -$(TEST_GEN_PROGS): LDLIBS += -lcap +$(TEST_GEN_PROGS): LDLIBS += -lcap -lpthread $(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 61056fa074bb..a604ea5d8297 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -9,17 +9,15 @@ #include <arpa/inet.h> #include <errno.h> -#include <linux/landlock.h> #include <linux/securebits.h> #include <sys/capability.h> #include <sys/socket.h> -#include <sys/syscall.h> -#include <sys/types.h> #include <sys/un.h> #include <sys/wait.h> #include <unistd.h> #include "../kselftest_harness.h" +#include "wrappers.h" #define TMP_DIR "tmp" @@ -30,33 +28,8 @@ /* TEST_F_FORK() should not be used for new tests. */ #define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name) -#ifndef landlock_create_ruleset -static inline int -landlock_create_ruleset(const struct landlock_ruleset_attr *const attr, - const size_t size, const __u32 flags) -{ - return syscall(__NR_landlock_create_ruleset, attr, size, flags); -} -#endif - -#ifndef landlock_add_rule -static inline int landlock_add_rule(const int ruleset_fd, - const enum landlock_rule_type rule_type, - const void *const rule_attr, - const __u32 flags) -{ - return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr, - flags); -} -#endif - -#ifndef landlock_restrict_self -static inline int landlock_restrict_self(const int ruleset_fd, - const __u32 flags) -{ - return syscall(__NR_landlock_restrict_self, ruleset_fd, flags); -} -#endif +static const char bin_sandbox_and_launch[] = "./sandbox-and-launch"; +static const char bin_wait_pipe[] = "./wait-pipe"; static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) { @@ -250,11 +223,6 @@ struct service_fixture { }; }; -static pid_t __maybe_unused sys_gettid(void) -{ - return syscall(__NR_gettid); -} - static void __maybe_unused set_unix_address(struct service_fixture *const srv, const unsigned short index) { diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 6788762188fe..aa6f2c1cbec7 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -37,6 +37,10 @@ #include <linux/fs.h> #include <linux/mount.h> +/* Defines AT_EXECVE_CHECK without type conflicts. */ +#define _ASM_GENERIC_FCNTL_H +#include <linux/fcntl.h> + #include "common.h" #ifndef renameat2 @@ -55,11 +59,17 @@ int open_tree(int dfd, const char *filename, unsigned int flags) } #endif +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + #ifndef RENAME_EXCHANGE #define RENAME_EXCHANGE (1 << 1) #endif -#define BINARY_PATH "./true" +static const char bin_true[] = "./true"; /* Paths (sibling number and depth) */ static const char dir_s1d1[] = TMP_DIR "/s1d1"; @@ -85,6 +95,9 @@ static const char file1_s3d1[] = TMP_DIR "/s3d1/f1"; /* dir_s3d2 is a mount point. */ static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2"; static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; +static const char file1_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3/f1"; +static const char dir_s3d4[] = TMP_DIR "/s3d1/s3d2/s3d4"; +static const char file1_s3d4[] = TMP_DIR "/s3d1/s3d2/s3d4/f1"; /* * layout1 hierarchy: @@ -108,8 +121,11 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; * │  └── f2 * └── s3d1 *   ├── f1 - * └── s3d2 - * └── s3d3 + * └── s3d2 [mount point] + *   ├── s3d3 + *   │ └── f1 + *   └── s3d4 + *   └── f1 */ static bool fgrep(FILE *const inf, const char *const str) @@ -358,7 +374,8 @@ static void create_layout1(struct __test_metadata *const _metadata) ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2)); clear_cap(_metadata, CAP_SYS_ADMIN); - ASSERT_EQ(0, mkdir(dir_s3d3, 0700)); + create_file(_metadata, file1_s3d3); + create_file(_metadata, file1_s3d4); } static void remove_layout1(struct __test_metadata *const _metadata) @@ -378,7 +395,8 @@ static void remove_layout1(struct __test_metadata *const _metadata) EXPECT_EQ(0, remove_path(dir_s2d2)); EXPECT_EQ(0, remove_path(file1_s3d1)); - EXPECT_EQ(0, remove_path(dir_s3d3)); + EXPECT_EQ(0, remove_path(file1_s3d3)); + EXPECT_EQ(0, remove_path(file1_s3d4)); set_cap(_metadata, CAP_SYS_ADMIN); umount(dir_s3d2); clear_cap(_metadata, CAP_SYS_ADMIN); @@ -1957,8 +1975,8 @@ TEST_F_FORK(layout1, relative_chroot_chdir) test_relative_path(_metadata, REL_CHROOT_CHDIR); } -static void copy_binary(struct __test_metadata *const _metadata, - const char *const dst_path) +static void copy_file(struct __test_metadata *const _metadata, + const char *const src_path, const char *const dst_path) { int dst_fd, src_fd; struct stat statbuf; @@ -1968,11 +1986,10 @@ static void copy_binary(struct __test_metadata *const _metadata, { TH_LOG("Failed to open \"%s\": %s", dst_path, strerror(errno)); } - src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC); + src_fd = open(src_path, O_RDONLY | O_CLOEXEC); ASSERT_LE(0, src_fd) { - TH_LOG("Failed to open \"" BINARY_PATH "\": %s", - strerror(errno)); + TH_LOG("Failed to open \"%s\": %s", src_path, strerror(errno)); } ASSERT_EQ(0, fstat(src_fd, &statbuf)); ASSERT_EQ(statbuf.st_size, @@ -2003,11 +2020,26 @@ static void test_execute(struct __test_metadata *const _metadata, const int err, ASSERT_EQ(1, WIFEXITED(status)); ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) { - TH_LOG("Unexpected return code for \"%s\": %s", path, - strerror(errno)); + TH_LOG("Unexpected return code for \"%s\"", path); }; } +static void test_check_exec(struct __test_metadata *const _metadata, + const int err, const char *const path) +{ + int ret; + char *const argv[] = { (char *)path, NULL }; + + ret = sys_execveat(AT_FDCWD, path, argv, NULL, + AT_EMPTY_PATH | AT_EXECVE_CHECK); + if (err) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(errno, err); + } else { + EXPECT_EQ(0, ret); + } +} + TEST_F_FORK(layout1, execute) { const struct rule rules[] = { @@ -2021,9 +2053,13 @@ TEST_F_FORK(layout1, execute) create_ruleset(_metadata, rules[0].access, rules); ASSERT_LE(0, ruleset_fd); - copy_binary(_metadata, file1_s1d1); - copy_binary(_metadata, file1_s1d2); - copy_binary(_metadata, file1_s1d3); + copy_file(_metadata, bin_true, file1_s1d1); + copy_file(_metadata, bin_true, file1_s1d2); + copy_file(_metadata, bin_true, file1_s1d3); + + /* Checks before file1_s1d1 being denied. */ + test_execute(_metadata, 0, file1_s1d1); + test_check_exec(_metadata, 0, file1_s1d1); enforce_ruleset(_metadata, ruleset_fd); ASSERT_EQ(0, close(ruleset_fd)); @@ -2031,14 +2067,94 @@ TEST_F_FORK(layout1, execute) ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); test_execute(_metadata, EACCES, file1_s1d1); + test_check_exec(_metadata, EACCES, file1_s1d1); ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); test_execute(_metadata, 0, file1_s1d2); + test_check_exec(_metadata, 0, file1_s1d2); ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); test_execute(_metadata, 0, file1_s1d3); + test_check_exec(_metadata, 0, file1_s1d3); +} + +TEST_F_FORK(layout1, umount_sandboxer) +{ + int pipe_child[2], pipe_parent[2]; + char buf_parent; + pid_t child; + int status; + + copy_file(_metadata, bin_sandbox_and_launch, file1_s3d3); + ASSERT_EQ(0, pipe2(pipe_child, 0)); + ASSERT_EQ(0, pipe2(pipe_parent, 0)); + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + char pipe_child_str[12], pipe_parent_str[12]; + char *const argv[] = { (char *)file1_s3d3, + (char *)bin_wait_pipe, pipe_child_str, + pipe_parent_str, NULL }; + + /* Passes the pipe FDs to the executed binary and its child. */ + EXPECT_EQ(0, close(pipe_child[0])); + EXPECT_EQ(0, close(pipe_parent[1])); + snprintf(pipe_child_str, sizeof(pipe_child_str), "%d", + pipe_child[1]); + snprintf(pipe_parent_str, sizeof(pipe_parent_str), "%d", + pipe_parent[0]); + + /* + * We need bin_sandbox_and_launch (copied inside the mount as + * file1_s3d3) to execute bin_wait_pipe (outside the mount) to + * make sure the mount point will not be EBUSY because of + * file1_s3d3 being in use. This avoids a potential race + * condition between the following read() and umount() calls. + */ + ASSERT_EQ(0, execve(argv[0], argv, NULL)) + { + TH_LOG("Failed to execute \"%s\": %s", argv[0], + strerror(errno)); + }; + _exit(1); + return; + } + + EXPECT_EQ(0, close(pipe_child[1])); + EXPECT_EQ(0, close(pipe_parent[0])); + + /* Waits for the child to sandbox itself. */ + EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1)); + + /* Tests that the sandboxer is tied to its mount point. */ + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(-1, umount(dir_s3d2)); + EXPECT_EQ(EBUSY, errno); + clear_cap(_metadata, CAP_SYS_ADMIN); + + /* Signals the child to launch a grandchild. */ + EXPECT_EQ(1, write(pipe_parent[1], ".", 1)); + + /* Waits for the grandchild. */ + EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1)); + + /* Tests that the domain's sandboxer is not tied to its mount point. */ + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, umount(dir_s3d2)) + { + TH_LOG("Failed to umount \"%s\": %s", dir_s3d2, + strerror(errno)); + }; + clear_cap(_metadata, CAP_SYS_ADMIN); + + /* Signals the grandchild to terminate. */ + EXPECT_EQ(1, write(pipe_parent[1], ".", 1)); + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFEXITED(status)); + ASSERT_EQ(0, WEXITSTATUS(status)); } TEST_F_FORK(layout1, link) @@ -2444,6 +2560,44 @@ TEST_F_FORK(layout1, refer_mount_root_deny) EXPECT_EQ(0, close(root_fd)); } +TEST_F_FORK(layout1, refer_part_mount_tree_is_allowed) +{ + const struct rule layer1[] = { + { + /* Parent mount point. */ + .path = dir_s3d1, + .access = LANDLOCK_ACCESS_FS_REFER | + LANDLOCK_ACCESS_FS_MAKE_REG, + }, + { + /* + * Removing the source file is allowed because its + * access rights are already a superset of the + * destination. + */ + .path = dir_s3d4, + .access = LANDLOCK_ACCESS_FS_REFER | + LANDLOCK_ACCESS_FS_MAKE_REG | + LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + {}, + }; + int ruleset_fd; + + ASSERT_EQ(0, unlink(file1_s3d3)); + ruleset_fd = create_ruleset(_metadata, + LANDLOCK_ACCESS_FS_REFER | + LANDLOCK_ACCESS_FS_MAKE_REG | + LANDLOCK_ACCESS_FS_REMOVE_FILE, + layer1); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, rename(file1_s3d4, file1_s3d3)); +} + TEST_F_FORK(layout1, reparent_link) { const struct rule layer1[] = { diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c index a19db4d0b3bd..8f31b673ff2d 100644 --- a/tools/testing/selftests/landlock/ptrace_test.c +++ b/tools/testing/selftests/landlock/ptrace_test.c @@ -22,8 +22,6 @@ /* Copied from security/yama/yama_lsm.c */ #define YAMA_SCOPE_DISABLED 0 #define YAMA_SCOPE_RELATIONAL 1 -#define YAMA_SCOPE_CAPABILITY 2 -#define YAMA_SCOPE_NO_ATTACH 3 static void create_domain(struct __test_metadata *const _metadata) { diff --git a/tools/testing/selftests/landlock/sandbox-and-launch.c b/tools/testing/selftests/landlock/sandbox-and-launch.c new file mode 100644 index 000000000000..3e32e1a51ac5 --- /dev/null +++ b/tools/testing/selftests/landlock/sandbox-and-launch.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Sandbox itself and execute another program (in a different mount point). + * + * Used by layout1.umount_sandboxer from fs_test.c + * + * Copyright © 2024-2025 Microsoft Corporation + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/prctl.h> +#include <unistd.h> + +#include "wrappers.h" + +int main(int argc, char *argv[]) +{ + struct landlock_ruleset_attr ruleset_attr = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + int pipe_child, pipe_parent, ruleset_fd; + char buf; + + /* + * The first argument must be the file descriptor number of a pipe. + * The second argument must be the program to execute. + */ + if (argc != 4) { + fprintf(stderr, "Wrong number of arguments (not three)\n"); + return 1; + } + + pipe_child = atoi(argv[2]); + pipe_parent = atoi(argv[3]); + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + if (ruleset_fd < 0) { + perror("Failed to create ruleset"); + return 1; + } + + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + perror("Failed to call prctl()"); + return 1; + } + + if (landlock_restrict_self(ruleset_fd, 0)) { + perror("Failed to restrict self"); + return 1; + } + + if (close(ruleset_fd)) { + perror("Failed to close ruleset"); + return 1; + } + + /* Signals that we are sandboxed. */ + errno = 0; + if (write(pipe_child, ".", 1) != 1) { + perror("Failed to write to the second argument"); + return 1; + } + + /* Waits for the parent to try to umount. */ + if (read(pipe_parent, &buf, 1) != 1) { + perror("Failed to write to the third argument"); + return 1; + } + + /* Shifts arguments. */ + argv[0] = argv[1]; + argv[1] = argv[2]; + argv[2] = argv[3]; + argv[3] = NULL; + execve(argv[0], argv, NULL); + perror("Failed to execute the provided binary"); + return 1; +} diff --git a/tools/testing/selftests/landlock/wait-pipe.c b/tools/testing/selftests/landlock/wait-pipe.c new file mode 100644 index 000000000000..0dbcd260a0fa --- /dev/null +++ b/tools/testing/selftests/landlock/wait-pipe.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Write in a pipe and wait. + * + * Used by layout1.umount_sandboxer from fs_test.c + * + * Copyright © 2024-2025 Microsoft Corporation + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +int main(int argc, char *argv[]) +{ + int pipe_child, pipe_parent; + char buf; + + /* The first argument must be the file descriptor number of a pipe. */ + if (argc != 3) { + fprintf(stderr, "Wrong number of arguments (not two)\n"); + return 1; + } + + pipe_child = atoi(argv[1]); + pipe_parent = atoi(argv[2]); + + /* Signals that we are waiting. */ + if (write(pipe_child, ".", 1) != 1) { + perror("Failed to write to first argument"); + return 1; + } + + /* Waits for the parent do its test. */ + if (read(pipe_parent, &buf, 1) != 1) { + perror("Failed to write to the second argument"); + return 1; + } + + return 0; +} diff --git a/tools/testing/selftests/landlock/wrappers.h b/tools/testing/selftests/landlock/wrappers.h new file mode 100644 index 000000000000..65548323e45d --- /dev/null +++ b/tools/testing/selftests/landlock/wrappers.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Syscall wrappers + * + * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> + * Copyright © 2019-2020 ANSSI + * Copyright © 2021-2025 Microsoft Corporation + */ + +#define _GNU_SOURCE +#include <linux/landlock.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#ifndef landlock_create_ruleset +static inline int +landlock_create_ruleset(const struct landlock_ruleset_attr *const attr, + const size_t size, const __u32 flags) +{ + return syscall(__NR_landlock_create_ruleset, attr, size, flags); +} +#endif + +#ifndef landlock_add_rule +static inline int landlock_add_rule(const int ruleset_fd, + const enum landlock_rule_type rule_type, + const void *const rule_attr, + const __u32 flags) +{ + return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr, + flags); +} +#endif + +#ifndef landlock_restrict_self +static inline int landlock_restrict_self(const int ruleset_fd, + const __u32 flags) +{ + return syscall(__NR_landlock_restrict_self, ruleset_fd, flags); +} +#endif + +static inline pid_t sys_gettid(void) +{ + return syscall(__NR_gettid); +} diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh index 37bbc3fb2780..2a03deb26a12 100755 --- a/tools/testing/selftests/livepatch/test-callbacks.sh +++ b/tools/testing/selftests/livepatch/test-callbacks.sh @@ -259,7 +259,7 @@ $MOD_TARGET: ${MOD_TARGET}_init % insmod test_modules/$MOD_LIVEPATCH.ko pre_patch_ret=-19 livepatch: enabling patch '$MOD_LIVEPATCH' livepatch: '$MOD_LIVEPATCH': initializing patching transition -test_klp_callbacks_demo: pre_patch_callback: vmlinux +$MOD_LIVEPATCH: pre_patch_callback: vmlinux livepatch: pre-patch callback failed for object 'vmlinux' livepatch: failed to enable patch '$MOD_LIVEPATCH' livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh index 2c91428d2997..58fe1d96997c 100755 --- a/tools/testing/selftests/livepatch/test-sysfs.sh +++ b/tools/testing/selftests/livepatch/test-sysfs.sh @@ -5,6 +5,8 @@ . $(dirname $0)/functions.sh MOD_LIVEPATCH=test_klp_livepatch +MOD_LIVEPATCH2=test_klp_callbacks_demo +MOD_LIVEPATCH3=test_klp_syscall setup_config @@ -19,6 +21,8 @@ check_sysfs_rights "$MOD_LIVEPATCH" "enabled" "-rw-r--r--" check_sysfs_value "$MOD_LIVEPATCH" "enabled" "1" check_sysfs_rights "$MOD_LIVEPATCH" "force" "--w-------" check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--" +check_sysfs_rights "$MOD_LIVEPATCH" "stack_order" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1" check_sysfs_rights "$MOD_LIVEPATCH" "transition" "-r--r--r--" check_sysfs_value "$MOD_LIVEPATCH" "transition" "0" check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--" @@ -131,4 +135,71 @@ livepatch: '$MOD_LIVEPATCH': completing unpatching transition livepatch: '$MOD_LIVEPATCH': unpatching complete % rmmod $MOD_LIVEPATCH" +start_test "sysfs test stack_order value" + +load_lp $MOD_LIVEPATCH + +check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1" + +load_lp $MOD_LIVEPATCH2 + +check_sysfs_value "$MOD_LIVEPATCH2" "stack_order" "2" + +load_lp $MOD_LIVEPATCH3 + +check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "3" + +disable_lp $MOD_LIVEPATCH2 +unload_lp $MOD_LIVEPATCH2 + +check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1" +check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "2" + +disable_lp $MOD_LIVEPATCH3 +unload_lp $MOD_LIVEPATCH3 + +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% insmod test_modules/$MOD_LIVEPATCH.ko +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +% insmod test_modules/$MOD_LIVEPATCH2.ko +livepatch: enabling patch '$MOD_LIVEPATCH2' +livepatch: '$MOD_LIVEPATCH2': initializing patching transition +$MOD_LIVEPATCH2: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': starting patching transition +livepatch: '$MOD_LIVEPATCH2': completing patching transition +$MOD_LIVEPATCH2: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': patching complete +% insmod test_modules/$MOD_LIVEPATCH3.ko +livepatch: enabling patch '$MOD_LIVEPATCH3' +livepatch: '$MOD_LIVEPATCH3': initializing patching transition +livepatch: '$MOD_LIVEPATCH3': starting patching transition +livepatch: '$MOD_LIVEPATCH3': completing patching transition +livepatch: '$MOD_LIVEPATCH3': patching complete +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled +livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition +$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': starting unpatching transition +livepatch: '$MOD_LIVEPATCH2': completing unpatching transition +$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': unpatching complete +% rmmod $MOD_LIVEPATCH2 +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH3/enabled +livepatch: '$MOD_LIVEPATCH3': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH3': starting unpatching transition +livepatch: '$MOD_LIVEPATCH3': completing unpatching transition +livepatch: '$MOD_LIVEPATCH3': unpatching complete +% rmmod $MOD_LIVEPATCH3 +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + exit 0 diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c index 66dec47e3ca3..732e89fe99c0 100644 --- a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c +++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c @@ -56,16 +56,15 @@ TEST(flags_zero_lsm_set_self_attr) TEST(flags_overset_lsm_set_self_attr) { const long page_size = sysconf(_SC_PAGESIZE); - char *ctx = calloc(page_size, 1); + struct lsm_ctx *ctx = calloc(page_size, 1); __u32 size = page_size; - struct lsm_ctx *tctx = (struct lsm_ctx *)ctx; ASSERT_NE(NULL, ctx); if (attr_lsm_count()) { - ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, tctx, &size, + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0)); } - ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, tctx, + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, ctx, size, 0)); free(ctx); diff --git a/tools/testing/selftests/media_tests/regression_test.txt b/tools/testing/selftests/media_tests/regression_test.txt index 2627367681f7..9d0fcd98c085 100644 --- a/tools/testing/selftests/media_tests/regression_test.txt +++ b/tools/testing/selftests/media_tests/regression_test.txt @@ -1,5 +1,5 @@ Testing for regressions in Media Controller API register, ioctl, syscall, -and unregister paths. There have a few problems that result in user-after +and unregister paths. There have a few problems that result in use-after free on media_device, media_devnode, and cdev pointers when the driver is unbound while ioctl is in progress. @@ -15,11 +15,11 @@ Build media_device_test cd tools/testing/selftests/media_tests make -Regressions test for cdev user-after free error on /dev/mediaX when driver +Regressions test for cdev use-after-free error on /dev/mediaX when driver is unbound: Start media_device_test to regression test media devnode dynamic alloc -and cdev user-after-free fixes. This opens media dev files and sits in +and cdev use-after-free fixes. This opens media dev files and sits in a loop running media ioctl MEDIA_IOC_DEVICE_INFO command once every 10 seconds. The idea is when device file goes away, media devnode and cdev should stick around until this test exits. @@ -40,4 +40,4 @@ keep ioctls going while bind/unbind runs. Copy bind_unbind_sample.txt and make changes to specify the driver name and number to run bind and unbind. Start the bind_unbind.sh -Run dmesg looking for any user-after free errors or mutex lock errors. +Run dmesg looking for any use-after-free errors or mutex lock errors. diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 95af2d78fd31..5b993924cc3f 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -9,6 +9,7 @@ #include <fcntl.h> #include <linux/memfd.h> #include <sched.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <signal.h> @@ -170,7 +171,7 @@ static void mfd_fail_new(const char *name, unsigned int flags) r = sys_memfd_create(name, flags); if (r >= 0) { printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n", - name, flags); + name ? name : "NULL", flags); close(r); abort(); } @@ -281,6 +282,24 @@ static void *mfd_assert_mmap_shared(int fd) return p; } +static void *mfd_assert_mmap_read_shared(int fd) +{ + void *p; + + p = mmap(NULL, + mfd_def_size, + PROT_READ, + MAP_SHARED, + fd, + 0); + if (p == MAP_FAILED) { + printf("mmap() failed: %m\n"); + abort(); + } + + return p; +} + static void *mfd_assert_mmap_private(int fd) { void *p; @@ -979,6 +998,30 @@ static void test_seal_future_write(void) close(fd); } +static void test_seal_write_map_read_shared(void) +{ + int fd; + void *p; + + printf("%s SEAL-WRITE-MAP-READ\n", memfd_str); + + fd = mfd_assert_new("kern_memfd_seal_write_map_read", + mfd_def_size, + MFD_CLOEXEC | MFD_ALLOW_SEALING); + + mfd_assert_add_seals(fd, F_SEAL_WRITE); + mfd_assert_has_seals(fd, F_SEAL_WRITE); + + p = mfd_assert_mmap_read_shared(fd); + + mfd_assert_read(fd); + mfd_assert_read_shared(fd); + mfd_fail_write(fd); + + munmap(p, mfd_def_size); + close(fd); +} + /* * Test SEAL_SHRINK * Test whether SEAL_SHRINK actually prevents shrinking @@ -1557,6 +1600,11 @@ static void test_share_fork(char *banner, char *b_suffix) close(fd); } +static bool pid_ns_supported(void) +{ + return access("/proc/self/ns/pid", F_OK) == 0; +} + int main(int argc, char **argv) { pid_t pid; @@ -1587,12 +1635,17 @@ int main(int argc, char **argv) test_seal_write(); test_seal_future_write(); + test_seal_write_map_read_shared(); test_seal_shrink(); test_seal_grow(); test_seal_resize(); - test_sysctl_simple(); - test_sysctl_nested(); + if (pid_ns_supported()) { + test_sysctl_simple(); + test_sysctl_nested(); + } else { + printf("PID namespaces are not supported; skipping sysctl tests\n"); + } test_share_dup("SHARE-DUP", ""); test_share_mmap("SHARE-MMAP", ""); diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 8f01f4da1c0d..121000c28c10 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -27,6 +27,7 @@ protection_keys_64 madv_populate uffd-stress uffd-unit-tests +uffd-wp-mremap mlock-intersect-test mlock-random-test virtual_address_range @@ -36,6 +37,9 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests memfd_secret +hugetlb_dio +pkey_sighandler_tests_32 +pkey_sighandler_tests_64 soft-dirty split_huge_page_test ksm_tests @@ -49,7 +53,6 @@ va_high_addr_switch hugetlb_fault_after_madv hugetlb_madv_vs_map mseal_test -seal_elf droppable hugetlb_dio pkey_sighandler_tests_32 diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 3de23ea4663f..63ce39d024bb 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -33,9 +33,16 @@ endif # LDLIBS. MAKEFLAGS += --no-builtin-rules -CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) LDLIBS = -lrt -lpthread -lm +# Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is +# automatically enabled at -O1 or above. This triggers various unused-result +# warnings where functions such as read() or write() are called and their +# return value is not checked. Disable _FORTIFY_SOURCE to silence those +# warnings. +CFLAGS += -U_FORTIFY_SOURCE + KDIR ?= /lib/modules/$(shell uname -r)/build ifneq (,$(wildcard $(KDIR)/Module.symvers)) ifneq (,$(wildcard $(KDIR)/include/linux/page_frag_cache.h)) @@ -75,13 +82,13 @@ TEST_GEN_FILES += mrelease_test TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += mremap_test TEST_GEN_FILES += mseal_test -TEST_GEN_FILES += seal_elf TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress TEST_GEN_FILES += uffd-unit-tests +TEST_GEN_FILES += uffd-wp-mremap TEST_GEN_FILES += split_huge_page_test TEST_GEN_FILES += ksm_tests TEST_GEN_FILES += ksm_functional_tests @@ -152,11 +159,16 @@ $(TEST_GEN_FILES): vm_util.c thp_settings.c $(OUTPUT)/uffd-stress: uffd-common.c $(OUTPUT)/uffd-unit-tests: uffd-common.c +$(OUTPUT)/uffd-wp-mremap: uffd-common.c +$(OUTPUT)/protection_keys: pkey_util.c +$(OUTPUT)/pkey_sighandler_tests: pkey_util.c ifeq ($(ARCH),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) +$(BINARIES_32) $(BINARIES_64): pkey_util.c + define gen-target-rule-32 $(1) $(1)_32: $(OUTPUT)/$(1)_32 .PHONY: $(1) $(1)_32 diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index 4309916f629e..a28baa536332 100644 --- a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -7,3 +7,4 @@ CONFIG_TEST_HMM=m CONFIG_GUP_TEST=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_MEM_SOFT_DIRTY=y +CONFIG_ANON_VMA_NAME=y diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 32c6ccc2a6be..9446673645eb 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -758,7 +758,7 @@ static void do_run_with_base_page(test_fn fn, bool swapout) } /* Populate a base page. */ - memset(mem, 0, pagesize); + memset(mem, 1, pagesize); if (swapout) { madvise(mem, pagesize, MADV_PAGEOUT); @@ -824,12 +824,12 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) * Try to populate a THP. Touch the first sub-page and test if * we get the last sub-page populated automatically. */ - mem[0] = 0; + mem[0] = 1; if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { ksft_test_result_skip("Did not get a THP populated\n"); goto munmap; } - memset(mem, 0, thpsize); + memset(mem, 1, thpsize); size = thpsize; switch (thp_run) { @@ -1012,7 +1012,7 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) } /* Populate an huge page. */ - memset(mem, 0, hugetlbsize); + memset(mem, 1, hugetlbsize); /* * We need a total of two hugetlb pages to handle COW/unsharing @@ -1482,7 +1482,7 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) } smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); - if (mem == MAP_FAILED) { + if (smem == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); goto munmap; } @@ -1583,7 +1583,7 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc) goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); - if (mem == MAP_FAILED) { + if (smem == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); goto munmap; } @@ -1634,7 +1634,7 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); - if (mem == MAP_FAILED) { + if (smem == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); goto munmap; } @@ -1684,7 +1684,7 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, goto close; } smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); - if (mem == MAP_FAILED) { + if (smem == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); goto munmap; } @@ -1696,7 +1696,7 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, fn(mem, smem, hugetlbsize); munmap: munmap(mem, hugetlbsize); - if (mem != MAP_FAILED) + if (smem != MAP_FAILED) munmap(smem, hugetlbsize); close: close(fd); diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-pages.c index 7cdf815d0d63..ece37212a8a2 100644 --- a/tools/testing/selftests/mm/guard-pages.c +++ b/tools/testing/selftests/mm/guard-pages.c @@ -55,6 +55,12 @@ static int pidfd_open(pid_t pid, unsigned int flags) return syscall(SYS_pidfd_open, pid, flags); } +static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec, + size_t n, int advice, unsigned int flags) +{ + return syscall(__NR_process_madvise, pidfd, iovec, n, advice, flags); +} + /* * Enable our signal catcher and try to read/write the specified buffer. The * return value indicates whether the read/write succeeds without a fatal @@ -419,7 +425,7 @@ TEST_F(guard_pages, process_madvise) ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0); /* Now guard in one step. */ - count = process_madvise(pidfd, vec, 6, MADV_GUARD_INSTALL, 0); + count = sys_process_madvise(pidfd, vec, 6, MADV_GUARD_INSTALL, 0); /* OK we don't have permission to do this, skip. */ if (count == -1 && errno == EPERM) @@ -440,7 +446,7 @@ TEST_F(guard_pages, process_madvise) ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size])); /* Now do the same with unguard... */ - count = process_madvise(pidfd, vec, 6, MADV_GUARD_REMOVE, 0); + count = sys_process_madvise(pidfd, vec, 6, MADV_GUARD_REMOVE, 0); /* ...and everything should now succeed. */ @@ -990,7 +996,7 @@ TEST_F(guard_pages, fork) MAP_ANON | MAP_PRIVATE, -1, 0); ASSERT_NE(ptr, MAP_FAILED); - /* Establish guard apges in the first 5 pages. */ + /* Establish guard pages in the first 5 pages. */ ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); pid = fork(); @@ -1030,6 +1036,77 @@ TEST_F(guard_pages, fork) } /* + * Assert expected behaviour after we fork populated ranges of anonymous memory + * and then guard and unguard the range. + */ +TEST_F(guard_pages, fork_cow) +{ + const unsigned long page_size = self->page_size; + char *ptr; + pid_t pid; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Populate range. */ + for (i = 0; i < 10 * page_size; i++) { + char chr = 'a' + (i % 26); + + ptr[i] = chr; + } + + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) { + /* This is the child process now. */ + + /* Ensure the range is as expected. */ + for (i = 0; i < 10 * page_size; i++) { + char expected = 'a' + (i % 26); + char actual = ptr[i]; + + ASSERT_EQ(actual, expected); + } + + /* Establish guard pages across the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + /* Remove it. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* + * By removing the guard pages, the page tables will be + * cleared. Assert that we are looking at the zero page now. + */ + for (i = 0; i < 10 * page_size; i++) { + char actual = ptr[i]; + + ASSERT_EQ(actual, '\0'); + } + + exit(0); + } + + /* Parent process. */ + + /* Parent simply waits on child. */ + waitpid(pid, NULL, 0); + + /* Ensure the range is unchanged in parent anon range. */ + for (i = 0; i < 10 * page_size; i++) { + char expected = 'a' + (i % 26); + char actual = ptr[i]; + + ASSERT_EQ(actual, expected); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* * Assert that forking a process with VMAs that do have VM_WIPEONFORK set * behave as expected. */ diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index b748c48908d9..dcdd5bb20f3d 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -776,7 +776,7 @@ err_out: int main(int argc, char *argv[]) { - int ret, opt; + int ret = 0, opt; int prot = 0; int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT; int merge_type = KSM_MERGE_TYPE_DEFAULT; diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 64bcbb7151cf..1e3a595fbf01 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -204,4 +204,103 @@ TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME) ASSERT_EQ(pthread_cancel(self->threads[i]), 0); } +/* + * migration test with shared anon THP page + */ + +TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME) +{ + pid_t pid; + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, 2 * TWOMEG, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG); + ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) { + pid = fork(); + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGHUP); + /* Parent may have died before prctl so check now. */ + if (getppid() == 1) + kill(getpid(), SIGHUP); + access_mem(ptr); + } else { + self->pids[i] = pid; + } + } + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(kill(self->pids[i], SIGTERM), 0); +} + +/* + * migration test with private anon hugetlb page + */ +TEST_F_TIMEOUT(migration, private_anon_htlb, 2*RUNTIME) +{ + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) + if (pthread_create(&self->threads[i], NULL, access_mem, ptr)) + perror("Couldn't create thread"); + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(pthread_cancel(self->threads[i]), 0); +} + +/* + * migration test with shared anon hugetlb page + */ +TEST_F_TIMEOUT(migration, shared_anon_htlb, 2*RUNTIME) +{ + pid_t pid; + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) { + pid = fork(); + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGHUP); + /* Parent may have died before prctl so check now. */ + if (getppid() == 1) + kill(getpid(), SIGHUP); + access_mem(ptr); + } else { + self->pids[i] = pid; + } + } + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(kill(self->pids[i], SIGTERM), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index 1db134063c38..af2fce496912 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -280,6 +280,7 @@ static void test_uffdio_copy(void) dst = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0); if (dst == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); + free(src); return; } diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 5a3a9bcba640..bb84476a177f 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -34,7 +34,7 @@ struct config { unsigned long long dest_alignment; unsigned long long region_size; int overlapping; - int dest_preamble_size; + unsigned int dest_preamble_size; }; struct test { @@ -328,7 +328,7 @@ static void mremap_move_within_range(unsigned int pattern_seed, char *rand_addr) { char *test_name = "mremap mremap move within range"; void *src, *dest; - int i, success = 1; + unsigned int i, success = 1; size_t size = SIZE_MB(20); void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, @@ -384,7 +384,7 @@ out: static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) { - void *addr, *src_addr, *dest_addr, *dest_preamble_addr; + void *addr, *src_addr, *dest_addr, *dest_preamble_addr = NULL; unsigned long long t, d; struct timespec t_start = {0, 0}, t_end = {0, 0}; long long start_ns, end_ns, align_mask, ret, offset; @@ -569,7 +569,7 @@ static void mremap_move_1mb_from_start(unsigned int pattern_seed, { char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src"; void *src = NULL, *dest = NULL; - int i, success = 1; + unsigned int i, success = 1; /* Config to reuse get_source_mapping() to do an aligned mmap. */ struct config c = { @@ -636,7 +636,7 @@ out: static void run_mremap_test_case(struct test test_case, int *failures, unsigned int threshold_mb, - unsigned int pattern_seed, char *rand_addr) + char *rand_addr) { long long remap_time = remap_region(test_case.config, threshold_mb, rand_addr); @@ -708,7 +708,8 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb, int main(int argc, char **argv) { int failures = 0; - int i, run_perf_tests; + unsigned int i; + int run_perf_tests; unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD; /* hard-coded test configs */ @@ -831,7 +832,7 @@ int main(int argc, char **argv) for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_mremap_test_case(test_cases[i], &failures, threshold_mb, - pattern_seed, rand_addr); + rand_addr); maps_fp = fopen("/proc/self/maps", "r"); @@ -853,7 +854,7 @@ int main(int argc, char **argv) "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:"); for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++) run_mremap_test_case(perf_test_cases[i], &failures, - threshold_mb, pattern_seed, + threshold_mb, rand_addr); } diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c index 01675c412b2a..ad17005521a8 100644 --- a/tools/testing/selftests/mm/mseal_test.c +++ b/tools/testing/selftests/mm/mseal_test.c @@ -802,7 +802,7 @@ static void test_seal_mprotect_partial_mprotect_tail(bool seal) } -static void test_seal_mprotect_two_vma_with_gap(bool seal) +static void test_seal_mprotect_two_vma_with_gap(void) { void *ptr; unsigned long page_size = getpagesize(); @@ -1864,7 +1864,7 @@ static void test_seal_madvise_nodiscard(bool seal) REPORT_TEST_PASS(); } -int main(int argc, char **argv) +int main(void) { bool test_seal = seal_support(); @@ -1913,8 +1913,8 @@ int main(int argc, char **argv) test_seal_mprotect_partial_mprotect(false); test_seal_mprotect_partial_mprotect(true); - test_seal_mprotect_two_vma_with_gap(false); - test_seal_mprotect_two_vma_with_gap(true); + test_seal_mprotect_two_vma_with_gap(); + test_seal_mprotect_two_vma_with_gap(); test_seal_mprotect_merge(false); test_seal_mprotect_merge(true); diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index bcc73b4e805c..57b4bba2b45f 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -34,8 +34,8 @@ #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; -int page_size; -int hpage_size; +unsigned int page_size; +unsigned int hpage_size; const char *progname; #define LEN(region) ((region.end - region.start)/page_size) @@ -235,7 +235,9 @@ int get_reads(struct page_region *vec, int vec_size) int sanity_tests_sd(void) { - int mem_size, vec_size, ret, ret2, ret3, i, num_pages = 1000, total_pages = 0; + unsigned long long mem_size, vec_size, i, total_pages = 0; + long ret, ret2, ret3; + int num_pages = 1000; int total_writes, total_reads, reads, count; struct page_region *vec, *vec2; char *mem, *m[2]; @@ -321,9 +323,9 @@ int sanity_tests_sd(void) ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == mem_size/(page_size * 2), + ksft_test_result((unsigned long long)ret == mem_size/(page_size * 2), "%s Repeated pattern of written and non-written pages\n", __func__); /* 4. Repeated pattern of written and non-written pages in parts */ @@ -331,21 +333,21 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ret2 = pagemap_ioctl(mem, mem_size, vec, 2, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno)); ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret3 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret3, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret3, errno, strerror(errno)); ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2, - "%s Repeated pattern of written and non-written pages in parts %d %d %d\n", + "%s Repeated pattern of written and non-written pages in parts %ld %ld %ld\n", __func__, ret, ret3, ret2); /* 5. Repeated pattern of written and non-written pages max_pages */ @@ -357,13 +359,13 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, num_pages/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ret2 = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno)); ksft_test_result(ret == num_pages/2 && ret2 == 1, "%s Repeated pattern of written and non-written pages max_pages\n", @@ -378,12 +380,12 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno)); ksft_test_result(ret == 1 && LEN(vec[0]) == 2 && vec[0].start == (uintptr_t)(mem + page_size) && @@ -416,7 +418,7 @@ int sanity_tests_sd(void) ret = pagemap_ioctl(m[1], mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && LEN(vec[0]) == mem_size/page_size, "%s Two regions\n", __func__); @@ -448,7 +450,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); for (i = 0; i < mem_size/page_size; i += 2) mem[i * page_size]++; @@ -457,7 +459,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); total_pages += ret; @@ -465,7 +467,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); total_pages += ret; @@ -473,7 +475,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); total_pages += ret; @@ -515,9 +517,9 @@ int sanity_tests_sd(void) vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); - if (ret > vec_size) + if ((unsigned long)ret > vec_size) break; reads = get_reads(vec, ret); @@ -554,63 +556,63 @@ int sanity_tests_sd(void) ret = pagemap_ioc(mem, 0, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 0 && walk_end == (long)mem, "Walk_end: Same start and end address\n"); ret = pagemap_ioc(mem, 0, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 0 && walk_end == (long)mem, "Walk_end: Same start and end with WP\n"); ret = pagemap_ioc(mem, 0, vec, 0, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 0 && walk_end == (long)mem, "Walk_end: Same start and end with 0 output buffer\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: Big vec\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: vec of minimum length\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size/2), "Walk_end: Half max pages\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size), "Walk_end: 1 max page\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, -1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: max pages\n"); @@ -621,49 +623,49 @@ int sanity_tests_sd(void) ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_end sparse: Big vec\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), "Walk_end sparse: vec of minimum length\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), "Walk_end sparse: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size/2, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_end sparse: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_end sparse: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_endsparse : Half max pages\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), "Walk_end: 1 max page\n"); @@ -674,9 +676,10 @@ int sanity_tests_sd(void) return 0; } -int base_tests(char *prefix, char *mem, int mem_size, int skip) +int base_tests(char *prefix, char *mem, unsigned long long mem_size, int skip) { - int vec_size, written; + unsigned long long vec_size; + int written; struct page_region *vec, *vec2; if (skip) { @@ -799,8 +802,8 @@ int hpage_unit_tests(void) char *map; int ret, ret2; size_t num_pages = 10; - int map_size = hpage_size * num_pages; - int vec_size = map_size/page_size; + unsigned long long map_size = hpage_size * num_pages; + unsigned long long vec_size = map_size/page_size; struct page_region *vec, *vec2; vec = malloc(sizeof(struct page_region) * vec_size); @@ -1047,7 +1050,8 @@ static void test_simple(void) int sanity_tests(void) { - int mem_size, vec_size, ret, fd, i, buf_size; + unsigned long long mem_size, vec_size; + int ret, fd, i, buf_size; struct page_region *vec; char *mem, *fmem; struct stat sbuf; @@ -1312,7 +1316,9 @@ static ssize_t get_dirty_pages_reset(char *mem, unsigned int count, { struct pm_scan_arg arg = {0}; struct page_region rgns[256]; - int i, j, cnt, ret; + unsigned long long i, j; + long ret; + int cnt; arg.size = sizeof(struct pm_scan_arg); arg.start = (uintptr_t)mem; @@ -1330,7 +1336,7 @@ static ssize_t get_dirty_pages_reset(char *mem, unsigned int count, ksft_exit_fail_msg("ioctl failed\n"); cnt = 0; - for (i = 0; i < ret; ++i) { + for (i = 0; i < (unsigned long)ret; ++i) { if (rgns[i].categories != PAGE_IS_WRITTEN) ksft_exit_fail_msg("wrong flags\n"); @@ -1384,9 +1390,10 @@ void *thread_proc(void *mem) static void transact_test(int page_size) { unsigned int i, count, extra_pages; + unsigned int c; pthread_t th; char *mem; - int ret, c; + int ret; if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1)) ksft_exit_fail_msg("pthread_barrier_init\n"); @@ -1405,9 +1412,9 @@ static void transact_test(int page_size) memset(mem, 0, 0x1000 * nthreads * pages_per_thread); count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); - ksft_test_result(count > 0, "%s count %d\n", __func__, count); + ksft_test_result(count > 0, "%s count %u\n", __func__, count); count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); - ksft_test_result(count == 0, "%s count %d\n", __func__, count); + ksft_test_result(count == 0, "%s count %u\n", __func__, count); finish = 0; for (i = 0; i < nthreads; ++i) @@ -1429,7 +1436,7 @@ static void transact_test(int page_size) ksft_exit_fail_msg("pthread_barrier_wait\n"); if (count > nthreads * access_per_thread) - ksft_exit_fail_msg("Too big count %d expected %d, iter %d\n", + ksft_exit_fail_msg("Too big count %u expected %u, iter %u\n", count, nthreads * access_per_thread, i); c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); @@ -1454,7 +1461,7 @@ static void transact_test(int page_size) * access and application gets page fault again for the same write. */ if (count < nthreads * access_per_thread) { - ksft_test_result_fail("Lost update, iter %d, %d vs %d.\n", i, count, + ksft_test_result_fail("Lost update, iter %u, %u vs %u.\n", i, count, nthreads * access_per_thread); return; } @@ -1467,15 +1474,16 @@ static void transact_test(int page_size) finish = 1; pthread_barrier_wait(&end_barrier); - ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %d.\n", __func__, + ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %u.\n", __func__, extra_pages, 100.0 * extra_pages / (iter_count * nthreads * access_per_thread), extra_thread_faults); } -int main(int argc, char *argv[]) +int main(int __attribute__((unused)) argc, char *argv[]) { - int mem_size, shmid, buf_size, fd, i, ret; + int shmid, buf_size, fd, i, ret; + unsigned long long mem_size; char *mem, *map, *fmem; struct stat sbuf; diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h index d9d2100eafc0..8e9685e03c44 100644 --- a/tools/testing/selftests/mm/pkey-arm64.h +++ b/tools/testing/selftests/mm/pkey-arm64.h @@ -30,7 +30,7 @@ #define NR_PKEYS 8 #define NR_RESERVED_PKEYS 1 /* pkey-0 */ -#define PKEY_ALLOW_ALL 0x77777777 +#define PKEY_REG_ALLOW_ALL 0x77777777 #define PKEY_REG_ALLOW_NONE 0x0 #define PKEY_BITS_PER_PKEY 4 @@ -81,11 +81,11 @@ static inline int get_arch_reserved_keys(void) return NR_RESERVED_PKEYS; } -void expect_fault_on_read_execonly_key(void *p1, int pkey) +static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) { } -void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { return PTR_ERR_ENOTSUP; } diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index f7cfe163b0ff..f080e97b39be 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -13,22 +13,22 @@ #include <ucontext.h> #include <sys/mman.h> +#include <linux/types.h> + #include "../kselftest.h" /* Define some kernel-like types */ -#define u8 __u8 -#define u16 __u16 -#define u32 __u32 -#define u64 __u64 +typedef __u8 u8; +typedef __u16 u16; +typedef __u32 u32; +typedef __u64 u64; #define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) #ifndef DEBUG_LEVEL #define DEBUG_LEVEL 0 #endif -#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 extern int dprint_in_signal; -extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; extern int test_nr; extern int iteration_nr; @@ -83,17 +83,18 @@ extern void abort_hooks(void); #ifndef noinline # define noinline __attribute__((noinline)) #endif +#ifndef __maybe_unused +# define __maybe_unused __attribute__((__unused__)) +#endif -noinline int read_ptr(int *ptr) -{ - /* Keep GCC from optimizing this away somehow */ - barrier(); - return *ptr; -} - -void expected_pkey_fault(int pkey); int sys_pkey_alloc(unsigned long flags, unsigned long init_val); int sys_pkey_free(unsigned long pkey); +int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey); + +/* For functions called from protection_keys.c only */ +noinline int read_ptr(int *ptr); +void expected_pkey_fault(int pkey); int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, unsigned long pkey); void record_pkey_malloc(void *ptr, long size, int prot); @@ -171,38 +172,6 @@ static inline void write_pkey_reg(u64 pkey_reg) pkey_reg, __read_pkey_reg()); } -/* - * These are technically racy. since something could - * change PKEY register between the read and the write. - */ -static inline void __pkey_access_allow(int pkey, int do_allow) -{ - u64 pkey_reg = read_pkey_reg(); - int bit = pkey * 2; - - if (do_allow) - pkey_reg &= (1<<bit); - else - pkey_reg |= (1<<bit); - - dprintf4("pkey_reg now: %016llx\n", read_pkey_reg()); - write_pkey_reg(pkey_reg); -} - -static inline void __pkey_write_allow(int pkey, int do_allow_write) -{ - u64 pkey_reg = read_pkey_reg(); - int bit = pkey * 2 + 1; - - if (do_allow_write) - pkey_reg &= (1<<bit); - else - pkey_reg |= (1<<bit); - - write_pkey_reg(pkey_reg); - dprintf4("pkey_reg now: %016llx\n", read_pkey_reg()); -} - #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) #define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) #define ALIGN_PTR_UP(p, ptr_align_to) \ diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index 3d0c0bdae5bc..1bad310d282a 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -91,7 +91,7 @@ static inline int get_arch_reserved_keys(void) return NR_RESERVED_PKEYS_64K_3KEYS; } -void expect_fault_on_read_execonly_key(void *p1, int pkey) +static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) { /* * powerpc does not allow userspace to change permissions of exec-only @@ -105,7 +105,7 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey) /* 4-byte instructions * 16384 = 64K page */ #define __page_o_noops() asm(".rept 16384 ; nop; .endr") -void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { void *ptr; int ret; diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h index ac91777c8917..f7ecd335df1e 100644 --- a/tools/testing/selftests/mm/pkey-x86.h +++ b/tools/testing/selftests/mm/pkey-x86.h @@ -113,7 +113,7 @@ static inline u32 pkey_bit_position(int pkey) #define XSTATE_PKEY 0x200 #define XSTATE_BV_OFFSET 512 -int pkey_reg_xstate_offset(void) +static inline int pkey_reg_xstate_offset(void) { unsigned int eax; unsigned int ebx; @@ -148,7 +148,7 @@ static inline int get_arch_reserved_keys(void) return NR_RESERVED_PKEYS; } -void expect_fault_on_read_execonly_key(void *p1, int pkey) +static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) { int ptr_contents; @@ -157,7 +157,7 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey) expected_pkey_fault(pkey); } -void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { return PTR_ERR_ENOTSUP; } diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c index c593a426341c..1ac8c8809880 100644 --- a/tools/testing/selftests/mm/pkey_sighandler_tests.c +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -32,11 +32,9 @@ #define STACK_SIZE PTHREAD_STACK_MIN -void expected_pkey_fault(int pkey) {} - -pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t cond = PTHREAD_COND_INITIALIZER; -siginfo_t siginfo = {0}; +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +static siginfo_t siginfo = {0}; /* * We need to use inline assembly instead of glibc's syscall because glibc's @@ -163,7 +161,7 @@ static void *thread_segv_with_pkey0_disabled(void *ptr) __write_pkey_reg(pkey_reg_restrictive_default()); /* Segfault (with SEGV_MAPERR) */ - *(int *) (0x1) = 1; + *(volatile int *)NULL = 1; return NULL; } @@ -179,7 +177,6 @@ static void *thread_segv_pkuerr_stack(void *ptr) static void *thread_segv_maperr_ptr(void *ptr) { stack_t *stack = ptr; - int *bad = (int *)1; u64 pkey_reg; /* @@ -195,7 +192,7 @@ static void *thread_segv_maperr_ptr(void *ptr) __write_pkey_reg(pkey_reg); /* Segfault */ - *bad = 1; + *(volatile int *)NULL = 1; syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); return NULL; } @@ -234,7 +231,7 @@ static void test_sigsegv_handler_with_pkey0_disabled(void) ksft_test_result(siginfo.si_signo == SIGSEGV && siginfo.si_code == SEGV_MAPERR && - siginfo.si_addr == (void *)1, + siginfo.si_addr == NULL, "%s\n", __func__); } @@ -314,11 +311,11 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) __write_pkey_reg(pkey_reg); /* Protect the new stack with MPK 1 */ - pkey = pkey_alloc(0, 0); - pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); + pkey = sys_pkey_alloc(0, 0); + sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); /* Set up alternate signal stack that will use the default MPK */ - sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); sigstack.ss_flags = 0; sigstack.ss_size = STACK_SIZE; @@ -349,7 +346,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) ksft_test_result(siginfo.si_signo == SIGSEGV && siginfo.si_code == SEGV_MAPERR && - siginfo.si_addr == (void *)1, + siginfo.si_addr == NULL, "%s\n", __func__); } @@ -487,11 +484,11 @@ static void test_pkru_sigreturn(void) __write_pkey_reg(pkey_reg); /* Protect the stack with MPK 2 */ - pkey = pkey_alloc(0, 0); - pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); + pkey = sys_pkey_alloc(0, 0); + sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); /* Set up alternate signal stack that will use the default MPK */ - sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); sigstack.ss_flags = 0; sigstack.ss_size = STACK_SIZE; @@ -538,6 +535,9 @@ int main(int argc, char *argv[]) ksft_print_header(); ksft_set_plan(ARRAY_SIZE(pkey_tests)); + if (!is_pkeys_supported()) + ksft_exit_skip("pkeys not supported\n"); + for (i = 0; i < ARRAY_SIZE(pkey_tests); i++) (*pkey_tests[i])(); diff --git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c new file mode 100644 index 000000000000..ca4ad0d44ab2 --- /dev/null +++ b/tools/testing/selftests/mm/pkey_util.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/syscall.h> +#include <unistd.h> + +#include "pkey-helpers.h" + +int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +{ + int ret = syscall(SYS_pkey_alloc, flags, init_val); + dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", + __func__, flags, init_val, ret, errno); + return ret; +} + +int sys_pkey_free(unsigned long pkey) +{ + int ret = syscall(SYS_pkey_free, pkey); + dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); + return ret; +} + +int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int sret; + + dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, + ptr, size, orig_prot, pkey); + + errno = 0; + sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey); + if (errno) { + dprintf2("SYS_mprotect_key sret: %d\n", sret); + dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); + dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); + if (DEBUG_LEVEL >= 2) + perror("SYS_mprotect_pkey"); + } + return sret; +} diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 4990f7ab4cb7..a4683f2476f2 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -53,9 +53,15 @@ int test_nr; u64 shadow_pkey_reg; int dprint_in_signal; -char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; -void cat_into_file(char *str, char *file) +noinline int read_ptr(int *ptr) +{ + /* Keep GCC from optimizing this away somehow */ + barrier(); + return *ptr; +} + +static void cat_into_file(char *str, char *file) { int fd = open(file, O_RDWR); int ret; @@ -82,7 +88,7 @@ void cat_into_file(char *str, char *file) #if CONTROL_TRACING > 0 static int warned_tracing; -int tracing_root_ok(void) +static int tracing_root_ok(void) { if (geteuid() != 0) { if (!warned_tracing) @@ -95,7 +101,7 @@ int tracing_root_ok(void) } #endif -void tracing_on(void) +static void tracing_on(void) { #if CONTROL_TRACING > 0 #define TRACEDIR "/sys/kernel/tracing" @@ -119,7 +125,7 @@ void tracing_on(void) #endif } -void tracing_off(void) +static void tracing_off(void) { #if CONTROL_TRACING > 0 if (!tracing_root_ok()) @@ -153,7 +159,7 @@ __attribute__((__aligned__(65536))) #else __attribute__((__aligned__(PAGE_SIZE))) #endif -void lots_o_noops_around_write(int *write_to_me) +static void lots_o_noops_around_write(int *write_to_me) { dprintf3("running %s()\n", __func__); __page_o_noops(); @@ -164,7 +170,7 @@ void lots_o_noops_around_write(int *write_to_me) dprintf3("%s() done\n", __func__); } -void dump_mem(void *dumpme, int len_bytes) +static void dump_mem(void *dumpme, int len_bytes) { char *c = (void *)dumpme; int i; @@ -207,7 +213,7 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) return 0; } -void pkey_disable_set(int pkey, int flags) +static void pkey_disable_set(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; @@ -245,7 +251,7 @@ void pkey_disable_set(int pkey, int flags) pkey, flags); } -void pkey_disable_clear(int pkey, int flags) +static void pkey_disable_clear(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; @@ -271,19 +277,19 @@ void pkey_disable_clear(int pkey, int flags) pkey, read_pkey_reg()); } -void pkey_write_allow(int pkey) +__maybe_unused static void pkey_write_allow(int pkey) { pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); } -void pkey_write_deny(int pkey) +__maybe_unused static void pkey_write_deny(int pkey) { pkey_disable_set(pkey, PKEY_DISABLE_WRITE); } -void pkey_access_allow(int pkey) +__maybe_unused static void pkey_access_allow(int pkey) { pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); } -void pkey_access_deny(int pkey) +__maybe_unused static void pkey_access_deny(int pkey) { pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); } @@ -301,9 +307,9 @@ static char *si_code_str(int si_code) return "UNKNOWN"; } -int pkey_faults; -int last_si_pkey = -1; -void signal_handler(int signum, siginfo_t *si, void *vucontext) +static int pkey_faults; +static int last_si_pkey = -1; +static void signal_handler(int signum, siginfo_t *si, void *vucontext) { ucontext_t *uctxt = vucontext; int trapno; @@ -390,27 +396,21 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) /* restore access and let the faulting instruction continue */ pkey_access_allow(siginfo_pkey); #elif defined(__aarch64__) - aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL); + aarch64_write_signal_pkey(uctxt, PKEY_REG_ALLOW_ALL); #endif /* arch */ pkey_faults++; dprintf1("<<<<==================================================\n"); dprint_in_signal = 0; } -int wait_all_children(void) -{ - int status; - return waitpid(-1, &status, 0); -} - -void sig_chld(int x) +static void sig_chld(int x) { dprint_in_signal = 1; dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); dprint_in_signal = 0; } -void setup_sigsegv_handler(void) +static void setup_sigsegv_handler(void) { int r, rs; struct sigaction newact; @@ -436,13 +436,13 @@ void setup_sigsegv_handler(void) pkey_assert(r == 0); } -void setup_handlers(void) +static void setup_handlers(void) { signal(SIGCHLD, &sig_chld); setup_sigsegv_handler(); } -pid_t fork_lazy_child(void) +static pid_t fork_lazy_child(void) { pid_t forkret; @@ -460,35 +460,7 @@ pid_t fork_lazy_child(void) return forkret; } -int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, - unsigned long pkey) -{ - int sret; - - dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, - ptr, size, orig_prot, pkey); - - errno = 0; - sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey); - if (errno) { - dprintf2("SYS_mprotect_key sret: %d\n", sret); - dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); - dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); - if (DEBUG_LEVEL >= 2) - perror("SYS_mprotect_pkey"); - } - return sret; -} - -int sys_pkey_alloc(unsigned long flags, unsigned long init_val) -{ - int ret = syscall(SYS_pkey_alloc, flags, init_val); - dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", - __func__, flags, init_val, ret, errno); - return ret; -} - -int alloc_pkey(void) +static int alloc_pkey(void) { int ret; unsigned long init_val = 0x0; @@ -534,19 +506,12 @@ int alloc_pkey(void) return ret; } -int sys_pkey_free(unsigned long pkey) -{ - int ret = syscall(SYS_pkey_free, pkey); - dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); - return ret; -} - /* * I had a bug where pkey bits could be set by mprotect() but * not cleared. This ensures we get lots of random bit sets * and clears on the vma and pte pkey bits. */ -int alloc_random_pkey(void) +static int alloc_random_pkey(void) { int max_nr_pkey_allocs; int ret; @@ -629,7 +594,7 @@ struct pkey_malloc_record { }; struct pkey_malloc_record *pkey_malloc_records; struct pkey_malloc_record *pkey_last_malloc_record; -long nr_pkey_malloc_records; +static long nr_pkey_malloc_records; void record_pkey_malloc(void *ptr, long size, int prot) { long i; @@ -667,7 +632,7 @@ void record_pkey_malloc(void *ptr, long size, int prot) nr_pkey_malloc_records++; } -void free_pkey_malloc(void *ptr) +static void free_pkey_malloc(void *ptr) { long i; int ret; @@ -694,8 +659,7 @@ void free_pkey_malloc(void *ptr) pkey_assert(false); } - -void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) +static void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) { void *ptr; int ret; @@ -715,7 +679,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) return ptr; } -void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) +static void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) { int ret; void *ptr; @@ -745,10 +709,10 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) return ptr; } -int hugetlb_setup_ok; +static int hugetlb_setup_ok; #define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages" #define GET_NR_HUGE_PAGES 10 -void setup_hugetlbfs(void) +static void setup_hugetlbfs(void) { int err; int fd; @@ -796,7 +760,7 @@ void setup_hugetlbfs(void) hugetlb_setup_ok = 1; } -void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) +static void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) { void *ptr; int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; @@ -817,42 +781,15 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) return ptr; } -void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) -{ - void *ptr; - int fd; - - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - pkey_assert(pkey < NR_PKEYS); - fd = open("/dax/foo", O_RDWR); - pkey_assert(fd >= 0); - - ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); - pkey_assert(ptr != (void *)-1); - - mprotect_pkey(ptr, size, prot, pkey); - - record_pkey_malloc(ptr, size, prot); - - dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); - close(fd); - return ptr; -} - -void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { +static void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { malloc_pkey_with_mprotect, malloc_pkey_with_mprotect_subpage, malloc_pkey_anon_huge, malloc_pkey_hugetlb -/* can not do direct with the pkey_mprotect() API: - malloc_pkey_mmap_direct, - malloc_pkey_mmap_dax, -*/ }; -void *malloc_pkey(long size, int prot, u16 pkey) +static void *malloc_pkey(long size, int prot, u16 pkey) { void *ret; static int malloc_type; @@ -882,7 +819,7 @@ void *malloc_pkey(long size, int prot, u16 pkey) return ret; } -int last_pkey_faults; +static int last_pkey_faults; #define UNKNOWN_PKEY -2 void expected_pkey_fault(int pkey) { @@ -905,7 +842,7 @@ void expected_pkey_fault(int pkey) */ if (__read_pkey_reg() != 0) #elif defined(__aarch64__) - if (__read_pkey_reg() != PKEY_ALLOW_ALL) + if (__read_pkey_reg() != PKEY_REG_ALLOW_ALL) #else if (__read_pkey_reg() != shadow_pkey_reg) #endif /* arch */ @@ -924,9 +861,9 @@ void expected_pkey_fault(int pkey) pkey_assert(last_pkey_faults == pkey_faults); \ } while (0) -int test_fds[10] = { -1 }; -int nr_test_fds; -void __save_test_fd(int fd) +static int test_fds[10] = { -1 }; +static int nr_test_fds; +static void __save_test_fd(int fd) { pkey_assert(fd >= 0); pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); @@ -934,14 +871,14 @@ void __save_test_fd(int fd) nr_test_fds++; } -int get_test_read_fd(void) +static int get_test_read_fd(void) { int test_fd = open("/etc/passwd", O_RDONLY); __save_test_fd(test_fd); return test_fd; } -void close_test_fds(void) +static void close_test_fds(void) { int i; @@ -954,7 +891,7 @@ void close_test_fds(void) nr_test_fds = 0; } -void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) +static void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) { int i, err; int max_nr_pkey_allocs; @@ -1006,7 +943,7 @@ void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) pkey_assert(!err); } -void test_read_of_write_disabled_region(int *ptr, u16 pkey) +static void test_read_of_write_disabled_region(int *ptr, u16 pkey) { int ptr_contents; @@ -1016,7 +953,7 @@ void test_read_of_write_disabled_region(int *ptr, u16 pkey) dprintf1("*ptr: %d\n", ptr_contents); dprintf1("\n"); } -void test_read_of_access_disabled_region(int *ptr, u16 pkey) +static void test_read_of_access_disabled_region(int *ptr, u16 pkey) { int ptr_contents; @@ -1028,7 +965,7 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) expected_pkey_fault(pkey); } -void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, +static void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, u16 pkey) { int ptr_contents; @@ -1045,7 +982,7 @@ void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); } -void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, +static void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, u16 pkey) { *ptr = __LINE__; @@ -1056,14 +993,14 @@ void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); } -void test_write_of_write_disabled_region(int *ptr, u16 pkey) +static void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); pkey_write_deny(pkey); *ptr = __LINE__; expected_pkey_fault(pkey); } -void test_write_of_access_disabled_region(int *ptr, u16 pkey) +static void test_write_of_access_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); pkey_access_deny(pkey); @@ -1071,7 +1008,7 @@ void test_write_of_access_disabled_region(int *ptr, u16 pkey) expected_pkey_fault(pkey); } -void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, +static void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, u16 pkey) { *ptr = __LINE__; @@ -1082,7 +1019,7 @@ void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); } -void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) +static void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) { int ret; int test_fd = get_test_read_fd(); @@ -1094,7 +1031,8 @@ void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) dprintf1("read ret: %d\n", ret); pkey_assert(ret); } -void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) + +static void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) { int ret; int test_fd = get_test_read_fd(); @@ -1107,7 +1045,7 @@ void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) pkey_assert(ret); } -void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) +static void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) { int pipe_ret, vmsplice_ret; struct iovec iov; @@ -1129,7 +1067,7 @@ void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) close(pipe_fds[1]); } -void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) +static void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) { int ignored = 0xdada; int futex_ret; @@ -1147,7 +1085,7 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) } /* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) +static void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) { int err; int i; @@ -1170,7 +1108,7 @@ void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) } /* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) +static void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) { int err; int bad_pkey = NR_PKEYS+99; @@ -1180,7 +1118,7 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) pkey_assert(err); } -void become_child(void) +static void become_child(void) { pid_t forkret; @@ -1196,7 +1134,7 @@ void become_child(void) } /* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_alloc_exhaust(int *ptr, u16 pkey) +static void test_pkey_alloc_exhaust(int *ptr, u16 pkey) { int err; int allocated_pkeys[NR_PKEYS] = {0}; @@ -1263,7 +1201,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) } } -void arch_force_pkey_reg_init(void) +static void arch_force_pkey_reg_init(void) { #if defined(__i386__) || defined(__x86_64__) /* arch */ u64 *buf; @@ -1302,7 +1240,7 @@ void arch_force_pkey_reg_init(void) * a long-running test that continually checks the pkey * register. */ -void test_pkey_init_state(int *ptr, u16 pkey) +static void test_pkey_init_state(int *ptr, u16 pkey) { int err; int allocated_pkeys[NR_PKEYS] = {0}; @@ -1340,7 +1278,7 @@ void test_pkey_init_state(int *ptr, u16 pkey) * have to call pkey_alloc() to use it first. Make sure that it * is usable. */ -void test_mprotect_with_pkey_0(int *ptr, u16 pkey) +static void test_mprotect_with_pkey_0(int *ptr, u16 pkey) { long size; int prot; @@ -1364,7 +1302,7 @@ void test_mprotect_with_pkey_0(int *ptr, u16 pkey) mprotect_pkey(ptr, size, prot, pkey); } -void test_ptrace_of_child(int *ptr, u16 pkey) +static void test_ptrace_of_child(int *ptr, u16 pkey) { __attribute__((__unused__)) int peek_result; pid_t child_pid; @@ -1440,7 +1378,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) free(plain_ptr_unaligned); } -void *get_pointer_to_instructions(void) +static void *get_pointer_to_instructions(void) { void *p1; @@ -1461,7 +1399,7 @@ void *get_pointer_to_instructions(void) return p1; } -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +static void test_executing_on_unreadable_memory(int *ptr, u16 pkey) { void *p1; int scratch; @@ -1493,7 +1431,7 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) pkey_assert(!ret); } -void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) +static void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) { void *p1; int scratch; @@ -1542,7 +1480,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) } #if defined(__i386__) || defined(__x86_64__) -void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +static void test_ptrace_modifies_pkru(int *ptr, u16 pkey) { u32 new_pkru; pid_t child; @@ -1665,7 +1603,7 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey) #endif #if defined(__aarch64__) -void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +static void test_ptrace_modifies_pkru(int *ptr, u16 pkey) { pid_t child; int status, ret; @@ -1742,7 +1680,7 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey) } #endif -void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) +static void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; int sret; @@ -1756,7 +1694,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) pkey_assert(sret < 0); } -void (*pkey_tests[])(int *ptr, u16 pkey) = { +static void (*pkey_tests[])(int *ptr, u16 pkey) = { test_read_of_write_disabled_region, test_read_of_access_disabled_region, test_read_of_access_disabled_region_with_page_already_mapped, @@ -1782,7 +1720,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { #endif }; -void run_tests_once(void) +static void run_tests_once(void) { int *ptr; int prot = PROT_READ|PROT_WRITE; @@ -1816,7 +1754,7 @@ void run_tests_once(void) iteration_nr++; } -void pkey_setup_shadow(void) +static void pkey_setup_shadow(void) { shadow_pkey_reg = __read_pkey_reg(); } diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 2fc290d9430c..333c468c2699 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -45,6 +45,8 @@ separated by spaces: vmalloc smoke tests - hmm hmm smoke tests +- madv_guard + test madvise(2) MADV_GUARD_INSTALL and MADV_GUARD_REMOVE options - madv_populate test memadvise(2) MADV_POPULATE_{READ,WRITE} options - memfd_secret @@ -307,6 +309,7 @@ CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 3 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16 +CATEGORY="userfaultfd" run_test ./uffd-wp-mremap #cleanup echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages @@ -375,6 +378,9 @@ CATEGORY="mremap" run_test ./mremap_dontunmap CATEGORY="hmm" run_test bash ./test_hmm.sh smoke +# MADV_GUARD_INSTALL and MADV_GUARD_REMOVE tests +CATEGORY="madv_guard" run_test ./guard-pages + # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c deleted file mode 100644 index d9f8ba8d5050..000000000000 --- a/tools/testing/selftests/mm/seal_elf.c +++ /dev/null @@ -1,137 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE -#include <sys/mman.h> -#include <stdint.h> -#include <asm-generic/unistd.h> -#include <string.h> -#include <sys/time.h> -#include <sys/resource.h> -#include <stdbool.h> -#include "../kselftest.h" -#include <syscall.h> -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <sys/vfs.h> -#include <sys/stat.h> -#include "mseal_helpers.h" - -/* - * define sys_xyx to call syscall directly. - */ -static int sys_mseal(void *start, size_t len) -{ - int sret; - - errno = 0; - sret = syscall(__NR_mseal, start, len, 0); - return sret; -} - -static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot) -{ - int sret; - - errno = 0; - sret = syscall(__NR_mprotect, ptr, size, prot); - return sret; -} - -static bool seal_support(void) -{ - int ret; - void *ptr; - unsigned long page_size = getpagesize(); - - ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (ptr == (void *) -1) - return false; - - ret = sys_mseal(ptr, page_size); - if (ret < 0) - return false; - - return true; -} - -const char somestr[4096] = {"READONLY"}; - -static void test_seal_elf(void) -{ - int ret; - FILE *maps; - char line[512]; - uintptr_t addr_start, addr_end; - char prot[5]; - char filename[256]; - unsigned long page_size = getpagesize(); - unsigned long long ptr = (unsigned long long) somestr; - char *somestr2 = (char *)somestr; - - /* - * Modify the protection of readonly somestr - */ - if (((unsigned long long)ptr % page_size) != 0) - ptr = (unsigned long long)ptr & ~(page_size - 1); - - ksft_print_msg("somestr = %s\n", somestr); - ksft_print_msg("change protection to rw\n"); - ret = sys_mprotect((void *)ptr, page_size, PROT_READ|PROT_WRITE); - FAIL_TEST_IF_FALSE(!ret); - *somestr2 = 'A'; - ksft_print_msg("somestr is modified to: %s\n", somestr); - ret = sys_mprotect((void *)ptr, page_size, PROT_READ); - FAIL_TEST_IF_FALSE(!ret); - - maps = fopen("/proc/self/maps", "r"); - FAIL_TEST_IF_FALSE(maps); - - /* - * apply sealing to elf binary - */ - while (fgets(line, sizeof(line), maps)) { - if (sscanf(line, "%lx-%lx %4s %*x %*x:%*x %*u %255[^\n]", - &addr_start, &addr_end, prot, filename) == 4) { - if (strlen(filename)) { - /* - * seal the mapping if read only. - */ - if (strstr(prot, "r-")) { - ret = sys_mseal((void *)addr_start, addr_end - addr_start); - FAIL_TEST_IF_FALSE(!ret); - ksft_print_msg("sealed: %lx-%lx %s %s\n", - addr_start, addr_end, prot, filename); - if ((uintptr_t) somestr >= addr_start && - (uintptr_t) somestr <= addr_end) - ksft_print_msg("mapping for somestr found\n"); - } - } - } - } - fclose(maps); - - ret = sys_mprotect((void *)ptr, page_size, PROT_READ | PROT_WRITE); - FAIL_TEST_IF_FALSE(ret < 0); - ksft_print_msg("somestr is sealed, mprotect is rejected\n"); - - REPORT_TEST_PASS(); -} - -int main(int argc, char **argv) -{ - bool test_seal = seal_support(); - - ksft_print_header(); - ksft_print_msg("pid=%d\n", getpid()); - - if (!test_seal) - ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n"); - - ksft_set_plan(1); - - test_seal_elf(); - - ksft_finished(); -} diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index bdfa5d085f00..8e1462ce0532 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -128,7 +128,7 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon) { const char *type[] = {"file", "anon"}; const char *fname = "./soft-dirty-test-file"; - int test_fd; + int test_fd = 0; char *map; if (anon) { diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index eb6d1b9fc362..3f353f3d070f 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -108,38 +108,28 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp unsigned long rss_anon_before, rss_anon_after; size_t i; - if (!check_huge_anon(one_page, 4, pmd_pagesize)) { - printf("No THP is allocated\n"); - exit(EXIT_FAILURE); - } + if (!check_huge_anon(one_page, 4, pmd_pagesize)) + ksft_exit_fail_msg("No THP is allocated\n"); rss_anon_before = rss_anon(); - if (!rss_anon_before) { - printf("No RssAnon is allocated before split\n"); - exit(EXIT_FAILURE); - } + if (!rss_anon_before) + ksft_exit_fail_msg("No RssAnon is allocated before split\n"); /* split all THPs */ write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, (uint64_t)one_page + len, 0); for (i = 0; i < len; i++) - if (one_page[i] != (char)0) { - printf("%ld byte corrupted\n", i); - exit(EXIT_FAILURE); - } + if (one_page[i] != (char)0) + ksft_exit_fail_msg("%ld byte corrupted\n", i); - if (!check_huge_anon(one_page, 0, pmd_pagesize)) { - printf("Still AnonHugePages not split\n"); - exit(EXIT_FAILURE); - } + if (!check_huge_anon(one_page, 0, pmd_pagesize)) + ksft_exit_fail_msg("Still AnonHugePages not split\n"); rss_anon_after = rss_anon(); - if (rss_anon_after >= rss_anon_before) { - printf("Incorrect RssAnon value. Before: %ld After: %ld\n", + if (rss_anon_after >= rss_anon_before) + ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n", rss_anon_before, rss_anon_after); - exit(EXIT_FAILURE); - } } void split_pmd_zero_pages(void) @@ -150,11 +140,11 @@ void split_pmd_zero_pages(void) one_page = allocate_zero_filled_hugepage(len); verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len); - printf("Split zero filled huge pages successful\n"); + ksft_test_result_pass("Split zero filled huge pages successful\n"); free(one_page); } -void split_pmd_thp(void) +void split_pmd_thp_to_order(int order) { char *one_page; size_t len = 4 * pmd_pagesize; @@ -174,7 +164,7 @@ void split_pmd_thp(void) /* split all THPs */ write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, - (uint64_t)one_page + len, 0); + (uint64_t)one_page + len, order); for (i = 0; i < len; i++) if (one_page[i] != (char)i) @@ -184,7 +174,7 @@ void split_pmd_thp(void) if (!check_huge_anon(one_page, 0, pmd_pagesize)) ksft_exit_fail_msg("Still AnonHugePages not split\n"); - ksft_test_result_pass("Split huge pages successful\n"); + ksft_test_result_pass("Split huge pages to order %d successful\n", order); free(one_page); } @@ -491,7 +481,7 @@ int main(int argc, char **argv) if (argc > 1) optional_xfs_path = argv[1]; - ksft_set_plan(3+9); + ksft_set_plan(1+8+2+9); pagesize = getpagesize(); pageshift = ffs(pagesize) - 1; @@ -502,7 +492,11 @@ int main(int argc, char **argv) fd_size = 2 * pmd_pagesize; split_pmd_zero_pages(); - split_pmd_thp(); + + for (i = 0; i < 9; i++) + if (i != 1) + split_pmd_thp_to_order(i); + split_pte_mapped_thp(); split_file_backed_thp(); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index 577eaab6266f..ad872af1c81a 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -87,7 +87,7 @@ int write_file(const char *path, const char *buf, size_t buflen) return (unsigned int) numwritten; } -const unsigned long read_num(const char *path) +unsigned long read_num(const char *path) { char buf[21]; @@ -172,7 +172,7 @@ void thp_write_string(const char *name, const char *val) } } -const unsigned long thp_read_num(const char *name) +unsigned long thp_read_num(const char *name) { char path[PATH_MAX]; int ret; diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index 876235a23460..fc131d23d593 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -64,12 +64,12 @@ struct thp_settings { int read_file(const char *path, char *buf, size_t buflen); int write_file(const char *path, const char *buf, size_t buflen); -const unsigned long read_num(const char *path); +unsigned long read_num(const char *path); void write_num(const char *path, unsigned long num); int thp_read_string(const char *name, const char * const strings[]); void thp_write_string(const char *name, const char *val); -const unsigned long thp_read_num(const char *name); +unsigned long thp_read_num(const char *name); void thp_write_num(const char *name, unsigned long num); void thp_write_settings(struct thp_settings *settings); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index a2e71b1636e7..9ff71fa1f9bf 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1122,7 +1122,7 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, char c; unsigned long long count; struct uffd_args args = { 0 }; - char *orig_area_src, *orig_area_dst; + char *orig_area_src = NULL, *orig_area_dst = NULL; unsigned long step_size, step_count; unsigned long src_offs = 0; unsigned long dst_offs = 0; @@ -1190,7 +1190,7 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, nr, count, count_verify[src_offs + nr + i]); } } - if (step_size > page_size) { + if (chunk_size > page_size) { area_src = orig_area_src; area_dst = orig_area_dst; } diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c new file mode 100644 index 000000000000..2c4f984bd73c --- /dev/null +++ b/tools/testing/selftests/mm/uffd-wp-mremap.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define _GNU_SOURCE +#include <stdbool.h> +#include <stdint.h> +#include <fcntl.h> +#include <assert.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include "../kselftest.h" +#include "thp_settings.h" +#include "uffd-common.h" + +static int pagemap_fd; +static size_t pagesize; +static int nr_pagesizes = 1; +static int nr_thpsizes; +static size_t thpsizes[20]; +static int nr_hugetlbsizes; +static size_t hugetlbsizes[10]; + +static int sz2ord(size_t size) +{ + return __builtin_ctzll(size / pagesize); +} + +static int detect_thp_sizes(size_t sizes[], int max) +{ + int count = 0; + unsigned long orders; + size_t kb; + int i; + + /* thp not supported at all. */ + if (!read_pmd_pagesize()) + return 0; + + orders = thp_supported_orders(); + + for (i = 0; orders && count < max; i++) { + if (!(orders & (1UL << i))) + continue; + orders &= ~(1UL << i); + kb = (pagesize >> 10) << i; + sizes[count++] = kb * 1024; + ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); + } + + return count; +} + +static void *mmap_aligned(size_t size, int prot, int flags) +{ + size_t mmap_size = size * 2; + char *mmap_mem, *mem; + + mmap_mem = mmap(NULL, mmap_size, prot, flags, -1, 0); + if (mmap_mem == MAP_FAILED) + return mmap_mem; + + mem = (char *)(((uintptr_t)mmap_mem + size - 1) & ~(size - 1)); + munmap(mmap_mem, mem - mmap_mem); + munmap(mem + size, mmap_mem + mmap_size - mem - size); + + return mem; +} + +static void *alloc_one_folio(size_t size, bool private, bool hugetlb) +{ + bool thp = !hugetlb && size > pagesize; + int flags = MAP_ANONYMOUS; + int prot = PROT_READ | PROT_WRITE; + char *mem, *addr; + + assert((size & (size - 1)) == 0); + + if (private) + flags |= MAP_PRIVATE; + else + flags |= MAP_SHARED; + + /* + * For THP, we must explicitly enable the THP size, allocate twice the + * required space then manually align. + */ + if (thp) { + struct thp_settings settings = *thp_current_settings(); + + if (private) + settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + else + settings.shmem_hugepages[sz2ord(size)].enabled = SHMEM_ALWAYS; + + thp_push_settings(&settings); + + mem = mmap_aligned(size, prot, flags); + } else { + if (hugetlb) { + flags |= MAP_HUGETLB; + flags |= __builtin_ctzll(size) << MAP_HUGE_SHIFT; + } + + mem = mmap(NULL, size, prot, flags, -1, 0); + } + + if (mem == MAP_FAILED) { + mem = NULL; + goto out; + } + + assert(((uintptr_t)mem & (size - 1)) == 0); + + /* + * Populate the folio by writing the first byte and check that all pages + * are populated. Finally set the whole thing to non-zero data to avoid + * kernel from mapping it back to the zero page. + */ + mem[0] = 1; + for (addr = mem; addr < mem + size; addr += pagesize) { + if (!pagemap_is_populated(pagemap_fd, addr)) { + munmap(mem, size); + mem = NULL; + goto out; + } + } + memset(mem, 1, size); +out: + if (thp) + thp_pop_settings(); + + return mem; +} + +static bool check_uffd_wp_state(void *mem, size_t size, bool expect) +{ + uint64_t pte; + void *addr; + + for (addr = mem; addr < mem + size; addr += pagesize) { + pte = pagemap_get_entry(pagemap_fd, addr); + if (!!(pte & PM_UFFD_WP) != expect) { + ksft_test_result_fail("uffd-wp not %s for pte %lu!\n", + expect ? "set" : "clear", + (addr - mem) / pagesize); + return false; + } + } + + return true; +} + +static bool range_is_swapped(void *addr, size_t size) +{ + for (; size; addr += pagesize, size -= pagesize) + if (!pagemap_is_swapped(pagemap_fd, addr)) + return false; + return true; +} + +static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb) +{ + struct uffdio_writeprotect wp_prms; + uint64_t features = 0; + void *addr = NULL; + void *mem = NULL; + + assert(!(hugetlb && swapout)); + + ksft_print_msg("[RUN] %s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n", + __func__, + size, + private ? "true" : "false", + swapout ? "true" : "false", + hugetlb ? "true" : "false"); + + /* Allocate a folio of required size and type. */ + mem = alloc_one_folio(size, private, hugetlb); + if (!mem) { + ksft_test_result_fail("alloc_one_folio() failed\n"); + goto out; + } + + /* Register range for uffd-wp. */ + if (userfaultfd_open(&features)) { + ksft_test_result_fail("userfaultfd_open() failed\n"); + goto out; + } + if (uffd_register(uffd, mem, size, false, true, false)) { + ksft_test_result_fail("uffd_register() failed\n"); + goto out; + } + wp_prms.mode = UFFDIO_WRITEPROTECT_MODE_WP; + wp_prms.range.start = (uintptr_t)mem; + wp_prms.range.len = size; + if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { + ksft_test_result_fail("ioctl(UFFDIO_WRITEPROTECT) failed\n"); + goto out; + } + + if (swapout) { + madvise(mem, size, MADV_PAGEOUT); + if (!range_is_swapped(mem, size)) { + ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + goto out; + } + } + + /* Check that uffd-wp is set for all PTEs in range. */ + if (!check_uffd_wp_state(mem, size, true)) + goto out; + + /* + * Move the mapping to a new, aligned location. Since + * UFFD_FEATURE_EVENT_REMAP is not set, we expect the uffd-wp bit for + * each PTE to be cleared in the new mapping. + */ + addr = mmap_aligned(size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS); + if (addr == MAP_FAILED) { + ksft_test_result_fail("mmap_aligned() failed\n"); + goto out; + } + if (mremap(mem, size, size, MREMAP_FIXED | MREMAP_MAYMOVE, addr) == MAP_FAILED) { + ksft_test_result_fail("mremap() failed\n"); + munmap(addr, size); + goto out; + } + mem = addr; + + /* Check that uffd-wp is cleared for all PTEs in range. */ + if (!check_uffd_wp_state(mem, size, false)) + goto out; + + ksft_test_result_pass("%s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n", + __func__, + size, + private ? "true" : "false", + swapout ? "true" : "false", + hugetlb ? "true" : "false"); +out: + if (mem) + munmap(mem, size); + if (uffd >= 0) { + close(uffd); + uffd = -1; + } +} + +struct testcase { + size_t *sizes; + int *nr_sizes; + bool private; + bool swapout; + bool hugetlb; +}; + +static const struct testcase testcases[] = { + /* base pages. */ + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = false, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = true, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = false, + .swapout = true, + .hugetlb = false, + }, + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = true, + .swapout = true, + .hugetlb = false, + }, + + /* thp. */ + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = false, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = true, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = false, + .swapout = true, + .hugetlb = false, + }, + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = true, + .swapout = true, + .hugetlb = false, + }, + + /* hugetlb. */ + { + .sizes = hugetlbsizes, + .nr_sizes = &nr_hugetlbsizes, + .private = false, + .swapout = false, + .hugetlb = true, + }, + { + .sizes = hugetlbsizes, + .nr_sizes = &nr_hugetlbsizes, + .private = true, + .swapout = false, + .hugetlb = true, + }, +}; + +int main(int argc, char **argv) +{ + struct thp_settings settings; + int i, j, plan = 0; + + pagesize = getpagesize(); + nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes)); + nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, + ARRAY_SIZE(hugetlbsizes)); + + /* If THP is supported, save THP settings and initially disable THP. */ + if (nr_thpsizes) { + thp_save_settings(); + thp_read_settings(&settings); + for (i = 0; i < NR_ORDERS; i++) { + settings.hugepages[i].enabled = THP_NEVER; + settings.shmem_hugepages[i].enabled = SHMEM_NEVER; + } + thp_push_settings(&settings); + } + + for (i = 0; i < ARRAY_SIZE(testcases); i++) + plan += *testcases[i].nr_sizes; + ksft_set_plan(plan); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + const struct testcase *tc = &testcases[i]; + + for (j = 0; j < *tc->nr_sizes; j++) + test_one_folio(tc->sizes[j], tc->private, tc->swapout, + tc->hugetlb); + } + + /* If THP is supported, restore original THP settings. */ + if (nr_thpsizes) + thp_restore_settings(); + + i = ksft_get_fail_cnt(); + if (i) + ksft_exit_fail_msg("%d out of %d tests failed\n", + i, ksft_test_num()); + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 2a2b69e91950..b380e102b22f 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -10,10 +10,12 @@ #include <string.h> #include <unistd.h> #include <errno.h> +#include <sys/prctl.h> #include <sys/mman.h> #include <sys/time.h> #include <fcntl.h> +#include "vm_util.h" #include "../kselftest.h" /* @@ -82,6 +84,24 @@ static void validate_addr(char *ptr, int high_addr) ksft_exit_fail_msg("Bad address %lx\n", addr); } +static void mark_range(char *ptr, size_t size) +{ + if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ptr, size, "virtual_address_range") == -1) { + if (errno == EINVAL) { + /* Depends on CONFIG_ANON_VMA_NAME */ + ksft_test_result_skip("prctl(PR_SET_VMA_ANON_NAME) not supported\n"); + ksft_finished(); + } else { + ksft_exit_fail_perror("prctl(PR_SET_VMA_ANON_NAME) failed\n"); + } + } +} + +static int is_marked_vma(const char *vma_name) +{ + return vma_name && !strcmp(vma_name, "[anon:virtual_address_range]\n"); +} + static int validate_lower_address_hint(void) { char *ptr; @@ -116,12 +136,17 @@ static int validate_complete_va_space(void) prev_end_addr = 0; while (fgets(line, sizeof(line), file)) { + const char *vma_name = NULL; + int vma_name_start = 0; unsigned long hop; - if (sscanf(line, "%lx-%lx %s[rwxp-]", - &start_addr, &end_addr, prot) != 3) + if (sscanf(line, "%lx-%lx %4s %*s %*s %*s %n", + &start_addr, &end_addr, prot, &vma_name_start) != 3) ksft_exit_fail_msg("cannot parse /proc/self/maps\n"); + if (vma_name_start) + vma_name = line + vma_name_start; + /* end of userspace mappings; ignore vsyscall mapping */ if (start_addr & (1UL << 63)) return 0; @@ -135,6 +160,9 @@ static int validate_complete_va_space(void) if (prot[0] != 'r') continue; + if (check_vmflag_io((void *)start_addr)) + continue; + /* * Confirm whether MAP_CHUNK_SIZE chunk can be found or not. * If write succeeds, no need to check MAP_CHUNK_SIZE - 1 @@ -149,6 +177,9 @@ static int validate_complete_va_space(void) return 1; lseek(fd, 0, SEEK_SET); + if (is_marked_vma(vma_name)) + munmap((char *)(start_addr + hop), MAP_CHUNK_SIZE); + hop += MAP_CHUNK_SIZE; } } @@ -166,7 +197,7 @@ int main(int argc, char *argv[]) ksft_set_plan(1); for (i = 0; i < NR_CHUNKS_LOW; i++) { - ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ptr[i] == MAP_FAILED) { @@ -175,6 +206,7 @@ int main(int argc, char *argv[]) break; } + mark_range(ptr[i], MAP_CHUNK_SIZE); validate_addr(ptr[i], 0); } lchunks = i; @@ -186,12 +218,13 @@ int main(int argc, char *argv[]) for (i = 0; i < NR_CHUNKS_HIGH; i++) { hint = hint_addr(); - hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (hptr[i] == MAP_FAILED) break; + mark_range(ptr[i], MAP_CHUNK_SIZE); validate_addr(hptr[i], 1); } hchunks = i; diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index d8d0cf04bb57..a36734fb62f3 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -2,6 +2,7 @@ #include <string.h> #include <fcntl.h> #include <dirent.h> +#include <inttypes.h> #include <sys/ioctl.h> #include <linux/userfaultfd.h> #include <linux/fs.h> @@ -138,7 +139,7 @@ void clear_softdirty(void) ksft_exit_fail_msg("opening clear_refs failed\n"); ret = write(fd, ctrl, strlen(ctrl)); close(fd); - if (ret != strlen(ctrl)) + if (ret != (signed int)strlen(ctrl)) ksft_exit_fail_msg("writing clear_refs failed\n"); } @@ -193,13 +194,11 @@ err_out: return rss_anon; } -bool __check_huge(void *addr, char *pattern, int nr_hpages, - uint64_t hpage_size) +char *__get_smap_entry(void *addr, const char *pattern, char *buf, size_t len) { - uint64_t thp = -1; int ret; FILE *fp; - char buffer[MAX_LINE_LENGTH]; + char *entry = NULL; char addr_pattern[MAX_LINE_LENGTH]; ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", @@ -211,23 +210,40 @@ bool __check_huge(void *addr, char *pattern, int nr_hpages, if (!fp) ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH); - if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) + if (!check_for_pattern(fp, addr_pattern, buf, len)) goto err_out; - /* - * Fetch the pattern in the same block and check the number of - * hugepages. - */ - if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer))) + /* Fetch the pattern in the same block */ + if (!check_for_pattern(fp, pattern, buf, len)) goto err_out; - snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern); + /* Trim trailing newline */ + entry = strchr(buf, '\n'); + if (entry) + *entry = '\0'; - if (sscanf(buffer, addr_pattern, &thp) != 1) - ksft_exit_fail_msg("Reading smap error\n"); + entry = buf + strlen(pattern); err_out: fclose(fp); + return entry; +} + +bool __check_huge(void *addr, char *pattern, int nr_hpages, + uint64_t hpage_size) +{ + char buffer[MAX_LINE_LENGTH]; + uint64_t thp = -1; + char *entry; + + entry = __get_smap_entry(addr, pattern, buffer, sizeof(buffer)); + if (!entry) + goto err_out; + + if (sscanf(entry, "%9" SCNu64 " kB", &thp) != 1) + ksft_exit_fail_msg("Reading smap error\n"); + +err_out: return thp == (nr_hpages * (hpage_size >> 10)); } @@ -384,3 +400,27 @@ unsigned long get_free_hugepages(void) fclose(f); return fhp; } + +bool check_vmflag_io(void *addr) +{ + char buffer[MAX_LINE_LENGTH]; + const char *flags; + size_t flaglen; + + flags = __get_smap_entry(addr, "VmFlags:", buffer, sizeof(buffer)); + if (!flags) + ksft_exit_fail_msg("%s: No VmFlags for %p\n", __func__, addr); + + while (true) { + flags += strspn(flags, " "); + + flaglen = strcspn(flags, " "); + if (!flaglen) + return false; + + if (flaglen == strlen("io") && !memcmp(flags, "io", flaglen)) + return true; + + flags += flaglen; + } +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 2eaed8209925..b60ac68a9dc8 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -53,6 +53,7 @@ int uffd_unregister(int uffd, void *addr, uint64_t len); int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls); unsigned long get_free_hugepages(void); +bool check_vmflag_io(void *addr); /* * On ppc64 this will only work with radix 2M hugepage size diff --git a/tools/testing/selftests/mm/write_to_hugetlbfs.c b/tools/testing/selftests/mm/write_to_hugetlbfs.c index 1289d311efd7..34c91f7e6128 100644 --- a/tools/testing/selftests/mm/write_to_hugetlbfs.c +++ b/tools/testing/selftests/mm/write_to_hugetlbfs.c @@ -89,7 +89,7 @@ int main(int argc, char **argv) size = atoi(optarg); break; case 'p': - strncpy(path, optarg, sizeof(path)); + strncpy(path, optarg, sizeof(path) - 1); break; case 'm': if (atoi(optarg) >= MAX_METHOD) { diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index cb2fc601de66..73ee88d6b043 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -32,6 +32,7 @@ TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh +TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += netns-name.sh TEST_PROGS += nl_netdev.py @@ -95,6 +96,7 @@ TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh fdb_notify.sh TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh +TEST_PROGS += vlan_bridge_binding.sh TEST_PROGS += bpf_offload.py TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index d10f420e4ef6..fd0d959914e4 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -215,12 +215,14 @@ def bpftool_map_list_wait(expected=0, n_retry=20, ns=""): raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None, - fail=True, include_stderr=False): + fail=True, include_stderr=False, dev_bind=None): args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name) if prog_type is not None: args += " type " + prog_type if dev is not None: args += " dev " + dev + elif dev_bind is not None: + args += " xdpmeta_dev " + dev_bind if len(maps): args += " map " + " map ".join(maps) @@ -980,6 +982,16 @@ try: rm("/sys/fs/bpf/offload") sim.wait_for_flush() + bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/devbound", + dev_bind=sim['ifname']) + devbound = bpf_pinned("/sys/fs/bpf/devbound") + start_test("Test dev-bound program in generic mode...") + ret, _, err = sim.set_xdp(devbound, "generic", fail=False, include_stderr=True) + fail(ret == 0, "devbound program in generic mode allowed") + check_extack(err, "Can't attach device-bound programs in generic mode.", args) + rm("/sys/fs/bpf/devbound") + sim.wait_for_flush() + start_test("Test XDP load failure...") sim.dfs["dev/bpf_bind_verifier_accept"] = 0 ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload", diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c index 99b0e8c17fca..04c7ff577bb8 100644 --- a/tools/testing/selftests/net/busy_poller.c +++ b/tools/testing/selftests/net/busy_poller.c @@ -54,16 +54,16 @@ struct epoll_params { #define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) #endif -static uint32_t cfg_port = 8000; +static uint16_t cfg_port = 8000; static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; static char *cfg_outfile; static int cfg_max_events = 8; -static int cfg_ifindex; +static uint32_t cfg_ifindex; /* busy poll params */ static uint32_t cfg_busy_poll_usecs; -static uint32_t cfg_busy_poll_budget; -static uint32_t cfg_prefer_busy_poll; +static uint16_t cfg_busy_poll_budget; +static uint8_t cfg_prefer_busy_poll; /* IRQ params */ static uint32_t cfg_defer_hard_irqs; @@ -79,6 +79,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + unsigned long long tmp; int ret; int c; @@ -86,31 +87,40 @@ static void parse_opts(int argc, char **argv) usage(argv[0]); while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { + /* most options take integer values, except o and b, so reduce + * code duplication a bit for the common case by calling + * strtoull here and leave bounds checking and casting per + * option below. + */ + if (c != 'o' && c != 'b') + tmp = strtoull(optarg, NULL, 0); + switch (c) { case 'u': - cfg_busy_poll_usecs = strtoul(optarg, NULL, 0); - if (cfg_busy_poll_usecs == ULONG_MAX || - cfg_busy_poll_usecs > UINT32_MAX) + if (tmp == ULLONG_MAX || tmp > UINT32_MAX) error(1, ERANGE, "busy_poll_usecs too large"); + + cfg_busy_poll_usecs = (uint32_t)tmp; break; case 'P': - cfg_prefer_busy_poll = strtoul(optarg, NULL, 0); - if (cfg_prefer_busy_poll == ULONG_MAX || - cfg_prefer_busy_poll > 1) + if (tmp == ULLONG_MAX || tmp > 1) error(1, ERANGE, "prefer busy poll should be 0 or 1"); + + cfg_prefer_busy_poll = (uint8_t)tmp; break; case 'g': - cfg_busy_poll_budget = strtoul(optarg, NULL, 0); - if (cfg_busy_poll_budget == ULONG_MAX || - cfg_busy_poll_budget > UINT16_MAX) + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) error(1, ERANGE, "busy poll budget must be [0, UINT16_MAX]"); + + cfg_busy_poll_budget = (uint16_t)tmp; break; case 'p': - cfg_port = strtoul(optarg, NULL, 0); - if (cfg_port > UINT16_MAX) + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) error(1, ERANGE, "port must be <= 65535"); + + cfg_port = (uint16_t)tmp; break; case 'b': ret = inet_aton(optarg, &cfg_bind_addr); @@ -124,41 +134,39 @@ static void parse_opts(int argc, char **argv) error(1, 0, "outfile invalid"); break; case 'm': - cfg_max_events = strtol(optarg, NULL, 0); - - if (cfg_max_events == LONG_MIN || - cfg_max_events == LONG_MAX || - cfg_max_events <= 0) + if (tmp == ULLONG_MAX || tmp > INT_MAX) error(1, ERANGE, - "max events must be > 0 and < LONG_MAX"); + "max events must be > 0 and <= INT_MAX"); + + cfg_max_events = (int)tmp; break; case 'd': - cfg_defer_hard_irqs = strtoul(optarg, NULL, 0); - - if (cfg_defer_hard_irqs == ULONG_MAX || - cfg_defer_hard_irqs > INT32_MAX) + if (tmp == ULLONG_MAX || tmp > INT32_MAX) error(1, ERANGE, "defer_hard_irqs must be <= INT32_MAX"); + + cfg_defer_hard_irqs = (uint32_t)tmp; break; case 'r': - cfg_gro_flush_timeout = strtoull(optarg, NULL, 0); - - if (cfg_gro_flush_timeout == ULLONG_MAX) + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) error(1, ERANGE, - "gro_flush_timeout must be < ULLONG_MAX"); + "gro_flush_timeout must be < UINT64_MAX"); + + cfg_gro_flush_timeout = (uint64_t)tmp; break; case 's': - cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0); - - if (cfg_irq_suspend_timeout == ULLONG_MAX) + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) error(1, ERANGE, "irq_suspend_timeout must be < ULLONG_MAX"); + + cfg_irq_suspend_timeout = (uint64_t)tmp; break; case 'i': - cfg_ifindex = strtoul(optarg, NULL, 0); - if (cfg_ifindex == ULONG_MAX) + if (tmp == ULLONG_MAX || tmp > INT_MAX) error(1, ERANGE, - "ifindex must be < ULONG_MAX"); + "ifindex must be <= INT_MAX"); + + cfg_ifindex = (int)tmp; break; } } @@ -215,7 +223,7 @@ static void setup_queue(void) struct netdev_napi_set_req *set_req = NULL; struct ynl_sock *ys; struct ynl_error yerr; - uint32_t napi_id; + uint32_t napi_id = 0; ys = ynl_sock_create(&ynl_netdev_family, &yerr); if (!ys) @@ -277,8 +285,8 @@ static void run_poller(void) * here */ epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; - epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget; - epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll; + epoll_params.busy_poll_budget = cfg_busy_poll_budget; + epoll_params.prefer_busy_poll = cfg_prefer_busy_poll; epoll_params.__pad = 0; val = 1; @@ -342,5 +350,9 @@ int main(int argc, char *argv[]) parse_opts(argc, argv); setup_queue(); run_poller(); + + if (cfg_outfile) + free(cfg_outfile); + return 0; } diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 876c2db02a63..bc314382e4e1 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -59,6 +59,7 @@ struct options { unsigned int proto; } sock; struct option_cmsg_u32 mark; + struct option_cmsg_u32 priority; struct { bool ena; unsigned int delay; @@ -97,6 +98,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\n" "\t\t-m val Set SO_MARK with given value\n" "\t\t-M val Set SO_MARK via setsockopt\n" + "\t\t-P val Set SO_PRIORITY via setsockopt\n" + "\t\t-Q val Set SO_PRIORITY via cmsg\n" "\t\t-d val Set SO_TXTIME with given delay (usec)\n" "\t\t-t Enable time stamp reporting\n" "\t\t-f val Set don't fragment via cmsg\n" @@ -115,7 +118,7 @@ static void cs_parse_args(int argc, char *argv[]) { int o; - while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:Q:")) != -1) { switch (o) { case 's': opt.silent_send = true; @@ -148,6 +151,10 @@ static void cs_parse_args(int argc, char *argv[]) opt.mark.ena = true; opt.mark.val = atoi(optarg); break; + case 'Q': + opt.priority.ena = true; + opt.priority.val = atoi(optarg); + break; case 'M': opt.sockopt.mark = atoi(optarg); break; @@ -253,6 +260,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_SOCKET, SO_MARK, &opt.mark); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_SOCKET, SO_PRIORITY, &opt.priority); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass); diff --git a/tools/testing/selftests/net/cmsg_so_priority.sh b/tools/testing/selftests/net/cmsg_so_priority.sh new file mode 100755 index 000000000000..ee07d8653262 --- /dev/null +++ b/tools/testing/selftests/net/cmsg_so_priority.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +readonly KSFT_SKIP=4 + +IP4=192.0.2.1/24 +TGT4=192.0.2.2 +TGT4_RAW=192.0.2.3 +IP6=2001:db8::1/64 +TGT6=2001:db8::2 +TGT6_RAW=2001:db8::3 +PORT=1234 +TOTAL_TESTS=0 +FAILED_TESTS=0 + +if ! command -v jq &> /dev/null; then + echo "SKIP cmsg_so_priroity.sh test: jq is not installed." >&2 + exit "$KSFT_SKIP" +fi + +check_result() { + ((TOTAL_TESTS++)) + if [ "$1" -ne 0 ]; then + ((FAILED_TESTS++)) + fi +} + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +setup_ns NS + +create_filter() { + local handle=$1 + local vlan_prio=$2 + local ip_type=$3 + local proto=$4 + local dst_ip=$5 + local ip_proto + + if [[ "$proto" == "u" ]]; then + ip_proto="udp" + elif [[ "$ip_type" == "ipv4" && "$proto" == "i" ]]; then + ip_proto="icmp" + elif [[ "$ip_type" == "ipv6" && "$proto" == "i" ]]; then + ip_proto="icmpv6" + fi + + tc -n $NS filter add dev dummy1 \ + egress pref 1 handle "$handle" proto 802.1q \ + flower vlan_prio "$vlan_prio" vlan_ethtype "$ip_type" \ + dst_ip "$dst_ip" ${ip_proto:+ip_proto $ip_proto} \ + action pass +} + +ip -n $NS link set dev lo up +ip -n $NS link add name dummy1 up type dummy + +ip -n $NS link add link dummy1 name dummy1.10 up type vlan id 10 \ + egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +ip -n $NS address add $IP4 dev dummy1.10 +ip -n $NS address add $IP6 dev dummy1.10 nodad + +ip netns exec $NS sysctl -wq net.ipv4.ping_group_range='0 2147483647' + +ip -n $NS neigh add $TGT4 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT4_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 + +tc -n $NS qdisc add dev dummy1 clsact + +FILTER_COUNTER=10 + +for i in 4 6; do + for proto in u i r; do + echo "Test IPV$i, prot: $proto" + for priority in {0..7}; do + if [[ $i == 4 && $proto == "r" ]]; then + TGT=$TGT4_RAW + elif [[ $i == 6 && $proto == "r" ]]; then + TGT=$TGT6_RAW + elif [ $i == 4 ]; then + TGT=$TGT4 + else + TGT=$TGT6 + fi + + handle="${FILTER_COUNTER}${priority}" + + create_filter $handle $priority ipv$i $proto $TGT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + + if [[ $pkts == 0 ]]; then + check_result 0 + else + echo "prio $priority: expected 0, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -Q $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 1 ]]; then + check_result 0 + else + echo "prio $priority -Q: expected 1, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -P $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 2 ]]; then + check_result 0 + else + echo "prio $priority -P: expected 2, got $pkts" + check_result 1 + fi + done + FILTER_COUNTER=$((FILTER_COUNTER + 10)) + done +done + +if [ $FAILED_TESTS -ne 0 ]; then + echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed" + exit 1 +else + echo "OK - All $TOTAL_TESTS tests passed" + exit 0 +fi diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh index 1d7e756644bc..478af0aefa97 100755 --- a/tools/testing/selftests/net/cmsg_time.sh +++ b/tools/testing/selftests/net/cmsg_time.sh @@ -34,13 +34,28 @@ BAD=0 TOTAL=0 check_result() { + local ret=$1 + local got=$2 + local exp=$3 + local case=$4 + local xfail=$5 + local xf= + local inc= + + if [ "$xfail" == "xfail" ]; then + xf="(XFAIL)" + inc=0 + else + inc=1 + fi + ((TOTAL++)) - if [ $1 -ne 0 ]; then - echo " Case $4 returned $1, expected 0" - ((BAD++)) + if [ $ret -ne 0 ]; then + echo " Case $case returned $ret, expected 0 $xf" + ((BAD+=inc)) elif [ "$2" != "$3" ]; then - echo " Case $4 returned '$2', expected '$3'" - ((BAD++)) + echo " Case $case returned '$got', expected '$exp' $xf" + ((BAD+=inc)) fi } @@ -66,14 +81,14 @@ for i in "-4 $TGT4" "-6 $TGT6"; do awk '/SND/ { if ($3 > 1000) print "OK"; }') check_result $? "$ts" "OK" "$prot - TXTIME abs" - [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000 + [ "$KSFT_MACHINE_SLOW" = yes ] && xfail=xfail - ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay | + ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 | awk '/SND/ {snd=$3} /SCHED/ {sch=$3} - END { if (snd - sch > '$((delay/2))') print "OK"; - else print snd, "-", sch, "<", '$((delay/2))'; }') - check_result $? "$ts" "OK" "$prot - TXTIME rel" + END { if (snd - sch > 500) print "OK"; + else print snd, "-", sch, "<", 500; }') + check_result $? "$ts" "OK" "$prot - TXTIME rel" $xfail done done diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh index c03151e7791c..c159230c9b62 100755 --- a/tools/testing/selftests/net/fdb_notify.sh +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -49,7 +49,7 @@ test_dup_vxlan_self() { ip_link_add br up type bridge vlan_filtering 1 ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_master vx br + ip_link_set_master vx br do_test_dup add "vxlan" dev vx self dst 192.0.2.1 do_test_dup del "vxlan" dev vx self dst 192.0.2.1 @@ -59,7 +59,7 @@ test_dup_vxlan_master() { ip_link_add br up type bridge vlan_filtering 1 ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_master vx br + ip_link_set_master vx br do_test_dup add "vxlan master" dev vx master do_test_dup del "vxlan master" dev vx master @@ -79,7 +79,7 @@ test_dup_macvlan_master() ip_link_add br up type bridge vlan_filtering 1 ip_link_add dd up type dummy ip_link_add mv up link dd type macvlan mode passthru - ip_link_master mv br + ip_link_set_master mv br do_test_dup add "macvlan master" dev mv self do_test_dup del "macvlan master" dev mv self diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 1d58b3b87465..847936363a12 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -291,6 +291,37 @@ fib_rule6_test() "$getnomatch" "iif dscp redirect to table" \ "iif dscp no redirect to table" fi + + fib_check_iproute_support "flowlabel" "flowlabel" + if [ $? -eq 0 ]; then + match="flowlabel 0xfffff" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel redirect to table" \ + "flowlabel no redirect to table" + + match="flowlabel 0xfffff" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel redirect to table" \ + "iif flowlabel no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel masked redirect to table" \ + "flowlabel masked no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel masked redirect to table" \ + "iif flowlabel masked no redirect to table" + fi } fib_rule6_vrf_test() diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 7d885cff8d79..00bde7b6f39e 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -105,6 +105,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ + vxlan_reserved.sh \ vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh index 1c8a26046589..2b5700b61ffa 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding pvid_change" NUM_NETIFS=4 source lib.sh @@ -77,12 +77,16 @@ cleanup() ping_ipv4() { - ping_test $h1 192.0.2.2 + local msg=$1 + + ping_test $h1 192.0.2.2 "$msg" } ping_ipv6() { - ping6_test $h1 2001:db8:1::2 + local msg=$1 + + ping6_test $h1 2001:db8:1::2 "$msg" } learning() @@ -95,6 +99,21 @@ flooding() flood_test $swp2 $h1 $h2 } +pvid_change() +{ + # Test that the changing of the VLAN-aware PVID does not affect + # VLAN-unaware forwarding + bridge vlan add vid 3 dev $swp1 pvid untagged + + ping_ipv4 " with bridge port $swp1 PVID changed" + ping_ipv6 " with bridge port $swp1 PVID changed" + + bridge vlan del vid 3 dev $swp1 + + ping_ipv4 " with bridge port $swp1 PVID deleted" + ping_ipv6 " with bridge port $swp1 PVID deleted" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 7337f398f9cc..8de80acf249e 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -68,6 +68,7 @@ declare -A NETIFS=( : "${REQUIRE_JQ:=yes}" : "${REQUIRE_MZ:=yes}" : "${REQUIRE_MTOOLS:=no}" +: "${REQUIRE_TEAMD:=no}" # Whether to override MAC addresses on interfaces participating in the test. : "${STABLE_MAC_ADDRS:=no}" @@ -321,6 +322,9 @@ fi if [[ "$REQUIRE_MZ" = "yes" ]]; then require_command $MZ fi +if [[ "$REQUIRE_TEAMD" = "yes" ]]; then + require_command $TEAMD +fi if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then # https://github.com/troglobit/mtools require_command msend @@ -932,13 +936,6 @@ packets_rate() echo $(((t1 - t0) / interval)) } -mac_get() -{ - local if_name=$1 - - ip -j link show dev $if_name | jq -r '.[]["address"]' -} - ether_addr_to_u64() { local addr="$1" diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index c35548767756..ecd34f364125 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -7,7 +7,6 @@ ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \ NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes -REQUIRE_MZ=no source lib.sh diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index fe4d7c906a70..a20d22d1df36 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -49,6 +49,7 @@ ALL_TESTS=" test_mirror_gretap_second " +REQUIRE_TEAMD="yes" NUM_NETIFS=6 source lib.sh source mirror_lib.sh diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh index 1261e6f46e34..ff7049582d35 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh @@ -53,6 +53,7 @@ ALL_TESTS=" test_mirror_gretap_second " +REQUIRE_TEAMD="yes" NUM_NETIFS=6 source lib.sh source mirror_lib.sh diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh index e064b946e821..16583a470ec3 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh @@ -109,6 +109,7 @@ ALL_TESTS=" ping_ipv4 ping_ipv6 " +REQUIRE_TEAMD="yes" NUM_NETIFS=8 source lib.sh diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh index f05ffe213c46..2a4cd1af1b85 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh @@ -76,6 +76,7 @@ ping_ipv4 ping_ipv6 "} +REQUIRE_TEAMD="yes" NUM_NETIFS=8 : ${lib_dir:=.} source $lib_dir/lib.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh new file mode 100755 index 000000000000..46c31794b91b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh @@ -0,0 +1,352 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|---------------+ +# | +# +----|--------------------------------+ +# | SW | | +# | +--|------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | id 1000 dstport $VXPORT | | +# | +---------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +--|----------------------------------+ +# | +# +--|----------------------------------+ +# | | | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | +# | VRP2 (vrf) | +# +-------------------------------------+ + +: ${VXPORT:=4789} +: ${ALL_TESTS:=" + default_test + plain_test + reserved_0_test + reserved_10_test + reserved_31_test + reserved_56_test + reserved_63_test + "} + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + defer simple_if_fini $h1 192.0.2.1/28 + + tc qdisc add dev $h1 clsact + defer tc qdisc del dev $h1 clsact + + tc filter add dev $h1 ingress pref 77 \ + prot ip flower skip_hw ip_proto icmp action drop + defer tc filter del dev $h1 ingress pref 77 +} + +switch_create() +{ + ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip_link_set_addr br1 $(mac_get $swp1) + ip_link_set_up br1 + + ip_link_set_up $rp1 + ip_addr_add $rp1 192.0.2.17/28 + ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 + + ip_link_set_master $swp1 br1 + ip_link_set_up $swp1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + defer simple_if_fini $rp2 192.0.2.18/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + defer vrf_cleanup + + forwarding_enable + defer forwarding_restore + + h1_create + switch_create + + vrp2_create +} + +vxlan_header_bytes() +{ + local vni=$1; shift + local -a extra_bits=("$@") + local -a bits + local i + + for ((i=0; i < 64; i++)); do + bits[i]=0 + done + + # Bit 4 is the I flag and is always on. + bits[4]=1 + + for i in ${extra_bits[@]}; do + bits[i]=1 + done + + # Bits 32..55 carry the VNI + local mask=0x800000 + for ((i=0; i < 24; i++)); do + bits[$((i + 32))]=$(((vni & mask) != 0)) + ((mask >>= 1)) + done + + local bytes + for ((i=0; i < 8; i++)); do + local byte=0 + local j + for ((j=0; j < 8; j++)); do + local bit=${bits[8 * i + j]} + ((byte += bit << (7 - j))) + done + bytes+=$(printf %02x $byte): + done + + echo ${bytes%:} +} + +neg_bytes() +{ + local bytes=$1; shift + + local -A neg=([0]=f [1]=e [2]=d [3]=c [4]=b [5]=a [6]=9 [7]=8 + [8]=7 [9]=6 [a]=5 [b]=4 [c]=3 [d]=2 [e]=1 [f]=0 [:]=:) + local out + local i + + for ((i=0; i < ${#bytes}; i++)); do + local c=${bytes:$i:1} + out+=${neg[$c]} + done + echo $out +} + +vxlan_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local vni=$1; shift + local reserved_bits=$1; shift + + local vxlan_header=$(vxlan_header_bytes $vni $reserved_bits) + + $MZ $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp sp=23456,dp=$VXPORT,p=$(: + )"$vxlan_header:"$( : VXLAN + )"$dest_mac:"$( : ETH daddr + )"00:11:22:33:44:55:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:54:"$( : IP total length + )"99:83:"$( : IP identification + )"40:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"01:"$( : IP proto + )"00:00:"$( : IP header csum + )"$(ipv4_to_bytes 192.0.2.3):"$( : IP saddr + )"$(ipv4_to_bytes 192.0.2.1):"$( : IP daddr + )"08:"$( : ICMP type + )"00:"$( : ICMP code + )"8b:f2:"$( : ICMP csum + )"1f:6a:"$( : ICMP request identifier + )"00:01:"$( : ICMP request seq. number + )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload + )"6d:74:0b:00:00:00:00:00:"$( : + )"10:11:12:13:14:15:16:17:"$( : + )"18:19:1a:1b:1c:1d:1e:1f:"$( : + )"20:21:22:23:24:25:26:27:"$( : + )"28:29:2a:2b:2c:2d:2e:2f:"$( : + )"30:31:32:33:34:35:36:37" +} + +vxlan_device_add() +{ + ip_link_add vx1 up type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 "$@" + ip_link_set_master vx1 br1 +} + +vxlan_all_reserved_bits() +{ + local i + + for ((i=0; i < 64; i++)); do + if ((i == 4 || i >= 32 && i < 56)); then + continue + fi + echo $i + done +} + +vxlan_ping_vanilla() +{ + vxlan_ping_do 10 $rp2 $(mac_get $rp1) 192.0.2.17 $(mac_get $h1) 1000 +} + +vxlan_ping_reserved() +{ + for bit in $(vxlan_all_reserved_bits); do + vxlan_ping_do 1 $rp2 $(mac_get $rp1) \ + 192.0.2.17 $(mac_get $h1) 1000 "$bit" + ((n++)) + done +} + +vxlan_ping_test() +{ + local what=$1; shift + local get_stat=$1; shift + local expect=$1; shift + + RET=0 + + local t0=$($get_stat) + + "$@" + check_err $? "Failure when running $@" + + local t1=$($get_stat) + local delta=$((t1 - t0)) + + ((expect == delta)) + check_err $? "Expected to capture $expect packets, got $delta." + + log_test "$what" +} + +__default_test_do() +{ + local n_allowed_bits=$1; shift + local what=$1; shift + + vxlan_ping_test "$what: clean packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + 10 vxlan_ping_vanilla + + local t0=$(link_stats_get vx1 rx errors) + vxlan_ping_test "$what: mangled packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + $n_allowed_bits vxlan_ping_reserved + local t1=$(link_stats_get vx1 rx errors) + + RET=0 + local expect=$((39 - n_allowed_bits)) + local delta=$((t1 - t0)) + ((expect == delta)) + check_err $? "Expected $expect error packets, got $delta." + log_test "$what: drops reported" +} + +default_test_do() +{ + vxlan_device_add + __default_test_do 0 "Default" +} + +default_test() +{ + in_defer_scope \ + default_test_do +} + +plain_test_do() +{ + vxlan_device_add reserved_bits 0xf7ffffff000000ff + __default_test_do 0 "reserved_bits 0xf7ffffff000000ff" +} + +plain_test() +{ + in_defer_scope \ + plain_test_do +} + +reserved_test() +{ + local bit=$1; shift + + local allowed_bytes=$(vxlan_header_bytes 0xffffff $bit) + local reserved_bytes=$(neg_bytes $allowed_bytes) + local reserved_bits=${reserved_bytes//:/} + + vxlan_device_add reserved_bits 0x$reserved_bits + __default_test_do 1 "reserved_bits 0x$reserved_bits" +} + +reserved_0_test() +{ + in_defer_scope \ + reserved_test 0 +} + +reserved_10_test() +{ + in_defer_scope \ + reserved_test 10 +} + +reserved_31_test() +{ + in_defer_scope \ + reserved_test 31 +} + +reserved_56_test() +{ + in_defer_scope \ + reserved_test 56 +} + +reserved_63_test() +{ + in_defer_scope \ + reserved_test 63 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index be4a30a0d02a..9b44a091802c 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -227,7 +227,8 @@ static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, attr->rta_len = RTA_LENGTH(size); attr->rta_type = rta_type; - memcpy(RTA_DATA(attr), payload, size); + if (payload) + memcpy(RTA_DATA(attr), payload, size); return 0; } diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 8994fec1c38f..0bd9a038a1f0 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -435,6 +435,13 @@ xfail_on_veth() fi } +mac_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["address"]' +} + kill_process() { local pid=$1; shift @@ -451,7 +458,7 @@ ip_link_add() defer ip link del dev "$name" } -ip_link_master() +ip_link_set_master() { local member=$1; shift local master=$1; shift @@ -459,3 +466,62 @@ ip_link_master() ip link set dev "$member" master "$master" defer ip link set dev "$member" nomaster } + +ip_link_set_addr() +{ + local name=$1; shift + local addr=$1; shift + + local old_addr=$(mac_get "$name") + ip link set dev "$name" address "$addr" + defer ip link set dev "$name" address "$old_addr" +} + +ip_link_is_up() +{ + local name=$1; shift + + local state=$(ip -j link show "$name" | + jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"') + [[ $state == "UP" ]] +} + +ip_link_set_up() +{ + local name=$1; shift + + if ! ip_link_is_up "$name"; then + ip link set dev "$name" up + defer ip link set dev "$name" down + fi +} + +ip_link_set_down() +{ + local name=$1; shift + + if ip_link_is_up "$name"; then + ip link set dev "$name" down + defer ip link set dev "$name" up + fi +} + +ip_addr_add() +{ + local name=$1; shift + + ip addr add dev "$name" "$@" + defer ip addr del dev "$name" "$@" +} + +ip_route_add() +{ + ip route add "$@" + defer ip route del "$@" +} + +bridge_vlan_add() +{ + bridge vlan add "$@" + defer bridge vlan del "$@" +} diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index 18b9443454a9..bc6b6762baf3 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../../ diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 477ae76de93d..3efe005436cd 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -71,6 +71,11 @@ def ksft_in(a, b, comment=""): _fail("Check failed", a, "not in", b, comment) +def ksft_is(a, b, comment=""): + if a is not b: + _fail("Check failed", a, "is not", b, comment) + + def ksft_ge(a, b, comment=""): if a < b: _fail("Check failed", a, "<", b, comment) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 72590c3f90f1..9e3bcddcf3e8 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -10,7 +10,9 @@ import time class CmdExitFailure(Exception): - pass + def __init__(self, msg, cmd_obj): + super().__init__(msg) + self.cmd = cmd_obj class cmd: @@ -48,7 +50,7 @@ class cmd: if len(stderr) > 0 and stderr[-1] == "\n": stderr = stderr[:-1] raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % - (self.proc.args, stdout, stderr)) + (self.proc.args, stdout, stderr), self) class bkg(cmd): diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index a0d689d58c57..ad1e36baee2a 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -13,14 +13,14 @@ try: SPEC_PATH = KSFT_DIR / "net/lib/specs" sys.path.append(tools_full_path.as_posix()) - from net.lib.ynl.lib import YnlFamily, NlError + from net.lib.ynl.pyynl.lib import YnlFamily, NlError else: # Running in tree tools_full_path = KSRC / "tools" SPEC_PATH = KSRC / "Documentation/netlink/specs" sys.path.append(tools_full_path.as_posix()) - from net.ynl.lib import YnlFamily, NlError + from net.ynl.pyynl.lib import YnlFamily, NlError except ModuleNotFoundError as e: ksft_pr("Failed importing `ynl` library from kernel sources") ksft_pr(str(e)) @@ -32,23 +32,23 @@ except ModuleNotFoundError as e: # Set schema='' to avoid jsonschema validation, it's slow # class EthtoolFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class RtnlFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class NetshaperFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 8e3fc05a5397..c76525fe2b84 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -2,7 +2,7 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 4209b9569039..414addef9a45 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -25,6 +25,8 @@ #include <sys/types.h> #include <sys/mman.h> +#include <arpa/inet.h> + #include <netdb.h> #include <netinet/in.h> @@ -1211,23 +1213,42 @@ static void parse_setsock_options(const char *name) exit(1); } -void xdisconnect(int fd, int addrlen) +void xdisconnect(int fd) { - struct sockaddr_storage empty; + socklen_t addrlen = sizeof(struct sockaddr_storage); + struct sockaddr_storage addr, empty; int msec_sleep = 10; - int queued = 1; - int i; + void *raw_addr; + int i, cmdlen; + char cmd[128]; + + /* get the local address and convert it to string */ + if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0) + xerror("getsockname"); + + if (addr.ss_family == AF_INET) + raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr); + else if (addr.ss_family == AF_INET6) + raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr); + else + xerror("bad family"); + + strcpy(cmd, "ss -M | grep -q "); + cmdlen = strlen(cmd); + if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], + sizeof(cmd) - cmdlen)) + xerror("inet_ntop"); shutdown(fd, SHUT_WR); - /* while until the pending data is completely flushed, the later + /* + * wait until the pending data is completely flushed and all + * the MPTCP sockets reached the closed status. * disconnect will bypass/ignore/drop any pending data. */ for (i = 0; ; i += msec_sleep) { - if (ioctl(fd, SIOCOUTQ, &queued) < 0) - xerror("can't query out socket queue: %d", errno); - - if (!queued) + /* closed socket are not listed by 'ss' */ + if (system(cmd) != 0) break; if (i > poll_timeout) @@ -1281,9 +1302,9 @@ again: return ret; if (cfg_truncate > 0) { - xdisconnect(fd, peer->ai_addrlen); + xdisconnect(fd); } else if (--cfg_repeat > 0) { - xdisconnect(fd, peer->ai_addrlen); + xdisconnect(fd); /* the socket could be unblocking at this point, we need the * connect to be blocking diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b48b4e56826a..5e3c56253274 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -137,7 +137,7 @@ TEST_GROUP="" #shellcheck disable=SC2317 cleanup() { - rm -f "$cin_disconnect" "$cout_disconnect" + rm -f "$cin_disconnect" rm -f "$cin" "$cout" rm -f "$sin" "$sout" rm -f "$capout" @@ -155,7 +155,6 @@ cin=$(mktemp) cout=$(mktemp) capout=$(mktemp) cin_disconnect="$cin".disconnect -cout_disconnect="$cout".disconnect trap cleanup EXIT mptcp_lib_ns_init ns1 ns2 ns3 ns4 @@ -445,12 +444,8 @@ do_transfer() printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - cat /tmp/${listener_ns}.out - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" - [ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ + "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" echo cat "$capout" @@ -587,7 +582,7 @@ make_file() mptcp_lib_make_file $name 1024 $ksize dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null - echo "Created $name (size $(du -b "$name")) containing data sent by $who" + echo "Created $name (size $(stat -c "%s" "$name") B) containing data sent by $who" } run_tests_lo() diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index c07e2bd3a315..13a3b68181ee 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1039,13 +1039,8 @@ do_transfer() if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then fail_test "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - cat /tmp/${listener_ns}.out - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" - cat /tmp/${connector_ns}.out - + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ + "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" return 1 fi diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 975d4d4c862a..051e289d7967 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -107,6 +107,27 @@ mptcp_lib_pr_info() { mptcp_lib_print_info "INFO: ${*}" } +# $1-2: listener/connector ns ; $3 port ; $4-5 listener/connector stat file +mptcp_lib_pr_err_stats() { + local lns="${1}" + local cns="${2}" + local port="${3}" + local lstat="${4}" + local cstat="${5}" + + echo -en "${MPTCP_LIB_COLOR_RED}" + { + printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}" + ip netns exec "${lns}" ss -Menitam -o "sport = :${port}" + cat "${lstat}" + + printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}" + ip netns exec "${cns}" ss -Menitam -o "dport = :${port}" + [ "${lstat}" != "${cstat}" ] && cat "${cstat}" + } 1>&2 + echo -en "${MPTCP_LIB_COLOR_RESET}" +} + # SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all # features using the last version of the kernel and the selftests to make sure # a test is not being skipped by mistake. diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 5e8d5b83e2d0..418a903c3a4d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -169,6 +169,11 @@ do_transfer() cmsg+=",TCPINQ" fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat -n + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat -n + timeout ${timeout_test} \ ip netns exec ${listener_ns} \ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ @@ -189,14 +194,16 @@ do_transfer() wait $spid local rets=$? + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat | grep Tcp > /tmp/${listener_ns}.out + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat | grep Tcp > /tmp/${connector_ns}.out + print_title "Transfer ${ip:2}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ + "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" mptcp_lib_result_fail "transfer ${ip}" diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 8fa77c8e9b65..9c2a415976cb 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -155,6 +155,11 @@ do_transfer() sleep 1 fi + NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ + nstat -n + NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ + nstat -n + timeout ${timeout_test} \ ip netns exec ${ns3} \ ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ @@ -180,25 +185,27 @@ do_transfer() kill ${cappid_connector} fi + NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ + nstat | grep Tcp > /tmp/${ns3}.out + NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ + nstat | grep Tcp > /tmp/${ns1}.out + cmp $sin $cout > /dev/null 2>&1 local cmps=$? cmp $cin $sout > /dev/null 2>&1 local cmpc=$? - printf "%-16s" " max $max_time " if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then + printf "%-16s" " max $max_time " mptcp_lib_pr_ok cat "$capout" return 0 fi - mptcp_lib_pr_fail - echo "client exit code $retc, server $rets" 1>&2 - echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2 - ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port" - echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2 - ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port" + mptcp_lib_pr_fail "client exit code $retc, server $rets" + mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" \ + "/tmp/${ns3}.out" "/tmp/${ns1}.out" ls -l $sin $cout ls -l $cin $sout diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 93d9d914529b..93e8cb671c3d 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -18,6 +18,23 @@ def lo_check(nf) -> None: ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0) +def napi_list_check(nf) -> None: + with NetdevSimDev(queue_count=100) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 100) + + for q in [50, 0, 99]: + for i in range(4): + nsim.dfs_write("queue_reset", f"{q} {i}") + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 100, + comment=f"queue count after reset queue {q} mode {i}") + + def page_pool_check(nf) -> None: with NetdevSimDev() as nsimdev: nsim = nsimdev.nsims[0] @@ -89,7 +106,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() - ksft_run([empty_check, lo_check, page_pool_check], + ksft_run([empty_check, lo_check, page_pool_check, napi_list_check], args=(nf, )) ksft_exit() diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile index 2f1508abc826..3fd1da2ec07d 100644 --- a/tools/testing/selftests/net/openvswitch/Makefile +++ b/tools/testing/selftests/net/openvswitch/Makefile @@ -2,7 +2,7 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := openvswitch.sh diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index cc0bfae2bafa..960e1ab4dd04 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -171,8 +171,10 @@ ovs_add_netns_and_veths () { ovs_add_if "$1" "$2" "$4" -u || return 1 fi - [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \ - tcpdump -i any -s 65535 + if [ $TRACING -eq 1 ]; then + ovs_netns_spawn_daemon "$1" "$3" tcpdump -l -i any -s 6553 + ovs_wait grep -q "listening on any" ${ovs_dir}/stderr + fi return 0 } diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index 4071c133f29e..ef8b25a606d8 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -23,7 +23,7 @@ if [ $# -ne 1 ]; then ktap_exit_fail_msg "usage: $0 <script>" exit "$KSFT_FAIL" fi -script="$1" +script="$(basename $1)" if [ -z "$(which packetdrill)" ]; then ktap_skip_all "packetdrill not found in PATH" @@ -31,16 +31,32 @@ if [ -z "$(which packetdrill)" ]; then fi declare -a optargs +failfunc=ktap_test_fail + if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then optargs+=('--tolerance_usecs=14000') + + # xfail tests that are known flaky with dbg config, not fixable. + # still run them for coverage (and expect 100% pass without dbg). + declare -ar xfail_list=( + "tcp_eor_no-coalesce-retrans.pkt" + "tcp_fast_recovery_prr-ss.*.pkt" + "tcp_slow_start_slow-start-after-win-update.pkt" + "tcp_timestamping.*.pkt" + "tcp_user_timeout_user-timeout-probe.pkt" + "tcp_zerocopy_epoll_.*.pkt" + "tcp_tcp_info_tcp-info-.*-limited.pkt" + ) + readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$" + [[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail fi ktap_print_header ktap_set_plan 2 -unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $(basename $script) > /dev/null \ - && ktap_test_pass "ipv4" || ktap_test_fail "ipv4" -unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $(basename $script) > /dev/null \ - && ktap_test_pass "ipv6" || ktap_test_fail "ipv6" +unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass "ipv4" || $failfunc "ipv4" +unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass "ipv6" || $failfunc "ipv6" ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt new file mode 100644 index 000000000000..38535701656e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking accept. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0...0.200 accept(3, ..., ...) = 4 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + + +.1 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt new file mode 100644 index 000000000000..3692ef102381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking connect. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + + +.1...0.200 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.1 < S. 0:0(0) ack 1 win 5792 <mss 1460,nop,wscale 2,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt new file mode 100644 index 000000000000..914eabab367a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking read. +--tolerance_usecs=10000 + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0...0.100 read(4, ..., 2000) = 2000 + +.1 < P. 1:2001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 2001 + + +.1...0.200 read(4, ..., 2000) = 2000 + +.1 < P. 2001:4001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 + + +.1 < P. 4001:6001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 6001 + +0...0.000 read(4, ..., 1000) = 1000 + +0...0.000 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt new file mode 100644 index 000000000000..cec5a0725d95 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking write. +--tolerance_usecs=10000 + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_min_tso_segs=10 +` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 50000 <mss 1000,nop,wscale 0> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 50000 + +0 accept(3, ..., ...) = 4 + +// Kernel doubles our value -> sk->sk_sndbuf is set to 42000 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [21000], 4) = 0 + +0 getsockopt(4, SOL_SOCKET, SO_SNDBUF, [42000], [4]) = 0 + +// A write of 60000 does not block. + +0...0.300 write(4, ..., 61000) = 61000 // this write() blocks + + +.1 < . 1:1(0) ack 10001 win 50000 + + +.1 < . 1:1(0) ack 30001 win 50000 + +// This ACK should wakeup the write(). An ACK of 35001 does not. + +.1 < . 1:1(0) ack 36001 win 50000 + +// Reset to sysctls defaults. +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt new file mode 100644 index 000000000000..8514d6bdbb6d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test basic connection teardown where local process closes first: +// the local process calls close() first, so we send a FIN, and receive an ACK. +// Then we receive a FIN and ACK it. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.01...0.011 connect(3, ..., ...) = 0 + +0 > S 0:0(0) <...> + +0 < S. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + + +0 write(3, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1001 win 257 + + +0 close(3) = 0 + +0 > F. 1001:1001(0) ack 1 + +0 < . 1:1(0) ack 1002 win 257 + + +0 < F. 1:1(0) ack 1002 win 257 + +0 > . 1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt new file mode 100644 index 000000000000..04103134bd99 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test to make sure no RST is being sent when close() +// is called on a socket with SYN_SENT state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <...> + +// Application decideds to close the socket in SYN_SENT state +// Make sure no RST is sent after close(). + +0 close(3) = 0 + +// Receive syn-ack to trigger the send side packet examination: +// If a RESET were sent right after close(), it would have failed with +// a mismatched timestamp. + +.1 < S. 0:0(0) ack 1 win 32000 <mss 1460,nop,wscale 7> + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt new file mode 100644 index 000000000000..5f3a2914213a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify behavior for the sequence: remote side sends FIN, then we close(). +// Since the remote side (client) closes first, we test our LAST_ACK code path. + +`./defaults.sh` + +// Initialize a server socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +// Client closes first. + +.01 < F. 1:1(0) ack 1 win 257 + +0 > . 1:1(0) ack 2 + +// App notices that client closed. + +0 read(4, ..., 1000) = 0 + +// Then we close. + +.01 close(4) = 0 + +0 > F. 1:1(0) ack 2 + +// Client ACKs our FIN. + +.01 < . 2:2(0) ack 2 win 257 + +// Verify that we send RST in response to any incoming segments +// (because the kernel no longer has any record of this socket). + +.01 < . 2:2(0) ack 2 win 257 + +0 > R 2:2(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt new file mode 100644 index 000000000000..643baf3267cf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test ECN: verify that Linux TCP ECN sending code uses ECT0 (not ECT1). +// +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 # fully enabled +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response ++.002 ... 0.004 connect(4, ..., ...) = 0 + + +0 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.002 < SE. 0:0(0) ack 1 win 32767 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + +// Write 1 MSS. ++.002 write(4, ..., 1000) = 1000 +// Send 1 MSS with ect0. + +0 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt new file mode 100644 index 000000000000..f95b9b3c9fa1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. The large chunk itself should be packetized as +// usual. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write another 10040B chunk with no coalescing options. + +0 send(4, ..., 10400, MSG_EOR) = 10400 + +// Write a 2KB chunk. This chunk should not be appended to the packets created +// the previous chunk. + +0 write(4, ..., 2000) = 2000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:20801(10800) ack 1 ++.001 < . 1:1(0) ack 20801 win 514 +// This 2KB packet should be sent alone. + +0 > P. 20801:22801(2000) ack 1 ++.001 < . 1:1(0) ack 22801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt new file mode 100644 index 000000000000..2ff66075288e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. Also, when packets are retransmitted, they +// will not be coalesce into the same skb. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 <sack 13201:14401,nop,nop> +// TCP should fill the hole but no coalescing should happen, and all +// retransmissions should be sent out as individual packets. + +// Note : This is timeout based retransmit. +// Do not put +0 here or flakes will come back. ++.004~+.008 > P. 12001:12401(400) ack 1 + ++.001 < . 1:1(0) ack 12401 win 514 <sack 13201:14401,nop,nop> + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 ++.001 < . 1:1(0) ack 12801 win 514 <sack 13201:14401,nop,nop> ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt new file mode 100644 index 000000000000..77039c5aac39 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write a 400B chunk with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 + +0 > P. 10801:20801(10000) ack 1 ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 20801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt new file mode 100644 index 000000000000..dd5a06250595 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk even though we have 10 back-to-back small +// writes. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 ++.001 < . 1:1(0) ack 12401 win 514 ++.001 < . 1:1(0) ack 12801 win 514 ++.001 < . 1:1(0) ack 13201 win 514 ++.001 < . 1:1(0) ack 13601 win 514 ++.001 < . 1:1(0) ack 14001 win 514 ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt new file mode 100644 index 000000000000..0d3c8077e830 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we test a simple case where in-flight == ssthresh +// all the way through recovery, so during fast recovery we send one segment +// for each segment SACKed/ACKed. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// RTT 100ms + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001. + +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:4001,nop,nop> +// Enter fast recovery. + +0 > . 1:1001(1000) ack 1 + +.01 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + +// Write some more, which we will send 1 MSS at a time, +// as in-flight segments are SACKed or ACKed. + +.01 write(4, ..., 7000) = 7000 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:5001,nop,nop> + +0 > . 10001:11001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:6001,nop,nop> + +0 > . 11001:12001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:7001,nop,nop> + +0 > . 12001:13001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:8001,nop,nop> + +0 > . 13001:14001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:9001,nop,nop> + +0 > . 14001:15001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:10001,nop,nop> + +0 > . 15001:16001(1000) ack 1 + + +.02 < . 1:1(0) ack 10001 win 320 + +0 > P. 16001:17001(1000) ack 1 +// Leave fast recovery. + +.01 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + + +.03 < . 1:1(0) ack 12001 win 320 + +.02 < . 1:1(0) ack 14001 win 320 + +.02 < . 1:1(0) ack 16001 win 320 + +.02 < . 1:1(0) ack 17001 win 320 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt new file mode 100644 index 000000000000..7842a10b6967 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4, and 11 to 16 are dropped. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write 20 data segments. + +0 write(4, ..., 20000) = 20000 + +0 > P. 1:10001(10000) ack 1 + +// Receive first DUPACK, entering PRR part + +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop> + +0 > . 10001:11001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop> + +0 > . 11001:12001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop> + +0 > . 1:1001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop> + +0 > . 2001:3001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop> + +0 > . 3001:4001(1000) ack 1 +// Enter PRR CRB ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:11001,nop,nop> + +0 > . 12001:13001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:12001,nop,nop> + +0 > . 13001:14001(1000) ack 1 +// Enter PRR slow start + +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:12001,nop,nop> + +0 > P. 14001:16001(2000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:12001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 > . 16001:17001(1000) ack 1 +// inflight reaches ssthresh, goes into packet conservation mode ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:13001,nop,nop> + +0 > . 17001:18001(1000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:14001,nop,nop> + +0 > . 18001:19001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt new file mode 100644 index 000000000000..b66d7644c3b6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4 are lost. The sender writes another 10 packets. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 20 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1,2,3,4 + +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop> ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop> + +0 > . 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop> + +0 > . 3001:4001(1000) ack 1 + +// Receiver ACKs all data. + +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 2001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 3001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 10001 win 320 + +// Writes another 10 packets, which the ssthresh*mss amount +// should be sent right away + +.01 write(4, ..., 10000) = 10000 + +0 > . 10001:17001(7000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt new file mode 100644 index 000000000000..8e87bfecabb5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we verify that the sender uses SACK info on an ACK +// below snd_una. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// RTT 10ms + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001,4001:5001,7001:8001. + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001 8001:9001,nop,nop> + +0 > . 1:1001(1000) ack 1 + ++.012 < . 1:1(0) ack 4001 win 320 <sack 8001:9001,nop,nop> + +0 > . 4001:7001(3000) ack 1 + + +0 write(4, ..., 10000) = 10000 + +// The following ACK was reordered - delayed so that it arrives with +// an ACK field below snd_una. Here we check that the newly-SACKed +// 2MSS at 5001:7001 cause us to send out 2 more MSS. ++.002 < . 1:1(0) ack 3001 win 320 <sack 5001:7001,nop,nop> + +0 > . 7001:8001(1000) ack 1 + +0 > . 10001:11001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt new file mode 100644 index 000000000000..96b01eb5b7a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that doesn't support SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. +// We have one packet newly acked (1001:3001 were DUP-ACK'd) +// So we revert state back to Open. Slow start cwnd from 10 to 11 +// and send 11 - 9 = 2 packets + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt new file mode 100644 index 000000000000..642da51ec3a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that supports SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt new file mode 100644 index 000000000000..7adae7a9ef4a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// This is a test inspired by an Android client app using SSL. This +// test verifies using TCP_NODELAY would save application latency +// (Perhaps even better with TCP_NAGLE). +// +`./defaults.sh +ethtool -K tun0 tso off gso off +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < S. 0:0(0) ack 1 win 5792 <mss 974,nop,nop,sackOK,nop,wscale 7> + +0 > . 1:1(0) ack 1 + +// SSL handshake (resumed session) + +0 write(4, ..., 517) = 517 + +0 > P. 1:518(517) ack 1 + +.1 < . 1:1(0) ack 518 win 229 + + +0 < P. 1:144(143) ack 1 win 229 + +0 > . 518:518(0) ack 144 + +0 read(4, ..., 1000) = 143 + +// Application POST header (51B) and body (2002B) + +0 write(4, ..., 51) = 51 + +0 > P. 518:569(51) ack 144 + +.03 write(4, ..., 2002) = 2002 + +0 > . 569:1543(974) ack 144 + +0 > P. 1543:2517(974) ack 144 +// Without disabling Nagle, this packet will not happen until the remote ACK. + +0 > P. 2517:2571(54) ack 144 + + +.1 < . 1:1(0) ack 2571 win 229 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt new file mode 100644 index 000000000000..fa9c01813996 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test the MSG_MORE flag will correctly corks the tiny writes +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Disable Nagle by default on this socket. + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +// Test the basic case: MSG_MORE overwrites TCP_NODELAY and enables Nagle. + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 40}], msg_flags=0}, MSG_MORE) = 40 + +.21~+.215 > P. 1:41(40) ack 1 + +.01 < . 1:1(0) ack 41 win 257 + +// Test unsetting MSG_MORE releases the packet + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 100}], msg_flags=0}, MSG_MORE) = 100 ++.005 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 160}], msg_flags=0}, MSG_MORE) = 160 + +.01 sendmsg(4, {msg_name(...)=..., + msg_iov(3)=[{..., 100}, {..., 200}, {..., 195}], + msg_flags=0}, MSG_MORE) = 495 ++.008 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 5}], msg_flags=0}, 0) = 5 + +0 > P. 41:801(760) ack 1 + +.02 < . 1:1(0) ack 801 win 257 + + +// Test >MSS write will unleash MSS packets but hold on the remaining data. + +.1 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3100}], msg_flags=0}, MSG_MORE) = 3100 + +0 > . 801:3801(3000) ack 1 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 50}], msg_flags=0}, MSG_MORE) = 50 + + +.01 < . 1:1(0) ack 2801 win 257 +// Err... we relase the remaining right after the ACK? note that PUSH is reset + +0 > . 3801:3951(150) ack 1 + +// Test we'll hold on the subsequent writes when inflight (3801:3951) > 0 ++.001 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 1}], msg_flags=0}, MSG_MORE) = 1 ++.002 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2}], msg_flags=0}, MSG_MORE) = 2 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3}], msg_flags=0}, MSG_MORE) = 3 ++.004 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 4}], msg_flags=0}, MSG_MORE) = 4 + +.02 < . 1:1(0) ack 3951 win 257 + +0 > . 3951:3961(10) ack 1 + +.02 < . 1:1(0) ack 3961 win 257 + + +// Test the case a MSG_MORE send followed by a write flushes the data + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 20}], msg_flags=0}, MSG_MORE) = 20 + +.05 write(4, ..., 20) = 20 + +0 > P. 3961:4001(40) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt new file mode 100644 index 000000000000..0ddec5f7dc1a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_CORK and TCP_NODELAY sockopt behavior +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Unset TCP_CORK should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_CORK, [0], 4) = 0 + +0 > P. 1:81(80) ack 1 + +.01 < . 1:1(0) ack 81 win 257 + +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > P. 81:161(80) ack 1 + +.01 < . 1:1(0) ack 161 win 257 + +// Set MSG_MORE to hold small packets + +0 send(4, ..., 40, MSG_MORE) = 40 + +.05 send(4, ..., 40, MSG_MORE) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > . 161:241(80) ack 1 + +.01 < . 1:1(0) ack 241 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt new file mode 100644 index 000000000000..310ef31518da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that setsockopt calls that force a route refresh do not +// cause problems matching SACKs with packets in the write queue. +// This variant tests IP_TOS. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_IP, IP_MTU_DISCOVER, [IP_PMTUDISC_DONT], 1) = 0 + +0...0.010 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 65535 <mss 1460,nop,wscale 2,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 1:5841(5840) ack 1 + +.01 < . 1:1(0) ack 5841 win 65535 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 5841:11681(5840) ack 1 + +.01 < . 1:1(0) ack 11681 win 65535 + + +.01 write(3, ..., 14600) = 14600 + +0 > P. 11681:26281(14600) ack 1 + +// Try the socket option that we know can force a route refresh. + +0 setsockopt(3, SOL_IP, IP_TOS, [4], 1) = 0 +// Then revert to avoid routing/mangling/etc implications of that setting. + +0 setsockopt(3, SOL_IP, IP_TOS, [0], 1) = 0 + +// Verify that we do not retransmit the SACKed segments. + +.01 < . 1:1(0) ack 13141 win 65535 <sack 16061:17521 20441:26281,nop,nop> + +0 > . 13141:16061(2920) ack 1 + +0 > P. 17521:20441(2920) ack 1 + +.01 < . 1:1(0) ack 26281 win 65535 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt new file mode 100644 index 000000000000..f185e1ac57ea --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests non-FACK SACK with SACKs coming in the order +// 2 6 8 3 9, to test what happens when we get a new SACKed range +// (for packet 3) that is on the right of an existing SACKed range +// (for packet 2). + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + + +.1 < . 1:1(0) ack 1 win 257 <sack 2001:3001,nop,nop> ++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001,nop,nop> ++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001 8001:9001,nop,nop> + +// 3 SACKed packets, so we enter Fast Recovery. + +0 > . 1:1001(1000) ack 1 + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_lost == 6, tcpi_lost }% + +// SACK for 3001:4001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. ++.007 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:9001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +0 %{ assert tcpi_reordering == 6, tcpi_reordering }% // 8001:9001 -> 3001:4001 is 6 + +// SACK for 9001:10001. + +.01 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// ACK for 1:1001 as packets from t=0.303 arrive. ++.083 < . 1:1(0) ack 1001 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 4,tcpi_lost }% + +// ACK for 1:4001 as packets from t=0.310 arrive. ++.017 < . 1:1(0) ack 4001 win 257 <sack 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 3,tcpi_lost }% + +// ACK for 1:7001 as packets from t=0.320 arrive. + +.01 < . 1:1(0) ack 7001 win 257 <sack 8001:10001,nop,nop> + +// ACK for all data as packets from t=0.403 arrive. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt new file mode 100644 index 000000000000..0093b4973934 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 3, and then a SACK for 4. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay the fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop> +// RACK reordering timer ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7, tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 3001:4001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:4001 7001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost # since 3001:4001 is no longer lost +assert tcpi_reordering == 5, tcpi_reordering # 7001:8001 -> 3001:4001 +}% + +// SACK for 4001:5001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. +// It uses the RFC3517 algorithm to mark 1:3001 lost +// because >=3 higher-sequence packets are SACKed. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:8001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 %{ +assert tcpi_lost == 5,tcpi_lost # SACK/RFC3517 thinks 1:3001 are lost +}% + +// SACK for 8001:9001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:9001,nop,nop> + +// SACK for 9001:10001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:10001,nop,nop> + +0 > . 5001:6001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt new file mode 100644 index 000000000000..980a832dc81c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 5, and then a SACK for 6. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay a fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop> ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7,tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 5001:6001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:6001 7001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost +assert tcpi_reordering == 3, tcpi_reordering # 7001:8001 -> 5001:6001 is 3 +}% + +// SACK for 6001:7001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:8001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// SACK for 8001:9001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:9001,nop,nop> + +0 > . 3001:4001(1000) ack 1 + +// SACK for 9001:10001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:10001,nop,nop> + +0 > . 4001:5001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt new file mode 100644 index 000000000000..6740859a1360 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Simplest possible test of open() and then sendfile(). +// We write some zeroes into a file (since packetdrill expects payloads +// to be all zeroes) and then open() the file, then use sendfile() +// and verify that the correct number of zeroes goes out. + +`./defaults.sh +/bin/rm -f /tmp/testfile +/bin/dd bs=1 count=5 if=/dev/zero of=/tmp/testfile status=none +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 open("/tmp/testfile", O_RDONLY) = 5 + +0 sendfile(4, 5, [0], 5) = 5 + +0 > P. 1:6(5) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt new file mode 100644 index 000000000000..0cbd43253236 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize a server socket + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_IP, IP_FREEBIND, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Connection should get accepted + +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <...> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0 pipe([5, 6]) = 0 + +0 < U. 1:101(100) ack 1 win 257 urg 100 + +0 splice(4, NULL, 6, NULL, 99, 0) = 99 + +0 splice(4, NULL, 6, NULL, 1, 0) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..8940726a3ec2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP fastopen behavior with NULL as buffer pointer, but a non-zero +// buffer length. +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + +// Cache warmup: send a Fast Open cookie request + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress) ++0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO,nop,nop> ++0 < S. 123:123(0) ack 1 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6,FO abcd1234,nop,nop> ++0 > . 1:1(0) ack 1 ++0 close(3) = 0 ++0 > F. 1:1(0) ack 1 ++0 < F. 1:1(0) ack 2 win 92 ++0 > . 2:2(0) ack 2 + +// Test with MSG_FASTOPEN without TCP_FASTOPEN_CONNECT. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 sendto(4, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(4) = 0 + +// Test with TCP_FASTOPEN_CONNECT without MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 ++0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(5, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(5, ..., ...) = 0 ++0 sendto(5, NULL, 1, 0, ..., ...) = -1 ++0 close(5) = 0 + +// Test with both TCP_FASTOPEN_CONNECT and MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 6 ++0 fcntl(6, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(6, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(6, ..., ...) = 0 ++0 sendto(6, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(6) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt new file mode 100644 index 000000000000..b2b2cdf27e20 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we correctly skip zero-length IOVs. +`./defaults.sh` + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 40}, {..., 0}, {..., 20}], + msg_flags=0}, 0) = 60 + +0 > P. 1:61(60) ack 1 + +.01 < . 1:1(0) ack 61 win 257 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 0}, {..., 0}, {..., 0}], + msg_flags=0}, MSG_ZEROCOPY) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 10}, {..., 0}, {..., 50}], + msg_flags=0}, MSG_ZEROCOPY) = 60 + +0 > P. 61:121(60) ack 1 + +.01 < . 1:1(0) ack 121 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..59f5903f285c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test kernel behavior with NULL as buffer pointer + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.2 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 write(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 send(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 sendto(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) + + +0 < . 1:1001(1000) ack 1 win 200 + +0 read(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 recv(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 recvfrom(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt new file mode 100644 index 000000000000..d7fdb43a8e89 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tcpi_last_data_recv for active session +`./defaults.sh` + +// Create a socket and set it to non-blocking. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + ++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) ++0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.030 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8> ++0 > . 1:1(0) ack 1 + ++1 %{ assert 990 <= tcpi_last_data_recv <= 1010, tcpi_last_data_recv }% + ++0 < . 1:1001(1000) ack 1 win 300 ++0 > . 1:1(0) ack 1001 + ++0 %{ assert tcpi_last_data_recv <= 10, tcpi_last_data_recv }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt new file mode 100644 index 000000000000..a9bcd46f6cb6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test rwnd limited time in tcp_info for client side. + +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +// Server advertises 0 receive window. + +.01 < S. 0:0(0) ack 1 win 0 <mss 1000,nop,nop,sackOK> + + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +// Make sure that initial rwnd limited time is 0. + +0 %{ assert tcpi_rwnd_limited == 0, tcpi_rwnd_limited }% + +// Receive window limited time starts here. + +0 write(3, ..., 1000) = 1000 + +// Check that rwnd limited time in tcp_info is around 0.1s. + +.1 %{ assert 98000 <= tcpi_rwnd_limited <= 110000, tcpi_rwnd_limited }% + +// Server opens the receive window. + +.1 < . 1:1(0) ack 1 win 2000 + +// Check that rwnd limited time in tcp_info is around 0.2s. + +0 %{ assert 198000 <= tcpi_rwnd_limited <= 210000, tcpi_rwnd_limited }% + + +0 > P. 1:1001(1000) ack 1 + +// Server advertises a very small receive window. + +.03 < . 1:1(0) ack 1001 win 10 + +// Receive window limited time starts again. + +0 write(3, ..., 1000) = 1000 + +// Server opens the receive window again. + +.1 < . 1:1(0) ack 1001 win 2000 +// Check that rwnd limited time in tcp_info is around 0.3s +// and busy time is 0.3 + 0.03 (server opened small window temporarily). + +0 %{ assert 298000 <= tcpi_rwnd_limited <= 310000, tcpi_rwnd_limited;\ + assert 328000 <= tcpi_busy_time <= 340000, tcpi_busy_time;\ +}% + + +0 > P. 1001:2001(1000) ack 1 + +.02 < . 1:1(0) ack 2001 win 2000 + +0 %{ assert 348000 <= tcpi_busy_time <= 360000, tcpi_busy_time }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt new file mode 100644 index 000000000000..f0de2acd0f8e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test send-buffer-limited time in tcp_info for client side. +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [10000], 4) = 0 + +0 getsockopt(3, SOL_SOCKET, SO_SNDBUF, [20000], [4]) = 0 + + +.09...0.14 write(3, ..., 150000) = 150000 + + +.01 < . 1:1(0) ack 10001 win 10000 + + +.01 < . 1:1(0) ack 30001 win 10000 + +// cwnd goes from 40(60KB) to 80(120KB), and that we hit the tiny sndbuf limit 10KB + +.01 < . 1:1(0) ack 70001 win 10000 + + +.02 < . 1:1(0) ack 95001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited; \ + assert 49000 <= tcpi_busy_time <= 52000, tcpi_busy_time; \ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% + +// This ack frees up enough buffer so we are no longer +// buffer limited (socket flag SOCK_NOSPACE is cleared) + +.02 < . 1:1(0) ack 150001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited;\ + assert 69000 <= tcpi_busy_time <= 73000, tcpi_busy_time;\ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt new file mode 100644 index 000000000000..2087ec0c746a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that tx timestamping sends timestamps only for +// the last byte of each sendmsg. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 20000 <mss 1000,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + + +0 write(3, ..., 11000) = 11000 + +0 > P. 1:10001(10000) ack 1 + +.01 < . 1:1(0) ack 10001 win 4000 + +0 > P. 10001:11001(1000) ack 1 + +.01 < . 1:1(0) ack 11001 win 4000 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. So, we expect the last byte of the first +// chunk to have a timestamp key of 10999 (i.e., 11000 - 1). + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the last byte should be received at t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt new file mode 100644 index 000000000000..876024a31110 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for partial writes (IPv4). +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 2000 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1000], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// We have a partial write. + +0 write(3, ..., 10000) = 2964 + +0 > . 1:989(988) ack 1 <nop,nop,TS val 110 ecr 700> + +0 > P. 989:1977(988) ack 1 <nop,nop,TS val 110 ecr 700> + +.01 < . 1:1(0) ack 1977 win 92 <nop,nop,TS val 800 ecr 200> + +0 > P. 1977:2965(988) ack 1 <nop,nop,TS val 114 ecr 800> + +.01 < . 1:1(0) ack 2965 win 92 <nop,nop,TS val 800 ecr 200> + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after the first ack at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after the first ack at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received after the last ack at +// t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt new file mode 100644 index 000000000000..84d94780e6be --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for server-side (IPv4). +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// Write two 2KB chunks. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. So, we expect the last byte of the first +// and the second chunks to have timestamp keys of 1999 (i.e., 2000 - 1) and +// 3999 (i.e., 4000 - 1) respectively. + +0 write(4, ..., 2000) = 2000 + +0 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 + +0 > P. 2001:4001(2000) ack 1 + +.01 < . 1:1(0) ack 2001 win 514 + +.01 < . 1:1(0) ack 4001 win 514 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SCHED for the second chunk should be received almost immediately +// after that at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the second chunk should be received almost immediately +// after that at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received at t=20ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the second chunk should be received at t=30ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt new file mode 100644 index 000000000000..e61424a7bd0a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send FIN packet with correct TSval +--tcp_ts_tick_usecs=1000 +--tolerance_usecs=7000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + + +1 close(4) = 0 +// Check that FIN TSval is updated properly, one second has passed since last sent packet. + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1200 ecr 200> diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt new file mode 100644 index 000000000000..174ce9a1bfc0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we reject TS val updates on a packet with invalid ACK sequence + +`./defaults.sh +` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +.1 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + +// bad packet with high tsval (its ACK sequence is above our sndnxt) + +0 < F. 1:1(0) ack 9999 win 20000 <nop,nop,TS val 200000 ecr 100> + + + +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100> + +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201> diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt new file mode 100644 index 000000000000..2e3b3bb7493a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send RST packet with correct TSval +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + + +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100> + +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201> + + +1 close(4) = 0 +// Check that RST TSval is updated properly, one second has passed since last sent packet. + +0 > R. 1:1(0) ack 1001 <nop,nop,TS val 1200 ecr 201> diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt new file mode 100644 index 000000000000..183051ba0cae --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + + +0 < S 0:0(0) win 0 <mss 1460> + +0 > S. 0:0(0) ack 1 <mss 1460> + + +.1 < . 1:1(0) ack 1 win 65530 + +0 accept(3, ..., ...) = 4 + + +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 write(4, ..., 24) = 24 + +0 > P. 1:25(24) ack 1 + +.1 < . 1:1(0) ack 25 win 65530 + +0 %{ assert tcpi_probes == 0, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +// install a qdisc dropping all packets + +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0` + +0 write(4, ..., 24) = 24 + // When qdisc is congested we retry every 500ms + // (TCP_RESOURCE_PROBE_INTERVAL) and therefore + // we retry 6 times before hitting 3s timeout. + // First verify that the connection is alive: ++3.250 write(4, ..., 24) = 24 + // Now verify that shortly after that the socket is dead: + +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) + + +0 %{ assert tcpi_probes == 6, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt new file mode 100644 index 000000000000..2efe02bfba9c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +// Okay, we received nothing, and decide to close this idle socket. +// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth +// trying hard to cleanly close this flow, at the price of keeping +// a TCP structure in kernel for about 1 minute ! + +2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 close(4) = 0 + + +0 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.6~+.800 > F. 1:1(0) ack 1 + +// We finally receive something from the peer, but it is way too late +// Our socket vanished because TCP_USER_TIMEOUT was really small + +0 < . 1:2(1) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt new file mode 100644 index 000000000000..8bd60226ccfc --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that established connections drop a segment without the ACK flag set. + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.01 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + +// Receive a segment with no flags set, verify that it's not enqueued. + +.01 < - 1:1001(1000) win 20000 + +0 ioctl(4, SIOCINQ, [0]) = 0 + +// Receive a segment with ACK flag set, verify that it is enqueued. + +.01 < . 1:1001(1000) ack 1 win 20000 + +0 ioctl(4, SIOCINQ, [1000]) = 0 diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 1a706d03bb6b..9a85f93c33d8 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -44,9 +44,11 @@ struct tls_crypto_info_keys { }; static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, - struct tls_crypto_info_keys *tls12) + struct tls_crypto_info_keys *tls12, + char key_generation) { - memset(tls12, 0, sizeof(*tls12)); + memset(tls12, key_generation, sizeof(*tls12)); + memset(tls12, 0, sizeof(struct tls_crypto_info)); switch (cipher_type) { case TLS_CIPHER_CHACHA20_POLY1305: @@ -275,7 +277,7 @@ TEST_F(tls_basic, recseq_wrap) if (self->notls) SKIP(return, "no TLS support"); - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq)); ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); @@ -391,7 +393,7 @@ FIXTURE_SETUP(tls) SKIP(return, "Unsupported cipher in FIPS mode"); tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); @@ -1175,7 +1177,7 @@ TEST_F(tls, bidir) struct tls_crypto_info_keys tls12; tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12, tls12.len); @@ -1614,7 +1616,7 @@ TEST_F(tls, getsockopt) EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type); /* get the full crypto_info */ - tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect, 0); len = expect.len; memrnd(&get, sizeof(get)); EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0); @@ -1668,6 +1670,464 @@ TEST_F(tls, recv_efault) EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0); } +#define TLS_RECORD_TYPE_HANDSHAKE 0x16 +/* key_update, length 1, update_not_requested */ +static const char key_update_msg[] = "\x18\x00\x00\x01\x00"; +static void tls_send_keyupdate(struct __test_metadata *_metadata, int fd) +{ + size_t len = sizeof(key_update_msg); + + EXPECT_EQ(tls_send_cmsg(fd, TLS_RECORD_TYPE_HANDSHAKE, + (char *)key_update_msg, len, 0), + len); +} + +static void tls_recv_keyupdate(struct __test_metadata *_metadata, int fd, int flags) +{ + char buf[100]; + + EXPECT_EQ(tls_recv_cmsg(_metadata, fd, TLS_RECORD_TYPE_HANDSHAKE, buf, sizeof(buf), flags), + sizeof(key_update_msg)); + EXPECT_EQ(memcmp(buf, key_update_msg, sizeof(key_update_msg)), 0); +} + +/* set the key to 0 then 1 for RX, immediately to 1 for TX */ +TEST_F(tls_basic, rekey_rx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + +/* set the key to 0 then 1 for TX, immediately to 1 for RX */ +TEST_F(tls_basic, rekey_tx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + +TEST_F(tls, rekey) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* send after rekey */ + send_len = strlen(test_str_2) + 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_fail) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + + if (variant->tls_version != TLS_1_3_VERSION) { + /* just check that rekey is not supported and return */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EBUSY); + return; + } + + /* successful update */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* invalid update: change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update (RX socket): change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update: change of cipher */ + if (variant->cipher_type == TLS_CIPHER_AES_GCM_256) + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_CHACHA20_POLY1305, &tls12, 1); + else + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_256, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* send after rekey, the invalid updates shouldn't have an effect */ + send_len = strlen(test_str_2) + 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_peek) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), -1); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); +} + +TEST_F(tls, splice_rekey) +{ + int send_len = TLS_PAYLOAD_MAX_LEN / 2; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + struct tls_crypto_info_keys tls12; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); + + /* can't splice the KeyUpdate */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EINVAL); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* can't splice before updating the key */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + +TEST_F(tls, rekey_peek_splice) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + ASSERT_GE(pipe(p), 0); + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_recv, test_str_1, send_len), 0); +} + +TEST_F(tls, rekey_getsockopt) +{ + struct tls_crypto_info_keys tls12; + struct tls_crypto_info_keys tls12_get; + socklen_t len; + + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + tls_recv_keyupdate(_metadata, self->cfd, 0); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); +} + +TEST_F(tls, rekey_poll_pending) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* send immediately after rekey */ + send_len = strlen(test_str) + 1; + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + /* key hasn't been updated, expect cfd to be non-readable */ + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + + exit(!__test_passed(_metadata)); + } +} + +TEST_F(tls, rekey_poll_delay) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + sleep(1); + send_len = strlen(test_str) + 1; + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + exit(!__test_passed(_metadata)); + } +} + FIXTURE(tls_err) { int fd, cfd; @@ -1696,7 +2156,7 @@ FIXTURE_SETUP(tls_err) int ret; tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls); @@ -2118,7 +2578,7 @@ TEST(tls_v6ops) { int sfd, ret, fd; socklen_t len, len2; - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); addr.sin6_family = AF_INET6; addr.sin6_addr = in6addr_any; @@ -2177,7 +2637,7 @@ TEST(prequeue) { len = sizeof(addr); memrnd(buf, sizeof(buf)); - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12, 0); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 640bc43452fa..88fa1d53ba2b 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -92,6 +92,9 @@ run_udp() { echo "udp" run_in_netns ${args} + echo "udp sendmmsg" + run_in_netns ${args} -m + echo "udp gso" run_in_netns ${args} -S 0 diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh new file mode 100755 index 000000000000..e7cb8c678bde --- /dev/null +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ALL_TESTS=" + test_binding_on + test_binding_off + test_binding_toggle_on + test_binding_toggle_off + test_binding_toggle_on_when_upper_down + test_binding_toggle_off_when_upper_down + test_binding_toggle_on_when_lower_down + test_binding_toggle_off_when_lower_down +" + +setup_prepare() +{ + local port + + ip_link_add br up type bridge vlan_filtering 1 + + for port in d1 d2 d3; do + ip_link_add $port type veth peer name r$port + ip_link_set_up $port + ip_link_set_up r$port + ip_link_set_master $port br + done + + bridge_vlan_add vid 11 dev br self + bridge_vlan_add vid 11 dev d1 master + + bridge_vlan_add vid 12 dev br self + bridge_vlan_add vid 12 dev d2 master + + bridge_vlan_add vid 13 dev br self + bridge_vlan_add vid 13 dev d1 master + bridge_vlan_add vid 13 dev d2 master + + bridge_vlan_add vid 14 dev br self + bridge_vlan_add vid 14 dev d1 master + bridge_vlan_add vid 14 dev d2 master + bridge_vlan_add vid 14 dev d3 master +} + +operstate_is() +{ + local dev=$1; shift + local expect=$1; shift + + local operstate=$(ip -j link show $dev | jq -r .[].operstate) + if [[ $operstate == UP ]]; then + operstate=1 + elif [[ $operstate == DOWN || $operstate == LOWERLAYERDOWN ]]; then + operstate=0 + fi + echo -n $operstate + [[ $operstate == $expect ]] +} + +check_operstate() +{ + local dev=$1; shift + local expect=$1; shift + local operstate + + operstate=$(busywait 1000 \ + operstate_is "$dev" "$expect") + check_err $? "Got operstate of $operstate, expected $expect" +} + +add_one_vlan() +{ + local link=$1; shift + local id=$1; shift + + ip_link_add $link.$id link $link type vlan id $id "$@" +} + +add_vlans() +{ + add_one_vlan br 11 "$@" + add_one_vlan br 12 "$@" + add_one_vlan br 13 "$@" + add_one_vlan br 14 "$@" +} + +set_vlans() +{ + ip link set dev br.11 "$@" + ip link set dev br.12 "$@" + ip link set dev br.13 "$@" + ip link set dev br.14 "$@" +} + +down_netdevs() +{ + local dev + + for dev in "$@"; do + ip_link_set_down $dev + done +} + +check_operstates() +{ + local opst_11=$1; shift + local opst_12=$1; shift + local opst_13=$1; shift + local opst_14=$1; shift + + check_operstate br.11 $opst_11 + check_operstate br.12 $opst_12 + check_operstate br.13 $opst_13 + check_operstate br.14 $opst_14 +} + +do_test_binding() +{ + local inject=$1; shift + local what=$1; shift + local opsts_d1=$1; shift + local opsts_d2=$1; shift + local opsts_d12=$1; shift + local opsts_d123=$1; shift + + RET=0 + + defer_scope_push + down_netdevs d1 + $inject + check_operstates $opsts_d1 + defer_scope_pop + + defer_scope_push + down_netdevs d2 + $inject + check_operstates $opsts_d2 + defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 + $inject + check_operstates $opsts_d12 + defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 d3 + $inject + check_operstates $opsts_d123 + defer_scope_pop + + log_test "Test bridge_binding $what" +} + +do_test_binding_on() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "0 1 1 1" \ + "1 0 1 1" \ + "0 0 0 1" \ + "0 0 0 0" +} + +do_test_binding_off() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "1 1 1 1" \ + "1 1 1 1" \ + "1 1 1 1" \ + "0 0 0 0" +} + +test_binding_on() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_on : "on" +} + +test_binding_off() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_off : "off" +} + +test_binding_toggle_on() +{ + add_vlans bridge_binding off + set_vlans up + set_vlans type vlan bridge_binding on + do_test_binding_on : "off->on" +} + +test_binding_toggle_off() +{ + add_vlans bridge_binding on + set_vlans up + set_vlans type vlan bridge_binding off + do_test_binding_off : "on->off" +} + +dfr_set_binding_on() +{ + set_vlans type vlan bridge_binding on + defer set_vlans type vlan bridge_binding off +} + +dfr_set_binding_off() +{ + set_vlans type vlan bridge_binding off + defer set_vlans type vlan bridge_binding on +} + +test_binding_toggle_on_when_lower_down() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_on dfr_set_binding_on "off->on when lower down" +} + +test_binding_toggle_off_when_lower_down() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_off dfr_set_binding_off "on->off when lower down" +} + +test_binding_toggle_on_when_upper_down() +{ + add_vlans bridge_binding off + set_vlans type vlan bridge_binding on + set_vlans up + do_test_binding_on : "off->on when upper down" +} + +test_binding_toggle_off_when_upper_down() +{ + add_vlans bridge_binding on + set_vlans type vlan bridge_binding off + set_vlans up + do_test_binding_off : "on->off when upper down" +} + +trap defer_scopes_cleanup EXIT +setup_prepare +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index d43afe243779..12e7cae251be 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -31,7 +31,8 @@ $(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a EXTRA_CLEAN += \ - $(top_srcdir)/tools/net/ynl/lib/__pycache__ \ + $(top_srcdir)/tools/net/ynl/pyynl/__pycache__ \ + $(top_srcdir)/tools/net/ynl/pyynl/lib/__pycache__ \ $(top_srcdir)/tools/net/ynl/lib/*.[ado] \ $(OUTPUT)/.libynl-*.sig \ $(OUTPUT)/libynl.a diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index e92e0b885861..7d14a7c0cb62 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -43,6 +43,7 @@ cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2)) # configure default variants for target kernel supported architectures XARCH_powerpc = ppc XARCH_mips = mips32le +XARCH_riscv = riscv64 XARCH = $(or $(XARCH_$(ARCH)),$(ARCH)) # map from user input variants to their kernel supported architectures @@ -51,6 +52,8 @@ ARCH_ppc64 = powerpc ARCH_ppc64le = powerpc ARCH_mips32le = mips ARCH_mips32be = mips +ARCH_riscv32 = riscv +ARCH_riscv64 = riscv ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) # kernel image names by architecture @@ -65,6 +68,8 @@ IMAGE_ppc = vmlinux IMAGE_ppc64 = vmlinux IMAGE_ppc64le = arch/powerpc/boot/zImage IMAGE_riscv = arch/riscv/boot/Image +IMAGE_riscv32 = arch/riscv/boot/Image +IMAGE_riscv64 = arch/riscv/boot/Image IMAGE_s390 = arch/s390/boot/bzImage IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi IMAGE = $(objtree)/$(IMAGE_$(XARCH)) @@ -82,6 +87,8 @@ DEFCONFIG_ppc = pmac32_defconfig DEFCONFIG_ppc64 = powernv_be_defconfig DEFCONFIG_ppc64le = powernv_defconfig DEFCONFIG_riscv = defconfig +DEFCONFIG_riscv32 = rv32_defconfig +DEFCONFIG_riscv64 = defconfig DEFCONFIG_s390 = defconfig DEFCONFIG_loongarch = defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) @@ -104,6 +111,8 @@ QEMU_ARCH_ppc = ppc QEMU_ARCH_ppc64 = ppc64 QEMU_ARCH_ppc64le = ppc64 QEMU_ARCH_riscv = riscv64 +QEMU_ARCH_riscv32 = riscv32 +QEMU_ARCH_riscv64 = riscv64 QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) @@ -130,6 +139,8 @@ QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIB QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 6fba7025c5e3..0e0e3b48a8c3 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -302,7 +302,10 @@ int expect_syszr(int expr, int llen) { int ret = 0; - if (expr) { + if (errno == ENOSYS) { + llen += printf(" = ENOSYS"); + result(llen, SKIPPED); + } else if (expr) { ret = 1; llen += printf(" = %d %s ", expr, errorname(errno)); result(llen, FAIL); @@ -342,7 +345,10 @@ int expect_sysne(int expr, int llen, int val) { int ret = 0; - if (expr == val) { + if (errno == ENOSYS) { + llen += printf(" = ENOSYS"); + result(llen, SKIPPED); + } else if (expr == val) { ret = 1; llen += printf(" = %d %s ", expr, errorname(errno)); result(llen, FAIL); @@ -367,7 +373,9 @@ int expect_syserr2(int expr, int expret, int experr1, int experr2, int llen) int _errno = errno; llen += printf(" = %d %s ", expr, errorname(_errno)); - if (expr != expret || (_errno != experr1 && _errno != experr2)) { + if (errno == ENOSYS) { + result(llen, SKIPPED); + } else if (expr != expret || (_errno != experr1 && _errno != experr2)) { ret = 1; if (experr2 == 0) llen += printf(" != (%d %s) ", expret, errorname(experr1)); @@ -1229,19 +1237,20 @@ int run_stdlib(int min, int max) static int expect_vfprintf(int llen, int c, const char *expected, const char *fmt, ...) { - int ret, fd; + int ret, pipefd[2]; ssize_t w, r; char buf[100]; FILE *memfile; va_list args; - fd = open("/tmp", O_TMPFILE | O_EXCL | O_RDWR, 0600); - if (fd == -1) { - result(llen, SKIPPED); - return 0; + ret = pipe(pipefd); + if (ret == -1) { + llen += printf(" pipe() != %s", strerror(errno)); + result(llen, FAIL); + return 1; } - memfile = fdopen(fd, "w+"); + memfile = fdopen(pipefd[1], "w"); if (!memfile) { result(llen, FAIL); return 1; @@ -1257,13 +1266,10 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm return 1; } - fflush(memfile); - lseek(fd, 0, SEEK_SET); - - r = read(fd, buf, sizeof(buf) - 1); - fclose(memfile); + r = read(pipefd[0], buf, sizeof(buf) - 1); + if (r != w) { llen += printf(" written(%d) != read(%d)", (int)w, (int)r); result(llen, FAIL); @@ -1323,7 +1329,8 @@ static int run_protection(int min __attribute__((unused)), int max __attribute__((unused))) { pid_t pid; - int llen = 0, status; + int llen = 0, ret; + siginfo_t siginfo = {}; struct rlimit rlimit = { 0, 0 }; llen += printf("0 -fstackprotector "); @@ -1361,10 +1368,11 @@ static int run_protection(int min __attribute__((unused)), return 1; default: - pid = waitpid(pid, &status, 0); + ret = waitid(P_PID, pid, &siginfo, WEXITED); - if (pid == -1 || !WIFSIGNALED(status) || WTERMSIG(status) != SIGABRT) { - llen += printf("waitpid()"); + if (ret != 0 || siginfo.si_signo != SIGCHLD || + siginfo.si_code != CLD_KILLED || siginfo.si_status != SIGABRT) { + llen += printf("waitid()"); result(llen, FAIL); return 1; } diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index e7ecda4ae796..9c5160c53881 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -17,7 +17,7 @@ perform_download=0 test_mode=system werror=1 llvm= -archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv s390 loongarch" +archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv32 riscv64 s390 loongarch" TEMP=$(getopt -o 'j:d:c:b:a:m:pelh' -n "$0" -- "$@") @@ -143,6 +143,13 @@ test_arch() { arch=$1 ct_arch=$(crosstool_arch "$arch") ct_abi=$(crosstool_abi "$1") + + if [ ! -d "${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}/bin/." ]; then + echo "No toolchain found in ${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}." + echo "Did you install the toolchains or set the correct arch ? Rerun with -h for help." + return 1 + fi + cross_compile=$(realpath "${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}/bin/${ct_arch}-${ct_abi}-") build_dir="${build_location}/${arch}" if [ "$werror" -ne 0 ]; then diff --git a/tools/testing/selftests/pci_endpoint/.gitignore b/tools/testing/selftests/pci_endpoint/.gitignore new file mode 100644 index 000000000000..6a4837a3e034 --- /dev/null +++ b/tools/testing/selftests/pci_endpoint/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +pci_endpoint_test diff --git a/tools/testing/selftests/pci_endpoint/Makefile b/tools/testing/selftests/pci_endpoint/Makefile new file mode 100644 index 000000000000..bf21ebf20b4a --- /dev/null +++ b/tools/testing/selftests/pci_endpoint/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -O2 -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) +LDFLAGS += -lrt -lpthread -lm + +TEST_GEN_PROGS = pci_endpoint_test + +include ../lib.mk diff --git a/tools/testing/selftests/pci_endpoint/config b/tools/testing/selftests/pci_endpoint/config new file mode 100644 index 000000000000..7cdcf117db8d --- /dev/null +++ b/tools/testing/selftests/pci_endpoint/config @@ -0,0 +1,4 @@ +CONFIG_PCI_ENDPOINT=y +CONFIG_PCI_ENDPOINT_CONFIGFS=y +CONFIG_PCI_EPF_TEST=m +CONFIG_PCI_ENDPOINT_TEST=m diff --git a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c new file mode 100644 index 000000000000..c267b822c108 --- /dev/null +++ b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Kselftest for PCI Endpoint Subsystem + * + * Copyright (c) 2022 Samsung Electronics Co., Ltd. + * https://www.samsung.com + * Author: Aman Gupta <aman1.gupta@samsung.com> + * + * Copyright (c) 2024, Linaro Ltd. + * Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include "../../../../include/uapi/linux/pcitest.h" + +#include "../kselftest_harness.h" + +#define pci_ep_ioctl(cmd, arg) \ +({ \ + ret = ioctl(self->fd, cmd, arg); \ + ret = ret < 0 ? -errno : 0; \ +}) + +static const char *test_device = "/dev/pci-endpoint-test.0"; +static const unsigned long test_size[5] = { 1, 1024, 1025, 1024000, 1024001 }; + +FIXTURE(pci_ep_bar) +{ + int fd; +}; + +FIXTURE_SETUP(pci_ep_bar) +{ + self->fd = open(test_device, O_RDWR); + + ASSERT_NE(-1, self->fd) TH_LOG("Can't open PCI Endpoint Test device"); +} + +FIXTURE_TEARDOWN(pci_ep_bar) +{ + close(self->fd); +} + +FIXTURE_VARIANT(pci_ep_bar) +{ + int barno; +}; + +FIXTURE_VARIANT_ADD(pci_ep_bar, BAR0) { .barno = 0 }; +FIXTURE_VARIANT_ADD(pci_ep_bar, BAR1) { .barno = 1 }; +FIXTURE_VARIANT_ADD(pci_ep_bar, BAR2) { .barno = 2 }; +FIXTURE_VARIANT_ADD(pci_ep_bar, BAR3) { .barno = 3 }; +FIXTURE_VARIANT_ADD(pci_ep_bar, BAR4) { .barno = 4 }; +FIXTURE_VARIANT_ADD(pci_ep_bar, BAR5) { .barno = 5 }; + +TEST_F(pci_ep_bar, BAR_TEST) +{ + int ret; + + pci_ep_ioctl(PCITEST_BAR, variant->barno); + EXPECT_FALSE(ret) TH_LOG("Test failed for BAR%d", variant->barno); +} + +FIXTURE(pci_ep_basic) +{ + int fd; +}; + +FIXTURE_SETUP(pci_ep_basic) +{ + self->fd = open(test_device, O_RDWR); + + ASSERT_NE(-1, self->fd) TH_LOG("Can't open PCI Endpoint Test device"); +} + +FIXTURE_TEARDOWN(pci_ep_basic) +{ + close(self->fd); +} + +TEST_F(pci_ep_basic, CONSECUTIVE_BAR_TEST) +{ + int ret; + + pci_ep_ioctl(PCITEST_BARS, 0); + EXPECT_FALSE(ret) TH_LOG("Consecutive BAR test failed"); +} + +TEST_F(pci_ep_basic, LEGACY_IRQ_TEST) +{ + int ret; + + pci_ep_ioctl(PCITEST_SET_IRQTYPE, 0); + ASSERT_EQ(0, ret) TH_LOG("Can't set Legacy IRQ type"); + + pci_ep_ioctl(PCITEST_LEGACY_IRQ, 0); + EXPECT_FALSE(ret) TH_LOG("Test failed for Legacy IRQ"); +} + +TEST_F(pci_ep_basic, MSI_TEST) +{ + int ret, i; + + pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1); + ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type"); + + for (i = 1; i <= 32; i++) { + pci_ep_ioctl(PCITEST_MSI, i); + EXPECT_FALSE(ret) TH_LOG("Test failed for MSI%d", i); + } +} + +TEST_F(pci_ep_basic, MSIX_TEST) +{ + int ret, i; + + pci_ep_ioctl(PCITEST_SET_IRQTYPE, 2); + ASSERT_EQ(0, ret) TH_LOG("Can't set MSI-X IRQ type"); + + for (i = 1; i <= 2048; i++) { + pci_ep_ioctl(PCITEST_MSIX, i); + EXPECT_FALSE(ret) TH_LOG("Test failed for MSI-X%d", i); + } +} + +FIXTURE(pci_ep_data_transfer) +{ + int fd; +}; + +FIXTURE_SETUP(pci_ep_data_transfer) +{ + self->fd = open(test_device, O_RDWR); + + ASSERT_NE(-1, self->fd) TH_LOG("Can't open PCI Endpoint Test device"); +} + +FIXTURE_TEARDOWN(pci_ep_data_transfer) +{ + close(self->fd); +} + +FIXTURE_VARIANT(pci_ep_data_transfer) +{ + bool use_dma; +}; + +FIXTURE_VARIANT_ADD(pci_ep_data_transfer, memcpy) +{ + .use_dma = false, +}; + +FIXTURE_VARIANT_ADD(pci_ep_data_transfer, dma) +{ + .use_dma = true, +}; + +TEST_F(pci_ep_data_transfer, READ_TEST) +{ + struct pci_endpoint_test_xfer_param param = {}; + int ret, i; + + if (variant->use_dma) + param.flags = PCITEST_FLAGS_USE_DMA; + + pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1); + ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type"); + + for (i = 0; i < ARRAY_SIZE(test_size); i++) { + param.size = test_size[i]; + pci_ep_ioctl(PCITEST_READ, ¶m); + EXPECT_FALSE(ret) TH_LOG("Test failed for size (%ld)", + test_size[i]); + } +} + +TEST_F(pci_ep_data_transfer, WRITE_TEST) +{ + struct pci_endpoint_test_xfer_param param = {}; + int ret, i; + + if (variant->use_dma) + param.flags = PCITEST_FLAGS_USE_DMA; + + pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1); + ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type"); + + for (i = 0; i < ARRAY_SIZE(test_size); i++) { + param.size = test_size[i]; + pci_ep_ioctl(PCITEST_WRITE, ¶m); + EXPECT_FALSE(ret) TH_LOG("Test failed for size (%ld)", + test_size[i]); + } +} + +TEST_F(pci_ep_data_transfer, COPY_TEST) +{ + struct pci_endpoint_test_xfer_param param = {}; + int ret, i; + + if (variant->use_dma) + param.flags = PCITEST_FLAGS_USE_DMA; + + pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1); + ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type"); + + for (i = 0; i < ARRAY_SIZE(test_size); i++) { + param.size = test_size[i]; + pci_ep_ioctl(PCITEST_COPY, ¶m); + EXPECT_FALSE(ret) TH_LOG("Test failed for size (%ld)", + test_size[i]); + } +} +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pid_namespace/.gitignore b/tools/testing/selftests/pid_namespace/.gitignore index 93ab9d7e5b7e..5118f0f3edf4 100644 --- a/tools/testing/selftests/pid_namespace/.gitignore +++ b/tools/testing/selftests/pid_namespace/.gitignore @@ -1 +1,2 @@ +pid_max regression_enomem diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile index 9286a1d22cd3..b972f55d07ae 100644 --- a/tools/testing/selftests/pid_namespace/Makefile +++ b/tools/testing/selftests/pid_namespace/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -g $(KHDR_INCLUDES) -TEST_GEN_PROGS = regression_enomem +TEST_GEN_PROGS = regression_enomem pid_max LOCAL_HDRS += $(selfdir)/pidfd/pidfd.h diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c new file mode 100644 index 000000000000..51c414faabb0 --- /dev/null +++ b/tools/testing/selftests/pid_namespace/pid_max.c @@ -0,0 +1,358 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#define _GNU_SOURCE +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/types.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/wait.h> + +#include "../kselftest_harness.h" +#include "../pidfd/pidfd.h" + +#define __STACK_SIZE (8 * 1024 * 1024) +static pid_t do_clone(int (*fn)(void *), void *arg, int flags) +{ + char *stack; + pid_t ret; + + stack = malloc(__STACK_SIZE); + if (!stack) + return -ENOMEM; + +#ifdef __ia64__ + ret = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg); +#else + ret = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg); +#endif + free(stack); + return ret; +} + +static int pid_max_cb(void *data) +{ + int fd, ret; + pid_t pid; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return -1; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return -1; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return -1; + } + + ret = write(fd, "500", sizeof("500") - 1); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return -1; + } + + for (int i = 0; i < 501; i++) { + pid = fork(); + if (pid == 0) + exit(EXIT_SUCCESS); + wait_for_pid(pid); + if (pid > 500) { + fprintf(stderr, "Managed to create pid number beyond limit\n"); + return -1; + } + } + + return 0; +} + +static int pid_max_nested_inner(void *data) +{ + int fret = -1; + pid_t pids[2]; + int fd, i, ret; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return fret; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return fret; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return fret; + } + + ret = write(fd, "500", sizeof("500") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return fret; + } + + pids[0] = fork(); + if (pids[0] < 0) { + fprintf(stderr, "Failed to create first new process\n"); + return fret; + } + + if (pids[0] == 0) + exit(EXIT_SUCCESS); + + pids[1] = fork(); + wait_for_pid(pids[0]); + if (pids[1] >= 0) { + if (pids[1] == 0) + exit(EXIT_SUCCESS); + wait_for_pid(pids[1]); + + fprintf(stderr, "Managed to create process even though ancestor pid namespace had a limit\n"); + return fret; + } + + /* Now make sure that we wrap pids at 400. */ + for (i = 0; i < 510; i++) { + pid_t pid; + + pid = fork(); + if (pid < 0) + return fret; + + if (pid == 0) + exit(EXIT_SUCCESS); + + wait_for_pid(pid); + if (pid >= 500) { + fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid); + return fret; + } + } + + return 0; +} + +static int pid_max_nested_outer(void *data) +{ + int fret = -1, nr_procs = 400; + pid_t pids[1000]; + int fd, i, ret; + pid_t pid; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return fret; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return fret; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return fret; + } + + ret = write(fd, "400", sizeof("400") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return fret; + } + + /* + * Create 397 processes. This leaves room for do_clone() (398) and + * one more 399. So creating another process needs to fail. + */ + for (nr_procs = 0; nr_procs < 396; nr_procs++) { + pid = fork(); + if (pid < 0) + goto reap; + + if (pid == 0) + exit(EXIT_SUCCESS); + + pids[nr_procs] = pid; + } + + pid = do_clone(pid_max_nested_inner, NULL, CLONE_NEWPID | CLONE_NEWNS); + if (pid < 0) { + fprintf(stderr, "%m - Failed to clone nested pidns\n"); + goto reap; + } + + if (wait_for_pid(pid)) { + fprintf(stderr, "%m - Nested pid_max failed\n"); + goto reap; + } + + fret = 0; + +reap: + for (int i = 0; i < nr_procs; i++) + wait_for_pid(pids[i]); + + return fret; +} + +static int pid_max_nested_limit_inner(void *data) +{ + int fret = -1, nr_procs = 400; + int fd, ret; + pid_t pid; + pid_t pids[1000]; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return fret; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return fret; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return fret; + } + + ret = write(fd, "500", sizeof("500") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return fret; + } + + for (nr_procs = 0; nr_procs < 500; nr_procs++) { + pid = fork(); + if (pid < 0) + break; + + if (pid == 0) + exit(EXIT_SUCCESS); + + pids[nr_procs] = pid; + } + + if (nr_procs >= 400) { + fprintf(stderr, "Managed to create processes beyond the configured outer limit\n"); + goto reap; + } + + fret = 0; + +reap: + for (int i = 0; i < nr_procs; i++) + wait_for_pid(pids[i]); + + return fret; +} + +static int pid_max_nested_limit_outer(void *data) +{ + int fd, ret; + pid_t pid; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return -1; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return -1; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return -1; + } + + ret = write(fd, "400", sizeof("400") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return -1; + } + + pid = do_clone(pid_max_nested_limit_inner, NULL, CLONE_NEWPID | CLONE_NEWNS); + if (pid < 0) { + fprintf(stderr, "%m - Failed to clone nested pidns\n"); + return -1; + } + + if (wait_for_pid(pid)) { + fprintf(stderr, "%m - Nested pid_max failed\n"); + return -1; + } + + return 0; +} + +TEST(pid_max_simple) +{ + pid_t pid; + + + pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS); + ASSERT_GT(pid, 0); + ASSERT_EQ(0, wait_for_pid(pid)); +} + +TEST(pid_max_nested_limit) +{ + pid_t pid; + + pid = do_clone(pid_max_nested_limit_outer, NULL, CLONE_NEWPID | CLONE_NEWNS); + ASSERT_GT(pid, 0); + ASSERT_EQ(0, wait_for_pid(pid)); +} + +TEST(pid_max_nested) +{ + pid_t pid; + + pid = do_clone(pid_max_nested_outer, NULL, CLONE_NEWPID | CLONE_NEWNS); + ASSERT_GT(pid, 0); + ASSERT_EQ(0, wait_for_pid(pid)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 973198a3ec3d..bf92481f925c 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -6,3 +6,5 @@ pidfd_wait pidfd_fdinfo_test pidfd_getfd_test pidfd_setns_test +pidfd_file_handle_test +pidfd_bind_mount diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index d731e3e76d5b..301343a11b62 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -2,7 +2,8 @@ CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ - pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test + pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ + pidfd_file_handle_test pidfd_bind_mount include ../lib.mk diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 88d6830ee004..0b96ac4b8ce5 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -12,11 +12,11 @@ #include <stdlib.h> #include <string.h> #include <syscall.h> -#include <sys/mount.h> #include <sys/types.h> #include <sys/wait.h> #include "../kselftest.h" +#include "../clone3/clone3_selftests.h" #ifndef P_PIDFD #define P_PIDFD 3 @@ -68,6 +68,11 @@ #define PIDFD_SKIP 3 #define PIDFD_XFAIL 4 +static inline int sys_waitid(int which, pid_t pid, siginfo_t *info, int options) +{ + return syscall(__NR_waitid, which, pid, info, options, NULL); +} + static inline int wait_for_pid(pid_t pid) { int status, ret; @@ -114,4 +119,37 @@ static inline int sys_memfd_create(const char *name, unsigned int flags) return syscall(__NR_memfd_create, name, flags); } +static inline pid_t create_child(int *pidfd, unsigned flags) +{ + struct __clone_args args = { + .flags = CLONE_PIDFD | flags, + .exit_signal = SIGCHLD, + .pidfd = ptr_to_u64(pidfd), + }; + + return sys_clone3(&args, sizeof(struct __clone_args)); +} + +static inline ssize_t read_nointr(int fd, void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = read(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static inline ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + #endif /* __PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c new file mode 100644 index 000000000000..7822dd080258 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <linux/fs.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> + +#include "pidfd.h" +#include "../kselftest_harness.h" + +#ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #elif defined __ia64__ + #define __NR_open_tree (428 + 1024) + #else + #define __NR_open_tree 428 + #endif +#endif + +#ifndef __NR_move_mount + #if defined __alpha__ + #define __NR_move_mount 539 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_move_mount 4429 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_move_mount 6429 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_move_mount 5429 + #endif + #elif defined __ia64__ + #define __NR_move_mount (428 + 1024) + #else + #define __NR_move_mount 429 + #endif +#endif + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#endif + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#endif + +static inline int sys_move_mount(int from_dfd, const char *from_pathname, + int to_dfd, const char *to_pathname, + unsigned int flags) +{ + return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, + to_pathname, flags); +} + +#ifndef OPEN_TREE_CLONE +#define OPEN_TREE_CLONE 1 +#endif + +#ifndef OPEN_TREE_CLOEXEC +#define OPEN_TREE_CLOEXEC O_CLOEXEC +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +#endif + +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) +{ + return syscall(__NR_open_tree, dfd, filename, flags); +} + +FIXTURE(pidfd_bind_mount) { + char template[PATH_MAX]; + int fd_tmp; + int pidfd; + struct stat st1; + struct stat st2; + __u32 gen1; + __u32 gen2; + bool must_unmount; +}; + +FIXTURE_SETUP(pidfd_bind_mount) +{ + self->fd_tmp = -EBADF; + self->must_unmount = false; + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_LE(snprintf(self->template, PATH_MAX, "%s", P_tmpdir "/pidfd_bind_mount_XXXXXX"), PATH_MAX); + self->fd_tmp = mkstemp(self->template); + ASSERT_GE(self->fd_tmp, 0); + self->pidfd = sys_pidfd_open(getpid(), 0); + ASSERT_GE(self->pidfd, 0); + ASSERT_GE(fstat(self->pidfd, &self->st1), 0); + ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen1), 0); +} + +FIXTURE_TEARDOWN(pidfd_bind_mount) +{ + ASSERT_EQ(close(self->fd_tmp), 0); + if (self->must_unmount) + ASSERT_EQ(umount2(self->template, 0), 0); + ASSERT_EQ(unlink(self->template), 0); +} + +/* + * Test that a detached mount can be created for a pidfd and then + * attached to the filesystem hierarchy. + */ +TEST_F(pidfd_bind_mount, bind_mount) +{ + int fd_tree; + + fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH); + ASSERT_GE(fd_tree, 0); + + ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); + self->must_unmount = true; + + ASSERT_EQ(close(fd_tree), 0); +} + +/* Test that a pidfd can be reopened through procfs. */ +TEST_F(pidfd_bind_mount, reopen) +{ + int pidfd; + char proc_path[PATH_MAX]; + + sprintf(proc_path, "/proc/self/fd/%d", self->pidfd); + pidfd = open(proc_path, O_RDONLY | O_NOCTTY | O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_GE(fstat(self->pidfd, &self->st2), 0); + ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen2), 0); + + ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino); + ASSERT_TRUE(self->gen1 == self->gen2); + + ASSERT_EQ(close(pidfd), 0); +} + +/* + * Test that a detached mount can be created for a pidfd and then + * attached to the filesystem hierarchy and reopened. + */ +TEST_F(pidfd_bind_mount, bind_mount_reopen) +{ + int fd_tree, fd_pidfd_mnt; + + fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH); + ASSERT_GE(fd_tree, 0); + + ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); + self->must_unmount = true; + + fd_pidfd_mnt = openat(-EBADF, self->template, O_RDONLY | O_NOCTTY | O_CLOEXEC); + ASSERT_GE(fd_pidfd_mnt, 0); + + ASSERT_GE(fstat(fd_tree, &self->st2), 0); + ASSERT_EQ(ioctl(fd_pidfd_mnt, FS_IOC_GETVERSION, &self->gen2), 0); + + ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino); + ASSERT_TRUE(self->gen1 == self->gen2); + + ASSERT_EQ(close(fd_tree), 0); + ASSERT_EQ(close(fd_pidfd_mnt), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c new file mode 100644 index 000000000000..439b9c6c0457 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c @@ -0,0 +1,503 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/types.h> +#include <poll.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/socket.h> +#include <linux/kcmp.h> +#include <sys/stat.h> + +#include "pidfd.h" +#include "../kselftest_harness.h" + +FIXTURE(file_handle) +{ + pid_t pid; + int pidfd; + + pid_t child_pid1; + int child_pidfd1; + + pid_t child_pid2; + int child_pidfd2; + + pid_t child_pid3; + int child_pidfd3; +}; + +FIXTURE_SETUP(file_handle) +{ + int ret; + int ipc_sockets[2]; + char c; + + self->pid = getpid(); + self->pidfd = sys_pidfd_open(self->pid, 0); + ASSERT_GE(self->pidfd, 0); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER); + EXPECT_GE(self->child_pid1, 0); + + if (self->child_pid1 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid2, 0); + + if (self->child_pid2 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid3 = create_child(&self->child_pidfd3, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid3, 0); + + if (self->child_pid3 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); +} + +FIXTURE_TEARDOWN(file_handle) +{ + EXPECT_EQ(close(self->pidfd), 0); + + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0), 0); + if (self->child_pidfd1 >= 0) + EXPECT_EQ(0, close(self->child_pidfd1)); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0); + + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0), 0); + if (self->child_pidfd2 >= 0) + EXPECT_EQ(0, close(self->child_pidfd2)); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0); + + if (self->child_pidfd3 >= 0) { + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(0, close(self->child_pidfd3)); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); + } +} + +/* + * Test that we can decode a pidfs file handle in the same pid + * namespace. + */ +TEST_F(file_handle, file_handle_same_pidns) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + free(fh); +} + +/* + * Test that we can decode a pidfs file handle from a child pid + * namespace. + */ +TEST_F(file_handle, file_handle_child_pidns) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + free(fh); +} + +/* + * Test that we fail to decode a pidfs file handle from an ancestor + * child pid namespace. + */ +TEST_F(file_handle, file_handle_foreign_pidns) +{ + int mnt_id; + struct file_handle *fh; + pid_t pid; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->pidfd, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(setns(self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int pidfd = open_by_handle_at(self->pidfd, fh, 0); + if (pidfd >= 0) { + TH_LOG("Managed to open pidfd outside of the caller's pid namespace hierarchy"); + _exit(1); + } + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); + + free(fh); +} + +/* + * Test that we can decode a pidfs file handle of a process that has + * exited but not been reaped. + */ +TEST_F(file_handle, pid_has_exited) +{ + int mnt_id, pidfd, child_pidfd3; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + child_pidfd3 = self->child_pidfd3; + self->child_pidfd3 = -EBADF; + EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(close(child_pidfd3), 0); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED | WNOWAIT), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); +} + +/* + * Test that we fail to decode a pidfs file handle of a process that has + * already been reaped. + */ +TEST_F(file_handle, pid_has_been_reaped) +{ + int mnt_id, pidfd, child_pidfd3; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + child_pidfd3 = self->child_pidfd3; + self->child_pidfd3 = -EBADF; + EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(close(child_pidfd3), 0); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_LT(pidfd, 0); +} + +/* + * Test valid flags to open a pidfd file handle. Note, that + * PIDFD_NONBLOCK is defined as O_NONBLOCK and O_NONBLOCK is an alias to + * O_NDELAY. Also note that PIDFD_THREAD is an alias for O_EXCL. + */ +TEST_F(file_handle, open_by_handle_at_valid_flags) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, + O_RDONLY | + O_WRONLY | + O_RDWR | + O_NONBLOCK | + O_NDELAY | + O_CLOEXEC | + O_EXCL); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); +} + +/* + * Test that invalid flags passed to open a pidfd file handle are + * rejected. + */ +TEST_F(file_handle, open_by_handle_at_invalid_flags) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + static const struct invalid_pidfs_file_handle_flags { + int oflag; + const char *oflag_name; + } invalid_pidfs_file_handle_flags[] = { + { FASYNC, "FASYNC" }, + { O_CREAT, "O_CREAT" }, + { O_NOCTTY, "O_NOCTTY" }, + { O_CREAT, "O_CREAT" }, + { O_TRUNC, "O_TRUNC" }, + { O_APPEND, "O_APPEND" }, + { O_SYNC, "O_SYNC" }, + { O_DSYNC, "O_DSYNC" }, + { O_DIRECT, "O_DIRECT" }, + { O_DIRECTORY, "O_DIRECTORY" }, + { O_NOFOLLOW, "O_NOFOLLOW" }, + { O_NOATIME, "O_NOATIME" }, + { O_PATH, "O_PATH" }, + { O_TMPFILE, "O_TMPFILE" }, + /* + * O_LARGEFILE is added implicitly by + * open_by_handle_at() so pidfs simply masks it off. + */ + }; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + for (int i = 0; i < ARRAY_SIZE(invalid_pidfs_file_handle_flags); i++) { + pidfd = open_by_handle_at(self->pidfd, fh, invalid_pidfs_file_handle_flags[i].oflag); + ASSERT_LT(pidfd, 0) { + TH_LOG("open_by_handle_at() succeeded with invalid flags: %s", invalid_pidfs_file_handle_flags[i].oflag_name); + } + } +} + +/* Test that lookup fails. */ +TEST_F(file_handle, lookup_must_fail) +{ + int mnt_id; + struct file_handle *fh; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, ENOTDIR); + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, 0), 0); + ASSERT_EQ(errno, ENOTDIR); +} + +#ifndef AT_HANDLE_CONNECTABLE +#define AT_HANDLE_CONNECTABLE 0x002 +#endif + +/* + * Test that AT_HANDLE_CONNECTABLE is rejected. Connectable file handles + * don't make sense for pidfs. Note that currently AT_HANDLE_CONNECTABLE + * is rejected because it is incompatible with AT_EMPTY_PATH which is + * required with pidfds as we don't support lookup. + */ +TEST_F(file_handle, invalid_name_to_handle_at_flags) +{ + int mnt_id; + struct file_handle *fh; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_CONNECTABLE), 0); +} + +#ifndef AT_HANDLE_FID +#define AT_HANDLE_FID 0x200 +#endif + +/* + * Test that a request with AT_HANDLE_FID always leads to decodable file + * handle as pidfs always provides export operations. + */ +TEST_F(file_handle, valid_name_to_handle_at_flags) +{ + int mnt_id, pidfd; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_FID), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 7c2a4349170a..222f8131283b 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -19,7 +19,6 @@ #include <linux/ioctl.h> #include "pidfd.h" -#include "../clone3/clone3_selftests.h" #include "../kselftest_harness.h" #ifndef PIDFS_IOCTL_MAGIC @@ -118,22 +117,6 @@ FIXTURE(current_nsset) int child_pidfd_derived_nsfds2[PIDFD_NS_MAX]; }; -static int sys_waitid(int which, pid_t pid, int options) -{ - return syscall(__NR_waitid, which, pid, NULL, options, NULL); -} - -pid_t create_child(int *pidfd, unsigned flags) -{ - struct __clone_args args = { - .flags = CLONE_PIDFD | flags, - .exit_signal = SIGCHLD, - .pidfd = ptr_to_u64(pidfd), - }; - - return sys_clone3(&args, sizeof(struct clone_args)); -} - static bool switch_timens(void) { int fd, ret; @@ -150,28 +133,6 @@ static bool switch_timens(void) return ret == 0; } -static ssize_t read_nointr(int fd, void *buf, size_t count) -{ - ssize_t ret; - - do { - ret = read(fd, buf, count); - } while (ret < 0 && errno == EINTR); - - return ret; -} - -static ssize_t write_nointr(int fd, const void *buf, size_t count) -{ - ssize_t ret; - - do { - ret = write(fd, buf, count); - } while (ret < 0 && errno == EINTR); - - return ret; -} - FIXTURE_SETUP(current_nsset) { int i, proc_fd, ret; @@ -229,7 +190,7 @@ FIXTURE_SETUP(current_nsset) _exit(EXIT_SUCCESS); } - ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED | WNOWAIT), 0); self->pidfd = sys_pidfd_open(self->pid, 0); EXPECT_GE(self->pidfd, 0) { @@ -432,9 +393,9 @@ FIXTURE_TEARDOWN(current_nsset) EXPECT_EQ(0, close(self->child_pidfd1)); if (self->child_pidfd2 >= 0) EXPECT_EQ(0, close(self->child_pidfd2)); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0); } static int preserve_ns(const int pid, const char *ns) diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index 9faa686f90e4..e9728e86b4f2 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -497,7 +497,7 @@ static int child_poll_leader_exit_test(void *args) pthread_create(&t2, NULL, test_pidfd_poll_leader_exit_thread, NULL); /* - * glibc exit calls exit_group syscall, so explicity call exit only + * glibc exit calls exit_group syscall, so explicitly call exit only * so that only the group leader exits, leaving the threads alone. */ *child_exit_secs = time(NULL); diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 0dcb8365ddc3..1e2d49751cde 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -26,22 +26,11 @@ #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) #endif -static pid_t sys_clone3(struct clone_args *args) -{ - return syscall(__NR_clone3, args, sizeof(struct clone_args)); -} - -static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, - struct rusage *ru) -{ - return syscall(__NR_waitid, which, pid, info, options, ru); -} - TEST(wait_simple) { int pidfd = -1; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .pidfd = ptr_to_u64(&pidfd), .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, @@ -55,7 +44,7 @@ TEST(wait_simple) pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC); ASSERT_GE(pidfd, 0); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_NE(pid, 0); EXPECT_EQ(close(pidfd), 0); pidfd = -1; @@ -63,18 +52,18 @@ TEST(wait_simple) pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(pidfd, 0); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_NE(pid, 0); EXPECT_EQ(close(pidfd), 0); pidfd = -1; - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) exit(EXIT_SUCCESS); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_GE(pid, 0); ASSERT_EQ(WIFEXITED(info.si_status), true); ASSERT_EQ(WEXITSTATUS(info.si_status), 0); @@ -89,7 +78,7 @@ TEST(wait_states) { int pidfd = -1; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .pidfd = ptr_to_u64(&pidfd), .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, @@ -102,7 +91,7 @@ TEST(wait_states) }; ASSERT_EQ(pipe(pfd), 0); - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) { @@ -117,28 +106,28 @@ TEST(wait_states) } close(pfd[0]); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED), 0); ASSERT_EQ(write(pfd[1], "C", 1), 1); close(pfd[1]); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_CONTINUED); ASSERT_EQ(info.si_pid, parent_tid); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_KILLED); ASSERT_EQ(info.si_pid, parent_tid); @@ -151,7 +140,7 @@ TEST(wait_nonblock) int pidfd; unsigned int flags = 0; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .flags = CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, @@ -173,12 +162,12 @@ TEST(wait_nonblock) SKIP(return, "Skipping PIDFD_NONBLOCK test"); } - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_LT(ret, 0); ASSERT_EQ(errno, ECHILD); EXPECT_EQ(close(pidfd), 0); - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) { @@ -201,7 +190,7 @@ TEST(wait_nonblock) * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when * child processes exist but none have exited. */ - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_LT(ret, 0); ASSERT_EQ(errno, EAGAIN); @@ -210,19 +199,19 @@ TEST(wait_nonblock) * WNOHANG raised explicitly when child processes exist but none have * exited. */ - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG); ASSERT_EQ(ret, 0); ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_EXITED); ASSERT_EQ(info.si_pid, parent_tid); diff --git a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c index 580fcac0a09f..b71ef8a493ed 100644 --- a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c +++ b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c @@ -20,7 +20,7 @@ static int test_gettimeofday(void) gettimeofday(&tv_end, NULL); } - timersub(&tv_start, &tv_end, &tv_diff); + timersub(&tv_end, &tv_start, &tv_diff); printf("time = %.6f\n", tv_diff.tv_sec + (tv_diff.tv_usec) * 1e-6); diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h index 51729d9a7111..3a0129467de6 100644 --- a/tools/testing/selftests/powerpc/include/pkeys.h +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -35,10 +35,18 @@ #define __NR_pkey_alloc 384 #define __NR_pkey_free 385 +#ifndef NT_PPC_PKEY +#define NT_PPC_PKEY 0x110 +#endif + #define PKEY_BITS_PER_PKEY 2 #define NR_PKEYS 32 #define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1) +#define AMR_BITS_PER_PKEY 2 +#define PKEY_REG_BITS (sizeof(u64) * 8) +#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) + inline unsigned long pkeyreg_get(void) { return mfspr(SPRN_AMR); diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index f6da4cb30cd6..f061434af452 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -16,26 +16,7 @@ #include <unistd.h> #include "ptrace.h" #include "child.h" - -#ifndef __NR_pkey_alloc -#define __NR_pkey_alloc 384 -#endif - -#ifndef __NR_pkey_free -#define __NR_pkey_free 385 -#endif - -#ifndef NT_PPC_PKEY -#define NT_PPC_PKEY 0x110 -#endif - -#ifndef PKEY_DISABLE_EXECUTE -#define PKEY_DISABLE_EXECUTE 0x4 -#endif - -#define AMR_BITS_PER_PKEY 2 -#define PKEY_REG_BITS (sizeof(u64) * 8) -#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) +#include "pkeys.h" #define CORE_FILE_LIMIT (5 * 1024 * 1024) /* 5 MB should be enough */ @@ -61,16 +42,6 @@ struct shared_info { time_t core_time; }; -static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) -{ - return syscall(__NR_pkey_alloc, flags, init_access_rights); -} - -static int sys_pkey_free(int pkey) -{ - return syscall(__NR_pkey_free, pkey); -} - static int increase_core_file_limit(void) { struct rlimit rlim; diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index d89474377f11..fc633014424f 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -7,26 +7,7 @@ */ #include "ptrace.h" #include "child.h" - -#ifndef __NR_pkey_alloc -#define __NR_pkey_alloc 384 -#endif - -#ifndef __NR_pkey_free -#define __NR_pkey_free 385 -#endif - -#ifndef NT_PPC_PKEY -#define NT_PPC_PKEY 0x110 -#endif - -#ifndef PKEY_DISABLE_EXECUTE -#define PKEY_DISABLE_EXECUTE 0x4 -#endif - -#define AMR_BITS_PER_PKEY 2 -#define PKEY_REG_BITS (sizeof(u64) * 8) -#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) +#include "pkeys.h" static const char user_read[] = "[User Read (Running)]"; static const char user_write[] = "[User Write (Running)]"; @@ -61,11 +42,6 @@ struct shared_info { unsigned long invalid_uamor; }; -static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) -{ - return syscall(__NR_pkey_alloc, flags, init_access_rights); -} - static int child(struct shared_info *info) { unsigned long reg; diff --git a/tools/testing/selftests/powerpc/vphn/test-vphn.c b/tools/testing/selftests/powerpc/vphn/test-vphn.c index 81d3069ffb84..f348f54914a9 100644 --- a/tools/testing/selftests/powerpc/vphn/test-vphn.c +++ b/tools/testing/selftests/powerpc/vphn/test-vphn.c @@ -275,7 +275,7 @@ static struct test { } }, { - /* Parse a 32-bit value split accross two consecutives 64-bit + /* Parse a 32-bit value split across two consecutives 64-bit * input values. */ "vphn: 16-bit value followed by 2 x 32-bit values", diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 134cdef5a6e0..48a8052d5dae 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -181,10 +181,11 @@ done # Function to check for presence of a file on the specified system. # Complain if the system cannot be reached, and retry after a wait. -# Currently just waits forever if a machine disappears. +# Currently just waits 15 minutes if a machine disappears. # # Usage: checkremotefile system pathname checkremotefile () { + local nsshfails=0 local ret local sleeptime=60 @@ -195,6 +196,11 @@ checkremotefile () { if test "$ret" -eq 255 then echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log" + nsshfails=$((nsshfails+1)) + if ((nsshfails > 15)) + then + return 255 + fi elif test "$ret" -eq 0 then return 0 @@ -268,12 +274,23 @@ echo All batches started. `date` | tee -a "$oldrun/remote-log" for i in $systems do echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log" - while checkremotefile "$i" "$resdir/$ds/remote.run" + while : do + checkremotefile "$i" "$resdir/$ds/remote.run" + ret=$? + if test "$ret" -eq 1 + then + echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" + ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) + break; + fi + if test "$ret" -eq 255 + then + echo System $i persistent ssh failure, lost results `date` | tee -a "$oldrun/remote-log" + break; + fi sleep 30 done - echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" - ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) done ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 8e50bfd4b710..90318591dae2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -5,3 +5,4 @@ rcutree.gp_cleanup_delay=3 rcutree.kthread_prio=2 threadirqs rcutree.use_softirq=0 +rcutorture.preempt_duration=10 diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index f408bd6bfc3d..984534cfbf1b 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -8,5 +8,6 @@ TEST_GEN_PROGS := resctrl_tests LOCAL_HDRS += $(wildcard *.h) include ../lib.mk +CFLAGS += -I$(top_srcdir)/tools/include $(OUTPUT)/resctrl_tests: $(wildcard *.c) diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index 3bbf3042fb06..d09e693dc739 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -169,8 +169,8 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param return ret; ret = check_results(¶m, span, n); - if (ret && (get_vendor() == ARCH_INTEL)) - ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); + if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n"); return ret; } diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index 536d9089d2f6..c7e9adc0368f 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -201,6 +201,8 @@ static int mba_run_test(const struct resctrl_test *test, const struct user_param return ret; ret = check_results(); + if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n"); return ret; } diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index 315b2ef3b3bc..84d8bc250539 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -160,8 +160,8 @@ static int mbm_run_test(const struct resctrl_test *test, const struct user_param return ret; ret = check_results(param.fill_buf ? param.fill_buf->buf_size : 0); - if (ret && (get_vendor() == ARCH_INTEL)) - ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); + if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n"); return ret; } diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index dab1953fc7a0..cd3adfc14969 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -11,6 +11,7 @@ #include <signal.h> #include <dirent.h> #include <stdbool.h> +#include <ctype.h> #include <sys/stat.h> #include <sys/ioctl.h> #include <sys/mount.h> @@ -21,6 +22,7 @@ #include <sys/eventfd.h> #include <asm/unistd.h> #include <linux/perf_event.h> +#include <linux/compiler.h> #include "../kselftest.h" #define MB (1024 * 1024) @@ -156,8 +158,11 @@ struct perf_event_read { */ extern volatile int *value_sink; +extern int snc_unreliable; + extern char llc_occup_path[1024]; +int snc_nodes_per_l3_cache(void); int get_vendor(void); bool check_resctrlfs_support(void); int filter_dmesg(void); @@ -198,6 +203,7 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr); int signal_handler_register(const struct resctrl_test *test); void signal_handler_unregister(void); unsigned int count_bits(unsigned long n); +int snc_kernel_support(void); void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config); void perf_event_initialize_read_format(struct perf_event_read *pe_read); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 3335af815b21..5154ffd821c4 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -118,7 +118,7 @@ static bool test_vendor_specific_check(const struct resctrl_test *test) static void run_single_test(const struct resctrl_test *test, const struct user_params *uparams) { - int ret; + int ret, snc_mode; if (test->disabled) return; @@ -128,8 +128,15 @@ static void run_single_test(const struct resctrl_test *test, const struct user_p return; } + snc_mode = snc_nodes_per_l3_cache(); + ksft_print_msg("Starting %s test ...\n", test->name); + if (snc_mode == 1 && snc_unreliable && get_vendor() == ARCH_INTEL) { + ksft_test_result_skip("SNC detection unreliable due to offline CPUs. Test results may not be accurate if SNC enabled.\n"); + return; + } + if (test_prepare(test)) { ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); return; diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index d38d6dd90be4..195f04c4d158 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -13,6 +13,8 @@ #include "resctrl.h" +int snc_unreliable; + static int find_resctrl_mount(char *buffer) { FILE *mounts; @@ -157,6 +159,98 @@ int get_domain_id(const char *resource, int cpu_no, int *domain_id) } /* + * Count number of CPUs in a /sys bitmap + */ +static unsigned int count_sys_bitmap_bits(char *name) +{ + FILE *fp = fopen(name, "r"); + int count = 0, c; + + if (!fp) + return 0; + + while ((c = fgetc(fp)) != EOF) { + if (!isxdigit(c)) + continue; + switch (c) { + case 'f': + count++; + fallthrough; + case '7': case 'b': case 'd': case 'e': + count++; + fallthrough; + case '3': case '5': case '6': case '9': case 'a': case 'c': + count++; + fallthrough; + case '1': case '2': case '4': case '8': + count++; + break; + } + } + fclose(fp); + + return count; +} + +static bool cpus_offline_empty(void) +{ + char offline_cpus_str[64]; + FILE *fp; + + fp = fopen("/sys/devices/system/cpu/offline", "r"); + if (!fp) { + ksft_perror("Could not open /sys/devices/system/cpu/offline"); + return 0; + } + + if (fscanf(fp, "%63s", offline_cpus_str) < 0) { + if (!errno) { + fclose(fp); + return 1; + } + ksft_perror("Could not read /sys/devices/system/cpu/offline"); + } + + fclose(fp); + + return 0; +} + +/* + * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0. + * If any CPUs are offline declare the detection as unreliable. + */ +int snc_nodes_per_l3_cache(void) +{ + int node_cpus, cache_cpus; + static int snc_mode; + + if (!snc_mode) { + snc_mode = 1; + if (!cpus_offline_empty()) { + ksft_print_msg("Runtime SNC detection unreliable due to offline CPUs.\n"); + ksft_print_msg("Setting SNC mode to disabled.\n"); + snc_unreliable = 1; + return snc_mode; + } + node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap"); + cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map"); + + if (!node_cpus || !cache_cpus) { + ksft_print_msg("Could not determine Sub-NUMA Cluster mode.\n"); + snc_unreliable = 1; + return snc_mode; + } + snc_mode = cache_cpus / node_cpus; + + if (snc_mode > 1) + ksft_print_msg("SNC-%d mode discovered.\n", snc_mode); + } + + return snc_mode; +} + +/* * get_cache_size - Get cache size for a specified CPU * @cpu_no: CPU number * @cache_type: Cache level L2/L3 @@ -211,6 +305,17 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size break; } + /* + * The amount of cache represented by each bit in the masks + * in the schemata file is reduced by a factor equal to SNC + * nodes per L3 cache. + * E.g. on a SNC-2 system with a 100MB L3 cache a test that + * allocates memory from its local SNC node (default behavior + * without using libnuma) will only see 50 MB llc_occupancy + * with a fully populated L3 mask in the schemata file. + */ + if (cache_num == 3) + *cache_size /= snc_nodes_per_l3_cache(); return 0; } @@ -852,3 +957,35 @@ unsigned int count_bits(unsigned long n) return count; } + +/** + * snc_kernel_support - Check for existence of mon_sub_L3_00 file that indicates + * SNC resctrl support on the kernel side. + * + * Return: 0 if not supported, 1 if SNC is disabled or SNC discovery is + * unreliable or SNC is both enabled and supported. + */ +int snc_kernel_support(void) +{ + char node_path[PATH_MAX]; + struct stat statbuf; + int ret; + + ret = snc_nodes_per_l3_cache(); + /* + * If SNC is disabled then its kernel support isn't important. If SNC + * got disabled because the discovery process was unreliable the + * snc_unreliable variable was set. It can be used to verify the SNC + * discovery reliability elsewhere in the selftest. + */ + if (ret == 1) + return ret; + + snprintf(node_path, sizeof(node_path), "%s/%s", RESCTRL_PATH, + "mon_data/mon_L3_00/mon_sub_L3_00"); + + if (!stat(node_path, &statbuf)) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c index d10a847130fb..a58f520f2f41 100644 --- a/tools/testing/selftests/ring-buffer/map_test.c +++ b/tools/testing/selftests/ring-buffer/map_test.c @@ -233,12 +233,18 @@ TEST_F(map, data_mmap) ASSERT_NE(data, MAP_FAILED); munmap(data, data_len); - /* Overflow the available subbufs by 1 */ + /* Offset within ring-buffer bounds, mapping size overflow */ meta_len += desc->meta->subbuf_size * 2; data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, desc->cpu_fd, meta_len); ASSERT_EQ(data, MAP_FAILED); + /* Offset outside ring-buffer bounds */ + data_len = desc->meta->subbuf_size * desc->meta->nr_subbufs; + data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, + desc->cpu_fd, data_len + (desc->meta->subbuf_size * 2)); + ASSERT_EQ(data, MAP_FAILED); + /* Verify meta-page padding */ if (desc->meta->meta_page_size > getpagesize()) { data_len = desc->meta->meta_page_size; diff --git a/tools/testing/selftests/riscv/abi/pointer_masking.c b/tools/testing/selftests/riscv/abi/pointer_masking.c index dee41b7ee3e3..059d2e87eb1f 100644 --- a/tools/testing/selftests/riscv/abi/pointer_masking.c +++ b/tools/testing/selftests/riscv/abi/pointer_masking.c @@ -185,8 +185,20 @@ static void test_fork_exec(void) } } +static bool pwrite_wrapper(int fd, void *buf, size_t count, const char *msg) +{ + int ret = pwrite(fd, buf, count, 0); + + if (ret != count) { + ksft_perror(msg); + return false; + } + return true; +} + static void test_tagged_addr_abi_sysctl(void) { + char *err_pwrite_msg = "failed to write to /proc/sys/abi/tagged_addr_disabled\n"; char value; int fd; @@ -200,14 +212,18 @@ static void test_tagged_addr_abi_sysctl(void) } value = '1'; - pwrite(fd, &value, 1, 0); - ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == -EINVAL, - "sysctl disabled\n"); + if (!pwrite_wrapper(fd, &value, 1, "write '1'")) + ksft_test_result_fail(err_pwrite_msg); + else + ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == -EINVAL, + "sysctl disabled\n"); value = '0'; - pwrite(fd, &value, 1, 0); - ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == 0, - "sysctl enabled\n"); + if (!pwrite_wrapper(fd, &value, 1, "write '0'")) + ksft_test_result_fail(err_pwrite_msg); + else + ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == 0, + "sysctl enabled\n"); set_tagged_addr_ctrl(0, false); diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore index 9ae7964491d5..7d9c87cd0649 100644 --- a/tools/testing/selftests/riscv/vector/.gitignore +++ b/tools/testing/selftests/riscv/vector/.gitignore @@ -1,3 +1,4 @@ vstate_exec_nolibc vstate_prctl -v_initval_nolibc +v_initval +v_exec_initval_nolibc diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile index bfff0ff4f3be..6f7497f4e7b3 100644 --- a/tools/testing/selftests/riscv/vector/Makefile +++ b/tools/testing/selftests/riscv/vector/Makefile @@ -2,18 +2,27 @@ # Copyright (C) 2021 ARM Limited # Originally tools/testing/arm64/abi/Makefile -TEST_GEN_PROGS := vstate_prctl v_initval_nolibc -TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc +TEST_GEN_PROGS := v_initval vstate_prctl +TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc v_exec_initval_nolibc include ../../lib.mk -$(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S +$(OUTPUT)/sys_hwprobe.o: ../hwprobe/sys_hwprobe.S + $(CC) -static -c -o$@ $(CFLAGS) $^ + +$(OUTPUT)/v_helpers.o: v_helpers.c + $(CC) -static -c -o$@ $(CFLAGS) $^ + +$(OUTPUT)/vstate_prctl: vstate_prctl.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ $(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc -$(OUTPUT)/v_initval_nolibc: v_initval_nolibc.c +$(OUTPUT)/v_initval: v_initval.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ + +$(OUTPUT)/v_exec_initval_nolibc: v_exec_initval_nolibc.c $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c new file mode 100644 index 000000000000..35c0812e32de --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Get values of vector registers as soon as the program starts to test if + * is properly cleaning the values before starting a new program. Vector + * registers are caller saved, so no function calls may happen before reading + * the values. To further ensure consistency, this file is compiled without + * libc and without auto-vectorization. + * + * To be "clean" all values must be either all ones or all zeroes. + */ + +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +int main(int argc, char **argv) +{ + char prev_value = 0, value; + unsigned long vl; + int first = 1; + + if (argc > 2 && strcmp(argv[2], "x")) + asm volatile ( + // 0 | zimm[10:0] | rs1 | 1 1 1 | rd |1010111| vsetvli + // vsetvli t4, x0, e8, m1, d1 + ".4byte 0b00000000000000000111111011010111\n\t" + "mv %[vl], t4\n\t" + : [vl] "=r" (vl) : : "t4" + ); + else + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvli %[vl], x0, e8, m1, ta, ma\n\t" + ".option pop\n\t" + : [vl] "=r" (vl) + ); + +#define CHECK_VECTOR_REGISTER(register) ({ \ + for (int i = 0; i < vl; i++) { \ + asm volatile ( \ + ".option push\n\t" \ + ".option arch, +v\n\t" \ + "vmv.x.s %0, " __stringify(register) "\n\t" \ + "vsrl.vi " __stringify(register) ", " __stringify(register) ", 8\n\t" \ + ".option pop\n\t" \ + : "=r" (value)); \ + if (first) { \ + first = 0; \ + } else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \ + printf("Register " __stringify(register) \ + " values not clean! value: %u\n", value); \ + exit(-1); \ + } \ + prev_value = value; \ + } \ +}) + + CHECK_VECTOR_REGISTER(v0); + CHECK_VECTOR_REGISTER(v1); + CHECK_VECTOR_REGISTER(v2); + CHECK_VECTOR_REGISTER(v3); + CHECK_VECTOR_REGISTER(v4); + CHECK_VECTOR_REGISTER(v5); + CHECK_VECTOR_REGISTER(v6); + CHECK_VECTOR_REGISTER(v7); + CHECK_VECTOR_REGISTER(v8); + CHECK_VECTOR_REGISTER(v9); + CHECK_VECTOR_REGISTER(v10); + CHECK_VECTOR_REGISTER(v11); + CHECK_VECTOR_REGISTER(v12); + CHECK_VECTOR_REGISTER(v13); + CHECK_VECTOR_REGISTER(v14); + CHECK_VECTOR_REGISTER(v15); + CHECK_VECTOR_REGISTER(v16); + CHECK_VECTOR_REGISTER(v17); + CHECK_VECTOR_REGISTER(v18); + CHECK_VECTOR_REGISTER(v19); + CHECK_VECTOR_REGISTER(v20); + CHECK_VECTOR_REGISTER(v21); + CHECK_VECTOR_REGISTER(v22); + CHECK_VECTOR_REGISTER(v23); + CHECK_VECTOR_REGISTER(v24); + CHECK_VECTOR_REGISTER(v25); + CHECK_VECTOR_REGISTER(v26); + CHECK_VECTOR_REGISTER(v27); + CHECK_VECTOR_REGISTER(v28); + CHECK_VECTOR_REGISTER(v29); + CHECK_VECTOR_REGISTER(v30); + CHECK_VECTOR_REGISTER(v31); + +#undef CHECK_VECTOR_REGISTER + + return 0; +} diff --git a/tools/testing/selftests/riscv/vector/v_helpers.c b/tools/testing/selftests/riscv/vector/v_helpers.c new file mode 100644 index 000000000000..01a8799dcb78 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_helpers.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "../hwprobe/hwprobe.h" +#include <asm/vendor/thead.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/wait.h> + +bool is_xtheadvector_supported(void) +{ + struct riscv_hwprobe pair; + + pair.key = RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0; + riscv_hwprobe(&pair, 1, 0, NULL, 0); + return pair.value & RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR; +} + +bool is_vector_supported(void) +{ + struct riscv_hwprobe pair; + + pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; + riscv_hwprobe(&pair, 1, 0, NULL, 0); + return pair.value & RISCV_HWPROBE_EXT_ZVE32X; +} + +int launch_test(char *next_program, int test_inherit, int xtheadvector) +{ + char *exec_argv[4], *exec_envp[1]; + int rc, pid, status; + + pid = fork(); + if (pid < 0) { + printf("fork failed %d", pid); + return -1; + } + + if (!pid) { + exec_argv[0] = next_program; + exec_argv[1] = test_inherit != 0 ? "x" : NULL; + exec_argv[2] = xtheadvector != 0 ? "x" : NULL; + exec_argv[3] = NULL; + exec_envp[0] = NULL; + /* launch the program again to check inherit */ + rc = execve(next_program, exec_argv, exec_envp); + if (rc) { + perror("execve"); + printf("child execve failed %d\n", rc); + exit(-1); + } + } + + rc = waitpid(-1, &status, 0); + if (rc < 0) { + printf("waitpid failed\n"); + return -3; + } + + if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) || + WIFSIGNALED(status)) { + printf("child exited abnormally\n"); + return -4; + } + + return WEXITSTATUS(status); +} diff --git a/tools/testing/selftests/riscv/vector/v_helpers.h b/tools/testing/selftests/riscv/vector/v_helpers.h new file mode 100644 index 000000000000..763cddfe26da --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_helpers.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <stdbool.h> + +bool is_xtheadvector_supported(void); + +bool is_vector_supported(void); + +int launch_test(char *next_program, int test_inherit, int xtheadvector); diff --git a/tools/testing/selftests/riscv/vector/v_initval.c b/tools/testing/selftests/riscv/vector/v_initval.c new file mode 100644 index 000000000000..be9e1d18ad29 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_initval.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "../../kselftest_harness.h" +#include "v_helpers.h" + +#define NEXT_PROGRAM "./v_exec_initval_nolibc" + +TEST(v_initval) +{ + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + + ASSERT_EQ(0, launch_test(NEXT_PROGRAM, 0, xtheadvector)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c deleted file mode 100644 index 1dd94197da30..000000000000 --- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include "../../kselftest.h" -#define MAX_VSIZE (8192 * 32) - -void dump(char *ptr, int size) -{ - int i = 0; - - for (i = 0; i < size; i++) { - if (i != 0) { - if (i % 16 == 0) - printf("\n"); - else if (i % 8 == 0) - printf(" "); - } - printf("%02x ", ptr[i]); - } - printf("\n"); -} - -int main(void) -{ - int i; - unsigned long vl; - char *datap, *tmp; - - datap = malloc(MAX_VSIZE); - if (!datap) { - ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE); - exit(-1); - } - - tmp = datap; - asm volatile ( - ".option push\n\t" - ".option arch, +v\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vse8.v v0, (%2)\n\t" - "add %1, %2, %0\n\t" - "vse8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory"); - - ksft_print_msg("vl = %lu\n", vl); - - if (datap[0] != 0x00 && datap[0] != 0xff) { - ksft_test_result_fail("v-regesters are not properly initialized\n"); - dump(datap, vl * 4); - exit(-1); - } - - for (i = 1; i < vl * 4; i++) { - if (datap[i] != datap[0]) { - ksft_test_result_fail("detect stale values on v-regesters\n"); - dump(datap, vl * 4); - exit(-2); - } - } - - free(datap); - ksft_exit_pass(); - return 0; -} diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c index 1f9969bed235..7b7d6f21acb4 100644 --- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c +++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c @@ -6,13 +6,16 @@ int main(int argc, char **argv) { - int rc, pid, status, test_inherit = 0; + int rc, pid, status, test_inherit = 0, xtheadvector = 0; long ctrl, ctrl_c; char *exec_argv[2], *exec_envp[2]; - if (argc > 1) + if (argc > 1 && strcmp(argv[1], "x")) test_inherit = 1; + if (argc > 2 && strcmp(argv[2], "x")) + xtheadvector = 1; + ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL); if (ctrl < 0) { puts("PR_RISCV_V_GET_CONTROL is not supported\n"); @@ -53,11 +56,14 @@ int main(int argc, char **argv) puts("child's vstate_ctrl not equal to parent's\n"); exit(-1); } - asm volatile (".option push\n\t" - ".option arch, +v\n\t" - "vsetvli x0, x0, e32, m8, ta, ma\n\t" - ".option pop\n\t" - ); + if (xtheadvector) + asm volatile (".4byte 0x00007ed7"); + else + asm volatile (".option push\n\t" + ".option arch, +v\n\t" + "vsetvli x0, x0, e32, m8, ta, ma\n\t" + ".option pop\n\t" + ); exit(ctrl); } } diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index 895177f6bf4c..62fbb17a0556 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -3,179 +3,244 @@ #include <unistd.h> #include <errno.h> #include <sys/wait.h> +#include <sys/types.h> +#include <stdlib.h> -#include "../hwprobe/hwprobe.h" -#include "../../kselftest.h" +#include "../../kselftest_harness.h" +#include "v_helpers.h" #define NEXT_PROGRAM "./vstate_exec_nolibc" -static int launch_test(int test_inherit) -{ - char *exec_argv[3], *exec_envp[1]; - int rc, pid, status; - - pid = fork(); - if (pid < 0) { - ksft_test_result_fail("fork failed %d", pid); - return -1; - } - if (!pid) { - exec_argv[0] = NEXT_PROGRAM; - exec_argv[1] = test_inherit != 0 ? "x" : NULL; - exec_argv[2] = NULL; - exec_envp[0] = NULL; - /* launch the program again to check inherit */ - rc = execve(NEXT_PROGRAM, exec_argv, exec_envp); - if (rc) { - perror("execve"); - ksft_test_result_fail("child execve failed %d\n", rc); - exit(-1); - } - } - - rc = waitpid(-1, &status, 0); - if (rc < 0) { - ksft_test_result_fail("waitpid failed\n"); - return -3; - } - - if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) || - WIFSIGNALED(status)) { - ksft_test_result_fail("child exited abnormally\n"); - return -4; - } - - return WEXITSTATUS(status); -} - -int test_and_compare_child(long provided, long expected, int inherit) +int test_and_compare_child(long provided, long expected, int inherit, int xtheadvector) { int rc; rc = prctl(PR_RISCV_V_SET_CONTROL, provided); if (rc != 0) { - ksft_test_result_fail("prctl with provided arg %lx failed with code %d\n", - provided, rc); + printf("prctl with provided arg %lx failed with code %d\n", + provided, rc); return -1; } - rc = launch_test(inherit); + rc = launch_test(NEXT_PROGRAM, inherit, xtheadvector); if (rc != expected) { - ksft_test_result_fail("Test failed, check %d != %ld\n", rc, - expected); + printf("Test failed, check %d != %ld\n", rc, expected); return -2; } return 0; } -#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0 -#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2 +#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0 +#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2 -int main(void) +TEST(get_control_no_v) { - struct riscv_hwprobe pair; - long flag, expected; long rc; - pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; - rc = riscv_hwprobe(&pair, 1, 0, NULL, 0); - if (rc < 0) { - ksft_test_result_fail("hwprobe() failed with %ld\n", rc); - return -1; - } + if (is_vector_supported() || is_xtheadvector_supported()) + SKIP(return, "Test expects vector to be not supported"); - if (pair.key != RISCV_HWPROBE_KEY_IMA_EXT_0) { - ksft_test_result_fail("hwprobe cannot probe RISCV_HWPROBE_KEY_IMA_EXT_0\n"); - return -2; - } + rc = prctl(PR_RISCV_V_GET_CONTROL); + EXPECT_EQ(-1, rc) + TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X"); + EXPECT_EQ(EINVAL, errno) + TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X"); +} - if (!(pair.value & RISCV_HWPROBE_EXT_ZVE32X)) { - rc = prctl(PR_RISCV_V_GET_CONTROL); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without ZVE32X\n"); - return -3; - } - - rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("SET_CONTROL should fail on kernel/hw without ZVE32X\n"); - return -4; - } - - ksft_test_result_skip("Vector not supported\n"); - return 0; - } +TEST(set_control_no_v) +{ + long rc; + + if (is_vector_supported() || is_xtheadvector_supported()) + SKIP(return, "Test expects vector to be not supported"); + + rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); + EXPECT_EQ(-1, rc) + TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X"); + EXPECT_EQ(EINVAL, errno) + TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X"); +} + +TEST(vstate_on_current) +{ + long flag; + long rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); flag = PR_RISCV_V_VSTATE_CTRL_ON; rc = prctl(PR_RISCV_V_SET_CONTROL, flag); - if (rc != 0) { - ksft_test_result_fail("Enabling V for current should always success\n"); - return -5; - } + EXPECT_EQ(0, rc) TH_LOG("Enabling V for current should always succeed"); +} + +TEST(vstate_off_eperm) +{ + long flag; + long rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); flag = PR_RISCV_V_VSTATE_CTRL_OFF; rc = prctl(PR_RISCV_V_SET_CONTROL, flag); - if (rc != -1 || errno != EPERM) { - ksft_test_result_fail("Disabling current's V alive must fail with EPERM(%d)\n", - errno); - return -5; + EXPECT_EQ(EPERM, errno) + TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno); + EXPECT_EQ(-1, rc) + TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno); +} + +TEST(vstate_on_no_nesting) +{ + long flag; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); } /* Turn on next's vector explicitly and test */ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; - if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0)) - return -6; + + EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0, xtheadvector)); +} + +TEST(vstate_off_nesting) +{ + long flag; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn off next's vector explicitly and test */ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; - if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 0)) - return -7; + + EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 1, xtheadvector)); +} + +TEST(vstate_on_inherit_no_nesting) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + + /* Turn on next's vector explicitly and test no inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_ON; + + EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector)); +} + +TEST(vstate_on_inherit) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn on next's vector explicitly and test inherit */ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; expected = flag | PR_RISCV_V_VSTATE_CTRL_ON; - if (test_and_compare_child(flag, expected, 0)) - return -8; - if (test_and_compare_child(flag, expected, 1)) - return -9; + EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector)); +} + +TEST(vstate_off_inherit_no_nesting) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + /* Turn off next's vector explicitly and test no inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF; + + EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector)); +} + +TEST(vstate_off_inherit) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn off next's vector explicitly and test inherit */ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF; - if (test_and_compare_child(flag, expected, 0)) - return -10; - if (test_and_compare_child(flag, expected, 1)) - return -11; + EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector)); +} + +/* arguments should fail with EINVAL */ +TEST(inval_set_control_1) +{ + int rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); - /* arguments should fail with EINVAL */ rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +/* arguments should fail with EINVAL */ +TEST(inval_set_control_2) +{ + int rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} - rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } +/* arguments should fail with EINVAL */ +TEST(inval_set_control_3) +{ + int rc; - rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); - ksft_test_result_pass("tests for riscv_v_vstate_ctrl pass\n"); - ksft_exit_pass(); - return 0; + rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 2f37961240ca..05d03e679e06 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -226,8 +226,32 @@ unsigned int yield_mod_cnt, nr_abort; "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \ "bnez " INJECT_ASM_REG ", 222b\n\t" \ "333:\n\t" +#elif defined(__or1k__) +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1]"m"(loop_cnt[1]) \ + , [loop_cnt_2]"m"(loop_cnt[2]) \ + , [loop_cnt_3]"m"(loop_cnt[3]) \ + , [loop_cnt_4]"m"(loop_cnt[4]) \ + , [loop_cnt_5]"m"(loop_cnt[5]) \ + , [loop_cnt_6]"m"(loop_cnt[6]) +#define INJECT_ASM_REG "r31" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG + +#define RSEQ_INJECT_ASM(n) \ + "l.lwz " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ + "l.sfeqi " INJECT_ASM_REG ", 0\n\t" \ + "l.bf 333f\n\t" \ + " l.nop\n\t" \ + "222:\n\t" \ + "l.addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \ + "l.sfeqi " INJECT_ASM_REG ", 0\n\t" \ + "l.bf 222f\n\t" \ + " l.nop\n\t" \ + "333:\n\t" #else #error unsupported target #endif diff --git a/tools/testing/selftests/rseq/rseq-or1k-bits.h b/tools/testing/selftests/rseq/rseq-or1k-bits.h new file mode 100644 index 000000000000..15d0e8200cd1 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-or1k-bits.h @@ -0,0 +1,412 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ + +#include "rseq-bits-template.h" + +#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, + int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_FINAL_STORE(v, newv, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[error2]") +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_STORE(load) + RSEQ_ASM_OP_R_LOAD_OFF(voffp) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expectnot] "r" (expectnot), + [load] "m" (*load), + [voffp] "Ir" (voffp) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_ADD(count) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) + RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[cmpfail]") + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") + RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[error3]") +#endif + RSEQ_ASM_OP_FINAL_STORE(v, newv, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [v] "m" (*v), + [expect] "r" (expect), + [v2] "m" (*v2), + [expect2] "r" (expect2), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV + +/* + * pval = *(ptr+off) + * *pval += inc; + */ +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, intptr_t inc, + int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") +#endif + RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [ptr] "r" (ptr), + [off] "r" (off), + [inc] "r" (inc) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \ + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_STORE(v2, newv2) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + RSEQ_ASM_OP_FINAL_STORE_RELEASE(v, newv, 3) +#else + RSEQ_ASM_OP_FINAL_STORE(v, newv, 3) +#endif + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [v2] "m" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __always_inline +int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]") +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]") + RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]") +#endif + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]") + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") + RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]") +#endif + RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_TEMPLATE_MO_RELEASE + RSEQ_ASM_OP_FINAL_STORE_RELEASE(v, newv, 3) +#else + RSEQ_ASM_OP_FINAL_STORE(v, newv, 3) +#endif + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), + [expect] "r" (expect), + [v] "m" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2, + RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4 + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && + (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */ + +#include "rseq-bits-reset.h" diff --git a/tools/testing/selftests/rseq/rseq-or1k-thread-pointer.h b/tools/testing/selftests/rseq/rseq-or1k-thread-pointer.h new file mode 100644 index 000000000000..cda740f7aff3 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-or1k-thread-pointer.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +#ifndef _RSEQ_OR1K_THREAD_POINTER +#define _RSEQ_OR1K_THREAD_POINTER + +static inline void *rseq_thread_pointer(void) +{ + void *__thread_register; + + __asm__ ("l.or %0, r10, r0" : "=r" (__thread_register)); + return __thread_register; +} + +#endif diff --git a/tools/testing/selftests/rseq/rseq-or1k.h b/tools/testing/selftests/rseq/rseq-or1k.h new file mode 100644 index 000000000000..9e78eebdf79a --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-or1k.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ + +/* + * Select the instruction "l.nop 0x35" as the RSEQ_SIG. + */ +#define RSEQ_SIG 0x15000035 + +#define rseq_smp_mb() __asm__ __volatile__ ("l.msync" ::: "memory") +#define rseq_smp_rmb() rseq_smp_mb() +#define rseq_smp_wmb() rseq_smp_mb() +#define RSEQ_ASM_TMP_REG_1 "r31" +#define RSEQ_ASM_TMP_REG_2 "r29" +#define RSEQ_ASM_TMP_REG_3 "r27" +#define RSEQ_ASM_TMP_REG_4 "r25" + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ + rseq_smp_mb(); \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_smp_mb(); \ + RSEQ_WRITE_ONCE(*(p), v); \ +} while (0) + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ + post_commit_offset, abort_ip) \ + ".pushsection __rseq_cs, \"aw\"\n" \ + ".balign 32\n" \ + __rseq_str(label) ":\n" \ + ".long " __rseq_str(version) ", " __rseq_str(flags) "\n" \ + ".long 0x0, " __rseq_str(start_ip) ", " \ + "0x0, " __rseq_str(post_commit_offset) ", " \ + "0x0, " __rseq_str(abort_ip) "\n" \ + ".popsection\n\t" \ + ".pushsection __rseq_cs_ptr_array, \"aw\"\n" \ + ".long 0x0, " __rseq_str(label) "b\n" \ + ".popsection\n" + +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ + ((post_commit_ip) - (start_ip)), abort_ip) + +/* + * Exit points of a rseq critical section consist of all instructions outside + * of the critical section where a critical section can either branch to or + * reach through the normal course of its execution. The abort IP and the + * post-commit IP are already part of the __rseq_cs section and should not be + * explicitly defined as additional exit points. Knowing all exit points is + * useful to assist debuggers stepping over the critical section. + */ +#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \ + ".pushsection __rseq_exit_point_array, \"aw\"\n" \ + ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) "\n" \ + ".popsection\n" + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "l.movhi " RSEQ_ASM_TMP_REG_1 ", hi(" __rseq_str(cs_label) ")\n"\ + "l.ori " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \ + ", lo(" __rseq_str(cs_label) ")\n"\ + "l.sw %[" __rseq_str(rseq_cs) "], " RSEQ_ASM_TMP_REG_1 "\n" \ + __rseq_str(label) ":\n" + +#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ + "l.j 222f\n" \ + " l.nop\n" \ + ".balign 4\n" \ + ".long " __rseq_str(RSEQ_SIG) "\n" \ + __rseq_str(label) ":\n" \ + "l.j %l[" __rseq_str(abort_label) "]\n" \ + " l.nop\n" \ + "222:\n" + +#define RSEQ_ASM_OP_STORE(var, value) \ + "l.sw %[" __rseq_str(var) "], %[" __rseq_str(value) "]\n" + +#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ + "l.lwz " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + "l.sfne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "]\n" \ + "l.bf " __rseq_str(label) "\n" \ + " l.nop\n" + +#define RSEQ_ASM_OP_CMPNE(var, expect, label) \ + "l.lwz " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + "l.sfeq " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "]\n" \ + "l.bf " __rseq_str(label) "\n" \ + " l.nop\n" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + RSEQ_ASM_OP_CMPEQ(current_cpu_id, cpu_id, label) + +#define RSEQ_ASM_OP_R_LOAD(var) \ + "l.lwz " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_R_STORE(var) \ + "l.sw %[" __rseq_str(var) "], " RSEQ_ASM_TMP_REG_1 "\n" + +#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \ + "l.lwz " RSEQ_ASM_TMP_REG_1 ", " \ + "%[" __rseq_str(offset) "](" RSEQ_ASM_TMP_REG_1 ")\n" + +#define RSEQ_ASM_OP_R_ADD(count) \ + "l.add " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \ + ", %[" __rseq_str(count) "]\n" + +#define RSEQ_ASM_OP_FINAL_STORE(var, value, post_commit_label) \ + RSEQ_ASM_OP_STORE(var, value) \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(var, value, post_commit_label) \ + "l.msync\n" \ + RSEQ_ASM_OP_STORE(var, value) \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ + "l.sw %[" __rseq_str(var) "], " RSEQ_ASM_TMP_REG_1 "\n" \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) \ + "l.sfeq %[" __rseq_str(len) "], r0\n" \ + "l.bf 333f\n" \ + " l.nop\n" \ + "l.ori " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(len) "], 0\n" \ + "l.ori " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(src) "], 0\n" \ + "l.ori " RSEQ_ASM_TMP_REG_3 ", %[" __rseq_str(dst) "], 0\n" \ + "222:\n" \ + "l.lbz " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_2 ")\n" \ + "l.sb 0(" RSEQ_ASM_TMP_REG_3 "), " RSEQ_ASM_TMP_REG_4 "\n" \ + "l.addi " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 ", -1\n" \ + "l.addi " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", 1\n" \ + "l.addi " RSEQ_ASM_TMP_REG_3 ", " RSEQ_ASM_TMP_REG_3 ", 1\n" \ + "l.sfne " RSEQ_ASM_TMP_REG_1 ", r0\n" \ + "l.bf 222b\n" \ + " l.nop\n" \ + "333:\n" + +#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, post_commit_label) \ + "l.ori " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "], 0\n" \ + RSEQ_ASM_OP_R_ADD(off) \ + "l.lwz " RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \ + RSEQ_ASM_OP_R_ADD(inc) \ + __rseq_str(post_commit_label) ":\n" + +/* Per-cpu-id indexing. */ + +#define RSEQ_TEMPLATE_CPU_ID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-or1k-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-or1k-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_CPU_ID + +/* Per-mm-cid indexing. */ + +#define RSEQ_TEMPLATE_MM_CID +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-or1k-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED + +#define RSEQ_TEMPLATE_MO_RELEASE +#include "rseq-or1k-bits.h" +#undef RSEQ_TEMPLATE_MO_RELEASE +#undef RSEQ_TEMPLATE_MM_CID + +/* APIs which are not based on cpu ids. */ + +#define RSEQ_TEMPLATE_CPU_ID_NONE +#define RSEQ_TEMPLATE_MO_RELAXED +#include "rseq-or1k-bits.h" +#undef RSEQ_TEMPLATE_MO_RELAXED +#undef RSEQ_TEMPLATE_CPU_ID_NONE diff --git a/tools/testing/selftests/rseq/rseq-thread-pointer.h b/tools/testing/selftests/rseq/rseq-thread-pointer.h index 977c25d758b2..3d5019307a1b 100644 --- a/tools/testing/selftests/rseq/rseq-thread-pointer.h +++ b/tools/testing/selftests/rseq/rseq-thread-pointer.h @@ -12,6 +12,8 @@ #include "rseq-x86-thread-pointer.h" #elif defined(__PPC__) #include "rseq-ppc-thread-pointer.h" +#elif defined(__or1k__) +#include "rseq-or1k-thread-pointer.h" #else #include "rseq-generic-thread-pointer.h" #endif diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 5b9772cdf265..f6156790c3b4 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -61,7 +61,6 @@ unsigned int rseq_size = -1U; unsigned int rseq_flags; static int rseq_ownership; -static int rseq_reg_success; /* At least one rseq registration has succeded. */ /* Allocate a large area for the TLS. */ #define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 @@ -152,14 +151,27 @@ int rseq_register_current_thread(void) } rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { - if (RSEQ_READ_ONCE(rseq_reg_success)) { + /* + * After at least one thread has registered successfully + * (rseq_size > 0), the registration of other threads should + * never fail. + */ + if (RSEQ_READ_ONCE(rseq_size) > 0) { /* Incoherent success/failure within process. */ abort(); } return -1; } assert(rseq_current_cpu_raw() >= 0); - RSEQ_WRITE_ONCE(rseq_reg_success, 1); + + /* + * The first thread to register sets the rseq_size to mimic the libc + * behavior. + */ + if (RSEQ_READ_ONCE(rseq_size) == 0) { + RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size()); + } + return 0; } @@ -235,12 +247,18 @@ void rseq_init(void) return; } rseq_ownership = 1; - if (!rseq_available()) { - rseq_size = 0; - return; - } + + /* Calculate the offset of the rseq area from the thread pointer. */ rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); + + /* rseq flags are deprecated, always set to 0. */ rseq_flags = 0; + + /* + * Set the size to 0 until at least one thread registers to mimic the + * libc behavior. + */ + rseq_size = 0; } static __attribute__((destructor)) diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 4e217b620e0c..ba424ce80a71 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -60,7 +60,14 @@ extern ptrdiff_t rseq_offset; /* - * Size of the registered rseq area. 0 if the registration was + * The rseq ABI is composed of extensible feature fields. The extensions + * are done by appending additional fields at the end of the structure. + * The rseq_size defines the size of the active feature set which can be + * used by the application for the current rseq registration. Features + * starting at offset >= rseq_size are inactive and should not be used. + * + * The rseq_size is the intersection between the available allocation + * size for the rseq area and the feature size supported by the kernel. * unsuccessful. */ extern unsigned int rseq_size; @@ -122,6 +129,8 @@ static inline struct rseq_abi *rseq_get_abi(void) #include <rseq-s390.h> #elif defined(__riscv) #include <rseq-riscv.h> +#elif defined(__or1k__) +#include <rseq-or1k.h> #else #error unsupported target #endif diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index a28c1416cb89..50e03eefe7ac 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -21,7 +21,7 @@ usage() cat <<EOF Usage: $0 [OPTIONS] -s | --summary Print summary with detailed log in output.log (conflict with -p) - -p | --per_test_log Print test log in /tmp with each test name (conflict with -s) + -p | --per-test-log Print test log in /tmp with each test name (conflict with -s) -t | --test COLLECTION:TEST Run TEST from COLLECTION -c | --collection COLLECTION Run all tests from COLLECTION -l | --list List the available collection:test entries diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c index 37d9bf6fb745..6f4c3f5a1c5d 100644 --- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c +++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c @@ -20,7 +20,7 @@ s32 BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_select_cpu, struct task_struct *p, * If we dispatch to a bogus DSQ that will fall back to the * builtin global DSQ, we fail gracefully. */ - scx_bpf_dispatch_vtime(p, 0xcafef00d, SCX_SLICE_DFL, + scx_bpf_dsq_insert_vtime(p, 0xcafef00d, SCX_SLICE_DFL, p->scx.dsq_vtime, 0); return cpu; } diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c index dffc97d9cdf1..e4a55027778f 100644 --- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c +++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c @@ -17,8 +17,8 @@ s32 BPF_STRUCT_OPS(ddsp_vtimelocal_fail_select_cpu, struct task_struct *p, if (cpu >= 0) { /* Shouldn't be allowed to vtime dispatch to a builtin DSQ. */ - scx_bpf_dispatch_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, - p->scx.dsq_vtime, 0); + scx_bpf_dsq_insert_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, + p->scx.dsq_vtime, 0); return cpu; } diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index 6a7db1502c29..fbda6bf54671 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -43,9 +43,12 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) if (!p) return; - target = bpf_get_prandom_u32() % nr_cpus; + if (p->nr_cpus_allowed == nr_cpus) + target = bpf_get_prandom_u32() % nr_cpus; + else + target = scx_bpf_task_cpu(p); - scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0); bpf_task_release(p); } diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c index 472851b56854..0ff27e57fe43 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.c @@ -34,9 +34,10 @@ static enum scx_test_status run(void *ctx) /* Just sleeping is fine, plenty of scheduling events happening */ sleep(1); - SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR)); bpf_link__destroy(link); + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_UNREG)); + return SCX_TEST_PASS; } @@ -50,7 +51,7 @@ static void cleanup(void *ctx) struct scx_test dsp_local_on = { .name = "dsp_local_on", .description = "Verify we can directly dispatch tasks to a local DSQs " - "from osp.dispatch()", + "from ops.dispatch()", .setup = setup, .run = run, .cleanup = cleanup, diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c index 1efb50d61040..a7cf868d5e31 100644 --- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c +++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c @@ -31,7 +31,7 @@ void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p, /* Can only call from ops.select_cpu() */ scx_bpf_select_cpu_dfl(p, 0, 0, &found); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); } SEC(".struct_ops.link") diff --git a/tools/testing/selftests/sched_ext/exit.bpf.c b/tools/testing/selftests/sched_ext/exit.bpf.c index d75d4faf07f6..4bc36182d3ff 100644 --- a/tools/testing/selftests/sched_ext/exit.bpf.c +++ b/tools/testing/selftests/sched_ext/exit.bpf.c @@ -33,7 +33,7 @@ void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags) if (exit_point == EXIT_ENQUEUE) EXIT_CLEANLY(); - scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); } void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) @@ -41,7 +41,7 @@ void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) if (exit_point == EXIT_DISPATCH) EXIT_CLEANLY(); - scx_bpf_consume(DSQ_ID); + scx_bpf_dsq_move_to_local(DSQ_ID); } void BPF_STRUCT_OPS(exit_enable, struct task_struct *p) diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c index 4d4cd8d966db..430f5e13bf55 100644 --- a/tools/testing/selftests/sched_ext/maximal.bpf.c +++ b/tools/testing/selftests/sched_ext/maximal.bpf.c @@ -12,6 +12,8 @@ char _license[] SEC("license") = "GPL"; +#define DSQ_ID 0 + s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) { @@ -20,7 +22,7 @@ s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags) { - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); } void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) @@ -28,7 +30,7 @@ void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev) { - scx_bpf_consume(SCX_DSQ_GLOBAL); + scx_bpf_dsq_move_to_local(DSQ_ID); } void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags) @@ -123,7 +125,7 @@ void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight) s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init) { - return 0; + return scx_bpf_create_dsq(DSQ_ID, -1); } void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info) diff --git a/tools/testing/selftests/sched_ext/runner.c b/tools/testing/selftests/sched_ext/runner.c index eab48c7ff309..aa2d7d32dda9 100644 --- a/tools/testing/selftests/sched_ext/runner.c +++ b/tools/testing/selftests/sched_ext/runner.c @@ -22,11 +22,12 @@ const char help_fmt[] = "\n" " -t TEST Only run tests whose name includes this string\n" " -s Include print output for skipped tests\n" +" -l List all available tests\n" " -q Don't print the test descriptions during run\n" " -h Display this help and exit\n"; static volatile int exit_req; -static bool quiet, print_skipped; +static bool quiet, print_skipped, list; #define MAX_SCX_TESTS 2048 @@ -133,7 +134,7 @@ int main(int argc, char **argv) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - while ((opt = getopt(argc, argv, "qst:h")) != -1) { + while ((opt = getopt(argc, argv, "qslt:h")) != -1) { switch (opt) { case 'q': quiet = true; @@ -141,6 +142,9 @@ int main(int argc, char **argv) case 's': print_skipped = true; break; + case 'l': + list = true; + break; case 't': filter = optarg; break; @@ -154,6 +158,13 @@ int main(int argc, char **argv) enum scx_test_status status; struct scx_test *test = &__scx_tests[i]; + if (list) { + printf("%s\n", test->name); + if (i == (__scx_num_tests - 1)) + return 0; + continue; + } + if (filter && should_skip_test(test, filter)) { /* * Printing the skipped tests and their preambles can diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c index f171ac470970..13d0f5be788d 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c @@ -30,7 +30,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct task_struct *p, } scx_bpf_put_idle_cpumask(idle_mask); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); } SEC(".struct_ops.link") diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c index 9efdbb7da928..815f1d5d61ac 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c @@ -67,7 +67,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_enqueue, struct task_struct *p, saw_local = true; } - scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, enq_flags); } s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init_task, diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c index 59bfc4f36167..4bb99699e920 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c @@ -29,7 +29,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_select_cpu, struct task_struct *p, cpu = prev_cpu; dispatch: - scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, 0); return cpu; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c index 3bbd5fcdfb18..2a75de11b2cf 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c @@ -18,7 +18,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_select_cpu, struct task_struct *p s32 prev_cpu, u64 wake_flags) { /* Dispatching to a random DSQ should fail. */ - scx_bpf_dispatch(p, 0xcafef00d, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, 0xcafef00d, SCX_SLICE_DFL, 0); return prev_cpu; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c index 0fda57fe0ecf..99d075695c97 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c @@ -18,8 +18,8 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_select_cpu, struct task_struct *p s32 prev_cpu, u64 wake_flags) { /* Dispatching twice in a row is disallowed. */ - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); return prev_cpu; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c index e6c67bcf5e6e..bfcb96cd4954 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c @@ -2,8 +2,8 @@ /* * A scheduler that validates that enqueue flags are properly stored and * applied at dispatch time when a task is directly dispatched from - * ops.select_cpu(). We validate this by using scx_bpf_dispatch_vtime(), and - * making the test a very basic vtime scheduler. + * ops.select_cpu(). We validate this by using scx_bpf_dsq_insert_vtime(), + * and making the test a very basic vtime scheduler. * * Copyright (c) 2024 Meta Platforms, Inc. and affiliates. * Copyright (c) 2024 David Vernet <dvernet@meta.com> @@ -47,13 +47,13 @@ s32 BPF_STRUCT_OPS(select_cpu_vtime_select_cpu, struct task_struct *p, cpu = prev_cpu; scx_bpf_test_and_clear_cpu_idle(cpu); ddsp: - scx_bpf_dispatch_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0); + scx_bpf_dsq_insert_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0); return cpu; } void BPF_STRUCT_OPS(select_cpu_vtime_dispatch, s32 cpu, struct task_struct *p) { - if (scx_bpf_consume(VTIME_DSQ)) + if (scx_bpf_dsq_move_to_local(VTIME_DSQ)) consumed = true; } diff --git a/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py new file mode 100755 index 000000000000..0f44a6199495 --- /dev/null +++ b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Script that checks that SFQ rejects a limit of 1 at the kernel +# level. We can't use iproute2's tc because it does not accept a limit +# of 1. + +import sys +import os + +from pyroute2 import IPRoute +from pyroute2.netlink.exceptions import NetlinkError + +ip = IPRoute() +ifidx = ip.link_lookup(ifname=sys.argv[1]) + +try: + ip.tc('add', 'sfq', ifidx, limit=1) + sys.exit(1) +except NetlinkError: + sys.exit(0) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json index 996448afe31b..91d120548bf5 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json @@ -78,10 +78,10 @@ "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0xff", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0x1f", "expExitCode": "0", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow", - "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 255 baseclass", + "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 31 baseclass", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DEV1 ingress" diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json index 16d51936b385..50e8d72781cb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json @@ -208,5 +208,25 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "4d6f", + "name": "Check that limit of 1 is rejected", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "./scripts/sfq_rejects_limit_1.py $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [ + ] } ] diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c index c5264594064c..83faa4e354e3 100644 --- a/tools/testing/selftests/timers/clocksource-switch.c +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -156,8 +156,8 @@ int main(int argc, char **argv) /* Check everything is sane before we start switching asynchronously */ if (do_sanity_check) { for (i = 0; i < count; i++) { - printf("Validating clocksource %s\n", - clocksource_list[i]); + ksft_print_msg("Validating clocksource %s\n", + clocksource_list[i]); if (change_clocksource(clocksource_list[i])) { status = -1; goto out; @@ -169,7 +169,7 @@ int main(int argc, char **argv) } } - printf("Running Asynchronous Switching Tests...\n"); + ksft_print_msg("Running Asynchronous Switching Tests...\n"); pid = fork(); if (!pid) return run_tests(runtime); diff --git a/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c b/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c index b5c3ddb90942..02ecfe687dc2 100644 --- a/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c +++ b/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c @@ -23,45 +23,56 @@ #include <sys/mount.h> #include <unistd.h> +#include "../kselftest.h" + int main(void) { int fd; + // Setting up kselftest framework + ksft_print_header(); + ksft_set_plan(1); + + // Check if test is run as root + if (geteuid()) { + ksft_exit_skip("This test needs root to run!\n"); + return 1; + } + if (unshare(CLONE_NEWNS) == -1) { if (errno == ENOSYS || errno == EPERM) { - fprintf(stderr, "error: unshare, errno %d\n", errno); - return 4; + ksft_exit_skip("unshare() error: unshare, errno %d\n", errno); + } else { + ksft_exit_fail_msg("unshare() error: unshare, errno %d\n", errno); } - fprintf(stderr, "error: unshare, errno %d\n", errno); - return 1; } + if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) { - fprintf(stderr, "error: mount '/', errno %d\n", errno); - return 1; + ksft_exit_fail_msg("mount() error: Root filesystem private mount: Fail %d\n", errno); } /* Our heroes: 1 root inode, 1 O_TMPFILE inode, 1 permanent inode. */ if (mount(NULL, "/tmp", "tmpfs", 0, "nr_inodes=3") == -1) { - fprintf(stderr, "error: mount tmpfs, errno %d\n", errno); - return 1; + ksft_exit_fail_msg("mount() error: Mounting tmpfs on /tmp: Fail %d\n", errno); } fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_TMPFILE, 0600); if (fd == -1) { - fprintf(stderr, "error: open 1, errno %d\n", errno); - return 1; + ksft_exit_fail_msg("openat() error: Open first temporary file: Fail %d\n", errno); } + if (linkat(fd, "", AT_FDCWD, "/tmp/1", AT_EMPTY_PATH) == -1) { - fprintf(stderr, "error: linkat, errno %d\n", errno); - return 1; + ksft_exit_fail_msg("linkat() error: Linking the temporary file: Fail %d\n", errno); + /* Ensure fd is closed on failure */ + close(fd); } close(fd); fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_TMPFILE, 0600); if (fd == -1) { - fprintf(stderr, "error: open 2, errno %d\n", errno); - return 1; + ksft_exit_fail_msg("openat() error: Opening the second temporary file: Fail %d\n", errno); } - + ksft_test_result_pass(" "); + ksft_exit_pass(); return 0; } diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 28f35620c499..2fe5e983cb22 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -53,6 +53,7 @@ static struct vdso_info /* Symbol table */ ELF(Sym) *symtab; const char *symstrings; + ELF(Word) *gnu_hash; ELF_HASH_ENTRY *bucket, *chain; ELF_HASH_ENTRY nbucket, nchain; @@ -81,6 +82,16 @@ static unsigned long elf_hash(const char *name) return h; } +static uint32_t gnu_hash(const char *name) +{ + const unsigned char *s = (void *)name; + uint32_t h = 5381; + + for (; *s; s++) + h += h * 32 + *s; + return h; +} + void vdso_init_from_sysinfo_ehdr(uintptr_t base) { size_t i; @@ -123,6 +134,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) */ ELF_HASH_ENTRY *hash = 0; vdso_info.symstrings = 0; + vdso_info.gnu_hash = 0; vdso_info.symtab = 0; vdso_info.versym = 0; vdso_info.verdef = 0; @@ -143,6 +155,11 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; + case DT_GNU_HASH: + vdso_info.gnu_hash = + (ELF(Word) *)((uintptr_t)dyn[i].d_un.d_ptr + + vdso_info.load_offset); + break; case DT_VERSYM: vdso_info.versym = (ELF(Versym) *) ((uintptr_t)dyn[i].d_un.d_ptr @@ -155,17 +172,27 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) break; } } - if (!vdso_info.symstrings || !vdso_info.symtab || !hash) + if (!vdso_info.symstrings || !vdso_info.symtab || + (!hash && !vdso_info.gnu_hash)) return; /* Failed */ if (!vdso_info.verdef) vdso_info.versym = 0; /* Parse the hash table header. */ - vdso_info.nbucket = hash[0]; - vdso_info.nchain = hash[1]; - vdso_info.bucket = &hash[2]; - vdso_info.chain = &hash[vdso_info.nbucket + 2]; + if (vdso_info.gnu_hash) { + vdso_info.nbucket = vdso_info.gnu_hash[0]; + /* The bucket array is located after the header (4 uint32) and the bloom + * filter (size_t array of gnu_hash[2] elements). + */ + vdso_info.bucket = vdso_info.gnu_hash + 4 + + sizeof(size_t) / 4 * vdso_info.gnu_hash[2]; + } else { + vdso_info.nbucket = hash[0]; + vdso_info.nchain = hash[1]; + vdso_info.bucket = &hash[2]; + vdso_info.chain = &hash[vdso_info.nbucket + 2]; + } /* That's all we need. */ vdso_info.valid = true; @@ -209,6 +236,26 @@ static bool vdso_match_version(ELF(Versym) ver, && !strcmp(name, vdso_info.symstrings + aux->vda_name); } +static bool check_sym(ELF(Sym) *sym, ELF(Word) i, const char *name, + const char *version, unsigned long ver_hash) +{ + /* Check for a defined global or weak function w/ right name. */ + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) + return false; + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && + ELF64_ST_BIND(sym->st_info) != STB_WEAK) + return false; + if (strcmp(name, vdso_info.symstrings + sym->st_name)) + return false; + + /* Check symbol version. */ + if (vdso_info.versym && + !vdso_match_version(vdso_info.versym[i], version, ver_hash)) + return false; + + return true; +} + void *vdso_sym(const char *version, const char *name) { unsigned long ver_hash; @@ -216,29 +263,36 @@ void *vdso_sym(const char *version, const char *name) return 0; ver_hash = elf_hash(version); - ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; - - for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) { - ELF(Sym) *sym = &vdso_info.symtab[chain]; - - /* Check for a defined global or weak function w/ right name. */ - if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) - continue; - if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && - ELF64_ST_BIND(sym->st_info) != STB_WEAK) - continue; - if (sym->st_shndx == SHN_UNDEF) - continue; - if (strcmp(name, vdso_info.symstrings + sym->st_name)) - continue; - - /* Check symbol version. */ - if (vdso_info.versym - && !vdso_match_version(vdso_info.versym[chain], - version, ver_hash)) - continue; - - return (void *)(vdso_info.load_offset + sym->st_value); + ELF(Word) i; + + if (vdso_info.gnu_hash) { + uint32_t h1 = gnu_hash(name), h2, *hashval; + + i = vdso_info.bucket[h1 % vdso_info.nbucket]; + if (i == 0) + return 0; + h1 |= 1; + hashval = vdso_info.bucket + vdso_info.nbucket + + (i - vdso_info.gnu_hash[1]); + for (;; i++) { + ELF(Sym) *sym = &vdso_info.symtab[i]; + h2 = *hashval++; + if (h1 == (h2 | 1) && + check_sym(sym, i, name, version, ver_hash)) + return (void *)(vdso_info.load_offset + + sym->st_value); + if (h2 & 1) + break; + } + } else { + i = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; + for (; i; i = vdso_info.chain[i]) { + ELF(Sym) *sym = &vdso_info.symtab[i]; + if (sym->st_shndx != SHN_UNDEF && + check_sym(sym, i, name, version, ver_hash)) + return (void *)(vdso_info.load_offset + + sym->st_value); + } } return 0; diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 0ea4f6813930..4d4a76532dc9 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -237,7 +237,7 @@ static uint64_t set_metadata(uint64_t src, unsigned long lam) * both pointers should point to the same address. * * @return: - * 0: value on the pointer with metadate and value on original are same + * 0: value on the pointer with metadata and value on original are same * 1: not same. */ static int handle_lam_test(void *src, unsigned int lam) diff --git a/tools/testing/selftests/zram/.gitignore b/tools/testing/selftests/zram/.gitignore new file mode 100644 index 000000000000..088cd9bad87a --- /dev/null +++ b/tools/testing/selftests/zram/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +err.log diff --git a/tools/testing/shared/linux/maple_tree.h b/tools/testing/shared/linux/maple_tree.h index 06c89bdcc515..f67d47d32857 100644 --- a/tools/testing/shared/linux/maple_tree.h +++ b/tools/testing/shared/linux/maple_tree.h @@ -2,6 +2,6 @@ #define atomic_t int32_t #define atomic_inc(x) uatomic_inc(x) #define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) do {} while (0) +#define atomic_set(x, y) uatomic_set(x, y) #define U8_MAX UCHAR_MAX #include "../../../../include/linux/maple_tree.h" diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h index e01f66f98982..3e1b6adc027b 100644 --- a/tools/testing/vma/linux/atomic.h +++ b/tools/testing/vma/linux/atomic.h @@ -6,7 +6,7 @@ #define atomic_t int32_t #define atomic_inc(x) uatomic_inc(x) #define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) do {} while (0) +#define atomic_set(x, y) uatomic_set(x, y) #define U8_MAX UCHAR_MAX #endif /* _LINUX_ATOMIC_H */ diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index 8fab5e13c7c3..04ab45e27fb8 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -18,6 +18,12 @@ static bool fail_prealloc; #define vma_iter_prealloc(vmi, vma) \ (fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL)) +#define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536 + +unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; +unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; +unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT; + /* * Directly import the VMA implementation here. Our vma_internal.h wrapper * provides userland-equivalent functionality for everything vma.c uses. @@ -47,6 +53,11 @@ struct task_struct *get_current(void) return &__current; } +unsigned long rlimit(unsigned int limit) +{ + return (unsigned long)-1; +} + /* Helper function to simply allocate a VMA. */ static struct vm_area_struct *alloc_vma(struct mm_struct *mm, unsigned long start, @@ -89,7 +100,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, * begun. Linking to the tree will have caused this to be incremented, * which means we will get a false positive otherwise. */ - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; return vma; } @@ -214,7 +225,7 @@ static bool vma_write_started(struct vm_area_struct *vma) int seq = vma->vm_lock_seq; /* We reset after each check. */ - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; /* The vma_start_write() stub simply increments this value. */ return seq > -1; @@ -1563,6 +1574,57 @@ static bool test_expand_only_mode(void) return true; } +static bool test_mmap_region_basic(void) +{ + struct mm_struct mm = {}; + unsigned long addr; + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, &mm, 0); + + current->mm = &mm; + + /* Map at 0x300000, length 0x3000. */ + addr = __mmap_region(NULL, 0x300000, 0x3000, + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, + 0x300, NULL); + ASSERT_EQ(addr, 0x300000); + + /* Map at 0x250000, length 0x3000. */ + addr = __mmap_region(NULL, 0x250000, 0x3000, + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, + 0x250, NULL); + ASSERT_EQ(addr, 0x250000); + + /* Map at 0x303000, merging to 0x300000 of length 0x6000. */ + addr = __mmap_region(NULL, 0x303000, 0x3000, + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, + 0x303, NULL); + ASSERT_EQ(addr, 0x303000); + + /* Map at 0x24d000, merging to 0x250000 of length 0x6000. */ + addr = __mmap_region(NULL, 0x24d000, 0x3000, + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE, + 0x24d, NULL); + ASSERT_EQ(addr, 0x24d000); + + ASSERT_EQ(mm.map_count, 2); + + for_each_vma(vmi, vma) { + if (vma->vm_start == 0x300000) { + ASSERT_EQ(vma->vm_end, 0x306000); + ASSERT_EQ(vma->vm_pgoff, 0x300); + } else if (vma->vm_start == 0x24d000) { + ASSERT_EQ(vma->vm_end, 0x253000); + ASSERT_EQ(vma->vm_pgoff, 0x24d); + } else { + ASSERT_FALSE(true); + } + } + + cleanup_mm(&mm, &vmi); + return true; +} + int main(void) { int num_tests = 0, num_fail = 0; @@ -1596,6 +1658,8 @@ int main(void) TEST(copy_vma); TEST(expand_only_mode); + TEST(mmap_region_basic); + #undef TEST printf("%d tests run, %d passed, %d failed.\n", diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index e76ff579e1fd..1eae23039854 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -27,11 +27,23 @@ #include <linux/rbtree.h> #include <linux/rwsem.h> +extern unsigned long stack_guard_gap; +#ifdef CONFIG_MMU +extern unsigned long mmap_min_addr; +extern unsigned long dac_mmap_min_addr; +#else +#define mmap_min_addr 0UL +#define dac_mmap_min_addr 0UL +#endif + #define VM_WARN_ON(_expr) (WARN_ON(_expr)) #define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr)) +#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr)) #define VM_BUG_ON(_expr) (BUG_ON(_expr)) #define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr)) +#define MMF_HAS_MDWE 28 + #define VM_NONE 0x00000000 #define VM_READ 0x00000001 #define VM_WRITE 0x00000002 @@ -39,6 +51,7 @@ #define VM_SHARED 0x00000008 #define VM_MAYREAD 0x00000010 #define VM_MAYWRITE 0x00000020 +#define VM_MAYEXEC 0x00000040 #define VM_GROWSDOWN 0x00000100 #define VM_PFNMAP 0x00000400 #define VM_LOCKED 0x00002000 @@ -51,6 +64,8 @@ #define VM_STACK VM_GROWSDOWN #define VM_SHADOW_STACK VM_NONE #define VM_SOFTDIRTY 0 +#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ +#define VM_GROWSUP VM_NONE #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) @@ -58,6 +73,20 @@ /* This mask represents all the VMA flag bits used by mlock */ #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) +#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) + +#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC + +#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) + +#define RLIMIT_STACK 3 /* max stack size */ +#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */ + +#define CAP_IPC_LOCK 14 + #ifdef CONFIG_64BIT /* VM is sealed, in vm_flags */ #define VM_SEALED _BITUL(63) @@ -122,10 +151,22 @@ enum { TASK_COMM_LEN = 16, }; +/* + * Flags for bug emulation. + * + * These occupy the top three bytes. + */ +enum { + READ_IMPLIES_EXEC = 0x0400000, +}; + struct task_struct { char comm[TASK_COMM_LEN]; pid_t pid; struct mm_struct *mm; + + /* Used for emulating ABI behavior of previous Linux versions: */ + unsigned int personality; }; struct task_struct *get_current(void); @@ -186,6 +227,10 @@ struct mm_struct { unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ unsigned long stack_vm; /* VM_STACK */ + + unsigned long def_flags; + + unsigned long flags; /* Must use atomic bitops to access */ }; struct vma_lock { @@ -241,7 +286,7 @@ struct vm_area_struct { * counter reuse can only lead to occasional unnecessary use of the * slowpath. */ - int vm_lock_seq; + unsigned int vm_lock_seq; struct vma_lock *vm_lock; #endif @@ -373,6 +418,17 @@ struct vm_operations_struct { unsigned long addr); }; +struct vm_unmapped_area_info { +#define VM_UNMAPPED_AREA_TOPDOWN 1 + unsigned long flags; + unsigned long length; + unsigned long low_limit; + unsigned long high_limit; + unsigned long align_mask; + unsigned long align_offset; + unsigned long start_gap; +}; + static inline void vma_iter_invalidate(struct vma_iterator *vmi) { mas_pause(&vmi->mas); @@ -416,7 +472,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma) return false; init_rwsem(&vma->vm_lock->lock); - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; return true; } @@ -432,6 +488,8 @@ static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) extern const struct vm_operations_struct vma_dummy_vm_ops; +extern unsigned long rlimit(unsigned int limit); + static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) { memset(vma, 0, sizeof(*vma)); @@ -853,6 +911,11 @@ static inline void mmap_write_unlock(struct mm_struct *) { } +static inline int mmap_write_lock_killable(struct mm_struct *) +{ + return 0; +} + static inline bool can_modify_mm(struct mm_struct *mm, unsigned long start, unsigned long end) @@ -938,7 +1001,7 @@ static inline bool is_file_hugepages(struct file *) static inline int security_vm_enough_memory_mm(struct mm_struct *, long) { - return true; + return 0; } static inline bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long) @@ -1033,4 +1096,159 @@ static inline int mmap_file(struct file *, struct vm_area_struct *) return 0; } +static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_GROWSDOWN) + return stack_guard_gap; + + /* See reasoning around the VM_SHADOW_STACK definition */ + if (vma->vm_flags & VM_SHADOW_STACK) + return PAGE_SIZE; + + return 0; +} + +static inline unsigned long vm_start_gap(struct vm_area_struct *vma) +{ + unsigned long gap = stack_guard_start_gap(vma); + unsigned long vm_start = vma->vm_start; + + vm_start -= gap; + if (vm_start > vma->vm_start) + vm_start = 0; + return vm_start; +} + +static inline unsigned long vm_end_gap(struct vm_area_struct *vma) +{ + unsigned long vm_end = vma->vm_end; + + if (vma->vm_flags & VM_GROWSUP) { + vm_end += stack_guard_gap; + if (vm_end < vma->vm_end) + vm_end = -PAGE_SIZE; + } + return vm_end; +} + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline bool vma_is_accessible(struct vm_area_struct *vma) +{ + return vma->vm_flags & VM_ACCESS_FLAGS; +} + +static inline bool capable(int cap) +{ + return true; +} + +static inline bool mlock_future_ok(struct mm_struct *mm, unsigned long flags, + unsigned long bytes) +{ + unsigned long locked_pages, limit_pages; + + if (!(flags & VM_LOCKED) || capable(CAP_IPC_LOCK)) + return true; + + locked_pages = bytes >> PAGE_SHIFT; + locked_pages += mm->locked_vm; + + limit_pages = rlimit(RLIMIT_MEMLOCK); + limit_pages >>= PAGE_SHIFT; + + return locked_pages <= limit_pages; +} + +static inline int __anon_vma_prepare(struct vm_area_struct *vma) +{ + struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma)); + + if (!anon_vma) + return -ENOMEM; + + anon_vma->root = anon_vma; + vma->anon_vma = anon_vma; + + return 0; +} + +static inline int anon_vma_prepare(struct vm_area_struct *vma) +{ + if (likely(vma->anon_vma)) + return 0; + + return __anon_vma_prepare(vma); +} + +static inline void userfaultfd_unmap_complete(struct mm_struct *mm, + struct list_head *uf) +{ +} + +/* + * Denies creating a writable executable mapping or gaining executable permissions. + * + * This denies the following: + * + * a) mmap(PROT_WRITE | PROT_EXEC) + * + * b) mmap(PROT_WRITE) + * mprotect(PROT_EXEC) + * + * c) mmap(PROT_WRITE) + * mprotect(PROT_READ) + * mprotect(PROT_EXEC) + * + * But allows the following: + * + * d) mmap(PROT_READ | PROT_EXEC) + * mmap(PROT_READ | PROT_EXEC | PROT_BTI) + * + * This is only applicable if the user has set the Memory-Deny-Write-Execute + * (MDWE) protection mask for the current process. + * + * @old specifies the VMA flags the VMA originally possessed, and @new the ones + * we propose to set. + * + * Return: false if proposed change is OK, true if not ok and should be denied. + */ +static inline bool map_deny_write_exec(unsigned long old, unsigned long new) +{ + /* If MDWE is disabled, we have nothing to deny. */ + if (!test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) + return false; + + /* If the new VMA is not executable, we have nothing to deny. */ + if (!(new & VM_EXEC)) + return false; + + /* Under MDWE we do not accept newly writably executable VMAs... */ + if (new & VM_WRITE) + return true; + + /* ...nor previously non-executable VMAs becoming executable. */ + if (!(old & VM_EXEC)) + return true; + + return false; +} + +static inline int mapping_map_writable(struct address_space *mapping) +{ + int c = atomic_read(&mapping->i_mmap_writable); + + /* Derived from the raw_atomic_inc_unless_negative() implementation. */ + do { + if (c < 0) + return -EPERM; + } while (!__sync_bool_compare_and_swap(&mapping->i_mmap_writable, c, c+1)); + + return 0; +} + #endif /* __MM_VMA_INTERNAL_H */ diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README index 84ee217ba8ee..680ce666ceb5 100644 --- a/tools/testing/vsock/README +++ b/tools/testing/vsock/README @@ -36,6 +36,21 @@ Invoke test binaries in both directions as follows: --control-port=1234 \ --peer-cid=3 +Some tests are designed to produce kernel memory leaks. Leaks detection, +however, is deferred to Kernel Memory Leak Detector. It is recommended to enable +kmemleak (CONFIG_DEBUG_KMEMLEAK=y) and explicitly trigger a scan after each test +suite run, e.g. + + # echo clear > /sys/kernel/debug/kmemleak + # $TEST_BINARY ... + # echo "wait for any grace periods" && sleep 2 + # echo scan > /sys/kernel/debug/kmemleak + # echo "wait for kmemleak" && sleep 5 + # echo scan > /sys/kernel/debug/kmemleak + # cat /sys/kernel/debug/kmemleak + +For more information see Documentation/dev-tools/kmemleak.rst. + vsock_perf utility ------------------- 'vsock_perf' is a simple tool to measure vsock performance. It works in diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 34e9dac0a105..de25892f865f 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -96,41 +96,57 @@ void vsock_wait_remote_close(int fd) close(epollfd); } -/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */ -int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type) +/* Create socket <type>, bind to <cid, port> and return the file descriptor. */ +int vsock_bind(unsigned int cid, unsigned int port, int type) { - struct sockaddr_vm sa_client = { - .svm_family = AF_VSOCK, - .svm_cid = VMADDR_CID_ANY, - .svm_port = bind_port, - }; - struct sockaddr_vm sa_server = { + struct sockaddr_vm sa = { .svm_family = AF_VSOCK, .svm_cid = cid, .svm_port = port, }; + int fd; - int client_fd, ret; - - client_fd = socket(AF_VSOCK, type, 0); - if (client_fd < 0) { + fd = socket(AF_VSOCK, type, 0); + if (fd < 0) { perror("socket"); exit(EXIT_FAILURE); } - if (bind(client_fd, (struct sockaddr *)&sa_client, sizeof(sa_client))) { + if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) { perror("bind"); exit(EXIT_FAILURE); } + return fd; +} + +int vsock_connect_fd(int fd, unsigned int cid, unsigned int port) +{ + struct sockaddr_vm sa = { + .svm_family = AF_VSOCK, + .svm_cid = cid, + .svm_port = port, + }; + int ret; + timeout_begin(TIMEOUT); do { - ret = connect(client_fd, (struct sockaddr *)&sa_server, sizeof(sa_server)); + ret = connect(fd, (struct sockaddr *)&sa, sizeof(sa)); timeout_check("connect"); } while (ret < 0 && errno == EINTR); timeout_end(); - if (ret < 0) { + return ret; +} + +/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */ +int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type) +{ + int client_fd; + + client_fd = vsock_bind(VMADDR_CID_ANY, bind_port, type); + + if (vsock_connect_fd(client_fd, cid, port)) { perror("connect"); exit(EXIT_FAILURE); } @@ -141,17 +157,6 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po /* Connect to <cid, port> and return the file descriptor. */ int vsock_connect(unsigned int cid, unsigned int port, int type) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = port, - .svm_cid = cid, - }, - }; - int ret; int fd; control_expectln("LISTENING"); @@ -162,20 +167,14 @@ int vsock_connect(unsigned int cid, unsigned int port, int type) exit(EXIT_FAILURE); } - timeout_begin(TIMEOUT); - do { - ret = connect(fd, &addr.sa, sizeof(addr.svm)); - timeout_check("connect"); - } while (ret < 0 && errno == EINTR); - timeout_end(); - - if (ret < 0) { + if (vsock_connect_fd(fd, cid, port)) { int old_errno = errno; close(fd); fd = -1; errno = old_errno; } + return fd; } @@ -192,28 +191,9 @@ int vsock_seqpacket_connect(unsigned int cid, unsigned int port) /* Listen on <cid, port> and return the file descriptor. */ static int vsock_listen(unsigned int cid, unsigned int port, int type) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = port, - .svm_cid = cid, - }, - }; int fd; - fd = socket(AF_VSOCK, type, 0); - if (fd < 0) { - perror("socket"); - exit(EXIT_FAILURE); - } - - if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { - perror("bind"); - exit(EXIT_FAILURE); - } + fd = vsock_bind(cid, port, type); if (listen(fd, 1) < 0) { perror("listen"); @@ -401,7 +381,7 @@ void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret) */ void send_byte(int fd, int expected_ret, int flags) { - const uint8_t byte = 'A'; + static const uint8_t byte = 'A'; send_buf(fd, &byte, sizeof(byte), flags, expected_ret); } @@ -420,7 +400,7 @@ void recv_byte(int fd, int expected_ret, int flags) recv_buf(fd, &byte, sizeof(byte), flags, expected_ret); if (byte != 'A') { - fprintf(stderr, "unexpected byte read %c\n", byte); + fprintf(stderr, "unexpected byte read 0x%02x\n", byte); exit(EXIT_FAILURE); } } @@ -486,8 +466,7 @@ void list_tests(const struct test_case *test_cases) exit(EXIT_FAILURE); } -void skip_test(struct test_case *test_cases, size_t test_cases_len, - const char *test_id_str) +static unsigned long parse_test_id(const char *test_id_str, size_t test_cases_len) { unsigned long test_id; char *endptr = NULL; @@ -505,9 +484,35 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len, exit(EXIT_FAILURE); } + return test_id; +} + +void skip_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + unsigned long test_id = parse_test_id(test_id_str, test_cases_len); test_cases[test_id].skip = true; } +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + static bool skip_all = true; + unsigned long test_id; + + if (skip_all) { + unsigned long i; + + for (i = 0; i < test_cases_len; ++i) + test_cases[i].skip = true; + + skip_all = false; + } + + test_id = parse_test_id(test_id_str, test_cases_len); + test_cases[test_id].skip = false; +} + unsigned long hash_djb2(const void *data, size_t len) { unsigned long hash = 5381; diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index ba84d296d8b7..d1f765ce3eee 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -39,10 +39,12 @@ struct test_case { void init_signals(void); unsigned int parse_cid(const char *str); unsigned int parse_port(const char *str); +int vsock_connect_fd(int fd, unsigned int cid, unsigned int port); int vsock_connect(unsigned int cid, unsigned int port, int type); int vsock_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp, int type); int vsock_stream_connect(unsigned int cid, unsigned int port); +int vsock_bind(unsigned int cid, unsigned int port, int type); int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type); int vsock_seqpacket_connect(unsigned int cid, unsigned int port); @@ -62,6 +64,8 @@ void run_tests(const struct test_case *test_cases, void list_tests(const struct test_case *test_cases); void skip_test(struct test_case *test_cases, size_t test_cases_len, const char *test_id_str); +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str); unsigned long hash_djb2(const void *data, size_t len); size_t iovec_bytes(const struct iovec *iov, size_t iovnum); unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum); diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 48f17641ca50..dfff8b288265 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -22,12 +22,17 @@ #include <signal.h> #include <sys/ioctl.h> #include <linux/sockios.h> +#include <linux/time64.h> #include "vsock_test_zerocopy.h" #include "timeout.h" #include "control.h" #include "util.h" +/* Basic messages for control_writeulong(), control_readulong() */ +#define CONTROL_CONTINUE 1 +#define CONTROL_DONE 0 + static void test_stream_connection_reset(const struct test_opts *opts) { union { @@ -108,24 +113,9 @@ static void test_stream_bind_only_client(const struct test_opts *opts) static void test_stream_bind_only_server(const struct test_opts *opts) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = opts->peer_port, - .svm_cid = VMADDR_CID_ANY, - }, - }; int fd; - fd = socket(AF_VSOCK, SOCK_STREAM, 0); - - if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { - perror("bind"); - exit(EXIT_FAILURE); - } + fd = vsock_bind(VMADDR_CID_ANY, opts->peer_port, SOCK_STREAM); /* Notify the client that the server is ready */ control_writeln("BIND"); @@ -559,7 +549,7 @@ static time_t current_nsec(void) exit(EXIT_FAILURE); } - return (ts.tv_sec * 1000000000ULL) + ts.tv_nsec; + return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; } #define RCVTIMEO_TIMEOUT_SEC 1 @@ -599,7 +589,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) } read_overhead_ns = current_nsec() - read_enter_ns - - 1000000000ULL * RCVTIMEO_TIMEOUT_SEC; + NSEC_PER_SEC * RCVTIMEO_TIMEOUT_SEC; if (read_overhead_ns > READ_OVERHEAD_NSEC) { fprintf(stderr, @@ -1473,6 +1463,331 @@ static void test_stream_cred_upd_on_set_rcvlowat(const struct test_opts *opts) test_stream_credit_update_test(opts, false); } +/* The goal of test leak_acceptq is to stress the race between connect() and + * close(listener). Implementation of client/server loops boils down to: + * + * client server + * ------ ------ + * write(CONTINUE) + * expect(CONTINUE) + * listen() + * write(LISTENING) + * expect(LISTENING) + * connect() close() + */ +#define ACCEPTQ_LEAK_RACE_TIMEOUT 2 /* seconds */ + +static void test_stream_leak_acceptq_client(const struct test_opts *opts) +{ + time_t tout; + int fd; + + tout = current_nsec() + ACCEPTQ_LEAK_RACE_TIMEOUT * NSEC_PER_SEC; + do { + control_writeulong(CONTROL_CONTINUE); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd >= 0) + close(fd); + } while (current_nsec() < tout); + + control_writeulong(CONTROL_DONE); +} + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_leak_acceptq_server(const struct test_opts *opts) +{ + int fd; + + while (control_readulong() == CONTROL_CONTINUE) { + fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); + control_writeln("LISTENING"); + close(fd); + } +} + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_msgzcopy_leak_errq_client(const struct test_opts *opts) +{ + struct pollfd fds = { 0 }; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + send_byte(fd, 1, MSG_ZEROCOPY); + + fds.fd = fd; + fds.events = 0; + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + recv_byte(fd, 1, 0); + vsock_wait_remote_close(fd); + close(fd); +} + +/* Test msgzcopy_leak_zcskb is meant to exercise sendmsg() error handling path, + * that might leak an skb. The idea is to fail virtio_transport_init_zcopy_skb() + * by hitting net.core.optmem_max limit in sock_omalloc(), specifically + * + * vsock_connectible_sendmsg + * virtio_transport_stream_enqueue + * virtio_transport_send_pkt_info + * virtio_transport_init_zcopy_skb + * . msg_zerocopy_realloc + * . msg_zerocopy_alloc + * . sock_omalloc + * . sk_omem_alloc + size > sysctl_optmem_max + * return -ENOMEM + * + * We abuse the implementation detail of net/socket.c:____sys_sendmsg(). + * sk_omem_alloc can be precisely bumped by sock_kmalloc(), as it is used to + * fetch user-provided control data. + * + * While this approach works for now, it relies on assumptions regarding the + * implementation and configuration (for example, order of net.core.optmem_max + * can not exceed MAX_PAGE_ORDER), which may not hold in the future. A more + * resilient testing could be implemented by leveraging the Fault injection + * framework (CONFIG_FAULT_INJECTION), e.g. + * + * client# echo N > /sys/kernel/debug/failslab/ignore-gfp-wait + * client# echo 0 > /sys/kernel/debug/failslab/verbose + * + * void client(const struct test_opts *opts) + * { + * char buf[16]; + * int f, s, i; + * + * f = open("/proc/self/fail-nth", O_WRONLY); + * + * for (i = 1; i < 32; i++) { + * control_writeulong(CONTROL_CONTINUE); + * + * s = vsock_stream_connect(opts->peer_cid, opts->peer_port); + * enable_so_zerocopy_check(s); + * + * sprintf(buf, "%d", i); + * write(f, buf, strlen(buf)); + * + * send(s, &(char){ 0 }, 1, MSG_ZEROCOPY); + * + * write(f, "0", 1); + * close(s); + * } + * + * control_writeulong(CONTROL_DONE); + * close(f); + * } + * + * void server(const struct test_opts *opts) + * { + * int fd; + * + * while (control_readulong() == CONTROL_CONTINUE) { + * fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + * vsock_wait_remote_close(fd); + * close(fd); + * } + * } + * + * Refer to Documentation/fault-injection/fault-injection.rst. + */ +#define MAX_PAGE_ORDER 10 /* usually */ +#define PAGE_SIZE 4096 + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) +{ + size_t optmem_max, ctl_len, chunk_size; + struct msghdr msg = { 0 }; + struct iovec iov; + char *chunk; + int fd, res; + FILE *f; + + f = fopen("/proc/sys/net/core/optmem_max", "r"); + if (!f) { + perror("fopen(optmem_max)"); + exit(EXIT_FAILURE); + } + + if (fscanf(f, "%zu", &optmem_max) != 1) { + fprintf(stderr, "fscanf(optmem_max) failed\n"); + exit(EXIT_FAILURE); + } + + fclose(f); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + + ctl_len = optmem_max - 1; + if (ctl_len > PAGE_SIZE << MAX_PAGE_ORDER) { + fprintf(stderr, "Try with net.core.optmem_max = 100000\n"); + exit(EXIT_FAILURE); + } + + chunk_size = CMSG_SPACE(ctl_len); + chunk = malloc(chunk_size); + if (!chunk) { + perror("malloc"); + exit(EXIT_FAILURE); + } + memset(chunk, 0, chunk_size); + + iov.iov_base = &(char){ 0 }; + iov.iov_len = 1; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = chunk; + msg.msg_controllen = ctl_len; + + errno = 0; + res = sendmsg(fd, &msg, MSG_ZEROCOPY); + if (res >= 0 || errno != ENOMEM) { + fprintf(stderr, "Expected ENOMEM, got errno=%d res=%d\n", + errno, res); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + +#define MAX_PORT_RETRIES 24 /* net/vmw_vsock/af_vsock.c */ + +/* Test attempts to trigger a transport release for an unbound socket. This can + * lead to a reference count mishandling. + */ +static void test_stream_transport_uaf_client(const struct test_opts *opts) +{ + int sockets[MAX_PORT_RETRIES]; + struct sockaddr_vm addr; + int fd, i, alen; + + fd = vsock_bind(VMADDR_CID_ANY, VMADDR_PORT_ANY, SOCK_STREAM); + + alen = sizeof(addr); + if (getsockname(fd, (struct sockaddr *)&addr, &alen)) { + perror("getsockname"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < MAX_PORT_RETRIES; ++i) + sockets[i] = vsock_bind(VMADDR_CID_ANY, ++addr.svm_port, + SOCK_STREAM); + + close(fd); + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (!vsock_connect_fd(fd, addr.svm_cid, addr.svm_port)) { + perror("Unexpected connect() #1 success"); + exit(EXIT_FAILURE); + } + + /* Vulnerable system may crash now. */ + if (!vsock_connect_fd(fd, VMADDR_CID_HOST, VMADDR_PORT_ANY)) { + perror("Unexpected connect() #2 success"); + exit(EXIT_FAILURE); + } + + close(fd); + while (i--) + close(sockets[i]); + + control_writeln("DONE"); +} + +static void test_stream_transport_uaf_server(const struct test_opts *opts) +{ + control_expectln("DONE"); +} + +static void test_stream_connect_retry_client(const struct test_opts *opts) +{ + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (!vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) { + fprintf(stderr, "Unexpected connect() #1 success\n"); + exit(EXIT_FAILURE); + } + + control_writeln("LISTEN"); + control_expectln("LISTENING"); + + if (vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) { + perror("connect() #2"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_connect_retry_server(const struct test_opts *opts) +{ + int fd; + + control_expectln("LISTEN"); + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1603,6 +1918,31 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_unsent_bytes_client, .run_server = test_seqpacket_unsent_bytes_server, }, + { + .name = "SOCK_STREAM leak accept queue", + .run_client = test_stream_leak_acceptq_client, + .run_server = test_stream_leak_acceptq_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak MSG_ERRQUEUE", + .run_client = test_stream_msgzcopy_leak_errq_client, + .run_server = test_stream_msgzcopy_leak_errq_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb", + .run_client = test_stream_msgzcopy_leak_zcskb_client, + .run_server = test_stream_msgzcopy_leak_zcskb_server, + }, + { + .name = "SOCK_STREAM transport release use-after-free", + .run_client = test_stream_transport_uaf_client, + .run_server = test_stream_transport_uaf_server, + }, + { + .name = "SOCK_STREAM retry failed connect()", + .run_client = test_stream_connect_retry_client, + .run_server = test_stream_connect_retry_server, + }, {}, }; @@ -1644,6 +1984,11 @@ static const struct option longopts[] = { .val = 's', }, { + .name = "pick", + .has_arg = required_argument, + .val = 't', + }, + { .name = "help", .has_arg = no_argument, .val = '?', @@ -1680,6 +2025,8 @@ static void usage(void) " --peer-cid <cid> CID of the other side\n" " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n" " --list List of tests that will be executed\n" + " --pick <test_id> Test ID to execute selectively;\n" + " use multiple --pick options to select more tests\n" " --skip <test_id> Test ID to skip;\n" " use multiple --skip options to skip more tests\n", DEFAULT_PEER_PORT @@ -1736,6 +2083,10 @@ int main(int argc, char **argv) skip_test(test_cases, ARRAY_SIZE(test_cases) - 1, optarg); break; + case 't': + pick_test(test_cases, ARRAY_SIZE(test_cases) - 1, + optarg); + break; case '?': default: usage(); diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index a6a7dee16622..8b5101457c70 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -85,4 +85,6 @@ clean: doc_clean fixdep-clean $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)rm -f rtla rtla-static fixdep FEATURE-DUMP rtla-* $(Q)rm -rf feature -.PHONY: FORCE clean +check: $(RTLA) + RTLA=$(RTLA) prove -o -f tests/ +.PHONY: FORCE clean check diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c index 245e9344932b..85f398b89597 100644 --- a/tools/tracing/rtla/src/osnoise.c +++ b/tools/tracing/rtla/src/osnoise.c @@ -867,7 +867,7 @@ int osnoise_set_workload(struct osnoise_context *context, bool onoff) retval = osnoise_options_set_option("OSNOISE_WORKLOAD", onoff); if (retval < 0) - return -1; + return -2; context->opt_workload = onoff; @@ -1079,6 +1079,42 @@ out_err: return NULL; } +bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record) +{ + /* + * The tool instance is always present, it is the one used to collect + * data. + */ + if (!tracefs_trace_is_on(tool->trace.inst)) + return true; + + /* + * The trace record instance is only enabled when -t is set. IOW, when the system + * is tracing. + */ + return record && !tracefs_trace_is_on(record->trace.inst); +} + +/* + * osnoise_report_missed_events - report number of events dropped by trace + * buffer + */ +void +osnoise_report_missed_events(struct osnoise_tool *tool) +{ + unsigned long long total_events; + + if (tool->trace.missed_events == UINT64_MAX) + printf("unknown number of events missed, results might not be accurate\n"); + else if (tool->trace.missed_events > 0) { + total_events = tool->trace.processed_events + tool->trace.missed_events; + + printf("%lld (%.2f%%) events missed, results might not be accurate\n", + tool->trace.missed_events, + (double) tool->trace.missed_events / total_events * 100.0); + } +} + static void osnoise_usage(int err) { int i; diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h index 555f4f4903cc..91835a7d8c2b 100644 --- a/tools/tracing/rtla/src/osnoise.h +++ b/tools/tracing/rtla/src/osnoise.h @@ -104,6 +104,8 @@ struct osnoise_tool { void osnoise_destroy_tool(struct osnoise_tool *top); struct osnoise_tool *osnoise_init_tool(char *tool_name); struct osnoise_tool *osnoise_init_trace_tool(char *tracer); +void osnoise_report_missed_events(struct osnoise_tool *tool); +bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record); int osnoise_hist_main(int argc, char *argv[]); int osnoise_top_main(int argc, char **argv); diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 214e2c93fde0..b4930b835b0a 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -440,6 +440,7 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too trace_seq_reset(trace->seq); osnoise_print_summary(params, trace, data); + osnoise_report_missed_events(tool); } /* @@ -970,7 +971,7 @@ int osnoise_hist_main(int argc, char *argv[]) goto out_hist; } - if (trace_is_off(&tool->trace, &record->trace)) + if (osnoise_trace_is_off(tool, record)) break; } @@ -980,7 +981,7 @@ int osnoise_hist_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&tool->trace, &record->trace)) { + if (osnoise_trace_is_off(tool, record)) { printf("rtla osnoise hit stop tracing\n"); if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 45647495ce3b..4772677ac762 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -280,6 +280,7 @@ osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top) trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); + osnoise_report_missed_events(top); } /* @@ -801,7 +802,7 @@ int osnoise_top_main(int argc, char **argv) if (!params->quiet) osnoise_print_stats(params, tool); - if (trace_is_off(&tool->trace, &record->trace)) + if (osnoise_trace_is_off(tool, record)) break; } @@ -810,7 +811,7 @@ int osnoise_top_main(int argc, char **argv) return_value = 0; - if (trace_is_off(&tool->trace, &record->trace)) { + if (osnoise_trace_is_off(tool, record)) { printf("osnoise hit stop tracing\n"); if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 8b66387e5f35..6d7d0a2d45b4 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -282,6 +282,21 @@ static void timerlat_hist_header(struct osnoise_tool *tool) } /* + * format_summary_value - format a line of summary value (min, max or avg) + * of hist data + */ +static void format_summary_value(struct trace_seq *seq, + int count, + unsigned long long val, + bool avg) +{ + if (count) + trace_seq_printf(seq, "%9llu ", avg ? val / count : val); + else + trace_seq_printf(seq, "%9c ", '-'); +} + +/* * timerlat_print_summary - print the summary of the hist data to the output */ static void @@ -328,29 +343,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_irq); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].min_irq, + false); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_thread); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].min_thread, + false); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_user); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].min_user, + false); } trace_seq_printf(trace->seq, "\n"); @@ -364,29 +373,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_irq / data->hist[cpu].irq_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].sum_irq, + true); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_thread / data->hist[cpu].thread_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].sum_thread, + true); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_user / data->hist[cpu].user_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].sum_user, + true); } trace_seq_printf(trace->seq, "\n"); @@ -400,29 +403,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_irq); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].max_irq, + false); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_thread); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].max_thread, + false); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_user); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].max_user, + false); } trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); @@ -506,16 +503,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "min: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_irq); + format_summary_value(trace->seq, + sum.irq_count, + sum.min_irq, + false); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_thread); + format_summary_value(trace->seq, + sum.thread_count, + sum.min_thread, + false); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_user); + format_summary_value(trace->seq, + sum.user_count, + sum.min_user, + false); trace_seq_printf(trace->seq, "\n"); @@ -523,16 +526,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "avg: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_irq / sum.irq_count); + format_summary_value(trace->seq, + sum.irq_count, + sum.sum_irq, + true); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_thread / sum.thread_count); + format_summary_value(trace->seq, + sum.thread_count, + sum.sum_thread, + true); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_user / sum.user_count); + format_summary_value(trace->seq, + sum.user_count, + sum.sum_user, + true); trace_seq_printf(trace->seq, "\n"); @@ -540,16 +549,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "max: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_irq); + format_summary_value(trace->seq, + sum.irq_count, + sum.max_irq, + false); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_thread); + format_summary_value(trace->seq, + sum.thread_count, + sum.max_thread, + false); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_user); + format_summary_value(trace->seq, + sum.user_count, + sum.max_user, + false); trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); @@ -641,6 +656,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t timerlat_print_summary(params, trace, data); timerlat_print_stats_all(params, trace, data); + osnoise_report_missed_events(tool); } /* @@ -1085,12 +1101,15 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param } } - if (params->user_hist) { - retval = osnoise_set_workload(tool->context, 0); - if (retval) { - err_msg("Failed to set OSNOISE_WORKLOAD option\n"); - goto out_err; - } + /* + * Set workload according to type of thread if the kernel supports it. + * On kernels without support, user threads will have already failed + * on missing timerlat_fd, and kernel threads do not need it. + */ + retval = osnoise_set_workload(tool->context, params->kernel_workload); + if (retval < -1) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; } return 0; @@ -1131,9 +1150,20 @@ out_err: } static int stop_tracing; +static struct trace_instance *hist_inst = NULL; static void stop_hist(int sig) { + if (stop_tracing) { + /* + * Stop requested twice in a row; abort event processing and + * exit immediately + */ + tracefs_iterate_stop(hist_inst->inst); + return; + } stop_tracing = 1; + if (hist_inst) + trace_instance_stop(hist_inst); } /* @@ -1180,6 +1210,12 @@ int timerlat_hist_main(int argc, char *argv[]) } trace = &tool->trace; + /* + * Save trace instance into global variable so that SIGINT can stop + * the timerlat tracer. + * Otherwise, rtla could loop indefinitely when overloaded. + */ + hist_inst = trace; retval = enable_timerlat(trace); if (retval) { @@ -1327,7 +1363,7 @@ int timerlat_hist_main(int argc, char *argv[]) goto out_hist; } - if (trace_is_off(&tool->trace, &record->trace)) + if (osnoise_trace_is_off(tool, record)) break; /* is there still any user-threads ? */ @@ -1348,7 +1384,7 @@ int timerlat_hist_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&tool->trace, &record->trace)) { + if (osnoise_trace_is_off(tool, record) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); if (!params->no_aa) diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 059b468981e4..05a9403b01d2 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -435,6 +435,7 @@ timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *to trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); + osnoise_report_missed_events(top); } /* @@ -851,12 +852,15 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * } } - if (params->user_top) { - retval = osnoise_set_workload(top->context, 0); - if (retval) { - err_msg("Failed to set OSNOISE_WORKLOAD option\n"); - goto out_err; - } + /* + * Set workload according to type of thread if the kernel supports it. + * On kernels without support, user threads will have already failed + * on missing timerlat_fd, and kernel threads do not need it. + */ + retval = osnoise_set_workload(top->context, params->kernel_workload); + if (retval < -1) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; } if (isatty(STDOUT_FILENO) && !params->quiet) @@ -900,9 +904,20 @@ out_err: } static int stop_tracing; +static struct trace_instance *top_inst = NULL; static void stop_top(int sig) { + if (stop_tracing) { + /* + * Stop requested twice in a row; abort event processing and + * exit immediately + */ + tracefs_iterate_stop(top_inst->inst); + return; + } stop_tracing = 1; + if (top_inst) + trace_instance_stop(top_inst); } /* @@ -950,6 +965,13 @@ int timerlat_top_main(int argc, char *argv[]) } trace = &top->trace; + /* + * Save trace instance into global variable so that SIGINT can stop + * the timerlat tracer. + * Otherwise, rtla could loop indefinitely when overloaded. + */ + top_inst = trace; + retval = enable_timerlat(trace); if (retval) { @@ -1093,7 +1115,7 @@ int timerlat_top_main(int argc, char *argv[]) while (!stop_tracing) { sleep(params->sleep_time); - if (params->aa_only && !trace_is_off(&top->trace, &record->trace)) + if (params->aa_only && !osnoise_trace_is_off(top, record)) continue; retval = tracefs_iterate_raw_events(trace->tep, @@ -1110,7 +1132,7 @@ int timerlat_top_main(int argc, char *argv[]) if (!params->quiet) timerlat_print_stats(params, top); - if (trace_is_off(&top->trace, &record->trace)) + if (osnoise_trace_is_off(top, record)) break; /* is there still any user-threads ? */ @@ -1131,7 +1153,7 @@ int timerlat_top_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&top->trace, &record->trace)) { + if (osnoise_trace_is_off(top, record) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); if (!params->no_aa) diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c index 170a706248ab..728f5029d533 100644 --- a/tools/tracing/rtla/src/trace.c +++ b/tools/tracing/rtla/src/trace.c @@ -118,6 +118,8 @@ collect_registered_events(struct tep_event *event, struct tep_record *record, struct trace_instance *trace = context; struct trace_seq *s = trace->seq; + trace->processed_events++; + if (!event->handler) return 0; @@ -127,6 +129,31 @@ collect_registered_events(struct tep_event *event, struct tep_record *record, } /* + * collect_missed_events - record number of missed events + * + * If rtla cannot keep up with events generated by tracer, events are going + * to fall out of the ring buffer. + * Collect how many events were missed so it can be reported to the user. + */ +static int +collect_missed_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_instance *trace = context; + + if (trace->missed_events == UINT64_MAX) + return 0; + + if (record->missed_events > 0) + trace->missed_events += record->missed_events; + else + /* Events missed but no data on how many */ + trace->missed_events = UINT64_MAX; + + return 0; +} + +/* * trace_instance_destroy - destroy and free a rtla trace instance */ void trace_instance_destroy(struct trace_instance *trace) @@ -181,6 +208,17 @@ int trace_instance_init(struct trace_instance *trace, char *tool_name) */ tracefs_trace_off(trace->inst); + /* + * Collect the number of events missed due to tracefs buffer + * overflow. + */ + trace->missed_events = 0; + tracefs_follow_missed_events(trace->inst, + collect_missed_events, + trace); + + trace->processed_events = 0; + return 0; out_err: @@ -197,6 +235,14 @@ int trace_instance_start(struct trace_instance *trace) } /* + * trace_instance_stop - stop tracing a given rtla instance + */ +int trace_instance_stop(struct trace_instance *trace) +{ + return tracefs_trace_off(trace->inst); +} + +/* * trace_events_free - free a list of trace events */ static void trace_events_free(struct trace_events *events) @@ -522,25 +568,6 @@ void trace_events_destroy(struct trace_instance *instance, trace_events_free(events); } -int trace_is_off(struct trace_instance *tool, struct trace_instance *trace) -{ - /* - * The tool instance is always present, it is the one used to collect - * data. - */ - if (!tracefs_trace_is_on(tool->inst)) - return 1; - - /* - * The trace instance is only enabled when -t is set. IOW, when the system - * is tracing. - */ - if (trace && !tracefs_trace_is_on(trace->inst)) - return 1; - - return 0; -} - /* * trace_set_buffer_size - set the per-cpu tracing buffer size. */ diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h index c7c92dc9a18a..3cd40dd3f06c 100644 --- a/tools/tracing/rtla/src/trace.h +++ b/tools/tracing/rtla/src/trace.h @@ -17,10 +17,13 @@ struct trace_instance { struct tracefs_instance *inst; struct tep_handle *tep; struct trace_seq *seq; + unsigned long long missed_events; + unsigned long long processed_events; }; int trace_instance_init(struct trace_instance *trace, char *tool_name); int trace_instance_start(struct trace_instance *trace); +int trace_instance_stop(struct trace_instance *trace); void trace_instance_destroy(struct trace_instance *trace); struct trace_seq *get_trace_seq(void); @@ -47,5 +50,4 @@ int trace_events_enable(struct trace_instance *instance, int trace_event_add_filter(struct trace_events *event, char *filter); int trace_event_add_trigger(struct trace_events *event, char *trigger); -int trace_is_off(struct trace_instance *tool, struct trace_instance *trace); int trace_set_buffer_size(struct trace_instance *trace, int size); diff --git a/tools/tracing/rtla/tests/engine.sh b/tools/tracing/rtla/tests/engine.sh new file mode 100644 index 000000000000..64d0446dc28e --- /dev/null +++ b/tools/tracing/rtla/tests/engine.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +test_begin() { + # Count tests to allow the test harness to double-check if all were + # included correctly. + ctr=0 + [ -z "$RTLA" ] && RTLA="./rtla" + [ -n "$TEST_COUNT" ] && echo "1..$TEST_COUNT" +} + +check() { + # Simple check: run rtla with given arguments and test exit code. + # If TEST_COUNT is set, run the test. Otherwise, just count. + ctr=$(($ctr + 1)) + if [ -n "$TEST_COUNT" ] + then + # Run rtla; in case of failure, include its output as comment + # in the test results. + result=$(stdbuf -oL $TIMEOUT "$RTLA" $2 2>&1); exitcode=$? + if [ $exitcode -eq 0 ] + then + echo "ok $ctr - $1" + else + echo "not ok $ctr - $1" + # Add rtla output and exit code as comments in case of failure + echo "$result" | col -b | while read line; do echo "# $line"; done + printf "#\n# exit code %s\n" $exitcode + fi + fi +} + +set_timeout() { + TIMEOUT="timeout -v -k 15s $1" +} + +unset_timeout() { + unset TIMEOUT +} + +test_end() { + # If running without TEST_COUNT, tests are not actually run, just + # counted. In that case, re-run the test with the correct count. + [ -z "$TEST_COUNT" ] && TEST_COUNT=$ctr exec bash $0 || true +} + +# Avoid any environmental discrepancies +export LC_ALL=C +unset_timeout diff --git a/tools/tracing/rtla/tests/hwnoise.t b/tools/tracing/rtla/tests/hwnoise.t new file mode 100644 index 000000000000..bbed17580537 --- /dev/null +++ b/tools/tracing/rtla/tests/hwnoise.t @@ -0,0 +1,21 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source tests/engine.sh +test_begin + +set_timeout 2m + +check "verify help page" \ + "hwnoise --help" +check "detect noise higher than one microsecond" \ + "hwnoise -c 0 -T 1 -d 5s -q" +check "set the automatic trace mode" \ + "hwnoise -a 5 -d 30s" +check "set scheduling param to the osnoise tracer threads" \ + "hwnoise -P F:1 -c 0 -r 900000 -d 1M -q" +check "stop the trace if a single sample is higher than 1 us" \ + "hwnoise -s 1 -T 1 -t -d 30s" +check "enable a trace event trigger" \ + "hwnoise -t -e osnoise:irq_noise trigger=\"hist:key=desc,duration:sort=desc,duration:vals=hitcount\" -d 1m" + +test_end diff --git a/tools/tracing/rtla/tests/osnoise.t b/tools/tracing/rtla/tests/osnoise.t new file mode 100644 index 000000000000..86596e547893 --- /dev/null +++ b/tools/tracing/rtla/tests/osnoise.t @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source tests/engine.sh +test_begin + +set_timeout 2m + +check "verify help page" \ + "osnoise --help" +check "verify the --priority/-P param" \ + "osnoise top -P F:1 -c 0 -r 900000 -d 1M -q" +check "verify the --stop/-s param" \ + "osnoise top -s 30 -T 1 -t" +check "verify the --trace param" \ + "osnoise hist -s 30 -T 1 -t" +check "verify the --entries/-E param" \ + "osnoise hist -P F:1 -c 0 -r 900000 -d 1M -b 10 -E 25" + +test_end diff --git a/tools/tracing/rtla/tests/timerlat.t b/tools/tracing/rtla/tests/timerlat.t new file mode 100644 index 000000000000..e86f40e5749e --- /dev/null +++ b/tools/tracing/rtla/tests/timerlat.t @@ -0,0 +1,27 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source tests/engine.sh +test_begin + +set_timeout 2m + +check "verify help page" \ + "timerlat --help" +check "verify -s/--stack" \ + "timerlat top -s 3 -T 10 -t" +check "verify -P/--priority" \ + "timerlat top -P F:1 -c 0 -d 1M -q" +check "test in nanoseconds" \ + "timerlat top -i 2 -c 0 -n -d 30s" +check "set the automatic trace mode" \ + "timerlat top -a 5 --dump-tasks" +check "print the auto-analysis if hits the stop tracing condition" \ + "timerlat top --aa-only 5" +check "disable auto-analysis" \ + "timerlat top -s 3 -T 10 -t --no-aa" +check "verify -c/--cpus" \ + "timerlat hist -c 0 -d 30s" +check "hist test in nanoseconds" \ + "timerlat hist -i 2 -c 0 -n -d 30s" + +test_end diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py index bdeb98baa8b0..d9a3fe2b74bf 100644 --- a/tools/verification/dot2/automata.py +++ b/tools/verification/dot2/automata.py @@ -19,13 +19,14 @@ class Automata: invalid_state_str = "INVALID_STATE" - def __init__(self, file_path): + def __init__(self, file_path, model_name=None): self.__dot_path = file_path - self.name = self.__get_model_name() + self.name = model_name or self.__get_model_name() self.__dot_lines = self.__open_dot() self.states, self.initial_state, self.final_states = self.__get_state_variables() self.events = self.__get_event_variables() self.function = self.__create_matrix() + self.events_start, self.events_start_run = self.__store_init_events() def __get_model_name(self): basename = ntpath.basename(self.__dot_path) @@ -172,3 +173,34 @@ class Automata: cursor += 1 return matrix + + def __store_init_events(self): + events_start = [False] * len(self.events) + events_start_run = [False] * len(self.events) + for i, _ in enumerate(self.events): + curr_event_will_init = 0 + curr_event_from_init = False + curr_event_used = 0 + for j, _ in enumerate(self.states): + if self.function[j][i] != self.invalid_state_str: + curr_event_used += 1 + if self.function[j][i] == self.initial_state: + curr_event_will_init += 1 + if self.function[0][i] != self.invalid_state_str: + curr_event_from_init = True + # this event always leads to init + if curr_event_will_init and curr_event_used == curr_event_will_init: + events_start[i] = True + # this event is only called from init + if curr_event_from_init and curr_event_used == 1: + events_start_run[i] = True + return events_start, events_start_run + + def is_start_event(self, event): + return self.events_start[self.events.index(event)] + + def is_start_run_event(self, event): + # prefer handle_start_event if there + if any(self.events_start): + return False + return self.events_start_run[self.events.index(event)] diff --git a/tools/verification/dot2/dot2c.py b/tools/verification/dot2/dot2c.py index 87d8a1e1470c..fa2816ac7b61 100644 --- a/tools/verification/dot2/dot2c.py +++ b/tools/verification/dot2/dot2c.py @@ -22,8 +22,8 @@ class Dot2c(Automata): struct_automaton_def = "automaton" var_automaton_def = "aut" - def __init__(self, file_path): - super().__init__(file_path) + def __init__(self, file_path, model_name=None): + super().__init__(file_path, model_name) self.line_length = 100 def __buff_to_string(self, buff): diff --git a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k index d4d7e52d549e..559ba191a1f6 100644 --- a/tools/verification/dot2/dot2k +++ b/tools/verification/dot2/dot2k @@ -21,25 +21,24 @@ if __name__ == '__main__': parser.add_argument('-t', "--monitor_type", dest="monitor_type", required=True) parser.add_argument('-n', "--model_name", dest="model_name", required=False) parser.add_argument("-D", "--description", dest="description", required=False) + parser.add_argument("-a", "--auto_patch", dest="auto_patch", + action="store_true", required=False, + help="Patch the kernel in place") params = parser.parse_args() print("Opening and parsing the dot file %s" % params.dot_file) try: - monitor=dot2k(params.dot_file, params.monitor_type) + monitor=dot2k(params.dot_file, params.monitor_type, vars(params)) except Exception as e: print('Error: '+ str(e)) print("Sorry : :-(") sys.exit(1) - # easier than using argparse action. - if params.model_name != None: - print(params.model_name) - print("Writing the monitor into the directory %s" % monitor.name) monitor.print_files() print("Almost done, checklist") print(" - Edit the %s/%s.c to add the instrumentation" % (monitor.name, monitor.name)) - print(" - Edit include/trace/events/rv.h to add the tracepoint entry") - print(" - Move it to the kernel's monitor directory") - print(" - Edit kernel/trace/rv/Makefile") - print(" - Edit kernel/trace/rv/Kconfig") + print(monitor.fill_tracepoint_tooltip()) + print(monitor.fill_makefile_tooltip()) + print(monitor.fill_kconfig_tooltip()) + print(monitor.fill_monitor_tooltip()) diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py index 016550fccf1f..7547eb290b7d 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/dot2/dot2k.py @@ -14,50 +14,83 @@ import os class dot2k(Dot2c): monitor_types = { "global" : 1, "per_cpu" : 2, "per_task" : 3 } - monitor_templates_dir = "dot2k/rv_templates/" + monitor_templates_dir = "dot2/dot2k_templates/" + rv_dir = "kernel/trace/rv" monitor_type = "per_cpu" - def __init__(self, file_path, MonitorType): - super().__init__(file_path) + def __init__(self, file_path, MonitorType, extra_params={}): + super().__init__(file_path, extra_params.get("model_name")) self.monitor_type = self.monitor_types.get(MonitorType) - if self.monitor_type == None: - raise Exception("Unknown monitor type: %s" % MonitorType) + if self.monitor_type is None: + raise ValueError("Unknown monitor type: %s" % MonitorType) self.monitor_type = MonitorType self.__fill_rv_templates_dir() - self.main_c = self.__open_file(self.monitor_templates_dir + "main_" + MonitorType + ".c") + self.main_c = self.__read_file(self.monitor_templates_dir + "main.c") + self.trace_h = self.__read_file(self.monitor_templates_dir + "trace.h") + self.kconfig = self.__read_file(self.monitor_templates_dir + "Kconfig") self.enum_suffix = "_%s" % self.name + self.description = extra_params.get("description", self.name) or "auto-generated" + self.auto_patch = extra_params.get("auto_patch") + if self.auto_patch: + self.__fill_rv_kernel_dir() def __fill_rv_templates_dir(self): - if os.path.exists(self.monitor_templates_dir) == True: + if os.path.exists(self.monitor_templates_dir): return if platform.system() != "Linux": - raise Exception("I can only run on Linux.") + raise OSError("I can only run on Linux.") kernel_path = "/lib/modules/%s/build/tools/verification/dot2/dot2k_templates/" % (platform.release()) - if os.path.exists(kernel_path) == True: + if os.path.exists(kernel_path): self.monitor_templates_dir = kernel_path return - if os.path.exists("/usr/share/dot2/dot2k_templates/") == True: + if os.path.exists("/usr/share/dot2/dot2k_templates/"): self.monitor_templates_dir = "/usr/share/dot2/dot2k_templates/" return - raise Exception("Could not find the template directory, do you have the kernel source installed?") + raise FileNotFoundError("Could not find the template directory, do you have the kernel source installed?") + def __fill_rv_kernel_dir(self): - def __open_file(self, path): + # first try if we are running in the kernel tree root + if os.path.exists(self.rv_dir): + return + + # offset if we are running inside the kernel tree from verification/dot2 + kernel_path = os.path.join("../..", self.rv_dir) + + if os.path.exists(kernel_path): + self.rv_dir = kernel_path + return + + if platform.system() != "Linux": + raise OSError("I can only run on Linux.") + + kernel_path = os.path.join("/lib/modules/%s/build" % platform.release(), self.rv_dir) + + # if the current kernel is from a distro this may not be a full kernel tree + # verify that one of the files we are going to modify is available + if os.path.exists(os.path.join(kernel_path, "rv_trace.h")): + self.rv_dir = kernel_path + return + + raise FileNotFoundError("Could not find the rv directory, do you have the kernel source installed?") + + def __read_file(self, path): try: - fd = open(path) + fd = open(path, 'r') except OSError: raise Exception("Cannot open the file: %s" % path) content = fd.read() + fd.close() return content def __buff_to_string(self, buff): @@ -69,16 +102,26 @@ class dot2k(Dot2c): # cut off the last \n return string[:-1] + def fill_monitor_type(self): + return self.monitor_type.upper() + def fill_tracepoint_handlers_skel(self): buff = [] for event in self.events: buff.append("static void handle_%s(void *data, /* XXX: fill header */)" % event) buff.append("{") + handle = "handle_event" + if self.is_start_event(event): + buff.append("\t/* XXX: validate that this event always leads to the initial state */") + handle = "handle_start_event" + elif self.is_start_run_event(event): + buff.append("\t/* XXX: validate that this event is only valid in the initial state */") + handle = "handle_start_run_event" if self.monitor_type == "per_task": buff.append("\tstruct task_struct *p = /* XXX: how do I get p? */;"); - buff.append("\tda_handle_event_%s(p, %s%s);" % (self.name, event, self.enum_suffix)); + buff.append("\tda_%s_%s(p, %s%s);" % (handle, self.name, event, self.enum_suffix)); else: - buff.append("\tda_handle_event_%s(%s%s);" % (self.name, event, self.enum_suffix)); + buff.append("\tda_%s_%s(%s%s);" % (handle, self.name, event, self.enum_suffix)); buff.append("}") buff.append("") return self.__buff_to_string(buff) @@ -97,18 +140,21 @@ class dot2k(Dot2c): def fill_main_c(self): main_c = self.main_c + monitor_type = self.fill_monitor_type() min_type = self.get_minimun_type() - nr_events = self.events.__len__() + nr_events = len(self.events) tracepoint_handlers = self.fill_tracepoint_handlers_skel() tracepoint_attach = self.fill_tracepoint_attach_probe() tracepoint_detach = self.fill_tracepoint_detach_helper() - main_c = main_c.replace("MIN_TYPE", min_type) - main_c = main_c.replace("MODEL_NAME", self.name) - main_c = main_c.replace("NR_EVENTS", str(nr_events)) - main_c = main_c.replace("TRACEPOINT_HANDLERS_SKEL", tracepoint_handlers) - main_c = main_c.replace("TRACEPOINT_ATTACH", tracepoint_attach) - main_c = main_c.replace("TRACEPOINT_DETACH", tracepoint_detach) + main_c = main_c.replace("%%MONITOR_TYPE%%", monitor_type) + main_c = main_c.replace("%%MIN_TYPE%%", min_type) + main_c = main_c.replace("%%MODEL_NAME%%", self.name) + main_c = main_c.replace("%%NR_EVENTS%%", str(nr_events)) + main_c = main_c.replace("%%TRACEPOINT_HANDLERS_SKEL%%", tracepoint_handlers) + main_c = main_c.replace("%%TRACEPOINT_ATTACH%%", tracepoint_attach) + main_c = main_c.replace("%%TRACEPOINT_DETACH%%", tracepoint_detach) + main_c = main_c.replace("%%DESCRIPTION%%", self.description) return main_c @@ -137,31 +183,142 @@ class dot2k(Dot2c): return self.__buff_to_string(buff) + def fill_monitor_class_type(self): + if self.monitor_type == "per_task": + return "DA_MON_EVENTS_ID" + return "DA_MON_EVENTS_IMPLICIT" + + def fill_monitor_class(self): + if self.monitor_type == "per_task": + return "da_monitor_id" + return "da_monitor" + + def fill_tracepoint_args_skel(self, tp_type): + buff = [] + tp_args_event = [ + ("char *", "state"), + ("char *", "event"), + ("char *", "next_state"), + ("bool ", "final_state"), + ] + tp_args_error = [ + ("char *", "state"), + ("char *", "event"), + ] + tp_args_id = ("int ", "id") + tp_args = tp_args_event if tp_type == "event" else tp_args_error + if self.monitor_type == "per_task": + tp_args.insert(0, tp_args_id) + tp_proto_c = ", ".join([a+b for a,b in tp_args]) + tp_args_c = ", ".join([b for a,b in tp_args]) + buff.append(" TP_PROTO(%s)," % tp_proto_c) + buff.append(" TP_ARGS(%s)" % tp_args_c) + return self.__buff_to_string(buff) + + def fill_trace_h(self): + trace_h = self.trace_h + monitor_class = self.fill_monitor_class() + monitor_class_type = self.fill_monitor_class_type() + tracepoint_args_skel_event = self.fill_tracepoint_args_skel("event") + tracepoint_args_skel_error = self.fill_tracepoint_args_skel("error") + trace_h = trace_h.replace("%%MODEL_NAME%%", self.name) + trace_h = trace_h.replace("%%MODEL_NAME_UP%%", self.name.upper()) + trace_h = trace_h.replace("%%MONITOR_CLASS%%", monitor_class) + trace_h = trace_h.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type) + trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_EVENT%%", tracepoint_args_skel_event) + trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_ERROR%%", tracepoint_args_skel_error) + return trace_h + + def fill_kconfig(self): + kconfig = self.kconfig + monitor_class_type = self.fill_monitor_class_type() + kconfig = kconfig.replace("%%MODEL_NAME%%", self.name) + kconfig = kconfig.replace("%%MODEL_NAME_UP%%", self.name.upper()) + kconfig = kconfig.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type) + kconfig = kconfig.replace("%%DESCRIPTION%%", self.description) + return kconfig + + def __patch_file(self, file, marker, line): + file_to_patch = os.path.join(self.rv_dir, file) + content = self.__read_file(file_to_patch) + content = content.replace(marker, line + "\n" + marker) + self.__write_file(file_to_patch, content) + + def fill_tracepoint_tooltip(self): + monitor_class_type = self.fill_monitor_class_type() + if self.auto_patch: + self.__patch_file("rv_trace.h", + "// Add new monitors based on CONFIG_%s here" % monitor_class_type, + "#include <monitors/%s/%s_trace.h>" % (self.name, self.name)) + return " - Patching %s/rv_trace.h, double check the result" % self.rv_dir + + return """ - Edit %s/rv_trace.h: +Add this line where other tracepoints are included and %s is defined: +#include <monitors/%s/%s_trace.h> +""" % (self.rv_dir, monitor_class_type, self.name, self.name) + + def fill_kconfig_tooltip(self): + if self.auto_patch: + self.__patch_file("Kconfig", + "# Add new monitors here", + "source \"kernel/trace/rv/monitors/%s/Kconfig\"" % (self.name)) + return " - Patching %s/Kconfig, double check the result" % self.rv_dir + + return """ - Edit %s/Kconfig: +Add this line where other monitors are included: +source \"kernel/trace/rv/monitors/%s/Kconfig\" +""" % (self.rv_dir, self.name) + + def fill_makefile_tooltip(self): + name = self.name + name_up = name.upper() + if self.auto_patch: + self.__patch_file("Makefile", + "# Add new monitors here", + "obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o" % (name_up, name, name)) + return " - Patching %s/Makefile, double check the result" % self.rv_dir + + return """ - Edit %s/Makefile: +Add this line where other monitors are included: +obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o +""" % (self.rv_dir, name_up, name, name) + + def fill_monitor_tooltip(self): + if self.auto_patch: + return " - Monitor created in %s/monitors/%s" % (self.rv_dir, self. name) + return " - Move %s/ to the kernel's monitor directory (%s/monitors)" % (self.name, self.rv_dir) + def __create_directory(self): + path = self.name + if self.auto_patch: + path = os.path.join(self.rv_dir, "monitors", path) try: - os.mkdir(self.name) + os.mkdir(path) except FileExistsError: return except: print("Fail creating the output dir: %s" % self.name) - def __create_file(self, file_name, content): - path = "%s/%s" % (self.name, file_name) + def __write_file(self, file_name, content): try: - file = open(path, 'w') - except FileExistsError: - return + file = open(file_name, 'w') except: - print("Fail creating file: %s" % path) + print("Fail writing to file: %s" % file_name) file.write(content) file.close() + def __create_file(self, file_name, content): + path = "%s/%s" % (self.name, file_name) + if self.auto_patch: + path = os.path.join(self.rv_dir, "monitors", path) + self.__write_file(path, content) + def __get_main_name(self): path = "%s/%s" % (self.name, "main.c") - if os.path.exists(path) == False: - return "main.c" + if not os.path.exists(path): + return "main.c" return "__main.c" def print_files(self): @@ -175,3 +332,10 @@ class dot2k(Dot2c): path = "%s.h" % self.name self.__create_file(path, model_h) + + trace_h = self.fill_trace_h() + path = "%s_trace.h" % self.name + self.__create_file(path, trace_h) + + kconfig = self.fill_kconfig() + self.__create_file("Kconfig", kconfig) diff --git a/tools/verification/dot2/dot2k_templates/Kconfig b/tools/verification/dot2/dot2k_templates/Kconfig new file mode 100644 index 000000000000..90cdc1e9379e --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/Kconfig @@ -0,0 +1,6 @@ +config RV_MON_%%MODEL_NAME_UP%% + depends on RV + select %%MONITOR_CLASS_TYPE%% + bool "%%MODEL_NAME%% monitor" + help + %%DESCRIPTION%% diff --git a/tools/verification/dot2/dot2k_templates/main.c b/tools/verification/dot2/dot2k_templates/main.c new file mode 100644 index 000000000000..9605ca994416 --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/main.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> +#include <rv/da_monitor.h> + +#define MODULE_NAME "%%MODEL_NAME%%" + +/* + * XXX: include required tracepoint headers, e.g., + * #include <trace/events/sched.h> + */ +#include <rv_trace.h> + +/* + * This is the self-generated part of the monitor. Generally, there is no need + * to touch this section. + */ +#include "%%MODEL_NAME%%.h" + +/* + * Declare the deterministic automata monitor. + * + * The rv monitor reference is needed for the monitor declaration. + */ +static struct rv_monitor rv_%%MODEL_NAME%%; +DECLARE_DA_MON_%%MONITOR_TYPE%%(%%MODEL_NAME%%, %%MIN_TYPE%%); + +/* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + * + */ +%%TRACEPOINT_HANDLERS_SKEL%% +static int enable_%%MODEL_NAME%%(void) +{ + int retval; + + retval = da_monitor_init_%%MODEL_NAME%%(); + if (retval) + return retval; + +%%TRACEPOINT_ATTACH%% + + return 0; +} + +static void disable_%%MODEL_NAME%%(void) +{ + rv_%%MODEL_NAME%%.enabled = 0; + +%%TRACEPOINT_DETACH%% + + da_monitor_destroy_%%MODEL_NAME%%(); +} + +/* + * This is the monitor register section. + */ +static struct rv_monitor rv_%%MODEL_NAME%% = { + .name = "%%MODEL_NAME%%", + .description = "%%DESCRIPTION%%", + .enable = enable_%%MODEL_NAME%%, + .disable = disable_%%MODEL_NAME%%, + .reset = da_monitor_reset_all_%%MODEL_NAME%%, + .enabled = 0, +}; + +static int __init register_%%MODEL_NAME%%(void) +{ + rv_register_monitor(&rv_%%MODEL_NAME%%); + return 0; +} + +static void __exit unregister_%%MODEL_NAME%%(void) +{ + rv_unregister_monitor(&rv_%%MODEL_NAME%%); +} + +module_init(register_%%MODEL_NAME%%); +module_exit(unregister_%%MODEL_NAME%%); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("dot2k: auto-generated"); +MODULE_DESCRIPTION("%%MODEL_NAME%%: %%DESCRIPTION%%"); diff --git a/tools/verification/dot2/dot2k_templates/main_global.c b/tools/verification/dot2/dot2k_templates/main_global.c deleted file mode 100644 index a5658bfb9044..000000000000 --- a/tools/verification/dot2/dot2k_templates/main_global.c +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/ftrace.h> -#include <linux/tracepoint.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/rv.h> -#include <rv/instrumentation.h> -#include <rv/da_monitor.h> - -#define MODULE_NAME "MODEL_NAME" - -/* - * XXX: include required tracepoint headers, e.g., - * #include <trace/events/sched.h> - */ -#include <trace/events/rv.h> - -/* - * This is the self-generated part of the monitor. Generally, there is no need - * to touch this section. - */ -#include "MODEL_NAME.h" - -/* - * Declare the deterministic automata monitor. - * - * The rv monitor reference is needed for the monitor declaration. - */ -static struct rv_monitor rv_MODEL_NAME; -DECLARE_DA_MON_GLOBAL(MODEL_NAME, MIN_TYPE); - -/* - * This is the instrumentation part of the monitor. - * - * This is the section where manual work is required. Here the kernel events - * are translated into model's event. - * - */ -TRACEPOINT_HANDLERS_SKEL -static int enable_MODEL_NAME(void) -{ - int retval; - - retval = da_monitor_init_MODEL_NAME(); - if (retval) - return retval; - -TRACEPOINT_ATTACH - - return 0; -} - -static void disable_MODEL_NAME(void) -{ - rv_MODEL_NAME.enabled = 0; - -TRACEPOINT_DETACH - - da_monitor_destroy_MODEL_NAME(); -} - -/* - * This is the monitor register section. - */ -static struct rv_monitor rv_MODEL_NAME = { - .name = "MODEL_NAME", - .description = "auto-generated MODEL_NAME", - .enable = enable_MODEL_NAME, - .disable = disable_MODEL_NAME, - .reset = da_monitor_reset_all_MODEL_NAME, - .enabled = 0, -}; - -static int __init register_MODEL_NAME(void) -{ - rv_register_monitor(&rv_MODEL_NAME); - return 0; -} - -static void __exit unregister_MODEL_NAME(void) -{ - rv_unregister_monitor(&rv_MODEL_NAME); -} - -module_init(register_MODEL_NAME); -module_exit(unregister_MODEL_NAME); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("dot2k: auto-generated"); -MODULE_DESCRIPTION("MODEL_NAME"); diff --git a/tools/verification/dot2/dot2k_templates/main_per_cpu.c b/tools/verification/dot2/dot2k_templates/main_per_cpu.c deleted file mode 100644 index 03539a97633f..000000000000 --- a/tools/verification/dot2/dot2k_templates/main_per_cpu.c +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/ftrace.h> -#include <linux/tracepoint.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/rv.h> -#include <rv/instrumentation.h> -#include <rv/da_monitor.h> - -#define MODULE_NAME "MODEL_NAME" - -/* - * XXX: include required tracepoint headers, e.g., - * #include <linux/trace/events/sched.h> - */ -#include <trace/events/rv.h> - -/* - * This is the self-generated part of the monitor. Generally, there is no need - * to touch this section. - */ -#include "MODEL_NAME.h" - -/* - * Declare the deterministic automata monitor. - * - * The rv monitor reference is needed for the monitor declaration. - */ -static struct rv_monitor rv_MODEL_NAME; -DECLARE_DA_MON_PER_CPU(MODEL_NAME, MIN_TYPE); - -/* - * This is the instrumentation part of the monitor. - * - * This is the section where manual work is required. Here the kernel events - * are translated into model's event. - * - */ -TRACEPOINT_HANDLERS_SKEL -static int enable_MODEL_NAME(void) -{ - int retval; - - retval = da_monitor_init_MODEL_NAME(); - if (retval) - return retval; - -TRACEPOINT_ATTACH - - return 0; -} - -static void disable_MODEL_NAME(void) -{ - rv_MODEL_NAME.enabled = 0; - -TRACEPOINT_DETACH - - da_monitor_destroy_MODEL_NAME(); -} - -/* - * This is the monitor register section. - */ -static struct rv_monitor rv_MODEL_NAME = { - .name = "MODEL_NAME", - .description = "auto-generated MODEL_NAME", - .enable = enable_MODEL_NAME, - .disable = disable_MODEL_NAME, - .reset = da_monitor_reset_all_MODEL_NAME, - .enabled = 0, -}; - -static int __init register_MODEL_NAME(void) -{ - rv_register_monitor(&rv_MODEL_NAME); - return 0; -} - -static void __exit unregister_MODEL_NAME(void) -{ - rv_unregister_monitor(&rv_MODEL_NAME); -} - -module_init(register_MODEL_NAME); -module_exit(unregister_MODEL_NAME); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("dot2k: auto-generated"); -MODULE_DESCRIPTION("MODEL_NAME"); diff --git a/tools/verification/dot2/dot2k_templates/main_per_task.c b/tools/verification/dot2/dot2k_templates/main_per_task.c deleted file mode 100644 index ffd92af87a86..000000000000 --- a/tools/verification/dot2/dot2k_templates/main_per_task.c +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/ftrace.h> -#include <linux/tracepoint.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/rv.h> -#include <rv/instrumentation.h> -#include <rv/da_monitor.h> - -#define MODULE_NAME "MODEL_NAME" - -/* - * XXX: include required tracepoint headers, e.g., - * #include <linux/trace/events/sched.h> - */ -#include <trace/events/rv.h> - -/* - * This is the self-generated part of the monitor. Generally, there is no need - * to touch this section. - */ -#include "MODEL_NAME.h" - -/* - * Declare the deterministic automata monitor. - * - * The rv monitor reference is needed for the monitor declaration. - */ -static struct rv_monitor rv_MODEL_NAME; -DECLARE_DA_MON_PER_TASK(MODEL_NAME, MIN_TYPE); - -/* - * This is the instrumentation part of the monitor. - * - * This is the section where manual work is required. Here the kernel events - * are translated into model's event. - * - */ -TRACEPOINT_HANDLERS_SKEL -static int enable_MODEL_NAME(void) -{ - int retval; - - retval = da_monitor_init_MODEL_NAME(); - if (retval) - return retval; - -TRACEPOINT_ATTACH - - return 0; -} - -static void disable_MODEL_NAME(void) -{ - rv_MODEL_NAME.enabled = 0; - -TRACEPOINT_DETACH - - da_monitor_destroy_MODEL_NAME(); -} - -/* - * This is the monitor register section. - */ -static struct rv_monitor rv_MODEL_NAME = { - .name = "MODEL_NAME", - .description = "auto-generated MODEL_NAME", - .enable = enable_MODEL_NAME, - .disable = disable_MODEL_NAME, - .reset = da_monitor_reset_all_MODEL_NAME, - .enabled = 0, -}; - -static int __init register_MODEL_NAME(void) -{ - rv_register_monitor(&rv_MODEL_NAME); - return 0; -} - -static void __exit unregister_MODEL_NAME(void) -{ - rv_unregister_monitor(&rv_MODEL_NAME); -} - -module_init(register_MODEL_NAME); -module_exit(unregister_MODEL_NAME); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("dot2k: auto-generated"); -MODULE_DESCRIPTION("MODEL_NAME"); diff --git a/tools/verification/dot2/dot2k_templates/trace.h b/tools/verification/dot2/dot2k_templates/trace.h new file mode 100644 index 000000000000..87d3a1308926 --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/trace.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_%%MODEL_NAME_UP%% +DEFINE_EVENT(event_%%MONITOR_CLASS%%, event_%%MODEL_NAME%%, +%%TRACEPOINT_ARGS_SKEL_EVENT%%); + +DEFINE_EVENT(error_%%MONITOR_CLASS%%, error_%%MODEL_NAME%%, +%%TRACEPOINT_ARGS_SKEL_ERROR%%); +#endif /* CONFIG_RV_MON_%%MODEL_NAME_UP%% */ |
