From fee7114298cf54bbd221cdb2ab49738be8b94f4c Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 23 Jul 2013 17:38:41 -0400 Subject: SELinux: Reduce overhead of mls_level_isvalid() function call While running the high_systime workload of the AIM7 benchmark on a 2-socket 12-core Westmere x86-64 machine running 3.10-rc4 kernel (with HT on), it was found that a pretty sizable amount of time was spent in the SELinux code. Below was the perf trace of the "perf record -a -s" of a test run at 1500 users: 5.04% ls [kernel.kallsyms] [k] ebitmap_get_bit 1.96% ls [kernel.kallsyms] [k] mls_level_isvalid 1.95% ls [kernel.kallsyms] [k] find_next_bit The ebitmap_get_bit() was the hottest function in the perf-report output. Both the ebitmap_get_bit() and find_next_bit() functions were, in fact, called by mls_level_isvalid(). As a result, the mls_level_isvalid() call consumed 8.95% of the total CPU time of all the 24 virtual CPUs which is quite a lot. The majority of the mls_level_isvalid() function invocations come from the socket creation system call. Looking at the mls_level_isvalid() function, it is checking to see if all the bits set in one of the ebitmap structure are also set in another one as well as the highest set bit is no bigger than the one specified by the given policydb data structure. It is doing it in a bit-by-bit manner. So if the ebitmap structure has many bits set, the iteration loop will be done many times. The current code can be rewritten to use a similar algorithm as the ebitmap_contains() function with an additional check for the highest set bit. The ebitmap_contains() function was extended to cover an optional additional check for the highest set bit, and the mls_level_isvalid() function was modified to call ebitmap_contains(). With that change, the perf trace showed that the used CPU time drop down to just 0.08% (ebitmap_contains + mls_level_isvalid) of the total which is about 100X less than before. 0.07% ls [kernel.kallsyms] [k] ebitmap_contains 0.05% ls [kernel.kallsyms] [k] ebitmap_get_bit 0.01% ls [kernel.kallsyms] [k] mls_level_isvalid 0.01% ls [kernel.kallsyms] [k] find_next_bit The remaining ebitmap_get_bit() and find_next_bit() functions calls are made by other kernel routines as the new mls_level_isvalid() function will not call them anymore. This patch also improves the high_systime AIM7 benchmark result, though the improvement is not as impressive as is suggested by the reduction in CPU time spent in the ebitmap functions. The table below shows the performance change on the 2-socket x86-64 system (with HT on) mentioned above. +--------------+---------------+----------------+-----------------+ | Workload | mean % change | mean % change | mean % change | | | 10-100 users | 200-1000 users | 1100-2000 users | +--------------+---------------+----------------+-----------------+ | high_systime | +0.1% | +0.9% | +2.6% | +--------------+---------------+----------------+-----------------+ Signed-off-by: Waiman Long Acked-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Eric Paris --- security/selinux/ss/ebitmap.c | 20 ++++++++++++++++++-- security/selinux/ss/ebitmap.h | 2 +- security/selinux/ss/mls.c | 22 +++++++--------------- security/selinux/ss/mls_types.h | 2 +- 4 files changed, 27 insertions(+), 19 deletions(-) (limited to 'security/selinux/ss') diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index 30f119b1d1ec..820313a04d49 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -213,7 +213,12 @@ netlbl_import_failure: } #endif /* CONFIG_NETLABEL */ -int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2) +/* + * Check to see if all the bits set in e2 are also set in e1. Optionally, + * if last_e2bit is non-zero, the highest set bit in e2 cannot exceed + * last_e2bit. + */ +int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit) { struct ebitmap_node *n1, *n2; int i; @@ -223,14 +228,25 @@ int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2) n1 = e1->node; n2 = e2->node; + while (n1 && n2 && (n1->startbit <= n2->startbit)) { if (n1->startbit < n2->startbit) { n1 = n1->next; continue; } - for (i = 0; i < EBITMAP_UNIT_NUMS; i++) { + for (i = EBITMAP_UNIT_NUMS - 1; (i >= 0) && !n2->maps[i]; ) + i--; /* Skip trailing NULL map entries */ + if (last_e2bit && (i >= 0)) { + u32 lastsetbit = n2->startbit + i * EBITMAP_UNIT_SIZE + + __fls(n2->maps[i]); + if (lastsetbit > last_e2bit) + return 0; + } + + while (i >= 0) { if ((n1->maps[i] & n2->maps[i]) != n2->maps[i]) return 0; + i--; } n1 = n1->next; diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index 922f8afa89dd..e7eb3a9c5ab7 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h @@ -117,7 +117,7 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n, int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2); int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src); -int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2); +int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit); int ebitmap_get_bit(struct ebitmap *e, unsigned long bit); int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value); void ebitmap_destroy(struct ebitmap *e); diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 40de8d3f208e..c85bc1ec040c 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -160,8 +160,6 @@ void mls_sid_to_context(struct context *context, int mls_level_isvalid(struct policydb *p, struct mls_level *l) { struct level_datum *levdatum; - struct ebitmap_node *node; - int i; if (!l->sens || l->sens > p->p_levels.nprim) return 0; @@ -170,19 +168,13 @@ int mls_level_isvalid(struct policydb *p, struct mls_level *l) if (!levdatum) return 0; - ebitmap_for_each_positive_bit(&l->cat, node, i) { - if (i > p->p_cats.nprim) - return 0; - if (!ebitmap_get_bit(&levdatum->level->cat, i)) { - /* - * Category may not be associated with - * sensitivity. - */ - return 0; - } - } - - return 1; + /* + * Return 1 iff all the bits set in l->cat are also be set in + * levdatum->level->cat and no bit in l->cat is larger than + * p->p_cats.nprim. + */ + return ebitmap_contains(&levdatum->level->cat, &l->cat, + p->p_cats.nprim); } int mls_range_isvalid(struct policydb *p, struct mls_range *r) diff --git a/security/selinux/ss/mls_types.h b/security/selinux/ss/mls_types.h index 03bed52a8052..e93648774137 100644 --- a/security/selinux/ss/mls_types.h +++ b/security/selinux/ss/mls_types.h @@ -35,7 +35,7 @@ static inline int mls_level_eq(struct mls_level *l1, struct mls_level *l2) static inline int mls_level_dom(struct mls_level *l1, struct mls_level *l2) { return ((l1->sens >= l2->sens) && - ebitmap_contains(&l1->cat, &l2->cat)); + ebitmap_contains(&l1->cat, &l2->cat, 0)); } #define mls_level_incomp(l1, l2) \ -- cgit v1.2.3 From a767f680e34bf14a36fefbbb6d85783eef99fd57 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 23 Jul 2013 17:38:41 -0400 Subject: SELinux: Increase ebitmap_node size for 64-bit configuration Currently, the ebitmap_node structure has a fixed size of 32 bytes. On a 32-bit system, the overhead is 8 bytes, leaving 24 bytes for being used as bitmaps. The overhead ratio is 1/4. On a 64-bit system, the overhead is 16 bytes. Therefore, only 16 bytes are left for bitmap purpose and the overhead ratio is 1/2. With a 3.8.2 kernel, a boot-up operation will cause the ebitmap_get_bit() function to be called about 9 million times. The average number of ebitmap_node traversal is about 3.7. This patch increases the size of the ebitmap_node structure to 64 bytes for 64-bit system to keep the overhead ratio at 1/4. This may also improve performance a little bit by making node to node traversal less frequent (< 2) as more bits are available in each node. Signed-off-by: Waiman Long Acked-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Eric Paris --- security/selinux/ss/ebitmap.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'security/selinux/ss') diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index e7eb3a9c5ab7..712c8a7b8e8b 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h @@ -16,7 +16,13 @@ #include -#define EBITMAP_UNIT_NUMS ((32 - sizeof(void *) - sizeof(u32)) \ +#ifdef CONFIG_64BIT +#define EBITMAP_NODE_SIZE 64 +#else +#define EBITMAP_NODE_SIZE 32 +#endif + +#define EBITMAP_UNIT_NUMS ((EBITMAP_NODE_SIZE-sizeof(void *)-sizeof(u32))\ / sizeof(unsigned long)) #define EBITMAP_UNIT_SIZE BITS_PER_LONG #define EBITMAP_SIZE (EBITMAP_UNIT_NUMS * EBITMAP_UNIT_SIZE) -- cgit v1.2.3 From b138004ea0382bdc6d02599c39392651b4f63889 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 23 Jul 2013 17:38:42 -0400 Subject: SELinux: fix selinuxfs policy file on big endian systems The /sys/fs/selinux/policy file is not valid on big endian systems like ppc64 or s390. Let's see why: static int hashtab_cnt(void *key, void *data, void *ptr) { int *cnt = ptr; *cnt = *cnt + 1; return 0; } static int range_write(struct policydb *p, void *fp) { size_t nel; [...] /* count the number of entries in the hashtab */ nel = 0; rc = hashtab_map(p->range_tr, hashtab_cnt, &nel); if (rc) return rc; buf[0] = cpu_to_le32(nel); rc = put_entry(buf, sizeof(u32), 1, fp); So size_t is 64 bits. But then we pass a pointer to it as we do to hashtab_cnt. hashtab_cnt thinks it is a 32 bit int and only deals with the first 4 bytes. On x86_64 which is little endian, those first 4 bytes and the least significant, so this works out fine. On ppc64/s390 those first 4 bytes of memory are the high order bits. So at the end of the call to hashtab_map nel has a HUGE number. But the least significant 32 bits are all 0's. We then pass that 64 bit number to cpu_to_le32() which happily truncates it to a 32 bit number and does endian swapping. But the low 32 bits are all 0's. So no matter how many entries are in the hashtab, big endian systems always say there are 0 entries because I screwed up the counting. The fix is easy. Use a 32 bit int, as the hashtab_cnt expects, for nel. Signed-off-by: Eric Paris Signed-off-by: Paul Moore --- security/selinux/ss/policydb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'security/selinux/ss') diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 9cd9b7c661ec..3fc8969b499c 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -3200,9 +3200,8 @@ static int range_write_helper(void *key, void *data, void *ptr) static int range_write(struct policydb *p, void *fp) { - size_t nel; __le32 buf[1]; - int rc; + int rc, nel; struct policy_data pd; pd.p = p; -- cgit v1.2.3 From f936c6e502d3bc21b87c9830b3a24d1e07e6b6e1 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 10 Oct 2012 10:38:47 -0400 Subject: SELinux: change sbsec->behavior to short We only have 6 options, so char is good enough, but use a short as that packs nicely. This shrinks the superblock_security_struct just a little bit. Signed-off-by: Eric Paris --- security/selinux/ss/services.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'security/selinux/ss') diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b4feecc3fe01..603c638434bb 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2329,7 +2329,7 @@ out: */ int security_fs_use( const char *fstype, - unsigned int *behavior, + short unsigned int *behavior, u32 *sid) { int rc = 0; -- cgit v1.2.3 From a64c54cf0811b8032fdab8c9d52576f0370837fa Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 24 Aug 2012 15:59:07 -0400 Subject: SELinux: pass a superblock to security_fs_use Rather than passing pointers to memory locations, strings, and other stuff just give up on the separation and give security_fs_use the superblock. It just makes the code easier to read (even if not easier to reuse on some other OS) Signed-off-by: Eric Paris --- security/selinux/ss/services.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'security/selinux/ss') diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 603c638434bb..a90721771615 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2323,17 +2323,14 @@ out: /** * security_fs_use - Determine how to handle labeling for a filesystem. - * @fstype: filesystem type - * @behavior: labeling behavior - * @sid: SID for filesystem (superblock) + * @sb: superblock in question */ -int security_fs_use( - const char *fstype, - short unsigned int *behavior, - u32 *sid) +int security_fs_use(struct super_block *sb) { int rc = 0; struct ocontext *c; + struct superblock_security_struct *sbsec = sb->s_security; + const char *fstype = sb->s_type->name; read_lock(&policy_rwlock); @@ -2345,21 +2342,21 @@ int security_fs_use( } if (c) { - *behavior = c->v.behavior; + sbsec->behavior = c->v.behavior; if (!c->sid[0]) { rc = sidtab_context_to_sid(&sidtab, &c->context[0], &c->sid[0]); if (rc) goto out; } - *sid = c->sid[0]; + sbsec->sid = c->sid[0]; } else { - rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, sid); + rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, &sbsec->sid); if (rc) { - *behavior = SECURITY_FS_USE_NONE; + sbsec->behavior = SECURITY_FS_USE_NONE; rc = 0; } else { - *behavior = SECURITY_FS_USE_GENFS; + sbsec->behavior = SECURITY_FS_USE_GENFS; } } -- cgit v1.2.3 From 2be4d74f2fd45460d70d4fe65cc1972ef45bf849 Mon Sep 17 00:00:00 2001 From: Chris PeBenito Date: Fri, 3 May 2013 09:05:39 -0400 Subject: Add SELinux policy capability for always checking packet and peer classes. Currently the packet class in SELinux is not checked if there are no SECMARK rules in the security or mangle netfilter tables. Some systems prefer that packets are always checked, for example, to protect the system should the netfilter rules fail to load or if the nefilter rules were maliciously flushed. Add the always_check_network policy capability which, when enabled, treats SECMARK as enabled, even if there are no netfilter SECMARK rules and treats peer labeling as enabled, even if there is no Netlabel or labeled IPSEC configuration. Includes definition of "redhat1" SELinux policy capability, which exists in the SELinux userpace library, to keep ordering correct. The SELinux userpace portion of this was merged last year, but this kernel change fell on the floor. Signed-off-by: Chris PeBenito Signed-off-by: Eric Paris --- security/selinux/ss/services.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'security/selinux/ss') diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index a90721771615..d106733ad987 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -72,6 +72,7 @@ int selinux_policycap_netpeer; int selinux_policycap_openperm; +int selinux_policycap_alwaysnetwork; static DEFINE_RWLOCK(policy_rwlock); @@ -1812,6 +1813,8 @@ static void security_load_policycaps(void) POLICYDB_CAPABILITY_NETPEER); selinux_policycap_openperm = ebitmap_get_bit(&policydb.policycaps, POLICYDB_CAPABILITY_OPENPERM); + selinux_policycap_alwaysnetwork = ebitmap_get_bit(&policydb.policycaps, + POLICYDB_CAPABILITY_ALWAYSNETWORK); } static int security_preserve_bools(struct policydb *p); -- cgit v1.2.3 From 102aefdda4d8275ce7d7100bc16c88c74272b260 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Tue, 16 Apr 2013 18:56:19 -0400 Subject: selinux: consider filesystem subtype in policies Not considering sub filesystem has the following limitation. Support for SELinux in FUSE is dependent on the particular userspace filesystem, which is identified by the subtype. For e.g, GlusterFS, a FUSE based filesystem supports SELinux (by mounting and processing FUSE requests in different threads, avoiding the mount time deadlock), whereas other FUSE based filesystems (identified by a different subtype) have the mount time deadlock. By considering the subtype of the filesytem in the SELinux policies, allows us to specify a filesystem subtype, in the following way: fs_use_xattr fuse.glusterfs gen_context(system_u:object_r:fs_t,s0); This way not all FUSE filesystems are put in the same bucket and subjected to the limitations of the other subtypes. Signed-off-by: Anand Avati Signed-off-by: Eric Paris --- security/selinux/ss/services.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'security/selinux/ss') diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index d106733ad987..ee470a0b5c27 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2334,16 +2334,50 @@ int security_fs_use(struct super_block *sb) struct ocontext *c; struct superblock_security_struct *sbsec = sb->s_security; const char *fstype = sb->s_type->name; + const char *subtype = (sb->s_subtype && sb->s_subtype[0]) ? sb->s_subtype : NULL; + struct ocontext *base = NULL; read_lock(&policy_rwlock); - c = policydb.ocontexts[OCON_FSUSE]; - while (c) { - if (strcmp(fstype, c->u.name) == 0) + for (c = policydb.ocontexts[OCON_FSUSE]; c; c = c->next) { + char *sub; + int baselen; + + baselen = strlen(fstype); + + /* if base does not match, this is not the one */ + if (strncmp(fstype, c->u.name, baselen)) + continue; + + /* if there is no subtype, this is the one! */ + if (!subtype) + break; + + /* skip past the base in this entry */ + sub = c->u.name + baselen; + + /* entry is only a base. save it. keep looking for subtype */ + if (sub[0] == '\0') { + base = c; + continue; + } + + /* entry is not followed by a subtype, so it is not a match */ + if (sub[0] != '.') + continue; + + /* whew, we found a subtype of this fstype */ + sub++; /* move past '.' */ + + /* exact match of fstype AND subtype */ + if (!strcmp(subtype, sub)) break; - c = c->next; } + /* in case we had found an fstype match but no subtype match */ + if (!c) + c = base; + if (c) { sbsec->behavior = c->v.behavior; if (!c->sid[0]) { -- cgit v1.2.3