summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c434
-rw-r--r--kernel/auditfilter.c54
-rw-r--r--kernel/auditsc.c349
-rw-r--r--kernel/futex.c42
-rw-r--r--kernel/futex_compat.c3
-rw-r--r--kernel/hrtimer.c2
-rw-r--r--kernel/power/Kconfig65
-rw-r--r--kernel/power/disk.c204
-rw-r--r--kernel/power/main.c171
-rw-r--r--kernel/power/power.h90
-rw-r--r--kernel/power/snapshot.c31
-rw-r--r--kernel/power/swap.c33
-rw-r--r--kernel/power/swsusp.c48
-rw-r--r--kernel/power/user.c109
-rw-r--r--kernel/softlockup.c30
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/time.c1
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--kernel/time/timekeeping.c6
-rw-r--r--kernel/time/timer_list.c2
20 files changed, 1038 insertions, 649 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 801c946dd24b..c8555b180213 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -66,10 +66,11 @@
* (Initialization happens after skb_init is called.) */
static int audit_initialized;
-/* 0 - no auditing
- * 1 - auditing enabled
- * 2 - auditing enabled and configuration is locked/unchangeable. */
+#define AUDIT_OFF 0
+#define AUDIT_ON 1
+#define AUDIT_LOCKED 2
int audit_enabled;
+int audit_ever_enabled;
/* Default state when kernel boots without any parameters. */
static int audit_default;
@@ -152,8 +153,10 @@ struct audit_buffer {
static void audit_set_pid(struct audit_buffer *ab, pid_t pid)
{
- struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
- nlh->nlmsg_pid = pid;
+ if (ab) {
+ struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
+ nlh->nlmsg_pid = pid;
+ }
}
void audit_panic(const char *message)
@@ -163,7 +166,8 @@ void audit_panic(const char *message)
case AUDIT_FAIL_SILENT:
break;
case AUDIT_FAIL_PRINTK:
- printk(KERN_ERR "audit: %s\n", message);
+ if (printk_ratelimit())
+ printk(KERN_ERR "audit: %s\n", message);
break;
case AUDIT_FAIL_PANIC:
panic("audit: %s\n", message);
@@ -231,161 +235,107 @@ void audit_log_lost(const char *message)
}
if (print) {
- printk(KERN_WARNING
- "audit: audit_lost=%d audit_rate_limit=%d audit_backlog_limit=%d\n",
- atomic_read(&audit_lost),
- audit_rate_limit,
- audit_backlog_limit);
+ if (printk_ratelimit())
+ printk(KERN_WARNING
+ "audit: audit_lost=%d audit_rate_limit=%d "
+ "audit_backlog_limit=%d\n",
+ atomic_read(&audit_lost),
+ audit_rate_limit,
+ audit_backlog_limit);
audit_panic(message);
}
}
-static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
+static int audit_log_config_change(char *function_name, int new, int old,
+ uid_t loginuid, u32 sid, int allow_changes)
{
- int res, rc = 0, old = audit_rate_limit;
-
- /* check if we are locked */
- if (audit_enabled == 2)
- res = 0;
- else
- res = 1;
+ struct audit_buffer *ab;
+ int rc = 0;
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ audit_log_format(ab, "%s=%d old=%d by auid=%u", function_name, new,
+ old, loginuid);
if (sid) {
char *ctx = NULL;
u32 len;
- if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "audit_rate_limit=%d old=%d by auid=%u"
- " subj=%s res=%d",
- limit, old, loginuid, ctx, res);
+
+ rc = selinux_sid_to_string(sid, &ctx, &len);
+ if (rc) {
+ audit_log_format(ab, " sid=%u", sid);
+ allow_changes = 0; /* Something weird, deny request */
+ } else {
+ audit_log_format(ab, " subj=%s", ctx);
kfree(ctx);
- } else
- res = 0; /* Something weird, deny request */
+ }
}
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "audit_rate_limit=%d old=%d by auid=%u res=%d",
- limit, old, loginuid, res);
-
- /* If we are allowed, make the change */
- if (res == 1)
- audit_rate_limit = limit;
- /* Not allowed, update reason */
- else if (rc == 0)
- rc = -EPERM;
+ audit_log_format(ab, " res=%d", allow_changes);
+ audit_log_end(ab);
return rc;
}
-static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
+static int audit_do_config_change(char *function_name, int *to_change,
+ int new, uid_t loginuid, u32 sid)
{
- int res, rc = 0, old = audit_backlog_limit;
+ int allow_changes, rc = 0, old = *to_change;
/* check if we are locked */
- if (audit_enabled == 2)
- res = 0;
+ if (audit_enabled == AUDIT_LOCKED)
+ allow_changes = 0;
else
- res = 1;
+ allow_changes = 1;
- if (sid) {
- char *ctx = NULL;
- u32 len;
- if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "audit_backlog_limit=%d old=%d by auid=%u"
- " subj=%s res=%d",
- limit, old, loginuid, ctx, res);
- kfree(ctx);
- } else
- res = 0; /* Something weird, deny request */
+ if (audit_enabled != AUDIT_OFF) {
+ rc = audit_log_config_change(function_name, new, old,
+ loginuid, sid, allow_changes);
+ if (rc)
+ allow_changes = 0;
}
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "audit_backlog_limit=%d old=%d by auid=%u res=%d",
- limit, old, loginuid, res);
/* If we are allowed, make the change */
- if (res == 1)
- audit_backlog_limit = limit;
+ if (allow_changes == 1)
+ *to_change = new;
/* Not allowed, update reason */
else if (rc == 0)
rc = -EPERM;
return rc;
}
-static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
+static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
+{
+ return audit_do_config_change("audit_rate_limit", &audit_rate_limit,
+ limit, loginuid, sid);
+}
+
+static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
{
- int res, rc = 0, old = audit_enabled;
+ return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit,
+ limit, loginuid, sid);
+}
- if (state < 0 || state > 2)
+static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
+{
+ int rc;
+ if (state < AUDIT_OFF || state > AUDIT_LOCKED)
return -EINVAL;
- /* check if we are locked */
- if (audit_enabled == 2)
- res = 0;
- else
- res = 1;
+ rc = audit_do_config_change("audit_enabled", &audit_enabled, state,
+ loginuid, sid);
- if (sid) {
- char *ctx = NULL;
- u32 len;
- if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "audit_enabled=%d old=%d by auid=%u"
- " subj=%s res=%d",
- state, old, loginuid, ctx, res);
- kfree(ctx);
- } else
- res = 0; /* Something weird, deny request */
- }
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "audit_enabled=%d old=%d by auid=%u res=%d",
- state, old, loginuid, res);
+ if (!rc)
+ audit_ever_enabled |= !!state;
- /* If we are allowed, make the change */
- if (res == 1)
- audit_enabled = state;
- /* Not allowed, update reason */
- else if (rc == 0)
- rc = -EPERM;
return rc;
}
static int audit_set_failure(int state, uid_t loginuid, u32 sid)
{
- int res, rc = 0, old = audit_failure;
-
if (state != AUDIT_FAIL_SILENT
&& state != AUDIT_FAIL_PRINTK
&& state != AUDIT_FAIL_PANIC)
return -EINVAL;
- /* check if we are locked */
- if (audit_enabled == 2)
- res = 0;
- else
- res = 1;
-
- if (sid) {
- char *ctx = NULL;
- u32 len;
- if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "audit_failure=%d old=%d by auid=%u"
- " subj=%s res=%d",
- state, old, loginuid, ctx, res);
- kfree(ctx);
- } else
- res = 0; /* Something weird, deny request */
- }
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "audit_failure=%d old=%d by auid=%u res=%d",
- state, old, loginuid, res);
-
- /* If we are allowed, make the change */
- if (res == 1)
- audit_failure = state;
- /* Not allowed, update reason */
- else if (rc == 0)
- rc = -EPERM;
- return rc;
+ return audit_do_config_change("audit_failure", &audit_failure, state,
+ loginuid, sid);
}
static int kauditd_thread(void *dummy)
@@ -405,7 +355,11 @@ static int kauditd_thread(void *dummy)
audit_pid = 0;
}
} else {
- printk(KERN_NOTICE "%s\n", skb->data + NLMSG_SPACE(0));
+ if (printk_ratelimit())
+ printk(KERN_NOTICE "%s\n", skb->data +
+ NLMSG_SPACE(0));
+ else
+ audit_log_lost("printk limit exceeded\n");
kfree_skb(skb);
}
} else {
@@ -573,6 +527,33 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
return err;
}
+static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
+ u32 pid, u32 uid, uid_t auid, u32 sid)
+{
+ int rc = 0;
+ char *ctx = NULL;
+ u32 len;
+
+ if (!audit_enabled) {
+ *ab = NULL;
+ return rc;
+ }
+
+ *ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
+ audit_log_format(*ab, "user pid=%d uid=%u auid=%u",
+ pid, uid, auid);
+ if (sid) {
+ rc = selinux_sid_to_string(sid, &ctx, &len);
+ if (rc)
+ audit_log_format(*ab, " ssid=%u", sid);
+ else
+ audit_log_format(*ab, " subj=%s", ctx);
+ kfree(ctx);
+ }
+
+ return rc;
+}
+
static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
u32 uid, pid, seq, sid;
@@ -583,7 +564,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
u16 msg_type = nlh->nlmsg_type;
uid_t loginuid; /* loginuid of sender */
struct audit_sig_info *sig_data;
- char *ctx;
+ char *ctx = NULL;
u32 len;
err = audit_netlink_ok(skb, msg_type);
@@ -634,23 +615,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (err < 0) return err;
}
if (status_get->mask & AUDIT_STATUS_PID) {
- int old = audit_pid;
- if (sid) {
- if ((err = selinux_sid_to_string(
- sid, &ctx, &len)))
- return err;
- else
- audit_log(NULL, GFP_KERNEL,
- AUDIT_CONFIG_CHANGE,
- "audit_pid=%d old=%d by auid=%u subj=%s",
- status_get->pid, old,
- loginuid, ctx);
- kfree(ctx);
- } else
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "audit_pid=%d old=%d by auid=%u",
- status_get->pid, old, loginuid);
- audit_pid = status_get->pid;
+ int new_pid = status_get->pid;
+
+ if (audit_enabled != AUDIT_OFF)
+ audit_log_config_change("audit_pid", new_pid,
+ audit_pid, loginuid,
+ sid, 1);
+
+ audit_pid = new_pid;
}
if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
err = audit_set_rate_limit(status_get->rate_limit,
@@ -673,64 +645,35 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (err)
break;
}
- ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
- if (ab) {
- audit_log_format(ab,
- "user pid=%d uid=%u auid=%u",
- pid, uid, loginuid);
- if (sid) {
- if (selinux_sid_to_string(
- sid, &ctx, &len)) {
- audit_log_format(ab,
- " ssid=%u", sid);
- /* Maybe call audit_panic? */
- } else
- audit_log_format(ab,
- " subj=%s", ctx);
- kfree(ctx);
- }
- if (msg_type != AUDIT_USER_TTY)
- audit_log_format(ab, " msg='%.1024s'",
- (char *)data);
- else {
- int size;
-
- audit_log_format(ab, " msg=");
- size = nlmsg_len(nlh);
- audit_log_n_untrustedstring(ab, size,
- data);
- }
- audit_set_pid(ab, pid);
- audit_log_end(ab);
+ audit_log_common_recv_msg(&ab, msg_type, pid, uid,
+ loginuid, sid);
+
+ if (msg_type != AUDIT_USER_TTY)
+ audit_log_format(ab, " msg='%.1024s'",
+ (char *)data);
+ else {
+ int size;
+
+ audit_log_format(ab, " msg=");
+ size = nlmsg_len(nlh);
+ audit_log_n_untrustedstring(ab, size,
+ data);
}
+ audit_set_pid(ab, pid);
+ audit_log_end(ab);
}
break;
case AUDIT_ADD:
case AUDIT_DEL:
if (nlmsg_len(nlh) < sizeof(struct audit_rule))
return -EINVAL;
- if (audit_enabled == 2) {
- ab = audit_log_start(NULL, GFP_KERNEL,
- AUDIT_CONFIG_CHANGE);
- if (ab) {
- audit_log_format(ab,
- "pid=%d uid=%u auid=%u",
- pid, uid, loginuid);
- if (sid) {
- if (selinux_sid_to_string(
- sid, &ctx, &len)) {
- audit_log_format(ab,
- " ssid=%u", sid);
- /* Maybe call audit_panic? */
- } else
- audit_log_format(ab,
- " subj=%s", ctx);
- kfree(ctx);
- }
- audit_log_format(ab, " audit_enabled=%d res=0",
- audit_enabled);
- audit_log_end(ab);
- }
+ if (audit_enabled == AUDIT_LOCKED) {
+ audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
+ uid, loginuid, sid);
+
+ audit_log_format(ab, " audit_enabled=%d res=0",
+ audit_enabled);
+ audit_log_end(ab);
return -EPERM;
}
/* fallthrough */
@@ -743,28 +686,13 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
case AUDIT_DEL_RULE:
if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
return -EINVAL;
- if (audit_enabled == 2) {
- ab = audit_log_start(NULL, GFP_KERNEL,
- AUDIT_CONFIG_CHANGE);
- if (ab) {
- audit_log_format(ab,
- "pid=%d uid=%u auid=%u",
- pid, uid, loginuid);
- if (sid) {
- if (selinux_sid_to_string(
- sid, &ctx, &len)) {
- audit_log_format(ab,
- " ssid=%u", sid);
- /* Maybe call audit_panic? */
- } else
- audit_log_format(ab,
- " subj=%s", ctx);
- kfree(ctx);
- }
- audit_log_format(ab, " audit_enabled=%d res=0",
- audit_enabled);
- audit_log_end(ab);
- }
+ if (audit_enabled == AUDIT_LOCKED) {
+ audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
+ uid, loginuid, sid);
+
+ audit_log_format(ab, " audit_enabled=%d res=0",
+ audit_enabled);
+ audit_log_end(ab);
return -EPERM;
}
/* fallthrough */
@@ -775,19 +703,10 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
break;
case AUDIT_TRIM:
audit_trim_trees();
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
- if (!ab)
- break;
- audit_log_format(ab, "auid=%u", loginuid);
- if (sid) {
- u32 len;
- ctx = NULL;
- if (selinux_sid_to_string(sid, &ctx, &len))
- audit_log_format(ab, " ssid=%u", sid);
- else
- audit_log_format(ab, " subj=%s", ctx);
- kfree(ctx);
- }
+
+ audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
+ uid, loginuid, sid);
+
audit_log_format(ab, " op=trim res=1");
audit_log_end(ab);
break;
@@ -817,22 +736,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
/* OK, here comes... */
err = audit_tag_tree(old, new);
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
- if (!ab) {
- kfree(old);
- kfree(new);
- break;
- }
- audit_log_format(ab, "auid=%u", loginuid);
- if (sid) {
- u32 len;
- ctx = NULL;
- if (selinux_sid_to_string(sid, &ctx, &len))
- audit_log_format(ab, " ssid=%u", sid);
- else
- audit_log_format(ab, " subj=%s", ctx);
- kfree(ctx);
- }
+ audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
+ uid, loginuid, sid);
+
audit_log_format(ab, " op=make_equiv old=");
audit_log_untrustedstring(ab, old);
audit_log_format(ab, " new=");
@@ -965,6 +871,7 @@ static int __init audit_init(void)
skb_queue_head_init(&audit_skb_queue);
audit_initialized = 1;
audit_enabled = audit_default;
+ audit_ever_enabled |= !!audit_default;
/* Register the callback with selinux. This callback will be invoked
* when a new policy is loaded. */
@@ -992,8 +899,10 @@ static int __init audit_enable(char *str)
printk(KERN_INFO "audit: %s%s\n",
audit_default ? "enabled" : "disabled",
audit_initialized ? "" : " (after initialization)");
- if (audit_initialized)
+ if (audit_initialized) {
audit_enabled = audit_default;
+ audit_ever_enabled |= !!audit_default;
+ }
return 1;
}
@@ -1130,7 +1039,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
{
struct audit_buffer *ab = NULL;
struct timespec t;
- unsigned int serial;
+ unsigned int uninitialized_var(serial);
int reserve;
unsigned long timeout_start = jiffies;
@@ -1164,7 +1073,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
remove_wait_queue(&audit_backlog_wait, &wait);
continue;
}
- if (audit_rate_check())
+ if (audit_rate_check() && printk_ratelimit())
printk(KERN_WARNING
"audit: audit_backlog=%d > "
"audit_backlog_limit=%d\n",
@@ -1249,6 +1158,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
goto out;
len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2);
}
+ va_end(args2);
if (len > 0)
skb_put(skb, len);
out:
@@ -1350,6 +1260,21 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
}
/**
+ * audit_string_contains_control - does a string need to be logged in hex
+ * @string - string to be checked
+ * @len - max length of the string to check
+ */
+int audit_string_contains_control(const char *string, size_t len)
+{
+ const unsigned char *p;
+ for (p = string; p < (const unsigned char *)string + len && *p; p++) {
+ if (*p == '"' || *p < 0x21 || *p > 0x7f)
+ return 1;
+ }
+ return 0;
+}
+
+/**
* audit_log_n_untrustedstring - log a string that may contain random characters
* @ab: audit_buffer
* @len: lenth of string (not including trailing null)
@@ -1363,19 +1288,13 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
* The caller specifies the number of characters in the string to log, which may
* or may not be the entire string.
*/
-const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
- const char *string)
+void audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
+ const char *string)
{
- const unsigned char *p;
-
- for (p = string; p < (const unsigned char *)string + len && *p; p++) {
- if (*p == '"' || *p < 0x21 || *p > 0x7f) {
- audit_log_hex(ab, string, len);
- return string + len + 1;
- }
- }
- audit_log_n_string(ab, len, string);
- return p + 1;
+ if (audit_string_contains_control(string, len))
+ audit_log_hex(ab, string, len);
+ else
+ audit_log_n_string(ab, len, string);
}
/**
@@ -1386,9 +1305,9 @@ const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
* Same as audit_log_n_untrustedstring(), except that strlen is used to
* determine string length.
*/
-const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
+void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
{
- return audit_log_n_untrustedstring(ab, strlen(string), string);
+ audit_log_n_untrustedstring(ab, strlen(string), string);
}
/* This is a helper-function to print the escaped d_path */
@@ -1437,8 +1356,11 @@ void audit_log_end(struct audit_buffer *ab)
skb_queue_tail(&audit_skb_queue, ab->skb);
ab->skb = NULL;
wake_up_interruptible(&kauditd_wait);
+ } else if (printk_ratelimit()) {
+ struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
+ printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, ab->skb->data + NLMSG_SPACE(0));
} else {
- printk(KERN_NOTICE "%s\n", ab->skb->data + NLMSG_SPACE(0));
+ audit_log_lost("printk limit exceeded\n");
}
}
audit_buffer_free(ab);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 5d96f2cc7be8..6f19fd477aac 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -95,6 +95,8 @@ extern struct inotify_handle *audit_ih;
/* Inotify events we care about. */
#define AUDIT_IN_WATCH IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF
+extern int audit_enabled;
+
void audit_free_parent(struct inotify_watch *i_watch)
{
struct audit_parent *parent;
@@ -974,7 +976,6 @@ static void audit_update_watch(struct audit_parent *parent,
struct audit_watch *owatch, *nwatch, *nextw;
struct audit_krule *r, *nextr;
struct audit_entry *oentry, *nentry;
- struct audit_buffer *ab;
mutex_lock(&audit_filter_mutex);
list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
@@ -1014,13 +1015,18 @@ static void audit_update_watch(struct audit_parent *parent,
call_rcu(&oentry->rcu, audit_free_rule_rcu);
}
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
- audit_log_format(ab, "op=updated rules specifying path=");
- audit_log_untrustedstring(ab, owatch->path);
- audit_log_format(ab, " with dev=%u ino=%lu\n", dev, ino);
- audit_log_format(ab, " list=%d res=1", r->listnr);
- audit_log_end(ab);
-
+ if (audit_enabled) {
+ struct audit_buffer *ab;
+ ab = audit_log_start(NULL, GFP_KERNEL,
+ AUDIT_CONFIG_CHANGE);
+ audit_log_format(ab,
+ "op=updated rules specifying path=");
+ audit_log_untrustedstring(ab, owatch->path);
+ audit_log_format(ab, " with dev=%u ino=%lu\n",
+ dev, ino);
+ audit_log_format(ab, " list=%d res=1", r->listnr);
+ audit_log_end(ab);
+ }
audit_remove_watch(owatch);
goto add_watch_to_parent; /* event applies to a single watch */
}
@@ -1039,25 +1045,28 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
struct audit_watch *w, *nextw;
struct audit_krule *r, *nextr;
struct audit_entry *e;
- struct audit_buffer *ab;
mutex_lock(&audit_filter_mutex);
parent->flags |= AUDIT_PARENT_INVALID;
list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
e = container_of(r, struct audit_entry, rule);
-
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
- audit_log_format(ab, "op=remove rule path=");
- audit_log_untrustedstring(ab, w->path);
- if (r->filterkey) {
- audit_log_format(ab, " key=");
- audit_log_untrustedstring(ab, r->filterkey);
- } else
- audit_log_format(ab, " key=(null)");
- audit_log_format(ab, " list=%d res=1", r->listnr);
- audit_log_end(ab);
-
+ if (audit_enabled) {
+ struct audit_buffer *ab;
+ ab = audit_log_start(NULL, GFP_KERNEL,
+ AUDIT_CONFIG_CHANGE);
+ audit_log_format(ab, "op=remove rule path=");
+ audit_log_untrustedstring(ab, w->path);
+ if (r->filterkey) {
+ audit_log_format(ab, " key=");
+ audit_log_untrustedstring(ab,
+ r->filterkey);
+ } else
+ audit_log_format(ab, " key=(null)");
+ audit_log_format(ab, " list=%d res=1",
+ r->listnr);
+ audit_log_end(ab);
+ }
list_del(&r->rlist);
list_del_rcu(&e->list);
call_rcu(&e->rcu, audit_free_rule_rcu);
@@ -1495,6 +1504,9 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
{
struct audit_buffer *ab;
+ if (!audit_enabled)
+ return;
+
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (!ab)
return;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index bce9ecdb7712..1c06ecf38d7b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -70,6 +70,7 @@
#include "audit.h"
extern struct list_head audit_filter_list[];
+extern int audit_ever_enabled;
/* AUDIT_NAMES is the number of slots we reserve in the audit_context
* for saving names from getname(). */
@@ -78,6 +79,9 @@ extern struct list_head audit_filter_list[];
/* Indicates that audit should log the full pathname. */
#define AUDIT_NAME_FULL -1
+/* no execve audit message should be longer than this (userspace limits) */
+#define MAX_EXECVE_AUDIT_LEN 7500
+
/* number of audit rules */
int audit_n_rules;
@@ -176,7 +180,11 @@ struct audit_aux_data_fd_pair {
struct audit_aux_data_pids {
struct audit_aux_data d;
pid_t target_pid[AUDIT_AUX_PIDS];
+ uid_t target_auid[AUDIT_AUX_PIDS];
+ uid_t target_uid[AUDIT_AUX_PIDS];
+ unsigned int target_sessionid[AUDIT_AUX_PIDS];
u32 target_sid[AUDIT_AUX_PIDS];
+ char target_comm[AUDIT_AUX_PIDS][TASK_COMM_LEN];
int pid_count;
};
@@ -192,7 +200,6 @@ struct audit_context {
enum audit_state state;
unsigned int serial; /* serial number for record */
struct timespec ctime; /* time of syscall entry */
- uid_t loginuid; /* login uid (identity) */
int major; /* syscall number */
unsigned long argv[4]; /* syscall arguments */
int return_valid; /* return code is valid */
@@ -215,7 +222,11 @@ struct audit_context {
int arch;
pid_t target_pid;
+ uid_t target_auid;
+ uid_t target_uid;
+ unsigned int target_sessionid;
u32 target_sid;
+ char target_comm[TASK_COMM_LEN];
struct audit_tree_refs *trees, *first_trees;
int tree_count;
@@ -506,7 +517,7 @@ static int audit_filter_rules(struct task_struct *tsk,
case AUDIT_LOGINUID:
result = 0;
if (ctx)
- result = audit_comparator(ctx->loginuid, f->op, f->val);
+ result = audit_comparator(tsk->loginuid, f->op, f->val);
break;
case AUDIT_SUBJ_USER:
case AUDIT_SUBJ_ROLE:
@@ -702,7 +713,24 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
if (likely(!context))
return NULL;
context->return_valid = return_valid;
- context->return_code = return_code;
+
+ /*
+ * we need to fix up the return code in the audit logs if the actual
+ * return codes are later going to be fixed up by the arch specific
+ * signal handlers
+ *
+ * This is actually a test for:
+ * (rc == ERESTARTSYS ) || (rc == ERESTARTNOINTR) ||
+ * (rc == ERESTARTNOHAND) || (rc == ERESTART_RESTARTBLOCK)
+ *
+ * but is faster than a bunch of ||
+ */
+ if (unlikely(return_code <= -ERESTARTSYS) &&
+ (return_code >= -ERESTART_RESTARTBLOCK) &&
+ (return_code != -ENOIOCTLCMD))
+ context->return_code = -EINTR;
+ else
+ context->return_code = return_code;
if (context->in_syscall && !context->dummy && !context->auditable) {
enum audit_state state;
@@ -783,11 +811,8 @@ static inline void audit_free_aux(struct audit_context *context)
static inline void audit_zero_context(struct audit_context *context,
enum audit_state state)
{
- uid_t loginuid = context->loginuid;
-
memset(context, 0, sizeof(*context));
context->state = state;
- context->loginuid = loginuid;
}
static inline struct audit_context *audit_alloc_context(enum audit_state state)
@@ -814,7 +839,7 @@ int audit_alloc(struct task_struct *tsk)
struct audit_context *context;
enum audit_state state;
- if (likely(!audit_enabled))
+ if (likely(!audit_ever_enabled))
return 0; /* Return if not auditing. */
state = audit_filter_task(tsk);
@@ -826,11 +851,6 @@ int audit_alloc(struct task_struct *tsk)
return -ENOMEM;
}
- /* Preserve login uid */
- context->loginuid = -1;
- if (current->audit_context)
- context->loginuid = current->audit_context->loginuid;
-
tsk->audit_context = context;
set_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
return 0;
@@ -922,7 +942,8 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
}
static int audit_log_pid_context(struct audit_context *context, pid_t pid,
- u32 sid)
+ uid_t auid, uid_t uid, unsigned int sessionid,
+ u32 sid, char *comm)
{
struct audit_buffer *ab;
char *s = NULL;
@@ -931,68 +952,204 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid,
ab = audit_log_start(context, GFP_KERNEL, AUDIT_OBJ_PID);
if (!ab)
- return 1;
+ return rc;
+ audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, auid,
+ uid, sessionid);
if (selinux_sid_to_string(sid, &s, &len)) {
- audit_log_format(ab, "opid=%d obj=(none)", pid);
+ audit_log_format(ab, " obj=(none)");
rc = 1;
} else
- audit_log_format(ab, "opid=%d obj=%s", pid, s);
+ audit_log_format(ab, " obj=%s", s);
+ audit_log_format(ab, " ocomm=");
+ audit_log_untrustedstring(ab, comm);
audit_log_end(ab);
kfree(s);
return rc;
}
-static void audit_log_execve_info(struct audit_buffer *ab,
- struct audit_aux_data_execve *axi)
+/*
+ * to_send and len_sent accounting are very loose estimates. We aren't
+ * really worried about a hard cap to MAX_EXECVE_AUDIT_LEN so much as being
+ * within about 500 bytes (next page boundry)
+ *
+ * why snprintf? an int is up to 12 digits long. if we just assumed when
+ * logging that a[%d]= was going to be 16 characters long we would be wasting
+ * space in every audit message. In one 7500 byte message we can log up to
+ * about 1000 min size arguments. That comes down to about 50% waste of space
+ * if we didn't do the snprintf to find out how long arg_num_len was.
+ */
+static int audit_log_single_execve_arg(struct audit_context *context,
+ struct audit_buffer **ab,
+ int arg_num,
+ size_t *len_sent,
+ const char __user *p,
+ char *buf)
{
- int i;
- long len, ret;
- const char __user *p;
- char *buf;
+ char arg_num_len_buf[12];
+ const char __user *tmp_p = p;
+ /* how many digits are in arg_num? 3 is the length of a=\n */
+ size_t arg_num_len = snprintf(arg_num_len_buf, 12, "%d", arg_num) + 3;
+ size_t len, len_left, to_send;
+ size_t max_execve_audit_len = MAX_EXECVE_AUDIT_LEN;
+ unsigned int i, has_cntl = 0, too_long = 0;
+ int ret;
+
+ /* strnlen_user includes the null we don't want to send */
+ len_left = len = strnlen_user(p, MAX_ARG_STRLEN) - 1;
- if (axi->mm != current->mm)
- return; /* execve failed, no additional info */
-
- p = (const char __user *)axi->mm->arg_start;
+ /*
+ * We just created this mm, if we can't find the strings
+ * we just copied into it something is _very_ wrong. Similar
+ * for strings that are too long, we should not have created
+ * any.
+ */
+ if (unlikely((len = -1) || len > MAX_ARG_STRLEN - 1)) {
+ WARN_ON(1);
+ send_sig(SIGKILL, current, 0);
+ }
- for (i = 0; i < axi->argc; i++, p += len) {
- len = strnlen_user(p, MAX_ARG_STRLEN);
+ /* walk the whole argument looking for non-ascii chars */
+ do {
+ if (len_left > MAX_EXECVE_AUDIT_LEN)
+ to_send = MAX_EXECVE_AUDIT_LEN;
+ else
+ to_send = len_left;
+ ret = copy_from_user(buf, tmp_p, to_send);
/*
- * We just created this mm, if we can't find the strings
- * we just copied into it something is _very_ wrong. Similar
- * for strings that are too long, we should not have created
- * any.
+ * There is no reason for this copy to be short. We just
+ * copied them here, and the mm hasn't been exposed to user-
+ * space yet.
*/
- if (!len || len > MAX_ARG_STRLEN) {
+ if (ret) {
WARN_ON(1);
send_sig(SIGKILL, current, 0);
}
-
- buf = kmalloc(len, GFP_KERNEL);
- if (!buf) {
- audit_panic("out of memory for argv string\n");
+ buf[to_send] = '\0';
+ has_cntl = audit_string_contains_control(buf, to_send);
+ if (has_cntl) {
+ /*
+ * hex messages get logged as 2 bytes, so we can only
+ * send half as much in each message
+ */
+ max_execve_audit_len = MAX_EXECVE_AUDIT_LEN / 2;
break;
}
+ len_left -= to_send;
+ tmp_p += to_send;
+ } while (len_left > 0);
+
+ len_left = len;
+
+ if (len > max_execve_audit_len)
+ too_long = 1;
+
+ /* rewalk the argument actually logging the message */
+ for (i = 0; len_left > 0; i++) {
+ int room_left;
+
+ if (len_left > max_execve_audit_len)
+ to_send = max_execve_audit_len;
+ else
+ to_send = len_left;
+
+ /* do we have space left to send this argument in this ab? */
+ room_left = MAX_EXECVE_AUDIT_LEN - arg_num_len - *len_sent;
+ if (has_cntl)
+ room_left -= (to_send * 2);
+ else
+ room_left -= to_send;
+ if (room_left < 0) {
+ *len_sent = 0;
+ audit_log_end(*ab);
+ *ab = audit_log_start(context, GFP_KERNEL, AUDIT_EXECVE);
+ if (!*ab)
+ return 0;
+ }
- ret = copy_from_user(buf, p, len);
/*
- * There is no reason for this copy to be short. We just
- * copied them here, and the mm hasn't been exposed to user-
- * space yet.
+ * first record needs to say how long the original string was
+ * so we can be sure nothing was lost.
+ */
+ if ((i == 0) && (too_long))
+ audit_log_format(*ab, "a%d_len=%ld ", arg_num,
+ has_cntl ? 2*len : len);
+
+ /*
+ * normally arguments are small enough to fit and we already
+ * filled buf above when we checked for control characters
+ * so don't bother with another copy_from_user
*/
+ if (len >= max_execve_audit_len)
+ ret = copy_from_user(buf, p, to_send);
+ else
+ ret = 0;
if (ret) {
WARN_ON(1);
send_sig(SIGKILL, current, 0);
}
+ buf[to_send] = '\0';
+
+ /* actually log it */
+ audit_log_format(*ab, "a%d", arg_num);
+ if (too_long)
+ audit_log_format(*ab, "[%d]", i);
+ audit_log_format(*ab, "=");
+ if (has_cntl)
+ audit_log_hex(*ab, buf, to_send);
+ else
+ audit_log_format(*ab, "\"%s\"", buf);
+ audit_log_format(*ab, "\n");
+
+ p += to_send;
+ len_left -= to_send;
+ *len_sent += arg_num_len;
+ if (has_cntl)
+ *len_sent += to_send * 2;
+ else
+ *len_sent += to_send;
+ }
+ /* include the null we didn't log */
+ return len + 1;
+}
- audit_log_format(ab, "a%d=", i);
- audit_log_untrustedstring(ab, buf);
- audit_log_format(ab, "\n");
+static void audit_log_execve_info(struct audit_context *context,
+ struct audit_buffer **ab,
+ struct audit_aux_data_execve *axi)
+{
+ int i;
+ size_t len, len_sent = 0;
+ const char __user *p;
+ char *buf;
+
+ if (axi->mm != current->mm)
+ return; /* execve failed, no additional info */
+
+ p = (const char __user *)axi->mm->arg_start;
+
+ audit_log_format(*ab, "argc=%d ", axi->argc);
+
+ /*
+ * we need some kernel buffer to hold the userspace args. Just
+ * allocate one big one rather than allocating one of the right size
+ * for every single argument inside audit_log_single_execve_arg()
+ * should be <8k allocation so should be pretty safe.
+ */
+ buf = kmalloc(MAX_EXECVE_AUDIT_LEN + 1, GFP_KERNEL);
+ if (!buf) {
+ audit_panic("out of memory for argv string\n");
+ return;
+ }
- kfree(buf);
+ for (i = 0; i < axi->argc; i++) {
+ len = audit_log_single_execve_arg(context, ab, i,
+ &len_sent, p, buf);
+ if (len <= 0)
+ break;
+ p += len;
}
+ kfree(buf);
}
static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
@@ -1039,7 +1196,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
" a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
" ppid=%d pid=%d auid=%u uid=%u gid=%u"
" euid=%u suid=%u fsuid=%u"
- " egid=%u sgid=%u fsgid=%u tty=%s",
+ " egid=%u sgid=%u fsgid=%u tty=%s ses=%u",
context->argv[0],
context->argv[1],
context->argv[2],
@@ -1047,11 +1204,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
context->name_count,
context->ppid,
context->pid,
- context->loginuid,
+ tsk->loginuid,
context->uid,
context->gid,
context->euid, context->suid, context->fsuid,
- context->egid, context->sgid, context->fsgid, tty);
+ context->egid, context->sgid, context->fsgid, tty,
+ tsk->sessionid);
mutex_unlock(&tty_mutex);
@@ -1135,7 +1293,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
case AUDIT_EXECVE: {
struct audit_aux_data_execve *axi = (void *)aux;
- audit_log_execve_info(ab, axi);
+ audit_log_execve_info(context, &ab, axi);
break; }
case AUDIT_SOCKETCALL: {
@@ -1168,13 +1326,19 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
for (i = 0; i < axs->pid_count; i++)
if (audit_log_pid_context(context, axs->target_pid[i],
- axs->target_sid[i]))
+ axs->target_auid[i],
+ axs->target_uid[i],
+ axs->target_sessionid[i],
+ axs->target_sid[i],
+ axs->target_comm[i]))
call_panic = 1;
}
if (context->target_pid &&
audit_log_pid_context(context, context->target_pid,
- context->target_sid))
+ context->target_auid, context->target_uid,
+ context->target_sessionid,
+ context->target_sid, context->target_comm))
call_panic = 1;
if (context->pwd && context->pwdmnt) {
@@ -1242,6 +1406,11 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
audit_log_end(ab);
}
+
+ /* Send end of event record to help user space know we are finished */
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
+ if (ab)
+ audit_log_end(ab);
if (call_panic)
audit_panic("error converting sid to string");
}
@@ -1766,6 +1935,9 @@ void auditsc_get_stamp(struct audit_context *ctx,
ctx->auditable = 1;
}
+/* global counter which is incremented every time something logs in */
+static atomic_t session_id = ATOMIC_INIT(0);
+
/**
* audit_set_loginuid - set a task's audit_context loginuid
* @task: task whose audit context is being modified
@@ -1777,41 +1949,29 @@ void auditsc_get_stamp(struct audit_context *ctx,
*/
int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
{
+ unsigned int sessionid = atomic_inc_return(&session_id);
struct audit_context *context = task->audit_context;
- if (context) {
- /* Only log if audit is enabled */
- if (context->in_syscall) {
- struct audit_buffer *ab;
-
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
- if (ab) {
- audit_log_format(ab, "login pid=%d uid=%u "
- "old auid=%u new auid=%u",
- task->pid, task->uid,
- context->loginuid, loginuid);
- audit_log_end(ab);
- }
+ if (context && context->in_syscall) {
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+ if (ab) {
+ audit_log_format(ab, "login pid=%d uid=%u "
+ "old auid=%u new auid=%u"
+ " old ses=%u new ses=%u",
+ task->pid, task->uid,
+ task->loginuid, loginuid,
+ task->sessionid, sessionid);
+ audit_log_end(ab);
}
- context->loginuid = loginuid;
}
+ task->sessionid = sessionid;
+ task->loginuid = loginuid;
return 0;
}
/**
- * audit_get_loginuid - get the loginuid for an audit_context
- * @ctx: the audit_context
- *
- * Returns the context's loginuid or -1 if @ctx is NULL.
- */
-uid_t audit_get_loginuid(struct audit_context *ctx)
-{
- return ctx ? ctx->loginuid : -1;
-}
-
-EXPORT_SYMBOL(audit_get_loginuid);
-
-/**
* __audit_mq_open - record audit data for a POSIX MQ open
* @oflag: open flag
* @mode: mode bits
@@ -2070,8 +2230,6 @@ int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode
return 0;
}
-int audit_argv_kb = 32;
-
int audit_bprm(struct linux_binprm *bprm)
{
struct audit_aux_data_execve *ax;
@@ -2080,14 +2238,6 @@ int audit_bprm(struct linux_binprm *bprm)
if (likely(!audit_enabled || !context || context->dummy))
return 0;
- /*
- * Even though the stack code doesn't limit the arg+env size any more,
- * the audit code requires that _all_ arguments be logged in a single
- * netlink skb. Hence cap it :-(
- */
- if (bprm->argv_len > (audit_argv_kb << 10))
- return -E2BIG;
-
ax = kmalloc(sizeof(*ax), GFP_KERNEL);
if (!ax)
return -ENOMEM;
@@ -2193,7 +2343,11 @@ void __audit_ptrace(struct task_struct *t)
struct audit_context *context = current->audit_context;
context->target_pid = t->pid;
+ context->target_auid = audit_get_loginuid(t);
+ context->target_uid = t->uid;
+ context->target_sessionid = audit_get_sessionid(t);
selinux_get_task_sid(t, &context->target_sid);
+ memcpy(context->target_comm, t->comm, TASK_COMM_LEN);
}
/**
@@ -2216,8 +2370,8 @@ int __audit_signal_info(int sig, struct task_struct *t)
if (audit_pid && t->tgid == audit_pid) {
if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
audit_sig_pid = tsk->pid;
- if (ctx)
- audit_sig_uid = ctx->loginuid;
+ if (tsk->loginuid != -1)
+ audit_sig_uid = tsk->loginuid;
else
audit_sig_uid = tsk->uid;
selinux_get_task_sid(tsk, &audit_sig_sid);
@@ -2230,7 +2384,11 @@ int __audit_signal_info(int sig, struct task_struct *t)
* in audit_context */
if (!ctx->target_pid) {
ctx->target_pid = t->tgid;
+ ctx->target_auid = audit_get_loginuid(t);
+ ctx->target_uid = t->uid;
+ ctx->target_sessionid = audit_get_sessionid(t);
selinux_get_task_sid(t, &ctx->target_sid);
+ memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN);
return 0;
}
@@ -2247,7 +2405,11 @@ int __audit_signal_info(int sig, struct task_struct *t)
BUG_ON(axp->pid_count >= AUDIT_AUX_PIDS);
axp->target_pid[axp->pid_count] = t->tgid;
+ axp->target_auid[axp->pid_count] = audit_get_loginuid(t);
+ axp->target_uid[axp->pid_count] = t->uid;
+ axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t);
selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]);
+ memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN);
axp->pid_count++;
return 0;
@@ -2264,6 +2426,8 @@ void audit_core_dumps(long signr)
{
struct audit_buffer *ab;
u32 sid;
+ uid_t auid = audit_get_loginuid(current);
+ unsigned int sessionid = audit_get_sessionid(current);
if (!audit_enabled)
return;
@@ -2272,9 +2436,8 @@ void audit_core_dumps(long signr)
return;
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
- audit_log_format(ab, "auid=%u uid=%u gid=%u",
- audit_get_loginuid(current->audit_context),
- current->uid, current->gid);
+ audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u",
+ auid, current->uid, current->gid, sessionid);
selinux_get_task_sid(current, &sid);
if (sid) {
char *ctx = NULL;
diff --git a/kernel/futex.c b/kernel/futex.c
index db9824de8bf0..a6baaec44b8f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -109,6 +109,9 @@ struct futex_q {
/* Optional priority inheritance state: */
struct futex_pi_state *pi_state;
struct task_struct *task;
+
+ /* Bitset for the optional bitmasked wakeup */
+ u32 bitset;
};
/*
@@ -722,7 +725,7 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
* to this virtual address:
*/
static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
- int nr_wake)
+ int nr_wake, u32 bitset)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
@@ -730,6 +733,9 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
union futex_key key;
int ret;
+ if (!bitset)
+ return -EINVAL;
+
futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &key);
@@ -746,6 +752,11 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
ret = -EINVAL;
break;
}
+
+ /* Check if one of the bits is set in both bitsets */
+ if (!(this->bitset & bitset))
+ continue;
+
wake_futex(this);
if (++ret >= nr_wake)
break;
@@ -1156,7 +1167,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
static long futex_wait_restart(struct restart_block *restart);
static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
- u32 val, ktime_t *abs_time)
+ u32 val, ktime_t *abs_time, u32 bitset)
{
struct task_struct *curr = current;
DECLARE_WAITQUEUE(wait, curr);
@@ -1167,7 +1178,11 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
struct hrtimer_sleeper t;
int rem = 0;
+ if (!bitset)
+ return -EINVAL;
+
q.pi_state = NULL;
+ q.bitset = bitset;
retry:
futex_lock_mm(fshared);
@@ -1252,6 +1267,8 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
t.timer.expires = *abs_time;
hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS);
+ if (!hrtimer_active(&t.timer))
+ t.task = NULL;
/*
* the timer could have already expired, in which
@@ -1293,6 +1310,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
restart->futex.uaddr = (u32 *)uaddr;
restart->futex.val = val;
restart->futex.time = abs_time->tv64;
+ restart->futex.bitset = bitset;
restart->futex.flags = 0;
if (fshared)
@@ -1319,7 +1337,8 @@ static long futex_wait_restart(struct restart_block *restart)
restart->fn = do_no_restart_syscall;
if (restart->futex.flags & FLAGS_SHARED)
fshared = &current->mm->mmap_sem;
- return (long)futex_wait(uaddr, fshared, restart->futex.val, &t);
+ return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
+ restart->futex.bitset);
}
@@ -1535,9 +1554,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
owner = rt_mutex_owner(&q.pi_state->pi_mutex);
res = fixup_pi_state_owner(uaddr, &q, owner);
- WARN_ON(rt_mutex_owner(&q.pi_state->pi_mutex) !=
- owner);
-
/* propagate -EFAULT, if the fixup failed */
if (res)
ret = res;
@@ -1943,7 +1959,8 @@ retry:
* PI futexes happens in exit_pi_state():
*/
if (!pi && (uval & FUTEX_WAITERS))
- futex_wake(uaddr, &curr->mm->mmap_sem, 1);
+ futex_wake(uaddr, &curr->mm->mmap_sem, 1,
+ FUTEX_BITSET_MATCH_ANY);
}
return 0;
}
@@ -2043,10 +2060,14 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
switch (cmd) {
case FUTEX_WAIT:
- ret = futex_wait(uaddr, fshared, val, timeout);
+ val3 = FUTEX_BITSET_MATCH_ANY;
+ case FUTEX_WAIT_BITSET:
+ ret = futex_wait(uaddr, fshared, val, timeout, val3);
break;
case FUTEX_WAKE:
- ret = futex_wake(uaddr, fshared, val);
+ val3 = FUTEX_BITSET_MATCH_ANY;
+ case FUTEX_WAKE_BITSET:
+ ret = futex_wake(uaddr, fshared, val, val3);
break;
case FUTEX_FD:
/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
@@ -2086,7 +2107,8 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
u32 val2 = 0;
int cmd = op & FUTEX_CMD_MASK;
- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
+ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
+ cmd == FUTEX_WAIT_BITSET)) {
if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
return -EFAULT;
if (!timespec_valid(&ts))
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 0a43def6fee7..133d558db452 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -167,7 +167,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
int val2 = 0;
int cmd = op & FUTEX_CMD_MASK;
- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
+ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
+ cmd == FUTEX_WAIT_BITSET)) {
if (get_compat_timespec(&ts, utime))
return -EFAULT;
if (!timespec_valid(&ts))
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index bd5d6b5060bc..1069998fe25f 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1315,6 +1315,8 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
} while (t->task && !signal_pending(current));
+ __set_current_state(TASK_RUNNING);
+
return t->task == NULL;
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 8e186c678149..ef9b802738a5 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -44,9 +44,30 @@ config PM_VERBOSE
---help---
This option enables verbose messages from the Power Management code.
+config CAN_PM_TRACE
+ def_bool y
+ depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL
+
config PM_TRACE
+ bool
+ help
+ This enables code to save the last PM event point across
+ reboot. The architecture needs to support this, x86 for
+ example does by saving things in the RTC, see below.
+
+ The architecture specific code must provide the extern
+ functions from <linux/resume-trace.h> as well as the
+ <asm/resume-trace.h> header with a TRACE_RESUME() macro.
+
+ The way the information is presented is architecture-
+ dependent, x86 will print the information during a
+ late_initcall.
+
+config PM_TRACE_RTC
bool "Suspend/resume event tracing"
- depends on PM_DEBUG && X86 && PM_SLEEP && EXPERIMENTAL
+ depends on CAN_PM_TRACE
+ depends on X86
+ select PM_TRACE
default n
---help---
This enables some cheesy code to save the last PM event point in the
@@ -63,7 +84,8 @@ config PM_TRACE
config PM_SLEEP_SMP
bool
- depends on SUSPEND_SMP_POSSIBLE || HIBERNATION_SMP_POSSIBLE
+ depends on SMP
+ depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
depends on PM_SLEEP
select HOTPLUG_CPU
default y
@@ -73,46 +95,29 @@ config PM_SLEEP
depends on SUSPEND || HIBERNATION
default y
-config SUSPEND_UP_POSSIBLE
- bool
- depends on (X86 && !X86_VOYAGER) || PPC || ARM || BLACKFIN || MIPS \
- || SUPERH || FRV
- depends on !SMP
- default y
-
-config SUSPEND_SMP_POSSIBLE
- bool
- depends on (X86 && !X86_VOYAGER) \
- || (PPC && (PPC_PSERIES || PPC_PMAC)) || ARM
- depends on SMP
- default y
-
config SUSPEND
bool "Suspend to RAM and standby"
- depends on PM
- depends on SUSPEND_UP_POSSIBLE || SUSPEND_SMP_POSSIBLE
+ depends on PM && ARCH_SUSPEND_POSSIBLE
default y
---help---
Allow the system to enter sleep states in which main memory is
powered and thus its contents are preserved, such as the
- suspend-to-RAM state (i.e. the ACPI S3 state).
+ suspend-to-RAM state (e.g. the ACPI S3 state).
-config HIBERNATION_UP_POSSIBLE
- bool
- depends on X86 || PPC64_SWSUSP || PPC32
- depends on !SMP
+config SUSPEND_FREEZER
+ bool "Enable freezer for suspend to RAM/standby" \
+ if ARCH_WANTS_FREEZER_CONTROL || BROKEN
+ depends on SUSPEND
default y
+ help
+ This allows you to turn off the freezer for suspend. If this is
+ done, no tasks are frozen for suspend to RAM/standby.
-config HIBERNATION_SMP_POSSIBLE
- bool
- depends on (X86 && !X86_VOYAGER) || PPC64_SWSUSP
- depends on SMP
- default y
+ Turning OFF this setting is NOT recommended! If in doubt, say Y.
config HIBERNATION
bool "Hibernation (aka 'suspend to disk')"
- depends on PM && SWAP
- depends on HIBERNATION_UP_POSSIBLE || HIBERNATION_SMP_POSSIBLE
+ depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
---help---
Enable the suspend to disk (STD) functionality, which is usually
called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index b138b431e271..d09da0895174 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -54,8 +54,8 @@ static struct platform_hibernation_ops *hibernation_ops;
void hibernation_set_ops(struct platform_hibernation_ops *ops)
{
- if (ops && !(ops->start && ops->pre_snapshot && ops->finish
- && ops->prepare && ops->enter && ops->pre_restore
+ if (ops && !(ops->begin && ops->end && ops->pre_snapshot
+ && ops->prepare && ops->finish && ops->enter && ops->pre_restore
&& ops->restore_cleanup)) {
WARN_ON(1);
return;
@@ -70,15 +70,55 @@ void hibernation_set_ops(struct platform_hibernation_ops *ops)
mutex_unlock(&pm_mutex);
}
+#ifdef CONFIG_PM_DEBUG
+static void hibernation_debug_sleep(void)
+{
+ printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n");
+ mdelay(5000);
+}
+
+static int hibernation_testmode(int mode)
+{
+ if (hibernation_mode == mode) {
+ hibernation_debug_sleep();
+ return 1;
+ }
+ return 0;
+}
+
+static int hibernation_test(int level)
+{
+ if (pm_test_level == level) {
+ hibernation_debug_sleep();
+ return 1;
+ }
+ return 0;
+}
+#else /* !CONFIG_PM_DEBUG */
+static int hibernation_testmode(int mode) { return 0; }
+static int hibernation_test(int level) { return 0; }
+#endif /* !CONFIG_PM_DEBUG */
+
/**
- * platform_start - tell the platform driver that we're starting
+ * platform_begin - tell the platform driver that we're starting
* hibernation
*/
-static int platform_start(int platform_mode)
+static int platform_begin(int platform_mode)
{
return (platform_mode && hibernation_ops) ?
- hibernation_ops->start() : 0;
+ hibernation_ops->begin() : 0;
+}
+
+/**
+ * platform_end - tell the platform driver that we've entered the
+ * working state
+ */
+
+static void platform_end(int platform_mode)
+{
+ if (platform_mode && hibernation_ops)
+ hibernation_ops->end();
}
/**
@@ -162,19 +202,25 @@ int create_image(int platform_mode)
*/
error = device_power_down(PMSG_FREEZE);
if (error) {
- printk(KERN_ERR "Some devices failed to power down, "
- KERN_ERR "aborting suspend\n");
+ printk(KERN_ERR "PM: Some devices failed to power down, "
+ "aborting hibernation\n");
goto Enable_irqs;
}
+ if (hibernation_test(TEST_CORE))
+ goto Power_up;
+
+ in_suspend = 1;
save_processor_state();
error = swsusp_arch_suspend();
if (error)
- printk(KERN_ERR "Error %d while creating the image\n", error);
+ printk(KERN_ERR "PM: Error %d creating hibernation image\n",
+ error);
/* Restore control flow magically appears here */
restore_processor_state();
if (!in_suspend)
platform_leave(platform_mode);
+ Power_up:
/* NOTE: device_power_up() is just a resume() for devices
* that suspended with irqs off ... no overall powerup.
*/
@@ -202,36 +248,90 @@ int hibernation_snapshot(int platform_mode)
if (error)
return error;
- error = platform_start(platform_mode);
+ error = platform_begin(platform_mode);
if (error)
- return error;
+ goto Close;
suspend_console();
error = device_suspend(PMSG_FREEZE);
if (error)
goto Resume_console;
- error = platform_pre_snapshot(platform_mode);
- if (error)
+ if (hibernation_test(TEST_DEVICES))
goto Resume_devices;
+ error = platform_pre_snapshot(platform_mode);
+ if (error || hibernation_test(TEST_PLATFORM))
+ goto Finish;
+
error = disable_nonboot_cpus();
if (!error) {
- if (hibernation_mode != HIBERNATION_TEST) {
- in_suspend = 1;
- error = create_image(platform_mode);
- /* Control returns here after successful restore */
- } else {
- printk("swsusp debug: Waiting for 5 seconds.\n");
- mdelay(5000);
- }
+ if (hibernation_test(TEST_CPUS))
+ goto Enable_cpus;
+
+ if (hibernation_testmode(HIBERNATION_TEST))
+ goto Enable_cpus;
+
+ error = create_image(platform_mode);
+ /* Control returns here after successful restore */
}
+ Enable_cpus:
enable_nonboot_cpus();
- Resume_devices:
+ Finish:
platform_finish(platform_mode);
+ Resume_devices:
device_resume();
Resume_console:
resume_console();
+ Close:
+ platform_end(platform_mode);
+ return error;
+}
+
+/**
+ * resume_target_kernel - prepare devices that need to be suspended with
+ * interrupts off, restore the contents of highmem that have not been
+ * restored yet from the image and run the low level code that will restore
+ * the remaining contents of memory and switch to the just restored target
+ * kernel.
+ */
+
+static int resume_target_kernel(void)
+{
+ int error;
+
+ local_irq_disable();
+ error = device_power_down(PMSG_PRETHAW);
+ if (error) {
+ printk(KERN_ERR "PM: Some devices failed to power down, "
+ "aborting resume\n");
+ goto Enable_irqs;
+ }
+ /* We'll ignore saved state, but this gets preempt count (etc) right */
+ save_processor_state();
+ error = restore_highmem();
+ if (!error) {
+ error = swsusp_arch_resume();
+ /*
+ * The code below is only ever reached in case of a failure.
+ * Otherwise execution continues at place where
+ * swsusp_arch_suspend() was called
+ */
+ BUG_ON(!error);
+ /* This call to restore_highmem() undos the previous one */
+ restore_highmem();
+ }
+ /*
+ * The only reason why swsusp_arch_resume() can fail is memory being
+ * very tight, so we have to free it as soon as we can to avoid
+ * subsequent failures
+ */
+ swsusp_free();
+ restore_processor_state();
+ touch_softlockup_watchdog();
+ device_power_up();
+ Enable_irqs:
+ local_irq_enable();
return error;
}
@@ -258,7 +358,7 @@ int hibernation_restore(int platform_mode)
if (!error) {
error = disable_nonboot_cpus();
if (!error)
- error = swsusp_resume();
+ error = resume_target_kernel();
enable_nonboot_cpus();
}
platform_restore_cleanup(platform_mode);
@@ -286,9 +386,9 @@ int hibernation_platform_enter(void)
* hibernation_ops->finish() before saving the image, so we should let
* the firmware know that we're going to enter the sleep state after all
*/
- error = hibernation_ops->start();
+ error = hibernation_ops->begin();
if (error)
- return error;
+ goto Close;
suspend_console();
error = device_suspend(PMSG_SUSPEND);
@@ -322,6 +422,8 @@ int hibernation_platform_enter(void)
device_resume();
Resume_console:
resume_console();
+ Close:
+ hibernation_ops->end();
return error;
}
@@ -352,24 +454,17 @@ static void power_down(void)
* Valid image is on the disk, if we continue we risk serious data
* corruption after resume.
*/
- printk(KERN_CRIT "Please power me down manually\n");
+ printk(KERN_CRIT "PM: Please power down manually\n");
while(1);
}
-static void unprepare_processes(void)
-{
- thaw_processes();
- pm_restore_console();
-}
-
static int prepare_processes(void)
{
int error = 0;
- pm_prepare_console();
if (freeze_processes()) {
error = -EBUSY;
- unprepare_processes();
+ thaw_processes();
}
return error;
}
@@ -389,6 +484,7 @@ int hibernate(void)
goto Unlock;
}
+ pm_prepare_console();
error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
if (error)
goto Exit;
@@ -398,7 +494,7 @@ int hibernate(void)
if (error)
goto Exit;
- printk("Syncing filesystems ... ");
+ printk(KERN_INFO "PM: Syncing filesystems ... ");
sys_sync();
printk("done.\n");
@@ -406,11 +502,12 @@ int hibernate(void)
if (error)
goto Finish;
- if (hibernation_mode == HIBERNATION_TESTPROC) {
- printk("swsusp debug: Waiting for 5 seconds.\n");
- mdelay(5000);
+ if (hibernation_test(TEST_FREEZER))
goto Thaw;
- }
+
+ if (hibernation_testmode(HIBERNATION_TESTPROC))
+ goto Thaw;
+
error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
if (in_suspend && !error) {
unsigned int flags = 0;
@@ -427,11 +524,12 @@ int hibernate(void)
swsusp_free();
}
Thaw:
- unprepare_processes();
+ thaw_processes();
Finish:
free_basic_memory_bitmaps();
Exit:
pm_notifier_call_chain(PM_POST_HIBERNATION);
+ pm_restore_console();
atomic_inc(&snapshot_device_available);
Unlock:
mutex_unlock(&pm_mutex);
@@ -473,22 +571,23 @@ static int software_resume(void)
return -ENOENT;
}
swsusp_resume_device = name_to_dev_t(resume_file);
- pr_debug("swsusp: Resume From Partition %s\n", resume_file);
+ pr_debug("PM: Resume from partition %s\n", resume_file);
} else {
- pr_debug("swsusp: Resume From Partition %d:%d\n",
- MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
+ pr_debug("PM: Resume from partition %d:%d\n",
+ MAJOR(swsusp_resume_device),
+ MINOR(swsusp_resume_device));
}
if (noresume) {
/**
- * FIXME: If noresume is specified, we need to find the partition
- * and reset it back to normal swap space.
+ * FIXME: If noresume is specified, we need to find the
+ * partition and reset it back to normal swap space.
*/
mutex_unlock(&pm_mutex);
return 0;
}
- pr_debug("PM: Checking swsusp image.\n");
+ pr_debug("PM: Checking hibernation image.\n");
error = swsusp_check();
if (error)
goto Unlock;
@@ -499,6 +598,11 @@ static int software_resume(void)
goto Unlock;
}
+ pm_prepare_console();
+ error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+ if (error)
+ goto Finish;
+
error = create_basic_memory_bitmaps();
if (error)
goto Finish;
@@ -510,7 +614,7 @@ static int software_resume(void)
goto Done;
}
- pr_debug("PM: Reading swsusp image.\n");
+ pr_debug("PM: Reading hibernation image.\n");
error = swsusp_read(&flags);
if (!error)
@@ -518,10 +622,12 @@ static int software_resume(void)
printk(KERN_ERR "PM: Restore failed, recovering.\n");
swsusp_free();
- unprepare_processes();
+ thaw_processes();
Done:
free_basic_memory_bitmaps();
Finish:
+ pm_notifier_call_chain(PM_POST_RESTORE);
+ pm_restore_console();
atomic_inc(&snapshot_device_available);
/* For success case, the suspend path will release the lock */
Unlock:
@@ -636,7 +742,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
error = -EINVAL;
if (!error)
- pr_debug("PM: suspend-to-disk mode set to '%s'\n",
+ pr_debug("PM: Hibernation mode set to '%s'\n",
hibernation_modes[mode]);
mutex_unlock(&pm_mutex);
return error ? error : n;
@@ -668,7 +774,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
mutex_lock(&pm_mutex);
swsusp_resume_device = res;
mutex_unlock(&pm_mutex);
- printk("Attempting manual resume\n");
+ printk(KERN_INFO "PM: Starting manual resume from disk\n");
noresume = 0;
software_resume();
ret = n;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index efc08360e627..6a6d5eb3524e 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -24,13 +24,112 @@
#include "power.h"
-BLOCKING_NOTIFIER_HEAD(pm_chain_head);
-
DEFINE_MUTEX(pm_mutex);
unsigned int pm_flags;
EXPORT_SYMBOL(pm_flags);
+#ifdef CONFIG_PM_SLEEP
+
+/* Routines for PM-transition notifications */
+
+static BLOCKING_NOTIFIER_HEAD(pm_chain_head);
+
+int register_pm_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&pm_chain_head, nb);
+}
+EXPORT_SYMBOL_GPL(register_pm_notifier);
+
+int unregister_pm_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&pm_chain_head, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_pm_notifier);
+
+int pm_notifier_call_chain(unsigned long val)
+{
+ return (blocking_notifier_call_chain(&pm_chain_head, val, NULL)
+ == NOTIFY_BAD) ? -EINVAL : 0;
+}
+
+#ifdef CONFIG_PM_DEBUG
+int pm_test_level = TEST_NONE;
+
+static int suspend_test(int level)
+{
+ if (pm_test_level == level) {
+ printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n");
+ mdelay(5000);
+ return 1;
+ }
+ return 0;
+}
+
+static const char * const pm_tests[__TEST_AFTER_LAST] = {
+ [TEST_NONE] = "none",
+ [TEST_CORE] = "core",
+ [TEST_CPUS] = "processors",
+ [TEST_PLATFORM] = "platform",
+ [TEST_DEVICES] = "devices",
+ [TEST_FREEZER] = "freezer",
+};
+
+static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ char *s = buf;
+ int level;
+
+ for (level = TEST_FIRST; level <= TEST_MAX; level++)
+ if (pm_tests[level]) {
+ if (level == pm_test_level)
+ s += sprintf(s, "[%s] ", pm_tests[level]);
+ else
+ s += sprintf(s, "%s ", pm_tests[level]);
+ }
+
+ if (s != buf)
+ /* convert the last space to a newline */
+ *(s-1) = '\n';
+
+ return (s - buf);
+}
+
+static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ const char * const *s;
+ int level;
+ char *p;
+ int len;
+ int error = -EINVAL;
+
+ p = memchr(buf, '\n', n);
+ len = p ? p - buf : n;
+
+ mutex_lock(&pm_mutex);
+
+ level = TEST_FIRST;
+ for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++)
+ if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) {
+ pm_test_level = level;
+ error = 0;
+ break;
+ }
+
+ mutex_unlock(&pm_mutex);
+
+ return error ? error : n;
+}
+
+power_attr(pm_test);
+#else /* !CONFIG_PM_DEBUG */
+static inline int suspend_test(int level) { return 0; }
+#endif /* !CONFIG_PM_DEBUG */
+
+#endif /* CONFIG_PM_SLEEP */
+
#ifdef CONFIG_SUSPEND
/* This is just an arbitrary number */
@@ -76,13 +175,13 @@ static int suspend_prepare(void)
if (!suspend_ops || !suspend_ops->enter)
return -EPERM;
+ pm_prepare_console();
+
error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
if (error)
goto Finish;
- pm_prepare_console();
-
- if (freeze_processes()) {
+ if (suspend_freeze_processes()) {
error = -EAGAIN;
goto Thaw;
}
@@ -100,10 +199,10 @@ static int suspend_prepare(void)
return 0;
Thaw:
- thaw_processes();
- pm_restore_console();
+ suspend_thaw_processes();
Finish:
pm_notifier_call_chain(PM_POST_SUSPEND);
+ pm_restore_console();
return error;
}
@@ -133,10 +232,13 @@ static int suspend_enter(suspend_state_t state)
BUG_ON(!irqs_disabled());
if ((error = device_power_down(PMSG_SUSPEND))) {
- printk(KERN_ERR "Some devices failed to power down\n");
+ printk(KERN_ERR "PM: Some devices failed to power down\n");
goto Done;
}
- error = suspend_ops->enter(state);
+
+ if (!suspend_test(TEST_CORE))
+ error = suspend_ops->enter(state);
+
device_power_up();
Done:
arch_suspend_enable_irqs();
@@ -145,8 +247,8 @@ static int suspend_enter(suspend_state_t state)
}
/**
- * suspend_devices_and_enter - suspend devices and enter the desired system sleep
- * state.
+ * suspend_devices_and_enter - suspend devices and enter the desired system
+ * sleep state.
* @state: state to enter
*/
int suspend_devices_and_enter(suspend_state_t state)
@@ -156,33 +258,45 @@ int suspend_devices_and_enter(suspend_state_t state)
if (!suspend_ops)
return -ENOSYS;
- if (suspend_ops->set_target) {
- error = suspend_ops->set_target(state);
+ if (suspend_ops->begin) {
+ error = suspend_ops->begin(state);
if (error)
- return error;
+ goto Close;
}
suspend_console();
error = device_suspend(PMSG_SUSPEND);
if (error) {
- printk(KERN_ERR "Some devices failed to suspend\n");
+ printk(KERN_ERR "PM: Some devices failed to suspend\n");
goto Resume_console;
}
+
+ if (suspend_test(TEST_DEVICES))
+ goto Resume_devices;
+
if (suspend_ops->prepare) {
error = suspend_ops->prepare();
if (error)
goto Resume_devices;
}
+
+ if (suspend_test(TEST_PLATFORM))
+ goto Finish;
+
error = disable_nonboot_cpus();
- if (!error)
+ if (!error && !suspend_test(TEST_CPUS))
suspend_enter(state);
enable_nonboot_cpus();
+ Finish:
if (suspend_ops->finish)
suspend_ops->finish();
Resume_devices:
device_resume();
Resume_console:
resume_console();
+ Close:
+ if (suspend_ops->end)
+ suspend_ops->end();
return error;
}
@@ -194,9 +308,9 @@ int suspend_devices_and_enter(suspend_state_t state)
*/
static void suspend_finish(void)
{
- thaw_processes();
- pm_restore_console();
+ suspend_thaw_processes();
pm_notifier_call_chain(PM_POST_SUSPEND);
+ pm_restore_console();
}
@@ -238,17 +352,22 @@ static int enter_state(suspend_state_t state)
if (!mutex_trylock(&pm_mutex))
return -EBUSY;
- printk("Syncing filesystems ... ");
+ printk(KERN_INFO "PM: Syncing filesystems ... ");
sys_sync();
printk("done.\n");
pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
- if ((error = suspend_prepare()))
+ error = suspend_prepare();
+ if (error)
goto Unlock;
+ if (suspend_test(TEST_FREEZER))
+ goto Finish;
+
pr_debug("PM: Entering %s sleep\n", pm_states[state]);
error = suspend_devices_and_enter(state);
+ Finish:
pr_debug("PM: Finishing wakeup.\n");
suspend_finish();
Unlock:
@@ -369,18 +488,18 @@ pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr,
}
power_attr(pm_trace);
+#endif /* CONFIG_PM_TRACE */
static struct attribute * g[] = {
&state_attr.attr,
+#ifdef CONFIG_PM_TRACE
&pm_trace_attr.attr,
+#endif
+#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_DEBUG)
+ &pm_test_attr.attr,
+#endif
NULL,
};
-#else
-static struct attribute * g[] = {
- &state_attr.attr,
- NULL,
-};
-#endif /* CONFIG_PM_TRACE */
static struct attribute_group attr_group = {
.attrs = g,
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 2093c3a9a994..700f44ec8406 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -1,5 +1,7 @@
#include <linux/suspend.h>
+#include <linux/suspend_ioctls.h>
#include <linux/utsname.h>
+#include <linux/freezer.h>
struct swsusp_info {
struct new_utsname uts;
@@ -128,42 +130,12 @@ struct snapshot_handle {
#define data_of(handle) ((handle).buffer + (handle).buf_offset)
extern unsigned int snapshot_additional_pages(struct zone *zone);
+extern unsigned long snapshot_get_image_size(void);
extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
extern void snapshot_write_finalize(struct snapshot_handle *handle);
extern int snapshot_image_loaded(struct snapshot_handle *handle);
-/*
- * This structure is used to pass the values needed for the identification
- * of the resume swap area from a user space to the kernel via the
- * SNAPSHOT_SET_SWAP_AREA ioctl
- */
-struct resume_swap_area {
- loff_t offset;
- u_int32_t dev;
-} __attribute__((packed));
-
-#define SNAPSHOT_IOC_MAGIC '3'
-#define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1)
-#define SNAPSHOT_UNFREEZE _IO(SNAPSHOT_IOC_MAGIC, 2)
-#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *)
-#define SNAPSHOT_ATOMIC_RESTORE _IO(SNAPSHOT_IOC_MAGIC, 4)
-#define SNAPSHOT_FREE _IO(SNAPSHOT_IOC_MAGIC, 5)
-#define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long)
-#define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *)
-#define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *)
-#define SNAPSHOT_FREE_SWAP_PAGES _IO(SNAPSHOT_IOC_MAGIC, 9)
-#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
-#define SNAPSHOT_S2RAM _IO(SNAPSHOT_IOC_MAGIC, 11)
-#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
-#define SNAPSHOT_SET_SWAP_AREA _IOW(SNAPSHOT_IOC_MAGIC, 13, \
- struct resume_swap_area)
-#define SNAPSHOT_IOC_MAXNR 13
-
-#define PMOPS_PREPARE 1
-#define PMOPS_ENTER 2
-#define PMOPS_FINISH 3
-
/* If unset, the snapshot device cannot be open. */
extern atomic_t snapshot_device_available;
@@ -181,7 +153,6 @@ extern int swsusp_swap_in_use(void);
extern int swsusp_check(void);
extern int swsusp_shrink_memory(void);
extern void swsusp_free(void);
-extern int swsusp_resume(void);
extern int swsusp_read(unsigned int *flags_p);
extern int swsusp_write(unsigned int flags);
extern void swsusp_close(void);
@@ -201,11 +172,56 @@ static inline int suspend_devices_and_enter(suspend_state_t state)
}
#endif /* !CONFIG_SUSPEND */
-/* kernel/power/common.c */
-extern struct blocking_notifier_head pm_chain_head;
+#ifdef CONFIG_PM_SLEEP
+/* kernel/power/main.c */
+extern int pm_notifier_call_chain(unsigned long val);
+#endif
+
+#ifdef CONFIG_HIGHMEM
+unsigned int count_highmem_pages(void);
+int restore_highmem(void);
+#else
+static inline unsigned int count_highmem_pages(void) { return 0; }
+static inline int restore_highmem(void) { return 0; }
+#endif
+
+/*
+ * Suspend test levels
+ */
+enum {
+ /* keep first */
+ TEST_NONE,
+ TEST_CORE,
+ TEST_CPUS,
+ TEST_PLATFORM,
+ TEST_DEVICES,
+ TEST_FREEZER,
+ /* keep last */
+ __TEST_AFTER_LAST
+};
+
+#define TEST_FIRST TEST_NONE
+#define TEST_MAX (__TEST_AFTER_LAST - 1)
+
+extern int pm_test_level;
+
+#ifdef CONFIG_SUSPEND_FREEZER
+static inline int suspend_freeze_processes(void)
+{
+ return freeze_processes();
+}
-static inline int pm_notifier_call_chain(unsigned long val)
+static inline void suspend_thaw_processes(void)
{
- return (blocking_notifier_call_chain(&pm_chain_head, val, NULL)
- == NOTIFY_BAD) ? -EINVAL : 0;
+ thaw_processes();
}
+#else
+static inline int suspend_freeze_processes(void)
+{
+ return 0;
+}
+
+static inline void suspend_thaw_processes(void)
+{
+}
+#endif
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 78039b477d2b..f6a5df934f8d 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -635,7 +635,7 @@ __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
region->end_pfn = end_pfn;
list_add_tail(&region->list, &nosave_regions);
Report:
- printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
+ printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n",
start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
}
@@ -704,7 +704,7 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
list_for_each_entry(region, &nosave_regions, list) {
unsigned long pfn;
- printk("swsusp: Marking nosave pages: %016lx - %016lx\n",
+ pr_debug("PM: Marking nosave pages: %016lx - %016lx\n",
region->start_pfn << PAGE_SHIFT,
region->end_pfn << PAGE_SHIFT);
@@ -749,7 +749,7 @@ int create_basic_memory_bitmaps(void)
free_pages_map = bm2;
mark_nosave_pages(forbidden_pages_map);
- printk("swsusp: Basic memory bitmaps created\n");
+ pr_debug("PM: Basic memory bitmaps created\n");
return 0;
@@ -784,7 +784,7 @@ void free_basic_memory_bitmaps(void)
memory_bm_free(bm2, PG_UNSAFE_CLEAR);
kfree(bm2);
- printk("swsusp: Basic memory bitmaps freed\n");
+ pr_debug("PM: Basic memory bitmaps freed\n");
}
/**
@@ -872,7 +872,6 @@ unsigned int count_highmem_pages(void)
}
#else
static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
-static inline unsigned int count_highmem_pages(void) { return 0; }
#endif /* CONFIG_HIGHMEM */
/**
@@ -1089,7 +1088,7 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
}
nr_pages += count_pages_for_highmem(nr_highmem);
- pr_debug("swsusp: Normal pages needed: %u + %u + %u, available pages: %u\n",
+ pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n",
nr_pages, PAGES_FOR_IO, meta, free);
return free > nr_pages + PAGES_FOR_IO + meta;
@@ -1202,20 +1201,20 @@ asmlinkage int swsusp_save(void)
{
unsigned int nr_pages, nr_highmem;
- printk("swsusp: critical section: \n");
+ printk(KERN_INFO "PM: Creating hibernation image: \n");
drain_local_pages();
nr_pages = count_data_pages();
nr_highmem = count_highmem_pages();
- printk("swsusp: Need to copy %u pages\n", nr_pages + nr_highmem);
+ printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
if (!enough_free_mem(nr_pages, nr_highmem)) {
- printk(KERN_ERR "swsusp: Not enough free memory\n");
+ printk(KERN_ERR "PM: Not enough free memory\n");
return -ENOMEM;
}
if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
- printk(KERN_ERR "swsusp: Memory allocation failed\n");
+ printk(KERN_ERR "PM: Memory allocation failed\n");
return -ENOMEM;
}
@@ -1235,7 +1234,8 @@ asmlinkage int swsusp_save(void)
nr_copy_pages = nr_pages;
nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
- printk("swsusp: critical section: done (%d pages copied)\n", nr_pages);
+ printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
+ nr_pages);
return 0;
}
@@ -1264,12 +1264,17 @@ static char *check_image_kernel(struct swsusp_info *info)
}
#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
+unsigned long snapshot_get_image_size(void)
+{
+ return nr_copy_pages + nr_meta_pages + 1;
+}
+
static int init_header(struct swsusp_info *info)
{
memset(info, 0, sizeof(struct swsusp_info));
info->num_physpages = num_physpages;
info->image_pages = nr_copy_pages;
- info->pages = nr_copy_pages + nr_meta_pages + 1;
+ info->pages = snapshot_get_image_size();
info->size = info->pages;
info->size <<= PAGE_SHIFT;
return init_header_complete(info);
@@ -1429,7 +1434,7 @@ static int check_header(struct swsusp_info *info)
if (!reason && info->num_physpages != num_physpages)
reason = "memory size";
if (reason) {
- printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
+ printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
return -EPERM;
}
return 0;
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 917aba100575..a0abf9a463f9 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -28,8 +28,6 @@
#include "power.h"
-extern char resume_file[];
-
#define SWSUSP_SIG "S1SUSPEND"
struct swsusp_header {
@@ -73,7 +71,8 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
bio->bi_end_io = end_swap_bio_read;
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
- printk("swsusp: ERROR: adding page to bio at %ld\n", page_off);
+ printk(KERN_ERR "PM: Adding page to bio failed at %ld\n",
+ page_off);
bio_put(bio);
return -EFAULT;
}
@@ -153,7 +152,7 @@ static int mark_swapfiles(sector_t start, unsigned int flags)
error = bio_write_page(swsusp_resume_block,
swsusp_header, NULL);
} else {
- printk(KERN_ERR "swsusp: Swap header not found!\n");
+ printk(KERN_ERR "PM: Swap header not found!\n");
error = -ENODEV;
}
return error;
@@ -325,7 +324,8 @@ static int save_image(struct swap_map_handle *handle,
struct timeval start;
struct timeval stop;
- printk("Saving image data pages (%u pages) ... ", nr_to_write);
+ printk(KERN_INFO "PM: Saving image data pages (%u pages) ... ",
+ nr_to_write);
m = nr_to_write / 100;
if (!m)
m = 1;
@@ -365,7 +365,7 @@ static int enough_swap(unsigned int nr_pages)
{
unsigned int free_swap = count_swap_pages(root_swap, 1);
- pr_debug("swsusp: free swap pages: %u\n", free_swap);
+ pr_debug("PM: Free swap pages: %u\n", free_swap);
return free_swap > nr_pages + PAGES_FOR_IO;
}
@@ -388,7 +388,7 @@ int swsusp_write(unsigned int flags)
error = swsusp_swap_check();
if (error) {
- printk(KERN_ERR "swsusp: Cannot find swap device, try "
+ printk(KERN_ERR "PM: Cannot find swap device, try "
"swapon -a.\n");
return error;
}
@@ -402,7 +402,7 @@ int swsusp_write(unsigned int flags)
}
header = (struct swsusp_info *)data_of(snapshot);
if (!enough_swap(header->pages)) {
- printk(KERN_ERR "swsusp: Not enough free swap\n");
+ printk(KERN_ERR "PM: Not enough free swap\n");
error = -ENOSPC;
goto out;
}
@@ -417,7 +417,7 @@ int swsusp_write(unsigned int flags)
if (!error) {
flush_swap_writer(&handle);
- printk("S");
+ printk(KERN_INFO "PM: S");
error = mark_swapfiles(start, flags);
printk("|\n");
}
@@ -507,7 +507,8 @@ static int load_image(struct swap_map_handle *handle,
int err2;
unsigned nr_pages;
- printk("Loading image data pages (%u pages) ... ", nr_to_read);
+ printk(KERN_INFO "PM: Loading image data pages (%u pages) ... ",
+ nr_to_read);
m = nr_to_read / 100;
if (!m)
m = 1;
@@ -558,7 +559,7 @@ int swsusp_read(unsigned int *flags_p)
*flags_p = swsusp_header->flags;
if (IS_ERR(resume_bdev)) {
- pr_debug("swsusp: block device not initialised\n");
+ pr_debug("PM: Image device not initialised\n");
return PTR_ERR(resume_bdev);
}
@@ -577,9 +578,9 @@ int swsusp_read(unsigned int *flags_p)
blkdev_put(resume_bdev);
if (!error)
- pr_debug("swsusp: Reading resume file was successful\n");
+ pr_debug("PM: Image successfully loaded\n");
else
- pr_debug("swsusp: Error %d resuming\n", error);
+ pr_debug("PM: Error %d resuming\n", error);
return error;
}
@@ -611,13 +612,13 @@ int swsusp_check(void)
if (error)
blkdev_put(resume_bdev);
else
- pr_debug("swsusp: Signature found, resuming\n");
+ pr_debug("PM: Signature found, resuming\n");
} else {
error = PTR_ERR(resume_bdev);
}
if (error)
- pr_debug("swsusp: Error %d check for resume file\n", error);
+ pr_debug("PM: Error %d checking image file\n", error);
return error;
}
@@ -629,7 +630,7 @@ int swsusp_check(void)
void swsusp_close(void)
{
if (IS_ERR(resume_bdev)) {
- pr_debug("swsusp: block device not initialised\n");
+ pr_debug("PM: Image device not initialised\n");
return;
}
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index e1722d3155f1..023ff2a31d89 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -64,14 +64,6 @@ unsigned long image_size = 500 * 1024 * 1024;
int in_suspend __nosavedata = 0;
-#ifdef CONFIG_HIGHMEM
-unsigned int count_highmem_pages(void);
-int restore_highmem(void);
-#else
-static inline int restore_highmem(void) { return 0; }
-static inline unsigned int count_highmem_pages(void) { return 0; }
-#endif
-
/**
* The following functions are used for tracing the allocated
* swap pages, so that they can be freed in case of an error.
@@ -196,7 +188,8 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop,
centisecs = 1; /* avoid div-by-zero */
k = nr_pages * (PAGE_SIZE / 1024);
kps = (k * 100) / centisecs;
- printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k,
+ printk(KERN_INFO "PM: %s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n",
+ msg, k,
centisecs / 100, centisecs % 100,
kps / 1000, (kps % 1000) / 10);
}
@@ -227,7 +220,7 @@ int swsusp_shrink_memory(void)
char *p = "-\\|/";
struct timeval start, stop;
- printk("Shrinking memory... ");
+ printk(KERN_INFO "PM: Shrinking memory... ");
do_gettimeofday(&start);
do {
long size, highmem_size;
@@ -269,38 +262,3 @@ int swsusp_shrink_memory(void)
return 0;
}
-
-int swsusp_resume(void)
-{
- int error;
-
- local_irq_disable();
- /* NOTE: device_power_down() is just a suspend() with irqs off;
- * it has no special "power things down" semantics
- */
- if (device_power_down(PMSG_PRETHAW))
- printk(KERN_ERR "Some devices failed to power down, very bad\n");
- /* We'll ignore saved state, but this gets preempt count (etc) right */
- save_processor_state();
- error = restore_highmem();
- if (!error) {
- error = swsusp_arch_resume();
- /* The code below is only ever reached in case of a failure.
- * Otherwise execution continues at place where
- * swsusp_arch_suspend() was called
- */
- BUG_ON(!error);
- /* This call to restore_highmem() undos the previous one */
- restore_highmem();
- }
- /* The only reason why swsusp_arch_resume() can fail is memory being
- * very tight, so we have to free it as soon as we can to avoid
- * subsequent failures
- */
- swsusp_free();
- restore_processor_state();
- touch_softlockup_watchdog();
- device_power_up();
- local_irq_enable();
- return error;
-}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 5bd321bcbb75..f5512cb3aa86 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -28,6 +28,29 @@
#include "power.h"
+/*
+ * NOTE: The SNAPSHOT_SET_SWAP_FILE and SNAPSHOT_PMOPS ioctls are obsolete and
+ * will be removed in the future. They are only preserved here for
+ * compatibility with existing userland utilities.
+ */
+#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
+#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
+
+#define PMOPS_PREPARE 1
+#define PMOPS_ENTER 2
+#define PMOPS_FINISH 3
+
+/*
+ * NOTE: The following ioctl definitions are wrong and have been replaced with
+ * correct ones. They are only preserved here for compatibility with existing
+ * userland utilities and will be removed in the future.
+ */
+#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *)
+#define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long)
+#define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *)
+#define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *)
+
+
#define SNAPSHOT_MINOR 231
static struct snapshot_data {
@@ -36,7 +59,7 @@ static struct snapshot_data {
int mode;
char frozen;
char ready;
- char platform_suspend;
+ char platform_support;
} snapshot_state;
atomic_t snapshot_device_available = ATOMIC_INIT(1);
@@ -44,6 +67,7 @@ atomic_t snapshot_device_available = ATOMIC_INIT(1);
static int snapshot_open(struct inode *inode, struct file *filp)
{
struct snapshot_data *data;
+ int error;
if (!atomic_add_unless(&snapshot_device_available, -1, 0))
return -EBUSY;
@@ -64,13 +88,23 @@ static int snapshot_open(struct inode *inode, struct file *filp)
data->swap = swsusp_resume_device ?
swap_type_of(swsusp_resume_device, 0, NULL) : -1;
data->mode = O_RDONLY;
+ error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+ if (error)
+ pm_notifier_call_chain(PM_POST_RESTORE);
} else {
data->swap = -1;
data->mode = O_WRONLY;
+ error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+ if (error)
+ pm_notifier_call_chain(PM_POST_HIBERNATION);
+ }
+ if (error) {
+ atomic_inc(&snapshot_device_available);
+ return error;
}
data->frozen = 0;
data->ready = 0;
- data->platform_suspend = 0;
+ data->platform_support = 0;
return 0;
}
@@ -88,6 +122,8 @@ static int snapshot_release(struct inode *inode, struct file *filp)
thaw_processes();
mutex_unlock(&pm_mutex);
}
+ pm_notifier_call_chain(data->mode == O_WRONLY ?
+ PM_POST_HIBERNATION : PM_POST_RESTORE);
atomic_inc(&snapshot_device_available);
return 0;
}
@@ -133,7 +169,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
{
int error = 0;
struct snapshot_data *data;
- loff_t avail;
+ loff_t size;
sector_t offset;
if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC)
@@ -151,18 +187,13 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
if (data->frozen)
break;
mutex_lock(&pm_mutex);
- error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
- if (!error) {
- printk("Syncing filesystems ... ");
- sys_sync();
- printk("done.\n");
-
- error = freeze_processes();
- if (error)
- thaw_processes();
- }
+ printk("Syncing filesystems ... ");
+ sys_sync();
+ printk("done.\n");
+
+ error = freeze_processes();
if (error)
- pm_notifier_call_chain(PM_POST_HIBERNATION);
+ thaw_processes();
mutex_unlock(&pm_mutex);
if (!error)
data->frozen = 1;
@@ -173,19 +204,19 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
break;
mutex_lock(&pm_mutex);
thaw_processes();
- pm_notifier_call_chain(PM_POST_HIBERNATION);
mutex_unlock(&pm_mutex);
data->frozen = 0;
break;
+ case SNAPSHOT_CREATE_IMAGE:
case SNAPSHOT_ATOMIC_SNAPSHOT:
if (data->mode != O_RDONLY || !data->frozen || data->ready) {
error = -EPERM;
break;
}
- error = hibernation_snapshot(data->platform_suspend);
+ error = hibernation_snapshot(data->platform_support);
if (!error)
- error = put_user(in_suspend, (unsigned int __user *)arg);
+ error = put_user(in_suspend, (int __user *)arg);
if (!error)
data->ready = 1;
break;
@@ -197,7 +228,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
error = -EPERM;
break;
}
- error = hibernation_restore(data->platform_suspend);
+ error = hibernation_restore(data->platform_support);
break;
case SNAPSHOT_FREE:
@@ -206,16 +237,29 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
data->ready = 0;
break;
+ case SNAPSHOT_PREF_IMAGE_SIZE:
case SNAPSHOT_SET_IMAGE_SIZE:
image_size = arg;
break;
+ case SNAPSHOT_GET_IMAGE_SIZE:
+ if (!data->ready) {
+ error = -ENODATA;
+ break;
+ }
+ size = snapshot_get_image_size();
+ size <<= PAGE_SHIFT;
+ error = put_user(size, (loff_t __user *)arg);
+ break;
+
+ case SNAPSHOT_AVAIL_SWAP_SIZE:
case SNAPSHOT_AVAIL_SWAP:
- avail = count_swap_pages(data->swap, 1);
- avail <<= PAGE_SHIFT;
- error = put_user(avail, (loff_t __user *)arg);
+ size = count_swap_pages(data->swap, 1);
+ size <<= PAGE_SHIFT;
+ error = put_user(size, (loff_t __user *)arg);
break;
+ case SNAPSHOT_ALLOC_SWAP_PAGE:
case SNAPSHOT_GET_SWAP_PAGE:
if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
error = -ENODEV;
@@ -224,7 +268,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
offset = alloc_swapdev_block(data->swap);
if (offset) {
offset <<= PAGE_SHIFT;
- error = put_user(offset, (sector_t __user *)arg);
+ error = put_user(offset, (loff_t __user *)arg);
} else {
error = -ENOSPC;
}
@@ -238,7 +282,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
free_all_swap_pages(data->swap);
break;
- case SNAPSHOT_SET_SWAP_FILE:
+ case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */
if (!swsusp_swap_in_use()) {
/*
* User space encodes device types as two-byte values,
@@ -275,26 +319,33 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
mutex_unlock(&pm_mutex);
break;
- case SNAPSHOT_PMOPS:
+ case SNAPSHOT_PLATFORM_SUPPORT:
+ data->platform_support = !!arg;
+ break;
+
+ case SNAPSHOT_POWER_OFF:
+ if (data->platform_support)
+ error = hibernation_platform_enter();
+ break;
+
+ case SNAPSHOT_PMOPS: /* This ioctl is deprecated */
error = -EINVAL;
switch (arg) {
case PMOPS_PREPARE:
- data->platform_suspend = 1;
+ data->platform_support = 1;
error = 0;
break;
case PMOPS_ENTER:
- if (data->platform_suspend)
+ if (data->platform_support)
error = hibernation_platform_enter();
-
break;
case PMOPS_FINISH:
- if (data->platform_suspend)
+ if (data->platform_support)
error = 0;
-
break;
default:
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index c1d76552446e..7c2da88db4ed 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -101,6 +101,10 @@ void softlockup_tick(void)
now = get_timestamp(this_cpu);
+ /* Wake up the high-prio watchdog task every second: */
+ if (now > (touch_timestamp + 1))
+ wake_up_process(per_cpu(watchdog_task, this_cpu));
+
/* Warn about unreasonable delays: */
if (now <= (touch_timestamp + softlockup_thresh))
return;
@@ -191,11 +195,11 @@ static void check_hung_uninterruptible_tasks(int this_cpu)
read_lock(&tasklist_lock);
do_each_thread(g, t) {
if (!--max_count)
- break;
+ goto unlock;
if (t->state & TASK_UNINTERRUPTIBLE)
check_hung_task(t, now);
} while_each_thread(g, t);
-
+ unlock:
read_unlock(&tasklist_lock);
}
@@ -218,14 +222,19 @@ static int watchdog(void *__bind_cpu)
* debug-printout triggers in softlockup_tick().
*/
while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
touch_softlockup_watchdog();
- msleep_interruptible(10000);
+ schedule();
+
+ if (kthread_should_stop())
+ break;
if (this_cpu != check_cpu)
continue;
if (sysctl_hung_task_timeout_secs)
check_hung_uninterruptible_tasks(this_cpu);
+
}
return 0;
@@ -259,13 +268,6 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
wake_up_process(per_cpu(watchdog_task, hotcpu));
break;
#ifdef CONFIG_HOTPLUG_CPU
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- if (!per_cpu(watchdog_task, hotcpu))
- break;
- /* Unbind so it can run. Fall thru. */
- kthread_bind(per_cpu(watchdog_task, hotcpu),
- any_online_cpu(cpu_online_map));
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
if (hotcpu == check_cpu) {
@@ -275,6 +277,14 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
check_cpu = any_online_cpu(temp_cpu_online_map);
}
break;
+
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ if (!per_cpu(watchdog_task, hotcpu))
+ break;
+ /* Unbind so it can run. Fall thru. */
+ kthread_bind(per_cpu(watchdog_task, hotcpu),
+ any_online_cpu(cpu_online_map));
case CPU_DEAD:
case CPU_DEAD_FROZEN:
p = per_cpu(watchdog_task, hotcpu);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 357b68ba23ec..7cb1ac3e6fff 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -81,7 +81,6 @@ extern int percpu_pagelist_fraction;
extern int compat_log;
extern int maps_protect;
extern int sysctl_stat_interval;
-extern int audit_argv_kb;
extern int latencytop_enabled;
/* Constants used for minimum and maximum */
@@ -390,16 +389,6 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
-#ifdef CONFIG_AUDITSYSCALL
- {
- .ctl_name = CTL_UNNUMBERED,
- .procname = "audit_argv_kb",
- .data = &audit_argv_kb,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
-#endif
{
.ctl_name = KERN_CORE_PATTERN,
.procname = "core_pattern",
diff --git a/kernel/time.c b/kernel/time.c
index 09d3c45c4da7..4064c0566e77 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -129,6 +129,7 @@ static inline void warp_clock(void)
write_seqlock_irq(&xtime_lock);
wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
xtime.tv_sec += sys_tz.tz_minuteswest * 60;
+ update_xtime_cache(0);
write_sequnlock_irq(&xtime_lock);
clock_was_set();
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 63f24b550695..88267f0a8471 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -137,6 +137,7 @@ void tick_nohz_update_jiffies(void)
cpu_clear(cpu, nohz_cpu_mask);
now = ktime_get();
+ ts->idle_waketime = now;
local_irq_save(flags);
tick_do_update_jiffies64(now);
@@ -400,6 +401,7 @@ void tick_nohz_restart_sched_tick(void)
* Cancel the scheduled timer and restore the tick
*/
ts->tick_stopped = 0;
+ ts->idle_exittime = now;
hrtimer_cancel(&ts->sched_timer);
ts->sched_timer.expires = ts->idle_tick;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 092a2366b5a9..cd5dbc4579c9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -47,7 +47,7 @@ struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
static unsigned long total_sleep_time; /* seconds */
static struct timespec xtime_cache __attribute__ ((aligned (16)));
-static inline void update_xtime_cache(u64 nsec)
+void update_xtime_cache(u64 nsec)
{
xtime_cache = xtime;
timespec_add_ns(&xtime_cache, nsec);
@@ -145,6 +145,7 @@ int do_settimeofday(struct timespec *tv)
set_normalized_timespec(&xtime, sec, nsec);
set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+ update_xtime_cache(0);
clock->error = 0;
ntp_clear();
@@ -252,8 +253,8 @@ void __init timekeeping_init(void)
xtime.tv_nsec = 0;
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
+ update_xtime_cache(0);
total_sleep_time = 0;
-
write_sequnlock_irqrestore(&xtime_lock, flags);
}
@@ -290,6 +291,7 @@ static int timekeeping_resume(struct sys_device *dev)
}
/* Make sure that we have the correct xtime reference */
timespec_add_ns(&xtime, timekeeping_suspend_nsecs);
+ update_xtime_cache(0);
/* re-base the last cycle value */
clock->cycle_last = clocksource_read(clock);
clock->error = 0;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 12c5f4cb6b8c..d3d94c1a0fd2 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -166,6 +166,8 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
P(idle_calls);
P(idle_sleeps);
P_ns(idle_entrytime);
+ P_ns(idle_waketime);
+ P_ns(idle_exittime);
P_ns(idle_sleeptime);
P(last_jiffies);
P(next_jiffies);