summary | refs | log | tree | commit | diff
path: root/kernel/timer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/timer.c')
-rw-r--r-- kernel/timer.c 214
1 files changed, 161 insertions, 53 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index c61a7949387f..68a9ae7679b7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,8 +37,9 @@
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -89,8 +90,13 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
/*
* Note that all tvec_bases are 2 byte aligned and lower bit of
- * base in timer_list is guaranteed to be zero. Use the LSB for
- * the new flag to indicate whether the timer is deferrable
+ * base in timer_list is guaranteed to be zero. Use the LSB to
+ * indicate whether the timer is deferrable.
+ *
+ * A deferrable timer will work normally when the system is busy, but
+ * will not cause a CPU to come out of idle just to service it; instead,
+ * the timer will be serviced when the CPU eventually wakes up with a
+ * subsequent non-deferrable timer.
*/
#define TBASE_DEFERRABLE_FLAG (0x1)
@@ -318,6 +324,25 @@ unsigned long round_jiffies_up_relative(unsigned long j)
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
+/**
+ * set_timer_slack - set the allowed slack for a timer
+ * @timer: the timer to be modified
+ * @slack_hz: the amount of time (in jiffies) allowed for rounding
+ *
+ * Set the amount of slack, in jiffies, that a timer is allowed.
+ * With a non-negative slack, the timer subsystem will schedule the
+ * actual timer somewhere between the time mod_timer() asks for,
+ * and that time plus the slack.
+ *
+ * With a negative slack (-1 is the value a freshly initialized timer
+ * carries), a fraction of the remaining delay is used instead.
+ *
+ * This function only stores the value in @timer->slack; it does not
+ * re-queue the timer. The slack is consulted when mod_timer() next
+ * computes an expiry for @timer.
+ */
+void set_timer_slack(struct timer_list *timer, int slack_hz)
+{
+ timer->slack = slack_hz;
+}
+EXPORT_SYMBOL_GPL(set_timer_slack);
+
static inline void set_running_timer(struct tvec_base *base,
struct timer_list *timer)
@@ -549,6 +574,7 @@ static void __init_timer(struct timer_list *timer,
{
timer->entry.next = NULL;
timer->base = __raw_get_cpu_var(tvec_bases);
+ timer->slack = -1;
#ifdef CONFIG_TIMER_STATS
timer->start_site = NULL;
timer->start_pid = -1;
@@ -557,6 +583,19 @@ static void __init_timer(struct timer_list *timer,
lockdep_init_map(&timer->lockdep_map, name, key, 0);
}
+/**
+ * setup_deferrable_timer_on_stack_key - set up and initialize a deferrable timer
+ * @timer: the timer to be set up
+ * @name: name handed through to init_timer_on_stack_key()
+ * @key: lock class key handed through to init_timer_on_stack_key()
+ * @function: callback stored in @timer->function
+ * @data: argument stored in @timer->data
+ *
+ * Stores @function and @data in @timer, initializes the timer with
+ * init_timer_on_stack_key() and finally marks it deferrable.
+ *
+ * NOTE(review): judging by the name, @timer is expected to live on the
+ * caller's stack - confirm against init_timer_on_stack_key().
+ */
+void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
+ const char *name,
+ struct lock_class_key *key,
+ void (*function)(unsigned long),
+ unsigned long data)
+{
+ timer->function = function;
+ timer->data = data;
+ init_timer_on_stack_key(timer, name, key);
+ /* presumably must follow the init call, which may (re)set timer->base - verify */
+ timer_set_deferrable(timer);
+}
+EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
+
/**
* init_timer_key - initialize a timer
* @timer: the timer to be initialized
@@ -659,12 +698,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
- if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
- int preferred_cpu = get_nohz_load_balancer();
-
- if (preferred_cpu >= 0)
- cpu = preferred_cpu;
- }
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
+ cpu = get_nohz_timer_target();
#endif
new_base = per_cpu(tvec_bases, cpu);
@@ -714,6 +749,46 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
}
EXPORT_SYMBOL(mod_timer_pending);
+/*
+ * Decide where to put the timer while taking the slack into account
+ *
+ * @timer: the timer whose ->slack selects the policy; a value >= 0 is a
+ * fixed slack in jiffies, a negative value selects 1/256 of the
+ * remaining delay
+ * @expires: the requested expiry time, in jiffies
+ *
+ * Returns @expires unchanged when no rounding is possible, otherwise the
+ * computed limit with all bits below the highest differing bit cleared.
+ *
+ * Algorithm:
+ * 1) calculate the maximum (absolute) time
+ * 2) calculate the highest bit where the expires and new max are different
+ * 3) use this bit to make a mask
+ * 4) use the bitmask to round down the maximum time, so that all last
+ * bits are zeros
+ */
+static inline
+unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
+{
+ unsigned long expires_limit, mask;
+ int bit;
+
+ expires_limit = expires;
+
+ if (timer->slack >= 0) {
+ expires_limit = expires + timer->slack;
+ } else {
+ unsigned long now = jiffies;
+
+ /* No slack, if already expired else auto slack 0.4% */
+ if (time_after(expires, now))
+ expires_limit = expires + (expires - now)/256;
+ }
+ mask = expires ^ expires_limit;
+ if (mask == 0)
+ return expires;
+
+ bit = find_last_bit(&mask, BITS_PER_LONG);
+
+ /*
+ * Shift in unsigned long: bit ranges up to BITS_PER_LONG - 1, and
+ * shifting a plain int constant by 31 or more is undefined and
+ * produces a wrong mask on 64-bit.
+ */
+ mask = (1UL << bit) - 1;
+
+ expires_limit = expires_limit & ~(mask);
+
+ return expires_limit;
+}
+
/**
* mod_timer - modify a timer's timeout
* @timer: the timer to be modified
@@ -744,6 +819,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
if (timer_pending(timer) && timer->expires == expires)
return 1;
+ expires = apply_slack(timer, expires);
+
return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer);
@@ -880,6 +957,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
if (base->running_timer == timer)
goto out;
+ timer_stats_timer_clear_start_info(timer);
ret = 0;
if (timer_pending(timer)) {
detach_timer(timer, 1);
@@ -953,6 +1031,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
return index;
}
+/*
+ * call_timer_fn - invoke a timer callback
+ * @timer: the timer being run; used for its lockdep map and the tracepoints
+ * @fn: the callback to invoke
+ * @data: the argument passed to @fn
+ *
+ * Brackets the call to @fn with the lockdep map acquire/release and the
+ * timer expire entry/exit tracepoints. The preempt count is sampled
+ * before the call and compared afterwards; on a mismatch a warning is
+ * issued and the sampled count is written back.
+ */
+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
+ unsigned long data)
+{
+ int preempt_count = preempt_count();
+
+#ifdef CONFIG_LOCKDEP
+ /*
+ * It is permissible to free the timer from inside the
+ * function that is called from it, and lockdep has to take
+ * that into account too. To avoid bogus "held lock freed"
+ * warnings as well as problems when looking into
+ * timer->lockdep_map, make a copy and use that here.
+ */
+ struct lockdep_map lockdep_map = timer->lockdep_map;
+#endif
+ /*
+ * Couple the lock chain with the lock chain at
+ * del_timer_sync() by acquiring the lock_map around the fn()
+ * call here and in del_timer_sync().
+ */
+ lock_map_acquire(&lockdep_map);
+
+ trace_timer_expire_entry(timer);
+ fn(data);
+ trace_timer_expire_exit(timer);
+
+ lock_map_release(&lockdep_map);
+
+ if (preempt_count != preempt_count()) {
+ WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+ fn, preempt_count, preempt_count());
+ /*
+ * Restore the preempt count. That gives us a decent
+ * chance to survive and extract information. If the
+ * callback kept a lock held, bad luck, but not worse
+ * than the BUG() we had.
+ */
+ preempt_count() = preempt_count;
+ }
+}
+
#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
/**
@@ -996,45 +1115,7 @@ static inline void __run_timers(struct tvec_base *base)
detach_timer(timer, 1);
spin_unlock_irq(&base->lock);
- {
- int preempt_count = preempt_count();
-
-#ifdef CONFIG_LOCKDEP
- /*
- * It is permissible to free the timer from
- * inside the function that is called from
- * it, this we need to take into account for
- * lockdep too. To avoid bogus "held lock
- * freed" warnings as well as problems when
- * looking into timer->lockdep_map, make a
- * copy and use that here.
- */
- struct lockdep_map lockdep_map =
- timer->lockdep_map;
-#endif
- /*
- * Couple the lock chain with the lock chain at
- * del_timer_sync() by acquiring the lock_map
- * around the fn() call here and in
- * del_timer_sync().
- */
- lock_map_acquire(&lockdep_map);
-
- trace_timer_expire_entry(timer);
- fn(data);
- trace_timer_expire_exit(timer);
-
- lock_map_release(&lockdep_map);
-
- if (preempt_count != preempt_count()) {
- printk(KERN_ERR "huh, entered %p "
- "with preempt_count %08x, exited"
- " with %08x?\n",
- fn, preempt_count,
- preempt_count());
- BUG();
- }
- }
+ call_timer_fn(timer, fn, data);
spin_lock_irq(&base->lock);
}
}
@@ -1198,7 +1279,10 @@ void update_process_times(int user_tick)
run_local_timers();
rcu_check_callbacks(cpu, user_tick);
printk_tick();
- perf_event_do_pending();
+#ifdef CONFIG_IRQ_WORK
+ if (in_irq())
+ irq_work_run();
+#endif
scheduler_tick();
run_posix_cpu_timers(p);
}
@@ -1223,7 +1307,6 @@ void run_local_timers(void)
{
hrtimer_run_queues();
raise_softirq(TIMER_SOFTIRQ);
- softlockup_tick();
}
/*
@@ -1618,11 +1701,14 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
+ int err;
+
switch(action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- if (init_timers_cpu(cpu) < 0)
- return NOTIFY_BAD;
+ err = init_timers_cpu(cpu);
+ if (err < 0)
+ return notifier_from_errno(err);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
@@ -1648,7 +1734,7 @@ void __init init_timers(void)
init_timer_stats();
- BUG_ON(err == NOTIFY_BAD);
+ BUG_ON(err != NOTIFY_OK);
register_cpu_notifier(&timers_nb);
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
@@ -1681,3 +1767,25 @@ unsigned long msleep_interruptible(unsigned int msecs)
}
EXPORT_SYMBOL(msleep_interruptible);
+
+/*
+ * do_usleep_range - hand a [min, max] microsecond window to the hrtimer sleep
+ * @min: lower bound of the sleep, in microseconds
+ * @max: upper bound of the sleep, in microseconds
+ *
+ * Converts @min to a relative ktime timeout and (@max - @min) to a
+ * nanosecond delta, then returns whatever schedule_hrtimeout_range()
+ * returns.
+ *
+ * NOTE(review): both products are computed in unsigned long, so very
+ * large arguments could wrap on 32-bit - confirm callers keep the
+ * range small.
+ */
+static int __sched do_usleep_range(unsigned long min, unsigned long max)
+{
+ unsigned long window_ns = (max - min) * NSEC_PER_USEC;
+ ktime_t timeout = ktime_set(0, min * NSEC_PER_USEC);
+
+ return schedule_hrtimeout_range(&timeout, window_ns, HRTIMER_MODE_REL);
+}
+
+/**
+ * usleep_range - Drop in replacement for udelay where wakeup is flexible
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ *
+ * Marks the current task TASK_UNINTERRUPTIBLE and then sleeps through
+ * do_usleep_range(); that helper's return value is discarded.
+ *
+ * NOTE(review): because this schedules rather than busy-waits, it is
+ * presumably unsuitable for atomic context - verify before converting
+ * udelay() callers.
+ */
+void usleep_range(unsigned long min, unsigned long max)
+{
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ do_usleep_range(min, max);
+}
+EXPORT_SYMBOL(usleep_range);