Commit 27efed3e authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'core-watchdog-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull watchddog clean-up and fixes from Thomas Gleixner:
 "The watchdog (hard/softlockup detector) code is pretty much broken in
  its current state. The patch series addresses this by removing all
  duct tape and refactoring it into a workable state.

  The reasons why I ask for inclusion that late in the cycle are:

   1) The code causes lockdep splats vs. hotplug locking which get
      reported over and over. Unfortunately there is no easy fix.

   2) The risk of breakage is minimal because it's already broken

   3) As 4.14 is a long term stable kernel, I prefer to have working
      watchdog code in that and the lockdep issues resolved. I wouldn't
      ask you to pull if 4.14 wouldn't be a LTS kernel or if the
      solution would be easy to backport.

   4) The series was around before the merge window opened, but then got
      delayed due to the UP failure caused by the for_each_cpu()
      surprise which we discussed recently.

  Changes vs. V1:

   - Addressed your review points

   - Addressed the warning in the powerpc code which was discovered late

   - Changed two function names which made sense up to a certain point
     in the series. Now they match what they do in the end.

   - Fixed a 'unused variable' warning, which got not detected by the
     intel robot. I triggered it when trying all possible related config
     combinations manually. Randconfig testing seems not random enough.

  The changes have been tested by and reviewed by Don Zickus and tested
  and acked by Micheal Ellerman for powerpc"

* 'core-watchdog-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits)
  watchdog/core: Put softlockup_threads_initialized under ifdef guard
  watchdog/core: Rename some softlockup_* functions
  powerpc/watchdog: Make use of watchdog_nmi_probe()
  watchdog/core, powerpc: Lock cpus across reconfiguration
  watchdog/core, powerpc: Replace watchdog_nmi_reconfigure()
  watchdog/hardlockup/perf: Fix spelling mistake: "permanetely" -> "permanently"
  watchdog/hardlockup/perf: Cure UP damage
  watchdog/hardlockup: Clean up hotplug locking mess
  watchdog/hardlockup/perf: Simplify deferred event destroy
  watchdog/hardlockup/perf: Use new perf CPU enable mechanism
  watchdog/hardlockup/perf: Implement CPU enable replacement
  watchdog/hardlockup/perf: Implement init time detection of perf
  watchdog/hardlockup/perf: Implement init time perf validation
  watchdog/core: Get rid of the racy update loop
  watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage
  watchdog/sysctl: Clean up sysctl variable name space
  watchdog/sysctl: Get rid of the #ifdeffery
  watchdog/core: Clean up header mess
  watchdog/core: Further simplify sysctl handling
  watchdog/core: Get rid of the thread teardown/setup dance
  ...
parents 7a92616c 0b62bf86
......@@ -146,7 +146,7 @@ void machine_power_off(void)
/* prevent soft lockup/stalled CPU messages for endless loop. */
rcu_sysrq_start();
lockup_detector_suspend();
lockup_detector_soft_poweroff();
for (;;);
}
......
......@@ -310,9 +310,6 @@ static int start_wd_on_cpu(unsigned int cpu)
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
return 0;
if (watchdog_suspended)
return 0;
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
return 0;
......@@ -358,36 +355,39 @@ static void watchdog_calc_timeouts(void)
wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
}
void watchdog_nmi_reconfigure(void)
void watchdog_nmi_stop(void)
{
int cpu;
watchdog_calc_timeouts();
for_each_cpu(cpu, &wd_cpus_enabled)
stop_wd_on_cpu(cpu);
}
void watchdog_nmi_start(void)
{
int cpu;
watchdog_calc_timeouts();
for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
start_wd_on_cpu(cpu);
}
/*
* This runs after lockup_detector_init() which sets up watchdog_cpumask.
* Invoked from core watchdog init.
*/
static int __init powerpc_watchdog_init(void)
int __init watchdog_nmi_probe(void)
{
int err;
watchdog_calc_timeouts();
err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online",
start_wd_on_cpu, stop_wd_on_cpu);
if (err < 0)
err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
"powerpc/watchdog:online",
start_wd_on_cpu, stop_wd_on_cpu);
if (err < 0) {
pr_warn("Watchdog could not be initialized");
return err;
}
return 0;
}
arch_initcall(powerpc_watchdog_init);
static void handle_backtrace_ipi(struct pt_regs *regs)
{
......
......@@ -4409,10 +4409,9 @@ static __init int fixup_ht_bug(void)
return 0;
}
if (lockup_detector_suspend() != 0) {
pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
return 0;
}
cpus_read_lock();
hardlockup_detector_perf_stop();
x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
......@@ -4420,9 +4419,7 @@ static __init int fixup_ht_bug(void)
x86_pmu.commit_scheduling = NULL;
x86_pmu.stop_scheduling = NULL;
lockup_detector_resume();
cpus_read_lock();
hardlockup_detector_perf_restart();
for_each_online_cpu(c)
free_excl_cntrs(c);
......
......@@ -12,11 +12,31 @@
#ifdef CONFIG_LOCKUP_DETECTOR
void lockup_detector_init(void);
void lockup_detector_soft_poweroff(void);
void lockup_detector_cleanup(void);
bool is_hardlockup(void);
extern int watchdog_user_enabled;
extern int nmi_watchdog_user_enabled;
extern int soft_watchdog_user_enabled;
extern int watchdog_thresh;
extern unsigned long watchdog_enabled;
extern struct cpumask watchdog_cpumask;
extern unsigned long *watchdog_cpumask_bits;
#ifdef CONFIG_SMP
extern int sysctl_softlockup_all_cpu_backtrace;
extern int sysctl_hardlockup_all_cpu_backtrace;
#else
static inline void lockup_detector_init(void)
{
}
#endif
#define sysctl_softlockup_all_cpu_backtrace 0
#define sysctl_hardlockup_all_cpu_backtrace 0
#endif /* !CONFIG_SMP */
#else /* CONFIG_LOCKUP_DETECTOR */
static inline void lockup_detector_init(void) { }
static inline void lockup_detector_soft_poweroff(void) { }
static inline void lockup_detector_cleanup(void) { }
#endif /* !CONFIG_LOCKUP_DETECTOR */
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
extern void touch_softlockup_watchdog_sched(void);
......@@ -24,29 +44,17 @@ extern void touch_softlockup_watchdog(void);
extern void touch_softlockup_watchdog_sync(void);
extern void touch_all_softlockup_watchdogs(void);
extern unsigned int softlockup_panic;
extern int soft_watchdog_enabled;
extern atomic_t watchdog_park_in_progress;
#else
static inline void touch_softlockup_watchdog_sched(void)
{
}
static inline void touch_softlockup_watchdog(void)
{
}
static inline void touch_softlockup_watchdog_sync(void)
{
}
static inline void touch_all_softlockup_watchdogs(void)
{
}
static inline void touch_softlockup_watchdog_sched(void) { }
static inline void touch_softlockup_watchdog(void) { }
static inline void touch_softlockup_watchdog_sync(void) { }
static inline void touch_all_softlockup_watchdogs(void) { }
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
void reset_hung_task_detector(void);
#else
static inline void reset_hung_task_detector(void)
{
}
static inline void reset_hung_task_detector(void) { }
#endif
/*
......@@ -54,12 +62,12 @@ static inline void reset_hung_task_detector(void)
* 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
* bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
*
* 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
* are variables that are only used as an 'interface' between the parameters
* in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
* 'watchdog_thresh' variable is handled differently because its value is not
* boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
* is equal zero.
* 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and
* 'soft_watchdog_user_enabled' are variables that are only used as an
* 'interface' between the parameters in /proc/sys/kernel and the internal
* state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is
* handled differently because its value is not boolean, and the lockup
* detectors are 'suspended' while 'watchdog_thresh' is equal zero.
*/
#define NMI_WATCHDOG_ENABLED_BIT 0
#define SOFT_WATCHDOG_ENABLED_BIT 1
......@@ -73,17 +81,41 @@ extern unsigned int hardlockup_panic;
static inline void hardlockup_detector_disable(void) {}
#endif
#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
# define NMI_WATCHDOG_SYSCTL_PERM 0644
#else
# define NMI_WATCHDOG_SYSCTL_PERM 0444
#endif
#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
extern void arch_touch_nmi_watchdog(void);
extern void hardlockup_detector_perf_stop(void);
extern void hardlockup_detector_perf_restart(void);
extern void hardlockup_detector_perf_disable(void);
extern void hardlockup_detector_perf_enable(void);
extern void hardlockup_detector_perf_cleanup(void);
extern int hardlockup_detector_perf_init(void);
#else
#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
static inline void hardlockup_detector_perf_stop(void) { }
static inline void hardlockup_detector_perf_restart(void) { }
static inline void hardlockup_detector_perf_disable(void) { }
static inline void hardlockup_detector_perf_enable(void) { }
static inline void hardlockup_detector_perf_cleanup(void) { }
# if !defined(CONFIG_HAVE_NMI_WATCHDOG)
static inline int hardlockup_detector_perf_init(void) { return -ENODEV; }
static inline void arch_touch_nmi_watchdog(void) {}
# else
static inline int hardlockup_detector_perf_init(void) { return 0; }
# endif
#endif
#endif
void watchdog_nmi_stop(void);
void watchdog_nmi_start(void);
int watchdog_nmi_probe(void);
/**
* touch_nmi_watchdog - restart NMI watchdog timeout.
*
*
* If the architecture supports the NMI watchdog, touch_nmi_watchdog()
* may be used to reset the timeout - for code which intentionally
* disables interrupts for a long time. This call is stateless.
......@@ -153,22 +185,6 @@ static inline bool trigger_single_cpu_backtrace(int cpu)
u64 hw_nmi_get_sample_period(int watchdog_thresh);
#endif
#ifdef CONFIG_LOCKUP_DETECTOR
extern int nmi_watchdog_enabled;
extern int watchdog_user_enabled;
extern int watchdog_thresh;
extern unsigned long watchdog_enabled;
extern struct cpumask watchdog_cpumask;
extern unsigned long *watchdog_cpumask_bits;
extern int __read_mostly watchdog_suspended;
#ifdef CONFIG_SMP
extern int sysctl_softlockup_all_cpu_backtrace;
extern int sysctl_hardlockup_all_cpu_backtrace;
#else
#define sysctl_softlockup_all_cpu_backtrace 0
#define sysctl_hardlockup_all_cpu_backtrace 0
#endif
#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
defined(CONFIG_HARDLOCKUP_DETECTOR)
void watchdog_update_hrtimer_threshold(u64 period);
......@@ -176,7 +192,6 @@ void watchdog_update_hrtimer_threshold(u64 period);
static inline void watchdog_update_hrtimer_threshold(u64 period) { }
#endif
extern bool is_hardlockup(void);
struct ctl_table;
extern int proc_watchdog(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
......@@ -188,18 +203,6 @@ extern int proc_watchdog_thresh(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
extern int proc_watchdog_cpumask(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int lockup_detector_suspend(void);
extern void lockup_detector_resume(void);
#else
static inline int lockup_detector_suspend(void)
{
return 0;
}
static inline void lockup_detector_resume(void)
{
}
#endif
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
#include <asm/nmi.h>
......
......@@ -55,7 +55,7 @@ smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
}
void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
const struct cpumask *);
void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
const struct cpumask *);
#endif
......@@ -24,6 +24,7 @@
#include <linux/lockdep.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/nmi.h>
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
......@@ -897,6 +898,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
out:
cpus_write_unlock();
/*
* Do post unplug cleanup. This is still protected against
* concurrent CPU hotplug via cpu_add_remove_lock.
*/
lockup_detector_cleanup();
return ret;
}
......
......@@ -344,39 +344,30 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
* by the client, but only by calling this function.
* This function can only be called on a registered smp_hotplug_thread.
*/
int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
const struct cpumask *new)
void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
const struct cpumask *new)
{
struct cpumask *old = plug_thread->cpumask;
cpumask_var_t tmp;
static struct cpumask tmp;
unsigned int cpu;
if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
return -ENOMEM;
get_online_cpus();
lockdep_assert_cpus_held();
mutex_lock(&smpboot_threads_lock);
/* Park threads that were exclusively enabled on the old mask. */
cpumask_andnot(tmp, old, new);
for_each_cpu_and(cpu, tmp, cpu_online_mask)
cpumask_andnot(&tmp, old, new);
for_each_cpu_and(cpu, &tmp, cpu_online_mask)
smpboot_park_thread(plug_thread, cpu);
/* Unpark threads that are exclusively enabled on the new mask. */
cpumask_andnot(tmp, new, old);
for_each_cpu_and(cpu, tmp, cpu_online_mask)
cpumask_andnot(&tmp, new, old);
for_each_cpu_and(cpu, &tmp, cpu_online_mask)
smpboot_unpark_thread(plug_thread, cpu);
cpumask_copy(old, new);
mutex_unlock(&smpboot_threads_lock);
put_online_cpus();
free_cpumask_var(tmp);
return 0;
}
EXPORT_SYMBOL_GPL(smpboot_update_cpumask_percpu_thread);
static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
......
......@@ -872,9 +872,9 @@ static struct ctl_table kern_table[] = {
#if defined(CONFIG_LOCKUP_DETECTOR)
{
.procname = "watchdog",
.data = &watchdog_user_enabled,
.maxlen = sizeof (int),
.mode = 0644,
.data = &watchdog_user_enabled,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_watchdog,
.extra1 = &zero,
.extra2 = &one,
......@@ -890,16 +890,12 @@ static struct ctl_table kern_table[] = {
},
{
.procname = "nmi_watchdog",
.data = &nmi_watchdog_enabled,
.maxlen = sizeof (int),
.mode = 0644,
.data = &nmi_watchdog_user_enabled,
.maxlen = sizeof(int),
.mode = NMI_WATCHDOG_SYSCTL_PERM,
.proc_handler = proc_nmi_watchdog,
.extra1 = &zero,
#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
.extra2 = &one,
#else
.extra2 = &zero,
#endif
},
{
.procname = "watchdog_cpumask",
......@@ -911,9 +907,9 @@ static struct ctl_table kern_table[] = {
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
{
.procname = "soft_watchdog",
.data = &soft_watchdog_enabled,
.maxlen = sizeof (int),
.mode = 0644,
.data = &soft_watchdog_user_enabled,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_soft_watchdog,
.extra1 = &zero,
.extra2 = &one,
......
This diff is collapsed.
......@@ -21,8 +21,10 @@
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
static struct cpumask dead_events_mask;
static unsigned long hardlockup_allcpu_dumped;
static unsigned int watchdog_cpus;
void arch_touch_nmi_watchdog(void)
{
......@@ -103,15 +105,12 @@ static struct perf_event_attr wd_hw_attr = {
/* Callback function for perf event subsystem */
static void watchdog_overflow_callback(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
struct perf_sample_data *data,
struct pt_regs *regs)
{
/* Ensure the watchdog never gets throttled */
event->hw.interrupts = 0;
if (atomic_read(&watchdog_park_in_progress) != 0)
return;
if (__this_cpu_read(watchdog_nmi_touch) == true) {
__this_cpu_write(watchdog_nmi_touch, false);
return;
......@@ -160,104 +159,131 @@ static void watchdog_overflow_callback(struct perf_event *event,
return;
}
/*
* People like the simple clean cpu node info on boot.
* Reduce the watchdog noise by only printing messages
* that are different from what cpu0 displayed.
*/
static unsigned long firstcpu_err;
static atomic_t watchdog_cpus;
int watchdog_nmi_enable(unsigned int cpu)
static int hardlockup_detector_event_create(void)
{
unsigned int cpu = smp_processor_id();
struct perf_event_attr *wd_attr;
struct perf_event *event = per_cpu(watchdog_ev, cpu);
int firstcpu = 0;
/* nothing to do if the hard lockup detector is disabled */
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
goto out;
/* is it already setup and enabled? */
if (event && event->state > PERF_EVENT_STATE_OFF)
goto out;
/* it is setup but not enabled */
if (event != NULL)
goto out_enable;
if (atomic_inc_return(&watchdog_cpus) == 1)
firstcpu = 1;
struct perf_event *evt;
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
/* Try to register using hardware perf events */
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
watchdog_overflow_callback, NULL);
if (IS_ERR(evt)) {
pr_info("Perf event create on CPU %d failed with %ld\n", cpu,
PTR_ERR(evt));
return PTR_ERR(evt);
}
this_cpu_write(watchdog_ev, evt);
return 0;
}
/* save the first cpu's error for future comparision */
if (firstcpu && IS_ERR(event))
firstcpu_err = PTR_ERR(event);
/**
* hardlockup_detector_perf_enable - Enable the local event
*/
void hardlockup_detector_perf_enable(void)
{
if (hardlockup_detector_event_create())
return;
if (!IS_ERR(event)) {
/* only print for the first cpu initialized */
if (firstcpu || firstcpu_err)
pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
goto out_save;
}
if (!watchdog_cpus++)
pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
/*
* Disable the hard lockup detector if _any_ CPU fails to set up
* set up the hardware perf event. The watchdog() function checks
* the NMI_WATCHDOG_ENABLED bit periodically.
*
* The barriers are for syncing up watchdog_enabled across all the
* cpus, as clear_bit() does not use barriers.
*/
smp_mb__before_atomic();
clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
smp_mb__after_atomic();
/* skip displaying the same error again */
if (!firstcpu && (PTR_ERR(event) == firstcpu_err))
return PTR_ERR(event);
/* vary the KERN level based on the returned errno */
if (PTR_ERR(event) == -EOPNOTSUPP)
pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
else if (PTR_ERR(event) == -ENOENT)
pr_warn("disabled (cpu%i): hardware events not enabled\n",
cpu);
else
pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
cpu, PTR_ERR(event));
pr_info("Shutting down hard lockup detector on all cpus\n");
return PTR_ERR(event);
/* success path */
out_save:
per_cpu(watchdog_ev, cpu) = event;
out_enable:
perf_event_enable(per_cpu(watchdog_ev, cpu));
out:
return 0;
perf_event_enable(this_cpu_read(watchdog_ev));
}
void watchdog_nmi_disable(unsigned int cpu)
/**
* hardlockup_detector_perf_disable - Disable the local event
*/
void hardlockup_detector_perf_disable(void)
{
struct perf_event *event = per_cpu(watchdog_ev, cpu);
struct perf_event *event = this_cpu_read(watchdog_ev);
if (event) {
perf_event_disable(event);
cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
watchdog_cpus--;
}
}
/**
* hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them
*
* Called from lockup_detector_cleanup(). Serialized by the caller.
*/
void hardlockup_detector_perf_cleanup(void)
{
int cpu;
for_each_cpu(cpu, &dead_events_mask) {
struct perf_event *event = per_cpu(watchdog_ev, cpu);
/*
* Required because for_each_cpu() reports unconditionally
* CPU0 as set on UP kernels. Sigh.
*/
if (event)
perf_event_release_kernel(event);
per_cpu(watchdog_ev, cpu) = NULL;
}
cpumask_clear(&dead_events_mask);
}
/**
* hardlockup_detector_perf_stop - Globally stop watchdog events
*
* Special interface for x86 to handle the perf HT bug.
*/
void __init hardlockup_detector_perf_stop(void)
{
int cpu;
lockdep_assert_cpus_held();
for_each_online_cpu(cpu) {
struct perf_event *event = per_cpu(watchdog_ev, cpu);
if (event)
perf_event_disable(event);
}
}
/* should be in cleanup, but blocks oprofile */
perf_event_release_kernel(event);
/**
* hardlockup_detector_perf_restart - Globally restart watchdog events
*
* Special interface for x86 to handle the perf HT bug.
*/
void __init hardlockup_detector_perf_restart(void)
{
int cpu;
lockdep_assert_cpus_held();
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
return;
for_each_online_cpu(cpu) {
struct perf_event *event = per_cpu(watchdog_ev, cpu);
if (event)
perf_event_enable(event);
}
}
/**
* hardlockup_detector_perf_init - Probe whether NMI event is available at all
*/
int __init hardlockup_detector_perf_init(void)
{