From 6d0fa0a1a947b99a223694d7284e6df7902b3cc3 Mon Sep 17 00:00:00 2001
From: Andy Ross
Date: Fri, 4 Jan 2019 12:47:21 -0800
Subject: [PATCH 1/3] kernel: Add CONFIG_SWAP_NONATOMIC flag

On ARM, _Swap() isn't atomic and a hardware interrupt can land after
the (irq_locked) caller has entered _Swap() but before the context
switch actually happens.  This will require some platform-specific
workarounds in a few places in the scheduler.

This commit is just the Kconfig option and its selection on ARM.

Signed-off-by: Andy Ross
---
 arch/arm/core/Kconfig |  1 +
 kernel/Kconfig        | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/arch/arm/core/Kconfig b/arch/arm/core/Kconfig
index 3f988744752a9..75c35d6513251 100644
--- a/arch/arm/core/Kconfig
+++ b/arch/arm/core/Kconfig
@@ -23,6 +23,7 @@ config CPU_CORTEX_M
 	select ARCH_HAS_STACK_PROTECTION if ARM_MPU || CPU_CORTEX_M_HAS_SPLIM
 	select ARCH_HAS_USERSPACE if ARM_MPU
 	select ARCH_HAS_NOCACHE_MEMORY_SUPPORT if ARM_MPU && CPU_HAS_ARM_MPU && CPU_CORTEX_M7
+	select SWAP_NONATOMIC
 	help
 	  This option signifies the use of a CPU of the Cortex-M family.

diff --git a/kernel/Kconfig b/kernel/Kconfig
index 6463d9e38de74..4c49bf63b2118 100644
--- a/kernel/Kconfig
+++ b/kernel/Kconfig
@@ -483,6 +483,18 @@ config ARCH_HAS_CUSTOM_SWAP_TO_MAIN
 	  the _main() thread, but instead must do something custom. It must
 	  enable this option in that case.
 
+config SWAP_NONATOMIC
+	bool
+	help
+	  On some architectures, the _Swap() primitive cannot be made
+	  atomic with respect to the irq_lock being released.  That
+	  is, interrupts may be received between the entry to _Swap
+	  and the completion of the context switch.  There are a
+	  handful of workaround cases in the kernel that need to be
+	  enabled when this is true.  Currently, this only happens on
+	  ARM when the PendSV exception priority sits below that of
+	  Zephyr-handled interrupts.
+
 config ARCH_HAS_CUSTOM_BUSY_WAIT
 	bool # hidden

From ead5cfc058ccefae087edf37f6b9fa9479c051d3 Mon Sep 17 00:00:00 2001
From: Andy Ross
Date: Fri, 4 Jan 2019 12:52:17 -0800
Subject: [PATCH 2/3] kernel/sched: Predicate SWAP_NONATOMIC workaround properly

This is a refactoring of the fix in commit 6c95dafd8260 to limit its
application to affected platforms now that the root cause is
understood.

Note that the bug that fix was addressing was rare, seen only after
multi-hour sessions on Michael Scott's test rig.  So if something
regresses, this is where to look!

Signed-off-by: Andy Ross
---
 kernel/sched.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 0776649fed856..393962bf061b6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -121,7 +121,17 @@ static bool should_preempt(struct k_thread *th, int preempt_ok)
 	}
 
 	/* Or if we're pended/suspended/dummy (duh) */
-	if (!_current || !_is_thread_ready(_current)) {
+	if (!_current || _is_thread_prevented_from_running(_current)) {
+		return true;
+	}
+
+	/* Edge case on ARM where a thread can be pended out of an
+	 * interrupt handler before the "synchronous" swap starts
+	 * context switching.  Platforms with atomic swap can never
+	 * hit this.
+	 */
+	if (IS_ENABLED(CONFIG_SWAP_NONATOMIC)
+	    && _is_thread_timeout_active(th)) {
 		return true;
 	}
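The window that the should_preempt() change above guards against is easier
to see in isolation than in the scheduler sources.  The standalone sketch
below is illustrative only: it is not part of the patch, and names such as
struct fake_thread, pend_and_swap() and timer_isr() are invented for the
example rather than Zephyr APIs.  It models a thread that pends itself and
enters _Swap(), with an interrupt landing before the deferred context
switch completes:

#include <stdbool.h>
#include <stdio.h>

/* Simplified thread state for the illustration. */
struct fake_thread {
	const char *name;
	bool pended;        /* already removed itself from the run queue */
	bool switched_out;  /* deferred context switch has completed */
};

static struct fake_thread current_thread = { "threadA", false, false };

/* Models a thread pending itself and entering _Swap() on a platform
 * where the actual switch is deferred (e.g. to a PendSV exception).
 */
static void pend_and_swap(struct fake_thread *t)
{
	t->pended = true;
	/* ...window: interrupts can still arrive here before the
	 * context switch lands...
	 */
}

/* Models a timer interrupt arriving inside that window. */
static void timer_isr(struct fake_thread *t)
{
	if (t->pended && !t->switched_out) {
		printf("%s is mid-swap: normal ready-queue assumptions do not hold\n",
		       t->name);
	}
}

int main(void)
{
	pend_and_swap(&current_thread); /* thread pends itself, swap still pending */
	timer_isr(&current_thread);     /* interrupt fires inside the window */
	current_thread.switched_out = true;
	return 0;
}

On platforms with an atomic _Swap() the window between pend_and_swap() and
the switch does not exist, which is why the workaround is gated on
CONFIG_SWAP_NONATOMIC.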
From 845e3835f06d892d4bd7ac3493c1ac4fc8e04d12 Mon Sep 17 00:00:00 2001
From: Andy Ross
Date: Fri, 4 Jan 2019 12:54:23 -0800
Subject: [PATCH 3/3] kernel/sched: SWAP_NONATOMIC workaround for timeslicing

Timeslicing works by removing the _current thread from the run queue
and re-adding it at the end of its priority.  On systems with a
_Swap() that can be preempted by a timer interrupt, that means it's
possible for the timeslice code to try to slice out a thread that had
already pended itself!

This behavior used to be benign (or at least undetectable) because the
duplicated list operations were idempotent.  But now the dlist code is
stricter about correctness and has exposed the bug -- it will blow up
if you try to remove an already-removed list node.

Fix (on affected platforms) by stashing the _current pointer in
_pend_current_thread(); it is then checked and cleared in the timer
interrupt.  If we discover we're trying to slice out a thread that has
already pended itself, we can safely exit z_time_slice() as a noop.
The timeslicing bookkeeping was already done for us underneath the
pend code.

Signed-off-by: Andy Ross
---
 kernel/sched.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/kernel/sched.c b/kernel/sched.c
index 393962bf061b6..b3e4bd33563f7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -217,6 +217,15 @@ static struct k_thread *next_up(void)
 static int slice_time;
 static int slice_max_prio;
 
+#ifdef CONFIG_SWAP_NONATOMIC
+/* If _Swap() isn't atomic, then it's possible for a timer interrupt
+ * to try to timeslice away _current after it has already pended
+ * itself but before the corresponding context switch.  Treat that as
+ * a noop condition in z_time_slice().
+ */
+static struct k_thread *pending_current;
+#endif
+
 static void reset_time_slice(void)
 {
 	/* Add the elapsed time since the last announced tick to the
@@ -249,6 +258,15 @@ static inline int sliceable(struct k_thread *t)
 /* Called out of each timer interrupt */
 void z_time_slice(int ticks)
 {
+#ifdef CONFIG_SWAP_NONATOMIC
+	if (pending_current == _current) {
+		pending_current = NULL;
+		reset_time_slice();
+		return;
+	}
+	pending_current = NULL;
+#endif
+
 	if (slice_time && sliceable(_current)) {
 		if (ticks >= _current_cpu->slice_ticks) {
 			_move_thread_to_end_of_prio_q(_current);
@@ -389,6 +407,9 @@ void z_thread_timeout(struct _timeout *to)
 
 int _pend_current_thread(u32_t key, _wait_q_t *wait_q, s32_t timeout)
 {
+#if defined(CONFIG_TIMESLICING) && defined(CONFIG_SWAP_NONATOMIC)
+	pending_current = _current;
+#endif
 	pend(_current, wait_q, timeout);
 	return _Swap(key);
 }
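For readers who want to trace the patch 3 handshake outside of
kernel/sched.c, here is a condensed, self-contained model of the same idea.
It is a sketch only: pend_current_thread(), time_slice(), reset_time_slice()
and struct thread below are stand-ins invented for the example, not the real
Zephyr functions, and the real code additionally gates everything on
CONFIG_TIMESLICING and CONFIG_SWAP_NONATOMIC.

#include <stddef.h>
#include <stdio.h>

struct thread {
	const char *name;
};

static struct thread thread_a = { "threadA" };
static struct thread *current_thread = &thread_a;

/* Set on the way into the (non-atomic) swap, cleared by the timer tick. */
static struct thread *pending_current;

static void reset_time_slice(void)
{
	printf("slice timer reset\n");
}

/* Stand-in for _pend_current_thread(): remember who is mid-swap. */
static void pend_current_thread(void)
{
	pending_current = current_thread;
	/* ...pend on a wait queue and enter the swap here... */
}

/* Stand-in for z_time_slice(): a timer tick must not try to slice out
 * a thread that has already pended itself.
 */
static void time_slice(void)
{
	if (pending_current == current_thread) {
		pending_current = NULL;
		reset_time_slice();
		return; /* noop: the pend already did the bookkeeping */
	}
	pending_current = NULL;
	printf("normal timeslice handling for %s\n", current_thread->name);
}

int main(void)
{
	pend_current_thread(); /* thread pends itself; swap not yet complete */
	time_slice();          /* tick lands in the window: treated as a noop */
	time_slice();          /* a later tick takes the normal path */
	return 0;
}

Clearing pending_current on every tick, as the patch also does, keeps a
stale value from one pend from suppressing a later, unrelated timeslice.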