From 51bca1fe6a2d0aa0e577aaf0b041ab06614e1d87 Mon Sep 17 00:00:00 2001 From: Ion Lupascu Date: Tue, 19 Jan 2016 08:25:58 +0000 Subject: [PATCH 1/2] add time-out parameter to wait_dequeue --- blockingconcurrentqueue.h | 1395 ++++++++++++++++----------------- tests/unittests/unittests.cpp | 3 + 2 files changed, 700 insertions(+), 698 deletions(-) diff --git a/blockingconcurrentqueue.h b/blockingconcurrentqueue.h index a0412a73..13c60ebc 100644 --- a/blockingconcurrentqueue.h +++ b/blockingconcurrentqueue.h @@ -17,11 +17,11 @@ // I know this is an ugly hack but it still beats polluting the global // namespace with thousands of generic names or adding a .cpp for nothing. extern "C" { - struct _SECURITY_ATTRIBUTES; - __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName); - __declspec(dllimport) int __stdcall CloseHandle(void* hObject); - __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds); - __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount); + struct _SECURITY_ATTRIBUTES; + __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName); + __declspec(dllimport) int __stdcall CloseHandle(void* hObject); + __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds); + __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount); } #elif defined(__MACH__) #include @@ -33,280 +33,320 @@ namespace moodycamel { namespace details { - // Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's - // portable + lightweight semaphore implementations, originally from - // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h - // LICENSE: - // Copyright (c) 2015 Jeff Preshing - // - // This software is provided 'as-is', without any express or implied - // warranty. In no event will the authors be held liable for any damages - // arising from the use of this software. - // - // Permission is granted to anyone to use this software for any purpose, - // including commercial applications, and to alter it and redistribute it - // freely, subject to the following restrictions: - // - // 1. The origin of this software must not be misrepresented; you must not - // claim that you wrote the original software. If you use this software - // in a product, an acknowledgement in the product documentation would be - // appreciated but is not required. - // 2. Altered source versions must be plainly marked as such, and must not be - // misrepresented as being the original software. - // 3. This notice may not be removed or altered from any source distribution. - namespace mpmc_sema - { + // Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's + // portable + lightweight semaphore implementations, originally from + // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h + // LICENSE: + // Copyright (c) 2015 Jeff Preshing + // + // This software is provided 'as-is', without any express or implied + // warranty. In no event will the authors be held liable for any damages + // arising from the use of this software. 
+ // + // Permission is granted to anyone to use this software for any purpose, + // including commercial applications, and to alter it and redistribute it + // freely, subject to the following restrictions: + // + // 1. The origin of this software must not be misrepresented; you must not + // claim that you wrote the original software. If you use this software + // in a product, an acknowledgement in the product documentation would be + // appreciated but is not required. + // 2. Altered source versions must be plainly marked as such, and must not be + // misrepresented as being the original software. + // 3. This notice may not be removed or altered from any source distribution. + namespace mpmc_sema + { #if defined(_WIN32) - class Semaphore - { - private: - void* m_hSema; - - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - - public: - Semaphore(int initialCount = 0) - { - assert(initialCount >= 0); - const long maxLong = 0x7fffffff; - m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); - } - - ~Semaphore() - { - CloseHandle(m_hSema); - } - - void wait() - { - const unsigned long infinite = 0xffffffff; - WaitForSingleObject(m_hSema, infinite); - } - - void signal(int count = 1) - { - ReleaseSemaphore(m_hSema, count, nullptr); - } - }; + class Semaphore + { + private: + void* m_hSema; + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + + public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + const long maxLong = 0x7fffffff; + m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); + } + + ~Semaphore() + { + CloseHandle(m_hSema); + } + + bool wait(const unsigned long &ms) + { + const unsigned long timeout = (ms > 0UL ? 
ms : 0xffffffff); + const DWORD rc = WaitForSingleObject(m_hSema, timeout); + return (rc != WAIT_TIMEOUT); + } + + void signal(int count = 1) + { + ReleaseSemaphore(m_hSema, count, nullptr); + } + }; #elif defined(__MACH__) - //--------------------------------------------------------- - // Semaphore (Apple iOS and OSX) - // Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html - //--------------------------------------------------------- - class Semaphore - { - private: - semaphore_t m_sema; - - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - - public: - Semaphore(int initialCount = 0) - { - assert(initialCount >= 0); - semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); - } - - ~Semaphore() - { - semaphore_destroy(mach_task_self(), m_sema); - } - - void wait() - { - semaphore_wait(m_sema); - } - - void signal() - { - semaphore_signal(m_sema); - } - - void signal(int count) - { - while (count-- > 0) - { - semaphore_signal(m_sema); - } - } - }; + //--------------------------------------------------------- + // Semaphore (Apple iOS and OSX) + // Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html + //--------------------------------------------------------- + class Semaphore + { + private: + semaphore_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + + public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); + } + + ~Semaphore() + { + semaphore_destroy(mach_task_self(), m_sema); + } + + bool wait(const unsigned long &ms) + { + if(ms == 0UL) + { + semaphore_wait(m_sema); + return true; + } + + kern_return_t rc; + mach_timespec_t ts; + ts.tv_sec = ms / 1000; + ts.tv_nsec = (ms % 1000) * 1000000; + + // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html + rc = semaphore_timedwait(m_sema, ts); + + return (rc != KERN_OPERATION_TIMED_OUT); + } + + void signal() + { + semaphore_signal(m_sema); + } + + void signal(int count) + { + while (count-- > 0) + { + semaphore_signal(m_sema); + } + } + }; #elif defined(__unix__) - //--------------------------------------------------------- - // Semaphore (POSIX, Linux) - //--------------------------------------------------------- - class Semaphore - { - private: - sem_t m_sema; - - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - - public: - Semaphore(int initialCount = 0) - { - assert(initialCount >= 0); - sem_init(&m_sema, 0, initialCount); - } - - ~Semaphore() - { - sem_destroy(&m_sema); - } - - void wait() - { - // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error - int rc; - do - { - rc = sem_wait(&m_sema); - } - while (rc == -1 && errno == EINTR); - } - - void signal() - { - sem_post(&m_sema); - } - - void signal(int count) - { - while (count-- > 0) - { - sem_post(&m_sema); - } - } - }; + //--------------------------------------------------------- + // Semaphore (POSIX, Linux) + //--------------------------------------------------------- + class Semaphore + { + private: + sem_t m_sema; + + Semaphore(const Semaphore& other) 
MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + + public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + sem_init(&m_sema, 0, initialCount); + } + + ~Semaphore() + { + sem_destroy(&m_sema); + } + + bool wait(const unsigned long &ms) + { + int rc; + + if(ms == 0UL) + { + // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error + do + { + rc = sem_wait(&m_sema); + } + while (rc == -1 && errno == EINTR); + + return true; + } + + // wait with timeout + struct timespec ts; + if (clock_gettime(CLOCK_REALTIME, &ts) == -1) + { + return true; + } + ts.tv_nsec += (ms % 1000) * 1000000000; + ts.tv_sec += ms / 1000 + ts.tv_nsec / 1000000000; + ts.tv_nsec %= 1000000000; + + do + { + rc = sem_timedwait(&m_sema, &ts); + } + while (rc == -1 && errno == EINTR); + + return !(rc == -1 && errno == ETIMEDOUT); + } + + void signal() + { + sem_post(&m_sema); + } + + void signal(int count) + { + while (count-- > 0) + { + sem_post(&m_sema); + } + } + }; #else #error Unsupported platform! (No semaphore wrapper available) #endif - //--------------------------------------------------------- - // LightweightSemaphore - //--------------------------------------------------------- - class LightweightSemaphore - { - public: - typedef std::make_signed::type ssize_t; - - private: - std::atomic m_count; - Semaphore m_sema; - - void waitWithPartialSpinning() - { - ssize_t oldCount; - // Is there a better way to set the initial spin count? - // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC, - // as threads start hitting the kernel semaphore. - int spin = 10000; - while (--spin >= 0) - { - oldCount = m_count.load(std::memory_order_relaxed); - if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) - return; - std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. - } - oldCount = m_count.fetch_sub(1, std::memory_order_acquire); - if (oldCount <= 0) - { - m_sema.wait(); - } - } - - ssize_t waitManyWithPartialSpinning(ssize_t max) - { - assert(max > 0); - ssize_t oldCount; - int spin = 10000; - while (--spin >= 0) - { - oldCount = m_count.load(std::memory_order_relaxed); - if (oldCount > 0) - { - ssize_t newCount = oldCount > max ? oldCount - max : 0; - if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) - return oldCount - newCount; - } - std::atomic_signal_fence(std::memory_order_acquire); - } - oldCount = m_count.fetch_sub(1, std::memory_order_acquire); - if (oldCount <= 0) - m_sema.wait(); - if (max > 1) - return 1 + tryWaitMany(max - 1); - return 1; - } - - public: - LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount) - { - assert(initialCount >= 0); - } - - bool tryWait() - { - ssize_t oldCount = m_count.load(std::memory_order_relaxed); - while (oldCount > 0) - { - if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) - return true; - } - return false; - } - - void wait() - { - if (!tryWait()) - waitWithPartialSpinning(); - } - - // Acquires between 0 and (greedily) max, inclusive - ssize_t tryWaitMany(ssize_t max) - { - assert(max >= 0); - ssize_t oldCount = m_count.load(std::memory_order_relaxed); - while (oldCount > 0) - { - ssize_t newCount = oldCount > max ? 
oldCount - max : 0; - if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) - return oldCount - newCount; - } - return 0; - } - - // Acquires at least one, and (greedily) at most max - ssize_t waitMany(ssize_t max) - { - assert(max >= 0); - ssize_t result = tryWaitMany(max); - if (result == 0 && max > 0) - result = waitManyWithPartialSpinning(max); - return result; - } - - void signal(ssize_t count = 1) - { - assert(count >= 0); - ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); - ssize_t toRelease = -oldCount < count ? -oldCount : count; - if (toRelease > 0) - { - m_sema.signal((int)toRelease); - } - } - - ssize_t availableApprox() const - { - ssize_t count = m_count.load(std::memory_order_relaxed); - return count > 0 ? count : 0; - } - }; - } // end namespace mpmc_sema -} // end namespace details + //--------------------------------------------------------- + // LightweightSemaphore + //--------------------------------------------------------- + class LightweightSemaphore + { + public: + typedef std::make_signed::type ssize_t; + private: + std::atomic m_count; + Semaphore m_sema; + + bool waitWithPartialSpinning(const unsigned long &ms) + { + ssize_t oldCount; + // Is there a better way to set the initial spin count? + // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC, + // as threads start hitting the kernel semaphore. + int spin = 10000; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount <= 0) + { + return m_sema.wait(ms); + } + } + + ssize_t waitManyWithPartialSpinning(ssize_t max, const unsigned long &ms) + { + assert(max > 0); + ssize_t oldCount; + int spin = 10000; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if (oldCount > 0) + { + ssize_t newCount = oldCount > max ? oldCount - max : 0; + if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + std::atomic_signal_fence(std::memory_order_acquire); + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount <= 0) + if(!m_sema.wait(ms)) + { + return 1; + } + + if (max > 1) + return 1 + tryWaitMany(max - 1); + return 1; + } + + public: + LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount) + { + assert(initialCount >= 0); + } + + bool tryWait() + { + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + } + return false; + } + + void wait(const unsigned long &ms) + { + if (!tryWait()) + waitWithPartialSpinning(ms); + } + + // Acquires between 0 and (greedily) max, inclusive + ssize_t tryWaitMany(ssize_t max) + { + assert(max >= 0); + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + ssize_t newCount = oldCount > max ? 
oldCount - max : 0; + if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + return 0; + } + + // Acquires at least one, and (greedily) at most max + ssize_t waitMany(ssize_t max, const unsigned long &ms) + { + assert(max >= 0); + ssize_t result = tryWaitMany(max); + if (result == 0 && max > 0) + result = waitManyWithPartialSpinning(max, ms); + return result; + } + + void signal(ssize_t count = 1) + { + assert(count >= 0); + ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); + ssize_t toRelease = -oldCount < count ? -oldCount : count; + if (toRelease > 0) + { + m_sema.signal((int)toRelease); + } + } + ssize_t availableApprox() const + { + ssize_t count = m_count.load(std::memory_order_relaxed); + return count > 0 ? count : 0; + } + }; + } // end namespace mpmc_sema +} // end namespace details // This is a blocking version of the queue. It has an almost identical interface to @@ -316,445 +356,404 @@ template class BlockingConcurrentQueue { private: - typedef ::moodycamel::ConcurrentQueue ConcurrentQueue; - typedef details::mpmc_sema::LightweightSemaphore LightweightSemaphore; + typedef ::moodycamel::ConcurrentQueue ConcurrentQueue; + typedef details::mpmc_sema::LightweightSemaphore LightweightSemaphore; public: - typedef typename ConcurrentQueue::producer_token_t producer_token_t; - typedef typename ConcurrentQueue::consumer_token_t consumer_token_t; - - typedef typename ConcurrentQueue::index_t index_t; - typedef typename ConcurrentQueue::size_t size_t; - typedef typename std::make_signed::type ssize_t; - - static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE; - static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD; - static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE; - static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE; - static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; - static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE; - static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE; - + typedef typename ConcurrentQueue::producer_token_t producer_token_t; + typedef typename ConcurrentQueue::consumer_token_t consumer_token_t; + typedef typename ConcurrentQueue::index_t index_t; + typedef typename ConcurrentQueue::size_t size_t; + typedef typename std::make_signed::type ssize_t; + static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE; + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD; + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE; + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE; + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE; + static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE; public: - // Creates a queue with at least `capacity` element slots; note that the - // actual number of elements that can be inserted without additional memory - // 
allocation depends on the number of producers and the block size (e.g. if - // the block size is equal to `capacity`, only a single block will be allocated - // up-front, which means only a single producer will be able to enqueue elements - // without an extra allocation -- blocks aren't shared between producers). - // This method is not thread safe -- it is up to the user to ensure that the - // queue is fully constructed before it starts being used by other threads (this - // includes making the memory effects of construction visible, possibly with a - // memory barrier). - explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) - : inner(capacity), sema(create(), &BlockingConcurrentQueue::template destroy) - { - assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); - if (!sema) { - MOODYCAMEL_THROW(std::bad_alloc()); - } - } - - BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) - : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(), &BlockingConcurrentQueue::template destroy) - { - assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); - if (!sema) { - MOODYCAMEL_THROW(std::bad_alloc()); - } - } - - // Disable copying and copy assignment - BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; - BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; - - // Moving is supported, but note that it is *not* a thread-safe operation. - // Nobody can use the queue while it's being moved, and the memory effects - // of that move must be propagated to other threads before they can use it. - // Note: When a queue is moved, its tokens are still valid but can only be - // used with the destination queue (i.e. semantically they are moved along - // with the queue itself). - BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT - : inner(std::move(other.inner)), sema(std::move(other.sema)) - { } - - inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT - { - return swap_internal(other); - } - - // Swaps this queue's state with the other's. Not thread-safe. - // Swapping two queues does not invalidate their tokens, however - // the tokens that were created for one queue must be used with - // only the swapped queue (i.e. the tokens are tied to the - // queue's movable state, not the object itself). - inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT - { - swap_internal(other); - } - + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size (e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). 
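// Editor's note -- an illustrative sketch, not part of the patch. The comment
// above explains how `capacity` interacts with the block size and the producer
// count; the snippet below simply shows both constructors being called. The
// element type `int` and the variable names are placeholders, and the figure of
// 32 is only the usual default-traits BLOCK_SIZE.
//
//     #include "blockingconcurrentqueue.h"
//
//     using Queue = moodycamel::BlockingConcurrentQueue<int>;
//     Queue q(6 * Queue::BLOCK_SIZE);   // pre-allocates at least six blocks up front
//     Queue q2(1024, /*maxExplicitProducers*/ 4, /*maxImplicitProducers*/ 0);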
+ explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + : inner(capacity), sema(create(), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + // Disable copying and copy assignment + BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). + BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : inner(std::move(other.inner)), sema(std::move(other.sema)) + { } + inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } private: - BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) - { - if (this == &other) { - return *this; - } - - inner.swap(other.inner); - sema.swap(other.sema); - return *this; - } - + BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + inner.swap(other.inner); + sema.swap(other.sema); + return *this; + } public: - // Enqueues a single item (by copying it). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(T const& item) - { - if (details::likely(inner.enqueue(item))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. 
- inline bool enqueue(T&& item) - { - if (details::likely(inner.enqueue(std::move(item)))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by copying it) using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(producer_token_t const& token, T const& item) - { - if (details::likely(inner.enqueue(token, item))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible) using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(producer_token_t const& token, T&& item) - { - if (details::likely(inner.enqueue(token, std::move(item)))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues several items. - // Allocates memory if required. Only fails if memory allocation fails (or - // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved instead of copied. - // Thread-safe. - template - inline bool enqueue_bulk(It itemFirst, size_t count) - { - if (details::likely(inner.enqueue_bulk(std::forward(itemFirst), count))) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - // Enqueues several items using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails - // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - if (details::likely(inner.enqueue_bulk(token, std::forward(itemFirst), count))) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - // Enqueues a single item (by copying it). - // Does not allocate memory. Fails if not enough room to enqueue (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0). - // Thread-safe. - inline bool try_enqueue(T const& item) - { - if (inner.try_enqueue(item)) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible). - // Does not allocate memory (except for one-time implicit producer). - // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Thread-safe. - inline bool try_enqueue(T&& item) - { - if (inner.try_enqueue(std::move(item))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by copying it) using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Thread-safe. - inline bool try_enqueue(producer_token_t const& token, T const& item) - { - if (inner.try_enqueue(token, item)) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues a single item (by moving it, if possible) using an explicit producer token. - // Does not allocate memory. 
Fails if not enough room to enqueue. - // Thread-safe. - inline bool try_enqueue(producer_token_t const& token, T&& item) - { - if (inner.try_enqueue(token, std::move(item))) { - sema->signal(); - return true; - } - return false; - } - - // Enqueues several items. - // Does not allocate memory (except for one-time implicit producer). - // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - inline bool try_enqueue_bulk(It itemFirst, size_t count) - { - if (inner.try_enqueue_bulk(std::forward(itemFirst), count)) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - // Enqueues several items using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - if (inner.try_enqueue_bulk(token, std::forward(itemFirst), count)) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - - - // Attempts to dequeue from the queue. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline bool try_dequeue(U& item) - { - if (sema->tryWait()) { - while (!inner.try_dequeue(item)) { - continue; - } - return true; - } - return false; - } - - // Attempts to dequeue from the queue using an explicit consumer token. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline bool try_dequeue(consumer_token_t& token, U& item) - { - if (sema->tryWait()) { - while (!inner.try_dequeue(token, item)) { - continue; - } - return true; - } - return false; - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued. - // Returns 0 if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline size_t try_dequeue_bulk(It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) { - count += inner.template try_dequeue_bulk(itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue using an explicit consumer token. - // Returns the number of items actually dequeued. - // Returns 0 if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) { - count += inner.template try_dequeue_bulk(token, itemFirst, max - count); - } - return count; - } - - - - // Blocks the current thread until there's something to dequeue, then - // dequeues it. 
- // Never allocates. Thread-safe. - template - inline void wait_dequeue(U& item) - { - sema->wait(); - while (!inner.try_dequeue(item)) { - continue; - } - } - - // Blocks the current thread until there's something to dequeue, then - // dequeues it using an explicit consumer token. - // Never allocates. Thread-safe. - template - inline void wait_dequeue(consumer_token_t& token, U& item) - { - sema->wait(); - while (!inner.try_dequeue(token, item)) { - continue; - } - } - - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued, which will - // always be at least one (this method blocks until the queue - // is non-empty) and at most max. - // Never allocates. Thread-safe. - template - inline size_t wait_dequeue_bulk(It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) { - count += inner.template try_dequeue_bulk(itemFirst, max - count); - } - return count; - } - - // Attempts to dequeue several elements from the queue using an explicit consumer token. - // Returns the number of items actually dequeued, which will - // always be at least one (this method blocks until the queue - // is non-empty) and at most max. - // Never allocates. Thread-safe. - template - inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) { - count += inner.template try_dequeue_bulk(token, itemFirst, max - count); - } - return count; - } - - - // Returns an estimate of the total number of elements currently in the queue. This - // estimate is only accurate if the queue has completely stabilized before it is called - // (i.e. all enqueue and dequeue operations have completed and their memory effects are - // visible on the calling thread, and no further operations start while this method is - // being called). - // Thread-safe. - inline size_t size_approx() const - { - return (size_t)sema->availableApprox(); - } - - - // Returns true if the underlying atomic variables used by - // the queue are lock-free (they should be on most platforms). - // Thread-safe. - static bool is_lock_free() - { - return ConcurrentQueue::is_lock_free(); - } - + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + if (details::likely(inner.enqueue(item))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + if (details::likely(inner.enqueue(std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. 
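// Editor's note -- an illustrative sketch, not part of the patch: enqueueing
// through an explicit producer token, so this producer keeps reusing its own
// block chain instead of going through the implicit-producer lookup. The queue,
// token and values below are placeholders; the token is assumed to be built
// with the usual token-from-queue constructor.
//
//     moodycamel::BlockingConcurrentQueue<int> q;
//     moodycamel::BlockingConcurrentQueue<int>::producer_token_t ptok(q);
//     q.enqueue(ptok, 42);
//     q.enqueue(ptok, 43);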
+ inline bool enqueue(producer_token_t const& token, T const& item) + { + if (details::likely(inner.enqueue(token, item))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + if (details::likely(inner.enqueue(token, std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(It itemFirst, size_t count) + { + if (details::likely(inner.enqueue_bulk(std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if (details::likely(inner.enqueue_bulk(token, std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + if (inner.try_enqueue(item)) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + if (inner.try_enqueue(std::move(item))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T const& item) + { + if (inner.try_enqueue(token, item)) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + if (inner.try_enqueue(token, std::move(item))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). 
+ // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(token, std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + return false; + } + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(consumer_token_t& token, U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + return false; + } + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + // Blocks the current thread until there's something to dequeue, then + // dequeues it. + // Never allocates. Thread-safe. + template + inline void wait_dequeue(U& item, const unsigned long &ms = 0UL) + { + sema->wait(ms); + while (!inner.try_dequeue(item)) { + continue; + } + } + // Blocks the current thread until there's something to dequeue, then + // dequeues it using an explicit consumer token. + // Never allocates. Thread-safe. 
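// Editor's note -- an illustrative sketch, not part of the patch: how the new
// `ms` parameter added by this patch is passed. The default of 0 keeps the old
// behaviour (block until an item arrives); a non-zero value bounds the
// underlying semaphore wait to roughly that many milliseconds. As declared in
// this hunk, wait_dequeue still returns void and the bulk forms return an item
// count, so the sketch only shows the call sites; the queue, buffer and
// variable names are placeholders.
//
//     moodycamel::BlockingConcurrentQueue<int> q;
//     int item;
//     q.wait_dequeue(item);            // unchanged: waits indefinitely
//     q.wait_dequeue(item, 250UL);     // waits at most ~250 ms at the semaphore
//
//     int buf[8];
//     size_t n = q.wait_dequeue_bulk(buf, 8, 500UL);   // up to 8 items, ~500 ms bound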
+ template + inline void wait_dequeue(consumer_token_t& token, U& item, const unsigned long &ms = 0UL) + { + sema->wait(ms); + while (!inner.try_dequeue(token, item)) { + continue; + } + } + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk(It itemFirst, size_t max, const unsigned long &ms = 0UL) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, ms); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max, const unsigned long &ms = 0UL) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, ms); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + inline size_t size_approx() const + { + return (size_t)sema->availableApprox(); + } + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. + static bool is_lock_free() + { + return ConcurrentQueue::is_lock_free(); + } private: - template - static inline U* create() - { - auto p = (Traits::malloc)(sizeof(U)); - return p != nullptr ? new (p) U : nullptr; - } - - template - static inline U* create(A1&& a1) - { - auto p = (Traits::malloc)(sizeof(U)); - return p != nullptr ? new (p) U(std::forward(a1)) : nullptr; - } - - template - static inline void destroy(U* p) - { - if (p != nullptr) { - p->~U(); - } - (Traits::free)(p); - } - + template + static inline U* create() + { + auto p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U : nullptr; + } + template + static inline U* create(A1&& a1) + { + auto p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? 
new (p) U(std::forward(a1)) : nullptr; + } + template + static inline void destroy(U* p) + { + if (p != nullptr) { + p->~U(); + } + (Traits::free)(p); + } private: - ConcurrentQueue inner; - std::unique_ptr sema; + ConcurrentQueue inner; + std::unique_ptr sema; }; template inline void swap(BlockingConcurrentQueue& a, BlockingConcurrentQueue& b) MOODYCAMEL_NOEXCEPT { - a.swap(b); + a.swap(b); } -} // end namespace moodycamel +} // end namespace moodycamel diff --git a/tests/unittests/unittests.cpp b/tests/unittests/unittests.cpp index 97550e5b..51290029 100644 --- a/tests/unittests/unittests.cpp +++ b/tests/unittests/unittests.cpp @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include #ifdef _WIN32 #ifndef NOMINMAX From 98a57e8ad8210d5d274e1f62f85cb4aeb0025d6f Mon Sep 17 00:00:00 2001 From: Ion Lupascu Date: Thu, 21 Jan 2016 07:50:20 +0000 Subject: [PATCH 2/2] timedwait: added test to test the functionality and corrected the code --- blockingconcurrentqueue.h | 1416 +++++++++++++++++---------------- tests/unittests/unittests.cpp | 1302 +++++++++++++++++------------- 2 files changed, 1455 insertions(+), 1263 deletions(-) diff --git a/blockingconcurrentqueue.h b/blockingconcurrentqueue.h index 13c60ebc..d9301480 100644 --- a/blockingconcurrentqueue.h +++ b/blockingconcurrentqueue.h @@ -17,11 +17,11 @@ // I know this is an ugly hack but it still beats polluting the global // namespace with thousands of generic names or adding a .cpp for nothing. extern "C" { - struct _SECURITY_ATTRIBUTES; - __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName); - __declspec(dllimport) int __stdcall CloseHandle(void* hObject); - __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds); - __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount); + struct _SECURITY_ATTRIBUTES; + __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName); + __declspec(dllimport) int __stdcall CloseHandle(void* hObject); + __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds); + __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount); } #elif defined(__MACH__) #include @@ -33,320 +33,334 @@ namespace moodycamel { namespace details { - // Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's - // portable + lightweight semaphore implementations, originally from - // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h - // LICENSE: - // Copyright (c) 2015 Jeff Preshing - // - // This software is provided 'as-is', without any express or implied - // warranty. In no event will the authors be held liable for any damages - // arising from the use of this software. - // - // Permission is granted to anyone to use this software for any purpose, - // including commercial applications, and to alter it and redistribute it - // freely, subject to the following restrictions: - // - // 1. The origin of this software must not be misrepresented; you must not - // claim that you wrote the original software. If you use this software - // in a product, an acknowledgement in the product documentation would be - // appreciated but is not required. - // 2. 
Altered source versions must be plainly marked as such, and must not be - // misrepresented as being the original software. - // 3. This notice may not be removed or altered from any source distribution. - namespace mpmc_sema - { + // Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's + // portable + lightweight semaphore implementations, originally from + // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h + // LICENSE: + // Copyright (c) 2015 Jeff Preshing + // + // This software is provided 'as-is', without any express or implied + // warranty. In no event will the authors be held liable for any damages + // arising from the use of this software. + // + // Permission is granted to anyone to use this software for any purpose, + // including commercial applications, and to alter it and redistribute it + // freely, subject to the following restrictions: + // + // 1. The origin of this software must not be misrepresented; you must not + // claim that you wrote the original software. If you use this software + // in a product, an acknowledgement in the product documentation would be + // appreciated but is not required. + // 2. Altered source versions must be plainly marked as such, and must not be + // misrepresented as being the original software. + // 3. This notice may not be removed or altered from any source distribution. + namespace mpmc_sema + { #if defined(_WIN32) - class Semaphore - { - private: - void* m_hSema; - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - - public: - Semaphore(int initialCount = 0) - { - assert(initialCount >= 0); - const long maxLong = 0x7fffffff; - m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); - } - - ~Semaphore() - { - CloseHandle(m_hSema); - } - - bool wait(const unsigned long &ms) - { - const unsigned long timeout = (ms > 0UL ? ms : 0xffffffff); - const DWORD rc = WaitForSingleObject(m_hSema, timeout); - return (rc != WAIT_TIMEOUT); - } - - void signal(int count = 1) - { - ReleaseSemaphore(m_hSema, count, nullptr); - } - }; + class Semaphore + { + private: + void* m_hSema; + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + + public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + const long maxLong = 0x7fffffff; + m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); + } + + ~Semaphore() + { + CloseHandle(m_hSema); + } + + bool wait(const unsigned long &ms) + { + const unsigned long timeout = (ms > 0UL ? 
ms : 0xffffffff); + const DWORD rc = WaitForSingleObject(m_hSema, timeout); + return (rc != WAIT_TIMEOUT); + } + + void signal(int count = 1) + { + ReleaseSemaphore(m_hSema, count, nullptr); + } + }; #elif defined(__MACH__) - //--------------------------------------------------------- - // Semaphore (Apple iOS and OSX) - // Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html - //--------------------------------------------------------- - class Semaphore - { - private: - semaphore_t m_sema; - - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - - public: - Semaphore(int initialCount = 0) - { - assert(initialCount >= 0); - semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); - } - - ~Semaphore() - { - semaphore_destroy(mach_task_self(), m_sema); - } - - bool wait(const unsigned long &ms) - { - if(ms == 0UL) - { - semaphore_wait(m_sema); - return true; - } - - kern_return_t rc; - mach_timespec_t ts; - ts.tv_sec = ms / 1000; - ts.tv_nsec = (ms % 1000) * 1000000; - - // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html - rc = semaphore_timedwait(m_sema, ts); - - return (rc != KERN_OPERATION_TIMED_OUT); - } - - void signal() - { - semaphore_signal(m_sema); - } - - void signal(int count) - { - while (count-- > 0) - { - semaphore_signal(m_sema); - } - } - }; + //--------------------------------------------------------- + // Semaphore (Apple iOS and OSX) + // Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html + //--------------------------------------------------------- + class Semaphore + { + private: + semaphore_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + + public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); + } + + ~Semaphore() + { + semaphore_destroy(mach_task_self(), m_sema); + } + + bool wait(const unsigned long &ms) + { + if(ms == 0UL) + { + semaphore_wait(m_sema); + return true; + } + + kern_return_t rc; + mach_timespec_t ts; + ts.tv_sec = ms / 1000; + ts.tv_nsec = (ms % 1000) * 1000000; + + // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html + rc = semaphore_timedwait(m_sema, ts); + + return (rc != KERN_OPERATION_TIMED_OUT); + } + + void signal() + { + semaphore_signal(m_sema); + } + + void signal(int count) + { + while (count-- > 0) + { + semaphore_signal(m_sema); + } + } + }; #elif defined(__unix__) - //--------------------------------------------------------- - // Semaphore (POSIX, Linux) - //--------------------------------------------------------- - class Semaphore - { - private: - sem_t m_sema; - - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - - public: - Semaphore(int initialCount = 0) - { - assert(initialCount >= 0); - sem_init(&m_sema, 0, initialCount); - } - - ~Semaphore() - { - sem_destroy(&m_sema); - } - - bool wait(const unsigned long &ms) - { - int rc; - - if(ms == 0UL) - { - // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error - 
do - { - rc = sem_wait(&m_sema); - } - while (rc == -1 && errno == EINTR); - - return true; - } - - // wait with timeout - struct timespec ts; - if (clock_gettime(CLOCK_REALTIME, &ts) == -1) - { - return true; - } - ts.tv_nsec += (ms % 1000) * 1000000000; - ts.tv_sec += ms / 1000 + ts.tv_nsec / 1000000000; - ts.tv_nsec %= 1000000000; - - do - { - rc = sem_timedwait(&m_sema, &ts); - } - while (rc == -1 && errno == EINTR); - - return !(rc == -1 && errno == ETIMEDOUT); - } - - void signal() - { - sem_post(&m_sema); - } - - void signal(int count) - { - while (count-- > 0) - { - sem_post(&m_sema); - } - } - }; + //--------------------------------------------------------- + // Semaphore (POSIX, Linux) + //--------------------------------------------------------- + class Semaphore + { + private: + sem_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + + public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + sem_init(&m_sema, 0, initialCount); + } + + ~Semaphore() + { + sem_destroy(&m_sema); + } + + bool wait(const unsigned long &ms) + { + int rc; + + if(ms == 0UL) + { + // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error + do + { + rc = sem_wait(&m_sema); + } + while (rc == -1 && errno == EINTR); + + return true; + } + + // wait with timeout + struct timespec ts; + if (clock_gettime(CLOCK_REALTIME, &ts) == -1) + { + return true; + } + ts.tv_nsec += (ms % 1000) * 1000000000; + ts.tv_sec += ms / 1000 + ts.tv_nsec / 1000000000; + ts.tv_nsec %= 1000000000; + + do + { + rc = sem_timedwait(&m_sema, &ts); + } + while (rc == -1 && errno == EINTR); + + return !(rc == -1 && errno == ETIMEDOUT); + } + + void signal() + { + sem_post(&m_sema); + } + + void signal(int count) + { + while (count-- > 0) + { + sem_post(&m_sema); + } + } + }; #else #error Unsupported platform! (No semaphore wrapper available) #endif - //--------------------------------------------------------- - // LightweightSemaphore - //--------------------------------------------------------- - class LightweightSemaphore - { - public: - typedef std::make_signed::type ssize_t; - private: - std::atomic m_count; - Semaphore m_sema; - - bool waitWithPartialSpinning(const unsigned long &ms) - { - ssize_t oldCount; - // Is there a better way to set the initial spin count? - // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC, - // as threads start hitting the kernel semaphore. - int spin = 10000; - while (--spin >= 0) - { - oldCount = m_count.load(std::memory_order_relaxed); - if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) - return true; - std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. - } - oldCount = m_count.fetch_sub(1, std::memory_order_acquire); - if (oldCount <= 0) - { - return m_sema.wait(ms); - } - } - - ssize_t waitManyWithPartialSpinning(ssize_t max, const unsigned long &ms) - { - assert(max > 0); - ssize_t oldCount; - int spin = 10000; - while (--spin >= 0) - { - oldCount = m_count.load(std::memory_order_relaxed); - if (oldCount > 0) - { - ssize_t newCount = oldCount > max ? 
oldCount - max : 0; - if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) - return oldCount - newCount; - } - std::atomic_signal_fence(std::memory_order_acquire); - } - oldCount = m_count.fetch_sub(1, std::memory_order_acquire); - if (oldCount <= 0) - if(!m_sema.wait(ms)) + //--------------------------------------------------------- + // LightweightSemaphore + //--------------------------------------------------------- + class LightweightSemaphore + { + public: + typedef std::make_signed::type ssize_t; + private: + std::atomic m_count; + Semaphore m_sema; + + bool waitWithPartialSpinning(const unsigned long &ms) + { + ssize_t oldCount; + // Is there a better way to set the initial spin count? + // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC, + // as threads start hitting the kernel semaphore. + int spin = 10000; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount <= 0) + { + if(!m_sema.wait(ms)) { - return 1; + m_count.fetch_add(1, std::memory_order_acquire); + return false; } - - if (max > 1) - return 1 + tryWaitMany(max - 1); - return 1; - } - - public: - LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount) - { - assert(initialCount >= 0); - } - - bool tryWait() - { - ssize_t oldCount = m_count.load(std::memory_order_relaxed); - while (oldCount > 0) - { - if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) - return true; - } - return false; - } - - void wait(const unsigned long &ms) - { - if (!tryWait()) - waitWithPartialSpinning(ms); - } - - // Acquires between 0 and (greedily) max, inclusive - ssize_t tryWaitMany(ssize_t max) - { - assert(max >= 0); - ssize_t oldCount = m_count.load(std::memory_order_relaxed); - while (oldCount > 0) - { - ssize_t newCount = oldCount > max ? oldCount - max : 0; - if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) - return oldCount - newCount; - } - return 0; - } - - // Acquires at least one, and (greedily) at most max - ssize_t waitMany(ssize_t max, const unsigned long &ms) - { - assert(max >= 0); - ssize_t result = tryWaitMany(max); - if (result == 0 && max > 0) - result = waitManyWithPartialSpinning(max, ms); - return result; - } - - void signal(ssize_t count = 1) - { - assert(count >= 0); - ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); - ssize_t toRelease = -oldCount < count ? -oldCount : count; - if (toRelease > 0) - { - m_sema.signal((int)toRelease); - } - } - ssize_t availableApprox() const - { - ssize_t count = m_count.load(std::memory_order_relaxed); - return count > 0 ? count : 0; - } - }; - } // end namespace mpmc_sema -} // end namespace details + return true; + } + + return true; + } + + ssize_t waitManyWithPartialSpinning(ssize_t max, const unsigned long &ms) + { + assert(max > 0); + ssize_t oldCount; + int spin = 10000; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if (oldCount > 0) + { + ssize_t newCount = oldCount > max ? 
oldCount - max : 0; + if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + std::atomic_signal_fence(std::memory_order_acquire); + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount <= 0) + if(!m_sema.wait(ms)) + { + m_count.fetch_add(1, std::memory_order_acquire); + if(max > 1) + return 0 + tryWaitMany(max - 1); + + return 0; + } + + if (max > 1) + return 1 + tryWaitMany(max - 1); + + return 1; + } + + public: + LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount) + { + assert(initialCount >= 0); + } + + bool tryWait() + { + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + } + return false; + } + + bool wait(const unsigned long &ms) + { + if (!tryWait()) + return waitWithPartialSpinning(ms); + + return true; + } + + // Acquires between 0 and (greedily) max, inclusive + ssize_t tryWaitMany(ssize_t max) + { + assert(max >= 0); + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + ssize_t newCount = oldCount > max ? oldCount - max : 0; + if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + return 0; + } + + // Acquires at least one, and (greedily) at most max + ssize_t waitMany(ssize_t max, const unsigned long &ms) + { + assert(max >= 0); + ssize_t result = tryWaitMany(max); + if (result == 0 && max > 0) + result = waitManyWithPartialSpinning(max, ms); + return result; + } + + void signal(ssize_t count = 1) + { + assert(count >= 0); + ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); + ssize_t toRelease = -oldCount < count ? -oldCount : count; + if (toRelease > 0) + { + m_sema.signal((int)toRelease); + } + } + ssize_t availableApprox() const + { + ssize_t count = m_count.load(std::memory_order_relaxed); + return count > 0 ? count : 0; + } + }; + } // end namespace mpmc_sema +} // end namespace details // This is a blocking version of the queue. 
It has an almost identical interface to @@ -356,404 +370,416 @@ template class BlockingConcurrentQueue { private: - typedef ::moodycamel::ConcurrentQueue ConcurrentQueue; - typedef details::mpmc_sema::LightweightSemaphore LightweightSemaphore; + typedef ::moodycamel::ConcurrentQueue ConcurrentQueue; + typedef details::mpmc_sema::LightweightSemaphore LightweightSemaphore; public: - typedef typename ConcurrentQueue::producer_token_t producer_token_t; - typedef typename ConcurrentQueue::consumer_token_t consumer_token_t; - typedef typename ConcurrentQueue::index_t index_t; - typedef typename ConcurrentQueue::size_t size_t; - typedef typename std::make_signed::type ssize_t; - static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE; - static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD; - static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE; - static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE; - static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; - static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE; - static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE; + typedef typename ConcurrentQueue::producer_token_t producer_token_t; + typedef typename ConcurrentQueue::consumer_token_t consumer_token_t; + typedef typename ConcurrentQueue::index_t index_t; + typedef typename ConcurrentQueue::size_t size_t; + typedef typename std::make_signed::type ssize_t; + static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE; + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD; + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE; + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE; + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE; + static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE; public: - // Creates a queue with at least `capacity` element slots; note that the - // actual number of elements that can be inserted without additional memory - // allocation depends on the number of producers and the block size (e.g. if - // the block size is equal to `capacity`, only a single block will be allocated - // up-front, which means only a single producer will be able to enqueue elements - // without an extra allocation -- blocks aren't shared between producers). - // This method is not thread safe -- it is up to the user to ensure that the - // queue is fully constructed before it starts being used by other threads (this - // includes making the memory effects of construction visible, possibly with a - // memory barrier). 
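Stepping back to the Semaphore wrappers above: all three platform versions share the same contract, which can be summarised with a portable stand-in built on std::condition_variable (an illustration only; the class name PortableSemaphoreSketch is an assumption, and the patch itself uses the native primitives). wait(0) blocks until signalled, and wait(ms) with a non-zero ms returns false if nothing was signalled within roughly ms milliseconds.

#include <chrono>
#include <condition_variable>
#include <mutex>

// Portable stand-in with the same contract as the platform Semaphore wrappers:
// wait(0) blocks until signalled, wait(ms) returns false on timeout.
class PortableSemaphoreSketch
{
    std::mutex m_mtx;
    std::condition_variable m_cv;
    long m_count;

public:
    explicit PortableSemaphoreSketch(long initial = 0) : m_count(initial) {}

    void signal(long n = 1)
    {
        std::lock_guard<std::mutex> lk(m_mtx);
        m_count += n;
        for (long i = 0; i < n; ++i)
            m_cv.notify_one();
    }

    bool wait(unsigned long ms)
    {
        std::unique_lock<std::mutex> lk(m_mtx);
        auto ready = [&] { return m_count > 0; };
        if (ms == 0UL) {
            m_cv.wait(lk, ready);                // wait indefinitely
        } else if (!m_cv.wait_for(lk, std::chrono::milliseconds(ms), ready)) {
            return false;                        // timed out, nothing acquired
        }
        --m_count;
        return true;
    }
};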
- explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) - : inner(capacity), sema(create(), &BlockingConcurrentQueue::template destroy) - { - assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); - if (!sema) { - MOODYCAMEL_THROW(std::bad_alloc()); - } - } - BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) - : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(), &BlockingConcurrentQueue::template destroy) - { - assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); - if (!sema) { - MOODYCAMEL_THROW(std::bad_alloc()); - } - } - // Disable copying and copy assignment - BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; - BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; - // Moving is supported, but note that it is *not* a thread-safe operation. - // Nobody can use the queue while it's being moved, and the memory effects - // of that move must be propagated to other threads before they can use it. - // Note: When a queue is moved, its tokens are still valid but can only be - // used with the destination queue (i.e. semantically they are moved along - // with the queue itself). - BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT - : inner(std::move(other.inner)), sema(std::move(other.sema)) - { } - inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT - { - return swap_internal(other); - } - // Swaps this queue's state with the other's. Not thread-safe. - // Swapping two queues does not invalidate their tokens, however - // the tokens that were created for one queue must be used with - // only the swapped queue (i.e. the tokens are tied to the - // queue's movable state, not the object itself). - inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT - { - swap_internal(other); - } + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size (e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). 
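The POSIX wrapper further above builds an absolute deadline for sem_timedwait out of a relative millisecond count. For reference, that conversion can be sketched in isolation as follows (the helper name timed_sem_wait is an assumption, not code from this patch). One millisecond is 1,000,000 nanoseconds, and tv_nsec has to be kept below one second before the call.

#include <semaphore.h>
#include <time.h>
#include <errno.h>

// Standalone sketch: wait on a POSIX semaphore for at most `ms` milliseconds,
// retrying on EINTR, returning false on timeout or error.
static bool timed_sem_wait(sem_t* sema, unsigned long ms)
{
    struct timespec ts;
    if (clock_gettime(CLOCK_REALTIME, &ts) == -1)
        return false;
    ts.tv_sec  += ms / 1000;
    ts.tv_nsec += (ms % 1000) * 1000000L;    // 1 ms == 1,000,000 ns
    if (ts.tv_nsec >= 1000000000L) {         // carry the overflow into tv_sec
        ts.tv_sec  += 1;
        ts.tv_nsec -= 1000000000L;
    }
    int rc;
    do {
        rc = sem_timedwait(sema, &ts);       // absolute CLOCK_REALTIME deadline
    } while (rc == -1 && errno == EINTR);
    return rc == 0;
}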
+ explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + : inner(capacity), sema(create(), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + // Disable copying and copy assignment + BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). + BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : inner(std::move(other.inner)), sema(std::move(other.sema)) + { } + inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } private: - BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) - { - if (this == &other) { - return *this; - } - inner.swap(other.inner); - sema.swap(other.sema); - return *this; - } + BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + inner.swap(other.inner); + sema.swap(other.sema); + return *this; + } public: - // Enqueues a single item (by copying it). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(T const& item) - { - if (details::likely(inner.enqueue(item))) { - sema->signal(); - return true; - } - return false; - } - // Enqueues a single item (by moving it, if possible). - // Allocates memory if required. Only fails if memory allocation fails (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, - // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. 
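As a rough usage sketch of the construction and enqueue paths documented here (the element type, capacity multiplier, and values are illustrative assumptions, not part of the patch):

#include "blockingconcurrentqueue.h"

int main()
{
    using Queue = moodycamel::BlockingConcurrentQueue<int>;
    // Capacity is a lower bound, and blocks are not shared between producers,
    // so size the queue for the number of producers that should be able to
    // enqueue without triggering an allocation.
    Queue q(4 * Queue::BLOCK_SIZE);

    q.enqueue(1);             // may allocate (implicit producer, extra blocks)
    q.try_enqueue(2);         // never allocates; fails instead if out of room

    int v = 0;
    while (q.try_dequeue(v)) {
        // drain whatever is already visible to this thread
    }
    return 0;
}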
- inline bool enqueue(T&& item) - { - if (details::likely(inner.enqueue(std::move(item)))) { - sema->signal(); - return true; - } - return false; - } - // Enqueues a single item (by copying it) using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(producer_token_t const& token, T const& item) - { - if (details::likely(inner.enqueue(token, item))) { - sema->signal(); - return true; - } - return false; - } - // Enqueues a single item (by moving it, if possible) using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails (or - // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Thread-safe. - inline bool enqueue(producer_token_t const& token, T&& item) - { - if (details::likely(inner.enqueue(token, std::move(item)))) { - sema->signal(); - return true; - } - return false; - } - // Enqueues several items. - // Allocates memory if required. Only fails if memory allocation fails (or - // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved instead of copied. - // Thread-safe. - template - inline bool enqueue_bulk(It itemFirst, size_t count) - { - if (details::likely(inner.enqueue_bulk(std::forward(itemFirst), count))) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - // Enqueues several items using an explicit producer token. - // Allocates memory if required. Only fails if memory allocation fails - // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - if (details::likely(inner.enqueue_bulk(token, std::forward(itemFirst), count))) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - // Enqueues a single item (by copying it). - // Does not allocate memory. Fails if not enough room to enqueue (or implicit - // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - // is 0). - // Thread-safe. - inline bool try_enqueue(T const& item) - { - if (inner.try_enqueue(item)) { - sema->signal(); - return true; - } - return false; - } - // Enqueues a single item (by moving it, if possible). - // Does not allocate memory (except for one-time implicit producer). - // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Thread-safe. - inline bool try_enqueue(T&& item) - { - if (inner.try_enqueue(std::move(item))) { - sema->signal(); - return true; - } - return false; - } - // Enqueues a single item (by copying it) using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Thread-safe. - inline bool try_enqueue(producer_token_t const& token, T const& item) - { - if (inner.try_enqueue(token, item)) { - sema->signal(); - return true; - } - return false; - } - // Enqueues a single item (by moving it, if possible) using an explicit producer token. - // Does not allocate memory. 
Fails if not enough room to enqueue. - // Thread-safe. - inline bool try_enqueue(producer_token_t const& token, T&& item) - { - if (inner.try_enqueue(token, std::move(item))) { - sema->signal(); - return true; - } - return false; - } - // Enqueues several items. - // Does not allocate memory (except for one-time implicit producer). - // Fails if not enough room to enqueue (or implicit production is - // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - inline bool try_enqueue_bulk(It itemFirst, size_t count) - { - if (inner.try_enqueue_bulk(std::forward(itemFirst), count)) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - // Enqueues several items using an explicit producer token. - // Does not allocate memory. Fails if not enough room to enqueue. - // Note: Use std::make_move_iterator if the elements should be moved - // instead of copied. - // Thread-safe. - template - inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) - { - if (inner.try_enqueue_bulk(token, std::forward(itemFirst), count)) { - sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); - return true; - } - return false; - } - // Attempts to dequeue from the queue. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline bool try_dequeue(U& item) - { - if (sema->tryWait()) { - while (!inner.try_dequeue(item)) { - continue; - } - return true; - } - return false; - } - // Attempts to dequeue from the queue using an explicit consumer token. - // Returns false if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline bool try_dequeue(consumer_token_t& token, U& item) - { - if (sema->tryWait()) { - while (!inner.try_dequeue(token, item)) { - continue; - } - return true; - } - return false; - } - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued. - // Returns 0 if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline size_t try_dequeue_bulk(It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) { - count += inner.template try_dequeue_bulk(itemFirst, max - count); - } - return count; - } - // Attempts to dequeue several elements from the queue using an explicit consumer token. - // Returns the number of items actually dequeued. - // Returns 0 if all producer streams appeared empty at the time they - // were checked (so, the queue is likely but not guaranteed to be empty). - // Never allocates. Thread-safe. - template - inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) - { - size_t count = 0; - max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); - while (count != max) { - count += inner.template try_dequeue_bulk(token, itemFirst, max - count); - } - return count; - } - // Blocks the current thread until there's something to dequeue, then - // dequeues it. - // Never allocates. 
Thread-safe. - template - inline void wait_dequeue(U& item, const unsigned long &ms = 0UL) - { - sema->wait(ms); - while (!inner.try_dequeue(item)) { - continue; - } - } - // Blocks the current thread until there's something to dequeue, then - // dequeues it using an explicit consumer token. - // Never allocates. Thread-safe. - template - inline void wait_dequeue(consumer_token_t& token, U& item, const unsigned long &ms = 0UL) - { - sema->wait(ms); - while (!inner.try_dequeue(token, item)) { - continue; - } - } - // Attempts to dequeue several elements from the queue. - // Returns the number of items actually dequeued, which will - // always be at least one (this method blocks until the queue - // is non-empty) and at most max. - // Never allocates. Thread-safe. - template - inline size_t wait_dequeue_bulk(It itemFirst, size_t max, const unsigned long &ms = 0UL) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, ms); - while (count != max) { - count += inner.template try_dequeue_bulk(itemFirst, max - count); - } - return count; - } - // Attempts to dequeue several elements from the queue using an explicit consumer token. - // Returns the number of items actually dequeued, which will - // always be at least one (this method blocks until the queue - // is non-empty) and at most max. - // Never allocates. Thread-safe. - template - inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max, const unsigned long &ms = 0UL) - { - size_t count = 0; - max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, ms); - while (count != max) { - count += inner.template try_dequeue_bulk(token, itemFirst, max - count); - } - return count; - } - // Returns an estimate of the total number of elements currently in the queue. This - // estimate is only accurate if the queue has completely stabilized before it is called - // (i.e. all enqueue and dequeue operations have completed and their memory effects are - // visible on the calling thread, and no further operations start while this method is - // being called). - // Thread-safe. - inline size_t size_approx() const - { - return (size_t)sema->availableApprox(); - } - // Returns true if the underlying atomic variables used by - // the queue are lock-free (they should be on most platforms). - // Thread-safe. - static bool is_lock_free() - { - return ConcurrentQueue::is_lock_free(); - } + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + if (details::likely(inner.enqueue(item))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + if (details::likely(inner.enqueue(std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. 
Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + if (details::likely(inner.enqueue(token, item))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + if (details::likely(inner.enqueue(token, std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(It itemFirst, size_t count) + { + if (details::likely(inner.enqueue_bulk(std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if (details::likely(inner.enqueue_bulk(token, std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + if (inner.try_enqueue(item)) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + if (inner.try_enqueue(std::move(item))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T const& item) + { + if (inner.try_enqueue(token, item)) { + sema->signal(); + return true; + } + return false; + } + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + if (inner.try_enqueue(token, std::move(item))) { + sema->signal(); + return true; + } + return false; + } + // Enqueues several items. 
+ // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(token, std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + return false; + } + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(consumer_token_t& token, U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + return false; + } + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + // Blocks the current thread until there's something to dequeue, then + // dequeues it. + // Never allocates. Thread-safe. 
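A minimal sketch of how the timed overload below is meant to be called (the element type, the 250 ms timeout, and the printed messages are illustrative assumptions): the default ms == 0 keeps the original wait-forever behaviour, while a non-zero ms bounds the wait and makes the call return false if nothing arrived in time.

#include "blockingconcurrentqueue.h"
#include <cstdio>

int main()
{
    moodycamel::BlockingConcurrentQueue<int> q;
    int item = 0;

    // Empty queue: give up after roughly 250 ms instead of blocking forever.
    if (!q.wait_dequeue(item, 250UL)) {
        std::printf("timed out, nothing to dequeue\n");
    }

    q.enqueue(42);

    // Item available: returns true well before the timeout expires.
    if (q.wait_dequeue(item, 250UL)) {
        std::printf("dequeued %d\n", item);
    }
    return 0;
}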
+ template + inline bool wait_dequeue(U& item, const unsigned long &ms = 0UL) + { + if(!sema->wait(ms)) + { + return false; + } + + while (!inner.try_dequeue(item)) { + continue; + } + + return true; + } + // Blocks the current thread until there's something to dequeue, then + // dequeues it using an explicit consumer token. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue(consumer_token_t& token, U& item, const unsigned long &ms = 0UL) + { + if(!sema->wait(ms)) + { + return false; + } + + while (!inner.try_dequeue(token, item)) { + continue; + } + + return true; + } + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk(It itemFirst, size_t max, const unsigned long &ms = 0UL) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, ms); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max, const unsigned long &ms = 0UL) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, ms); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + inline size_t size_approx() const + { + return (size_t)sema->availableApprox(); + } + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. + static bool is_lock_free() + { + return ConcurrentQueue::is_lock_free(); + } private: - template - static inline U* create() - { - auto p = (Traits::malloc)(sizeof(U)); - return p != nullptr ? new (p) U : nullptr; - } - template - static inline U* create(A1&& a1) - { - auto p = (Traits::malloc)(sizeof(U)); - return p != nullptr ? new (p) U(std::forward(a1)) : nullptr; - } - template - static inline void destroy(U* p) - { - if (p != nullptr) { - p->~U(); - } - (Traits::free)(p); - } + template + static inline U* create() + { + auto p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U : nullptr; + } + template + static inline U* create(A1&& a1) + { + auto p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? 
new (p) U(std::forward(a1)) : nullptr; + } + template + static inline void destroy(U* p) + { + if (p != nullptr) { + p->~U(); + } + (Traits::free)(p); + } private: - ConcurrentQueue inner; - std::unique_ptr sema; + ConcurrentQueue inner; + std::unique_ptr sema; }; template inline void swap(BlockingConcurrentQueue& a, BlockingConcurrentQueue& b) MOODYCAMEL_NOEXCEPT { - a.swap(b); + a.swap(b); } -} // end namespace moodycamel +} // end namespace moodycamel diff --git a/tests/unittests/unittests.cpp b/tests/unittests/unittests.cpp index 51290029..f11b5119 100644 --- a/tests/unittests/unittests.cpp +++ b/tests/unittests/unittests.cpp @@ -38,7 +38,7 @@ namespace { std::max_align_t dummy; // Others (e.g. MSVC) insist it can *only* be accessed via std:: #endif }; - + static inline void* malloc(std::size_t size) { auto ptr = std::malloc(size + sizeof(tag)); @@ -46,7 +46,7 @@ namespace { usage.fetch_add(size, std::memory_order_relaxed); return reinterpret_cast(ptr) + sizeof(tag); } - + static inline void free(void* ptr) { ptr = reinterpret_cast(ptr) - sizeof(tag); @@ -54,13 +54,13 @@ namespace { usage.fetch_add(-size, std::memory_order_relaxed); std::free(ptr); } - + static inline std::size_t current_usage() { return usage.load(std::memory_order_relaxed); } - + private: static std::atomic usage; }; - + std::atomic tracking_allocator::usage(0); } @@ -90,17 +90,17 @@ struct TestTraits : public MallocTrackingTraits { typedef std::size_t size_t; typedef uint64_t index_t; - + static const size_t BLOCK_SIZE = BlockSize; static const size_t EXPLICIT_INITIAL_INDEX_SIZE = InitialIndexSize; static const size_t IMPLICIT_INITIAL_INDEX_SIZE = InitialIndexSize * 2; - + static inline void reset() { _malloc_count() = 0; _free_count() = 0; } static inline std::atomic& _malloc_count() { static std::atomic c; return c; } static inline int malloc_count() { return _malloc_count().load(std::memory_order_seq_cst); } static inline std::atomic& _free_count() { static std::atomic c; return c; } static inline int free_count() { return _free_count().load(std::memory_order_seq_cst); } - + static inline void* malloc(ConcurrentQueueDefaultTraits::size_t bytes) { ++_malloc_count(); return tracking_allocator::malloc(bytes); } static inline void free(void* obj) { ++_free_count(); return tracking_allocator::free(obj); } }; @@ -125,7 +125,7 @@ struct Foo static int& destroyCount() { static int c; return c; } static bool& destroyedInOrder() { static bool d = true; return d; } static void reset() { createCount() = 0; destroyCount() = 0; nextId() = 0; destroyedInOrder() = true; lastDestroyedId() = -1; } - + Foo() { id = nextId()++; ++createCount(); } Foo(Foo const&) MOODYCAMEL_DELETE_FUNCTION; Foo(Foo&& other) { id = other.id; other.id = -1; } @@ -145,7 +145,7 @@ struct Foo } id = -2; } - + private: int id; static int& lastDestroyedId() { static int i = -1; return i; } @@ -179,13 +179,13 @@ struct ThrowingMovable { static std::atomic& ctorCount() { static std::atomic c; return c; } static std::atomic& destroyCount() { static std::atomic c; return c; } static void reset() { ctorCount() = 0; destroyCount() = 0; } - + explicit ThrowingMovable(int id, bool throwOnCctor = false, bool throwOnAssignment = false, bool throwOnSecondCctor = false) : id(id), moved(false), copied(false), throwOnCctor(throwOnCctor), throwOnAssignment(throwOnAssignment), throwOnSecondCctor(throwOnSecondCctor) { ctorCount().fetch_add(1, std::memory_order_relaxed); } - + ThrowingMovable(ThrowingMovable const& o) : id(o.id), moved(false), copied(true), 
throwOnCctor(o.throwOnCctor), throwOnAssignment(o.throwOnAssignment), throwOnSecondCctor(false) { @@ -195,7 +195,7 @@ struct ThrowingMovable { ctorCount().fetch_add(1, std::memory_order_relaxed); throwOnCctor = o.throwOnSecondCctor; } - + ThrowingMovable(ThrowingMovable&& o) : id(o.id), moved(true), copied(false), throwOnCctor(o.throwOnCctor), throwOnAssignment(o.throwOnAssignment), throwOnSecondCctor(false) { @@ -205,12 +205,12 @@ struct ThrowingMovable { ctorCount().fetch_add(1, std::memory_order_relaxed); throwOnCctor = o.throwOnSecondCctor; } - + ~ThrowingMovable() { destroyCount().fetch_add(1, std::memory_order_relaxed); } - + void operator=(ThrowingMovable const& o) { id = o.id; @@ -223,7 +223,7 @@ struct ThrowingMovable { throw this; } } - + void operator=(ThrowingMovable&& o) { id = o.id; @@ -236,11 +236,11 @@ struct ThrowingMovable { throw this; } } - + int id; bool moved; bool copied; - + public: bool throwOnCctor; bool throwOnAssignment; @@ -283,16 +283,17 @@ class ConcurrentQueueTests : public TestClass REGISTER_TEST(full_api); REGISTER_TEST(full_api); REGISTER_TEST(blocking_wrappers); - + REGISTER_TEST(blocking_wrappers_timedwait); + // Core algos REGISTER_TEST(core_add_only_list); REGISTER_TEST(core_thread_local); REGISTER_TEST(core_free_list); REGISTER_TEST(core_spmc_hash); - + REGISTER_TEST(explicit_strings_threaded); } - + bool postTest(bool testSucceeded) override { if (testSucceeded) { @@ -301,190 +302,190 @@ class ConcurrentQueueTests : public TestClass } return true; } - - + + bool create_empty_queue() { ConcurrentQueue q; return true; } - - + + bool create_token() { ConcurrentQueue q; ProducerToken tok(q); - + return true; } - + bool circular_less_than() { { uint32_t a, b; - + a = 0; b = 100; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 100; b = 0; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(details::circular_less_than(b, a)); - + a = 0; b = 0; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 100; b = 100; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 0; b = 1 << 31; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 1; b = 1 << 31; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 0; b = (1 << 31) + 1; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(details::circular_less_than(b, a)); - + a = 100; b = (1 << 31) + 1; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = (1 << 31) + 7; b = 5; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = (1 << 16) + 7; b = (1 << 16) + 5; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(details::circular_less_than(b, a)); - + a = 0xFFFFFFFF; b = 0; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 0xFFFFFFFF; b = 0xFFFFFF; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); } - + { uint16_t a, b; - + a = 0; b = 100; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 100; b = 0; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(details::circular_less_than(b, a)); 
- + a = 0; b = 0; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 100; b = 100; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 0; b = 1 << 15; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 1; b = 1 << 15; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 0; b = (1 << 15) + 1; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(details::circular_less_than(b, a)); - + a = 100; b = (1 << 15) + 1; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = (1 << 15) + 7; b = 5; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = (1 << 15) + 7; b = (1 << 15) + 5; ASSERT_OR_FAIL(!details::circular_less_than(a, b)); ASSERT_OR_FAIL(details::circular_less_than(b, a)); - + a = 0xFFFF; b = 0; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); - + a = 0xFFFF; b = 0xFFF; ASSERT_OR_FAIL(details::circular_less_than(a, b)); ASSERT_OR_FAIL(!details::circular_less_than(b, a)); } - + return true; } - - + + bool enqueue_one_explicit() { ConcurrentQueue q; ProducerToken tok(q); - + bool result = q.enqueue(tok, 17); - + ASSERT_OR_FAIL(result); return true; } - + bool enqueue_and_dequeue_one_explicit() { ConcurrentQueue q; ProducerToken tok(q); - + int item = 0; ASSERT_OR_FAIL(q.enqueue(tok, 123)); ASSERT_OR_FAIL(q.try_dequeue_from_producer(tok, item)); ASSERT_OR_FAIL(item == 123); - + return true; } - + bool enqueue_one_implicit() { ConcurrentQueue q; - + bool result = q.enqueue(17); - + ASSERT_OR_FAIL(result); return true; } - + bool enqueue_and_dequeue_one_implicit() { ConcurrentQueue q; - + int item = 0; ASSERT_OR_FAIL(q.enqueue(123)); ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == 123); - + return true; } - + bool enqueue_and_dequeue_a_few() { // Fairly straightforward mass enqueue and dequeue { ConcurrentQueue> q; ProducerToken tok(q); - + for (int i = 0; i != 99999; ++i) { ASSERT_OR_FAIL(q.enqueue(tok, i)); } - + int item; for (int i = 0; i != 99999; ++i) { ASSERT_OR_FAIL(q.try_dequeue_from_producer(tok, item)); @@ -492,12 +493,12 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue_from_producer(tok, item)); } - + // Interleaved enqueue and dequeue (though still no threads involved) { ConcurrentQueue> q; ProducerToken tok(q); - + int item; for (int i = 0; i != 99999; ++i) { ASSERT_OR_FAIL(q.enqueue(tok, i)); @@ -505,22 +506,22 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue_from_producer(tok, item)); ASSERT_OR_FAIL(item == (i / 2) * (i % 2 == 0 ? 1 : 2)); } - + for (int i = 0; i != 99999; ++i) { ASSERT_OR_FAIL(q.try_dequeue_from_producer(tok, item)); ASSERT_OR_FAIL(item == ((i + 99999) / 2) * (i % 2 == 1 ? 
1 : 2)); } ASSERT_OR_FAIL(!q.try_dequeue_from_producer(tok, item)); } - + // Implicit usage { ConcurrentQueue> q; - + for (int i = 0; i != 99999; ++i) { ASSERT_OR_FAIL(q.enqueue(i)); } - + int item; for (int i = 0; i != 99999; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -528,10 +529,10 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + { ConcurrentQueue> q; - + int item; for (int i = 0; i != 99999; ++i) { ASSERT_OR_FAIL(q.enqueue(i)); @@ -539,35 +540,35 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == (i / 2) * (i % 2 == 0 ? 1 : 2)); } - + for (int i = 0; i != 99999; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == ((i + 99999) / 2) * (i % 2 == 1 ? 1 : 2)); } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + return true; } - + bool enqueue_bulk() { typedef TestTraits<2> Traits2; typedef TestTraits<4> Traits4; - + int arr123[] = { 1, 2, 3 }; int arr1234[] = { 1, 2, 3, 4 }; int arr123456[] = { 1, 2, 3, 4, 5, 6 }; - + Traits2::reset(); { // Implicit, block allocation required ConcurrentQueue q(2); ASSERT_OR_FAIL(Traits2::malloc_count() == 1); - + q.enqueue_bulk(arr123, 3); ASSERT_OR_FAIL(Traits2::malloc_count() == 4); // One for producer, one for block index, one for block - + int item; for (int i = 0; i != 3; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -575,16 +576,16 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits4::reset(); { // Implicit, block allocation not required (end on block boundary) ConcurrentQueue q(2); ASSERT_OR_FAIL(Traits4::malloc_count() == 1); - + q.enqueue_bulk(arr1234, 4); ASSERT_OR_FAIL(Traits4::malloc_count() == 3); // One for producer, one for block index - + int item; for (int i = 0; i != 4; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -592,37 +593,37 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits2::reset(); { // Implicit, allocation fail ConcurrentQueue q(2); ASSERT_OR_FAIL(Traits2::malloc_count() == 1); - + ASSERT_OR_FAIL(!q.try_enqueue_bulk(arr123, 3)); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); // Still has to allocate implicit producer and block index - + int item; ASSERT_OR_FAIL(!q.try_dequeue(item)); - + ASSERT_OR_FAIL(q.try_enqueue_bulk(arr123, 2)); for (int i = 0; i != 2; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == i + 1); } ASSERT_OR_FAIL(!q.try_dequeue(item)); - + } - + Traits2::reset(); { // Implicit, block allocation not required ConcurrentQueue q(4); ASSERT_OR_FAIL(Traits2::malloc_count() == 1); - + q.enqueue_bulk(arr1234, 4); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); // One for producer, one for block index - + int item; for (int i = 0; i != 4; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -630,18 +631,18 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits4::reset(); { // Implicit, block allocation required (end not on block boundary) ConcurrentQueue q(4); ASSERT_OR_FAIL(Traits4::malloc_count() == 1); - + ASSERT_OR_FAIL(q.enqueue(0)); - + ASSERT_OR_FAIL(q.enqueue_bulk(arr1234, 4)); ASSERT_OR_FAIL(Traits4::malloc_count() == 4); // One for producer, one for block index, one for block - + int item; for (int i = 0; i != 5; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -649,18 +650,18 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits4::reset(); { // Implicit, block allocation not required (end not 
on block boundary) ConcurrentQueue q(5); ASSERT_OR_FAIL(Traits4::malloc_count() == 1); - + ASSERT_OR_FAIL(q.enqueue(0)); - + ASSERT_OR_FAIL(q.enqueue_bulk(arr1234, 4)); ASSERT_OR_FAIL(Traits4::malloc_count() == 3); // One for producer, one for block index - + int item; for (int i = 0; i != 5; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -668,52 +669,52 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits2::reset(); { // Implicit, block allocation fail (end not on block boundary) -- test rewind ConcurrentQueue q(4); ASSERT_OR_FAIL(Traits2::malloc_count() == 1); - + ASSERT_OR_FAIL(q.enqueue(17)); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); // One for producer, one for block index - + ASSERT_OR_FAIL(!q.try_enqueue_bulk(arr123456, 6)); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); - + int item; ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == 17); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits2::reset(); { // Implicit, enqueue nothing ConcurrentQueue q(3); ASSERT_OR_FAIL(Traits2::malloc_count() == 1); - + ASSERT_OR_FAIL(q.try_enqueue_bulk(arr123, 0)); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); // One for producer, one for block index - + int item; ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + //////// - + Traits2::reset(); { // Explicit, block allocation required ConcurrentQueue q(2); ASSERT_OR_FAIL(Traits2::malloc_count() == 1); - + ProducerToken tok(q); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); // One for producer, one for block index - + q.enqueue_bulk(tok, arr123, 3); ASSERT_OR_FAIL(Traits2::malloc_count() == 4); // One for block - + int item; for (int i = 0; i != 3; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -721,19 +722,19 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits4::reset(); { // Explicit, block allocation not required (end on block boundary) ConcurrentQueue q(2); ASSERT_OR_FAIL(Traits4::malloc_count() == 1); - + ProducerToken tok(q); ASSERT_OR_FAIL(Traits4::malloc_count() == 3); // One for producer, one for block index - + q.enqueue_bulk(tok, arr1234, 4); ASSERT_OR_FAIL(Traits4::malloc_count() == 3); - + int item; for (int i = 0; i != 4; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -741,22 +742,22 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits2::reset(); { // Explicit, allocation fail ConcurrentQueue q(2); ASSERT_OR_FAIL(Traits2::malloc_count() == 1); - + ProducerToken tok(q); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); // One for producer, one for block index - + ASSERT_OR_FAIL(!q.try_enqueue_bulk(tok, arr123, 3)); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); - + int item; ASSERT_OR_FAIL(!q.try_dequeue(item)); - + ASSERT_OR_FAIL(q.try_enqueue_bulk(tok, arr123, 2)); for (int i = 0; i != 2; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -765,19 +766,19 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(!q.try_dequeue(item)); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); } - + Traits2::reset(); { // Explicit, block allocation not required ConcurrentQueue q(4); ASSERT_OR_FAIL(Traits2::malloc_count() == 1); - + ProducerToken tok(q); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); // One for producer, one for block index - + q.enqueue_bulk(tok, arr1234, 4); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); - + int item; for (int i = 0; i != 4; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -785,21 +786,21 @@ class ConcurrentQueueTests : public TestClass } 
ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits4::reset(); { // Explicit, block allocation required (end not on block boundary) ConcurrentQueue q(4); ASSERT_OR_FAIL(Traits4::malloc_count() == 1); - + ProducerToken tok(q); ASSERT_OR_FAIL(Traits4::malloc_count() == 3); // One for producer, one for block index - + ASSERT_OR_FAIL(q.enqueue(tok, 0)); - + ASSERT_OR_FAIL(q.enqueue_bulk(tok, arr1234, 4)); ASSERT_OR_FAIL(Traits4::malloc_count() == 4); // One for block - + int item; for (int i = 0; i != 5; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -807,21 +808,21 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits4::reset(); { // Explicit, block allocation not required (end not on block boundary) ConcurrentQueue q(5); ASSERT_OR_FAIL(Traits4::malloc_count() == 1); - + ProducerToken tok(q); ASSERT_OR_FAIL(Traits4::malloc_count() == 3); // One for producer, one for block index - + ASSERT_OR_FAIL(q.enqueue(tok, 0)); - + ASSERT_OR_FAIL(q.enqueue_bulk(tok, arr1234, 4)); ASSERT_OR_FAIL(Traits4::malloc_count() == 3); - + int item; for (int i = 0; i != 5; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -829,58 +830,58 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits2::reset(); { // Explicit, block allocation fail (end not on block boundary) -- test rewind ConcurrentQueue q(4); ASSERT_OR_FAIL(Traits2::malloc_count() == 1); - + ProducerToken tok(q); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); // One for producer, one for block index - + ASSERT_OR_FAIL(q.enqueue(tok, 17)); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); - + ASSERT_OR_FAIL(!q.try_enqueue_bulk(tok, arr123456, 6)); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); - + int item; ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == 17); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits2::reset(); { // Explicit, enqueue nothing ConcurrentQueue q(3); ASSERT_OR_FAIL(Traits2::malloc_count() == 1); - + ProducerToken tok(q); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); // One for producer, one for block index - + ASSERT_OR_FAIL(q.try_enqueue_bulk(tok, arr123, 0)); ASSERT_OR_FAIL(Traits2::malloc_count() == 3); - + int item; ASSERT_OR_FAIL(!q.try_dequeue(item)); - + ASSERT_OR_FAIL(q.enqueue(tok, 17)); ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == 17); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + Traits4::reset(); { // Explicit, re-use empty blocks ConcurrentQueue q(8); ASSERT_OR_FAIL(Traits4::malloc_count() == 1); - + ProducerToken tok(q); ASSERT_OR_FAIL(Traits4::malloc_count() == 3); // One for producer, one for block index - + for (int i = 0; i != 5; ++i) { ASSERT_OR_FAIL(q.enqueue(tok, i)); } @@ -890,12 +891,12 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item == i); } ASSERT_OR_FAIL(!q.try_dequeue(item)); - + ASSERT_OR_FAIL(Traits4::malloc_count() == 3); - + ASSERT_OR_FAIL(q.enqueue_bulk(tok, arr123456, 6)); ASSERT_OR_FAIL(Traits4::malloc_count() == 3); - + for (int i = 0; i != 6; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == i + 1); @@ -903,88 +904,88 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(!q.try_dequeue(item)); ASSERT_OR_FAIL(Traits4::malloc_count() == 3); } - + return true; } - + bool block_alloc() { typedef TestTraits<2> Traits; Traits::reset(); - + { ConcurrentQueue q(7); ASSERT_OR_FAIL(q.initialBlockPoolSize == 4); - + ASSERT_OR_FAIL(Traits::malloc_count() == 1); ASSERT_OR_FAIL(Traits::free_count() == 0); - + ProducerToken tok(q); 
ASSERT_OR_FAIL(Traits::malloc_count() == 3); // one for producer, one for its block index ASSERT_OR_FAIL(Traits::free_count() == 0); - + // Enqueue one item too many (force extra block allocation) for (int i = 0; i != 9; ++i) { ASSERT_OR_FAIL(q.enqueue(tok, i)); } - + ASSERT_OR_FAIL(Traits::malloc_count() == 4); ASSERT_OR_FAIL(Traits::free_count() == 0); - + // Still room for one more... ASSERT_OR_FAIL(q.enqueue(tok, 9)); ASSERT_OR_FAIL(Traits::malloc_count() == 4); ASSERT_OR_FAIL(Traits::free_count() == 0); - + // No more room without further allocations ASSERT_OR_FAIL(!q.try_enqueue(tok, 10)); ASSERT_OR_FAIL(Traits::malloc_count() == 4); ASSERT_OR_FAIL(Traits::free_count() == 0); - + // Check items were enqueued properly int item; for (int i = 0; i != 10; ++i) { ASSERT_OR_FAIL(q.try_dequeue_from_producer(tok, item)); ASSERT_OR_FAIL(item == i); } - + // Queue should be empty, but not freed ASSERT_OR_FAIL(!q.try_dequeue_from_producer(tok, item)); ASSERT_OR_FAIL(Traits::free_count() == 0); } - + ASSERT_OR_FAIL(Traits::malloc_count() == 4); ASSERT_OR_FAIL(Traits::free_count() == 4); - + // Implicit Traits::reset(); { ConcurrentQueue q(7); ASSERT_OR_FAIL(q.initialBlockPoolSize == 4); - + ASSERT_OR_FAIL(q.enqueue(39)); - + ASSERT_OR_FAIL(Traits::malloc_count() == 3); // one for producer, one for its block index ASSERT_OR_FAIL(Traits::free_count() == 0); - + // Enqueue one item too many (force extra block allocation) for (int i = 0; i != 8; ++i) { ASSERT_OR_FAIL(q.enqueue(i)); } - + ASSERT_OR_FAIL(Traits::malloc_count() == 4); ASSERT_OR_FAIL(Traits::free_count() == 0); - + // Still room for one more... ASSERT_OR_FAIL(q.enqueue(8)); ASSERT_OR_FAIL(Traits::malloc_count() == 4); ASSERT_OR_FAIL(Traits::free_count() == 0); - + // No more room without further allocations ASSERT_OR_FAIL(!q.try_enqueue(9)); ASSERT_OR_FAIL(Traits::malloc_count() == 4); ASSERT_OR_FAIL(Traits::free_count() == 0); - + // Check items were enqueued properly int item; ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -993,43 +994,43 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == i); } - + // Queue should be empty, but not freed ASSERT_OR_FAIL(!q.try_dequeue(item)); ASSERT_OR_FAIL(Traits::free_count() == 0); } - + ASSERT_OR_FAIL(Traits::malloc_count() == 4); ASSERT_OR_FAIL(Traits::free_count() == 4); - + return true; } - + bool token_move() { typedef TestTraits<16> Traits; Traits::reset(); - + { ConcurrentQueue q; ProducerToken t0(q); - + ASSERT_OR_FAIL(t0.valid()); - + ProducerToken t1(std::move(t0)); ASSERT_OR_FAIL(t1.valid()); ASSERT_OR_FAIL(!t0.valid()); - + t1 = std::move(t1); ASSERT_OR_FAIL(t1.valid()); ASSERT_OR_FAIL(!t0.valid()); - + ProducerToken t2(q); t2 = std::move(t1); ASSERT_OR_FAIL(t2.valid()); ASSERT_OR_FAIL(t1.valid()); ASSERT_OR_FAIL(!t0.valid()); - + t0 = std::move(t1); ASSERT_OR_FAIL(t2.valid()); ASSERT_OR_FAIL(!t1.valid()); @@ -1038,15 +1039,15 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(Traits::malloc_count() == 5); // 2 for each producer + 1 for initial block pool ASSERT_OR_FAIL(Traits::free_count() == Traits::malloc_count()); - + return true; } - + bool multi_producers() { typedef TestTraits<16> Traits; Traits::reset(); - + { ConcurrentQueue q; ProducerToken t0(q); @@ -1054,13 +1055,13 @@ class ConcurrentQueueTests : public TestClass ProducerToken t2(q); ProducerToken t3(q); ProducerToken t4(q); - + ASSERT_OR_FAIL(q.enqueue(t0, 0)); ASSERT_OR_FAIL(q.enqueue(t1, 1)); ASSERT_OR_FAIL(q.enqueue(t2, 2)); ASSERT_OR_FAIL(q.enqueue(t3, 3)); 
ASSERT_OR_FAIL(q.enqueue(t4, 4)); - + int item; ASSERT_OR_FAIL(q.try_dequeue_from_producer(t0, item) && item == 0 && !q.try_dequeue_from_producer(t0, item)); ASSERT_OR_FAIL(q.try_dequeue_from_producer(t1, item) && item == 1 && !q.try_dequeue_from_producer(t1, item)); @@ -1068,21 +1069,21 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue_from_producer(t3, item) && item == 3 && !q.try_dequeue_from_producer(t3, item)); ASSERT_OR_FAIL(q.try_dequeue_from_producer(t4, item) && item == 4 && !q.try_dequeue_from_producer(t4, item)); } - + ASSERT_OR_FAIL(Traits::malloc_count() == 11); // 2 for each producer + 1 for initial block pool ASSERT_OR_FAIL(Traits::free_count() == Traits::malloc_count()); - + // Implicit Traits::reset(); { ConcurrentQueue q; std::atomic success[5]; std::atomic done(0); - + for (int i = 0; i != 5; ++i) { success[i].store(false, std::memory_order_relaxed); } - + for (int i = 0; i != 5; ++i) { SimpleThread t([&](int j) { success[j].store(q.enqueue(j), std::memory_order_relaxed); @@ -1093,11 +1094,11 @@ class ConcurrentQueueTests : public TestClass while (done.load(std::memory_order_acquire) != 5) { continue; } - + for (int i = 0; i != 5; ++i) { ASSERT_OR_FAIL(success[i].load(std::memory_order_relaxed)); } - + // Cannot rely on order that producers are added (there's a race condition), only that they are all there somewhere. // Also, all items may not be visible to this thread yet. bool itemDequeued[5] = { false, false, false, false, false }; @@ -1112,57 +1113,57 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(itemDequeued[i]); } } - + ASSERT_OR_FAIL(Traits::malloc_count() <= 11 && Traits::malloc_count() >= 3); // 2 for each producer (depending on thread ID re-use) + 1 for initial block pool ASSERT_OR_FAIL(Traits::free_count() == Traits::malloc_count()); - + return true; } - + bool producer_reuse() { typedef TestTraits<16> Traits; - + Traits::reset(); { // Explicit ConcurrentQueue q; - + { ProducerToken t0(q); } - + { ProducerToken t1(q); } - + { ProducerToken t2(q); ProducerToken t3(q); ProducerToken t4(q); ProducerToken t5(q); } - + { ProducerToken t6(q); ProducerToken t7(q); } - + { ProducerToken t8(q); ProducerToken t9(q); } - + { ProducerToken t10(q); ProducerToken t11(q); } } - + ASSERT_OR_FAIL(Traits::malloc_count() == 9); // 2 for max number of live producers + 1 for initial block pool ASSERT_OR_FAIL(Traits::free_count() == Traits::malloc_count()); - + #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED Traits::reset(); { @@ -1170,22 +1171,22 @@ class ConcurrentQueueTests : public TestClass const int MAX_THREADS = 48; ConcurrentQueue q(Traits::BLOCK_SIZE * (MAX_THREADS + 1)); ASSERT_OR_FAIL(Traits::malloc_count() == 1); // Initial block pool - + SimpleThread t0([&]() { q.enqueue(0); }); t0.join(); ASSERT_OR_FAIL(Traits::malloc_count() == 3); // Implicit producer - + SimpleThread t1([&]() { q.enqueue(1); }); t1.join(); ASSERT_OR_FAIL(Traits::malloc_count() == 3); - + SimpleThread t2([&]() { q.enqueue(2); }); t2.join(); ASSERT_OR_FAIL(Traits::malloc_count() == 3); - + q.enqueue(3); ASSERT_OR_FAIL(Traits::malloc_count() == 3); - + int item; int i = 0; while (q.try_dequeue(item)) { @@ -1194,7 +1195,7 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(i == 4); ASSERT_OR_FAIL(Traits::malloc_count() == 3); - + std::vector threads(MAX_THREADS); for (int rep = 0; rep != 2; ++rep) { for (std::size_t tid = 0; tid != threads.size(); ++tid) { @@ -1223,9 +1224,9 @@ class ConcurrentQueueTests : public TestClass 
ASSERT_OR_FAIL(Traits::malloc_count() <= 2 * MAX_THREADS + 1); } } - ASSERT_OR_FAIL(Traits::free_count() == Traits::malloc_count()); - - + ASSERT_OR_FAIL(Traits::free_count() == Traits::malloc_count()); + + Traits::reset(); { // Test many threads and implicit queues being created and destroyed concurrently @@ -1237,7 +1238,7 @@ class ConcurrentQueueTests : public TestClass ConcurrentQueue q(1); q.enqueue(i); } - + ConcurrentQueue q(15); for (int i = 0; i != 100; ++i) { q.enqueue(i); @@ -1258,22 +1259,22 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(success[tid]); } } - ASSERT_OR_FAIL(Traits::free_count() == Traits::malloc_count()); + ASSERT_OR_FAIL(Traits::free_count() == Traits::malloc_count()); #endif - + return true; } - + bool block_reuse() { int item; - + typedef TestTraits<4> SmallBlocks; SmallBlocks::reset(); { ConcurrentQueue q(8); // 2 blocks ProducerToken t(q); - + for (int j = 0; j != 3; ++j) { for (int i = 0; i != 4; ++i) { ASSERT_OR_FAIL(q.enqueue(t, i)); @@ -1282,7 +1283,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue_from_producer(t, item)); ASSERT_OR_FAIL(item == i); } - + for (int i = 0; i != 8; ++i) { ASSERT_OR_FAIL(q.enqueue(t, i)); } @@ -1297,21 +1298,21 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue_from_producer(t, item)); ASSERT_OR_FAIL(item == ((i + 4) & 7)); } - + ASSERT_OR_FAIL(!q.try_dequeue_from_producer(t, item)); } } - + ASSERT_OR_FAIL(SmallBlocks::malloc_count() == 3); ASSERT_OR_FAIL(SmallBlocks::free_count() == SmallBlocks::malloc_count()); - - + + typedef TestTraits<8192> HugeBlocks; HugeBlocks::reset(); { ConcurrentQueue q(8192 * 2); // 2 blocks ProducerToken t(q); - + for (int j = 0; j != 3; ++j) { for (int i = 0; i != 8192; ++i) { ASSERT_OR_FAIL(q.enqueue(t, i)); @@ -1320,7 +1321,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue_from_producer(t, item)); ASSERT_OR_FAIL(item == i); } - + for (int i = 0; i != 8192 * 2; ++i) { ASSERT_OR_FAIL(q.enqueue(t, i)); } @@ -1335,20 +1336,20 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue_from_producer(t, item)); ASSERT_OR_FAIL(item == ((i + 8192) & (8192 * 2 - 1))); } - + ASSERT_OR_FAIL(!q.try_dequeue_from_producer(t, item)); } } - + ASSERT_OR_FAIL(HugeBlocks::malloc_count() == 3); ASSERT_OR_FAIL(HugeBlocks::free_count() == HugeBlocks::malloc_count()); - - + + // Implicit SmallBlocks::reset(); { ConcurrentQueue q(8); // 2 blocks - + for (int j = 0; j != 3; ++j) { for (int i = 0; i != 4; ++i) { ASSERT_OR_FAIL(q.enqueue(i)); @@ -1357,7 +1358,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == i); } - + for (int i = 0; i != 8; ++i) { ASSERT_OR_FAIL(q.enqueue(i)); } @@ -1372,18 +1373,18 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == ((i + 4) & 7)); } - + ASSERT_OR_FAIL(!q.try_dequeue(item)); } } - + ASSERT_OR_FAIL(SmallBlocks::malloc_count() == 3); ASSERT_OR_FAIL(SmallBlocks::free_count() == SmallBlocks::malloc_count()); - + HugeBlocks::reset(); { ConcurrentQueue q(8192 * 2); // 2 blocks - + for (int j = 0; j != 3; ++j) { for (int i = 0; i != 8192; ++i) { ASSERT_OR_FAIL(q.enqueue(i)); @@ -1392,7 +1393,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == i); } - + for (int i = 0; i != 8192 * 2; ++i) { ASSERT_OR_FAIL(q.enqueue(i)); } @@ -1407,26 +1408,26 @@ class ConcurrentQueueTests : public TestClass 
ASSERT_OR_FAIL(q.try_dequeue(item)); ASSERT_OR_FAIL(item == ((i + 8192) & (8192 * 2 - 1))); } - + ASSERT_OR_FAIL(!q.try_dequeue(item)); } } - + ASSERT_OR_FAIL(HugeBlocks::malloc_count() == 3); ASSERT_OR_FAIL(HugeBlocks::free_count() == HugeBlocks::malloc_count()); - + return true; } - + bool block_recycling() { typedef TestTraits<4> SmallBlocks; SmallBlocks::reset(); - + ConcurrentQueue q(24); // 6 blocks SimpleThread threads[4]; std::atomic success(true); - + for (int i = 0; i != 4; ++i) { threads[i] = SimpleThread([&](int i) { int item; @@ -1452,19 +1453,19 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != 4; ++i) { threads[i].join(); } - + int item; int prevItems[4] = { -1, -1, -1, -1 }; while (q.try_dequeue(item)) { ASSERT_OR_FAIL((item & 0x0FFFFFFF) > prevItems[item >> 28]); prevItems[item >> 28] = item & 0x0FFFFFFF; } - + ASSERT_OR_FAIL(success.load(std::memory_order_relaxed)); - + return true; } - + bool leftovers_destroyed() { typedef TestTraits<4> Traits; @@ -1473,7 +1474,7 @@ class ConcurrentQueueTests : public TestClass { ConcurrentQueue q(4); // One block ProducerToken t(q); - + Foo item; q.enqueue(t, Foo()); q.enqueue(t, Foo()); @@ -1483,13 +1484,13 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(Foo::createCount() == 4); ASSERT_OR_FAIL(Foo::destroyCount() == 7); ASSERT_OR_FAIL(Foo::destroyedInOrder()); - + Traits::reset(); Foo::reset(); { ConcurrentQueue q(4); // One block ProducerToken t(q); - + q.enqueue(t, Foo()); q.enqueue(t, Foo()); q.enqueue(t, Foo()); @@ -1498,13 +1499,13 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(Foo::createCount() == 4); ASSERT_OR_FAIL(Foo::destroyCount() == 8); ASSERT_OR_FAIL(Foo::destroyedInOrder()); - + Traits::reset(); Foo::reset(); { ConcurrentQueue q(8); // Two blocks ProducerToken t(q); - + for (int i = 0; i != 8; ++i) { q.enqueue(t, Foo()); } @@ -1512,18 +1513,18 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(Foo::createCount() == 8); ASSERT_OR_FAIL(Foo::destroyCount() == 16); ASSERT_OR_FAIL(Foo::destroyedInOrder()); - + Traits::reset(); Foo::reset(); { ConcurrentQueue q(12); // Three blocks ProducerToken t(q); - + // Last block only partially full for (int i = 0; i != 10; ++i) { q.enqueue(t, Foo()); } - + // First block only partially full Foo item; ASSERT_OR_FAIL(q.try_dequeue_from_producer(t, item)); @@ -1533,14 +1534,14 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(Foo::createCount() == 11); ASSERT_OR_FAIL(Foo::destroyCount() == 21); ASSERT_OR_FAIL(Foo::destroyedInOrder()); - - + + // Implicit Traits::reset(); Foo::reset(); { ConcurrentQueue q(4); // One block - + Foo item; q.enqueue(Foo()); q.enqueue(Foo()); @@ -1550,12 +1551,12 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(Foo::createCount() == 4); ASSERT_OR_FAIL(Foo::destroyCount() == 7); ASSERT_OR_FAIL(Foo::destroyedInOrder()); - + Traits::reset(); Foo::reset(); { ConcurrentQueue q(4); // One block - + q.enqueue(Foo()); q.enqueue(Foo()); q.enqueue(Foo()); @@ -1564,12 +1565,12 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(Foo::createCount() == 4); ASSERT_OR_FAIL(Foo::destroyCount() == 8); ASSERT_OR_FAIL(Foo::destroyedInOrder()); - + Traits::reset(); Foo::reset(); { ConcurrentQueue q(8); // Two blocks - + for (int i = 0; i != 8; ++i) { q.enqueue(Foo()); } @@ -1577,17 +1578,17 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(Foo::createCount() == 8); ASSERT_OR_FAIL(Foo::destroyCount() == 16); ASSERT_OR_FAIL(Foo::destroyedInOrder()); - + 
Traits::reset(); Foo::reset(); { ConcurrentQueue q(12); // Three blocks - + // Last block only partially full for (int i = 0; i != 10; ++i) { q.enqueue(Foo()); } - + // First block only partially full Foo item; ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -1597,68 +1598,68 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(Foo::createCount() == 11); ASSERT_OR_FAIL(Foo::destroyCount() == 21); ASSERT_OR_FAIL(Foo::destroyedInOrder()); - + return true; } - + bool block_index_resized() { typedef TestTraits<4, 2> Traits; Traits::reset(); Foo::reset(); - + { ConcurrentQueue q(8); // 2 blocks, matches initial index size ProducerToken t(q); - + for (int i = 0; i != 1024; ++i) { q.enqueue(t, Foo()); } - + for (int i = 0; i != 1024; ++i) { Foo item; q.try_dequeue_from_producer(t, item); } } - + ASSERT_OR_FAIL(Traits::malloc_count() == 1 + 2 + 254 + 7); ASSERT_OR_FAIL(Traits::free_count() == Traits::malloc_count()); - + ASSERT_OR_FAIL(Foo::createCount() == 2048); ASSERT_OR_FAIL(Foo::destroyCount() == 3072); ASSERT_OR_FAIL(Foo::destroyedInOrder()); - + // Implicit Traits::reset(); Foo::reset(); { ConcurrentQueue q(8); // 2 blocks - + for (int i = 0; i != 1024; ++i) { q.enqueue(Foo()); } - + for (int i = 0; i != 1024; ++i) { Foo item; q.try_dequeue(item); } } - + ASSERT_OR_FAIL(Traits::malloc_count() == 1 + 2 + 254 + 6); ASSERT_OR_FAIL(Traits::free_count() == Traits::malloc_count()); - + ASSERT_OR_FAIL(Foo::createCount() == 2048); ASSERT_OR_FAIL(Foo::destroyCount() == 3072); ASSERT_OR_FAIL(Foo::destroyedInOrder()); - + return true; } - + bool try_dequeue() { ConcurrentQueue q; int item; - + // Producer token { for (int i = 0; i != 50; ++i) { @@ -1667,8 +1668,8 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.enqueue(t, i * 100 + j)); } } - - + + for (int i = 0; i != 50; ++i) { for (int j = 0; j != 100; ++j) { ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -1677,7 +1678,7 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // Mixed producer types { for (int i = 0; i != 25; ++i) { @@ -1704,7 +1705,7 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // Mixed producer types with consumer token { for (int i = 0; i != 25; ++i) { @@ -1736,15 +1737,15 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(!q.try_dequeue(item)); ASSERT_OR_FAIL(!q.try_dequeue(t, item)); } - + return true; } - + bool try_dequeue_threaded() { int item; ConcurrentQueue q; - + // Threaded consumption with tokens { SimpleThread threads[20]; @@ -1769,14 +1770,14 @@ class ConcurrentQueueTests : public TestClass } }); } - + for (int i = 0; i != 20; ++i) { threads[i].join(); } - + ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // Threaded consumption { SimpleThread threads[20]; @@ -1799,36 +1800,36 @@ class ConcurrentQueueTests : public TestClass } }); } - + for (int i = 0; i != 20; ++i) { threads[i].join(); } - + ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + return true; } - + bool try_dequeue_bulk() { typedef TestTraits<4> Traits; int items[5]; - + // Explicit producer { Traits::reset(); ConcurrentQueue q; ProducerToken tok(q); - + ASSERT_OR_FAIL(q.try_dequeue_bulk(items, 5) == 0); - + q.enqueue(tok, 17); ASSERT_OR_FAIL(q.try_dequeue_bulk(items, 5) == 1); ASSERT_OR_FAIL(items[0] == 17); - + ASSERT_OR_FAIL(!q.try_dequeue(items[0])); - + for (int i = 0; i != 4; ++i) { q.enqueue(tok, i + 1); } @@ -1836,9 +1837,9 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != 4; ++i) { ASSERT_OR_FAIL(items[i] == i + 1); } - + 
ASSERT_OR_FAIL(!q.try_dequeue(items[0])); - + for (int i = 0; i != 5; ++i) { q.enqueue(tok, i + 1); } @@ -1846,9 +1847,9 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != 5; ++i) { ASSERT_OR_FAIL(items[i] == i + 1); } - + ASSERT_OR_FAIL(!q.try_dequeue(items[0])); - + for (int i = 0; i != 6; ++i) { q.enqueue(tok, i + 1); } @@ -1859,7 +1860,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue(items[0])); ASSERT_OR_FAIL(items[0] == 6); ASSERT_OR_FAIL(!q.try_dequeue(items[0])); - + for (int i = 0; i != 10; ++i) { q.enqueue(tok, i + 1); } @@ -1871,20 +1872,20 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(items[0])); } - + // Implicit producer { Traits::reset(); ConcurrentQueue q; - + ASSERT_OR_FAIL(q.try_dequeue_bulk(items, 5) == 0); - + q.enqueue(17); ASSERT_OR_FAIL(q.try_dequeue_bulk(items, 5) == 1); ASSERT_OR_FAIL(items[0] == 17); - + ASSERT_OR_FAIL(!q.try_dequeue(items[0])); - + for (int i = 0; i != 4; ++i) { q.enqueue(i + 1); } @@ -1892,9 +1893,9 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != 4; ++i) { ASSERT_OR_FAIL(items[i] == i + 1); } - + ASSERT_OR_FAIL(!q.try_dequeue(items[0])); - + for (int i = 0; i != 5; ++i) { q.enqueue(i + 1); } @@ -1902,9 +1903,9 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != 5; ++i) { ASSERT_OR_FAIL(items[i] == i + 1); } - + ASSERT_OR_FAIL(!q.try_dequeue(items[0])); - + for (int i = 0; i != 6; ++i) { q.enqueue(i + 1); } @@ -1915,7 +1916,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue(items[0])); ASSERT_OR_FAIL(items[0] == 6); ASSERT_OR_FAIL(!q.try_dequeue(items[0])); - + for (int i = 0; i != 10; ++i) { q.enqueue(i + 1); } @@ -1927,15 +1928,15 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(items[0])); } - + return true; } - + bool try_dequeue_bulk_threaded() { typedef TestTraits<2> Traits; int dummy; - + // Explicit producer { Traits::reset(); @@ -1979,12 +1980,12 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != 2; ++i) { threads[i].join(); } - + ASSERT_OR_FAIL(success[0]); ASSERT_OR_FAIL(success[1]); ASSERT_OR_FAIL(!q.try_dequeue(dummy)); } - + // Implicit producer { Traits::reset(); @@ -2027,17 +2028,17 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != 2; ++i) { threads[i].join(); } - + ASSERT_OR_FAIL(success[0]); ASSERT_OR_FAIL(success[1]); ASSERT_OR_FAIL(!q.try_dequeue(dummy)); } - + // Multithreaded consumption { Traits::reset(); ConcurrentQueue q; - + bool success[20]; SimpleThread threads[20]; for (int i = 0; i != 10; ++i) { @@ -2056,7 +2057,7 @@ class ConcurrentQueueTests : public TestClass } }, i); } - + std::atomic dequeueCount(0); for (int i = 10; i != 20; ++i) { success[i] = true; @@ -2067,7 +2068,7 @@ class ConcurrentQueueTests : public TestClass } int items[15]; ConsumerToken t(q); - + while (dequeueCount.load(std::memory_order_relaxed) != 1000) { size_t count; if ((i & 1) == 1) { @@ -2076,7 +2077,7 @@ class ConcurrentQueueTests : public TestClass else { count = q.try_dequeue_bulk(t, items, 15); } - + if (count > 15) { success[i] = false; } @@ -2090,21 +2091,21 @@ class ConcurrentQueueTests : public TestClass } }, i); } - + for (int i = 0; i != 20; ++i) { threads[i].join(); } - + int item; ASSERT_OR_FAIL(!q.try_dequeue(item)); for (int i = 0; i != 20; ++i) { ASSERT_OR_FAIL(success[i]); } } - + return true; } - + bool implicit_producer_hash() { for (int j = 0; j != 5; ++j) { @@ -2115,11 +2116,11 @@ class 
ConcurrentQueueTests : public TestClass q.enqueue(7); })); } - + for (auto it = threads.begin(); it != threads.end(); ++it) { it->join(); } - + int item; ConsumerToken t(q); for (auto i = 0; i != 20; ++i) { @@ -2133,17 +2134,17 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + return true; } - + bool index_wrapping() { { // Implicit ConcurrentQueue q(16); int item; - + for (int i = 0; i != (1 << 18); ++i) { if ((i & 16) == 0) { ASSERT_OR_FAIL(q.try_enqueue(i)); @@ -2155,13 +2156,13 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + { // Explicit ConcurrentQueue q(16); ProducerToken tok(q); int item; - + for (int i = 0; i != (1 << 18); ++i) { if ((i & 16) == 0) { ASSERT_OR_FAIL(q.try_enqueue(tok, i)); @@ -2173,12 +2174,12 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + { // Implicit extra small ConcurrentQueue q(1); int item; - + for (int i = 0; i != 4097; ++i) { q.enqueue(i); ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -2186,13 +2187,13 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + { // Explicit extra small ConcurrentQueue q(1); ProducerToken tok(q); int item; - + for (int i = 0; i != 4097; ++i) { q.enqueue(tok, i); ASSERT_OR_FAIL(q.try_dequeue(item)); @@ -2200,16 +2201,16 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + return true; } - + struct SizeLimitTraits : public MallocTrackingTraits { static const size_t BLOCK_SIZE = 2; static const size_t MAX_SUBQUEUE_SIZE = 5; // Will round up to 6 because of block size }; - + bool subqueue_size_limit() { { @@ -2217,7 +2218,7 @@ class ConcurrentQueueTests : public TestClass ConcurrentQueue q; ProducerToken t(q); int item; - + ASSERT_OR_FAIL(q.enqueue(t, 1)); ASSERT_OR_FAIL(q.enqueue(t, 2)); ASSERT_OR_FAIL(q.enqueue(t, 3)); @@ -2226,19 +2227,19 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.enqueue(t, 6)); ASSERT_OR_FAIL(!q.enqueue(t, 7)); ASSERT_OR_FAIL(!q.enqueue(t, 8)); - + ASSERT_OR_FAIL(q.try_dequeue(item) && item == 1); ASSERT_OR_FAIL(!q.enqueue(t, 7)); // Can't reuse block until it's completely empty ASSERT_OR_FAIL(q.try_dequeue(item) && item == 2); ASSERT_OR_FAIL(q.enqueue(t, 7)); ASSERT_OR_FAIL(q.enqueue(t, 8)); ASSERT_OR_FAIL(!q.enqueue(t, 9)); - + ASSERT_OR_FAIL(q.try_dequeue(item) && item == 3); ASSERT_OR_FAIL(!q.enqueue(t, 9)); ASSERT_OR_FAIL(q.try_dequeue(item) && item == 4); ASSERT_OR_FAIL(q.enqueue(t, 9)); - + for (int i = 5; i <= 9; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item) && item == i); } @@ -2250,7 +2251,7 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_enqueue(t, 7)); ASSERT_OR_FAIL(!q.enqueue(t, 7)); - + // Bulk int items[6]; ASSERT_OR_FAIL(q.try_dequeue_bulk(items, 6) == 6); @@ -2266,12 +2267,12 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue_bulk(items, 1) == 1); ASSERT_OR_FAIL(!q.enqueue(t, 100)); } - + { // Implicit ConcurrentQueue q; int item; - + ASSERT_OR_FAIL(q.enqueue(1)); ASSERT_OR_FAIL(q.enqueue(2)); ASSERT_OR_FAIL(q.enqueue(3)); @@ -2280,19 +2281,19 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.enqueue(6)); ASSERT_OR_FAIL(!q.enqueue(7)); ASSERT_OR_FAIL(!q.enqueue(8)); - + ASSERT_OR_FAIL(q.try_dequeue(item) && item == 1); ASSERT_OR_FAIL(!q.enqueue(7)); // Can't reuse block until it's completely empty ASSERT_OR_FAIL(q.try_dequeue(item) && item == 2); ASSERT_OR_FAIL(q.enqueue(7)); 
ASSERT_OR_FAIL(q.enqueue(8)); ASSERT_OR_FAIL(!q.enqueue(9)); - + ASSERT_OR_FAIL(q.try_dequeue(item) && item == 3); ASSERT_OR_FAIL(!q.enqueue(9)); ASSERT_OR_FAIL(q.try_dequeue(item) && item == 4); ASSERT_OR_FAIL(q.enqueue(9)); - + for (int i = 5; i <= 9; ++i) { ASSERT_OR_FAIL(q.try_dequeue(item) && item == i); } @@ -2304,7 +2305,7 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q.try_enqueue(7)); ASSERT_OR_FAIL(!q.enqueue(7)); - + // Bulk int items[6]; ASSERT_OR_FAIL(q.try_dequeue_bulk(items, 6) == 6); @@ -2320,22 +2321,22 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue_bulk(items, 1) == 1); ASSERT_OR_FAIL(!q.enqueue(100)); } - + return true; } - + bool exceptions() { typedef TestTraits<4, 2> Traits; - + { // Explicit, basic // enqueue ConcurrentQueue q; ProducerToken tok(q); - + ThrowingMovable::reset(); - + bool threw = false; try { q.enqueue(tok, ThrowingMovable(1, true)); @@ -2347,23 +2348,23 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 0); - + ASSERT_OR_FAIL(q.enqueue(tok, ThrowingMovable(2))); ThrowingMovable result(-1); ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 2); ASSERT_OR_FAIL(result.moved); ASSERT_OR_FAIL(!q.try_dequeue(result)); - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 3); - + // dequeue ThrowingMovable::reset(); q.enqueue(tok, ThrowingMovable(10)); q.enqueue(tok, ThrowingMovable(11, false, true)); q.enqueue(tok, ThrowingMovable(12)); ASSERT_OR_FAIL(q.size_approx() == 3); - + ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 10); threw = false; @@ -2376,29 +2377,29 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 1); - + ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 12); ASSERT_OR_FAIL(result.moved); - + ASSERT_OR_FAIL(!q.try_dequeue(result)); q.enqueue(tok, ThrowingMovable(13)); ASSERT_OR_FAIL(q.size_approx() == 1); ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 13); ASSERT_OR_FAIL(!q.try_dequeue(result)); - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 8); } - + { // Explicit, on and off block boundaries // enqueue ConcurrentQueue q; ProducerToken tok(q); - + ThrowingMovable::reset(); - + for (int i = 0; i != 3; ++i) { q.enqueue(tok, ThrowingMovable(i)); } @@ -2412,7 +2413,7 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 3); - + q.enqueue(tok, ThrowingMovable(4)); threw = false; try { @@ -2426,7 +2427,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 4); q.enqueue(tok, ThrowingMovable(6)); - + ThrowingMovable result(-1); ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 0); @@ -2439,9 +2440,9 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 6); ASSERT_OR_FAIL(!q.try_dequeue(result)); - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 12); - + // dequeue ThrowingMovable::reset(); q.enqueue(tok, ThrowingMovable(10, false, true)); @@ -2452,7 +2453,7 @@ class ConcurrentQueueTests : public TestClass q.enqueue(tok, ThrowingMovable(15, false, true)); q.enqueue(tok, ThrowingMovable(16)); ASSERT_OR_FAIL(q.size_approx() == 7); - + for (int i = 10; i != 17; ++i) { if (i == 10 || (i >= 13 && i <= 15)) { threw = false; @@ -2473,23 +2474,23 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(q.size_approx() 
== (std::uint32_t)(16 - i)); } - + ASSERT_OR_FAIL(!q.try_dequeue(result)); q.enqueue(tok, ThrowingMovable(20)); ASSERT_OR_FAIL(q.size_approx() == 1); ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 20); ASSERT_OR_FAIL(!q.try_dequeue(result)); - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 16); } - + { // Explicit bulk // enqueue ConcurrentQueue q; ProducerToken tok(q); - + ThrowingMovable::reset(); std::vector items; items.reserve(5); @@ -2499,7 +2500,7 @@ class ConcurrentQueueTests : public TestClass items.push_back(ThrowingMovable(4)); items.push_back(ThrowingMovable(5)); items.back().throwOnCctor = true; - + bool threw = false; try { q.enqueue_bulk(tok, std::make_move_iterator(items.begin()), 5); @@ -2512,7 +2513,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 0); q.enqueue(tok, ThrowingMovable(6)); - + threw = false; try { q.enqueue_bulk(tok, std::make_move_iterator(items.begin()), 5); @@ -2523,15 +2524,15 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 1); - + ThrowingMovable result(-1); ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 6); ASSERT_OR_FAIL(result.moved); ASSERT_OR_FAIL(!q.try_dequeue(result)); - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 15); - + // dequeue ThrowingMovable::reset(); q.enqueue(tok, ThrowingMovable(10)); @@ -2541,7 +2542,7 @@ class ConcurrentQueueTests : public TestClass q.enqueue(tok, ThrowingMovable(14, false, true, true)); // std::back_inserter turns an assignment into a ctor call q.enqueue(tok, ThrowingMovable(15)); ASSERT_OR_FAIL(q.size_approx() == 6); - + std::vector results; results.reserve(5); ASSERT_OR_FAIL(q.try_dequeue_bulk(std::back_inserter(results), 2)); @@ -2564,22 +2565,22 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.size_approx() == 0); ASSERT_OR_FAIL(!q.try_dequeue(result)); ASSERT_OR_FAIL(q.try_dequeue_bulk(std::back_inserter(results), 1) == 0); - + ASSERT_OR_FAIL(results.size() == 4); ASSERT_OR_FAIL(results[2].id == 12); ASSERT_OR_FAIL(results[3].id == 13); - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 12); } - - + + { // Implicit, basic // enqueue ConcurrentQueue q; - + ThrowingMovable::reset(); - + bool threw = false; try { q.enqueue(ThrowingMovable(1, true)); @@ -2591,23 +2592,23 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 0); - + ASSERT_OR_FAIL(q.enqueue(ThrowingMovable(2))); ThrowingMovable result(-1); ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 2); ASSERT_OR_FAIL(result.moved); ASSERT_OR_FAIL(!q.try_dequeue(result)); - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 3); - + // dequeue ThrowingMovable::reset(); q.enqueue(ThrowingMovable(10)); q.enqueue(ThrowingMovable(11, false, true)); q.enqueue(ThrowingMovable(12)); ASSERT_OR_FAIL(q.size_approx() == 3); - + ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 10); threw = false; @@ -2620,28 +2621,28 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 1); - + ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 12); ASSERT_OR_FAIL(result.moved); - + ASSERT_OR_FAIL(!q.try_dequeue(result)); q.enqueue(ThrowingMovable(13)); ASSERT_OR_FAIL(q.size_approx() == 1); ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 13); ASSERT_OR_FAIL(!q.try_dequeue(result)); - + 
ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 8); } - + { // Implicit, on and off block boundaries // enqueue ConcurrentQueue q; - + ThrowingMovable::reset(); - + for (int i = 0; i != 3; ++i) { q.enqueue(ThrowingMovable(i)); } @@ -2655,7 +2656,7 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 3); - + q.enqueue(ThrowingMovable(4)); threw = false; try { @@ -2669,7 +2670,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 4); q.enqueue(ThrowingMovable(6)); - + ThrowingMovable result(-1); ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 0); @@ -2682,9 +2683,9 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 6); ASSERT_OR_FAIL(!q.try_dequeue(result)); - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 12); - + // dequeue ThrowingMovable::reset(); q.enqueue(ThrowingMovable(10, false, true)); @@ -2695,7 +2696,7 @@ class ConcurrentQueueTests : public TestClass q.enqueue(ThrowingMovable(15, false, true)); q.enqueue(ThrowingMovable(16)); ASSERT_OR_FAIL(q.size_approx() == 7); - + for (int i = 10; i != 17; ++i) { if (i == 10 || (i >= 13 && i <= 15)) { threw = false; @@ -2716,22 +2717,22 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(q.size_approx() == (std::uint32_t)(16 - i)); } - + ASSERT_OR_FAIL(!q.try_dequeue(result)); q.enqueue(ThrowingMovable(20)); ASSERT_OR_FAIL(q.size_approx() == 1); ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 20); ASSERT_OR_FAIL(!q.try_dequeue(result)); - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 16); } - + { // Impplicit bulk // enqueue ConcurrentQueue q; - + ThrowingMovable::reset(); std::vector items; items.reserve(5); @@ -2741,7 +2742,7 @@ class ConcurrentQueueTests : public TestClass items.push_back(ThrowingMovable(4)); items.push_back(ThrowingMovable(5)); items.back().throwOnCctor = true; - + bool threw = false; try { q.enqueue_bulk(std::make_move_iterator(items.begin()), 5); @@ -2754,7 +2755,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 0); q.enqueue(ThrowingMovable(6)); - + threw = false; try { q.enqueue_bulk(std::make_move_iterator(items.begin()), 5); @@ -2765,15 +2766,15 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(threw); ASSERT_OR_FAIL(q.size_approx() == 1); - + ThrowingMovable result(-1); ASSERT_OR_FAIL(q.try_dequeue(result)); ASSERT_OR_FAIL(result.id == 6); ASSERT_OR_FAIL(result.moved); ASSERT_OR_FAIL(!q.try_dequeue(result)); - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 15); - + // dequeue ThrowingMovable::reset(); q.enqueue(ThrowingMovable(10)); @@ -2783,7 +2784,7 @@ class ConcurrentQueueTests : public TestClass q.enqueue(ThrowingMovable(14, false, true, true)); // std::back_inserter turns an assignment into a ctor call q.enqueue(ThrowingMovable(15)); ASSERT_OR_FAIL(q.size_approx() == 6); - + std::vector results; results.reserve(5); ASSERT_OR_FAIL(q.try_dequeue_bulk(std::back_inserter(results), 2)); @@ -2804,19 +2805,19 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(q.size_approx() == 0); ASSERT_OR_FAIL(!q.try_dequeue(result)); ASSERT_OR_FAIL(q.try_dequeue_bulk(std::back_inserter(results), 1) == 0); - + ASSERT_OR_FAIL(results.size() == 4); ASSERT_OR_FAIL(results[2].id == 12); ASSERT_OR_FAIL(results[3].id == 13); - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() == 12); } - + { // Threaded 
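			// Six worker threads concurrently mix single and bulk enqueues/dequeues of
			// ThrowingMovable items that throw on a schedule derived from (magic & 7);
			// after the threads join, the queue is drained (swallowing any in-flight
			// exceptions) and the ctor/dtor counts are compared to check that nothing leaked.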
ConcurrentQueue q; ThrowingMovable::reset(); - + std::vector threads(6); for (std::size_t tid = 0; tid != threads.size(); ++tid) { threads[tid] = SimpleThread([&](std::size_t tid) { @@ -2824,15 +2825,15 @@ class ConcurrentQueueTests : public TestClass inVec.push_back(ThrowingMovable(1)); inVec.push_back(ThrowingMovable(2)); inVec.push_back(ThrowingMovable(3)); - + std::vector outVec; outVec.push_back(ThrowingMovable(-1)); outVec.push_back(ThrowingMovable(-1)); outVec.push_back(ThrowingMovable(-1)); - + ProducerToken tok(q); ThrowingMovable result(-1); - + for (std::size_t i = 0; i != 8192; ++i) { auto magic = (tid + 1) * i + tid * 17 + i; auto op = magic & 7; @@ -2877,7 +2878,7 @@ class ConcurrentQueueTests : public TestClass for (std::size_t i = 0; i != threads.size(); ++i) { threads[i].join(); } - + ThrowingMovable result(-1); while (true) { try { @@ -2888,20 +2889,20 @@ class ConcurrentQueueTests : public TestClass catch (ThrowingMovable*) { } } - + ASSERT_OR_FAIL(ThrowingMovable::destroyCount() + 1 == ThrowingMovable::ctorCount()); } - + return true; } - + bool test_threaded() { typedef TestTraits<4> Traits; Traits::reset(); - + bool inOrder = true; - + { // Single producer, single consumer ConcurrentQueue q; @@ -2924,12 +2925,12 @@ class ConcurrentQueueTests : public TestClass } } }); - + a.join(); b.join(); } ASSERT_OR_FAIL(inOrder); - + { // Single producer, multi consumer ConcurrentQueue q; @@ -2956,34 +2957,34 @@ class ConcurrentQueueTests : public TestClass int item; for (int i = 0; i != 123456; ++i) q.try_dequeue_from_producer(t, item); }); - + a.join(); b.join(); c.join(); d.join(); } ASSERT_OR_FAIL(inOrder); - + ASSERT_OR_FAIL(Traits::malloc_count() == Traits::free_count()); - + return true; } - + bool test_threaded_bulk() { typedef TestTraits<2> Traits; - + // Enqueue bulk (implicit) Traits::reset(); { ConcurrentQueue q; SimpleThread threads[2]; bool success[2]; - + int stuff[] = { 1, 2, 3, 4, 5 }; for (int i = 0; i != 2; ++i) { success[i] = true; - + if (i == 0) { // Enqueue bulk threads[i] = SimpleThread([&](int j) { @@ -3015,22 +3016,22 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != 2; ++i) { threads[i].join(); } - + ASSERT_OR_FAIL(success[0]); ASSERT_OR_FAIL(success[1]); } - + // Enqueue bulk (while somebody is dequeueing (with tokens)) Traits::reset(); { ConcurrentQueue q; SimpleThread threads[2]; bool success[2]; - + int stuff[] = { 1, 2, 3, 4, 5 }; for (int i = 0; i != 2; ++i) { success[i] = true; - + if (i == 0) { // Enqueue bulk threads[i] = SimpleThread([&](int j) { @@ -3064,20 +3065,20 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != 2; ++i) { threads[i].join(); } - + ASSERT_OR_FAIL(success[0]); ASSERT_OR_FAIL(success[1]); } - + return true; } - + template bool full_api() { // A simple test that exercises the full public API (just to make sure every function is implemented // and works on at least the most basic level) - + // enqueue(T const&) { ConcurrentQueue q; @@ -3089,7 +3090,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.copied); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // enqueue(T&&) { ConcurrentQueue q; @@ -3119,7 +3120,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.copied); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // enqueue(Token, T const&) { ConcurrentQueue q; @@ -3132,7 +3133,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.copied); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // enqueue(Token, T&&) { ConcurrentQueue q; @@ -3165,7 
+3166,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.copied); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // try_enqueue(T const&) { ConcurrentQueue q; @@ -3177,7 +3178,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.copied); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // try_enqueue(T&&) { ConcurrentQueue q; @@ -3207,7 +3208,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.copied); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // try_enqueue(Token, T const&) { ConcurrentQueue q; @@ -3220,7 +3221,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.copied); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // try_enqueue(Token, T&&) { ConcurrentQueue q; @@ -3253,7 +3254,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.copied); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // enqueue_bulk(It itemFirst, size_t count) { ConcurrentQueue q; @@ -3275,7 +3276,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.moved); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // enqueue_bulk(Token, It itemFirst, size_t count) { ConcurrentQueue q; @@ -3299,7 +3300,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.moved); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // try_enqueue_bulk(It itemFirst, size_t count) { ConcurrentQueue q; @@ -3321,7 +3322,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.moved); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // try_enqueue_bulk(Token, It itemFirst, size_t count) { ConcurrentQueue q; @@ -3345,7 +3346,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.moved); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // try_dequeue(T&) { ConcurrentQueue q; @@ -3365,7 +3366,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.moved); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // try_dequeue(Token, T&) { ConcurrentQueue q; @@ -3389,7 +3390,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(!q.try_dequeue(t, item)); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // try_dequeue_from_producer(Token, T&) { ConcurrentQueue q; @@ -3413,7 +3414,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(!q.try_dequeue_from_producer(t, item)); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // try_dequeue_bulk(T&) { ConcurrentQueue q; @@ -3433,7 +3434,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item.moved); ASSERT_OR_FAIL(!q.try_dequeue_bulk(&item, 1)); } - + // try_dequeue_bulk(Token, T&) { ConcurrentQueue q; @@ -3457,7 +3458,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(!q.try_dequeue_bulk(t, &item, 1)); ASSERT_OR_FAIL(!q.try_dequeue_bulk(&item, 1)); } - + // try_dequeue_bulk_from_producer(Token, T&) { ConcurrentQueue q; @@ -3481,7 +3482,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(!q.try_dequeue_bulk_from_producer(t, &item, 1)); ASSERT_OR_FAIL(!q.try_dequeue(item)); } - + // size_approx() { ConcurrentQueue q; @@ -3490,7 +3491,7 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(q.size_approx() == 1234); } - + // is_lock_free() { bool lockFree = ConcurrentQueue::is_lock_free(); @@ -3498,7 +3499,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(lockFree); #endif } - + // moving { ConcurrentQueue q(4); @@ -3510,20 +3511,20 @@ class ConcurrentQueueTests : public TestClass q.enqueue(t, i); } ASSERT_OR_FAIL(q.size_approx() == 5677); - + ConcurrentQueue q2(std::move(q)); ASSERT_OR_FAIL(q.size_approx() 
== 0); ASSERT_OR_FAIL(q2.size_approx() == 5677); - + q2.enqueue(t, 5678); q2.enqueue(1233); ASSERT_OR_FAIL(q2.size_approx() == 5679); - + for (int i = 1234; i != 0; --i) { q.enqueue(i); } ASSERT_OR_FAIL(q.size_approx() == 1234); - + int item; for (int i = 0; i <= 5678; ++i) { ASSERT_OR_FAIL(q2.try_dequeue_non_interleaved(item)); @@ -3531,7 +3532,7 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q2.try_dequeue_non_interleaved(item)); ASSERT_OR_FAIL(q2.size_approx() == 0); - + for (int i = 1234; i != 0; --i) { ASSERT_OR_FAIL(q.try_dequeue_non_interleaved(item)); ASSERT_OR_FAIL(item == i); @@ -3539,12 +3540,12 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(!q.try_dequeue_non_interleaved(item)); ASSERT_OR_FAIL(q.size_approx() == 0); } - + // swapping { ConcurrentQueue q1, q2, q3; ProducerToken t1(q1), t2(q2), t3(q3); - + for (int i = 1234; i != 5678; ++i) { q1.enqueue(t1, i); } @@ -3554,7 +3555,7 @@ class ConcurrentQueueTests : public TestClass for (int i = 31234; i != 35678; ++i) { q3.enqueue(t3, i); } - + for (int i = 0; i != 1234; ++i) { q1.enqueue(i); } @@ -3564,7 +3565,7 @@ class ConcurrentQueueTests : public TestClass for (int i = 30000; i != 31234; ++i) { q3.enqueue(i); } - + { ConcurrentQueue temp; temp = std::move(q1); @@ -3572,13 +3573,13 @@ class ConcurrentQueueTests : public TestClass q2 = std::move(temp); } // q1 in q2, q2 in q1 - + swap(q2, q3); // q1 in q3, q3 in q2 q1.swap(q2); // q2 in q2, q3 in q1 q1.swap(q2); // q3 in q2, q2 in q1 q1.swap(q2); // q2 in q2, q3 in q1 q2.swap(q3); // q1 in q2, q2 in q3 - + // So now q1 is in q2, q2 is in q3, and q3 is in q1 int item; for (int i = 30000; i != 35678; ++i) { @@ -3587,14 +3588,14 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(!q1.try_dequeue_non_interleaved(item)); ASSERT_OR_FAIL(q1.size_approx() == 0); - + for (int i = 0; i != 5678; ++i) { ASSERT_OR_FAIL(q2.try_dequeue_non_interleaved(item)); ASSERT_OR_FAIL(item == i); } ASSERT_OR_FAIL(!q2.try_dequeue_non_interleaved(item)); ASSERT_OR_FAIL(q2.size_approx() == 0); - + for (int i = 20000; i != 25678; ++i) { ASSERT_OR_FAIL(q3.try_dequeue_non_interleaved(item)); ASSERT_OR_FAIL(item == i); @@ -3602,16 +3603,16 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(!q3.try_dequeue_non_interleaved(item)); ASSERT_OR_FAIL(q3.size_approx() == 0); } - + return true; } - - + + bool blocking_wrappers() { typedef BlockingConcurrentQueue Q; ASSERT_OR_FAIL((Q::is_lock_free() == ConcurrentQueue::is_lock_free())); - + // Moving { Q a, b, c; @@ -3625,7 +3626,7 @@ class ConcurrentQueueTests : public TestClass a.swap(c); c.swap(c); } - + // Implicit { Q q; @@ -3636,7 +3637,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item == 1); ASSERT_OR_FAIL(!q.try_dequeue(item)); ASSERT_OR_FAIL(q.size_approx() == 0); - + ASSERT_OR_FAIL(q.enqueue(2)); ASSERT_OR_FAIL(q.enqueue(3)); ASSERT_OR_FAIL(q.size_approx() == 2); @@ -3648,17 +3649,17 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(!q.try_dequeue(item)); ASSERT_OR_FAIL(q.size_approx() == 0); } - + // Implicit threaded { Q q; const int THREADS = 8; SimpleThread threads[THREADS]; bool success[THREADS]; - + for (int i = 0; i != THREADS; ++i) { success[i] = true; - + if (i % 2 == 0) { // Enqueue if (i % 4 == 0) { @@ -3719,22 +3720,22 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != THREADS; ++i) { threads[i].join(); } - + for (int i = 0; i != THREADS; ++i) { ASSERT_OR_FAIL(success[i]); } } - + // Implicit threaded, blocking { Q q; const int 
THREADS = 8; SimpleThread threads[THREADS]; bool success[THREADS]; - + for (int i = 0; i != THREADS; ++i) { success[i] = true; - + if (i % 2 == 0) { // Enqueue if (i % 4 == 0) { @@ -3808,13 +3809,13 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != THREADS; ++i) { threads[i].join(); } - + for (int i = 0; i != THREADS; ++i) { ASSERT_OR_FAIL(success[i]); } ASSERT_OR_FAIL(q.size_approx() == 0); } - + // Explicit { Q q; @@ -3827,7 +3828,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(item == 1); ASSERT_OR_FAIL(!q.try_dequeue(ct, item)); ASSERT_OR_FAIL(q.size_approx() == 0); - + ASSERT_OR_FAIL(q.enqueue(pt, 2)); ASSERT_OR_FAIL(q.enqueue(pt, 3)); ASSERT_OR_FAIL(q.size_approx() == 2); @@ -3839,17 +3840,17 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(!q.try_dequeue(ct, item)); ASSERT_OR_FAIL(q.size_approx() == 0); } - + // Explicit threaded { Q q; const int THREADS = 8; SimpleThread threads[THREADS]; bool success[THREADS]; - + for (int i = 0; i != THREADS; ++i) { success[i] = true; - + if (i % 2 == 0) { // Enqueue if (i % 4 == 0) { @@ -3913,22 +3914,22 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != THREADS; ++i) { threads[i].join(); } - + for (int i = 0; i != THREADS; ++i) { ASSERT_OR_FAIL(success[i]); } } - + // Explicit threaded, blocking { Q q; const int THREADS = 8; SimpleThread threads[THREADS]; bool success[THREADS]; - + for (int i = 0; i != THREADS; ++i) { success[i] = true; - + if (i % 2 == 0) { // Enqueue if (i % 4 == 0) { @@ -4005,63 +4006,228 @@ class ConcurrentQueueTests : public TestClass for (int i = 0; i != THREADS; ++i) { threads[i].join(); } - + + for (int i = 0; i != THREADS; ++i) { + ASSERT_OR_FAIL(success[i]); + } + ASSERT_OR_FAIL(q.size_approx() == 0); + } + + return true; + } + + bool blocking_wrappers_timedwait() + { + typedef BlockingConcurrentQueue Q; + ASSERT_OR_FAIL((Q::is_lock_free() == ConcurrentQueue::is_lock_free())); + + // Implicit + { + Q q; + ASSERT_OR_FAIL(q.enqueue(1)); + ASSERT_OR_FAIL(q.size_approx() == 1); + std::cout << "wait_dequeue with timeout 0ms\n"; + int item; + ASSERT_OR_FAIL(q.wait_dequeue(item, 0UL)); + ASSERT_OR_FAIL(item == 1); + // less than 1 sec is very tricky. 
It may end up to wait infinity + std::cout << "wait_dequeue with timeout 1000ms\n"; + ASSERT_OR_FAIL(!q.wait_dequeue(item, 1000UL)); + ASSERT_OR_FAIL(q.size_approx() == 0); + + ASSERT_OR_FAIL(q.enqueue(2)); + ASSERT_OR_FAIL(q.enqueue(3)); + std::cout << "q.size_approx():" << q.size_approx() << "\n"; + ASSERT_OR_FAIL(q.size_approx() == 2); + std::cout << "wait_dequeue with timeout 2000ms\n"; + q.wait_dequeue(item, 2000UL); + ASSERT_OR_FAIL(item == 2); + ASSERT_OR_FAIL(q.size_approx() == 1); + std::cout << "wait_dequeue with timeout 3000ms\n"; + q.wait_dequeue(item, 3000UL); + ASSERT_OR_FAIL(item == 3); + std::cout << "wait_dequeue with timeout 1000ms\n"; + ASSERT_OR_FAIL(!q.wait_dequeue(item, 1000UL)); + ASSERT_OR_FAIL(q.size_approx() == 0); + } + + { + std::cout << "threaded wait_dequeue with/without bulk with timeout\n"; + Q q; + const int THREADS = 8; + SimpleThread threads[THREADS]; + bool success[THREADS]; + int timeouts = 0; + const std::chrono::milliseconds timeout1(3000); + const std::chrono::milliseconds timeout2(5000); + const unsigned long wait_ms = 4000UL; + + for (int i = 0; i != THREADS; ++i) { + success[i] = true; + + if (i % 2 == 0) { + // Enqueue + if (i % 4 == 0) { + threads[i] = SimpleThread([&](int j) { + int stuff[5]; + for (int k = 0; k != 2048; ++k) { + for (int x = 0; x != 5; ++x) { + stuff[x] = (j << 16) | (k * 5 + x); + } + if(k == 2046) + { + std::this_thread::sleep_for(timeout1); + } + else if (k == 2047) + { + std::this_thread::sleep_for(timeout2); + } + success[j] = q.enqueue_bulk(stuff, 5) && success[j]; + } + }, i); + } + else { + threads[i] = SimpleThread([&](int j) { + for (int k = 0; k != 4096; ++k) { + if(k == 4094) + { + std::this_thread::sleep_for(timeout1); + } + else if (k == 4095) + { + std::this_thread::sleep_for(timeout2); + } + success[j] = q.enqueue((j << 16) | k) && success[j]; + } + }, i); + } + } + else { + // Dequeue + threads[i] = SimpleThread([&](int j) { + int item; + std::vector prevItems(THREADS, -1); + if (j % 4 == 1) { + int k; + for (k = 0; k != 2048 * 5;) { + if(!q.wait_dequeue(item, wait_ms)) + { + ++timeouts; + std::cout << "timeouts:" << timeouts << "\n"; + continue; + } + + int thread = item >> 16; + item &= 0xffff; + //if (item <= prevItems[thread]) { + // success[j] = false; + //} + prevItems[thread] = item; + ++k; + } + } + else { + int items[6]; + int k; + for (k = 0; k < 4090; ) { + if (std::size_t dequeued = q.wait_dequeue_bulk(items, 6, wait_ms)) { + for (std::size_t x = 0; x != dequeued; ++x) { + item = items[x]; + int thread = item >> 16; + item &= 0xffff; + //if (item <= prevItems[thread]) { + // success[j] = false; + //} + prevItems[thread] = item; + } + k += (int)dequeued; + } + else { + std::cout << "timeouts (bulk):" << timeouts << "\n"; + ++timeouts; + } + } + for (; k != 4096;) { + if(!q.wait_dequeue(item, wait_ms)) + { + ++timeouts; + std::cout << "timeouts (after):" << timeouts << "\n"; + continue; + } + + int thread = item >> 16; + item &= 0xffff; + if (item <= prevItems[thread]) { + success[j] = false; + } + prevItems[thread] = item; + ++k; + } + } + }, i); + } + } + for (int i = 0; i != THREADS; ++i) { + threads[i].join(); + } + for (int i = 0; i != THREADS; ++i) { ASSERT_OR_FAIL(success[i]); } ASSERT_OR_FAIL(q.size_approx() == 0); + std::cout << "timeouts:" << timeouts << "\n"; + ASSERT_OR_FAIL(timeouts > 0); } - + return true; } - - + struct TestListItem : corealgos::ListItem { int value; - + TestListItem() : value(0) { ctorCount().fetch_add(1, std::memory_order_relaxed); } - + explicit TestListItem(int 
value) : value(value) { ctorCount().fetch_add(1, std::memory_order_relaxed); } - + ~TestListItem() { dtorCount().fetch_add(1, std::memory_order_relaxed); } - + inline TestListItem* prev(std::memory_order order = std::memory_order_relaxed) const { return static_cast(concurrentListPrev.load(order)); } - - + + inline static void reset() { ctorCount().store(0, std::memory_order_relaxed); dtorCount().store(0, std::memory_order_relaxed); } - + inline static size_t constructed() { return ctorCount().load(std::memory_order_relaxed); } inline static size_t destructed() { return dtorCount().load(std::memory_order_relaxed); } - + private: inline static std::atomic& ctorCount() { static std::atomic count(0); return count; } inline static std::atomic& dtorCount() { static std::atomic count(0); return count; } }; - + bool core_add_only_list() { auto destroyList = [](corealgos::ConcurrentAddOnlyList& list) { size_t count = 0; - + auto tail = list.tail(); while (tail != nullptr) { auto next = tail->prev(); @@ -4071,14 +4237,14 @@ class ConcurrentQueueTests : public TestClass } return count; }; - + { corealgos::ConcurrentAddOnlyList list; ASSERT_OR_FAIL(list.tail() == nullptr); - + ASSERT_OR_FAIL(destroyList(list) == 0); } - + { corealgos::ConcurrentAddOnlyList list; for (int i = 0; i != 1000; ++i) { @@ -4090,10 +4256,10 @@ class ConcurrentQueueTests : public TestClass --i; } ASSERT_OR_FAIL(i == -1); - + ASSERT_OR_FAIL(destroyList(list) == 1000); } - + for (int repeats = 0; repeats != 10; ++repeats) { corealgos::ConcurrentAddOnlyList list; std::vector threads(8); @@ -4107,7 +4273,7 @@ class ConcurrentQueueTests : public TestClass for (size_t tid = 0; tid != threads.size(); ++tid) { threads[tid].join(); } - + std::vector prevItems(threads.size()); for (size_t i = 0; i != prevItems.size(); ++i) { prevItems[i] = 1000; @@ -4118,13 +4284,13 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(prevItems[tid] == i + 1); prevItems[tid] = i; } - + ASSERT_OR_FAIL(destroyList(list) == 1000 * threads.size()); } - + return true; } - + bool core_thread_local() { TestListItem::reset(); @@ -4133,7 +4299,7 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(TestListItem::constructed() == 0); ASSERT_OR_FAIL(TestListItem::destructed() == 0); - + TestListItem::reset(); { corealgos::ThreadLocal local(4); @@ -4141,7 +4307,7 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(TestListItem::constructed() == 1); ASSERT_OR_FAIL(TestListItem::destructed() == 1); - + TestListItem::reset(); { corealgos::ThreadLocal local(4); @@ -4152,8 +4318,8 @@ class ConcurrentQueueTests : public TestClass } ASSERT_OR_FAIL(TestListItem::constructed() == 1); ASSERT_OR_FAIL(TestListItem::destructed() == 1); - - + + for (size_t initialSize = 1; initialSize <= 4; initialSize <<= 1) { for (int reps = 0; reps != 20; ++reps) { TestListItem::reset(); @@ -4188,30 +4354,30 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(TestListItem::destructed() == 5 * initialSize); } } - + return true; } - + struct TestNode : corealgos::FreeListNode { int value; TestNode() { } explicit TestNode(int value) : value(value) { } }; - + bool core_free_list() { { // Basic corealgos::FreeList freeList; ASSERT_OR_FAIL(freeList.try_get() == nullptr); - + freeList.add(new TestNode(7)); TestNode* node = freeList.try_get(); ASSERT_OR_FAIL(node != nullptr); ASSERT_OR_FAIL(node->value == 7); ASSERT_OR_FAIL(freeList.try_get() == nullptr); - + freeList.add(node); node = freeList.try_get(); ASSERT_OR_FAIL(node != nullptr); @@ -4219,7 
+4385,7 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(freeList.try_get() == nullptr); delete node; } - + { // Multi-threaded. Tests ABA too. for (int rep = 0; rep != 10; ++rep) { @@ -4243,7 +4409,7 @@ class ConcurrentQueueTests : public TestClass failed[tid] = true; } *seen = true; - + node->value = ((int)tid << 20) | (i + 1); freeList.add(node); } @@ -4264,10 +4430,10 @@ class ConcurrentQueueTests : public TestClass ASSERT_OR_FAIL(node == nullptr); } } - + return true; } - + bool core_spmc_hash() { { @@ -4275,21 +4441,21 @@ class ConcurrentQueueTests : public TestClass corealgos::SPMCSequentialHashMap hash(rep < 10 ? 2 : 4); std::vector threads(rep < 12 ? 4 : 16); std::vector failed(threads.size()); - + const int MAX_ENTRIES = 4096; std::vector values(MAX_ENTRIES); std::array, MAX_ENTRIES> useCounts; std::array, MAX_ENTRIES> removed; - + for (std::size_t i = 0; i != useCounts.size(); ++i) { useCounts[i].store(0, std::memory_order_relaxed); removed[i].store(false, std::memory_order_relaxed); } - + for (size_t tid = 0; tid != threads.size(); ++tid) { threads[tid] = SimpleThread([&](size_t tid) { failed[tid] = false; - + if (tid == 0) { // Producer thread for (int i = 0; i != MAX_ENTRIES; ++i) { @@ -4305,13 +4471,13 @@ class ConcurrentQueueTests : public TestClass if (i < MAX_ENTRIES) { useCount = useCounts[i].fetch_add(-1, std::memory_order_acquire); } - + int* val; if (useCount > 0) { val = hash.find(i); bool isRemoved = removed[i].load(std::memory_order_relaxed); assert(val == nullptr || *val == *val); // Find segfaults - + // We read the use count again; if it's still > 0, the item must have been in // the hash during the entire call to find(), so we can check its value auto currentUseCount = useCounts[i].fetch_add(0, std::memory_order_release); @@ -4355,12 +4521,12 @@ class ConcurrentQueueTests : public TestClass } return true; } - + bool explicit_strings_threaded() { std::vector threads(8); ConcurrentQueue q(1024 * 1024); - + for (size_t tid = 0; tid != threads.size(); ++tid) { threads[tid] = SimpleThread([&](size_t tid) { const size_t ITERATIONS = 100 * 1024; @@ -4383,7 +4549,7 @@ class ConcurrentQueueTests : public TestClass for (size_t tid = 0; tid != threads.size(); ++tid) { threads[tid].join(); } - + return true; } }; @@ -4394,11 +4560,11 @@ class ConcurrentQueueTests : public TestClass void printTests(ConcurrentQueueTests const& tests) { std::printf(" Supported tests are:\n"); - + std::vector names; tests.getAllTestNames(names); for (auto it = names.cbegin(); it != names.cend(); ++it) { - std::printf(" %s\n", it->c_str()); + std::printf(" %s\n", it->c_str()); } } @@ -4409,19 +4575,19 @@ int main(int argc, char** argv) bool disablePrompt = false; unsigned int iterations = 8; std::vector selectedTests; - + // Disable buffering (so that when run in, e.g., Sublime Text, the output appears as it is written) std::setvbuf(stdout, nullptr, _IONBF, 0); - + // Isolate the executable name std::string progName = argv[0]; auto slash = progName.find_last_of("/\\"); if (slash != std::string::npos) { progName = progName.substr(slash + 1); } - + ConcurrentQueueTests tests; - + // Parse command line options if (argc > 1) { bool printHelp = false; @@ -4444,7 +4610,7 @@ int main(int argc, char** argv) error = true; continue; } - + if (!tests.validateTestName(argv[++i])) { std::printf("Unrecognized test '%s'.\n", argv[i]); if (!printedTests) { @@ -4454,7 +4620,7 @@ int main(int argc, char** argv) error = true; continue; } - + selectedTests.push_back(argv[i]); } else if 
(std::strcmp(argv[i], "--iterations") == 0) { @@ -4463,7 +4629,7 @@ int main(int argc, char** argv) error = true; continue; } - + iterations = static_cast(std::atoi(argv[++i])); } else { @@ -4471,22 +4637,22 @@ int main(int argc, char** argv) error = true; } } - + if (error || printHelp) { if (error) { std::printf("\n"); } - std::printf("%s\n Description: Runs unit tests for moodycamel::ConcurrentQueue\n", progName.c_str()); - std::printf(" --help Prints this help blurb\n"); - std::printf(" --run test Runs only the specified test(s)\n"); - std::printf(" --iterations N Do N iterations of each test\n"); - std::printf(" --disable-prompt Disables prompt before exit when the tests finish\n"); + std::printf("%s\n Description: Runs unit tests for moodycamel::ConcurrentQueue\n", progName.c_str()); + std::printf(" --help Prints this help blurb\n"); + std::printf(" --run test Runs only the specified test(s)\n"); + std::printf(" --iterations N Do N iterations of each test\n"); + std::printf(" --disable-prompt Disables prompt before exit when the tests finish\n"); return error ? -1 : 0; } } - + int exitCode = 0; - + bool result; if (selectedTests.size() > 0) { std::printf("Running %d iteration%s of selected unit test%s for moodycamel::ConcurrentQueue.\n\n", iterations, iterations == 1 ? "" : "s", selectedTests.size() == 1 ? "" : "s"); @@ -4496,7 +4662,7 @@ int main(int argc, char** argv) std::printf("Running %d iteration%s of all unit tests for moodycamel::ConcurrentQueue.\n(Run %s --help for other options.)\n\n", iterations, iterations == 1 ? "" : "s", progName.c_str()); result = tests.run(iterations); } - + if (result) { std::printf("All %stests passed.\n", (selectedTests.size() > 0 ? "selected " : "")); } @@ -4504,7 +4670,7 @@ int main(int argc, char** argv) std::printf("Test(s) failed!\n"); exitCode = 2; } - + if (!disablePrompt) { std::printf("Press ENTER to exit.\n"); getchar();
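		// A minimal usage sketch of the timed overloads exercised by the new
		// blocking_wrappers_timedwait test above. It assumes, as that test does, that
		// the added wait_dequeue overload takes a timeout in milliseconds and returns
		// false on timeout, and that wait_dequeue_bulk returns 0 on timeout. The
		// lambda below is illustrative only and is never invoked.
		auto timedWaitSketch = [] {
			moodycamel::BlockingConcurrentQueue<int> q;
			q.enqueue(42);
			int item;
			if (q.wait_dequeue(item, 250UL)) {
				// Returned before the 250ms timeout elapsed; item == 42 here
			}
			int items[4];
			std::size_t n = q.wait_dequeue_bulk(items, 4, 250UL);   // n == 0 means the wait timed out
			(void)n;
		};
		(void)timedWaitSketch;   // sketch only; silences unused-variable warnings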