66
77#include < core/CStaticThreadPool.h>
88
9+ #include < chrono>
10+
911namespace ml {
1012namespace core {
1113namespace {
@@ -21,7 +23,7 @@ CStaticThreadPool::CStaticThreadPool(std::size_t size)
2123 m_Pool.reserve (m_TaskQueues.size ());
2224 for (std::size_t id = 0 ; id < m_TaskQueues.size (); ++id) {
2325 try {
24- m_Pool.emplace_back ([& , id] { worker (id); });
26+ m_Pool.emplace_back ([this , id] { this -> worker (id); });
2527 } catch (...) {
2628 this ->shutdown ();
2729 throw ;
@@ -33,18 +35,19 @@ CStaticThreadPool::~CStaticThreadPool() {
3335 this ->shutdown ();
3436}
3537
36- void CStaticThreadPool::schedule (TTask&& task ) {
38+ void CStaticThreadPool::schedule (TTask&& task_ ) {
3739 // Only block if every queue is full.
3840 std::size_t size{m_TaskQueues.size ()};
3941 std::size_t i{m_Cursor.load ()};
4042 std::size_t end{i + size};
43+ CWrappedTask task{std::forward<TTask>(task_)};
4144 for (/* */ ; i < end; ++i) {
42- if (m_TaskQueues[i % size].tryPush (std::forward<TTask> (task))) {
45+ if (m_TaskQueues[i % size].tryPush (std::move (task))) {
4346 break ;
4447 }
4548 }
4649 if (i == end) {
47- m_TaskQueues[i % size].push (std::forward<TTask> (task));
50+ m_TaskQueues[i % size].push (std::move (task));
4851 }
4952 m_Cursor.store (i + 1 );
5053}
@@ -65,33 +68,38 @@ void CStaticThreadPool::busy(bool value) {
6568}
6669
6770void CStaticThreadPool::shutdown () {
68- // Signal to each thread that it is finished.
69- for (auto & queue : m_TaskQueues) {
70- queue.push (TTask{[&] {
71+
72+ // Drain the queues before starting to shut down in order to maximise throughput.
73+ this ->drainQueuesWithoutBlocking ();
74+
75+ // Signal to each thread that it is finished. We bind each task to a thread so
76+ // each thread executes exactly one shutdown task.
77+ for (std::size_t id = 0 ; id < m_TaskQueues.size (); ++id) {
78+ TTask done{[&] {
7179 m_Done = true ;
7280 return boost::any{};
73- }});
81+ }};
82+ m_TaskQueues[id].push (CWrappedTask{std::move (done), id});
7483 }
84+
7585 for (auto & thread : m_Pool) {
7686 if (thread.joinable ()) {
7787 thread.join ();
7888 }
7989 }
90+
8091 m_TaskQueues.clear ();
8192 m_Pool.clear ();
8293}
8394
8495void CStaticThreadPool::worker (std::size_t id) {
8596
86- auto noThrowExecute = [](TOptionalTask& task) {
87- try {
88- (*task)();
89- } catch (const std::future_error& e) {
90- LOG_ERROR (<< " Failed executing packaged task: '" << e.code () << " ' "
91- << " with error '" << e.what () << " '" );
92- }
97+ auto ifAllowed = [id](const CWrappedTask& task) {
98+ return task.executableOnThread (id);
9399 };
94100
101+ TOptionalTask task;
102+
95103 while (m_Done == false ) {
96104 // We maintain "worker count" queues and each worker has an affinity to a
97105 // different queue. We don't immediately block if the worker's "queue" is
@@ -101,9 +109,8 @@ void CStaticThreadPool::worker(std::size_t id) {
101109 // workers on queue reads.
102110
103111 std::size_t size{m_TaskQueues.size ()};
104- TOptionalTask task;
105112 for (std::size_t i = 0 ; i < size; ++i) {
106- task = m_TaskQueues[(id + i) % size].tryPop ();
113+ task = m_TaskQueues[(id + i) % size].tryPop (ifAllowed );
107114 if (task != boost::none) {
108115 break ;
109116 }
@@ -112,12 +119,48 @@ void CStaticThreadPool::worker(std::size_t id) {
112119 task = m_TaskQueues[id].pop ();
113120 }
114121
115- noThrowExecute (task);
122+ (*task)();
123+
124+ // In the typical situation that the thread(s) adding tasks to the queues can
125+ // do this much faster than the threads consuming them, all queues will be full
126+ // and the producer(s) will be waiting to add a task as each one is consumed.
127+ // By switching to work on a new queue here we minimise contention between the
128+ // producers and consumers. Testing on bare metal (OSX) the overhead per task
129+ // dropped from around 2.2 microseconds to 1.5 microseconds by yielding here.
130+ std::this_thread::yield ();
131+ }
132+ }
133+
134+ void CStaticThreadPool::drainQueuesWithoutBlocking () {
135+ TOptionalTask task;
136+ auto popTask = [&] {
137+ for (auto & queue : m_TaskQueues) {
138+ task = queue.tryPop ();
139+ if (task != boost::none) {
140+ (*task)();
141+ return true ;
142+ }
143+ }
144+ return false ;
145+ };
146+ while (popTask ()) {
116147 }
148+ }
149+
150+ CStaticThreadPool::CWrappedTask::CWrappedTask (TTask&& task, TOptionalSize threadId)
151+ : m_Task{std::forward<TTask>(task)}, m_ThreadId{threadId} {
152+ }
153+
154+ bool CStaticThreadPool::CWrappedTask::executableOnThread (std::size_t id) const {
155+ return m_ThreadId == boost::none || *m_ThreadId == id;
156+ }
117157
118- // Drain this thread's queue before exiting.
119- for (auto task = m_TaskQueues[id].tryPop (); task; task = m_TaskQueues[id].tryPop ()) {
120- noThrowExecute (task);
158+ void CStaticThreadPool::CWrappedTask::operator ()() {
159+ try {
160+ m_Task ();
161+ } catch (const std::future_error& e) {
162+ LOG_ERROR (<< " Failed executing packaged task: '" << e.code () << " ' "
163+ << " with error '" << e.what () << " '" );
121164 }
122165}
123166}
0 commit comments