From dad0577dc49e8077b924b33a84ed0312c40ba431 Mon Sep 17 00:00:00 2001 From: Terry Wilmarth Date: Fri, 15 Mar 2024 15:51:49 -0500 Subject: [PATCH 1/9] Add num_threads clause list format and strict modifier support Add support to the runtime for 6.0 spec features that allow num_threads clause to take a list, and also make use of the strict modifier. Provides new compiler interface functions for these features. --- openmp/runtime/src/kmp.h | 46 +++++++-- openmp/runtime/src/kmp_csupport.cpp | 44 +++++++++ openmp/runtime/src/kmp_runtime.cpp | 142 +++++++++++++++++++++++++--- 3 files changed, 212 insertions(+), 20 deletions(-) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 121e7e959129e..7397244a16f60 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -532,6 +532,19 @@ enum clock_function_type { enum mic_type { non_mic, mic1, mic2, mic3, dummy }; #endif +// OpenMP 3.1 - Nested num threads array +typedef struct kmp_nested_nthreads_t { + int *nth; + int size; + int used; + bool strict; // num_threads clause has strict modifier + ident_t *loc; // loc for strict modifier + int sev; // error severity for strict modifier + const char *msg; // error message for strict modifier +} kmp_nested_nthreads_t; + +extern kmp_nested_nthreads_t __kmp_nested_nth; + /* -- fast reduction stuff ------------------------------------------------ */ #undef KMP_FAST_REDUCTION_BARRIER @@ -2958,6 +2971,12 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info { /* The data set by the primary thread at reinit, then R/W by the worker */ KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */ + int *th_set_nested_nth; + bool th_nt_strict; // num_threads clause has strict modifier + ident_t *th_nt_loc; // loc for strict modifier + int th_nt_sev; // error severity for strict modifier + const char *th_nt_msg; // error message for strict modifier + int th_set_nested_nth_sz; #if KMP_NESTED_HOT_TEAMS kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */ #endif @@ -3202,6 +3221,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team { void *t_stack_id; // team specific stack stitching id (for ittnotify) #endif /* USE_ITT_BUILD */ distributedBarrier *b; // Distributed barrier data associated with team + kmp_nested_nthreads_t *t_nested_nth; } kmp_base_team_t; union KMP_ALIGN_CACHE kmp_team { @@ -3532,15 +3552,6 @@ extern enum mic_type __kmp_mic_type; extern double __kmp_load_balance_interval; // load balance algorithm interval #endif /* USE_LOAD_BALANCE */ -// OpenMP 3.1 - Nested num threads array -typedef struct kmp_nested_nthreads_t { - int *nth; - int size; - int used; -} kmp_nested_nthreads_t; - -extern kmp_nested_nthreads_t __kmp_nested_nth; - #if KMP_USE_ADAPTIVE_LOCKS // Parameters for the speculative lock backoff system. @@ -3775,6 +3786,11 @@ extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL); ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR) extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads); +extern void __kmp_push_num_threads_list(ident_t *loc, int gtid, + kmp_uint32 list_length, + int *num_threads_list); +extern void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev, + const char *msg); extern void __kmp_push_proc_bind(ident_t *loc, int gtid, kmp_proc_bind_t proc_bind); @@ -4403,6 +4419,18 @@ KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc); KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid); KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads); +KMP_EXPORT void __kmpc_push_num_threads_strict(ident_t *loc, + kmp_int32 global_tid, + kmp_int32 num_threads, + int severity, + const char *message); + +KMP_EXPORT void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid, + kmp_uint32 list_length, + kmp_int32 *num_threads_list); +KMP_EXPORT void __kmpc_push_num_threads_list_strict( + ident_t *loc, kmp_int32 global_tid, kmp_uint32 list_length, + kmp_int32 *num_threads_list, int severity, const char *message); KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind); diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index 878e78b5c7ad2..0895ff6288f55 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -236,6 +236,50 @@ void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, __kmp_push_num_threads(loc, global_tid, num_threads); } +void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_threads, int severity, + const char *message) { + __kmp_push_num_threads(loc, global_tid, num_threads); + __kmp_set_strict_num_threads(loc, global_tid, severity, message); +} + +/*! +@ingroup PARALLEL +@param loc source location information +@param global_tid global thread number +@param list_length number of entries in the num_threads_list array +@param num_threads_list array of numbers of threads requested for this parallel +construct and subsequent nested parallel constructs + +Set the number of threads to be used by the next fork spawned by this thread, +and some nested forks as well. +This call is only required if the parallel construct has a `num_threads` clause +that has a list of integers as the argument. +*/ +void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid, + kmp_uint32 list_length, + kmp_int32 *num_threads_list) { + KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=", + global_tid)); + KA_TRACE(20, ("%d", num_threads_list[0])); +#ifdef KMP_DEBUG + for (kmp_uint32 i = 1; i < list_length; ++i) + KA_TRACE(20, (", %d", num_threads_list[i])); +#endif + KA_TRACE(20, ("/n")); + + __kmp_assert_valid_gtid(global_tid); + __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list); +} + +void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid, + kmp_uint32 list_length, + kmp_int32 *num_threads_list, + int severity, const char *message) { + __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list); + __kmp_set_strict_num_threads(loc, global_tid, severity, message); +} + void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); /* the num_threads are automatically popped */ diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 7edb0b440acc7..11dcb7e8c1c01 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -113,6 +113,25 @@ void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, int new_nthreads); void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads); +static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr, + int level) { + kmp_nested_nthreads_t *new_nested_nth = + (kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC( + sizeof(kmp_nested_nthreads_t)); + int new_size = level + thr->th.th_set_nested_nth_sz; + new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int)); + for (int i = 0; i < level + 1; ++i) + new_nested_nth->nth[i] = 0; + for (int i = level + 1, j = 1; i < new_size; ++i, ++j) + new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j]; + new_nested_nth->size = new_nested_nth->used = new_size; + new_nested_nth->strict = thr->th.th_nt_strict; + new_nested_nth->loc = thr->th.th_nt_loc; + new_nested_nth->sev = thr->th.th_nt_sev; + new_nested_nth->msg = thr->th.th_nt_msg; + return new_nested_nth; +} + /* Calculate the identifier of the current thread */ /* fast (and somewhat portable) way to get unique identifier of executing thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */ @@ -930,6 +949,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, __kmp_get_gtid(), new_nthreads, set_nthreads)); } #endif // KMP_DEBUG + + if ((this_thr->th.th_nt_strict || parent_team->t.t_nested_nth->strict) && + new_nthreads < set_nthreads) { + __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev, + this_thr->th.th_nt_msg); + } return new_nthreads; } @@ -1242,6 +1267,10 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { serial_team->t.t_serialized = 1; serial_team->t.t_nproc = 1; serial_team->t.t_parent = this_thr->th.th_team; + if (this_thr->th.th_team->t.t_nested_nth) + serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth; + else + serial_team->t.t_nested_nth = &__kmp_nested_nth; serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched; this_thr->th.th_team = serial_team; serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; @@ -1261,9 +1290,11 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { // Thread value exists in the nested nthreads array for the next nested // level - if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { - this_thr->th.th_current_task->td_icvs.nproc = - __kmp_nested_nth.nth[level + 1]; + kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth; + if (this_thr->th.th_team->t.t_nested_nth) + nested_nth = this_thr->th.th_team->t.t_nested_nth; + if (nested_nth->used && (level + 1 < nested_nth->used)) { + this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1]; } if (__kmp_nested_proc_bind.used && @@ -1312,10 +1343,14 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { int level = this_thr->th.th_team->t.t_level; // Thread value exists in the nested nthreads array for the next nested // level - if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { - this_thr->th.th_current_task->td_icvs.nproc = - __kmp_nested_nth.nth[level + 1]; + + kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth; + if (serial_team->t.t_nested_nth) + nested_nth = serial_team->t.t_nested_nth; + if (nested_nth->used && (level + 1 < nested_nth->used)) { + this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1]; } + serial_team->t.t_level++; KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level " "of serial team %p to %d\n", @@ -2074,9 +2109,18 @@ int __kmp_fork_call(ident_t *loc, int gtid, // See if we need to make a copy of the ICVs. int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; - if ((level + 1 < __kmp_nested_nth.used) && - (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) { - nthreads_icv = __kmp_nested_nth.nth[level + 1]; + kmp_nested_nthreads_t *nested_nth = NULL; + if (!master_th->th.th_set_nested_nth && + (level + 1 < parent_team->t.t_nested_nth->used) && + (parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) { + nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1]; + } else if (master_th->th.th_set_nested_nth) { + nested_nth = __kmp_override_nested_nth(master_th, level); + if ((level + 1 < nested_nth->used) && + (nested_nth->nth[level + 1] != nthreads_icv)) + nthreads_icv = nested_nth->nth[level + 1]; + else + nthreads_icv = 0; // don't update } else { nthreads_icv = 0; // don't update } @@ -2185,6 +2229,24 @@ int __kmp_fork_call(ident_t *loc, int gtid, KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator); + // Check if hot team has potentially outdated list, and if so, free it + if (team->t.t_nested_nth && + team->t.t_nested_nth != parent_team->t.t_nested_nth) { + KMP_INTERNAL_FREE(team->t.t_nested_nth->nth); + KMP_INTERNAL_FREE(team->t.t_nested_nth); + team->t.t_nested_nth = NULL; + } + team->t.t_nested_nth = parent_team->t.t_nested_nth; + if (master_th->th.th_set_nested_nth) { + if (!nested_nth) + nested_nth = __kmp_override_nested_nth(master_th, level); + team->t.t_nested_nth = nested_nth; + KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth); + master_th->th.th_set_nested_nth = NULL; + master_th->th.th_set_nested_nth_sz = 0; + master_th->th.th_nt_strict = false; + } + // Update the floating point rounding in the team if required. propagateFPControl(team); #if OMPD_SUPPORT @@ -3390,6 +3452,7 @@ static void __kmp_initialize_root(kmp_root_t *root) { root_team->t.t_serialized = 1; // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; root_team->t.t_sched.sched = r_sched.sched; + root_team->t.t_nested_nth = &__kmp_nested_nth; KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", @@ -3427,6 +3490,7 @@ static void __kmp_initialize_root(kmp_root_t *root) { // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; hot_team->t.t_sched.sched = r_sched.sched; hot_team->t.t_size_changed = 0; + hot_team->t.t_nested_nth = &__kmp_nested_nth; } #ifdef KMP_DEBUG @@ -4293,6 +4357,7 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team, else // no tasking --> always safe to reap this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; this_thr->th.th_set_proc_bind = proc_bind_default; + #if KMP_AFFINITY_SUPPORTED this_thr->th.th_new_place = this_thr->th.th_current_place; #endif @@ -4556,6 +4621,11 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, /* allocate space for it. */ new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); + new_thr->th.th_nt_strict = false; + new_thr->th.th_nt_loc = NULL; + new_thr->th.th_nt_sev = severity_fatal; + new_thr->th.th_nt_msg = NULL; + TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr); #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG @@ -4666,6 +4736,9 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, new_thr->th.th_active_in_pool = FALSE; TCW_4(new_thr->th.th_active, TRUE); + new_thr->th.th_set_nested_nth = NULL; + new_thr->th.th_set_nested_nth_sz = 0; + /* adjust the global counters */ __kmp_all_nth++; __kmp_nth++; @@ -5456,7 +5529,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, } } // Check changes in number of threads - kmp_info_t *master = team->t.t_threads[0]; if (master->th.th_teams_microtask) { for (f = 1; f < new_nproc; ++f) { // propagate teams construct specific info to workers @@ -5562,6 +5634,8 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, __ompt_team_assign_id(team, ompt_parallel_data); #endif + team->t.t_nested_nth = NULL; + KMP_MB(); return team; @@ -5633,6 +5707,8 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, KMP_MB(); + team->t.t_nested_nth = NULL; + KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id)); @@ -5735,6 +5811,14 @@ void __kmp_free_team(kmp_root_t *root, } } + // Before clearing parent pointer, check if nested_nth list should be freed + if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth && + team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) { + KMP_INTERNAL_FREE(team->t.t_nested_nth->nth); + KMP_INTERNAL_FREE(team->t.t_nested_nth); + } + team->t.t_nested_nth = NULL; + // Reset pointer to parent team only for non-hot teams. team->t.t_parent = NULL; team->t.t_level = 0; @@ -7837,7 +7921,6 @@ int __kmp_invoke_teams_master(int gtid) { encountered by this team. since this should be enclosed in the forkjoin critical section it should avoid race conditions with asymmetrical nested parallelism */ - void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) { kmp_info_t *thr = __kmp_threads[gtid]; @@ -7845,6 +7928,39 @@ void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) { thr->th.th_set_nproc = num_threads; } +void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length, + int *num_threads_list) { + kmp_info_t *thr = __kmp_threads[gtid]; + + KMP_DEBUG_ASSERT(list_length > 1); + + if (num_threads_list[0] > 0) + thr->th.th_set_nproc = num_threads_list[0]; + thr->th.th_set_nested_nth = + (int *)KMP_INTERNAL_MALLOC(list_length * sizeof(int)); + for (kmp_uint32 i = 0; i < list_length; ++i) + thr->th.th_set_nested_nth[i] = num_threads_list[i]; + thr->th.th_set_nested_nth_sz = list_length; +} + +void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev, + const char *msg) { + kmp_info_t *thr = __kmp_threads[gtid]; + thr->th.th_nt_strict = true; + thr->th.th_nt_loc = loc; + // if sev is unset make fatal + if (sev == severity_warning) + thr->th.th_nt_sev = sev; + else + thr->th.th_nt_sev = severity_fatal; + // if msg is unset, use an appropriate message + if (msg) + thr->th.th_nt_msg = msg; + else + thr->th.th_nt_msg = "Cannot form team with number of threads specified by " + "strict num_threads clause."; +} + static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams, int num_threads) { KMP_DEBUG_ASSERT(thr); @@ -8301,6 +8417,10 @@ void __kmp_cleanup(void) { __kmp_nested_nth.nth = NULL; __kmp_nested_nth.size = 0; __kmp_nested_nth.used = 0; + __kmp_nested_nth.strict = false; + __kmp_nested_nth.loc = NULL; + __kmp_nested_nth.sev = 0; + __kmp_nested_nth.msg = NULL; KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types); __kmp_nested_proc_bind.bind_types = NULL; __kmp_nested_proc_bind.size = 0; From 47155e3a2b5ff3fab61309f0377b6ddef5e97819 Mon Sep 17 00:00:00 2001 From: Terry Wilmarth Date: Tue, 19 Mar 2024 10:15:02 -0500 Subject: [PATCH 2/9] Add dllexports. --- openmp/runtime/src/dllexports | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports index 0d49643709e0a..747b828093156 100644 --- a/openmp/runtime/src/dllexports +++ b/openmp/runtime/src/dllexports @@ -1268,6 +1268,11 @@ kmp_set_disp_num_buffers 890 __kmpc_atomic_val_8_cas_cpt 2158 %endif + # No longer need to put ordinal numbers + __kmpc_push_num_threads_list + __kmpc_push_num_threads_strict + __kmpc_push_num_threads_list_strict + %endif __kmpc_set_thread_limit From e34c994d1b16d369dde5b210f44bd16416137929 Mon Sep 17 00:00:00 2001 From: Terry Wilmarth Date: Fri, 29 Mar 2024 21:04:35 -0500 Subject: [PATCH 3/9] Add tests. --- .../parallel/omp_parallel_num_threads_list.c | 209 ++++++++++++++++++ .../omp_parallel_num_threads_strict.c | 99 +++++++++ 2 files changed, 308 insertions(+) create mode 100644 openmp/runtime/test/parallel/omp_parallel_num_threads_list.c create mode 100644 openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c new file mode 100644 index 0000000000000..1c1771c255b31 --- /dev/null +++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c @@ -0,0 +1,209 @@ +// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 %libomp-run +#include +#include "omp_testsuite.h" + +// When compiler supports num_threads clause list format, remove the following +// and use num_threads clause directly +#if defined(__cplusplus) +extern "C" { +#endif + +int __kmpc_global_thread_num(void *loc); +void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length, int *list); + +#if defined(__cplusplus) +} +#endif + +int test_omp_parallel_num_threads_list() +{ + int num_failed; + + // Initially, 5 levels specified via OMP_NUM_THREADS with 2 threads per level + // Check top 3 levels +#pragma omp parallel reduction(+:num_failed) // 1st level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); +#pragma omp parallel reduction(+:num_failed) // 2nd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); +#pragma omp parallel reduction(+:num_failed) // 3rd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); + } // end 3rd level parallel + } // end 2nd level parallel + } // end 1st level parallel + + // Make sure that basic single element num_threads clause works +#pragma omp parallel reduction(+:num_failed) num_threads(4) // 1st level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 4); +#pragma omp parallel reduction(+:num_failed) // 2nd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected +#pragma omp parallel reduction(+:num_failed) // 3rd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected + } // end 3rd level parallel + } // end 2nd level parallel + } // end 1st level parallel + + // Check that basic single element num_threads clause works on second level +#pragma omp parallel reduction(+:num_failed) // 1st level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected +#pragma omp parallel reduction(+:num_failed) num_threads(4) // 2nd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 4); +#pragma omp parallel reduction(+:num_failed) // 3rd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected + } // end 3rd level parallel + } // end 2nd level parallel + } // end 1st level parallel + + // Try a short list. It should completely overwrite the old settings. + // We need to use the compiler interface for now. + int threads[2] = {3,3}; + __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads); +#pragma omp parallel reduction(+:num_failed) //num_threads(3,3) // 1st level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 3); +#pragma omp parallel reduction(+:num_failed) // 2nd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 3); +#pragma omp parallel reduction(+:num_failed) // 3rd level + { + // NOTE: should just keep using last element in list, to nesting depth +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 3); + } // end 3rd level parallel + } // end 2nd level parallel + } // end 1st level parallel + + // Similar, but at a lower level. +#pragma omp parallel reduction(+:num_failed) // 1st level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected + int threads[2] = {3,3}; + __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads); +#pragma omp parallel reduction(+:num_failed) // num_clause(3,3) // 2nd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 3); +#pragma omp parallel reduction(+:num_failed) // 3rd level + { + // NOTE: just keep using last element in list, to nesting depth +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 3); + } // end 3rd level parallel + } // end 2nd level parallel + // Make sure a second inner parallel is NOT affected by the clause +#pragma omp parallel reduction(+:num_failed) // 2nd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected +#pragma omp parallel reduction(+:num_failed) // 3rd level + { +#pragma omp single + // NOTE: just keep using last element in list, to nesting depth + num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected + } // end 3rd level parallel + } // end 2nd level parallel + } // end 1st level parallel + + // Test lists at multiple levels + int threads2[2] = {4,3}; + __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads2); +#pragma omp parallel reduction(+:num_failed) // num_clause(4,3) // 1st level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 4); +#pragma omp parallel reduction(+:num_failed) // 2nd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 3); +#pragma omp parallel reduction(+:num_failed) // 3rd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 3); + int threads3[2] = {2,5}; + __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads3); +#pragma omp parallel reduction(+:num_failed) //num_clause(2,5) // 4th level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); +#pragma omp parallel reduction(+:num_failed) // 5th level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 5); +#pragma omp parallel reduction(+:num_failed) // 6th level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 5); + } // end 6th level parallel + } // end 5th level parallel + } // end 4th level parallel +#pragma omp parallel reduction(+:num_failed) // 4th level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 3); + } // end 4th level parallel + } // end 3rd level parallel + } // end 2nd level parallel +#pragma omp parallel reduction(+:num_failed) // 2nd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 3); +#pragma omp parallel reduction(+:num_failed) // 3rd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 3); + } // end 3rd level parallel + } // end 2nd level parallel + } // end 1st level parallel + + // Now we should be back to the way we started. +#pragma omp parallel reduction(+:num_failed) // 1st level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); +#pragma omp parallel reduction(+:num_failed) // 2nd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); +#pragma omp parallel reduction(+:num_failed) // 3rd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); + } // end 3rd level parallel + } // end 2nd level parallel + } // end 1st level parallel + + return (!num_failed); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_num_threads_list()) { + num_failed++; + } + } + return num_failed; +} diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c new file mode 100644 index 0000000000000..358ac915c1713 --- /dev/null +++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c @@ -0,0 +1,99 @@ +// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 OMP_THREAD_LIMIT=16 %libomp-run +#include +#include "omp_testsuite.h" + +// When compiler supports num_threads clause list format and strict modifier, +// remove the following and use num_threads clause directly +#if defined(__cplusplus) +extern "C" { +#endif + +int __kmpc_global_thread_num(void *loc); +void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length, int *list); +void __kmpc_push_num_threads_strict(void *loc, int gtid, int nth, int sev, const char *msg); +void __kmpc_push_num_threads_list_strict(void *loc, int gtid, unsigned length, int *list, int sev, const char *msg); + +#if defined(__cplusplus) +} +#endif + +int test_omp_parallel_num_threads_strict() +{ + int num_failed; + + // Test regular runtime warning about exceeding thread limit. + // Tolerate whatever value was given. +#pragma omp parallel num_threads(22) +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() <= 22); + + // Test with 4 threads and strict -- no problem, no warning. + __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 4, + 1, "This warning shouldn't happen."); +#pragma omp parallel //num_threads(strict:4) +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 4); + + // Exceed limit, specify user warning message. Tolerate whatever was given. + __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 20, + 1, "User-supplied warning for strict."); +#pragma omp parallel //num_threads(strict:20) severity(warning) \ + message("User-supplied warning for strict.") +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() <= 20); + + // Exceed limit, no user message, use runtime default message for strict. + // Tolerate whatever value was given. + __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 21, + 1, NULL); +#pragma omp parallel //num_threads(strict:21) +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() <= 21); + + + // Exceed limit in nested level. Should see user warning message. + int threads3[2] = {2,24}; + __kmpc_push_num_threads_list_strict(NULL, __kmpc_global_thread_num(NULL), 2, + threads3, 1, + "User-supplied warning on strict list."); +#pragma omp parallel //num_threads(strict:2,24) severity(warning) \ + message("User-supplied warning on strict. list") // 1st level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); +#pragma omp parallel // 2nd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() <= 24); + } + } + + // No strict limit in nested level. Regular runtime limiting applies. + __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, + threads3); +#pragma omp parallel //num_threads(2,24) // 1st level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() == 2); +#pragma omp parallel // 2nd level + { +#pragma omp single + num_failed = num_failed + !(omp_get_num_threads() <= 24); + } + } + + return (!num_failed); +} + +int main() +{ + int i; + int num_failed=0; + + for(i = 0; i < REPETITIONS; i++) { + if(!test_omp_parallel_num_threads_strict()) { + num_failed++; + } + } + return num_failed; +} From 177f9eb335bcc5ffe8ac381381d0c3703d9548c2 Mon Sep 17 00:00:00 2001 From: Terry Wilmarth Date: Fri, 29 Mar 2024 21:18:18 -0500 Subject: [PATCH 4/9] Fix test. --- .../parallel/omp_parallel_num_threads_strict.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c index 358ac915c1713..6fccfa7f4e764 100644 --- a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c +++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c @@ -23,21 +23,21 @@ int test_omp_parallel_num_threads_strict() // Test regular runtime warning about exceeding thread limit. // Tolerate whatever value was given. -#pragma omp parallel num_threads(22) +#pragma omp parallel reduction(+:num_failed) num_threads(22) #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 22); // Test with 4 threads and strict -- no problem, no warning. __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 4, 1, "This warning shouldn't happen."); -#pragma omp parallel //num_threads(strict:4) +#pragma omp parallel reduction(+:num_failed) //num_threads(strict:4) #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 4); // Exceed limit, specify user warning message. Tolerate whatever was given. __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 20, 1, "User-supplied warning for strict."); -#pragma omp parallel //num_threads(strict:20) severity(warning) \ +#pragma omp parallel reduction(+:num_failed) //num_threads(strict:20) severity(warning) \ message("User-supplied warning for strict.") #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 20); @@ -46,7 +46,7 @@ int test_omp_parallel_num_threads_strict() // Tolerate whatever value was given. __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 21, 1, NULL); -#pragma omp parallel //num_threads(strict:21) +#pragma omp parallel reduction(+:num_failed) //num_threads(strict:21) #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 21); @@ -56,12 +56,12 @@ int test_omp_parallel_num_threads_strict() __kmpc_push_num_threads_list_strict(NULL, __kmpc_global_thread_num(NULL), 2, threads3, 1, "User-supplied warning on strict list."); -#pragma omp parallel //num_threads(strict:2,24) severity(warning) \ +#pragma omp parallel reduction(+:num_failed) //num_threads(strict:2,24) severity(warning) \ message("User-supplied warning on strict. list") // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); -#pragma omp parallel // 2nd level +#pragma omp parallel reduction(+:num_failed) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 24); @@ -71,11 +71,11 @@ int test_omp_parallel_num_threads_strict() // No strict limit in nested level. Regular runtime limiting applies. __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads3); -#pragma omp parallel //num_threads(2,24) // 1st level +#pragma omp parallel reduction(+:num_failed) //num_threads(2,24) // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); -#pragma omp parallel // 2nd level +#pragma omp parallel reduction(+:num_failed) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 24); From 663cdfd29409b4aa03402b9f0db5744d67e183b1 Mon Sep 17 00:00:00 2001 From: Terry Wilmarth Date: Thu, 16 May 2024 13:23:47 -0500 Subject: [PATCH 5/9] Fixed formatting of test. --- .../parallel/omp_parallel_num_threads_list.c | 111 +++++++++--------- .../omp_parallel_num_threads_strict.c | 61 +++++----- 2 files changed, 89 insertions(+), 83 deletions(-) diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c index 1c1771c255b31..4c2e93cca1096 100644 --- a/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c +++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c @@ -9,27 +9,27 @@ extern "C" { #endif int __kmpc_global_thread_num(void *loc); -void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length, int *list); +void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length, + int *list); #if defined(__cplusplus) } #endif -int test_omp_parallel_num_threads_list() -{ +int test_omp_parallel_num_threads_list() { int num_failed; - // Initially, 5 levels specified via OMP_NUM_THREADS with 2 threads per level - // Check top 3 levels -#pragma omp parallel reduction(+:num_failed) // 1st level +// Initially, 5 levels specified via OMP_NUM_THREADS with 2 threads per level +// Check top 3 levels +#pragma omp parallel reduction(+ : num_failed) // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); -#pragma omp parallel reduction(+:num_failed) // 2nd level +#pragma omp parallel reduction(+ : num_failed) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); -#pragma omp parallel reduction(+:num_failed) // 3rd level +#pragma omp parallel reduction(+ : num_failed) // 3rd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); @@ -37,16 +37,16 @@ int test_omp_parallel_num_threads_list() } // end 2nd level parallel } // end 1st level parallel - // Make sure that basic single element num_threads clause works -#pragma omp parallel reduction(+:num_failed) num_threads(4) // 1st level +// Make sure that basic single element num_threads clause works +#pragma omp parallel reduction(+ : num_failed) num_threads(4) // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 4); -#pragma omp parallel reduction(+:num_failed) // 2nd level +#pragma omp parallel reduction(+ : num_failed) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected -#pragma omp parallel reduction(+:num_failed) // 3rd level +#pragma omp parallel reduction(+ : num_failed) // 3rd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected @@ -54,16 +54,16 @@ int test_omp_parallel_num_threads_list() } // end 2nd level parallel } // end 1st level parallel - // Check that basic single element num_threads clause works on second level -#pragma omp parallel reduction(+:num_failed) // 1st level +// Check that basic single element num_threads clause works on second level +#pragma omp parallel reduction(+ : num_failed) // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected -#pragma omp parallel reduction(+:num_failed) num_threads(4) // 2nd level +#pragma omp parallel reduction(+ : num_failed) num_threads(4) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 4); -#pragma omp parallel reduction(+:num_failed) // 3rd level +#pragma omp parallel reduction(+ : num_failed) // 3rd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected @@ -73,49 +73,51 @@ int test_omp_parallel_num_threads_list() // Try a short list. It should completely overwrite the old settings. // We need to use the compiler interface for now. - int threads[2] = {3,3}; - __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads); -#pragma omp parallel reduction(+:num_failed) //num_threads(3,3) // 1st level + int threads[2] = {3, 3}; + __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, + threads); +#pragma omp parallel reduction(+ : num_failed) // num_threads(3,3) // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 3); -#pragma omp parallel reduction(+:num_failed) // 2nd level +#pragma omp parallel reduction(+ : num_failed) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 3); -#pragma omp parallel reduction(+:num_failed) // 3rd level +#pragma omp parallel reduction(+ : num_failed) // 3rd level { - // NOTE: should just keep using last element in list, to nesting depth +// NOTE: should just keep using last element in list, to nesting depth #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 3); } // end 3rd level parallel } // end 2nd level parallel } // end 1st level parallel - // Similar, but at a lower level. -#pragma omp parallel reduction(+:num_failed) // 1st level +// Similar, but at a lower level. +#pragma omp parallel reduction(+ : num_failed) // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected - int threads[2] = {3,3}; - __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads); -#pragma omp parallel reduction(+:num_failed) // num_clause(3,3) // 2nd level + int threads[2] = {3, 3}; + __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, + threads); +#pragma omp parallel reduction(+ : num_failed) // num_clause(3,3) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 3); -#pragma omp parallel reduction(+:num_failed) // 3rd level +#pragma omp parallel reduction(+ : num_failed) // 3rd level { - // NOTE: just keep using last element in list, to nesting depth +// NOTE: just keep using last element in list, to nesting depth #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 3); } // end 3rd level parallel } // end 2nd level parallel - // Make sure a second inner parallel is NOT affected by the clause -#pragma omp parallel reduction(+:num_failed) // 2nd level +// Make sure a second inner parallel is NOT affected by the clause +#pragma omp parallel reduction(+ : num_failed) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected -#pragma omp parallel reduction(+:num_failed) // 3rd level +#pragma omp parallel reduction(+ : num_failed) // 3rd level { #pragma omp single // NOTE: just keep using last element in list, to nesting depth @@ -125,49 +127,51 @@ int test_omp_parallel_num_threads_list() } // end 1st level parallel // Test lists at multiple levels - int threads2[2] = {4,3}; - __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads2); -#pragma omp parallel reduction(+:num_failed) // num_clause(4,3) // 1st level + int threads2[2] = {4, 3}; + __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, + threads2); +#pragma omp parallel reduction(+ : num_failed) // num_clause(4,3) // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 4); -#pragma omp parallel reduction(+:num_failed) // 2nd level +#pragma omp parallel reduction(+ : num_failed) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 3); -#pragma omp parallel reduction(+:num_failed) // 3rd level +#pragma omp parallel reduction(+ : num_failed) // 3rd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 3); - int threads3[2] = {2,5}; - __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads3); -#pragma omp parallel reduction(+:num_failed) //num_clause(2,5) // 4th level + int threads3[2] = {2, 5}; + __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, + threads3); +#pragma omp parallel reduction(+ : num_failed) // num_clause(2,5) // 4th level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); -#pragma omp parallel reduction(+:num_failed) // 5th level +#pragma omp parallel reduction(+ : num_failed) // 5th level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 5); -#pragma omp parallel reduction(+:num_failed) // 6th level +#pragma omp parallel reduction(+ : num_failed) // 6th level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 5); } // end 6th level parallel } // end 5th level parallel } // end 4th level parallel -#pragma omp parallel reduction(+:num_failed) // 4th level +#pragma omp parallel reduction(+ : num_failed) // 4th level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 3); } // end 4th level parallel } // end 3rd level parallel } // end 2nd level parallel -#pragma omp parallel reduction(+:num_failed) // 2nd level +#pragma omp parallel reduction(+ : num_failed) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 3); -#pragma omp parallel reduction(+:num_failed) // 3rd level +#pragma omp parallel reduction(+ : num_failed) // 3rd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 3); @@ -175,16 +179,16 @@ int test_omp_parallel_num_threads_list() } // end 2nd level parallel } // end 1st level parallel - // Now we should be back to the way we started. -#pragma omp parallel reduction(+:num_failed) // 1st level +// Now we should be back to the way we started. +#pragma omp parallel reduction(+ : num_failed) // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); -#pragma omp parallel reduction(+:num_failed) // 2nd level +#pragma omp parallel reduction(+ : num_failed) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); -#pragma omp parallel reduction(+:num_failed) // 3rd level +#pragma omp parallel reduction(+ : num_failed) // 3rd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); @@ -195,13 +199,12 @@ int test_omp_parallel_num_threads_list() return (!num_failed); } -int main() -{ +int main() { int i; - int num_failed=0; + int num_failed = 0; - for(i = 0; i < REPETITIONS; i++) { - if(!test_omp_parallel_num_threads_list()) { + for (i = 0; i < REPETITIONS; i++) { + if (!test_omp_parallel_num_threads_list()) { num_failed++; } } diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c index 6fccfa7f4e764..17f8bd52cf2e0 100644 --- a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c +++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c @@ -1,4 +1,5 @@ -// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 OMP_THREAD_LIMIT=16 %libomp-run +// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 OMP_THREAD_LIMIT=16 +// %libomp-run #include #include "omp_testsuite.h" @@ -9,59 +10,62 @@ extern "C" { #endif int __kmpc_global_thread_num(void *loc); -void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length, int *list); -void __kmpc_push_num_threads_strict(void *loc, int gtid, int nth, int sev, const char *msg); -void __kmpc_push_num_threads_list_strict(void *loc, int gtid, unsigned length, int *list, int sev, const char *msg); +void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length, + int *list); +void __kmpc_push_num_threads_strict(void *loc, int gtid, int nth, int sev, + const char *msg); +void __kmpc_push_num_threads_list_strict(void *loc, int gtid, unsigned length, + int *list, int sev, const char *msg); #if defined(__cplusplus) } #endif -int test_omp_parallel_num_threads_strict() -{ +int test_omp_parallel_num_threads_strict() { int num_failed; - // Test regular runtime warning about exceeding thread limit. - // Tolerate whatever value was given. -#pragma omp parallel reduction(+:num_failed) num_threads(22) +// Test regular runtime warning about exceeding thread limit. +// Tolerate whatever value was given. +#pragma omp parallel reduction(+ : num_failed) num_threads(22) #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 22); // Test with 4 threads and strict -- no problem, no warning. - __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 4, - 1, "This warning shouldn't happen."); -#pragma omp parallel reduction(+:num_failed) //num_threads(strict:4) + __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 4, 1, + "This warning shouldn't happen."); +#pragma omp parallel reduction(+ : num_failed) // num_threads(strict:4) #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 4); // Exceed limit, specify user warning message. Tolerate whatever was given. - __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 20, - 1, "User-supplied warning for strict."); -#pragma omp parallel reduction(+:num_failed) //num_threads(strict:20) severity(warning) \ + __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 20, 1, + "User-supplied warning for strict."); +#pragma omp parallel reduction( \ + + : num_failed) //num_threads(strict:20) severity(warning) \ message("User-supplied warning for strict.") #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 20); // Exceed limit, no user message, use runtime default message for strict. // Tolerate whatever value was given. - __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 21, - 1, NULL); -#pragma omp parallel reduction(+:num_failed) //num_threads(strict:21) + __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 21, 1, + NULL); +#pragma omp parallel reduction(+ : num_failed) // num_threads(strict:21) #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 21); - // Exceed limit in nested level. Should see user warning message. - int threads3[2] = {2,24}; + int threads3[2] = {2, 24}; __kmpc_push_num_threads_list_strict(NULL, __kmpc_global_thread_num(NULL), 2, threads3, 1, "User-supplied warning on strict list."); -#pragma omp parallel reduction(+:num_failed) //num_threads(strict:2,24) severity(warning) \ +#pragma omp parallel reduction( \ + + : num_failed) //num_threads(strict:2,24) severity(warning) \ message("User-supplied warning on strict. list") // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); -#pragma omp parallel reduction(+:num_failed) // 2nd level +#pragma omp parallel reduction(+ : num_failed) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 24); @@ -71,11 +75,11 @@ int test_omp_parallel_num_threads_strict() // No strict limit in nested level. Regular runtime limiting applies. __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads3); -#pragma omp parallel reduction(+:num_failed) //num_threads(2,24) // 1st level +#pragma omp parallel reduction(+ : num_failed) // num_threads(2,24) // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); -#pragma omp parallel reduction(+:num_failed) // 2nd level +#pragma omp parallel reduction(+ : num_failed) // 2nd level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 24); @@ -85,13 +89,12 @@ int test_omp_parallel_num_threads_strict() return (!num_failed); } -int main() -{ +int main() { int i; - int num_failed=0; + int num_failed = 0; - for(i = 0; i < REPETITIONS; i++) { - if(!test_omp_parallel_num_threads_strict()) { + for (i = 0; i < REPETITIONS; i++) { + if (!test_omp_parallel_num_threads_strict()) { num_failed++; } } From 02b316e6d9b667120060142c4907ad0b51b72747 Mon Sep 17 00:00:00 2001 From: Terry Wilmarth Date: Thu, 16 May 2024 13:27:26 -0500 Subject: [PATCH 6/9] Fix RUN line broken by git-clang-format --- openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c index 17f8bd52cf2e0..8004b50df72e0 100644 --- a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c +++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c @@ -1,4 +1,4 @@ -// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 OMP_THREAD_LIMIT=16 +// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 OMP_THREAD_LIMIT=16 \ // %libomp-run #include #include "omp_testsuite.h" From 72eda7ca9fcf7589480d991835aa62ab32350980 Mon Sep 17 00:00:00 2001 From: Terry Wilmarth Date: Thu, 16 May 2024 14:15:22 -0500 Subject: [PATCH 7/9] More formatter disparity --- .../parallel/omp_parallel_num_threads_strict.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c index 8004b50df72e0..4bab089d4d47b 100644 --- a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c +++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c @@ -40,9 +40,9 @@ int test_omp_parallel_num_threads_strict() { // Exceed limit, specify user warning message. Tolerate whatever was given. __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 20, 1, "User-supplied warning for strict."); -#pragma omp parallel reduction( \ - + : num_failed) //num_threads(strict:20) severity(warning) \ - message("User-supplied warning for strict.") +#pragma omp parallel reduction(+ : num_failed) + // num_threads(strict:20) severity(warning) + // message("User-supplied warning for strict.") #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 20); @@ -59,9 +59,9 @@ int test_omp_parallel_num_threads_strict() { __kmpc_push_num_threads_list_strict(NULL, __kmpc_global_thread_num(NULL), 2, threads3, 1, "User-supplied warning on strict list."); -#pragma omp parallel reduction( \ - + : num_failed) //num_threads(strict:2,24) severity(warning) \ - message("User-supplied warning on strict. list") // 1st level +#pragma omp parallel reduction(+ : num_failed) + // num_threads(strict:2,24) severity(warning) + // message("User-supplied warning on strict. list") // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); @@ -75,7 +75,8 @@ int test_omp_parallel_num_threads_strict() { // No strict limit in nested level. Regular runtime limiting applies. __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads3); -#pragma omp parallel reduction(+ : num_failed) // num_threads(2,24) // 1st level +#pragma omp parallel reduction(+ : num_failed) + // num_threads(2,24) // 1st level { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() == 2); From 4a31b49ae948fa34cef753640427a410d4a912cd Mon Sep 17 00:00:00 2001 From: Terry Wilmarth Date: Fri, 17 May 2024 10:34:12 -0500 Subject: [PATCH 8/9] Test fixes. --- openmp/runtime/test/parallel/omp_parallel_num_threads_list.c | 2 +- .../runtime/test/parallel/omp_parallel_num_threads_strict.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c index 4c2e93cca1096..b5abbc4c14de4 100644 --- a/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c +++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c @@ -17,7 +17,7 @@ void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length, #endif int test_omp_parallel_num_threads_list() { - int num_failed; + int num_failed = 0; // Initially, 5 levels specified via OMP_NUM_THREADS with 2 threads per level // Check top 3 levels diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c index 4bab089d4d47b..c8a65a31f3908 100644 --- a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c +++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c @@ -1,5 +1,5 @@ // RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 OMP_THREAD_LIMIT=16 \ -// %libomp-run +// RUN: %libomp-run #include #include "omp_testsuite.h" @@ -22,7 +22,7 @@ void __kmpc_push_num_threads_list_strict(void *loc, int gtid, unsigned length, #endif int test_omp_parallel_num_threads_strict() { - int num_failed; + int num_failed = 0; // Test regular runtime warning about exceeding thread limit. // Tolerate whatever value was given. From a3d3d83b06122061f74fb5d9eec8dd544b41c930 Mon Sep 17 00:00:00 2001 From: Terry Wilmarth Date: Fri, 21 Jun 2024 09:56:40 -0500 Subject: [PATCH 9/9] When num_threads clause is a list, strict modifier should only apply to the parallel region on which the clause was specified. --- openmp/runtime/src/kmp.h | 4 ---- openmp/runtime/src/kmp_runtime.cpp | 12 ++---------- .../parallel/omp_parallel_num_threads_strict.c | 18 +++++++++--------- 3 files changed, 11 insertions(+), 23 deletions(-) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index c44f153719723..d1ec43fabcda6 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -537,10 +537,6 @@ typedef struct kmp_nested_nthreads_t { int *nth; int size; int used; - bool strict; // num_threads clause has strict modifier - ident_t *loc; // loc for strict modifier - int sev; // error severity for strict modifier - const char *msg; // error message for strict modifier } kmp_nested_nthreads_t; extern kmp_nested_nthreads_t __kmp_nested_nth; diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 5b32dd1c25d6e..22cd559aa97f5 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -125,10 +125,6 @@ static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr, for (int i = level + 1, j = 1; i < new_size; ++i, ++j) new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j]; new_nested_nth->size = new_nested_nth->used = new_size; - new_nested_nth->strict = thr->th.th_nt_strict; - new_nested_nth->loc = thr->th.th_nt_loc; - new_nested_nth->sev = thr->th.th_nt_sev; - new_nested_nth->msg = thr->th.th_nt_msg; return new_nested_nth; } @@ -950,8 +946,7 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, } #endif // KMP_DEBUG - if ((this_thr->th.th_nt_strict || parent_team->t.t_nested_nth->strict) && - new_nthreads < set_nthreads) { + if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) { __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev, this_thr->th.th_nt_msg); } @@ -8354,10 +8349,7 @@ void __kmp_cleanup(void) { __kmp_nested_nth.nth = NULL; __kmp_nested_nth.size = 0; __kmp_nested_nth.used = 0; - __kmp_nested_nth.strict = false; - __kmp_nested_nth.loc = NULL; - __kmp_nested_nth.sev = 0; - __kmp_nested_nth.msg = NULL; + KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types); __kmp_nested_proc_bind.bind_types = NULL; __kmp_nested_proc_bind.size = 0; diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c index c8a65a31f3908..577aa3a250578 100644 --- a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c +++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c @@ -54,36 +54,36 @@ int test_omp_parallel_num_threads_strict() { #pragma omp single num_failed = num_failed + !(omp_get_num_threads() <= 21); - // Exceed limit in nested level. Should see user warning message. - int threads3[2] = {2, 24}; + // Exceed limit at top level. Should see user warning message. + int threads3[2] = {24, 2}; __kmpc_push_num_threads_list_strict(NULL, __kmpc_global_thread_num(NULL), 2, threads3, 1, "User-supplied warning on strict list."); #pragma omp parallel reduction(+ : num_failed) - // num_threads(strict:2,24) severity(warning) + // num_threads(strict:24,2) severity(warning) // message("User-supplied warning on strict. list") // 1st level { #pragma omp single - num_failed = num_failed + !(omp_get_num_threads() == 2); + num_failed = num_failed + !(omp_get_num_threads() <= 24); #pragma omp parallel reduction(+ : num_failed) // 2nd level { #pragma omp single - num_failed = num_failed + !(omp_get_num_threads() <= 24); + num_failed = num_failed + !(omp_get_num_threads() <= 2); } } - // No strict limit in nested level. Regular runtime limiting applies. + // No strict limit at top level. Regular runtime limiting applies. __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads3); #pragma omp parallel reduction(+ : num_failed) - // num_threads(2,24) // 1st level + // num_threads(24,2) // 1st level { #pragma omp single - num_failed = num_failed + !(omp_get_num_threads() == 2); + num_failed = num_failed + !(omp_get_num_threads() <= 24); #pragma omp parallel reduction(+ : num_failed) // 2nd level { #pragma omp single - num_failed = num_failed + !(omp_get_num_threads() <= 24); + num_failed = num_failed + !(omp_get_num_threads() <= 2); } }