/* Upper bound used for attach bookkeeping.
 * NOTE(review): presumably the maximum number of regions that may be
 * attached to one dynamic window -- confirm against the users of this macro. */
#define OMPI_OSC_UCX_ATTACH_MAX 48
/* Upper bound, in bytes, for a memory-address blob.
 * NOTE(review): presumably the size of a packed remote-key/address buffer --
 * confirm against the users of this macro. */
#define OMPI_OSC_UCX_MEM_ADDR_MAX_LEN 1024
2929
30+
3031typedef struct ompi_osc_ucx_component {
3132 ompi_osc_base_component_t super ;
3233 opal_common_ucx_wpool_t * wpool ;
@@ -125,6 +126,7 @@ typedef struct ompi_osc_ucx_module {
125126 opal_common_ucx_wpmem_t * mem ;
126127 opal_common_ucx_wpmem_t * state_mem ;
127128
129+ bool skip_sync_check ;
128130 bool noncontig_shared_win ;
129131 size_t * sizes ;
130132 /* in shared windows, shmem_addrs can be used for direct load store to
@@ -150,6 +152,17 @@ typedef struct ompi_osc_ucx_lock {
/* Expands to the UCX endpoint slot of peer `rank_` in communicator `comm_`:
 * the OMPI_PROC_ENDPOINT_TAG_UCX entry of the proc returned by
 * ompi_comm_peer_lookup(). The expansion is a parenthesized lvalue, so its
 * address may be taken. */
#define OSC_UCX_GET_EP(comm_, rank_) \
    (ompi_comm_peer_lookup((comm_), (rank_))->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_UCX])

/* Expands to the displacement unit to use for `rank_`: the per-rank entry
 * module_->disp_units[rank_] when module_->disp_unit is negative, otherwise
 * the single module-wide module_->disp_unit.
 * `module_` is evaluated more than once; pass an expression without side
 * effects. Arguments are parenthesized so expressions such as `&mod` or
 * `base + i` expand correctly. */
#define OSC_UCX_GET_DISP(module_, rank_) \
    (((module_)->disp_unit < 0) ? (module_)->disp_units[(rank_)] : (module_)->disp_unit)
152154
/* Flag defined in another translation unit and consulted by
 * OSC_UCX_GET_DEFAULT_EP below.
 * NOTE(review): presumably true when MPI_THREAD_MULTIPLE support is active --
 * confirm at the definition site. */
extern bool mpi_thread_multiple_enabled;

/* Assign the default endpoint pointer for `_target` to `_ep_ptr`.
 *
 * When mpi_thread_multiple_enabled is set, `_ep_ptr` becomes NULL; otherwise
 * it becomes the address of the endpoint slot yielded by
 * OSC_UCX_GET_EP(_comm, _target), cast to `ucp_ep_h *`.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement: it must be followed by a semicolon and is safe inside an
 * unbraced if/else. */
#define OSC_UCX_GET_DEFAULT_EP(_ep_ptr, _comm, _target)                     \
    do {                                                                    \
        if (mpi_thread_multiple_enabled) {                                  \
            (_ep_ptr) = NULL;                                               \
        } else {                                                            \
            (_ep_ptr) = (ucp_ep_h *) &(OSC_UCX_GET_EP(_comm, _target));     \
        }                                                                   \
    } while (0)
163+
/* Threshold on outstanding operations.
 * NOTE(review): presumably the number of in-flight operations after which a
 * flush is issued -- confirm against the code that compares against it. */
#define OSC_UCX_OUTSTANDING_OPS_FLUSH_THRESHOLD 64
165+
/* Query a shared window for a rank's segment.
 * `size`, `disp_unit` and `baseptr` are pointer parameters.
 * NOTE(review): presumably outputs in the manner of MPI_Win_shared_query, with
 * an OMPI status code as the return value -- confirm in the implementation. */
int ompi_osc_ucx_shared_query(struct ompi_win_t *win, int rank, size_t *size,
                              int *disp_unit, void *baseptr);
/* Attach the memory region [base, base + len) to window `win`.
 * NOTE(review): presumably the backend of MPI_Win_attach, returning an OMPI
 * status code -- confirm in the implementation. */
int ompi_osc_ucx_win_attach(struct ompi_win_t *win, void *base, size_t len);
@@ -169,6 +182,11 @@ int ompi_osc_ucx_accumulate(const void *origin_addr, int origin_count,
169182 int target , ptrdiff_t target_disp , int target_count ,
170183 struct ompi_datatype_t * target_dt ,
171184 struct ompi_op_t * op , struct ompi_win_t * win );
/* Variant of ompi_osc_ucx_accumulate with an identical parameter list.
 * NOTE(review): the `_nb` suffix presumably means the call returns without
 * waiting for completion -- confirm in the implementation. */
int ompi_osc_ucx_accumulate_nb(const void *origin_addr, int origin_count,
                               struct ompi_datatype_t *origin_dt,
                               int target, ptrdiff_t target_disp, int target_count,
                               struct ompi_datatype_t *target_dt,
                               struct ompi_op_t *op, struct ompi_win_t *win);
172190int ompi_osc_ucx_compare_and_swap (const void * origin_addr , const void * compare_addr ,
173191 void * result_addr , struct ompi_datatype_t * dt ,
174192 int target , ptrdiff_t target_disp ,
@@ -184,6 +202,13 @@ int ompi_osc_ucx_get_accumulate(const void *origin_addr, int origin_count,
184202 int target_rank , ptrdiff_t target_disp ,
185203 int target_count , struct ompi_datatype_t * target_datatype ,
186204 struct ompi_op_t * op , struct ompi_win_t * win );
/* Variant of ompi_osc_ucx_get_accumulate taking origin, result and target
 * buffer descriptions plus the reduction op and window.
 * NOTE(review): the `_nb` suffix presumably means the call returns without
 * waiting for completion -- confirm in the implementation. */
int ompi_osc_ucx_get_accumulate_nb(const void *origin_addr, int origin_count,
                                   struct ompi_datatype_t *origin_datatype,
                                   void *result_addr, int result_count,
                                   struct ompi_datatype_t *result_datatype,
                                   int target_rank, ptrdiff_t target_disp,
                                   int target_count, struct ompi_datatype_t *target_datatype,
                                   struct ompi_op_t *op, struct ompi_win_t *win);
187212int ompi_osc_ucx_rput (const void * origin_addr , int origin_count ,
188213 struct ompi_datatype_t * origin_dt ,
189214 int target , ptrdiff_t target_disp , int target_count ,
@@ -229,9 +254,11 @@ int ompi_osc_find_attached_region_position(ompi_osc_dynamic_win_info_t *dynamic_
229254 int min_index , int max_index ,
230255 uint64_t base , size_t len , int * insert );
231256extern inline bool ompi_osc_need_acc_lock (ompi_osc_ucx_module_t * module , int target );
232- extern inline int ompi_osc_state_lock (ompi_osc_ucx_module_t * module , int target ,
257+ extern inline int ompi_osc_ucx_state_lock (ompi_osc_ucx_module_t * module , int target ,
233258 bool * lock_acquired , bool force_lock );
234- extern inline int ompi_osc_state_unlock (ompi_osc_ucx_module_t * module , int target ,
259+ extern inline int ompi_osc_ucx_state_unlock (ompi_osc_ucx_module_t * module , int target ,
235260 bool lock_acquired , void * free_ptr );
261+ extern inline int ompi_osc_ucx_state_unlock_nb (ompi_osc_ucx_module_t * module , int target ,
262+ bool lock_acquired , struct ompi_win_t * win );
236263
237264#endif /* OMPI_OSC_UCX_H */
0 commit comments