From c9c1ec664d51e81171e2eeb6f093634d58c6d176 Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Tue, 10 Aug 2021 17:51:09 -0500 Subject: [PATCH 1/5] [from mpich] romio: avoid freeing NULLs in gpfs mpich has two commits about freeing NULLs in GPFS, titled: "romio gpfs: avoid freeing NULL" and "ROMIO: gpfs: avoid freeing NULL in more places" The code isn't a perfect match vs the romio we have here in ompi version 4.1.x, but it's essentially turning a handful of "ADIOI_Free(x)" into "if (x) { ADIOI_Free(x); }". So that's what this commit does too. Signed-off-by: Mark Allen --- .../io/romio321/romio/adio/ad_gpfs/ad_gpfs_rdcoll.c | 10 +++++----- .../io/romio321/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_rdcoll.c b/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_rdcoll.c index f449acb158a..bdcff1789fb 100644 --- a/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_rdcoll.c +++ b/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_rdcoll.c @@ -444,8 +444,8 @@ void ADIOI_GPFS_ReadStridedColl(ADIO_File fd, void *buf, int count, ADIOI_Free(count_my_req_per_proc); for (i=0; i Date: Tue, 10 Aug 2021 17:49:29 -0500 Subject: [PATCH 2/5] [from mpich] romio GPFS: missing initialization Merging a romio commit that was accepted by mpich: > romio GPFS: missing initialization > > Romio GPFS had a runtime problem due to a .mem_ptrs field that > was uninitialized that it was potentially trying to free. 
Signed-off-by: Mark Allen --- ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_aggrs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_aggrs.c b/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_aggrs.c index f6df24748f0..828d0d5e799 100644 --- a/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_aggrs.c +++ b/ompi/mca/io/romio321/romio/adio/ad_gpfs/ad_gpfs_aggrs.c @@ -719,6 +719,7 @@ void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs, others_req[i].count = 0; others_req[i].offsets = NULL; others_req[i].lens = NULL; + others_req[i].mem_ptrs = NULL; } } From 3e317d73c363d135c24b80a25db83c41788dc1f7 Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Wed, 16 Dec 2020 19:04:13 -0500 Subject: [PATCH 3/5] [from mpich] darray (romio) needs a resize in the block()/cyclic() subtype creation Merging a romio commit that was accepted by mpich: > darray (romio) needs a resize in the block()/cyclic() subtype creation > > When Type_create_darray() uses the block()/cyclic() function to > construct a subtype for whatever dimension it's processing, it > needs to resize the resulting type before it goes into the type > construction as the input for processing the next dimension. > > The same problem is in the main path's type creation, and in romio's > mirroring of it. > > Gist for a testcase: > https://gist.github.com/markalle/940de93d64fd779e304ee124855b8e6a > > The darray_bug_romio.c testcase creates a darray using > * 4 ranks in a 2x2 grid > * looking at the type made for rank 0 > * inner dimension: 4 ints distributed block over 2 ranks with 2 items each > * outer dimension: 6 of the above distributed cyclic over 2 ranks with 2 items each > > The type created by processing the first dimension should look like this > [ x x . . ] > > And then distributing those for the second dimension becomes > > [ x x x x ] > [ . . . . ] > [ . . . . ] > [ . . . . ] > [ x x x x ] > [ . . . . 
] > > Going to the MPI standard to justify why the first layout is right, > it's where the definition of the cyclic() function has a ub_marker > of gsize*ex, eg 4 ints for that first type. Signed-off-by: Mark Allen --- ompi/mca/io/romio321/romio/adio/common/ad_darray.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ompi/mca/io/romio321/romio/adio/common/ad_darray.c b/ompi/mca/io/romio321/romio/adio/common/ad_darray.c index 3c9ca36a8e1..910cc768c66 100644 --- a/ompi/mca/io/romio321/romio/adio/common/ad_darray.c +++ b/ompi/mca/io/romio321/romio/adio/common/ad_darray.c @@ -199,6 +199,13 @@ static int MPIOI_Type_block(int *array_of_gsizes, int dim, int ndims, int nprocs /* in terms of no. of elements of type oldtype in this dimension */ if (mysize == 0) *st_offset = 0; + MPI_Aint ex; + MPI_Type_extent(type_old, &ex); + MPI_Datatype type_tmp; + MPI_Type_create_resized(*type_new, 0, array_of_gsizes[dim] * ex, &type_tmp); + MPI_Type_free(type_new); + *type_new = type_tmp; + return MPI_SUCCESS; } @@ -287,5 +294,12 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc if (local_size == 0) *st_offset = 0; + MPI_Aint ex; + MPI_Type_extent(type_old, &ex); + MPI_Datatype type_tmp2; + MPI_Type_create_resized(*type_new, 0, array_of_gsizes[dim] * ex, &type_tmp2); + MPI_Type_free(type_new); + *type_new = type_tmp2; + return MPI_SUCCESS; } From adda190dee4cc71b6215f5271978b30398a21992 Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Wed, 16 Dec 2020 19:05:10 -0500 Subject: [PATCH 4/5] [from mpich] romio flatten has flat->count consistency problems Merging a romio commit that was accepted by mpich: > romio flatten has flat->count consistency problems > > ADIOI_Count_contiguous_blocks and ADIOI_Flatten are very similar functions, > but they diverge a lot around the edges and in degenerate cases. In Spectrum > MPI I spent some time making them consistent with each other but > found that to be a losing battle. 
> > So the approach I used here is to not have Count() be as definitive, > and rather let Flatten() have the last word on what the final flat->count > really is. Eg Flatten's *curr_index is the real count. > > The changes I made are > > 1. Fix a couple places in Flatten where *curr_index was updated out > of sync with what was actually being added to flat->indices[] and > flat->blocklens[]. That's one of the important book-keeping rules > Flatten should follow. There were a couple places (when counts[] arrays > have 0s) where that wasn't the case (see the "nonzeroth" var in this > commit). > > 2. The main change I made was to reset flat->count based on > Flatten's curr_index. This is because the divergence between the > two functions is too great to reliably fix. > > 3. A third change is just a safety valve, using a > flatlist_node_grow() function just in case the > Count function returns a smaller value than Flatten ends up > trying to write into the array, and related to this I > got rid of the various --(flat->count) lines, since that var now > represents the allocated size of the array, until right after > the Flatten function when it's reset to represent the data written > to the array like it used to be. > > I don't think we even need ADIOI_Count_contiguous_blocks() anymore, > but I didn't remove it. 
Signed-off-by: Mark Allen --- .../io/romio321/romio/adio/common/flatten.c | 85 +++++++++++++++---- 1 file changed, 67 insertions(+), 18 deletions(-) diff --git a/ompi/mca/io/romio321/romio/adio/common/flatten.c b/ompi/mca/io/romio321/romio/adio/common/flatten.c index b468ec2f0f4..e2d865bf6ac 100644 --- a/ompi/mca/io/romio321/romio/adio/common/flatten.c +++ b/ompi/mca/io/romio321/romio/adio/common/flatten.c @@ -98,6 +98,33 @@ int ADIOI_Type_get_contents (MPI_Datatype datatype, int max_integers, return rc; } +/* + * I don't really expect this to ever trigger, but without the below safety + * valve, the design relies on the Count function coming out >= whatever + * the Flatten function comes up with. There are enough differences between + * the two that it's hard to be positive this will always be true. So every + * time something's added to flat's arrays, let's make sure they're big enough + * and re-alloc if not. + */ +static void flatlist_node_grow(ADIOI_Flatlist_node * flat, int idx) +{ + if (idx >= flat->count) { + ADIO_Offset *new_blocklens; + ADIO_Offset *new_indices; + int new_count = (flat->count * 1.25 + 4); + new_blocklens = (ADIO_Offset *) ADIOI_Calloc(new_count * 2, sizeof(ADIO_Offset)); + new_indices = new_blocklens + new_count; + if (flat->count) { + memcpy(new_blocklens, flat->blocklens, flat->count * sizeof(ADIO_Offset)); + memcpy(new_indices, flat->indices, flat->count * sizeof(ADIO_Offset)); + ADIOI_Free(flat->blocklens); + } + flat->blocklens = new_blocklens; + flat->indices = new_indices; + flat->count = new_count; + } +} + void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type); /* flatten datatype and add it to Flatlist */ void ADIOI_Flatten_datatype(MPI_Datatype datatype) @@ -168,6 +195,16 @@ void ADIOI_Flatten_datatype(MPI_Datatype datatype) DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: ADIOI_Flatten\n"); #endif +/* + * Setting flat->count to curr_index, since curr_index is the most fundamentally + * correct updated value that represents 
what's in the indices/blocklens arrays. + * It would be nice if the counter function and the flatten function were in sync, + * but the numerous cases that decrement flat->count in the flatten function show + * that syncing them is a hack, and as long as the counter doesn't under-count + * it's good enough. + */ + flat->count = curr_index; + ADIOI_Optimize_flattened(flat); #endif /* debug */ @@ -318,6 +355,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, if (prev_index == *curr_index) { /* simplest case, made up of basic or contiguous types */ j = *curr_index; + flatlist_node_grow(flat, j); flat->indices[j] = st_offset; MPI_Type_size_x(types[0], &old_size); flat->blocklens[j] = top_count * old_size; @@ -335,6 +373,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, MPI_Type_get_extent(types[0], &lb, &old_extent); for (m=1; mindices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; #ifdef FLATTEN_DEBUG @@ -366,10 +405,12 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, avoid >2G integer arithmetic problems */ ADIO_Offset blocklength = ints[1], stride = ints[2]; j = *curr_index; + flatlist_node_grow(flat, j); flat->indices[j] = st_offset; MPI_Type_size_x(types[0], &old_size); flat->blocklens[j] = blocklength * old_size; for (i=j+1; iindices[i] = flat->indices[i-1] + stride * old_size; flat->blocklens[i] = flat->blocklens[j]; } @@ -389,6 +430,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, MPI_Type_get_extent(types[0], &lb, &old_extent); for (m=1; mindices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; j++; @@ -400,6 +442,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, num = *curr_index - prev_index; for (i=1; iindices[j] = flat->indices[j-num] + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = 
flat->blocklens[j-num]; j++; @@ -429,10 +472,12 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, avoid >2G integer arithmetic problems */ ADIO_Offset blocklength = ints[1]; j = *curr_index; + flatlist_node_grow(flat, j); flat->indices[j] = st_offset; MPI_Type_size_x(types[0], &old_size); flat->blocklens[j] = blocklength * old_size; for (i=j+1; iindices[i] = flat->indices[i-1] + adds[0]; flat->blocklens[i] = flat->blocklens[j]; } @@ -452,6 +497,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, MPI_Type_get_extent(types[0], &lb, &old_extent); for (m=1; mindices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; j++; @@ -463,6 +509,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, num = *curr_index - prev_index; for (i=1; iindices[j] = flat->indices[j-num] + adds[0]; flat->blocklens[j] = flat->blocklens[j-num]; j++; @@ -500,16 +547,15 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, avoid >2G integer arithmetic problems */ ADIO_Offset blocklength = ints[1+i-j], stride = ints[top_count+1+i-j]; if (blocklength > 0) { + flatlist_node_grow(flat, nonzeroth); flat->indices[nonzeroth] = st_offset + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[nonzeroth] = blocklength* ADIOI_AINT_CAST_TO_OFFSET old_extent; nonzeroth++; - } else { - flat->count--; /* don't count/consider any zero-length blocklens */ } } - *curr_index = i; + *curr_index = nonzeroth; } else { /* indexed type made up of noncontiguous derived types */ @@ -523,14 +569,13 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, for (m=1; mblocklens[j-num] > 0) { + flatlist_node_grow(flat, nonzeroth); flat->indices[nonzeroth] = flat->indices[nonzeroth-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[nonzeroth] = flat->blocklens[nonzeroth-num]; j++; nonzeroth++; - } else { - flat->count --; } } } @@ -545,26 +590,24 @@ void 
ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, avoid >2G integer arithmetic problems */ ADIO_Offset stride = ints[top_count+1+i]-ints[top_count+i]; if (flat->blocklens[j-num] > 0 ) { + flatlist_node_grow(flat, nonzeroth); flat->indices[nonzeroth] = flat->indices[j-num] + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[nonzeroth] = flat->blocklens[j-num]; j++; nonzeroth++; - } else { - flat->count--; } } *curr_index = j; for (m=1; mblocklens[j-basic_num] > 0) { + flatlist_node_grow(flat, nonzeroth); flat->indices[nonzeroth] = flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[nonzeroth] = flat->blocklens[j-basic_num]; j++; nonzeroth++; - } else { - flat->count --; } } } @@ -611,9 +654,11 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, avoid >2G integer arithmetic problems */ ADIO_Offset blocklength = ints[1]; if (is_hindexed_block) { + flatlist_node_grow(flat, i); flat->indices[i] = st_offset + adds[i-j]; } else { ADIO_Offset stride = ints[1+1+i-j]; + flatlist_node_grow(flat, i); flat->indices[i] = st_offset + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent; } @@ -636,6 +681,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, * extent of a type */ MPI_Type_get_extent(types[0], &lb, &old_extent); } + flatlist_node_grow(flat, j); flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; @@ -649,12 +695,14 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, for (i=1; iindices[j] = flat->indices[j-num] + adds[i] - adds[i-1]; } else { /* By using ADIO_Offset we preserve +/- sign and avoid >2G integer arithmetic problems */ ADIO_Offset stride = ints[2+i]-ints[1+i]; + flatlist_node_grow(flat, j); flat->indices[j] = flat->indices[j-num] + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent; } @@ -691,14 +739,13 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, /* By 
using ADIO_Offset we preserve +/- sign and avoid >2G integer arithmetic problems */ ADIO_Offset blocklength = ints[1+i-j]; + flatlist_node_grow(flat, nonzeroth); flat->indices[nonzeroth] = st_offset + adds[i-j]; flat->blocklens[nonzeroth] = blocklength*old_size; nonzeroth++; - } else { - flat->count--; } } - *curr_index = i; + *curr_index = nonzeroth; } else { /* indexed type made up of noncontiguous derived types */ @@ -713,13 +760,12 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, for (m=1; mblocklens[j-num] > 0) { + flatlist_node_grow(flat, nonzeroth); flat->indices[nonzeroth] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[nonzeroth] = flat->blocklens[j-num]; j++; nonzeroth++; - } else { - flat->count--; } } } @@ -731,19 +777,19 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, prev_index = *curr_index; for (m=0, nonzeroth=j; mblocklens[j-num] > 0) { + flatlist_node_grow(flat, nonzeroth); flat->indices[nonzeroth] = flat->indices[j-num] + adds[i] - adds[i-1]; flat->blocklens[nonzeroth] = flat->blocklens[j-num]; j++; nonzeroth++; - } else { - flat->count--; } } *curr_index = j; for (m=1; mblocklens[j-basic_num] >0) { + flatlist_node_grow(flat, nonzeroth); flat->indices[nonzeroth] = flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[nonzeroth] = flat->blocklens[j-basic_num]; @@ -779,6 +825,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, if (ints[1+n] > 0) { ADIO_Offset blocklength = ints[1+n]; j = *curr_index; + flatlist_node_grow(flat, j); flat->indices[j] = st_offset + adds[n]; MPI_Type_size_x(types[n], &old_size); flat->blocklens[j] = blocklength * old_size; @@ -798,6 +845,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, MPI_Type_get_extent(types[n], &lb, &old_extent); for (m=1; mindices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent; flat->blocklens[j] = flat->blocklens[j-num]; @@ 
-827,6 +875,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, * bound based on the inner type, but the lower bound based on the * upper type. check both lb and ub to prevent mixing updates */ if (flat->lb_idx == -1 && flat->ub_idx == -1) { + flatlist_node_grow(flat, j); flat->indices[j] = st_offset + adds[0]; /* this zero-length blocklens[] element, unlike eleswhere in the * flattening code, is correct and is used to indicate a lower bound @@ -843,7 +892,6 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } else { /* skipped over this chunk because something else higher-up in the * type construction set this for us already */ - flat->count--; st_offset -= adds[0]; } @@ -859,6 +907,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, else { /* current type is basic or contiguous */ j = *curr_index; + flatlist_node_grow(flat, j); flat->indices[j] = st_offset; MPI_Type_size_x(types[0], &old_size); flat->blocklens[j] = old_size; @@ -874,6 +923,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, /* see note above about mixing updates for why we check lb and ub */ if ((flat->lb_idx == -1 && flat->ub_idx == -1) || lb_updated) { j = *curr_index; + flatlist_node_grow(flat, j); flat->indices[j] = st_offset + adds[0] + adds[1]; /* again, zero-element ok: an upper-bound marker explicitly set by the * constructor of this resized type */ @@ -882,7 +932,6 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat, } else { /* skipped over this chunk because something else higher-up in the * type construction set this for us already */ - flat->count--; (*curr_index)--; } From ecd3e05f2ae19d3d6228d747d94424bb3e152120 Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Tue, 12 Jan 2021 14:50:56 -0500 Subject: [PATCH 5/5] [from mpich] make ADIOI_GEN_WriteStrided not step on itself Merging a romio commit that was accepted by mpich: > make ADIOI_GEN_WriteStrided not step on itself > > The 
ADIOI_GEN_WriteStrided function uses data sieving on non-contiguous > types. That is, if it wants to write data at locations > [W...X...Y...Z...] > it reads the whole buffer > [dddddddddddddddd] > changes the locations it wants to write to > [WdddXdddYdddZddd] > then writes the whole thing back. It uses locks to make this safe, but > the problem is this only protects against other parts of the product that > are using locks. And without this PR a peer who is simultaneously making > a simple non-contiguous write wouldn't have locked. > > A testcase to demonstrate the original problem is here: > https://gist.github.com/markalle/d7da240c19e57f095c5d1b13240dae24 > > % mpicc -o x romio_write_timing.c > % mpirun -np 4 ./x > > Note: you need to use a filesystem that uses ADIOI_GEN_WriteStrided to > hit the bug. I was using GPFS. > > This commit is pulled from wkliao after discussing where to put the > new lock. It adds locks to contiguous writes in independent write > functions when data sieving write is not disabled Signed-off-by: Mark Allen --- .../romio/adio/ad_lustre/ad_lustre_wrstr.c | 28 +++++++++------ .../romio321/romio/adio/common/ad_write_str.c | 34 ++++++++++++------- 2 files changed, 38 insertions(+), 24 deletions(-) diff --git a/ompi/mca/io/romio321/romio/adio/ad_lustre/ad_lustre_wrstr.c b/ompi/mca/io/romio321/romio/adio/ad_lustre/ad_lustre_wrstr.c index ce538d4a6b6..a0494d48751 100644 --- a/ompi/mca/io/romio321/romio/adio/ad_lustre/ad_lustre_wrstr.c +++ b/ompi/mca/io/romio321/romio/adio/ad_lustre/ad_lustre_wrstr.c @@ -18,7 +18,7 @@ ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \ ADIO_EXPLICIT_OFFSET, writebuf_off, \ &status1, error_code); \ - if (!(fd->atomicity)) \ + if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \ ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ if (*error_code != MPI_SUCCESS) { \ *error_code = MPIO_Err_create_code(*error_code, \ @@ -35,7 +35,7 @@ writebuf_len = (unsigned) ADIOI_MIN(end_offset - 
writebuf_off + 1, \ (writebuf_off / stripe_size + 1) * \ stripe_size - writebuf_off); \ - if (!(fd->atomicity)) \ + if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \ ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \ ADIO_EXPLICIT_OFFSET, \ @@ -58,7 +58,7 @@ while (write_sz != req_len) { \ ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \ ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \ - if (!(fd->atomicity)) \ + if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \ ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ if (*error_code != MPI_SUCCESS) { \ *error_code = MPIO_Err_create_code(*error_code, \ @@ -75,7 +75,7 @@ writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \ (writebuf_off / stripe_size + 1) * \ stripe_size - writebuf_off); \ - if (!(fd->atomicity)) \ + if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \ ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \ ADIO_EXPLICIT_OFFSET, \ @@ -221,8 +221,9 @@ void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count, writebuf_off = 0; writebuf_len = 0; - /* if atomicity is true, lock the region to be accessed */ - if (fd->atomicity) + /* if atomicity is true or data sieving is not disable, lock the region + * to be accessed */ + if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, bufsize); for (j = 0; j < count; j++) { @@ -241,7 +242,7 @@ void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count, ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); - if (fd->atomicity) + if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE) ADIOI_UNLOCK(fd, start_off, SEEK_SET, bufsize); if (*error_code != MPI_SUCCESS) { ADIOI_Free(writebuf); @@ -325,9 +326,13 @@ void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void 
*buf, int count, userbuf_off = 0; ADIOI_BUFFERED_WRITE_WITHOUT_READ /* write the buffer out finally */ + if (fd->hints->ds_write != ADIOI_HINT_DISABLE) + ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); + if (fd->hints->ds_write != ADIOI_HINT_DISABLE) + ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); if (file_ptr_type == ADIO_INDIVIDUAL) { /* update MPI-IO file pointer to point to the first byte @@ -378,8 +383,9 @@ void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count, fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } -/* if atomicity is true, lock the region to be accessed */ - if (fd->atomicity) + /* if atomicity is true or data sieving is not disable, lock the region + * to be accessed */ + if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); writebuf_off = 0; @@ -502,11 +508,11 @@ void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count, ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); - if (!(fd->atomicity)) + if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); if (*error_code != MPI_SUCCESS) return; } - if (fd->atomicity) + if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); ADIOI_Free(writebuf); diff --git a/ompi/mca/io/romio321/romio/adio/common/ad_write_str.c b/ompi/mca/io/romio321/romio/adio/common/ad_write_str.c index 83f2420ddc9..34c7b123ea6 100644 --- a/ompi/mca/io/romio321/romio/adio/common/ad_write_str.c +++ b/ompi/mca/io/romio321/romio/adio/common/ad_write_str.c @@ -14,7 +14,8 @@ if (writebuf_len) { \ ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \ ADIO_EXPLICIT_OFFSET, 
writebuf_off, &status1, error_code); \ - if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ + if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \ + ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ if (*error_code != MPI_SUCCESS) { \ *error_code = MPIO_Err_create_code(*error_code, \ MPIR_ERR_RECOVERABLE, myname, \ @@ -25,7 +26,8 @@ } \ writebuf_off = req_off; \ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ - if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ + if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \ + ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \ ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \ if (*error_code != MPI_SUCCESS) { \ @@ -42,7 +44,8 @@ while (write_sz != req_len) { \ ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \ ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \ - if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ + if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \ + ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ if (*error_code != MPI_SUCCESS) { \ *error_code = MPIO_Err_create_code(*error_code, \ MPIR_ERR_RECOVERABLE, myname, \ @@ -54,7 +57,8 @@ userbuf_off += write_sz; \ writebuf_off += writebuf_len; \ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\ - if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ + if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) \ + ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \ ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \ if (*error_code != MPI_SUCCESS) { \ @@ -191,9 +195,10 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, writebuf = (char 
*) ADIOI_Malloc(max_bufsize); writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-writebuf_off+1)); -/* if atomicity is true, lock the region to be accessed */ - if (fd->atomicity) - ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); + /* if atomicity is true or data sieving is not disable, lock the region + * to be accessed */ + if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE) + ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1); for (j=0; jatomicity) + if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); if (*error_code != MPI_SUCCESS) goto fn_exit; @@ -287,8 +292,10 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, * datatypes, instead of a count of bytes (which might overflow) * Other WriteContig calls in this path are operating on data * sieving buffer */ + ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, bufsize); ADIO_WriteContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET, offset, status, error_code); + ADIOI_UNLOCK(fd, offset, SEEK_SET, bufsize); if (file_ptr_type == ADIO_INDIVIDUAL) { /* update MPI-IO file pointer to point to the first byte @@ -338,9 +345,10 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset); } -/* if atomicity is true, lock the region to be accessed */ - if (fd->atomicity) - ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); + /* if atomicity is true or data sieving is not disable, lock the region + * to be accessed */ + if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE) + ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1); writebuf_off = 0; writebuf_len = 0; @@ -460,11 +468,11 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count, if (writebuf_len) { ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, writebuf_off, 
&status1, error_code); - if (!(fd->atomicity)) + if (!fd->atomicity && fd->hints->ds_write == ADIOI_HINT_DISABLE) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); if (*error_code != MPI_SUCCESS) goto fn_exit; } - if (fd->atomicity) + if (fd->atomicity || fd->hints->ds_write != ADIOI_HINT_DISABLE) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;