@@ -4915,12 +4915,12 @@ void CGOpenMPRuntime::emitPrivateReduction(
4915
4915
// - Thread enters critical section.
4916
4916
// - Reads its private value from Privates[i].
4917
4917
// - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4918
- // LHSExprs [i]).
4918
+ // Privates [i]).
4919
4919
// - Exits critical section.
4920
4920
//
4921
4921
// Call __kmpc_barrier after combining.
4922
4922
//
4923
- // Each thread copies __shared_reduction_var[i] back to LHSExprs [i].
4923
+ // Each thread copies __shared_reduction_var[i] back to RHSExprs [i].
4924
4924
//
4925
4925
// Final __kmpc_barrier to synchronize after broadcasting
4926
4926
QualType PrivateType = Privates->getType();
@@ -5025,7 +5025,7 @@ void CGOpenMPRuntime::emitPrivateReduction(
5025
5025
const Expr *ReductionOp = ReductionOps;
5026
5026
const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5027
5027
LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5028
- LValue LHSLV = CGF.EmitLValue(LHSExprs );
5028
+ LValue LHSLV = CGF.EmitLValue(Privates );
5029
5029
5030
5030
auto EmitCriticalReduction = [&](auto ReductionGen) {
5031
5031
std::string CriticalName = getName({"reduction_critical"});
@@ -5114,7 +5114,7 @@ void CGOpenMPRuntime::emitPrivateReduction(
5114
5114
else
5115
5115
FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5116
5116
5117
- LValue TargetLHSLV = CGF.EmitLValue(LHSExprs );
5117
+ LValue TargetLHSLV = CGF.EmitLValue(RHSExprs );
5118
5118
if (IsAggregate) {
5119
5119
CGF.EmitAggregateCopy(TargetLHSLV,
5120
5120
CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
@@ -5126,13 +5126,23 @@ void CGOpenMPRuntime::emitPrivateReduction(
5126
5126
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5127
5127
CGM.getModule(), OMPRTL___kmpc_barrier),
5128
5128
BarrierArgs);
5129
+
5130
+ // Combiner with original list item
5131
+ auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5132
+ PrePostActionTy &Action) {
5133
+ Action.Enter(CGF);
5134
+ emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5135
+ cast<DeclRefExpr>(LHSExprs),
5136
+ cast<DeclRefExpr>(RHSExprs));
5137
+ };
5138
+ EmitCriticalReduction(OriginalListCombiner);
5129
5139
}
5130
5140
5131
5141
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5132
- ArrayRef<const Expr *> Privates ,
5133
- ArrayRef<const Expr *> LHSExprs ,
5134
- ArrayRef<const Expr *> RHSExprs ,
5135
- ArrayRef<const Expr *> ReductionOps ,
5142
+ ArrayRef<const Expr *> OrgPrivates ,
5143
+ ArrayRef<const Expr *> OrgLHSExprs ,
5144
+ ArrayRef<const Expr *> OrgRHSExprs ,
5145
+ ArrayRef<const Expr *> OrgReductionOps ,
5136
5146
ReductionOptionsTy Options) {
5137
5147
if (!CGF.HaveInsertPoint())
5138
5148
return;
@@ -5179,10 +5189,10 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5179
5189
5180
5190
if (SimpleReduction) {
5181
5191
CodeGenFunction::RunCleanupsScope Scope(CGF);
5182
- const auto *IPriv = Privates .begin();
5183
- const auto *ILHS = LHSExprs .begin();
5184
- const auto *IRHS = RHSExprs .begin();
5185
- for (const Expr *E : ReductionOps ) {
5192
+ const auto *IPriv = OrgPrivates .begin();
5193
+ const auto *ILHS = OrgLHSExprs .begin();
5194
+ const auto *IRHS = OrgRHSExprs .begin();
5195
+ for (const Expr *E : OrgReductionOps ) {
5186
5196
emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5187
5197
cast<DeclRefExpr>(*IRHS));
5188
5198
++IPriv;
@@ -5192,6 +5202,26 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5192
5202
return;
5193
5203
}
5194
5204
5205
+ // Select the shared reduction variables using the IsPrivateVarReduction flag.
5206
+ // Only keep entries where the corresponding variable is not private.
5207
+ SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5208
+ FilteredRHSExprs, FilteredReductionOps;
5209
+ for (unsigned I : llvm::seq<unsigned>(
5210
+ std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5211
+ if (!Options.IsPrivateVarReduction[I]) {
5212
+ FilteredPrivates.emplace_back(OrgPrivates[I]);
5213
+ FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5214
+ FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5215
+ FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5216
+ }
5217
+ }
5218
+ // Wrap filtered vectors in ArrayRef for downstream shared reduction
5219
+ // processing.
5220
+ ArrayRef<const Expr *> Privates = FilteredPrivates;
5221
+ ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5222
+ ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5223
+ ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5224
+
5195
5225
// 1. Build a list of reduction variables.
5196
5226
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5197
5227
auto Size = RHSExprs.size();
@@ -5439,11 +5469,12 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5439
5469
"PrivateVarReduction: ReductionOps size mismatch");
5440
5470
assert(LHSExprs.size() == Options.IsPrivateVarReduction.size() &&
5441
5471
"PrivateVarReduction: IsPrivateVarReduction size mismatch");
5442
- for (unsigned I :
5443
- llvm::seq<unsigned>(std::min(ReductionOps.size(), LHSExprs.size()))) {
5472
+
5473
+ for (unsigned I : llvm::seq<unsigned>(
5474
+ std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5444
5475
if (Options.IsPrivateVarReduction[I])
5445
- emitPrivateReduction(CGF, Loc, Privates [I], LHSExprs[I], RHSExprs [I],
5446
- ReductionOps [I]);
5476
+ emitPrivateReduction(CGF, Loc, OrgPrivates [I], OrgLHSExprs [I],
5477
+ OrgRHSExprs[I], OrgReductionOps [I]);
5447
5478
}
5448
5479
}
5449
5480
0 commit comments