diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 8aeb3e373298e..0a9a3ca5bd030 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -79,6 +79,7 @@ createOMPEarlyOutliningPass();
 std::unique_ptr<mlir::Pass> createOMPFunctionFilteringPass();
 std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
 createOMPMarkDeclareTargetPass();
+std::unique_ptr<mlir::Pass> createOMPLoopIndexMemToRegPass();
 
 // declarative passes
 #define GEN_PASS_REGISTRATION
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 9474edf13ce46..8304b882d525c 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -326,4 +326,13 @@ def OMPFunctionFiltering : Pass<"omp-function-filtering"> {
   ];
 }
 
+def OMPLoopIndexMemToReg : Pass<"omp-loop-index-mem2reg", "mlir::func::FuncOp"> {
+  let summary = "Replaces OpenMP loop index variables by block arguments";
+  let description = "Pushes allocations of OpenMP loop index variables into "
+                    "the loop region and, when never passed by reference, "
+                    "replaces them by the entry block arguments.";
+  let constructor = "::fir::createOMPLoopIndexMemToRegPass()";
+  let dependentDialects = ["fir::FIROpsDialect", "mlir::omp::OpenMPDialect"];
+}
+
 #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 616d9ddc066a7..0b5e8a0656804 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -270,6 +270,7 @@ inline void createOpenMPFIRPassPipeline(
     pm.addPass(fir::createOMPEarlyOutliningPass());
     pm.addPass(fir::createOMPFunctionFilteringPass());
   }
+  pm.addPass(fir::createOMPLoopIndexMemToRegPass());
 }
 
 #if !defined(FLANG_EXCLUDE_CODEGEN)
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 3d2b7e5eaeade..306551b03ced1 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -19,6 +19,7 @@ add_flang_library(FIRTransforms
   OMPEarlyOutlining.cpp
   OMPFunctionFiltering.cpp
   OMPMarkDeclareTarget.cpp
+  OMPLoopIndexMemToReg.cpp
 
   DEPENDS
   FIRDialect
diff --git a/flang/lib/Optimizer/Transforms/OMPLoopIndexMemToReg.cpp b/flang/lib/Optimizer/Transforms/OMPLoopIndexMemToReg.cpp
new file mode 100644
index 0000000000000..af117d625154b
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/OMPLoopIndexMemToReg.cpp
@@ -0,0 +1,250 @@
+//===- OMPLoopIndexMemToReg.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements transforms to push allocations into an OpenMP loop
+// operation region when they are used to store loop indices. Then, they are
+// removed together with any associated load or store operations if their
+// address is not needed, in which case uses of their values are replaced with
+// the block argument from which they were originally initialized.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Transforms/Passes.h"
+
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/Value.h"
+#include "mlir/IR/ValueRange.h"
+#include "mlir/Support/LLVM.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+
+namespace fir {
+#define GEN_PASS_DEF_OMPLOOPINDEXMEMTOREG
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+using namespace mlir;
+
+template <typename LoopOpTy>
+class LoopProcessorHelper {
+  LoopOpTy loop;
+
+  /// Returns true when every user of each value in `stores` has this loop as
+  /// its closest enclosing `LoopOpTy` ancestor.
+  bool allUsesInLoop(ValueRange stores) {
+    Operation *loopOp = loop.getOperation();
+    return llvm::all_of(stores, [&](Value storage) {
+      return llvm::all_of(storage.getUsers(), [&](Operation *user) {
+        return user->getParentOfType<LoopOpTy>() == loopOp;
+      });
+    });
+  }
+
+  /// Decide whether `declareOp` and its backing storage can be dropped in
+  /// favour of the block argument holding the corresponding loop index.
+  /// The caller is expected to have already established that the
+  /// hlfir.declare and the fir.alloca feeding it are used only inside of the
+  /// loop.
+  static bool isDeclareRemovable(hlfir::DeclareOp declareOp) {
+    // The declared storage must come from a fir.alloca whose single use is
+    // this hlfir.declare.
+    auto backingAlloca = llvm::dyn_cast_if_present<fir::AllocaOp>(
+        declareOp.getMemref().getDefiningOp());
+    if (!backingAlloca)
+      return false;
+    if (!backingAlloca.getResult().hasOneUse())
+      return false;
+
+    // Every user of getOriginalBase() must be a fir.store.
+    auto isStore = [](Operation *user) { return isa<fir::StoreOp>(user); };
+    if (!llvm::all_of(declareOp.getOriginalBase().getUsers(), isStore))
+      return false;
+
+    // Every user of getBase() must be a fir.load; any other user prevents
+    // replacing the variable by the block argument.
+    auto isLoad = [](Operation *user) { return isa<fir::LoadOp>(user); };
+    if (!llvm::all_of(declareOp.getBase().getUsers(), isLoad))
+      return false;
+    return true;
+  }
+
+  /// Decide whether `allocaOp`, already known to be used only inside of the
+  /// loop, can be dropped in favour of the block argument holding the
+  /// corresponding loop index.
+  static bool isAllocaRemovable(fir::AllocaOp allocaOp) {
+    // Only fir.load and fir.store users are accepted. Any other user is
+    // treated as requiring the address itself, in which case the allocation
+    // has to stay.
+    auto isLoadOrStore = [](Operation *user) {
+      return isa<fir::LoadOp, fir::StoreOp>(user);
+    };
+
+    return llvm::all_of(allocaOp.getResult().getUsers(), isLoadOrStore);
+  }
+
+  /// Try to push an hlfir.declare defined outside of the loop, together with
+  /// the fir.alloca it declares, to the start of the loop region. Fails, and
+  /// leaves the IR untouched, unless all of their uses are inside of the loop.
+  LogicalResult pushDeclareIntoLoop(hlfir::DeclareOp declareOp) {
+    // The memref is not necessarily produced by a fir.alloca (it can be a
+    // block argument or the result of another operation), so do not cast.
+    auto allocaOp = llvm::dyn_cast_if_present<fir::AllocaOp>(
+        declareOp.getMemref().getDefiningOp());
+    if (!allocaOp || !allUsesInLoop(declareOp->getResults()))
+      return failure();
+
+    // Check the fir.alloca before moving anything, so that failing leaves the
+    // IR untouched. The hlfir.declare itself is about to enter the loop.
+    Operation *loopOp = loop.getOperation();
+    for (Operation *user : allocaOp->getUsers())
+      if (user != declareOp && user->getParentOfType<LoopOpTy>() != loopOp)
+        return failure();
+
+    // Push both operations to the beginning of the loop region.
+    Block &b = loop.getRegion().getBlocks().front();
+    declareOp->moveBefore(&b, b.begin());
+    allocaOp->moveBefore(&b, b.begin());
+    return success();
+  }
+
+  /// Relocate `allocaOp` to the very start of the loop region's first block.
+  /// This is only attempted when every use of the allocation is inside of the
+  /// loop; otherwise the operation is left where it is and failure is
+  /// returned.
+  LogicalResult pushAllocaIntoLoop(fir::AllocaOp allocaOp) {
+    // The move is not possible as soon as one use is outside of the loop.
+    Value storage = allocaOp.getResult();
+    if (!allUsesInLoop(storage))
+      return failure();
+
+    Block &entryBlock = loop.getRegion().front();
+    allocaOp->moveBefore(&entryBlock, entryBlock.begin());
+    return success();
+  }
+
+  /// Process a single loop index `arg`, given in `argStores` the addresses it
+  /// is stored to. Storage that can be pushed into the loop and is removable
+  /// is added to `opsToDelete` along with its users, and the results of the
+  /// marked fir.load operations are replaced by `arg`.
+  void processLoopArg(BlockArgument arg, llvm::ArrayRef<Value> argStores,
+                      SmallPtrSetImpl<Operation *> &opsToDelete) {
+    llvm::SmallPtrSet<Operation *, 16> toDelete;
+    for (Value store : argStores) {
+      Operation *op = store.getDefiningOp();
+
+      // Give up on this index if a storage is not defined by an operation.
+      if (!op)
+        return;
+
+      // Support HLFIR flow as well as regular FIR flow.
+      if (auto declareOp = dyn_cast<hlfir::DeclareOp>(op)) {
+        if (succeeded(pushDeclareIntoLoop(declareOp)) &&
+            isDeclareRemovable(declareOp)) {
+          // Mark hlfir.declare, fir.alloca and related uses for deletion.
+          for (OpOperand &use : declareOp.getOriginalBase().getUses())
+            toDelete.insert(use.getOwner());
+
+          for (OpOperand &use : declareOp.getBase().getUses())
+            toDelete.insert(use.getOwner());
+
+          Operation *allocaOp = declareOp.getMemref().getDefiningOp();
+          toDelete.insert(declareOp);
+          toDelete.insert(allocaOp);
+        }
+      } else if (auto allocaOp = dyn_cast<fir::AllocaOp>(op)) {
+        if (succeeded(pushAllocaIntoLoop(allocaOp)) &&
+            isAllocaRemovable(allocaOp)) {
+          // The address of the index is not needed, so mark the users of the
+          // fir.alloca (loads and stores only) for deletion.
+          for (OpOperand &use : allocaOp.getResult().getUses())
+            toDelete.insert(use.getOwner());
+
+          // Delete now-unused fir.alloca.
+          toDelete.insert(allocaOp);
+        }
+      } else {
+        return;
+      }
+    }
+
+    // No early return was taken: commit what was marked. Storage that could
+    // not be pushed into the loop or removed was skipped and is left in place.
+    opsToDelete.insert(toDelete.begin(), toDelete.end());
+
+    for (Operation *op : toDelete) {
+      // Replace the value of marked fir.load operations with the loop index.
+      if (isa<fir::LoadOp>(op))
+        op->replaceAllUsesWith(ValueRange(arg));
+      // Drop all uses of fir.alloca and hlfir.declare because their defining
+      // operations will be deleted as well.
+      else if (isa<fir::AllocaOp>(op) || isa<hlfir::DeclareOp>(op))
+        op->dropAllUses();
+    }
+  }
+
+public:
+  explicit LoopProcessorHelper(LoopOpTy loop) : loop(loop) {}
+
+  /// Entry point: for each index (entry block argument) of the loop, find the
+  /// addresses it is stored to, try to remove that storage via
+  /// processLoopArg(), and finally erase every operation that got marked.
+  void process() {
+    llvm::ArrayRef<BlockArgument> indices = loop.getRegion().getArguments();
+
+    // First pass: record, per loop index, the memref of every fir.store user.
+    // This happens before any rewriting takes place.
+    llvm::SmallVector<llvm::SmallVector<Value>> addressesPerIndex;
+    for (BlockArgument index : indices) {
+      llvm::SmallVector<Value> &addresses = addressesPerIndex.emplace_back();
+      for (Operation *user : index.getUsers())
+        if (auto storeOp = dyn_cast<fir::StoreOp>(user))
+          addresses.push_back(storeOp.getMemref());
+    }
+
+    // Second pass: handle each loop index independently of the others.
+    llvm::SmallPtrSet<Operation *, 16> erasable;
+    for (auto [index, addresses] : llvm::zip(indices, addressesPerIndex))
+      processLoopArg(index, addresses, erasable);
+
+    for (Operation *op : erasable)
+      op->erase();
+  }
+};
+
+namespace {
+/// Applies LoopProcessorHelper to the OpenMP loops of a function.
+class OMPLoopIndexMemToRegPass
+    : public fir::impl::OMPLoopIndexMemToRegBase<OMPLoopIndexMemToRegPass> {
+  template <typename LoopOpTy>
+  static void processAll(func::FuncOp root) {
+    root.walk([](LoopOpTy loop) { LoopProcessorHelper(loop).process(); });
+  }
+
+public:
+  void runOnOperation() override {
+    processAll<omp::WsLoopOp>(getOperation());
+    processAll<omp::SimdLoopOp>(getOperation());
+    processAll<omp::TaskLoopOp>(getOperation());
+  }
+};
+} // namespace
+
+/// Creates a new instance of OMPLoopIndexMemToRegPass.
+std::unique_ptr<Pass> fir::createOMPLoopIndexMemToRegPass() {
+  return std::make_unique<OMPLoopIndexMemToRegPass>();
+}
diff --git a/flang/test/Lower/OpenMP/FIR/copyin.f90 b/flang/test/Lower/OpenMP/FIR/copyin.f90
index ddfa0ea091462..3443b310074f5 100644
--- a/flang/test/Lower/OpenMP/FIR/copyin.f90
+++ b/flang/test/Lower/OpenMP/FIR/copyin.f90
@@ -138,17 +138,15 @@ subroutine copyin_derived_type()
 ! CHECK:         %[[VAL_1:.*]] = fir.address_of(@_QFcombined_parallel_worksharing_loopEx6) : !fir.ref<i32>
 ! CHECK:         %[[VAL_2:.*]] = omp.threadprivate %[[VAL_1]] : !fir.ref<i32> -> !fir.ref<i32>
 ! CHECK:         omp.parallel   {
-! CHECK:           %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-! CHECK:           %[[VAL_4:.*]] = omp.threadprivate %[[VAL_1]] : !fir.ref<i32> -> !fir.ref<i32>
-! CHECK:           %[[VAL_5:.*]] = fir.load %[[VAL_2]] : !fir.ref<i32>
-! CHECK:           fir.store %[[VAL_5]] to %[[VAL_4]] : !fir.ref<i32>
+! CHECK:           %[[VAL_3:.*]] = omp.threadprivate %[[VAL_1]] : !fir.ref<i32> -> !fir.ref<i32>
+! CHECK:           %[[VAL_4:.*]] = fir.load %[[VAL_2]] : !fir.ref<i32>
+! CHECK:           fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref<i32>
 ! CHECK:           omp.barrier
-! CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK:           %[[VAL_7:.*]] = fir.load %[[VAL_4]] : !fir.ref<i32>
-! CHECK:           %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK:           omp.wsloop   for  (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
-! CHECK:             fir.store %[[VAL_9]] to %[[VAL_3]] : !fir.ref<i32>
-! CHECK:             fir.call @_QPsub4(%[[VAL_4]]) {{.*}}: (!fir.ref<i32>) -> ()
+! CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK:           %[[VAL_6:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+! CHECK:           %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:           omp.wsloop   for  (%[[VAL_9:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) {
+! CHECK:             fir.call @_QPsub4(%[[VAL_3]]) {{.*}}: (!fir.ref<i32>) -> ()
 ! CHECK:             omp.yield
 ! CHECK:           }
 ! CHECK:           omp.terminator
@@ -269,30 +267,27 @@ subroutine common_1()
 !CHECK: %[[val_7:.*]] = fir.coordinate_of %[[val_6]], %[[val_c4]] : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
 !CHECK: %[[val_8:.*]] = fir.convert %[[val_7]] : (!fir.ref<i8>) -> !fir.ref<i32>
 !CHECK: omp.parallel {
-!CHECK: %[[val_9:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK: %[[val_10:.*]] = omp.threadprivate %[[val_1]] : !fir.ref<!fir.array<8xi8>> -> !fir.ref<!fir.array<8xi8>>
-!CHECK: %[[val_11:.*]] = fir.convert %[[val_10]] : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+!CHECK: %[[val_9:.*]] = omp.threadprivate %[[val_1]] : !fir.ref<!fir.array<8xi8>> -> !fir.ref<!fir.array<8xi8>>
+!CHECK: %[[val_10:.*]] = fir.convert %[[val_9]] : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
 !CHECK: %[[val_c0_0:.*]] = arith.constant 0 : index
-!CHECK: %[[val_12:.*]] = fir.coordinate_of %[[val_11]], %[[val_c0_0]] : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
-!CHECK: %[[val_13:.*]] = fir.convert %[[val_12]] : (!fir.ref<i8>) -> !fir.ref<i32>
-!CHECK: %[[val_14:.*]] = fir.convert %[[val_10]] : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+!CHECK: %[[val_11:.*]] = fir.coordinate_of %[[val_10]], %[[val_c0_0]] : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+!CHECK: %[[val_12:.*]] = fir.convert %[[val_11]] : (!fir.ref<i8>) -> !fir.ref<i32>
+!CHECK: %[[val_13:.*]] = fir.convert %[[val_9]] : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
 !CHECK: %[[val_c4_1:.*]] = arith.constant 4 : index
-!CHECK: %[[val_15:.*]] = fir.coordinate_of %[[val_14]], %[[val_c4_1]] : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
-!CHECK: %[[val_16:.*]] = fir.convert %[[val_15]] : (!fir.ref<i8>) -> !fir.ref<i32>
-!CHECK: %[[val_17:.*]] = fir.load %[[val_5]] : !fir.ref<i32>
-!CHECK: fir.store %[[val_17]] to %[[val_13]] : !fir.ref<i32>
-!CHECK: %[[val_18:.*]] = fir.load %[[val_8]] : !fir.ref<i32>
-!CHECK: fir.store %[[val_18]] to %[[val_16]] : !fir.ref<i32>
+!CHECK: %[[val_14:.*]] = fir.coordinate_of %[[val_13]], %[[val_c4_1]] : (!fir.ref<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+!CHECK: %[[val_15:.*]] = fir.convert %[[val_14]] : (!fir.ref<i8>) -> !fir.ref<i32>
+!CHECK: %[[val_16:.*]] = fir.load %[[val_5]] : !fir.ref<i32>
+!CHECK: fir.store %[[val_16]] to %[[val_12]] : !fir.ref<i32>
+!CHECK: %[[val_17:.*]] = fir.load %[[val_8]] : !fir.ref<i32>
+!CHECK: fir.store %[[val_17]] to %[[val_15]] : !fir.ref<i32>
 !CHECK: omp.barrier
 !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32
-!CHECK: %[[val_19:.*]] = fir.load %[[val_13]] : !fir.ref<i32>
+!CHECK: %[[val_18:.*]] = fir.load %[[val_12]] : !fir.ref<i32>
 !CHECK: %[[val_c1_i32_2:.*]] = arith.constant 1 : i32
-!CHECK: omp.wsloop   for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) {
-!CHECK: fir.store %[[arg]] to %[[val_9]] : !fir.ref<i32>
-!CHECK: %[[val_20:.*]] = fir.load %[[val_16]] : !fir.ref<i32>
-!CHECK: %[[val_21:.*]] = fir.load %[[val_9]] : !fir.ref<i32>
-!CHECK: %[[val_22:.*]] = arith.addi %[[val_20]], %[[val_21]] : i32
-!CHECK: fir.store %[[val_22]] to %[[val_16]] : !fir.ref<i32>
+!CHECK: omp.wsloop   for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_18]]) inclusive step (%[[val_c1_i32_2]]) {
+!CHECK: %[[val_19:.*]] = fir.load %[[val_15]] : !fir.ref<i32>
+!CHECK: %[[val_20:.*]] = arith.addi %[[val_19]], %[[arg]] : i32
+!CHECK: fir.store %[[val_20]] to %[[val_15]] : !fir.ref<i32>
 !CHECK: omp.yield
 !CHECK: }
 !CHECK: omp.terminator
diff --git a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90
index 06f3e1ca82234..bba9dbc4fc4cb 100644
--- a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90
+++ b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90
@@ -1,7 +1,6 @@
 ! RUN: %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s 
 
 !CHECK: func.func @_QPlastprivate_common() {
-!CHECK: %[[val_0:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK: %[[val_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_commonEi"}
 !CHECK: %[[val_2:.*]] = fir.address_of(@c_) : !fir.ref<!fir.array<8xi8>>
 !CHECK: %[[val_3:.*]] = fir.convert %[[val_2]] : (!fir.ref<!fir.array<8xi8>>) -> !fir.ref<!fir.array<?xi8>>
@@ -18,7 +17,6 @@
 !CHECK: %[[val_c100_i32:.*]] = arith.constant 100 : i32
 !CHECK: %[[val_c1_i32_0:.*]] = arith.constant 1 : i32
 !CHECK: omp.wsloop   for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) {
-!CHECK: fir.store %[[arg]] to %[[val_0]] : !fir.ref<i32>
 !CHECK: %[[val_11:.*]] = arith.cmpi eq, %[[arg]], %[[val_c100_i32]] : i32
 !CHECK: fir.if %[[val_11]] {
 !CHECK: %[[val_12:.*]] = fir.load %[[val_9]] : !fir.ref<f32>
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90
index 3152f9c44d0c6..8cf216361bcb6 100644
--- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90
+++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90
@@ -8,34 +8,31 @@
 ! CHECK:         %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_private_fixEx"}
 ! CHECK:         omp.parallel {
 ! CHECK:           %[[PRIV_J:.*]] = fir.alloca i32 {bindc_name = "j", pinned
-! CHECK:           %[[PRIV_I:.*]] = fir.alloca i32 {adapt.valuebyref, pinned
 ! CHECK:           %[[PRIV_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned
 ! CHECK:           %[[ONE:.*]] = arith.constant 1 : i32
 ! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_4:.*]] : !fir.ref<i32>
 ! CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i32
-! CHECK:           omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) {
-! CHECK:             fir.store %[[VAL_6]] to %[[PRIV_I]] : !fir.ref<i32>
-! CHECK:             %[[VAL_7:.*]] = arith.constant 1 : i32
-! CHECK:             %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
-! CHECK:             %[[VAL_9:.*]] = fir.load %[[VAL_4]] : !fir.ref<i32>
-! CHECK:             %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
-! CHECK:             %[[VAL_11:.*]] = arith.constant 1 : index
-! CHECK:             %[[LB:.*]] = fir.convert %[[VAL_8]] : (index) -> i32
-! CHECK:             %[[VAL_12:.*]]:2 = fir.do_loop %[[VAL_13:[^ ]*]] =
-! CHECK-SAME:            %[[VAL_8]] to %[[VAL_10]] step %[[VAL_11]]
-! CHECK-SAME:            iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i32) {
-! CHECK:               fir.store %[[IV]] to %[[PRIV_J]] : !fir.ref<i32>
-! CHECK:               %[[LOAD:.*]] = fir.load %[[PRIV_I]] : !fir.ref<i32>
-! CHECK:               %[[VAL_15:.*]] = fir.load %[[PRIV_J]] : !fir.ref<i32>
-! CHECK:               %[[VAL_16:.*]] = arith.addi %[[LOAD]], %[[VAL_15]] : i32
-! CHECK:               fir.store %[[VAL_16]] to %[[PRIV_X]] : !fir.ref<i32>
-! CHECK:               %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_11]] : index
-! CHECK:               %[[STEPCAST:.*]] = fir.convert %[[VAL_11]] : (index) -> i32
+! CHECK:           omp.wsloop for (%[[IV_I:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) {
+! CHECK:             %[[VAL_6:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (i32) -> index
+! CHECK:             %[[VAL_8:.*]] = fir.load %[[VAL_4]] : !fir.ref<i32>
+! CHECK:             %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> index
+! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : index
+! CHECK:             %[[LB:.*]] = fir.convert %[[VAL_7]] : (index) -> i32
+! CHECK:             %[[VAL_11:.*]]:2 = fir.do_loop %[[VAL_12:[^ ]*]] =
+! CHECK-SAME:            %[[VAL_7]] to %[[VAL_9]] step %[[VAL_10]]
+! CHECK-SAME:            iter_args(%[[IV_J:.*]] = %[[LB]]) -> (index, i32) {
+! CHECK:               fir.store %[[IV_J]] to %[[PRIV_J]] : !fir.ref<i32>
+! CHECK:               %[[VAL_13:.*]] = fir.load %[[PRIV_J]] : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]] = arith.addi %[[IV_I]], %[[VAL_13]] : i32
+! CHECK:               fir.store %[[VAL_14]] to %[[PRIV_X]] : !fir.ref<i32>
+! CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_12]], %[[VAL_10]] : index
+! CHECK:               %[[STEPCAST:.*]] = fir.convert %[[VAL_10]] : (index) -> i32
 ! CHECK:               %[[IVLOAD:.*]] = fir.load %[[PRIV_J]] : !fir.ref<i32>
 ! CHECK:               %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]]
-! CHECK:               fir.result %[[VAL_17]], %[[IVINC]] : index, i32
+! CHECK:               fir.result %[[VAL_15]], %[[IVINC]] : index, i32
 ! CHECK:             }
-! CHECK:             fir.store %[[VAL_12]]#1 to %[[PRIV_J]] : !fir.ref<i32>
+! CHECK:             fir.store %[[VAL_11]]#1 to %[[PRIV_J]] : !fir.ref<i32>
 ! CHECK:             omp.yield
 ! CHECK:           }
 ! CHECK:           omp.terminator
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90
index e9d9218702cc5..f341d0ccda423 100644
--- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90
+++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90
@@ -249,8 +249,6 @@ subroutine simple_loop_1
   real, allocatable :: r;
   ! FIRDialect:  omp.parallel
   !$OMP PARALLEL PRIVATE(r)
-  ! FIRDialect:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-
   ! FIRDialect:     [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
   ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
   ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
@@ -262,9 +260,7 @@ subroutine simple_loop_1
   ! FIRDialect:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP DO
   do i=1, 9
-  ! FIRDialect:     fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
-  ! FIRDialect:     %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! FIRDialect:     omp.yield
@@ -285,8 +281,6 @@ subroutine simple_loop_2
   real, allocatable :: r;
   ! FIRDialect:  omp.parallel
   !$OMP PARALLEL
-  ! FIRDialect:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-
   ! FIRDialect:     [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
   ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
   ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
@@ -298,9 +292,7 @@ subroutine simple_loop_2
   ! FIRDialect:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP DO PRIVATE(r)
   do i=1, 9
-  ! FIRDialect:     fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
-  ! FIRDialect:     %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! FIRDialect:     omp.yield
@@ -320,8 +312,6 @@ subroutine simple_loop_3
   integer :: i
   real, allocatable :: r;
   ! FIRDialect:  omp.parallel
-  ! FIRDialect:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-
   ! FIRDialect:     [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
   ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
   ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
@@ -333,9 +323,7 @@ subroutine simple_loop_3
   ! FIRDialect:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP PARALLEL DO PRIVATE(r)
   do i=1, 9
-  ! FIRDialect:     fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
-  ! FIRDialect:     %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! FIRDialect:     omp.yield
@@ -353,9 +341,9 @@ subroutine simple_loop_3
 subroutine simd_loop_1
   integer :: i
   real, allocatable :: r;
-  ! IRDialect:     [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
-  ! IRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
-  ! IRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:     [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
+  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
+  ! FIRDialect:     fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
 
   ! FIRDialect:     %[[LB:.*]] = arith.constant 1 : i32
   ! FIRDialect:     %[[UB:.*]] = arith.constant 9 : i32
@@ -364,9 +352,7 @@ subroutine simd_loop_1
   ! FIRDialect: omp.simdloop for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   !$OMP SIMD PRIVATE(r)
   do i=1, 9
-  ! FIRDialect:     fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
-  ! FIRDialect:     %[[LOAD_IV:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
-  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! FIRDialect:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   !$OMP END SIMD
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90
index ac63c45677ffe..bcc08e59560ab 100644
--- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90
+++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90
@@ -10,7 +10,6 @@ subroutine omp_do_firstprivate(a)
   n = a+1
   !$omp parallel do firstprivate(a)
   ! CHECK:  omp.parallel {
-  ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
   ! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned
   ! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
   ! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref<i32>
@@ -18,6 +17,7 @@ subroutine omp_do_firstprivate(a)
   ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
   ! CHECK-NEXT: omp.wsloop   for  (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
+  ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
   ! CHECK-NEXT: fir.store %[[ARG1]] to %[[REF]] : !fir.ref<i32>
   ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
   ! CHECK-NEXT: omp.yield
@@ -36,19 +36,19 @@ subroutine omp_do_firstprivate2(a, n)
   n = a+1
   !$omp parallel do firstprivate(a, n)
   ! CHECK:  omp.parallel {
-  ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
   ! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned
   ! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
   ! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref<i32>
   ! CHECK-NEXT: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "n", pinned
   ! CHECK-NEXT: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
   ! CHECK-NEXT: fir.store %[[LD1]] to %[[CLONE1]] : !fir.ref<i32>
 
 
   ! CHECK: %[[LB:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
   ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
   ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
   ! CHECK-NEXT: omp.wsloop   for  (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]])
+  ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
   ! CHECK-NEXT: fir.store %[[ARG2]] to %[[REF]] : !fir.ref<i32>
   ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
   ! CHECK-NEXT: omp.yield
diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90
index c302b91be8e67..bc44a12c53f4d 100644
--- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90
+++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90
@@ -12,9 +12,7 @@ subroutine simple_parallel_do
   ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP PARALLEL DO
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield
@@ -38,9 +36,7 @@ subroutine parallel_do_with_parallel_clauses(cond, nt)
   ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close)
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield
@@ -61,9 +57,7 @@ subroutine parallel_do_with_clauses(nt)
   ! CHECK:     omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic)
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield
@@ -93,9 +87,7 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt)
   ! CHECK:    omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt)
   do i=1, 9
-  ! CHECK:    fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref<i32>
-  ! CHECK:    %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref<i32>
-  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:      fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
   ! CHECK:      %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref<!fir.logical<4>>
   ! CHECK:      %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1
   ! CHECK:      fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) {{.*}}: (!fir.ref<i8>, i1) -> i1
@@ -132,7 +124,6 @@ end subroutine parallel_private_do
 ! CHECK-SAME:                                      %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}) {
 ! CHECK:           %[[I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_private_doEi"}
 ! CHECK:           omp.parallel   {
-! CHECK:             %[[I_PRIV:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:             %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_private_doEcond"}
 ! CHECK:             %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_private_doEnt"}
 ! CHECK:             %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
@@ -141,6 +132,7 @@ end subroutine parallel_private_do
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:               %[[I_PRIV:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:               fir.store %[[I]] to %[[I_PRIV]] : !fir.ref<i32>
 ! CHECK:               fir.call @_QPfoo(%[[I_PRIV]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
 ! CHECK:               omp.yield
@@ -172,7 +164,6 @@ end subroutine omp_parallel_multiple_firstprivate_do
 ! CHECK-SAME:                                                        %[[B_ADDR:.*]]: !fir.ref<i32> {fir.bindc_name = "b"}) {
 ! CHECK:           %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_parallel_multiple_firstprivate_doEi"}
 ! CHECK:           omp.parallel   {
-! CHECK:             %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:             %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_multiple_firstprivate_doEa"}
 ! CHECK:             %[[A:.*]] = fir.load %[[A_ADDR]] : !fir.ref<i32>
 ! CHECK:             fir.store %[[A]] to %[[A_PRIV_ADDR]] : !fir.ref<i32>
@@ -183,6 +174,7 @@ end subroutine omp_parallel_multiple_firstprivate_do
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
 ! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:               %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:               fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
 ! CHECK:               fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
 ! CHECK:               omp.yield
@@ -216,7 +208,6 @@ end subroutine parallel_do_private
 ! CHECK-SAME:                                      %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "nt"}) {
 ! CHECK:           %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_do_privateEi"}
 ! CHECK:           omp.parallel   {
-! CHECK:             %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:             %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_privateEcond"}
 ! CHECK:             %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_privateEnt"}
 ! CHECK:             %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
@@ -225,6 +216,7 @@ end subroutine parallel_do_private
 ! CHECK:             %[[VAL_8:.*]] = arith.constant 9 : i32
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 1 : i32
 ! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
+! CHECK:               %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:               fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
 ! CHECK:               fir.call @_QPfoo(%[[I_PRIV_ADDR]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<!fir.logical<4>>, !fir.ref<i32>) -> ()
 ! CHECK:               omp.yield
@@ -256,7 +248,6 @@ end subroutine omp_parallel_do_multiple_firstprivate
 ! CHECK-SAME:                                                        %[[B_ADDR:.*]]: !fir.ref<i32> {fir.bindc_name = "b"}) {
 ! CHECK:           %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_parallel_do_multiple_firstprivateEi"}
 ! CHECK:           omp.parallel   {
-! CHECK:             %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:             %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"}
 ! CHECK:             %[[A:.*]] = fir.load %[[A_ADDR]] : !fir.ref<i32>
 ! CHECK:             fir.store %[[A]] to %[[A_PRIV_ADDR]] : !fir.ref<i32>
@@ -267,6 +258,7 @@ end subroutine omp_parallel_do_multiple_firstprivate
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 10 : i32
 ! CHECK:             %[[VAL_10:.*]] = arith.constant 1 : i32
 ! CHECK:             omp.wsloop   for  (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK:               %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:               fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref<i32>
 ! CHECK:               fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref<i32>, !fir.ref<i32>) -> ()
 ! CHECK:               omp.yield
diff --git a/flang/test/Lower/OpenMP/FIR/simd.f90 b/flang/test/Lower/OpenMP/FIR/simd.f90
index 47596cccdbc12..c0c8d7f32838a 100644
--- a/flang/test/Lower/OpenMP/FIR/simd.f90
+++ b/flang/test/Lower/OpenMP/FIR/simd.f90
@@ -11,9 +11,7 @@ subroutine simdloop
   ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
   ! CHECK-NEXT: omp.simdloop for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   do i=1, 9
-    ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
-    ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
-    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   !$OMP END SIMD 
@@ -29,9 +27,7 @@ subroutine simdloop_with_if_clause(n, threshold)
   ! CHECK: %[[COND:.*]] = arith.cmpi sge
   ! CHECK: omp.simdloop if(%[[COND:.*]]) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive  step (%[[STEP]]) {
   do i = 1, n
-    ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
-    ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
-    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   !$OMP END SIMD
@@ -46,9 +42,7 @@ subroutine simdloop_with_simdlen_clause(n, threshold)
   ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
   ! CHECK: omp.simdloop simdlen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive  step (%[[STEP]]) {
   do i = 1, n
-    ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
-    ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
-    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   !$OMP END SIMD
@@ -64,9 +58,7 @@ subroutine simdloop_with_simdlen_clause_from_param(n, threshold)
   ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
   ! CHECK: omp.simdloop simdlen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive  step (%[[STEP]]) {
   do i = 1, n
-    ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
-    ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
-    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   !$OMP END SIMD
@@ -82,9 +74,7 @@ subroutine simdloop_with_simdlen_clause_from_expr_from_param(n, threshold)
   ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
   ! CHECK: omp.simdloop simdlen(6) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive  step (%[[STEP]]) {
   do i = 1, n
-    ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
-    ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
-    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   !$OMP END SIMD
@@ -99,9 +89,7 @@ subroutine simdloop_with_safelen_clause(n, threshold)
   ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
   ! CHECK: omp.simdloop safelen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive  step (%[[STEP]]) {
   do i = 1, n
-    ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
-    ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
-    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   !$OMP END SIMD
@@ -117,9 +105,7 @@ subroutine simdloop_with_safelen_clause_from_expr_from_param(n, threshold)
   ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
   ! CHECK: omp.simdloop safelen(6) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive  step (%[[STEP]]) {
   do i = 1, n
-    ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
-    ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
-    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   !$OMP END SIMD
@@ -134,9 +120,7 @@ subroutine simdloop_with_simdlen_safelen_clause(n, threshold)
   ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
   ! CHECK: omp.simdloop simdlen(1) safelen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive  step (%[[STEP]]) {
   do i = 1, n
-    ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref<i32>
-    ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref<i32>
-    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+    ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   !$OMP END SIMD
diff --git a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90
index 2f73fb31966ec..ab906e1dfba5b 100644
--- a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90
+++ b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90
@@ -71,14 +71,12 @@ subroutine test_stop_in_region3()
 end
 
 ! CHECK-LABEL: func.func @_QPtest_stop_in_region4() {
-! CHECK:         %[[VAL_0:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK:         %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_stop_in_region4Ei"}
 ! CHECK:         %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtest_stop_in_region4Ex"}
 ! CHECK:         %[[VAL_3:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_4:.*]] = arith.constant 10 : i32
 ! CHECK:         %[[VAL_5:.*]] = arith.constant 1 : i32
 ! CHECK:         omp.wsloop   for  (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) {
-! CHECK:           fir.store %[[VAL_6]] to %[[VAL_0]] : !fir.ref<i32>
 ! CHECK:           cf.br ^bb1
 ! CHECK:         ^bb1:
 ! CHECK:           %[[VAL_7:.*]] = arith.constant 3 : i32
diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90
index 9b1fb5c15ac1d..90e4e8a058297 100644
--- a/flang/test/Lower/OpenMP/FIR/target.f90
+++ b/flang/test/Lower/OpenMP/FIR/target.f90
@@ -278,19 +278,16 @@ subroutine omp_target_parallel_do
    !CHECK: omp.target   map_entries(%[[MAP]] : !fir.ref<!fir.array<1024xi32>>) {
       !CHECK-NEXT: omp.parallel
       !$omp target parallel do map(tofrom: a)
-         !CHECK: %[[VAL_2:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-         !CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32
-         !CHECK: %[[VAL_4:.*]] = arith.constant 1024 : i32
-         !CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
-         !CHECK: omp.wsloop   for  (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) {
-         !CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref<i32>
-         !CHECK: %[[VAL_7:.*]] = arith.constant 10 : i32
-         !CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_2]] : !fir.ref<i32>
-         !CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> i64
-         !CHECK: %[[VAL_10:.*]] = arith.constant 1 : i64
-         !CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_9]], %[[VAL_10]] : i64
-         !CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_11]] : (!fir.ref<!fir.array<1024xi32>>, i64) -> !fir.ref<i32>
-         !CHECK: fir.store %[[VAL_7]] to %[[VAL_12]] : !fir.ref<i32>
+         !CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
+         !CHECK: %[[VAL_3:.*]] = arith.constant 1024 : i32
+         !CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
+         !CHECK: omp.wsloop   for  (%[[ARG:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) {
+         !CHECK: %[[VAL_5:.*]] = arith.constant 10 : i32
+         !CHECK: %[[VAL_6:.*]] = fir.convert %[[ARG]] : (i32) -> i64
+         !CHECK: %[[VAL_7:.*]] = arith.constant 1 : i64
+         !CHECK: %[[VAL_8:.*]] = arith.subi %[[VAL_6]], %[[VAL_7]] : i64
+         !CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_8]] : (!fir.ref<!fir.array<1024xi32>>, i64) -> !fir.ref<i32>
+         !CHECK: fir.store %[[VAL_5]] to %[[VAL_9]] : !fir.ref<i32>
          do i = 1, 1024
             a(i) = 10
          end do
diff --git a/flang/test/Lower/OpenMP/FIR/unstructured.f90 b/flang/test/Lower/OpenMP/FIR/unstructured.f90
index e7d48bb269349..a6b572eaafecf 100644
--- a/flang/test/Lower/OpenMP/FIR/unstructured.f90
+++ b/flang/test/Lower/OpenMP/FIR/unstructured.f90
@@ -61,21 +61,16 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct
 ! CHECK-LABEL: func @_QPss3{{.*}} {
 ! CHECK:   omp.parallel {
 ! CHECK:     %[[ALLOCA_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned}
-! CHECK:     %[[ALLOCA_1:.*]] = fir.alloca i32 {{{.*}}, pinned}
-! CHECK:     %[[ALLOCA_2:.*]] = fir.alloca i32 {{{.*}}, pinned}
 ! CHECK:     br ^bb1
 ! CHECK:   ^bb1:  // 2 preds: ^bb0, ^bb3
 ! CHECK:     cond_br %{{[0-9]*}}, ^bb2, ^bb4
 ! CHECK:   ^bb2:  // pred: ^bb1
 ! CHECK:     omp.wsloop for (%[[ARG1:.*]]) : {{.*}} {
-! CHECK:       fir.store %[[ARG1]] to %[[ALLOCA_2]] : !fir.ref<i32>
 ! CHECK:     @_FortranAioBeginExternalListOutput
-! CHECK:       %[[LOAD_1:.*]] = fir.load %[[ALLOCA_2]] : !fir.ref<i32>
-! CHECK:     @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]])
+! CHECK:     @_FortranAioOutputInteger32(%{{.*}}, %[[ARG1]])
 ! CHECK:       omp.yield
 ! CHECK:     }
 ! CHECK:     omp.wsloop for (%[[ARG2:.*]]) : {{.*}} {
-! CHECK:       fir.store %[[ARG2]] to %[[ALLOCA_1]] : !fir.ref<i32>
 ! CHECK:       br ^bb1
 ! CHECK:     ^bb2:  // 2 preds: ^bb1, ^bb5
 ! CHECK:       cond_br %{{[0-9]*}}, ^bb3, ^bb6
@@ -116,15 +111,12 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs
 
 ! CHECK-LABEL: func @_QPss4{{.*}} {
 ! CHECK:       omp.parallel {
-! CHECK:         %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned}
 ! CHECK:         omp.wsloop for (%[[ARG:.*]]) : {{.*}} {
-! CHECK:           fir.store %[[ARG]] to %[[ALLOCA]] : !fir.ref<i32>
 ! CHECK:           %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}}
 ! CHECK:           %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}}
 ! CHECK:          fir.if %[[COND_XOR]] {
 ! CHECK:           @_FortranAioBeginExternalListOutput
-! CHECK:           %[[LOAD:.*]] = fir.load %[[ALLOCA]] : !fir.ref<i32>
-! CHECK:           @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]])
+! CHECK:           @_FortranAioOutputInteger32(%{{.*}}, %[[ARG]])
 ! CHECK:          } else {
 ! CHECK:          }
 ! CHECK-NEXT:      omp.yield
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90
index 99b0cf0f1298e..f1bd5459a2b61 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90
@@ -20,9 +20,7 @@ program wsloop
 ! CHECK:         %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_5:.*]] = arith.constant 4 : i32
 ! CHECK:         omp.wsloop   schedule(static = %[[VAL_5]] : i32) nowait for  (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) {
-! CHECK:           fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref<i32>
-! CHECK:           %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref<i32>
-! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[ARG0]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
 ! CHECK:           omp.yield
 ! CHECK:         }
 
@@ -38,10 +36,8 @@ program wsloop
 ! CHECK:         %[[VAL_16:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_17:.*]] = arith.constant 4 : i32
 ! CHECK:         omp.wsloop   schedule(static = %[[VAL_17]] : i32) nowait for  (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) {
-! CHECK:           fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
 ! CHECK:           %[[VAL_24:.*]] = arith.constant 2 : i32
-! CHECK:           %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i32>
-! CHECK:           %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[LOAD_IV1]] : i32
+! CHECK:           %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[ARG1]] : i32
 ! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
 ! CHECK:           omp.yield
 ! CHECK:         }
@@ -62,10 +58,8 @@ program wsloop
 ! CHECK:         %[[VAL_31:.*]] = arith.constant 1 : i32
 ! CHECK:         %[[VAL_32:.*]] = fir.load %[[VAL_0]] : !fir.ref<i32>
 ! CHECK:         omp.wsloop   schedule(static = %[[VAL_32]] : i32) nowait for  (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) {
-! CHECK:           fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>
 ! CHECK:           %[[VAL_39:.*]] = arith.constant 3 : i32
-! CHECK:           %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref<i32>
-! CHECK:           %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[LOAD_IV2]] : i32
+! CHECK:           %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[ARG2]] : i32
 ! CHECK:           {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_40]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
 ! CHECK:           omp.yield
 ! CHECK:         }
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90
index a122a41ba8b8f..ba5860faac0e6 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90
@@ -40,16 +40,10 @@ program wsloop_collapse
      do j= 1, b
         do k = 1, c
 ! CHECK:           omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) {
-! CHECK:             fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref<i32>
-! CHECK:             fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
-! CHECK:             fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>
 ! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_6]] : !fir.ref<i32>
-! CHECK:             %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref<i32>
-! CHECK:             %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[LOAD_IV0]] : i32
-! CHECK:             %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i32>
-! CHECK:             %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[LOAD_IV1]] : i32
-! CHECK:             %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref<i32>
-! CHECK:             %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[LOAD_IV2]] : i32
+! CHECK:             %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[ARG0]] : i32
+! CHECK:             %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[ARG1]] : i32
+! CHECK:             %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[ARG2]] : i32
 ! CHECK:             fir.store %[[VAL_15]] to %[[VAL_6]] : !fir.ref<i32>
 ! CHECK:             omp.yield
 ! CHECK:           }
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90
index 9509920c6ec1b..ac0021e2edf20 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90
@@ -11,18 +11,15 @@ program wsloop_dynamic
 !CHECK:  omp.parallel {
 
 !$OMP DO SCHEDULE(monotonic:dynamic)
-!CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
 !CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
 !CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
 !CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
 !CHECK:     omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-!CHECK:       fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
 
   do i=1, 9
     print*, i
 !CHECK:    %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
-!CHECK:    %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
 !CHECK:    fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
   end do
 !CHECK:       omp.yield
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90
index 5e4e66c77b343..39215e8d31c92 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90
@@ -12,18 +12,15 @@ program wsloop_dynamic
 !CHECK:  omp.parallel {
 
 !$OMP DO SCHEDULE(nonmonotonic:dynamic)
-!CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
 !CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
 !CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
 !CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
 !CHECK:     omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-!CHECK:       fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref<i32>
 
   do i=1, 9
     print*, i
 !CHECK:    %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
-!CHECK:    %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
 !CHECK:    fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
   end do
 !CHECK:       omp.yield
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90
index 69d133d50ffa0..069b777d1cdb2 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90
@@ -50,14 +50,11 @@
 !CHECK:  %[[C0_2:.*]] = arith.constant 0 : i32
 !CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<i32>
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref<i32>
+!CHECK:      omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref<i32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
 !CHECK:  return
@@ -78,14 +75,11 @@ subroutine simple_int_reduction
 !CHECK:  %[[C0_2:.*]] = arith.constant 0.000000e+00 : f32
 !CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<f32>
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref<f32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
@@ -107,14 +101,11 @@ subroutine simple_real_reduction
 !CHECK:  %[[C0_2:.*]] = arith.constant 0 : i32
 !CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<i32>
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref<i32>
+!CHECK:      omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref<i32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
 !CHECK:  return
@@ -135,14 +126,11 @@ subroutine simple_int_reduction_switch_order
 !CHECK:  %[[C0_2:.*]] = arith.constant 0.000000e+00 : f32
 !CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<f32>
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref<f32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
@@ -164,15 +152,10 @@ subroutine simple_real_reduction_switch_order
 !CHECK:  %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"}
 !CHECK:  %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref<i32>) for  (%[[IVAL]]) : i32
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : i32, !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : i32, !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : i32, !fir.ref<i32>
+!CHECK:      omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref<i32>
+!CHECK:      omp.reduction %[[IVAL]], %[[YREF]] : i32, !fir.ref<i32>
+!CHECK:      omp.reduction %[[IVAL]], %[[ZREF]] : i32, !fir.ref<i32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
 !CHECK:  return
@@ -197,17 +180,12 @@ subroutine multiple_int_reductions_same_type
 !CHECK:  %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"}
 !CHECK:  %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>, @[[RED_F32_NAME]] -> %[[YREF]] : !fir.ref<f32>, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref<f32>) for  (%[[IVAL]]) : i32
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL1_F32]], %[[XREF]] : f32, !fir.ref<f32>
-!CHECK:      %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL2_F32]], %[[YREF]] : f32, !fir.ref<f32>
-!CHECK:      %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref<f32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
@@ -234,19 +212,13 @@ subroutine multiple_real_reductions_same_type
 !CHECK:  %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
 !CHECK:  %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]]  : !fir.ref<i32>, @[[RED_I64_NAME]] -> %[[YREF]] : !fir.ref<i64>, @[[RED_F32_NAME]] -> %[[ZREF]]  : !fir.ref<f32>, @[[RED_F64_NAME]] -> %[[WREF]]  : !fir.ref<f64>) for  (%[[IVAL:.*]]) : i32
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL1_I32]], %[[XREF]] : i32, !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64
+!CHECK:      omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      omp.reduction %[[I_PVT_VAL2_I64]], %[[YREF]] : i64, !fir.ref<i64>
-!CHECK:      %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref<f32>
-!CHECK:      %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64
+!CHECK:      %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[IVAL]] : (i32) -> f64
 !CHECK:      omp.reduction %[[I_PVT_VAL4_F64]], %[[WREF]] : f64, !fir.ref<f64>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90
index 425d37398c571..03fbc2819659d 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90
@@ -21,14 +21,11 @@
 !CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
 !CHECK:  %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64
+!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64
 !CHECK:      %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
@@ -54,14 +51,11 @@ subroutine simple_reduction(y)
 !CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
 !CHECK:  %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64
+!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64
 !CHECK:      %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
@@ -89,28 +83,23 @@ subroutine simple_reduction_switch_order(y)
 !CHECK:  %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
 !CHECK:  %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref<!fir.logical<4>>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
 !CHECK:      %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref<!fir.logical<4>>
 !CHECK:      omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-!CHECK:      %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
 !CHECK:      %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref<!fir.logical<4>>
 !CHECK:      omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-!CHECK:      %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90
index e8cf46f8261c4..eaa627e6afd51 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90
@@ -21,14 +21,11 @@
 !CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
 !CHECK:  %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64
+!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64
 !CHECK:      %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
@@ -54,14 +51,11 @@ subroutine simple_reduction(y)
 !CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
 !CHECK:  %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64
+!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64
 !CHECK:      %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
@@ -89,28 +83,23 @@ subroutine simple_reduction_switch_order(y)
 !CHECK:  %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
 !CHECK:  %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref<!fir.logical<4>>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
 !CHECK:      %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref<!fir.logical<4>>
 !CHECK:      omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-!CHECK:      %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
 !CHECK:      %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref<!fir.logical<4>>
 !CHECK:      omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-!CHECK:      %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90
index 6e5d6c34cedc5..617198d01716f 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90
@@ -21,14 +21,11 @@
 !CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
 !CHECK:  %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64
+!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64
 !CHECK:      %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
@@ -54,14 +51,11 @@ subroutine simple_reduction(y)
 !CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
 !CHECK:  %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64
+!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64
 !CHECK:      %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
@@ -89,28 +83,23 @@ subroutine simple_reduction_switch_order(y)
 !CHECK:  %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
 !CHECK:  %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref<!fir.logical<4>>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
 !CHECK:      %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref<!fir.logical<4>>
 !CHECK:      omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-!CHECK:      %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
 !CHECK:      %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref<!fir.logical<4>>
 !CHECK:      omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-!CHECK:      %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90
index cdc12500e2c30..e3d691e347ba2 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90
@@ -21,14 +21,11 @@
 !CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"}
 !CHECK:  %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64
+!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64
 !CHECK:      %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
@@ -54,14 +51,11 @@ subroutine simple_reduction(y)
 !CHECK:  %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"}
 !CHECK:  %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64
+!CHECK:      %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64
 !CHECK:      %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
@@ -89,28 +83,23 @@ subroutine simple_reduction_switch_order(y)
 !CHECK:  %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"}
 !CHECK:  %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref<!fir.logical<4>>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref<!fir.logical<4>>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref<!fir.logical<4>>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) {
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
 !CHECK:      %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref<!fir.logical<4>>
 !CHECK:      omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-!CHECK:      %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
 !CHECK:      %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref<!fir.logical<4>>
 !CHECK:      omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-!CHECK:      %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64
+!CHECK:      %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      %[[C1_64:.*]] = arith.constant 1 : i64
 !CHECK:      %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64
 !CHECK:      %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref<!fir.array<100x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90
index c30cde66b5167..f5d2113d2e57a 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90
@@ -50,14 +50,11 @@
 !CHECK:  %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:  fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C10:.*]] = arith.constant 10 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref<i32>
+!CHECK:      omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref<i32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
 !CHECK:  return
@@ -79,14 +76,11 @@ subroutine simple_int_reduction
 !CHECK:  %[[C0_2:.*]] = arith.constant 1.000000e+00 : f32
 !CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<f32>
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 10 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref<f32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
@@ -108,14 +102,11 @@ subroutine simple_real_reduction
 !CHECK:  %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:  fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C10:.*]] = arith.constant 10 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref<i32>
+!CHECK:      omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref<i32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
 !CHECK:  return
@@ -136,14 +127,11 @@ subroutine simple_int_reduction_switch_order
 !CHECK:  %[[C0_2:.*]] = arith.constant 1.000000e+00 : f32
 !CHECK:  fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<f32>
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 10 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref<f32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
@@ -165,15 +153,10 @@ subroutine simple_real_reduction_switch_order
 !CHECK:  %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"}
 !CHECK:  %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
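-!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref<i32>) for  (%[[IVAL]]) : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref<i32>, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32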
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : i32, !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : i32, !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : i32, !fir.ref<i32>
+!CHECK:      omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref<i32>
+!CHECK:      omp.reduction %[[IVAL]], %[[YREF]] : i32, !fir.ref<i32>
+!CHECK:      omp.reduction %[[IVAL]], %[[ZREF]] : i32, !fir.ref<i32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
 !CHECK:  return
@@ -198,17 +181,12 @@ subroutine multiple_int_reductions_same_type
 !CHECK:  %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"}
 !CHECK:  %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
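-!CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>, @[[RED_F32_NAME]] -> %[[YREF]] : !fir.ref<f32>, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref<f32>) for  (%[[IVAL]]) : i32
+!CHECK:    omp.wsloop   reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref<f32>, @[[RED_F32_NAME]] -> %[[YREF]] : !fir.ref<f32>, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref<f32>) for  (%[[IVAL:.*]]) : i32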
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL1_F32]], %[[XREF]] : f32, !fir.ref<f32>
-!CHECK:      %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL2_F32]], %[[YREF]] : f32, !fir.ref<f32>
-!CHECK:      %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref<f32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
@@ -235,19 +213,13 @@ subroutine multiple_real_reductions_same_type
 !CHECK:  %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"}
 !CHECK:  %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"}
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %2 : !fir.ref<i32>, @[[RED_I64_NAME]] -> %3 : !fir.ref<i64>, @[[RED_F32_NAME]] -> %4 : !fir.ref<f32>, @[[RED_F64_NAME]] -> %1 : !fir.ref<f64>) for  (%[[IVAL:.*]]) : i32
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL1_I32]], %[[XREF]] : i32, !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64
+!CHECK:      omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref<i32>
+!CHECK:      %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64
 !CHECK:      omp.reduction %[[I_PVT_VAL2_I64]], %[[YREF]] : i64, !fir.ref<i64>
-!CHECK:      %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32
+!CHECK:      %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32
 !CHECK:      omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref<f32>
-!CHECK:      %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64
+!CHECK:      %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[IVAL]] : (i32) -> f64
 !CHECK:      omp.reduction %[[I_PVT_VAL4_F64]], %[[WREF]] : f64, !fir.ref<f64>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
index 47f9d572a8653..95248e99b9c63 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90
@@ -15,13 +15,11 @@ program wsloop_dynamic
 !CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
 !CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
 !CHECK:     omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-!CHECK:       fir.store %[[I]] to %[[STORE:.*]] : !fir.ref<i32>
 
   do i=1, 9
     print*, i
 !CHECK:    %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput
-!CHECK:    %[[LOAD:.*]] = fir.load %[[STORE]] : !fir.ref<i32>
-!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+!CHECK:    fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
 !CHECK:    fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref<i8>) -> i32
   end do
 !CHECK:       omp.yield
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
index 466055868f1cc..c515ed10d0d5c 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90
@@ -25,11 +25,9 @@ program wsloop_variable
 !CHECK:  omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) {
 !CHECK:    %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16
 !CHECK:    fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]] : !fir.ref<i16>
-!CHECK:    fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i64>
 !CHECK:    %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref<i16>
 !CHECK:    %[[LOAD_IV0_I64:.*]] = fir.convert %[[LOAD_IV0]] : (i16) -> i64
-!CHECK:    %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i64>
-!CHECK:    %[[TMP10:.*]] = arith.addi %[[LOAD_IV0_I64]], %[[LOAD_IV1]] : i64
+!CHECK:    %[[TMP10:.*]] = arith.addi %[[LOAD_IV0_I64]], %[[ARG1]] : i64
 !CHECK:    %[[TMP11:.*]] = fir.convert %[[TMP10]] : (i64) -> f32
 !CHECK:    fir.store %[[TMP11]] to %{{.*}} : !fir.ref<f32>
 !CHECK:    omp.yield
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop.f90 b/flang/test/Lower/OpenMP/FIR/wsloop.f90
index 2c00d1a9fddae..49b47a6307284 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop.f90
@@ -7,16 +7,13 @@ subroutine simple_loop
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
-  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
   ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
   ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP DO
   do i=1, 9
-  ! CHECK:             fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref<i32>
-  ! CHECK:             %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield
@@ -30,16 +27,13 @@ subroutine simple_loop_with_step
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
-  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
   ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:     %[[WS_STEP:.*]] = arith.constant 2 : i32
   ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
-  ! CHECK:       fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! CHECK:       %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
   !$OMP DO
   do i=1, 9, 2
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield
@@ -53,16 +47,13 @@ subroutine loop_with_schedule_nowait
   integer :: i
   ! CHECK:  omp.parallel
   !$OMP PARALLEL
-  ! CHECK:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
   ! CHECK:     %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK:     %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK:     %[[WS_STEP:.*]] = arith.constant 1 : i32
   ! CHECK:     omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
   !$OMP DO SCHEDULE(runtime)
   do i=1, 9
-  ! CHECK:       fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! CHECK:       %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
   end do
   ! CHECK:       omp.yield
diff --git a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90
index fd56038231b19..ea39293ab78ff 100644
--- a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90
+++ b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90
@@ -8,16 +8,13 @@
 ! CHECK: omp.parallel   {
 ! EXPECTED: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
 ! EXPECTED: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFEz"}
-! CHECK: %[[TEMP:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 ! CHECK: %[[const_1:.*]] = arith.constant 1 : i32
 ! CHECK: %[[const_2:.*]] = arith.constant 10 : i32
 ! CHECK: %[[const_3:.*]] = arith.constant 1 : i32
 ! CHECK: omp.wsloop   for  (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) {
-! CHECK: fir.store %[[ARG]] to %[[TEMP]] : !fir.ref<i32>
 ! EXPECTED: %[[temp_1:.*]] = fir.load %[[PRIVATE_Z]] : !fir.ref<i32>
 ! CHECK: %[[temp_1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
-! CHECK: %[[temp_2:.*]] = fir.load %[[TEMP]] : !fir.ref<i32>
-! CHECK: %[[result:.*]] = arith.addi %[[temp_1]], %[[temp_2]] : i32
+! CHECK: %[[result:.*]] = arith.addi %[[temp_1]], %[[ARG]] : i32
 ! EXPECTED: fir.store %[[result]] to %[[PRIVATE_Y]] : !fir.ref<i32>
 ! CHECK: fir.store %[[result]] to %{{.*}} : !fir.ref<i32>
 ! CHECK: omp.yield
diff --git a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 b/flang/test/Lower/OpenMP/hlfir-wsloop.f90
index b6be77fe3016d..8b3bee48890d2 100644
--- a/flang/test/Lower/OpenMP/hlfir-wsloop.f90
+++ b/flang/test/Lower/OpenMP/hlfir-wsloop.f90
@@ -6,19 +6,22 @@
 !CHECK-LABEL: func @_QPsimple_loop()
 subroutine simple_loop
   integer :: i
-  ! CHECK-DAG:     %[[WS_ST:.*]] = arith.constant 1 : i32
-  ! CHECK-DAG:     %[[WS_END:.*]] = arith.constant 9 : i32
-  ! CHECK:  omp.parallel
+  ! CHECK-DAG: %[[WS_ST:.*]] = arith.constant 1 : i32
+  ! CHECK-DAG: %[[WS_END:.*]] = arith.constant 9 : i32
+  ! CHECK:     omp.parallel
   !$OMP PARALLEL
-  ! CHECK-DAG:     %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
-  ! CHECK:     %[[IV:.*]]    = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
   ! CHECK:     omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]])
   !$OMP DO
   do i=1, 9
-  ! CHECK:             fir.store %[[I]] to %[[IV:.*]] : !fir.ref<i32>
-  ! CHECK:             %[[LOAD_IV:.*]] = fir.load %[[IV]] : !fir.ref<i32>
-  ! CHECK:    fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
+  ! i is passed by reference to foo below, so its alloca and declare are expected inside the loop body.
+  ! CHECK-DAG: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned}
+  ! CHECK:     %[[IV:.*]] = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+  ! CHECK:     fir.store %[[I]] to %[[IV]] : !fir.ref<i32>
+  ! CHECK:     %[[LOAD_IV:.*]] = fir.load %[[IV]] : !fir.ref<i32>
+  ! CHECK:     fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
     print*, i
+  ! CHECK:     fir.call @_QPfoo(%[[IV]]) {{.*}}: (!fir.ref<i32>) -> ()
+    call foo(i)
   end do
   ! CHECK:       omp.yield
   !$OMP END DO
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90
index 97ee665442e3a..97510745d0539 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90
@@ -18,15 +18,11 @@
 !CHECK:  %[[C0_2:.*]] = arith.constant 0 : i32
 !CHECK:  hlfir.assign %[[C0_2]] to %[[XDECL]]#0 : i32, !fir.ref<i32>
 !CHECK:  omp.parallel
-!CHECK:    %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK:    %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK:    %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK:    %[[C100:.*]] = arith.constant 100 : i32
 !CHECK:    %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK:    omp.wsloop   reduction(@[[RED_I32_NAME]] -> %[[XDECL]]#0 : !fir.ref<i32>) for  (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
-!CHECK:      fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
-!CHECK:      %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref<i32>
-!CHECK:      omp.reduction %[[I_PVT_VAL]], %[[XDECL]]#0 : i32, !fir.ref<i32>
+!CHECK:      omp.reduction %[[IVAL]], %[[XDECL]]#0 : i32, !fir.ref<i32>
 !CHECK:      omp.yield
 !CHECK:    omp.terminator
 !CHECK:  return
diff --git a/flang/test/Transforms/omp-wsloop-index.mlir b/flang/test/Transforms/omp-wsloop-index.mlir
new file mode 100644
index 0000000000000..9443209d90b9f
--- /dev/null
+++ b/flang/test/Transforms/omp-wsloop-index.mlir
@@ -0,0 +1,247 @@
+// RUN: fir-opt --omp-loop-index-mem2reg %s | FileCheck %s
+
+func.func private @foo(%arg0 : !fir.ref<i32>) -> i32
+
+// CHECK-LABEL: @wsloop_remove_alloca
+func.func @wsloop_remove_alloca() { // index slot is only stored to and loaded from
+  // CHECK: %[[RESULT:.*]] = fir.alloca i32
+  // CHECK: omp.parallel
+  %result = fir.alloca i32
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
+    %index_ref = fir.alloca i32 // slot under test; the checks expect it to vanish
+    %one = arith.constant 1 : i32
+    %ten = arith.constant 10 : i32
+    omp.wsloop for (%iv) : i32 = (%one) to (%ten) inclusive step (%one) {
+      // CHECK-NOT: fir.alloca
+      // CHECK: fir.store %[[INDEX]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %iv to %index_ref : !fir.ref<i32>
+      %iv_reload = fir.load %index_ref : !fir.ref<i32>
+      fir.store %iv_reload to %result : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @simdloop_remove_alloca
+func.func @simdloop_remove_alloca() { // simdloop variant: index slot is only stored to and loaded from
+  // CHECK: %[[RESULT:.*]] = fir.alloca i32
+  // CHECK: omp.parallel
+  %result = fir.alloca i32
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %index_ref = fir.alloca i32 // slot under test; the checks expect it to vanish
+    %one = arith.constant 1 : i32
+    %ten = arith.constant 10 : i32
+    omp.simdloop for (%iv) : i32 = (%one) to (%ten) inclusive step (%one) {
+      // CHECK-NOT: fir.alloca
+      // CHECK: fir.store %[[INDEX]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %iv to %index_ref : !fir.ref<i32>
+      %iv_reload = fir.load %index_ref : !fir.ref<i32>
+      fir.store %iv_reload to %result : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @wsloop_push_alloca
+func.func @wsloop_push_alloca() { // index slot is handed to @foo by reference
+  // CHECK: %[[RESULT:.*]] = fir.alloca i32
+  // CHECK: omp.parallel
+  %result = fir.alloca i32
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
+    %index_ref = fir.alloca i32 // slot under test; the checks expect it inside the loop
+    %one = arith.constant 1 : i32
+    %ten = arith.constant 10 : i32
+    omp.wsloop for (%iv) : i32 = (%one) to (%ten) inclusive step (%one) {
+      // CHECK: %[[ALLOCA:.*]] = fir.alloca i32
+      // CHECK: fir.store %[[INDEX]] to %[[ALLOCA]]
+      // CHECK: %[[RETURN:.*]] = func.call @foo(%[[ALLOCA]])
+      // CHECK: fir.store %[[RETURN]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %iv to %index_ref : !fir.ref<i32>
+      %foo_ret = func.call @foo(%index_ref) : (!fir.ref<i32>) -> i32
+      fir.store %foo_ret to %result : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @simdloop_push_alloca
+func.func @simdloop_push_alloca() { // simdloop variant: index slot is handed to @foo by reference
+  // CHECK: %[[RESULT:.*]] = fir.alloca i32
+  // CHECK: omp.parallel
+  %result = fir.alloca i32
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %index_ref = fir.alloca i32 // slot under test; the checks expect it inside the loop
+    %one = arith.constant 1 : i32
+    %ten = arith.constant 10 : i32
+    omp.simdloop for (%iv) : i32 = (%one) to (%ten) inclusive step (%one) {
+      // CHECK: %[[ALLOCA:.*]] = fir.alloca i32
+      // CHECK: fir.store %[[INDEX]] to %[[ALLOCA]]
+      // CHECK: %[[RETURN:.*]] = func.call @foo(%[[ALLOCA]])
+      // CHECK: fir.store %[[RETURN]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %iv to %index_ref : !fir.ref<i32>
+      %foo_ret = func.call @foo(%index_ref) : (!fir.ref<i32>) -> i32
+      fir.store %foo_ret to %result : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @hlfir_wsloop_remove_alloca
+func.func @hlfir_wsloop_remove_alloca() { // HLFIR form: declared index is only stored to and loaded from
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %result_ref = fir.alloca i32
+  %result:2 = hlfir.declare %result_ref {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
+    %index_ref = fir.alloca i32 // slot under test; the checks expect it and its declare to vanish
+    %index:2 = hlfir.declare %index_ref {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %one = arith.constant 1 : i32
+    %ten = arith.constant 10 : i32
+    omp.wsloop for (%iv) : i32 = (%one) to (%ten) inclusive step (%one) {
+      // CHECK-NOT: fir.alloca
+      // CHECK-NOT: hlfir.declare
+      // CHECK: hlfir.assign %[[INDEX]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %iv to %index#1 : !fir.ref<i32>
+      %iv_reload = fir.load %index#0 : !fir.ref<i32>
+      hlfir.assign %iv_reload to %result#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @hlfir_simdloop_remove_alloca
+func.func @hlfir_simdloop_remove_alloca() { // HLFIR simdloop form: declared index is only stored to and loaded from
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %result_ref = fir.alloca i32
+  %result:2 = hlfir.declare %result_ref {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %index_ref = fir.alloca i32 // slot under test; the checks expect it and its declare to vanish
+    %index:2 = hlfir.declare %index_ref {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %one = arith.constant 1 : i32
+    %ten = arith.constant 10 : i32
+    omp.simdloop for (%iv) : i32 = (%one) to (%ten) inclusive step (%one) {
+      // CHECK-NOT: fir.alloca
+      // CHECK-NOT: hlfir.declare
+      // CHECK: hlfir.assign %[[INDEX]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %iv to %index#1 : !fir.ref<i32>
+      %iv_reload = fir.load %index#0 : !fir.ref<i32>
+      hlfir.assign %iv_reload to %result#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @hlfir_wsloop_push_alloca
+func.func @hlfir_wsloop_push_alloca() { // HLFIR form: declared index is handed to @foo by reference
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %result_ref = fir.alloca i32
+  %result:2 = hlfir.declare %result_ref {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
+    %index_ref = fir.alloca i32 // slot under test; the checks expect it and its declare inside the loop
+    %index:2 = hlfir.declare %index_ref {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %one = arith.constant 1 : i32
+    %ten = arith.constant 10 : i32
+    omp.wsloop for (%iv) : i32 = (%one) to (%ten) inclusive step (%one) {
+      // CHECK: %[[INDEX_ALLOCA:.*]] = fir.alloca i32
+      // CHECK: %[[INDEX_DECL:.*]]:2 = hlfir.declare %[[INDEX_ALLOCA]]
+      // CHECK: fir.store %[[INDEX]] to %[[INDEX_DECL]]#1
+      // CHECK: %[[RETURN:.*]] = fir.call @foo(%[[INDEX_DECL]]#1)
+      // CHECK: hlfir.assign %[[RETURN]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %iv to %index#1 : !fir.ref<i32>
+      %foo_ret = fir.call @foo(%index#1) : (!fir.ref<i32>) -> i32
+      hlfir.assign %foo_ret to %result#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @hlfir_simdloop_push_alloca
+func.func @hlfir_simdloop_push_alloca() { // HLFIR simdloop form: declared index is handed to @foo by reference
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %result_ref = fir.alloca i32
+  %result:2 = hlfir.declare %result_ref {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %index_ref = fir.alloca i32 // slot under test; the checks expect it and its declare inside the loop
+    %index:2 = hlfir.declare %index_ref {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %one = arith.constant 1 : i32
+    %ten = arith.constant 10 : i32
+    omp.simdloop for (%iv) : i32 = (%one) to (%ten) inclusive step (%one) {
+      // CHECK: %[[INDEX_ALLOCA:.*]] = fir.alloca i32
+      // CHECK: %[[INDEX_DECL:.*]]:2 = hlfir.declare %[[INDEX_ALLOCA]]
+      // CHECK: fir.store %[[INDEX]] to %[[INDEX_DECL]]#1
+      // CHECK: %[[RETURN:.*]] = fir.call @foo(%[[INDEX_DECL]]#1)
+      // CHECK: hlfir.assign %[[RETURN]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %iv to %index#1 : !fir.ref<i32>
+      %foo_ret = fir.call @foo(%index#1) : (!fir.ref<i32>) -> i32
+      hlfir.assign %foo_ret to %result#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}