diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 163bf9ba5b089..23bfe063fee06 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -18,6 +18,7 @@
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyTargetMachine.h"
 #include "WebAssemblyUtilities.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -91,6 +92,19 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     setOperationAction(ISD::LOAD, T, Custom);
     setOperationAction(ISD::STORE, T, Custom);
   }
+
+  // Likewise, transform zext/sext/anyext extending loads from address space 1
+  // (WASM globals).
+  setLoadExtAction({ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}, MVT::i32,
+                   {MVT::i8, MVT::i16}, Custom);
+  setLoadExtAction({ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}, MVT::i64,
+                   {MVT::i8, MVT::i16, MVT::i32}, Custom);
+
+  // Compensate for the EXTLOADs being custom by reimplementing some combiner
+  // logic.
+  setTargetDAGCombine(ISD::AND);
+  setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+
   if (Subtarget->hasSIMD128()) {
     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                    MVT::v2f64}) {
@@ -1683,6 +1697,11 @@ static bool IsWebAssemblyGlobal(SDValue Op) {
   if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
     return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
 
+  if (Op->getOpcode() == WebAssemblyISD::Wrapper)
+    if (const GlobalAddressSDNode *GA =
+            dyn_cast<GlobalAddressSDNode>(Op->getOperand(0)))
+      return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
+
   return false;
 }
 
@@ -1740,16 +1759,109 @@ SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
   const SDValue &Base = LN->getBasePtr();
   const SDValue &Offset = LN->getOffset();
+  ISD::LoadExtType ExtType = LN->getExtensionType();
+  EVT ResultType = LN->getValueType(0);
 
   if (IsWebAssemblyGlobal(Base)) {
     if (!Offset->isUndef())
       report_fatal_error(
           "unexpected offset when loading from webassembly global", false);
 
-    SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
-    SDValue Ops[] = {LN->getChain(), Base};
-    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
-                                   LN->getMemoryVT(), LN->getMemOperand());
+    if (!ResultType.isInteger() && !ResultType.isFloatingPoint()) {
+      SDVTList Tys = DAG.getVTList(ResultType, MVT::Other);
+      SDValue Ops[] = {LN->getChain(), Base};
+      SDValue GlobalGetNode =
+          DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
+                                  LN->getMemoryVT(), LN->getMemOperand());
+      return GlobalGetNode;
+    }
+
+    EVT GT = MVT::INVALID_SIMPLE_VALUE_TYPE;
+
+    if (auto *GA = dyn_cast<GlobalAddressSDNode>(
+            Base->getOpcode() == WebAssemblyISD::Wrapper ? Base->getOperand(0)
+                                                         : Base))
+      GT = EVT::getEVT(GA->getGlobal()->getValueType());
+
+    if (GT != MVT::i8 && GT != MVT::i16 && GT != MVT::i32 && GT != MVT::i64 &&
+        GT != MVT::f32 && GT != MVT::f64)
+      report_fatal_error("encountered unexpected global type for Base when "
+                         "loading from webassembly global",
+                         false);
+
+    EVT PromotedGT = getTypeToTransformTo(*DAG.getContext(), GT);
+
+    if (ExtType == ISD::NON_EXTLOAD) {
+      // A normal, non-extending load may try to load more or less than the
+      // underlying global, which is invalid.
+      // We lower this to a load of the global (i32 or i64) and then truncate
+      // or extend as needed.
+
+      // Modify the MMO to load the full global. This is assumed to be safe
+      // without copy/dup, as the original load will be removed.
+      MachineMemOperand *MMO = LN->getMemOperand();
+      MMO->setType(LLT(PromotedGT.getSimpleVT()));
+
+      SDVTList Tys = DAG.getVTList(PromotedGT, MVT::Other);
+      SDValue Ops[] = {LN->getChain(), Base};
+      SDValue GlobalGetNode = DAG.getMemIntrinsicNode(
+          WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops, PromotedGT, MMO);
+
+      if (ResultType.bitsEq(PromotedGT)) {
+        return GlobalGetNode;
+      }
+
+      SDValue ValRes;
+      if (ResultType.isFloatingPoint())
+        ValRes = DAG.getFPExtendOrRound(GlobalGetNode, DL, ResultType);
+      else
+        ValRes = DAG.getAnyExtOrTrunc(GlobalGetNode, DL, ResultType);
+
+      return DAG.getMergeValues({ValRes, GlobalGetNode.getValue(1)}, DL);
+    }
+
+    if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::SEXTLOAD) {
+      // Turn the unsupported load into an EXTLOAD followed by an explicit
+      // zero/sign extend inreg, the same as the Expand action would.
+      SDValue Result =
+          DAG.getExtLoad(ISD::EXTLOAD, DL, ResultType, LN->getChain(), Base,
+                         LN->getMemoryVT(), LN->getMemOperand());
+      SDValue ValRes;
+      if (ExtType == ISD::SEXTLOAD)
+        ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Result.getValueType(),
+                             Result, DAG.getValueType(LN->getMemoryVT()));
+      else
+        ValRes = DAG.getZeroExtendInReg(Result, DL, LN->getMemoryVT());
+
+      return DAG.getMergeValues({ValRes, Result.getValue(1)}, DL);
+    }
+
+    if (ExtType == ISD::EXTLOAD) {
+      // Expand the EXTLOAD into a regular LOAD of the global and, if needed,
+      // an any-extend to the result type.
+      EVT OldLoadType = LN->getMemoryVT();
+      EVT NewLoadType = getTypeToTransformTo(*DAG.getContext(), OldLoadType);
+
+      // Modify the MMO to load a whole WASM "register"'s worth. This is
+      // assumed to be safe without copy/dup, as the original load will be
+      // removed.
+      MachineMemOperand *MMO = LN->getMemOperand();
+      MMO->setType(LLT(NewLoadType.getSimpleVT()));
+
+      SDValue Result = DAG.getLoad(NewLoadType, DL, LN->getChain(), Base, MMO);
+
+      if (NewLoadType != ResultType) {
+        SDValue ValRes = DAG.getNode(ISD::ANY_EXTEND, DL, ResultType, Result);
+        return DAG.getMergeValues({ValRes, Result.getValue(1)}, DL);
+      }
+
+      return Result;
+    }
+
+    report_fatal_error(
+        "encountered unexpected ExtType when loading from webassembly global",
+        false);
   }
 
   if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
@@ -3525,6 +3637,184 @@ static SDValue performMulCombine(SDNode *N,
   }
 }
 
+static SDValue performANDCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  // Copied and modified from DAGCombiner::visitAND(SDNode *N).
+  // We have to do this because the original combiner doesn't work when
+  // ZEXTLOAD has custom lowering.
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDLoc DL(N);
+
+  // fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+  // already be zero by virtue of the width of the base type of the load.
+  //
+  // the 'X' node here can either be nothing or an extract_vector_elt to catch
+  // more cases.
+  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
+       N0.getOperand(0).getOpcode() == ISD::LOAD &&
+       N0.getOperand(0).getResNo() == 0) ||
+      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
+    auto *Load = cast<LoadSDNode>(
+        (N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
+
+    // Get the constant (if applicable) the zero'th operand is being ANDed
+    // with. This can be a pure constant or a vector splat, in which case we
+    // treat the vector as a scalar and use the splat value.
+    APInt Constant = APInt::getZero(1);
+    if (const ConstantSDNode *C = isConstOrConstSplat(
+            N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
+      Constant = C->getAPIntValue();
+    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+      unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
+      APInt SplatValue, SplatUndef;
+      unsigned SplatBitSize;
+      bool HasAnyUndefs;
+      // Endianness should not matter here. Code below makes sure that we only
+      // use the result if the SplatBitSize is a multiple of the vector element
+      // size. And after that we AND all element sized parts of the splat
+      // together. So the end result should be the same regardless of in which
+      // order we do those operations.
+      const bool IsBigEndian = false;
+      bool IsSplat =
+          Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                                  HasAnyUndefs, EltBitWidth, IsBigEndian);
+
+      // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
+      // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong
+      // value.
+      if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
+        // Undef bits can contribute to a possible optimisation if set, so
+        // set them.
+        SplatValue |= SplatUndef;
+
+        // The splat value may be something like "0x00FFFFFF", which means 0
+        // for the first vector value and FF for the rest, repeating. We need
+        // a mask that will apply equally to all members of the vector, so AND
+        // all the lanes of the constant together.
+        Constant = APInt::getAllOnes(EltBitWidth);
+        for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
+          Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
+      }
+    }
+
+    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+    // actually legal and isn't going to get expanded, else this is a false
+    // optimisation.
+
+    /*bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+                                                      Load->getValueType(0),
+                                                      Load->getMemoryVT());*/
+    // MODIFIED: this is the one difference in the logic; we allow the ZEXT
+    // combine only in addrspace 0, where ZEXTLOAD is legal.
+    bool CanZextLoadProfitably = Load->getAddressSpace() == 0;
+
+    // Resize the constant to the same size as the original memory access
+    // before extension. If it is still the AllOnesValue then this AND is
+    // completely unneeded.
+    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
+
+    bool B;
+    switch (Load->getExtensionType()) {
+    default:
+      B = false;
+      break;
+    case ISD::EXTLOAD:
+      B = CanZextLoadProfitably;
+      break;
+    case ISD::ZEXTLOAD:
+    case ISD::NON_EXTLOAD:
+      B = true;
+      break;
+    }
+
+    if (B && Constant.isAllOnes()) {
+      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+      // preserve semantics once we get rid of the AND.
+      SDValue NewLoad(Load, 0);
+
+      // Fold the AND away. NewLoad may get replaced immediately.
+      DCI.CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+      if (Load->getExtensionType() == ISD::EXTLOAD) {
+        NewLoad = DCI.DAG.getLoad(
+            Load->getAddressingMode(), ISD::ZEXTLOAD, Load->getValueType(0),
+            SDLoc(Load), Load->getChain(), Load->getBasePtr(),
+            Load->getOffset(), Load->getMemoryVT(), Load->getMemOperand());
+        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+        if (Load->getNumValues() == 3) {
+          // PRE/POST_INC loads have 3 values.
+          SDValue To[] = {NewLoad.getValue(0), NewLoad.getValue(1),
+                          NewLoad.getValue(2)};
+          DCI.CombineTo(Load, ArrayRef(To, 3), true);
+        } else {
+          DCI.CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+        }
+      }
+
+      return SDValue(N, 0); // Return N so it doesn't get rechecked!
+    }
+  }
+  return SDValue();
+}
+
+static SDValue
+performSIGN_EXTEND_INREGCombine(SDNode *N,
+                                TargetLowering::DAGCombinerInfo &DCI) {
+  // Copied and modified from DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N).
+  // We have to do this because the original combiner doesn't work when
+  // SEXTLOAD has custom lowering.
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+  EVT ExtVT = cast<VTSDNode>(N1)->getVT();
+  SDLoc DL(N);
+
+  // fold (sext_inreg (extload x)) -> (sextload x)
+  // If sextload is not supported by target, we can only do the combine when
+  // load has one use. Doing otherwise can block folding the extload with other
+  // extends that the target does support.
+
+  // MODIFIED: replaced TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT) with
+  // cast<LoadSDNode>(N0)->getAddressSpace() == 0.
+  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+      ((!DCI.isAfterLegalizeDAG() && cast<LoadSDNode>(N0)->isSimple() &&
+        N0.hasOneUse()) ||
+       cast<LoadSDNode>(N0)->getAddressSpace() == 0)) {
+    auto *LN0 = cast<LoadSDNode>(N0);
+    SDValue ExtLoad =
+        DCI.DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
+                           LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
+    DCI.CombineTo(N, ExtLoad);
+    DCI.CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+    DCI.AddToWorklist(ExtLoad.getNode());
+    return SDValue(N, 0); // Return N so it doesn't get rechecked!
+  }
+
+  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+
+  // MODIFIED: replaced TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT) with
+  // cast<LoadSDNode>(N0)->getAddressSpace() == 0.
+  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+      ((!DCI.isAfterLegalizeDAG() && cast<LoadSDNode>(N0)->isSimple()) &&
+       cast<LoadSDNode>(N0)->getAddressSpace() == 0)) {
+    auto *LN0 = cast<LoadSDNode>(N0);
+    SDValue ExtLoad =
+        DCI.DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
+                           LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
+    DCI.CombineTo(N, ExtLoad);
+    DCI.CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+    return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ } + + return SDValue(); +} + SDValue WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { @@ -3557,5 +3847,9 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, return performAnyAllCombine(N, DCI.DAG); case ISD::MUL: return performMulCombine(N, DCI); + case ISD::AND: + return performANDCombine(N, DCI); + case ISD::SIGN_EXTEND_INREG: + return performSIGN_EXTEND_INREGCombine(N, DCI); } } diff --git a/llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll b/llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll new file mode 100644 index 0000000000000..0112296df1aa8 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s | FileCheck %s + +; Test various loads from WASM (address space 1) globals lower as intended + +target triple = "wasm32-unknown-unknown" + + +@globalI8 = local_unnamed_addr addrspace(1) global i8 undef +@globalI32 = local_unnamed_addr addrspace(1) global i32 undef +@globalI64 = local_unnamed_addr addrspace(1) global i64 undef + + +define i32 @zext_i8_i32() { +; CHECK-LABEL: zext_i8_i32: +; CHECK: .functype zext_i8_i32 () -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI32 +; CHECK-NEXT: i32.const 255 +; CHECK-NEXT: i32.and +; CHECK-NEXT: # fallthrough-return + %v = load i8, ptr addrspace(1) @globalI32 + %e = zext i8 %v to i32 + ret i32 %e +} + +define i32 @sext_i8_i32() { +; CHECK-LABEL: sext_i8_i32: +; CHECK: .functype sext_i8_i32 () -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI32 +; CHECK-NEXT: i32.extend8_s +; CHECK-NEXT: # fallthrough-return + %v = load i8, ptr addrspace(1) @globalI32 + %e = sext i8 %v to i32 + ret i32 %e +} + +define i32 @zext_i16_i32() { +; CHECK-LABEL: zext_i16_i32: +; CHECK: .functype zext_i16_i32 () -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI32 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32.and +; CHECK-NEXT: # fallthrough-return + %v = load i16, ptr addrspace(1) @globalI32 + %e = zext i16 %v to i32 + ret i32 %e +} + +define i32 @sext_i16_i32() { +; CHECK-LABEL: sext_i16_i32: +; CHECK: .functype sext_i16_i32 () -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI32 +; CHECK-NEXT: i32.extend16_s +; CHECK-NEXT: # fallthrough-return + %v = load i16, ptr addrspace(1) @globalI32 + %e = sext i16 %v to i32 + ret i32 %e +} + + +define i64 @zext_i8_i64() { +; CHECK-LABEL: zext_i8_i64: +; CHECK: .functype zext_i8_i64 () -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI64 +; CHECK-NEXT: i64.const 255 +; CHECK-NEXT: i64.and +; CHECK-NEXT: # fallthrough-return + %v = load i8, ptr addrspace(1) @globalI64 + %e = zext i8 %v to i64 + ret i64 %e +} + +define i64 @sext_i8_i64() { +; CHECK-LABEL: sext_i8_i64: +; CHECK: .functype sext_i8_i64 () -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI64 +; CHECK-NEXT: i64.extend8_s +; CHECK-NEXT: # fallthrough-return + %v = load i8, ptr addrspace(1) @globalI64 + %e = sext i8 %v to i64 + ret i64 %e +} + +define i64 @zext_i16_i64() { +; CHECK-LABEL: zext_i16_i64: +; CHECK: .functype zext_i16_i64 () -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI64 +; CHECK-NEXT: i64.const 65535 +; CHECK-NEXT: i64.and +; CHECK-NEXT: # fallthrough-return + %v = load i16, ptr addrspace(1) @globalI64 + %e = zext i16 %v to i64 + ret i64 %e +} + +define i64 @sext_i16_i64() { +; CHECK-LABEL: sext_i16_i64: +; CHECK: .functype 
sext_i16_i64 () -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI64 +; CHECK-NEXT: i64.extend16_s +; CHECK-NEXT: # fallthrough-return + %v = load i16, ptr addrspace(1) @globalI64 + %e = sext i16 %v to i64 + ret i64 %e +} + +define i64 @zext_i32_i64() { +; CHECK-LABEL: zext_i32_i64: +; CHECK: .functype zext_i32_i64 () -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI64 +; CHECK-NEXT: i64.const 4294967295 +; CHECK-NEXT: i64.and +; CHECK-NEXT: # fallthrough-return + %v = load i32, ptr addrspace(1) @globalI64 + %e = zext i32 %v to i64 + ret i64 %e +} + +define i64 @sext_i32_i64() { +; CHECK-LABEL: sext_i32_i64: +; CHECK: .functype sext_i32_i64 () -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI64 +; CHECK-NEXT: i64.extend32_s +; CHECK-NEXT: # fallthrough-return + %v = load i32, ptr addrspace(1) @globalI64 + %e = sext i32 %v to i64 + ret i64 %e +} + + +define i64 @load_i64_from_i32() { +; CHECK-LABEL: load_i64_from_i32: +; CHECK: .functype load_i64_from_i32 () -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI32 +; CHECK-NEXT: i64.extend_i32_u +; CHECK-NEXT: # fallthrough-return + %v = load i64, ptr addrspace(1) @globalI32 + ret i64 %v +} + +define i32 @load_i32_from_i64() { +; CHECK-LABEL: load_i32_from_i64: +; CHECK: .functype load_i32_from_i64 () -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI64 +; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: # fallthrough-return + %v = load i32, ptr addrspace(1) @globalI64 + ret i32 %v +} + +define i8 @load_i8() { +; CHECK-LABEL: load_i8: +; CHECK: .functype load_i8 () -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI8 +; CHECK-NEXT: # fallthrough-return + %v = load i8, ptr addrspace(1) @globalI8 + ret i8 %v +} + +define i64 @load_i16_from_i8_zext_to_i64() { +; CHECK-LABEL: load_i16_from_i8_zext_to_i64: +; CHECK: .functype load_i16_from_i8_zext_to_i64 () -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get globalI8 +; CHECK-NEXT: i64.extend_i32_u +; CHECK-NEXT: i64.const 65535 +; CHECK-NEXT: i64.and +; CHECK-NEXT: # fallthrough-return + %v = load i16, ptr addrspace(1) @globalI8 + %e = zext i16 %v to i64 + ret i64 %e +} diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll index d23c2272d9c0d..973940b389976 100644 --- a/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/int-mac-reduction-costs.ll @@ -6,9 +6,9 @@ target triple = "wasm32" define hidden i32 @i32_mac_s8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %N) { ; CHECK-LABEL: 'i32_mac_s8' ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8, ptr %arrayidx, align 1 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = sext i8 %0 to i32 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv = sext i8 %0 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i8, ptr %arrayidx1, align 1 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = sext i8 %1 to i32 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv2 = sext i8 %1 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i32 %conv2, %conv ; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %0 = load i8, ptr %arrayidx, align 1 @@ -50,9 
+50,9 @@ for.body: ; preds = %entry, %for.body define hidden i32 @i32_mac_s16(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %N) { ; CHECK-LABEL: 'i32_mac_s16' ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i16, ptr %arrayidx, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = sext i16 %0 to i32 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv = sext i16 %0 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i16, ptr %arrayidx1, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = sext i16 %1 to i32 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv2 = sext i16 %1 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i32 %conv2, %conv ; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2 @@ -94,9 +94,9 @@ for.body: ; preds = %entry, %for.body define hidden i64 @i64_mac_s16(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %N) { ; CHECK-LABEL: 'i64_mac_s16' ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i16, ptr %arrayidx, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = sext i16 %0 to i64 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv = sext i16 %0 to i64 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i16, ptr %arrayidx1, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = sext i16 %1 to i64 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv2 = sext i16 %1 to i64 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i64 %conv2, %conv ; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2 @@ -167,9 +167,9 @@ for.body: ; preds = %entry, %for.body define hidden i32 @i32_mac_u8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %N) { ; CHECK-LABEL: 'i32_mac_u8' ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8, ptr %arrayidx, align 1 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = zext i8 %0 to i32 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv = zext i8 %0 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i8, ptr %arrayidx1, align 1 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = zext i8 %1 to i32 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv2 = zext i8 %1 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nuw nsw i32 %conv2, %conv ; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %0 = load i8, ptr %arrayidx, align 1 @@ -211,9 +211,9 @@ for.body: ; preds = %entry, %for.body define hidden i32 @i32_mac_u16(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %N) { ; CHECK-LABEL: 'i32_mac_u16' ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i16, ptr %arrayidx, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = zext i16 %0 to i32 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv = zext i16 %0 to i32 ; CHECK: LV: Found an estimated 
cost of 1 for VF 1 For instruction: %1 = load i16, ptr %arrayidx1, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = zext i16 %1 to i32 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv2 = zext i16 %1 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nuw nsw i32 %conv2, %conv ; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2 @@ -255,9 +255,9 @@ for.body: ; preds = %entry, %for.body define hidden i64 @i64_mac_u16(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %N) { ; CHECK-LABEL: 'i64_mac_u16' ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i16, ptr %arrayidx, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = zext i16 %0 to i64 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv = zext i16 %0 to i64 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i16, ptr %arrayidx1, align 2 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = zext i16 %1 to i64 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv2 = zext i16 %1 to i64 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nuw nsw i64 %conv2, %conv ; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2 diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll index c8d20dccbb32b..e393276ac6416 100644 --- a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll @@ -629,7 +629,7 @@ define hidden void @three_bytes_interleave_op(ptr noalias nocapture noundef writ ; CHECK-LABEL: four_bytes_same_op ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 20. ; CHECK: LV: Vector loop of width 2 costs: 40. @@ -690,7 +690,7 @@ define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly % ; CHECK-LABEL: four_bytes_split_op ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 20. ; CHECK: LV: Vector loop of width 2 costs: 45. @@ -755,7 +755,7 @@ define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly ; CHECK-LABEL: four_bytes_interleave_op ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 +; CHECK: Cost of 26 for VF 8: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 20. ; CHECK: LV: Vector loop of width 2 costs: 40 @@ -1055,7 +1055,7 @@ define hidden void @eight_bytes_interleave_op(ptr noalias nocapture noundef writ } ; CHECK-LABEL: four_bytes_into_four_ints_same_op -; CHECK: LV: Scalar loop costs: 28. +; CHECK: LV: Scalar loop costs: 36. 
; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i8 ; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: %17 = load i32 ; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: store i32 @@ -1127,7 +1127,7 @@ define hidden void @four_bytes_into_four_ints_same_op(ptr noalias nocapture noun ; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 4 -; CHECK: LV: Scalar loop costs: 21. +; CHECK: LV: Scalar loop costs: 27. ; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %10 = load i8 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %11 = zext i8 ; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: store i32
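Note (not part of the patch): a quick way to exercise the new lowering locally is to run the new test file through llc and FileCheck, mirroring its RUN line; the build directory path below is an assumption, adjust it to your checkout.

    build/bin/llc < llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll \
      | build/bin/FileCheck llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll

Running the same file through build/bin/llvm-lit verifies the autogenerated update_llc_test_checks.py assertions the same way the bots would.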