Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -879,6 +879,61 @@
   return nullptr;
 }

+/// \brief Look for extractelement/insertvalue sequence that acts like a bitcast.
+///
+/// \returns underlying value that was "cast", or nullptr otherwise.
+///
+/// For example, if we have:
+///
+///     %E0 = extractelement <2 x double> %U, i32 0
+///     %V0 = insertvalue [2 x double] undef, double %E0, 0
+///     %E1 = extractelement <2 x double> %U, i32 1
+///     %V1 = insertvalue [2 x double] %V0, double %E1, 1
+///
+/// and the layout of a <2 x double> is isomorphic to a [2 x double],
+/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
+/// Note that %U may contain non-undef values where %V1 has undef.
+static Value* likeBitCastFromVector(InstCombiner &IC, Value* V) {
+  Value *U = nullptr;
+  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
+    auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
+    if (!E)
+      return nullptr;
+    auto *W = E->getVectorOperand();
+    if (!U)
+      U = W;
+    else if (U != W)
+      return nullptr;
+    auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
+    if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
+      return nullptr;
+    V = IV->getAggregateOperand();
+  }
+  if (!isa<UndefValue>(V) ||!U)
+    return nullptr;
+
+  VectorType *UT = cast<VectorType>(U->getType());
+  Type *VT = V->getType();
+  // Check that types UT and VT are bitwise isomorphic.
+  const DataLayout &DL = IC.getDataLayout();
+  if (DL.getTypeSizeInBits(UT) != DL.getTypeSizeInBits(VT)) {
+    return nullptr;
+  }
+  if (ArrayType *AT = dyn_cast<ArrayType>(VT)) {
+    if (AT->getNumElements() != UT->getNumElements())
+      return nullptr;
+  } else {
+    StructType *ST = cast<StructType>(VT);
+    if (ST->getNumElements() != UT->getNumElements())
+      return nullptr;
+    for (const Type *EltT : ST->elements()) {
+      if (EltT != UT->getElementType())
+        return nullptr;
+    }
+  }
+  return U;
+}
+
 /// \brief Combine stores to match the type of value being stored.
 ///
 /// The core idea here is that the memory does not have any intrinsic type and
@@ -914,6 +969,11 @@
     return true;
   }

+  if (Value *U = likeBitCastFromVector(IC, V)) {
+    combineStoreToNewValue(IC, SI, U);
+    return true;
+  }
+
   // FIXME: We should also canonicalize loads of vectors when their elements are
   // cast to other types.
   return false;
Index: test/Transforms/InstCombine/insert-val-extract-elem.ll
===================================================================
--- a/test/Transforms/InstCombine/insert-val-extract-elem.ll
+++ b/test/Transforms/InstCombine/insert-val-extract-elem.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+; CHECK-NOT: insertvalue
+; CHECK-NOT: extractelement
+; CHECK: store <2 x double>
+define void @julia_2xdouble([2 x double]* sret, <2 x double>*) {
+top:
+  %x = load <2 x double>, <2 x double>* %1
+  %x0 = extractelement <2 x double> %x, i32 0
+  %i0 = insertvalue [2 x double] undef, double %x0, 0
+  %x1 = extractelement <2 x double> %x, i32 1
+  %i1 = insertvalue [2 x double] %i0, double %x1, 1
+  store [2 x double] %i1, [2 x double]* %0, align 4
+  ret void
+}
+
+; CHECK-NOT: insertvalue
+; CHECK-NOT: extractelement
+; CHECK: store <4 x float>
+define void @julia_4xfloat([4 x float]* sret, <4 x float>*) {
+top:
+  %x = load <4 x float>, <4 x float>* %1
+  %x0 = extractelement <4 x float> %x, i32 0
+  %i0 = insertvalue [4 x float] undef, float %x0, 0
+  %x1 = extractelement <4 x float> %x, i32 1
+  %i1 = insertvalue [4 x float] %i0, float %x1, 1
+  %x2 = extractelement <4 x float> %x, i32 2
+  %i2 = insertvalue [4 x float] %i1, float %x2, 2
+  %x3 = extractelement <4 x float> %x, i32 3
+  %i3 = insertvalue [4 x float] %i2, float %x3, 3
+  store [4 x float] %i3, [4 x float]* %0, align 4
+  ret void
+}
+
+%pseudovec = type { float, float, float, float }
+
+; CHECK-NOT: insertvalue
+; CHECK-NOT: extractelement
+; CHECK: store <4 x float>
+define void @julia_pseudovec(%pseudovec* sret, <4 x float>*) {
+top:
+  %x = load <4 x float>, <4 x float>* %1
+  %x0 = extractelement <4 x float> %x, i32 0
+  %i0 = insertvalue %pseudovec undef, float %x0, 0
+  %x1 = extractelement <4 x float> %x, i32 1
+  %i1 = insertvalue %pseudovec %i0, float %x1, 1
+  %x2 = extractelement <4 x float> %x, i32 2
+  %i2 = insertvalue %pseudovec %i1, float %x2, 2
+  %x3 = extractelement <4 x float> %x, i32 3
+  %i3 = insertvalue %pseudovec %i2, float %x3, 3
+  store %pseudovec %i3, %pseudovec* %0, align 4
+  ret void
+}