Blame patches/llvm_host/llvm-0005-optimize-store-of-bitcast-from-vector-to-aggregate.patch

Lucio Andrés Illanes Albornoz (arab, vxp) 585426
From 1ca1fcaa5b4c75a65a202badfd5df8240a36ca0f Mon Sep 17 00:00:00 2001
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
From: "Arch D. Robison" <arch.robison@intel.com>
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
Date: Mon, 25 Apr 2016 22:22:39 +0000
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
Subject: [PATCH] Optimize store of "bitcast" from vector to aggregate.
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
This patch was originally the "instcombine" portion of D14185, with an additional
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
test added (see julia_pseudovec in test/Transforms/InstCombine/insert-val-extract-elem.ll).
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
The patch causes instcombine to replace sequences of extractelement-insertvalue-store
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
that act essentially like a bitcast followed by a store, with a direct store of the source vector.
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
Differential review: http://reviews.llvm.org/D14260
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@267482 91177308-0d34-0410-b5e6-96231b3b80d8
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
Alpine maintainer notes:
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
  - Updated for llvm 3.8.1.
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
  - This patch replaces llvm-D14260.patch from Julia.
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
---
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
 .../InstCombine/InstCombineLoadStoreAlloca.cpp     | 60 ++++++++++++++++++
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
 .../InstCombine/insert-val-extract-elem.ll         | 74 ++++++++++++++++++++++
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
 2 files changed, 134 insertions(+)
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
 create mode 100644 test/Transforms/InstCombine/insert-val-extract-elem.ll
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
index 96f0908..0ee6045 100644
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
@@ -913,6 +913,61 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
   return nullptr;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
 }
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
 
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+/// \brief Look for extractelement/insertvalue sequence that acts like a bitcast.
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+///
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+/// \returns underlying value that was "cast", or nullptr otherwise.
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+///
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+/// For example, if we have:
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+///
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+///     %E0 = extractelement <2 x double> %U, i32 0
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+///     %V0 = insertvalue [2 x double] undef, double %E0, 0
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+///     %E1 = extractelement <2 x double> %U, i32 1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+///     %V1 = insertvalue [2 x double] %V0, double %E1, 1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+///
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+/// and the layout of a <2 x double> is isomorphic to a [2 x double],
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+/// Note that %U may contain non-undef values where %V1 has undef.
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  Value *U = nullptr;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    if (!E)
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+      return nullptr;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    auto *W = E->getVectorOperand();
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    if (!U)
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+      U = W;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    else if (U != W)
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+      return nullptr;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+      return nullptr;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    V = IV->getAggregateOperand();
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  }
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  if (!isa<UndefValue>(V) ||!U)
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    return nullptr;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  auto *UT = cast<VectorType>(U->getType());
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  auto *VT = V->getType();
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  // Check that types UT and VT are bitwise isomorphic.
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  const auto &DL = IC.getDataLayout();
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    return nullptr;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  }
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  if (auto *AT = dyn_cast<ArrayType>(VT)) {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    if (AT->getNumElements() != UT->getNumElements())
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+      return nullptr;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  } else {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    auto *ST = cast<StructType>(VT);
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    if (ST->getNumElements() != UT->getNumElements())
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+      return nullptr;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    for (const auto *EltT : ST->elements()) {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+      if (EltT != UT->getElementType())
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+        return nullptr;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    }
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  }
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  return U;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+}
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
 /// \brief Combine stores to match the type of value being stored.
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
 ///
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
 /// The core idea here is that the memory does not have any intrinsic type and
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
@@ -924,6 +979,11 @@
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
     return true;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
   }
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  if (Value *U = likeBitCastFromVector(IC, V)) {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    combineStoreToNewValue(IC, SI, U);
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+    return true;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  }
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
   // FIXME: We should also canonicalize loads of vectors when their elements are
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
   // cast to other types.
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
   return false;
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
diff --git a/test/Transforms/InstCombine/insert-val-extract-elem.ll b/test/Transforms/InstCombine/insert-val-extract-elem.ll
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
new file mode 100644
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
index 0000000..db7b403
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
--- /dev/null
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+++ b/test/Transforms/InstCombine/insert-val-extract-elem.ll
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
@@ -0,0 +1,74 @@
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; RUN: opt -S -instcombine %s | FileCheck %s
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-LABEL: julia_2xdouble
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-NOT: insertvalue
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-NOT: extractelement
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK: store <2 x double>
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+define void @julia_2xdouble([2 x double]* sret, <2 x double>*) {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+top:
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x = load <2 x double>, <2 x double>* %1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x0 = extractelement <2 x double> %x, i32 0
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i0 = insertvalue [2 x double] undef, double %x0, 0
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x1 = extractelement <2 x double> %x, i32 1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i1 = insertvalue [2 x double] %i0, double %x1, 1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  store [2 x double] %i1, [2 x double]* %0, align 4
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  ret void
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+}
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; Test with two inserts to the same index
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-LABEL: julia_2xi64
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-NOT: insertvalue
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-NOT: extractelement
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK: store <2 x i64>
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+define void @julia_2xi64([2 x i64]* sret, <2 x i64>*) {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+top:
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x = load <2 x i64>, <2 x i64>* %1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x0 = extractelement <2 x i64> %x, i32 1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i0 = insertvalue [2 x i64] undef, i64 %x0, 0
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x1 = extractelement <2 x i64> %x, i32 1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i1 = insertvalue [2 x i64] %i0, i64 %x1, 1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x2 = extractelement <2 x i64> %x, i32 0
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i2 = insertvalue [2 x i64] %i1, i64 %x2, 0
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  store [2 x i64] %i2, [2 x i64]* %0, align 4
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  ret void
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+}
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-LABEL: julia_4xfloat
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-NOT: insertvalue
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-NOT: extractelement
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK: store <4 x float>
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+define void @julia_4xfloat([4 x float]* sret, <4 x float>*) {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+top:
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x = load <4 x float>, <4 x float>* %1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x0 = extractelement <4 x float> %x, i32 0
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i0 = insertvalue [4 x float] undef, float %x0, 0
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x1 = extractelement <4 x float> %x, i32 1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i1 = insertvalue [4 x float] %i0, float %x1, 1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x2 = extractelement <4 x float> %x, i32 2
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i2 = insertvalue [4 x float] %i1, float %x2, 2
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x3 = extractelement <4 x float> %x, i32 3
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i3 = insertvalue [4 x float] %i2, float %x3, 3
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  store [4 x float] %i3, [4 x float]* %0, align 4
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  ret void
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+}
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+%pseudovec = type { float, float, float, float }
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-LABEL: julia_pseudovec
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-NOT: insertvalue
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK-NOT: extractelement
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+; CHECK: store <4 x float>
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+define void @julia_pseudovec(%pseudovec* sret, <4 x float>*) {
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+top:
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x = load <4 x float>, <4 x float>* %1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x0 = extractelement <4 x float> %x, i32 0
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i0 = insertvalue %pseudovec undef, float %x0, 0
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x1 = extractelement <4 x float> %x, i32 1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i1 = insertvalue %pseudovec %i0, float %x1, 1
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x2 = extractelement <4 x float> %x, i32 2
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i2 = insertvalue %pseudovec %i1, float %x2, 2
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %x3 = extractelement <4 x float> %x, i32 3
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  %i3 = insertvalue %pseudovec %i2, float %x3, 3
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  store %pseudovec %i3, %pseudovec* %0, align 4
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+  ret void
Lucio Andrés Illanes Albornoz (arab, vxp) 585426
+}