|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
From 1ca1fcaa5b4c75a65a202badfd5df8240a36ca0f Mon Sep 17 00:00:00 2001
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
From: "Arch D. Robison" <arch.robison@intel.com>
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
Date: Mon, 25 Apr 2016 22:22:39 +0000
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
Subject: [PATCH] Optimize store of "bitcast" from vector to aggregate.
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
This patch is what was the "instcombine" portion of D14185, with an additional
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
test added (see julia_pseudovec in test/Transforms/InstCombine/insert-val-extract-elem.ll).
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
The patch causes instcombine to replace sequences of extractelement-insertvalue-store
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
that act essentially like a bitcast followed by a store.
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
Differential review: http://reviews.llvm.org/D14260
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
git-svn-id: https:
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
Alpine maintainer notes:
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
- Updated for llvm 3.8.1.
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
- This patch replaces llvm-D14260.patch from Julia.
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
.../InstCombine/InstCombineLoadStoreAlloca.cpp | 60 ++++++++++++++++++
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
.../InstCombine/insert-val-extract-elem.ll | 74 ++++++++++++++++++++++
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
2 files changed, 134 insertions(+)
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
create mode 100644 test/Transforms/InstCombine/insert-val-extract-elem.ll
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
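diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp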
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
index 96f0908..0ee6045 100644
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
@@ -913,6 +913,61 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
return nullptr;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
}
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+/// \brief Look for extractelement/insertvalue sequence that acts like a bitcast.
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+///
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+/// \returns underlying value that was "cast", or nullptr otherwise.
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+///
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+/// For example, if we have:
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+///
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+///     %E0 = extractelement <2 x double> %U, i32 0
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+/// %V0 = insertvalue [2 x double] undef, double %E0, 0
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+///     %E1 = extractelement <2 x double> %U, i32 1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+/// %V1 = insertvalue [2 x double] %V0, double %E1, 1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+///
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+/// and the layout of a <2 x double> is isomorphic to a [2 x double],
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+/// Note that %U may contain non-undef values where %V1 has undef.
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ Value *U = nullptr;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ while (auto *IV = dyn_cast<InsertValueInst>(V)) {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ if (!E)
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ return nullptr;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ auto *W = E->getVectorOperand();
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ if (!U)
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ U = W;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ else if (U != W)
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ return nullptr;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ return nullptr;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ V = IV->getAggregateOperand();
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ }
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ if (!isa<UndefValue>(V) ||!U)
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ return nullptr;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ auto *UT = cast<VectorType>(U->getType());
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ auto *VT = V->getType();
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ // Check that types UT and VT are bitwise isomorphic.
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ const auto &DL = IC.getDataLayout();
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ return nullptr;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ }
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ if (auto *AT = dyn_cast<ArrayType>(VT)) {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ if (AT->getNumElements() != UT->getNumElements())
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ return nullptr;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ } else {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ auto *ST = cast<StructType>(VT);
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ if (ST->getNumElements() != UT->getNumElements())
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ return nullptr;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ for (const auto *EltT : ST->elements()) {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ if (EltT != UT->getElementType())
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ return nullptr;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ }
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ }
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ return U;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+}
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
/// The core idea here is that the memory does not have any intrinsic type and
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
@@ -924,6 +979,11 @@
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
return true;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
}
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ if (Value *U = likeBitCastFromVector(IC, V)) {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ combineStoreToNewValue(IC, SI, U);
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ return true;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ }
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
return false;
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
diff --git a/test/Transforms/InstCombine/insert-val-extract-elem.ll b/test/Transforms/InstCombine/insert-val-extract-elem.ll
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
new file mode 100644
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
index 0000000..db7b403
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
--- /dev/null
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+++ b/test/Transforms/InstCombine/insert-val-extract-elem.ll
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
@@ -0,0 +1,74 @@
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; RUN: opt -S -instcombine %s | FileCheck %s
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-LABEL: julia_2xdouble
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-NOT: insertvalue
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-NOT: extractelement
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK: store <2 x double>
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+define void @julia_2xdouble([2 x double]* sret, <2 x double>*) {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+top:
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x = load <2 x double>, <2 x double>* %1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x0 = extractelement <2 x double> %x, i32 0
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i0 = insertvalue [2 x double] undef, double %x0, 0
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x1 = extractelement <2 x double> %x, i32 1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i1 = insertvalue [2 x double] %i0, double %x1, 1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ store [2 x double] %i1, [2 x double]* %0, align 4
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ ret void
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+}
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; Test with two inserts to the same index
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-LABEL: julia_2xi64
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-NOT: insertvalue
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-NOT: extractelement
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK: store <2 x i64>
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+define void @julia_2xi64([2 x i64]* sret, <2 x i64>*) {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+top:
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x = load <2 x i64>, <2 x i64>* %1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x0 = extractelement <2 x i64> %x, i32 1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i0 = insertvalue [2 x i64] undef, i64 %x0, 0
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x1 = extractelement <2 x i64> %x, i32 1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i1 = insertvalue [2 x i64] %i0, i64 %x1, 1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x2 = extractelement <2 x i64> %x, i32 0
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i2 = insertvalue [2 x i64] %i1, i64 %x2, 0
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ store [2 x i64] %i2, [2 x i64]* %0, align 4
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ ret void
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+}
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-LABEL: julia_4xfloat
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-NOT: insertvalue
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-NOT: extractelement
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK: store <4 x float>
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+define void @julia_4xfloat([4 x float]* sret, <4 x float>*) {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+top:
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x = load <4 x float>, <4 x float>* %1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x0 = extractelement <4 x float> %x, i32 0
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i0 = insertvalue [4 x float] undef, float %x0, 0
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x1 = extractelement <4 x float> %x, i32 1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i1 = insertvalue [4 x float] %i0, float %x1, 1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x2 = extractelement <4 x float> %x, i32 2
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i2 = insertvalue [4 x float] %i1, float %x2, 2
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x3 = extractelement <4 x float> %x, i32 3
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i3 = insertvalue [4 x float] %i2, float %x3, 3
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ store [4 x float] %i3, [4 x float]* %0, align 4
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ ret void
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+}
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+%pseudovec = type { float, float, float, float }
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-LABEL: julia_pseudovec
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-NOT: insertvalue
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK-NOT: extractelement
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+; CHECK: store <4 x float>
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+define void @julia_pseudovec(%pseudovec* sret, <4 x float>*) {
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+top:
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x = load <4 x float>, <4 x float>* %1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x0 = extractelement <4 x float> %x, i32 0
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i0 = insertvalue %pseudovec undef, float %x0, 0
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x1 = extractelement <4 x float> %x, i32 1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i1 = insertvalue %pseudovec %i0, float %x1, 1
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x2 = extractelement <4 x float> %x, i32 2
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i2 = insertvalue %pseudovec %i1, float %x2, 2
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %x3 = extractelement <4 x float> %x, i32 3
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ %i3 = insertvalue %pseudovec %i2, float %x3, 3
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ store %pseudovec %i3, %pseudovec* %0, align 4
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+ ret void
|
|
Lucio Andrés Illanes Albornoz (arab, vxp) |
585426 |
+}
|