[AArch64][GlobalISel] Add disjoint handling for add_and_or_is_add. #123594
base: main
Conversation
This allows us to easily detect, without known-bits, that the OR in a fshl/fshr lowering is disjoint, allowing us to use usra under AArch64.
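For intuition, the shift-based lowering splits the funnel-shift result into a left-shifted high part and a right-shifted low part that occupy complementary bit ranges, so the final OR can be marked disjoint without any known-bits reasoning. A minimal standalone sketch of that property (not LLVM code; the function name and constants are illustrative):

```cpp
// Scalar model of the shift-based fshl lowering: the high part only keeps the
// upper bits and the low part only keeps the lower bits, so the two operands
// of the final OR never have a set bit in common.
#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t a, uint32_t b, unsigned amt) {
  amt &= 31;                      // funnel-shift amounts are taken modulo 32
  if (amt == 0)
    return a;                     // degenerate case: no OR is needed
  uint32_t hi = a << amt;         // the low 'amt' bits are zero
  uint32_t lo = b >> (32 - amt);  // only the low 'amt' bits can be set
  assert((hi & lo) == 0 && "the two operands of the OR never overlap");
  return hi | lo;                 // safe to mark disjoint / treat as an add
}

int main() {
  for (unsigned amt = 0; amt < 32; ++amt)
    (void)fshl32(0xdeadbeefu, 0x12345678u, amt);
  return 0;
}
```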
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-globalisel

Author: David Green (davemgreen)

Changes

This allows us to easily detect, without known-bits, that the OR in a fshl/fshr lowering is disjoint, allowing us to use usra under AArch64.

Patch is 89.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123594.diff

10 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d0a62340a5f322..508171dd086170 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7255,7 +7255,7 @@ LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
}
}
- MIRBuilder.buildOr(Dst, ShX, ShY);
+ MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
MI.eraseFromParent();
return Legalized;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 40a6b8e4c8e640..4bb6d34595bd94 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1025,9 +1025,9 @@ def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
return CurDAG->isADDLike(SDValue(N,0));
}]> {
let GISelPredicateCode = [{
- // Only handle G_ADD for now. FIXME. build capability to compute whether
- // operands of G_OR have common bits set or not.
- return MI.getOpcode() == TargetOpcode::G_ADD;
+ return MI.getOpcode() == TargetOpcode::G_ADD ||
+ (MI.getOpcode() == TargetOpcode::G_OR &&
+ MI.getFlag(MachineInstr::MIFlag::Disjoint));
}];
}
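The reason the predicate change matters for selection: add_and_or_is_add treats certain ORs as adds, and a disjoint OR really does compute the same value as an add because no bit position can produce a carry. A minimal sketch of that identity (standalone and illustrative, not the in-tree pattern code):

```cpp
// When two values share no set bits, OR-ing them and adding them give the
// same result, so a G_OR carrying the Disjoint flag may safely be matched by
// patterns written in terms of add (such as the accumulate forms behind usra).
#include <cassert>
#include <cstdint>

static uint32_t or_as_add(uint32_t x, uint32_t y) {
  assert((x & y) == 0 && "caller guarantees disjoint operands");
  assert((x | y) == x + y && "no carries, so OR and ADD agree");
  return x + y;
}

int main() {
  // Shaped like the lowered fshl above: shl by 3 vs. lshr by 29.
  uint32_t hi = 0xdeadbeefu << 3;
  uint32_t lo = 0x12345678u >> 29;
  assert(or_as_add(hi, lo) == (hi | lo));
  return 0;
}
```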
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir
index c3c23aecc161f0..1e549fa9a833a6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshl.mir
@@ -28,7 +28,7 @@ body: |
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s64)
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR1]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%3:_(s32) = COPY $w0
@@ -71,7 +71,7 @@ body: |
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s64)
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR1]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%3:_(s32) = COPY $w0
@@ -110,7 +110,7 @@ body: |
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s64)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[AND1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR1]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
@@ -145,7 +145,7 @@ body: |
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[AND]](s64)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s64)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[AND1]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[SHL]], [[LSHR1]]
; CHECK-NEXT: $x0 = COPY [[OR]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
@@ -175,7 +175,7 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -209,7 +209,7 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -246,7 +246,7 @@ body: |
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C3]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR1]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -280,7 +280,7 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -314,7 +314,7 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -351,7 +351,7 @@ body: |
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C3]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR1]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -437,7 +437,7 @@ body: |
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s64)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LSHR]], [[C2]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR1]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
@@ -520,7 +520,7 @@ body: |
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C1]](s64)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s64)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[C]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[SHL]], [[LSHR1]]
; CHECK-NEXT: $x0 = COPY [[OR]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
@@ -556,7 +556,7 @@ body: |
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY1]], [[BUILD_VECTOR2]](<4 x s32>)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[LSHR]], [[BUILD_VECTOR1]](<4 x s32>)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[SHL]], [[LSHR1]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = disjoint G_OR [[SHL]], [[LSHR1]]
; CHECK-NEXT: $q0 = COPY [[OR]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir
index 09520445b71d98..9a9e5bb42c64fd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fshr.mir
@@ -27,7 +27,7 @@ body: |
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL1]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%3:_(s32) = COPY $w0
@@ -69,7 +69,7 @@ body: |
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[AND]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL1]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%3:_(s32) = COPY $w0
@@ -108,7 +108,7 @@ body: |
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s64)
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[AND]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL1]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s32) = COPY $w0
@@ -143,7 +143,7 @@ body: |
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s64)
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[AND1]](s64)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[AND]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[SHL1]], [[LSHR]]
; CHECK-NEXT: $x0 = COPY [[OR]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
@@ -175,7 +175,7 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -209,7 +209,7 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -245,7 +245,7 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C3]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL1]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -279,7 +279,7 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -313,7 +313,7 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C2]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -349,7 +349,7 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C3]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = disjoint G_OR [[SHL1]], [[LSHR]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:_(s32) = COPY $w0
@@ -507,7 +507,7 @@ body: |
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s64)
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[C]](s64)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C1]](s64)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[SHL1]], [[LSHR]]
; CHECK-NEXT: $x0 = COPY [[OR]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:_(s64) = COPY $x0
@@ -541,7 +541,7 @@ body: |
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[COPY]], [[BUILD_VECTOR2]](<4 x s32>)
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<4 x s32>) = G_SHL [[SHL]], [[BUILD_VECTOR1]](<4 x s32>)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY1]], [[BUILD_VECTOR]](<4 x s32>)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[SHL1]], [[LSHR]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = disjoint G_OR [[SHL1]], [[LSHR]]
; CHECK-NEXT: $q0 = COPY [[OR]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll
index b3ce00aeb36e50..bc311f60d07d26 100644
--- a/llvm/test/CodeGen/AArch64/fsh.ll
+++ b/llvm/test/CodeGen/AArch64/fsh.ll
@@ -3917,8 +3917,7 @@ define <8 x i8> @fshl_v8i8_c(<8 x i8> %a, <8 x i8> %b) {
; CHECK-GI-LABEL: fshl_v8i8_c:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.8b, v0.8b, #3
-; CHECK-GI-NEXT: ushr v1.8b, v1.8b, #5
-; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: usra v0.8b, v1.8b, #5
; CHECK-GI-NEXT: ret
entry:
%d = call <8 x i8> @llvm.fshl(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
@@ -3936,8 +3935,7 @@ define <8 x i8> @fshr_v8i8_c(<8 x i8> %a, <8 x i8> %b) {
; CHECK-GI-LABEL: fshr_v8i8_c:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.8b, v0.8b, #5
-; CHECK-GI-NEXT: ushr v1.8b, v1.8b, #3
-; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: usra v0.8b, v1.8b, #3
; CHECK-GI-NEXT: ret
entry:
%d = call <8 x i8> @llvm.fshr(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
@@ -3955,8 +3953,7 @@ define <16 x i8> @fshl_v16i8_c(<16 x i8> %a, <16 x i8> %b) {
; CHECK-GI-LABEL: fshl_v16i8_c:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #3
-; CHECK-GI-NEXT: ushr v1.16b, v1.16b, #5
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: usra v0.16b, v1.16b, #5
; CHECK-GI-NEXT: ret
entry:
%d = call <16 x i8> @llvm.fshl(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
@@ -3974,8 +3971,7 @@ define <16 x i8> @fshr_v16i8_c(<16 x i8> %a, <16 x i8> %b) {
; CHECK-GI-LABEL: fshr_v16i8_c:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #5
-; CHECK-GI-NEXT: ushr v1.16b, v1.16b, #3
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: usra v0.16b, v1.16b, #3
; CHECK-GI-NEXT: ret
entry:
%d = call <16 x i8> @llvm.fshr(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
@@ -3993,8 +3989,7 @@ define <4 x i16> @fshl_v4i16_c(<4 x i16> %a, <4 x i16> %b) {
; CHECK-GI-LABEL: fshl_v4i16_c:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.4h, v0.4h, #3
-; CHECK-GI-NEXT: ushr v1.4h, v1.4h, #13
-; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: usra v0.4h, v1.4h, #13
; CHECK-GI-NEXT: ret
entry:
%d = call <4 x i16> @llvm.fshl(<4 x i16> %a, <4 x i16> %b, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
@@ -4012,8 +4007,7 @@ define <4 x i16> @fshr_v4i16_c(<4 x i16> %a, <4 x i16> %b) {
; CHECK-GI-LABEL: fshr_v4i16_c:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.4h, v0.4h, #13
-; CHECK-GI-NEXT: ushr v1.4h, v1.4h, #3
-; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: usra v0.4h, v1.4h, #3
; CHECK-GI-NEXT: ret
entry:
%d = call <4 x i16> @llvm.fshr(<4 x i16> %a, <4 x i16> %b, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
@@ -4113,8 +4107,7 @@ define <8 x i16> @fshl_v8i16_c(<8 x i16> %a, <8 x i16> %b) {
; CHECK-GI-LABEL: fshl_v8i16_c:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #3
-; CHECK-GI-NEXT: ushr v1.8h, v1.8h, #13
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: usra v0.8h, v1.8h, #13
; CHECK-GI-NEXT: ret
entry:
%d = call <8 x i16> @llvm.fshl(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
@@ -4132,8 +4125,7 @@ define <8 x i16> @fshr_v8i16_c(<8 x i16> %a, <8 x i16> %b) {
; CHECK-GI-LABEL: fshr_v8i16_c:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #13
-; CHECK-GI-NEXT: ushr v1.8h, v1.8h, #3
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: usra v0.8h, v1.8h, #3
; CHECK-GI-NEXT: ret
entry:
%d = call <8 x i16> @llvm.fshr(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
@@ -4155,10 +4147,8 @@ define <16 x i16> @fshl_v16i16_c(<16 x i16> %a, <16 x i16> %b) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #3
; CHECK-GI-NEXT: shl v1.8h, v1.8h, #3
-; CHECK-GI-NEXT: ushr v2.8h, v2.8h, #13
-; CHECK-GI-NEXT: ushr v3.8h, v3.8h, #13
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: orr v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: usra v0.8h, v2.8h, #13
+; CHECK-GI-NEXT: usra v1.8h, v3.8h, #13
; CHECK-GI-NEXT: ret
entry:
%d = call <16 x i16> @llvm.fshl(<16 x i16> %a, <16 x i16> %b, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
@@ -4180,10 +4170,8 @@ define <16 x i16> @fshr_v16i16_c(<16 x i16> %a, <16 x i16> %b) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #13
; CHECK-GI-NEXT: shl v1.8h, v1.8h, #13
-; CHECK-GI-NEXT: ushr v2.8h, v2.8h, #3
-; CHECK-GI-NEXT: ushr v3.8h, v3.8h, #3
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: orr v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: usra v0.8h, v2.8h, #3
+; CHECK-GI-NEXT: usra v1.8h, v3.8h, #3
; CHECK-GI-NEXT: ret
entry:
%d = call <16 x i16> @llvm.fshr(<16 x i16> %a, <16 x i16> %b, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
@@ -4191,72 +4179,44 @@ entry:
}
define <2 x i32> @fshl_v2i32_c(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-SD-LABEL: fshl_v2i32_c:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: shl v0.2s, v0.2s, #3
-; CHECK-SD-NEXT: usra v0.2s, v1...
[truncated]
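In the fsh.ll updates above, each ushr followed by orr collapses into a single usra (shift right and accumulate), which is valid because the bits shifted in by usra land only in positions the preceding shl cleared. A lane-level check of the 8-bit case (standalone sketch; values and shift amounts taken from fshl_v8i8_c):

```cpp
// After 'shl #3' the low three bits of the accumulator lane are zero and
// 'ushr #5' produces at most a 3-bit value, so the old orr and the
// accumulate performed by usra give identical lanes.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a) {
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t hi = uint8_t(a << 3);  // lane after shl v0.8b, v0.8b, #3
      uint8_t lo = uint8_t(b >> 5);  // lane shifted in by ushr / usra #5
      assert(uint8_t(hi | lo) == uint8_t(hi + lo));
    }
  }
  return 0;
}
```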