Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions compiler-rt/test/profile/instrprof-tmpdir.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
// AIX does not support env -u.
// TODO(boomanaiden154): Reenable AIX support once we use the internal shell by
// default.
// UNSUPPORTED: system-aix

// RUN: rm -rf %t
// RUN: mkdir -p %t
// RUN: cd %t
Expand All @@ -12,8 +17,7 @@
// RUN: llvm-profdata show ./raw2.profraw | FileCheck %s -check-prefix TMPDIR
//
// Check that we fall back to the default path if TMPDIR is missing.
// RUN: %if system-aix %{ unset TMPDIR %}
// RUN: env %if !system-aix %{ -u TMPDIR %} LLVM_PROFILE_FILE="%%t/raw3.profraw" %run %t/binary 2>&1 | FileCheck %s -check-prefix MISSING
// RUN: env -u TMPDIR LLVM_PROFILE_FILE="%%t/raw3.profraw" %run %t/binary 2>&1 | FileCheck %s -check-prefix MISSING
// RUN: llvm-profdata show ./default.profraw | FileCheck %s -check-prefix TMPDIR

// TMPDIR: Maximum function count: 1
Expand Down
2 changes: 0 additions & 2 deletions libcxx/utils/ci/buildkite-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ steps:
queue: libcxx-builders
os: aix
<<: *common
skip: "https://github.com/llvm/llvm-project/issues/162516"

- label: AIX (64-bit)
command: libcxx/utils/ci/run-buildbot aix
Expand All @@ -115,7 +114,6 @@ steps:
queue: libcxx-builders
os: aix
<<: *common
skip: "https://github.com/llvm/llvm-project/issues/162516"

- group: ':freebsd: FreeBSD'
steps:
Expand Down
29 changes: 0 additions & 29 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1128,40 +1128,11 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
if (!AMDGPU::isSISrcInlinableOperand(Desc, UseOpIdx))
return false;

MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
return true;
}

// TODO: Verify the following code handles subregisters correctly.
// TODO: Handle extract of global reference
if (UseOp.getSubReg())
return false;

if (!OpToFold.isReg())
return false;

Register UseReg = OpToFold.getReg();
if (!UseReg.isVirtual())
return false;

// Maybe it is just a COPY of an immediate itself.

// FIXME: Remove this handling. There is already special case folding of
// immediate into copy in foldOperand. This is looking for the def of the
// value the folding started from in the first place.
MachineInstr *Def = MRI->getVRegDef(UseReg);
if (Def && TII->isFoldableCopy(*Def)) {
MachineOperand &DefOp = Def->getOperand(1);
if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
FoldableDef FoldableImm(DefOp.getImm(), OpToFold.DefRC,
OpToFold.DefSubReg);
appendFoldCandidate(FoldList, UseMI, UseOpIdx, FoldableImm);
return true;
}
}

return false;
}

Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
Expand Down Expand Up @@ -393,6 +394,17 @@ class ConstantTerminatorFoldingImpl {
DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
++NumLoopExitsDeleted;
}
// We don't really need to add branch weights to DummySwitch, because all
// but one of its branches are just a temporary artifact - see the comment
// on top of this function. But it's easy to estimate the weights, and it
// helps maintain a property of the overall compiler - that the branch
// weights don't "just get dropped" accidentally (i.e. profcheck).
if (DummySwitch->getParent()->getParent()->hasProfileData()) {
SmallVector<uint32_t> DummyBranchWeights(1 + DummySwitch->getNumCases());
// Index 0 is the default destination: it gets 100% probability. The
// remaining (zero-initialized) weights mark the case branches as dead.
DummyBranchWeights[0] = 1;
setBranchWeights(*DummySwitch, DummyBranchWeights, /*IsExpected=*/false);
}

assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
Expand Down
31 changes: 31 additions & 0 deletions llvm/test/CodeGen/AMDGPU/true16-fold.mir
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,34 @@ body: |
$vgpr0 = COPY %3
S_ENDPGM 0, implicit $vgpr0
...

# Make sure the immediate materialized by the v_mov_b16 isn't
# incorrectly folded into the bfi as 0.

# FIXME: %4:vgpr_32 = COPY %3 is a direct copy from v16 to v32 and
# should probably fail the verifier
---
name: mov_v16_copy_v32_fold_b32_regression
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1

; CHECK-LABEL: name: mov_v16_copy_v32_fold_b32_regression
; CHECK: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[V_MOV_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, 15360, 0, implicit $exec
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B16_t16_e64_]]
; CHECK-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 32767, [[COPY2]], [[COPY1]], implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[V_BFI_B32_e64_]]
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%0:vgpr_32 = COPY $vgpr1
%1:vgpr_32 = COPY $vgpr0
%3:vgpr_16 = V_MOV_B16_t16_e64 0, 15360, 0, implicit $exec
%4:vgpr_32 = COPY %3
%5:vgpr_32 = V_BFI_B32_e64 32767, %4, %1, implicit $exec
$vgpr0 = COPY %5
SI_RETURN implicit $vgpr0
...
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/AMDGPU/true16-imm-folded-to-0-regression.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s

; Regression test: make sure that the 16-bit constant 0x3c00 (half 1.0, the
; magnitude operand of the copysign below) isn't folded as 0 into v_bfi_b32.
; The expected output keeps the constant materialized by v_mov_b16 and feeds
; it to v_bfi_b32 through a register.
define i32 @mov16_bfi_fold_regression(half %arg, i32 %arg1) {
; CHECK-LABEL: bfi_fold_regression:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b16_e32 v2.l, 0x3c00
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; CHECK-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0
; CHECK-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; CHECK-NEXT: v_pack_b32_f16 v0, v0.l, 0
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
%cmp = icmp eq i32 %arg1, 0
%call = call half @llvm.copysign.f16(half 0xH3C00, half %arg)
%select = select i1 %cmp, half 0xH3C00, half %call
%insertelement = insertelement <2 x half> zeroinitializer, half %select, i64 0
%bitcast = bitcast <2 x half> %insertelement to i32
ret i32 %bitcast
}

declare half @llvm.copysign.f16(half, half) #0

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
Loading