Skip to content

Conversation

@petar-avramovic
Copy link
Collaborator

Move G_UNMERGE_VALUES handling to AMDGPURegBankLegalizeRules.cpp.
Fix sgpr S16 unmerge by lowering using shift and using S32.
Previously sgpr S16 unmerge was selected using _lo16 and _hi16 subreg
indexes which are exclusive to vgpr register classes.
For remaing cases we do trivial mapping, assigns same reg bank
to all operands, vgpr or sgpr.

Move G_UNMERGE_VALUES handling to AMDGPURegBankLegalizeRules.cpp.
Fix sgpr S16 unmerge by lowering using shift and using S32.
Previously sgpr S16 unmerge was selected using _lo16 and _hi16 subreg
indexes which are exclusive to vgpr register classes.
For remaing cases we do trivial mapping, assigns same reg bank
to all operands, vgpr or sgpr.
Copy link
Collaborator Author

This stack of pull requests is managed by Graphite. Learn more about stacking.

@llvmbot
Copy link
Member

llvmbot commented Dec 10, 2025

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-globalisel

Author: Petar Avramovic (petar-avramovic)

Changes

Move G_UNMERGE_VALUES handling to AMDGPURegBankLegalizeRules.cpp.
Fix sgpr S16 unmerge by lowering using shift and using S32.
Previously sgpr S16 unmerge was selected using _lo16 and _hi16 subreg
indexes which are exclusive to vgpr register classes.
For remaing cases we do trivial mapping, assigns same reg bank
to all operands, vgpr or sgpr.


Full diff: https://github.com/llvm/llvm-project/pull/171653.diff

7 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp (+16)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h (+4)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp (+2-2)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp (+60)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+11)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h (+8-1)
  • (added) llvm/test/CodeGen/AMDGPU/GlobalISel/unmerge-sgpr-s16.ll (+36)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index f36935d8c0e8f..f684e830e20fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -186,3 +186,19 @@ void AMDGPU::buildReadFirstLane(MachineIRBuilder &B, Register SgprDst,
             .addReg(VgprSrc);
       });
 }
+
+bool AMDGPU::isBRC(LLT Ty) {
+  if (Ty.isPointer())
+    return true;
+
+  unsigned Size = Ty.getSizeInBits();
+  if (Size % 32 != 0)
+    return false;
+
+  // 32, 2x32, 3x32 ... 12x32, 16x32, 32x32
+  unsigned NumB32s = Size / 32;
+  if ((NumB32s >= 1 && NumB32s <= 12) || NumB32s == 16 || NumB32s == 32)
+    return true;
+
+  return false;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
index 5e1000ee0ab26..86af5b8bfd2f6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -53,6 +53,10 @@ void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
                       const RegisterBankInfo &RBI);
 void buildReadFirstLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
                         const RegisterBankInfo &RBI);
+
+// "Reg Class" low level types. LLTs that fit into some register class on both
+// sgpr and vgpr. LLTs with sizes 32, 2x32, 3x32 ... 12x32, 16x32, 32x32.
+bool isBRC(LLT Ty);
 }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 839120da89711..a1fce93eb01f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -442,8 +442,8 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
 
     // Opcodes that support pretty much all combinations of reg banks and LLTs
     // (except S1). There is no point in writing rules for them.
-    if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES ||
-        Opc == AMDGPU::G_MERGE_VALUES || Opc == AMDGPU::G_BITCAST) {
+    if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_MERGE_VALUES ||
+        Opc == AMDGPU::G_BITCAST) {
       RBLHelper.applyMappingTrivial(*MI);
       continue;
     }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index cc31d7d5c55ac..b5a8c7b89e64c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -910,6 +910,55 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI,
     return lowerUnpackAExt(MI);
   case WidenMMOToS32:
     return widenMMOToS32(cast<GAnyLoad>(MI));
+  case VerifyAllSgpr: {
+    for (unsigned i = 0; i < MI.getNumOperands(); ++i)
+      assert(MRI.getRegBankOrNull(MI.getOperand(i).getReg()) == SgprRB);
+    return true;
+  }
+  case ApplyAllVgpr: {
+    B.setInstr(MI);
+    unsigned NumDefs = MI.getNumDefs();
+
+    for (unsigned i = 0; i < NumDefs; ++i)
+      assert(MRI.getRegBankOrNull(MI.getOperand(i).getReg()) == VgprRB);
+
+    for (unsigned i = NumDefs; i < MI.getNumOperands(); ++i) {
+      Register Reg = MI.getOperand(i).getReg();
+      if (MRI.getRegBank(Reg) != VgprRB) {
+        auto Copy = B.buildCopy({VgprRB, MRI.getType(Reg)}, Reg);
+        MI.getOperand(i).setReg(Copy.getReg(0));
+      }
+    }
+
+    return true;
+  }
+  case UnmergeToShiftTrunc: {
+    GUnmerge *Unmerge = dyn_cast<GUnmerge>(&MI);
+    LLT Ty = MRI.getType(Unmerge->getSourceReg());
+    if (Ty.getSizeInBits() % 32 != 0) {
+      reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
+                         "AMDGPU RegBankLegalize: unmerge not multiple of 32",
+                         MI);
+      return false;
+    }
+
+    B.setInstr(MI);
+    if (Ty.getSizeInBits() > 32) {
+      auto Unmerge32 = B.buildUnmerge(SgprRB_S32, Unmerge->getSourceReg());
+      for (unsigned i = 0; i < Unmerge32->getNumDefs(); ++i) {
+        auto [Dst0S32, Dst1S32] = unpackAExt(Unmerge32->getOperand(i).getReg());
+        B.buildTrunc(MI.getOperand(i * 2).getReg(), Dst0S32);
+        B.buildTrunc(MI.getOperand(i * 2 + 1).getReg(), Dst1S32);
+      }
+    } else {
+      auto [Dst0S32, Dst1S32] = unpackAExt(MI.getOperand(2).getReg());
+      B.buildTrunc(MI.getOperand(0).getReg(), Dst0S32);
+      B.buildTrunc(MI.getOperand(1).getReg(), Dst1S32);
+    }
+
+    MI.eraseFromParent();
+    return true;
+  }
   }
 
   if (!WaterfallSgprs.empty()) {
@@ -1035,6 +1084,11 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) {
         Ty == LLT::fixed_vector(8, 64))
       return Ty;
     return LLT();
+  case SgprBRC:
+  case VgprBRC:
+    if (isBRC(Ty))
+      return Ty;
+    return LLT();
   default:
     return LLT();
   }
@@ -1069,6 +1123,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
   case SgprB128:
   case SgprB256:
   case SgprB512:
+  case SgprBRC:
   case UniInVcc:
   case UniInVgprS16:
   case UniInVgprS32:
@@ -1108,6 +1163,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
   case VgprB128:
   case VgprB256:
   case VgprB512:
+  case VgprBRC:
   case Vgpr32SExt:
   case Vgpr32ZExt:
     return VgprRB;
@@ -1167,6 +1223,7 @@ bool RegBankLegalizeHelper::applyMappingDst(
     case SgprB128:
     case SgprB256:
     case SgprB512:
+    case SgprBRC:
     case SgprPtr32:
     case SgprPtr64:
     case SgprPtr128:
@@ -1176,6 +1233,7 @@ bool RegBankLegalizeHelper::applyMappingDst(
     case VgprB128:
     case VgprB256:
     case VgprB512:
+    case VgprBRC:
     case VgprPtr32:
     case VgprPtr64:
     case VgprPtr128: {
@@ -1307,6 +1365,7 @@ bool RegBankLegalizeHelper::applyMappingSrc(
     case SgprB128:
     case SgprB256:
     case SgprB512:
+    case SgprBRC:
     case SgprPtr32:
     case SgprPtr64:
     case SgprPtr128: {
@@ -1341,6 +1400,7 @@ bool RegBankLegalizeHelper::applyMappingSrc(
     case VgprB128:
     case VgprB256:
     case VgprB512:
+    case VgprBRC:
     case VgprPtr32:
     case VgprPtr64:
     case VgprPtr128: {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index abe01e752960f..789339f5fc5fa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPURegBankLegalizeRules.h"
+#include "AMDGPUGlobalISelUtils.h"
 #include "AMDGPUInstrInfo.h"
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -132,6 +133,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
     return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
   case UniB512:
     return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
+  case UniBRC:
+    return isBRC(MRI.getType(Reg)) && MUI.isUniform(Reg);
   case DivS1:
     return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
   case DivS16:
@@ -172,6 +175,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
     return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
   case DivB512:
     return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
+  case DivBRC:
+    return isBRC(MRI.getType(Reg)) && MUI.isDivergent(Reg);
   case _:
     return true;
   default:
@@ -560,6 +565,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
   addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});
 
+  addRulesForGOpcs({G_UNMERGE_VALUES})
+      .Any({{UniS16}, {{}, {}, UnmergeToShiftTrunc}})
+      .Any({{DivS16}, {{}, {}, ApplyAllVgpr}})
+      .Any({{UniBRC}, {{}, {}, VerifyAllSgpr}})
+      .Any({{DivBRC}, {{}, {}, ApplyAllVgpr}});
+
   addRulesForGOpcs({G_ICMP})
       .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
       .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 1ac117304b76f..4aeea206ab5ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -113,6 +113,7 @@ enum UniformityLLTOpPredicateID {
   UniB128,
   UniB256,
   UniB512,
+  UniBRC,
 
   DivB32,
   DivB64,
@@ -120,6 +121,7 @@ enum UniformityLLTOpPredicateID {
   DivB128,
   DivB256,
   DivB512,
+  DivBRC
 };
 
 // How to apply register bank on register operand.
@@ -156,6 +158,7 @@ enum RegBankLLTMappingApplyID {
   SgprB128,
   SgprB256,
   SgprB512,
+  SgprBRC,
 
   // vgpr scalars, pointers, vectors and B-types
   Vgpr16,
@@ -178,6 +181,7 @@ enum RegBankLLTMappingApplyID {
   VgprB128,
   VgprB256,
   VgprB512,
+  VgprBRC,
   VgprV4S32,
 
   // Dst only modifiers: read-any-lane and truncs
@@ -233,7 +237,10 @@ enum LoweringMethodID {
   SplitLoad,
   WidenLoad,
   WidenMMOToS32,
-  UnpackAExt
+  UnpackAExt,
+  VerifyAllSgpr,
+  ApplyAllVgpr,
+  UnmergeToShiftTrunc
 };
 
 enum FastRulesTypes {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/unmerge-sgpr-s16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/unmerge-sgpr-s16.ll
new file mode 100644
index 0000000000000..fb013d35d540b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/unmerge-sgpr-s16.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX11 %s
+
+define amdgpu_ps void @unmerge_sgprS16_from_V2S16(ptr addrspace(1) inreg %ptr, ptr addrspace(1) inreg %out) {
+; GFX11-LABEL: unmerge_sgprS16_from_V2S16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b32 s0, s[0:1], 0x0
+; GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_pack_hl_b32_b16 s0, s0, s0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    global_store_b32 v1, v0, s[2:3]
+; GFX11-NEXT:    s_endpgm
+  %load = load <2 x i16>, ptr addrspace(1) %ptr
+  %shuffle = shufflevector <2 x i16> %load, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
+  store <2 x i16> %shuffle, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @unmerge_sgprS16_from_V4S16(ptr addrspace(1) inreg %ptr, ptr addrspace(1) inreg %out) {
+; GFX11-LABEL: unmerge_sgprS16_from_V4S16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_pack_lh_b32_b16 s0, s0, s1
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    global_store_b32 v1, v0, s[2:3]
+; GFX11-NEXT:    s_endpgm
+  %load = load <4 x i16>, ptr addrspace(1) %ptr
+  %shuffle = shufflevector <4 x i16> %load, <4 x i16> poison, <2 x i32> <i32 0, i32 3>
+  store <2 x i16> %shuffle, ptr addrspace(1) %out
+  ret void
+}

@github-actions
Copy link

🐧 Linux x64 Test Results

  • 166983 tests passed
  • 2928 tests skipped
  • 1 test failed

Failed Tests

(click on a test name to see its output)

LLVM

LLVM.CodeGen/AMDGPU/GlobalISel/fpext.ll
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 2
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fpext.ll | /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck -check-prefixes=GFX11,GFX11-FAKE16 /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fpext.ll
# executed command: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fpext.ll
# note: command had no output on stdout or stderr
# executed command: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck -check-prefixes=GFX11,GFX11-FAKE16 /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fpext.ll
# note: command had no output on stdout or stderr
# RUN: at line 3
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fpext.ll | /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck -check-prefixes=GFX11,GFX11-TRUE16 /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fpext.ll
# executed command: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fpext.ll
# note: command had no output on stdout or stderr
# executed command: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck -check-prefixes=GFX11,GFX11-TRUE16 /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fpext.ll
# .---command stderr------------
# | /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fpext.ll:149:22: error: GFX11-TRUE16-NEXT: is not on the line after the previous match
# | ; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s0
# |                      ^
# | <stdin>:281:2: note: 'next' match was here
# |  v_cvt_f32_f16_e32 v0, s0
# |  ^
# | <stdin>:279:9: note: previous match ended here
# | ; %bb.0:
# |         ^
# | <stdin>:280:1: note: non-matching line after previous match is here
# |  s_lshr_b32 s1, s0, 16
# | ^
# | 
# | Input file: <stdin>
# | Check file: /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fpext.ll
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |           .
# |           .
# |           .
# |         276:  .p2align 8 
# |         277:  .type fpext_v2f16_to_v2f32_uniform,@function 
# |         278: fpext_v2f16_to_v2f32_uniform: ; @fpext_v2f16_to_v2f32_uniform 
# |         279: ; %bb.0: 
# |         280:  s_lshr_b32 s1, s0, 16 
# |         281:  v_cvt_f32_f16_e32 v0, s0 
# | next:149      !~~~~~~~~~~~~~~~~~~~~~~~  error: match on wrong line
# |         282:  v_cvt_f32_f16_e32 v1, s1 
# |         283:  ; return to shader part epilog 
# |         284: .Lfunc_end6: 
# |         285:  .size fpext_v2f16_to_v2f32_uniform, .Lfunc_end6-fpext_v2f16_to_v2f32_uniform 
# |         286:  ; -- End function 
# |           .
# |           .
# |           .
# | >>>>>>
# `-----------------------------
# error: command failed with exit status: 1

--

If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the infrastructure label.

@github-actions
Copy link

🪟 Windows x64 Test Results

  • 128505 tests passed
  • 2804 tests skipped
  • 1 test failed

Failed Tests

(click on a test name to see its output)

LLVM

LLVM.CodeGen/AMDGPU/GlobalISel/fpext.ll
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 2
c:\_work\llvm-project\llvm-project\build\bin\llc.exe -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\fpext.ll | c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe -check-prefixes=GFX11,GFX11-FAKE16 C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\fpext.ll
# executed command: 'c:\_work\llvm-project\llvm-project\build\bin\llc.exe' -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - 'C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\fpext.ll'
# note: command had no output on stdout or stderr
# executed command: 'c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe' -check-prefixes=GFX11,GFX11-FAKE16 'C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\fpext.ll'
# note: command had no output on stdout or stderr
# RUN: at line 3
c:\_work\llvm-project\llvm-project\build\bin\llc.exe -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\fpext.ll | c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe -check-prefixes=GFX11,GFX11-TRUE16 C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\fpext.ll
# executed command: 'c:\_work\llvm-project\llvm-project\build\bin\llc.exe' -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - 'C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\fpext.ll'
# note: command had no output on stdout or stderr
# executed command: 'c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe' -check-prefixes=GFX11,GFX11-TRUE16 'C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\fpext.ll'
# .---command stderr------------
# | C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\fpext.ll:149:22: error: GFX11-TRUE16-NEXT: is not on the line after the previous match
# | ; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s0
# |                      ^
# | <stdin>:281:2: note: 'next' match was here
# |  v_cvt_f32_f16_e32 v0, s0
# |  ^
# | <stdin>:279:9: note: previous match ended here
# | ; %bb.0:
# |         ^
# | <stdin>:280:1: note: non-matching line after previous match is here
# |  s_lshr_b32 s1, s0, 16
# | ^
# | 
# | Input file: <stdin>
# | Check file: C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\fpext.ll
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |           .
# |           .
# |           .
# |         276:  .p2align 8 
# |         277:  .type fpext_v2f16_to_v2f32_uniform,@function 
# |         278: fpext_v2f16_to_v2f32_uniform: ; @fpext_v2f16_to_v2f32_uniform 
# |         279: ; %bb.0: 
# |         280:  s_lshr_b32 s1, s0, 16 
# |         281:  v_cvt_f32_f16_e32 v0, s0 
# | next:149      !~~~~~~~~~~~~~~~~~~~~~~~  error: match on wrong line
# |         282:  v_cvt_f32_f16_e32 v1, s1 
# |         283:  ; return to shader part epilog 
# |         284: .Lfunc_end6: 
# |         285:  .size fpext_v2f16_to_v2f32_uniform, .Lfunc_end6-fpext_v2f16_to_v2f32_uniform 
# |         286:  ; -- End function 
# |           .
# |           .
# |           .
# | >>>>>>
# `-----------------------------
# error: command failed with exit status: 1

--

If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the infrastructure label.

@kosarev kosarev requested a review from Sisyph December 11, 2025 14:43
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants