diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index bace63d349caf..4ced9204effdf 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -136,6 +136,9 @@ def warn_drv_unsupported_option_for_offload_arch_req_feature : Warning< def warn_drv_unsupported_option_for_target : Warning< "ignoring '%0' option as it is not currently supported for target '%1'">, InGroup; +def warn_drv_invalid_argument_for_flang : Warning< + "'%0' is not valid for Fortran">, + InGroup; def warn_drv_unsupported_option_for_flang : Warning< "the argument '%0' is not supported for option '%1'. Mapping to '%1%2'">, InGroup; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 22b7d42254546..e686ad0d0c1b9 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1964,7 +1964,7 @@ defm borland_extensions : BoolFOption<"borland-extensions", "Accept non-standard constructs supported by the Borland compiler">, NegFlag>; def fbuiltin : Flag<["-"], "fbuiltin">, Group, - Visibility<[ClangOption, CLOption, DXCOption]>; + Visibility<[ClangOption, CLOption, DXCOption, FlangOption, FC1Option]>; def fbuiltin_module_map : Flag <["-"], "fbuiltin-module-map">, Group, Flags<[]>, HelpText<"Load the clang builtins module map file.">; defm caret_diagnostics : BoolFOption<"caret-diagnostics", @@ -3572,7 +3572,7 @@ def fno_assume_sane_operator_new : Flag<["-"], "fno-assume-sane-operator-new">, Visibility<[ClangOption, CC1Option]>, MarshallingInfoNegativeFlag>; def fno_builtin : Flag<["-"], "fno-builtin">, Group, - Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>, + Visibility<[ClangOption, CC1Option, CLOption, DXCOption, FlangOption, FC1Option]>, HelpText<"Disable implicit builtin knowledge of functions">; def fno_builtin_ : Joined<["-"], "fno-builtin-">, Group, Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>, diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp index 90551c22e0734..b42bfa3821c2e 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp @@ -177,6 +177,41 @@ static auto isPointerComparisonOperatorCall(std::string operator_name) { pointee(anyOf(statusOrType(), statusType()))))))); } +// The nullPointerConstant in the two matchers below is to support +// absl::StatusOr X = nullptr. +// nullptr does not match the bound type. +// TODO: be less restrictive around convertible types in general. +static auto isStatusOrValueAssignmentCall() { + using namespace ::clang::ast_matchers; // NOLINT: Too many names + return cxxOperatorCallExpr( + hasOverloadedOperatorName("="), + callee(cxxMethodDecl(ofClass(statusOrClass()))), + hasArgument(1, anyOf(hasType(hasUnqualifiedDesugaredType( + type(equalsBoundNode("T")))), + nullPointerConstant()))); +} + +static auto isStatusOrValueConstructor() { + using namespace ::clang::ast_matchers; // NOLINT: Too many names + return cxxConstructExpr( + hasType(statusOrType()), + hasArgument(0, + anyOf(hasType(hasCanonicalType(type(equalsBoundNode("T")))), + nullPointerConstant(), + hasType(namedDecl(hasAnyName("absl::in_place_t", + "std::in_place_t")))))); +} + +static auto isStatusOrConstructor() { + using namespace ::clang::ast_matchers; // NOLINT: Too many names + return cxxConstructExpr(hasType(statusOrType())); +} + +static auto isStatusConstructor() { + using namespace ::clang::ast_matchers; // NOLINT: Too many names + return cxxConstructExpr(hasType(statusType())); +} + static auto buildDiagnoseMatchSwitch(const UncheckedStatusOrAccessModelOptions &Options) { return CFGMatchSwitchBuildergetNumArgs() > 1); + + auto *StatusOrLoc = State.Env.get(*Expr->getArg(0)); + if (StatusOrLoc == nullptr) + return; + + auto &OkVal = initializeStatusOr(*StatusOrLoc, State.Env); + State.Env.assume(OkVal.formula()); +} + +static void transferValueConstructor(const CXXConstructExpr *Expr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + auto &OkVal = + initializeStatusOr(State.Env.getResultObjectLocation(*Expr), State.Env); + State.Env.assume(OkVal.formula()); +} + +static void transferStatusOrConstructor(const CXXConstructExpr *Expr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + RecordStorageLocation &StatusOrLoc = State.Env.getResultObjectLocation(*Expr); + RecordStorageLocation &StatusLoc = locForStatus(StatusOrLoc); + + if (State.Env.getValue(locForOk(StatusLoc)) == nullptr) + initializeStatusOr(StatusOrLoc, State.Env); +} + +static void transferStatusConstructor(const CXXConstructExpr *Expr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + RecordStorageLocation &StatusLoc = State.Env.getResultObjectLocation(*Expr); + + if (State.Env.getValue(locForOk(StatusLoc)) == nullptr) + initializeStatus(StatusLoc, State.Env); +} + CFGMatchSwitch buildTransferMatchSwitch(ASTContext &Ctx, CFGMatchSwitchBuilder Builder) { @@ -573,6 +648,20 @@ buildTransferMatchSwitch(ASTContext &Ctx, .CaseOfCFGStmt(isNotOkStatusCall(), transferNotOkStatusCall) .CaseOfCFGStmt(isStatusOrMemberCallWithName("emplace"), transferEmplaceCall) + .CaseOfCFGStmt(isStatusOrValueAssignmentCall(), + transferValueAssignmentCall) + .CaseOfCFGStmt(isStatusOrValueConstructor(), + transferValueConstructor) + // N.B. These need to come after all other CXXConstructExpr. + // These are there to make sure that every Status and StatusOr object + // have their ok boolean initialized when constructed. If we were to + // lazily initialize them when we first access them, we can produce + // false positives if that first access is in a control flow statement. + // You can comment out these two constructors and see tests fail. + .CaseOfCFGStmt(isStatusOrConstructor(), + transferStatusOrConstructor) + .CaseOfCFGStmt(isStatusConstructor(), + transferStatusConstructor) .Build(); } diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 468c930acacbd..aefc262dca17f 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -713,14 +713,16 @@ static void addSanitizers(const Triple &TargetTriple, ThinOrFullLTOPhase) { if (CodeGenOpts.hasSanitizeCoverage()) { auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); - MPM.addPass(SanitizerCoveragePass( - SancovOpts, CodeGenOpts.SanitizeCoverageAllowlistFiles, - CodeGenOpts.SanitizeCoverageIgnorelistFiles)); + MPM.addPass( + SanitizerCoveragePass(SancovOpts, PB.getVirtualFileSystemPtr(), + CodeGenOpts.SanitizeCoverageAllowlistFiles, + CodeGenOpts.SanitizeCoverageIgnorelistFiles)); } if (CodeGenOpts.hasSanitizeBinaryMetadata()) { MPM.addPass(SanitizerBinaryMetadataPass( getSanitizerBinaryMetadataOptions(CodeGenOpts), + PB.getVirtualFileSystemPtr(), CodeGenOpts.SanitizeMetadataIgnorelistFiles)); } diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 8abc4ccaa2e17..f24dd3982eab7 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -953,6 +953,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, assert(false && "Unexpected action class for Flang tool."); } + // We support some options that are invalid for Fortran and have no effect. + // These are solely for compatibility with other compilers. Emit a warning if + // any such options are provided, then proceed normally. + for (options::ID Opt : {options::OPT_fbuiltin, options::OPT_fno_builtin}) + if (const Arg *A = Args.getLastArg(Opt)) + D.Diag(diag::warn_drv_invalid_argument_for_flang) << A->getSpelling(); + const InputInfo &Input = Inputs[0]; types::ID InputType = Input.getType(); diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index 65fc65e79fdc3..f24b8ab14bdce 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -288,6 +288,9 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, ArrayRef Matches, SmallVector &Changes) { int Shift = 0; + // Set when the shift is applied anywhere in the line. Cleared when the line + // ends. + bool LineShifted = false; // ScopeStack keeps track of the current scope depth. It contains the levels // of at most 2 scopes. The first one is the one that the matched token is @@ -339,8 +342,11 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, Changes[i - 1].Tok->is(tok::string_literal); bool SkipMatchCheck = InsideNestedScope || ContinuedStringLiteral; - if (CurrentChange.NewlinesBefore > 0 && !SkipMatchCheck) - Shift = 0; + if (CurrentChange.NewlinesBefore > 0) { + LineShifted = false; + if (!SkipMatchCheck) + Shift = 0; + } // If this is the first matching token to be aligned, remember by how many // spaces it has to be shifted, so the rest of the changes on the line are @@ -349,7 +355,6 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, Shift = Column - (RightJustify ? CurrentChange.TokenLength : 0) - CurrentChange.StartOfTokenColumn; ScopeStack = {CurrentChange.indentAndNestingLevel()}; - CurrentChange.Spaces += Shift; } if (Shift == 0) @@ -358,8 +363,10 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, // This is for lines that are split across multiple lines, as mentioned in // the ScopeStack comment. The stack size being 1 means that the token is // not in a scope that should not move. - if (ScopeStack.size() == 1u && CurrentChange.NewlinesBefore > 0 && - (ContinuedStringLiteral || InsideNestedScope)) { + if ((!Matches.empty() && Matches[0] == i) || + (ScopeStack.size() == 1u && CurrentChange.NewlinesBefore > 0 && + (ContinuedStringLiteral || InsideNestedScope))) { + LineShifted = true; CurrentChange.Spaces += Shift; } @@ -369,9 +376,11 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, static_cast(Changes[i].Tok->SpacesRequiredBefore) || CurrentChange.Tok->is(tok::eof)); - CurrentChange.StartOfTokenColumn += Shift; - if (i + 1 != Changes.size()) - Changes[i + 1].PreviousEndOfTokenColumn += Shift; + if (LineShifted) { + CurrentChange.StartOfTokenColumn += Shift; + if (i + 1 != Changes.size()) + Changes[i + 1].PreviousEndOfTokenColumn += Shift; + } // If PointerAlignment is PAS_Right, keep *s or &s next to the token, // except if the token is equal, then a space is needed. diff --git a/clang/test/DebugInfo/ObjC/property4.m b/clang/test/DebugInfo/ObjC/property-auto-synth.m similarity index 100% rename from clang/test/DebugInfo/ObjC/property4.m rename to clang/test/DebugInfo/ObjC/property-auto-synth.m diff --git a/clang/test/DebugInfo/ObjC/property-basic.m b/clang/test/DebugInfo/ObjC/property-basic.m new file mode 100644 index 0000000000000..65e1d7a6a9b1f --- /dev/null +++ b/clang/test/DebugInfo/ObjC/property-basic.m @@ -0,0 +1,20 @@ +// Checks basic debug-info generation for property. Makes sure we +// create a DIObjCProperty for the synthesized property. + +// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s + +// CHECK: !DIObjCProperty(name: "p1" +// CHECK-SAME: attributes: 2316 +// CHECK-SAME: type: ![[P1_TYPE:[0-9]+]] +// +// CHECK: ![[P1_TYPE]] = !DIBasicType(name: "int" + +@interface I1 { +int p1; +} +@property int p1; +@end + +@implementation I1 +@synthesize p1; +@end diff --git a/clang/test/DebugInfo/ObjC/property.m b/clang/test/DebugInfo/ObjC/property.m deleted file mode 100644 index ca013b24be421..0000000000000 --- a/clang/test/DebugInfo/ObjC/property.m +++ /dev/null @@ -1,15 +0,0 @@ -// FIXME: Check IR rather than asm, then triple is not needed. -// RUN: %clang_cc1 -triple %itanium_abi_triple -S -debug-info-kind=limited %s -o - | FileCheck %s - -// CHECK: AT_APPLE_property_name -// CHECK: AT_APPLE_property_attribute -// CHECK: AT_APPLE_property -@interface I1 { -int p1; -} -@property int p1; -@end - -@implementation I1 -@synthesize p1; -@end diff --git a/clang/test/DebugInfo/ObjCXX/lit.local.cfg b/clang/test/DebugInfo/ObjCXX/lit.local.cfg new file mode 100644 index 0000000000000..8d5c476a2f682 --- /dev/null +++ b/clang/test/DebugInfo/ObjCXX/lit.local.cfg @@ -0,0 +1,5 @@ +# objective-CXX is not supported on AIX and zOS +unsupported_platforms = [ "system-aix", "system-zos" ] + +if any(up in config.available_features for up in unsupported_platforms): + config.unsupported = True diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp index 425beb939a42a..5635ff4e01d36 100644 --- a/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp +++ b/clang/unittests/Analysis/FlowSensitive/UncheckedStatusOrAccessModelTestFixture.cpp @@ -2975,6 +2975,217 @@ TEST_P(UncheckedStatusOrAccessModelTest, Emplace) { )cc"); } +TEST_P(UncheckedStatusOrAccessModelTest, ValueConstruction) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_BOOL result = false; + result.value(); + } + )cc"); + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_INT result = 21; + result.value(); + } + )cc"); + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_INT result = Make(); + result.value(); // [[unsafe]] + } + )cc"); + ExpectDiagnosticsFor( + R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_BOOL result = false; + if (result.ok()) + result.value(); + else + result.value(); + } + )cc"); + + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_BOOL result(false); + result.value(); + } + )cc"); + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_INT result(21); + result.value(); + } + )cc"); + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_INT result(Make()); + result.value(); // [[unsafe]] + } + )cc"); + ExpectDiagnosticsFor( + R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_BOOL result(false); + if (result.ok()) + result.value(); + else + result.value(); + } + )cc"); +} + +TEST_P(UncheckedStatusOrAccessModelTest, ValueAssignment) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_BOOL result; + result = false; + result.value(); + } + )cc"); + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_INT result; + result = 21; + result.value(); + } + )cc"); + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_INT result; + result = Make(); + result.value(); // [[unsafe]] + } + )cc"); + ExpectDiagnosticsFor( + R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_BOOL result; + result = false; + if (result.ok()) + result.value(); + else + result.value(); + } + )cc"); +} + +TEST_P(UncheckedStatusOrAccessModelTest, NestedStatusOr) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + absl::StatusOr result; + result = Make(); + result.value(); + } + )cc"); + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + absl::StatusOr result = Make(); + result.value(); + } + )cc"); +} + +TEST_P(UncheckedStatusOrAccessModelTest, PtrConstruct) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_VOIDPTR sor = nullptr; + *sor; + } + )cc"); + + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_VOIDPTR sor(nullptr); + *sor; + } + )cc"); +} + +TEST_P(UncheckedStatusOrAccessModelTest, InPlaceConstruct) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + STATUSOR_VOIDPTR absl_sor(absl::in_place, {nullptr}); + *absl_sor; + STATUSOR_VOIDPTR std_sor(std::in_place, {nullptr}); + *std_sor; + } + )cc"); +} + +TEST_P(UncheckedStatusOrAccessModelTest, ConstructStatusOrFromReference) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + void target() { + const auto sor1 = Make(); + const auto sor2 = Make(); + if (!sor1.ok() && !sor2.ok()) return; + if (sor1.ok() && !sor2.ok()) { + } else if (!sor1.ok() && sor2.ok()) { + } else { + sor1.value(); + sor2.value(); + } + } + )cc"); +} + +TEST_P(UncheckedStatusOrAccessModelTest, ConstructStatusFromReference) { + ExpectDiagnosticsFor(R"cc( +#include "unchecked_statusor_access_test_defs.h" + + void target() { + const auto sor1 = Make(); + const auto sor2 = Make(); + const auto s1 = Make(); + const auto s2 = Make(); + + if (!s1.ok() && !s2.ok()) return; + if (s1.ok() && !s2.ok()) { + } else if (!s1.ok() && s2.ok()) { + } else { + if (s1 != sor1.status() || s2 != sor2.status()) return; + sor1.value(); + sor2.value(); + } + } + )cc"); +} + } // namespace std::string diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index ce68f91bef02a..d45babe1b82ad 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -19615,6 +19615,25 @@ TEST_F(FormatTest, AlignConsecutiveAssignments) { "};", Alignment); + // Aligning lines should not mess up the comments. However, feel free to + // change the test if it turns out that comments inside the closure should not + // be aligned with those outside it. + verifyFormat("auto aaaaaaaaaaaaaaaaaaaaa = {}; //\n" + "auto b = [] { //\n" + " return; //\n" + "};", + Alignment); + verifyFormat("auto aaaaaaaaaaaaaaaaaaaaa = {}; //\n" + "auto b = [] { //\n" + " return aaaaaaaaaaaaaaaaaaaaa; //\n" + "};", + Alignment); + verifyFormat("auto aaaaaaaaaaaaaaa = {}; //\n" + "auto b = [] { //\n" + " return aaaaaaaaaaaaaaaaaaaaa; //\n" + "};", + Alignment); + verifyFormat("auto b = f(aaaaaaaaaaaaaaaaaaaaaaaaa,\n" " ccc ? aaaaa : bbbbb,\n" " dddddddddddddddddddddddddd);", diff --git a/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c index f347d439e2e06..46d25a4e386dc 100644 --- a/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c +++ b/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c @@ -1,5 +1,5 @@ // Value profiling is currently not supported in lightweight mode. -// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: %clang_pgogen -o %t -g -mllvm --profile-correlate=debug-info -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t.dSYM %t.proflite @@ -9,7 +9,7 @@ // RUN: diff <(llvm-profdata show --all-functions --counts %t.normal.profdata) <(llvm-profdata show --all-functions --counts %t.profdata) -// RUN: %clang_pgogen -o %t.cov -g -mllvm --debug-info-correlate -mllvm -pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: %clang_pgogen -o %t.cov -g -mllvm --profile-correlate=debug-info -mllvm -pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.cov.proflite %run %t.cov // RUN: llvm-profdata merge -o %t.cov.profdata --debug-info=%t.cov.dSYM %t.cov.proflite diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-debuginfod.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-debuginfod.c index 788cb31e5116c..903ead31e0f60 100644 --- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-debuginfod.c +++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-debuginfod.c @@ -7,7 +7,7 @@ // RUN: llvm-profdata merge -o %t.default.profdata %t.profraw // Build with profile debuginfo correlation. -// RUN: %clang_pgogen -o %t.correlate.exe -Wl,--build-id=0x12345678 -g -gdwarf-4 -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: %clang_pgogen -o %t.correlate.exe -Wl,--build-id=0x12345678 -g -gdwarf-4 -mllvm --profile-correlate=debug-info -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.debug-info-correlate.proflite %run %t.correlate.exe // Test llvm-profdata merge profile correlation with --debuginfod option. diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c index 426426d9a05a2..194f980df9681 100644 --- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c +++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c @@ -3,7 +3,7 @@ // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t.normal // RUN: llvm-profdata merge -o %t.normal.profdata %t.profraw -// RUN: %clang_pgogen -o %t.d4 -g -gdwarf-4 -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: %clang_pgogen -o %t.d4 -g -gdwarf-4 -mllvm --profile-correlate=debug-info -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.d4.proflite %run %t.d4 // RUN: llvm-profdata merge -o %t.d4.profdata --debug-info=%t.d4 %t.d4.proflite @@ -11,7 +11,7 @@ // RUN: llvm-profdata show --all-functions --counts %t.d4.profdata > %t.d4.dump // RUN: diff %t.normal.dump %t.d4.dump -// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: %clang_pgogen -o %t -g -mllvm --profile-correlate=debug-info -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite @@ -19,7 +19,7 @@ // RUN: llvm-profdata show --all-functions --counts %t.profdata > %t.prof.dump // RUN: diff %t.normal2.dump %t.prof.dump -// RUN: %clang_pgogen -o %t.cov -g -mllvm --debug-info-correlate -mllvm -pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: %clang_pgogen -o %t.cov -g -mllvm --profile-correlate=debug-info -mllvm -pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.cov.proflite %run %t.cov // RUN: llvm-profdata merge -o %t.cov.profdata --debug-info=%t.cov %t.cov.proflite diff --git a/compiler-rt/test/profile/Linux/instrprof-show-debug-info-correlation.c b/compiler-rt/test/profile/Linux/instrprof-show-debug-info-correlation.c index 245dc79891042..93bf40f98d3ab 100644 --- a/compiler-rt/test/profile/Linux/instrprof-show-debug-info-correlation.c +++ b/compiler-rt/test/profile/Linux/instrprof-show-debug-info-correlation.c @@ -1,8 +1,8 @@ -// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %s +// RUN: %clang_pgogen -o %t -g -mllvm --profile-correlate=debug-info -mllvm --disable-vp=true %s // RUN: llvm-profdata show --debug-info=%t --detailed-summary --show-prof-sym-list | FileCheck %s // RUN: llvm-profdata show --debug-info=%t --show-format=yaml | FileCheck %s --match-full-lines --check-prefix YAML -// RUN: %clang_pgogen -o %t.no.dbg -mllvm --debug-info-correlate -mllvm --disable-vp=true %s +// RUN: %clang_pgogen -o %t.no.dbg -mllvm --profile-correlate=debug-info -mllvm --disable-vp=true %s // RUN: not llvm-profdata show --debug-info=%t.no.dbg 2>&1 | FileCheck %s --check-prefix NO-DBG // NO-DBG: unable to correlate profile: could not find any profile data metadata in correlated file diff --git a/flang/lib/Lower/Support/PrivateReductionUtils.cpp b/flang/lib/Lower/Support/PrivateReductionUtils.cpp index f081ca4a3eaef..e636a50699351 100644 --- a/flang/lib/Lower/Support/PrivateReductionUtils.cpp +++ b/flang/lib/Lower/Support/PrivateReductionUtils.cpp @@ -376,6 +376,8 @@ class PopulateInitAndCleanupRegionsHelper { loadedMoldArg = builder.loadIfRef(loc, moldArg); return loadedMoldArg; } + + bool shouldAllocateTempOnStack() const; }; } // namespace @@ -438,8 +440,14 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedScalar( builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); } - mlir::Value valAlloc = builder.createHeapTemporary(loc, innerTy, /*name=*/{}, - /*shape=*/{}, lenParams); + bool shouldAllocateOnStack = shouldAllocateTempOnStack(); + mlir::Value valAlloc = + (shouldAllocateOnStack) + ? builder.createTemporary(loc, innerTy, /*name=*/{}, + /*shape=*/{}, lenParams) + : builder.createHeapTemporary(loc, innerTy, /*name=*/{}, + /*shape=*/{}, lenParams); + if (scalarInitValue) builder.createStoreWithConvert(loc, scalarInitValue, valAlloc); mlir::Value box = fir::EmboxOp::create(builder, loc, valType, valAlloc, @@ -451,8 +459,9 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedScalar( fir::StoreOp lastOp = fir::StoreOp::create(builder, loc, box, allocatedPrivVarArg); - createCleanupRegion(converter, loc, argType, cleanupRegion, sym, - isDoConcurrent); + if (!shouldAllocateOnStack) + createCleanupRegion(converter, loc, argType, cleanupRegion, sym, + isDoConcurrent); if (ifUnallocated) builder.setInsertionPointAfter(ifUnallocated); @@ -462,6 +471,14 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedScalar( createYield(allocatedPrivVarArg); } +bool PopulateInitAndCleanupRegionsHelper::shouldAllocateTempOnStack() const { + // On the GPU, always allocate on the stack since heap allocatins are very + // expensive. + auto offloadMod = + llvm::dyn_cast(*builder.getModule()); + return offloadMod && offloadMod.getIsGPU(); +} + void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray( fir::BaseBoxType boxTy, bool needsInitialization) { bool isAllocatableOrPointer = @@ -504,15 +521,7 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray( // TODO: Allocate on the heap if the whole reduction/privatization is nested // inside of a loop auto temp = [&]() { - bool shouldAllocateOnStack = false; - - // On the GPU, always allocate on the stack since heap allocatins are very - // expensive. - if (auto offloadMod = llvm::dyn_cast( - *builder.getModule())) - shouldAllocateOnStack = offloadMod.getIsGPU(); - - if (shouldAllocateOnStack) + if (shouldAllocateTempOnStack()) return createStackTempFromMold(loc, builder, source); auto [temp, needsDealloc] = createTempFromMold(loc, builder, source); diff --git a/flang/test/Driver/flang-f-opts.f90 b/flang/test/Driver/flang-f-opts.f90 index b972b9b7b2a59..77bb4d7aa8a91 100644 --- a/flang/test/Driver/flang-f-opts.f90 +++ b/flang/test/Driver/flang-f-opts.f90 @@ -13,3 +13,16 @@ ! CHECK-PROFILE-GENERATE-LLVM: "-fprofile-generate" ! RUN: %flang -### -S -fprofile-use=%S %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-USE-DIR %s ! CHECK-PROFILE-USE-DIR: "-fprofile-use={{.*}}" + +! RUN: %flang -### -fbuiltin %s 2>&1 \ +! RUN: | FileCheck %s -check-prefix=WARN-BUILTIN +! WARN-BUILTIN: warning: '-fbuiltin' is not valid for Fortran + +! RUN: %flang -### -fno-builtin %s 2>&1 \ +! RUN: | FileCheck %s -check-prefix=WARN-NO-BUILTIN +! WARN-NO-BUILTIN: warning: '-fno-builtin' is not valid for Fortran + +! RUN: %flang -### -fbuiltin -fno-builtin %s 2>&1 \ +! RUN: | FileCheck %s -check-prefix=WARN-BUILTIN-MULTIPLE +! WARN-BUILTIN-MULTIPLE: warning: '-fbuiltin' is not valid for Fortran +! WARN-BUILTIN-MULTIPLE: warning: '-fno-builtin' is not valid for Fortran diff --git a/libcxx/include/__config b/libcxx/include/__config index 5971a3c5407b9..b4c081dcdff1b 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1021,9 +1021,7 @@ typedef __char32_t char32_t; // the latter depends on internal GNU libc details that are not appropriate // to depend on here, so any declarations present when __cpp_char8_t is not // defined are ignored. -# if defined(__clang__) -# define _LIBCPP_HAS_C8RTOMB_MBRTOC8 1 -# elif defined(_LIBCPP_GLIBC_PREREQ) +# if defined(_LIBCPP_GLIBC_PREREQ) # if _LIBCPP_GLIBC_PREREQ(2, 36) && defined(__cpp_char8_t) # define _LIBCPP_HAS_C8RTOMB_MBRTOC8 1 # else diff --git a/libcxx/test/std/depr/depr.c.headers/uchar_h.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/uchar_h.compile.pass.cpp index c448ba83f4b38..a1560c8ee5853 100644 --- a/libcxx/test/std/depr/depr.c.headers/uchar_h.compile.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/uchar_h.compile.pass.cpp @@ -23,6 +23,11 @@ // __STDC_UTF_16__ may or may not be defined by the C standard library // __STDC_UTF_32__ may or may not be defined by the C standard library +#if !defined(TEST_HAS_NO_C8RTOMB_MBRTOC8) +ASSERT_SAME_TYPE(size_t, decltype(mbrtoc8((char8_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(size_t, decltype(c8rtomb((char*)0, (char8_t)0, (mbstate_t*)0))); +#endif + ASSERT_SAME_TYPE(size_t, decltype(mbrtoc16((char16_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); ASSERT_SAME_TYPE(size_t, decltype(c16rtomb((char*)0, (char16_t)0, (mbstate_t*)0))); diff --git a/libcxx/test/std/depr/depr.c.headers/uchar_h_char8_t.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/uchar_h_char8_t.compile.pass.cpp deleted file mode 100644 index 34b512f9c5959..0000000000000 --- a/libcxx/test/std/depr/depr.c.headers/uchar_h_char8_t.compile.pass.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// The following platforms do not provide mbrtoc8 and c8rtomb so the tests fail -// XFAIL: target={{.+}}-aix{{.*}} -// XFAIL: android -// XFAIL: darwin -// XFAIL: freebsd -// XFAIL: windows -// XFAIL: glibc-no-char8_t-support -// XFAIL: LIBCXX-PICOLIBC-FIXME - -// - -#include - -ASSERT_SAME_TYPE(size_t, decltype(mbrtoc8((char8_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); -ASSERT_SAME_TYPE(size_t, decltype(c8rtomb((char*)0, (char8_t)0, (mbstate_t*)0))); diff --git a/libcxx/test/std/strings/c.strings/cuchar.compile.pass.cpp b/libcxx/test/std/strings/c.strings/cuchar.compile.pass.cpp index 96b394a9934f8..2076384deb2b2 100644 --- a/libcxx/test/std/strings/c.strings/cuchar.compile.pass.cpp +++ b/libcxx/test/std/strings/c.strings/cuchar.compile.pass.cpp @@ -23,6 +23,11 @@ // __STDC_UTF_16__ may or may not be defined by the C standard library // __STDC_UTF_32__ may or may not be defined by the C standard library +#if !defined(TEST_HAS_NO_C8RTOMB_MBRTOC8) +ASSERT_SAME_TYPE(std::size_t, decltype(std::mbrtoc8((char8_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(std::size_t, decltype(std::c8rtomb((char*)0, (char8_t)0, (mbstate_t*)0))); +#endif + ASSERT_SAME_TYPE(std::size_t, decltype(std::mbrtoc16((char16_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); ASSERT_SAME_TYPE(std::size_t, decltype(std::c16rtomb((char*)0, (char16_t)0, (mbstate_t*)0))); diff --git a/libcxx/test/std/strings/c.strings/cuchar_char8_t.compile.pass.cpp b/libcxx/test/std/strings/c.strings/cuchar_char8_t.compile.pass.cpp deleted file mode 100644 index 019265b534c5c..0000000000000 --- a/libcxx/test/std/strings/c.strings/cuchar_char8_t.compile.pass.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// The following platforms do not provide mbrtoc8 and c8rtomb so the tests fail -// XFAIL: target={{.+}}-aix{{.*}} -// XFAIL: android -// XFAIL: darwin -// XFAIL: freebsd -// XFAIL: windows -// XFAIL: glibc-no-char8_t-support -// XFAIL: LIBCXX-PICOLIBC-FIXME - -// - -#include - -ASSERT_SAME_TYPE(std::size_t, decltype(std::mbrtoc8((char8_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); -ASSERT_SAME_TYPE(std::size_t, decltype(std::c8rtomb((char*)0, (char8_t)0, (mbstate_t*)0))); diff --git a/libcxx/test/std/strings/c.strings/no_c8rtomb_mbrtoc8.verify.cpp b/libcxx/test/std/strings/c.strings/no_c8rtomb_mbrtoc8.verify.cpp new file mode 100644 index 0000000000000..1d4a225668d80 --- /dev/null +++ b/libcxx/test/std/strings/c.strings/no_c8rtomb_mbrtoc8.verify.cpp @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +#include + +#include "test_macros.h" + +// When C++ char8_t support is not enabled, definitions of these functions that +// match the C2X declarations may still be present in the global namespace with +// a char8_t typedef substituted for the C++ char8_t type. If so, these are not +// the declarations we are looking for, so don't test for them. +#if !defined(TEST_HAS_NO_CHAR8_T) +using U = decltype(::c8rtomb); +using V = decltype(::mbrtoc8); +# if !_LIBCPP_HAS_C8RTOMB_MBRTOC8 +// expected-error@-3 {{no member named 'c8rtomb' in the global namespace}} +// expected-error@-3 {{no member named 'mbrtoc8' in the global namespace}} +# else +// expected-no-diagnostics +# endif +#else +// expected-no-diagnostics +#endif diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index 1668e4ab01d75..7d6e78de343c5 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -293,23 +293,6 @@ def _mingwSupportsModules(cfg): """, ), ), - # Check for Glibc < 2.36, where there was no support for char8_t functions - Feature( - name="glibc-no-char8_t-support", - when=lambda cfg: "__GLIBC__" in compilerMacros(cfg) - and not sourceBuilds( - cfg, - """ - #include - #include - int main(void) { - char8_t c; - mbstate_t s = {0}; - return mbrtoc8(&c, "", 0, &s); - } - """, - ), - ), Feature( name="has-unix-headers", when=lambda cfg: sourceBuilds( diff --git a/lldb/include/lldb/Host/MainLoopBase.h b/lldb/include/lldb/Host/MainLoopBase.h index be9a2676e7443..9529f2c214784 100644 --- a/lldb/include/lldb/Host/MainLoopBase.h +++ b/lldb/include/lldb/Host/MainLoopBase.h @@ -57,18 +57,23 @@ class MainLoopBase { // Add a pending callback that will be executed once after all the pending // events are processed. The callback will be executed even if termination // was requested. - void AddPendingCallback(const Callback &callback) { - AddCallback(callback, std::chrono::steady_clock::time_point()); + // Returns false if an interrupt was needed to get the loop to act on the new + // callback, but the interrupt failed, true otherwise. Mostly used when the + // pending callback is a RequestTermination, since if the interrupt fails for + // that callback, waiting for the MainLoop thread to terminate could stall. + bool AddPendingCallback(const Callback &callback) { + return AddCallback(callback, std::chrono::steady_clock::time_point()); } // Add a callback that will be executed after a certain amount of time has - // passed. - void AddCallback(const Callback &callback, std::chrono::nanoseconds delay) { - AddCallback(callback, std::chrono::steady_clock::now() + delay); + // passed. See AddPendingCallback comment for the return value. + bool AddCallback(const Callback &callback, std::chrono::nanoseconds delay) { + return AddCallback(callback, std::chrono::steady_clock::now() + delay); } // Add a callback that will be executed after a given point in time. - void AddCallback(const Callback &callback, TimePoint point); + // See AddPendingCallback comment for the return value. + bool AddCallback(const Callback &callback, TimePoint point); // Waits for registered events and invoke the proper callbacks. Returns when // all callbacks deregister themselves or when someone requests termination. @@ -85,8 +90,9 @@ class MainLoopBase { virtual void UnregisterReadObject(IOObject::WaitableHandle handle) = 0; - // Interrupt the loop that is currently waiting for events. - virtual void Interrupt() = 0; + /// Interrupt the loop that is currently waiting for events. Return true if + /// the interrupt succeeded, false if it failed. + virtual bool Interrupt() = 0; void ProcessCallbacks(); diff --git a/lldb/include/lldb/Host/posix/MainLoopPosix.h b/lldb/include/lldb/Host/posix/MainLoopPosix.h index e9ac798b948df..92cdbe9d87ec3 100644 --- a/lldb/include/lldb/Host/posix/MainLoopPosix.h +++ b/lldb/include/lldb/Host/posix/MainLoopPosix.h @@ -54,7 +54,7 @@ class MainLoopPosix : public MainLoopBase { void UnregisterReadObject(IOObject::WaitableHandle handle) override; void UnregisterSignal(int signo, std::list::iterator callback_it); - void Interrupt() override; + bool Interrupt() override; private: void ProcessReadObject(IOObject::WaitableHandle handle); diff --git a/lldb/include/lldb/Host/windows/MainLoopWindows.h b/lldb/include/lldb/Host/windows/MainLoopWindows.h index 705e7e78ba48a..65b44aa1582c3 100644 --- a/lldb/include/lldb/Host/windows/MainLoopWindows.h +++ b/lldb/include/lldb/Host/windows/MainLoopWindows.h @@ -50,7 +50,7 @@ class MainLoopWindows : public MainLoopBase { protected: void UnregisterReadObject(IOObject::WaitableHandle handle) override; - void Interrupt() override; + bool Interrupt() override; private: llvm::Expected Poll(); diff --git a/lldb/source/Host/common/MainLoopBase.cpp b/lldb/source/Host/common/MainLoopBase.cpp index 64a57e65849e9..232b9bc0aa354 100644 --- a/lldb/source/Host/common/MainLoopBase.cpp +++ b/lldb/source/Host/common/MainLoopBase.cpp @@ -12,8 +12,9 @@ using namespace lldb; using namespace lldb_private; -void MainLoopBase::AddCallback(const Callback &callback, TimePoint point) { +bool MainLoopBase::AddCallback(const Callback &callback, TimePoint point) { bool interrupt_needed; + bool interrupt_succeeded = true; { std::lock_guard lock{m_callback_mutex}; // We need to interrupt the main thread if this callback is scheduled to @@ -22,7 +23,8 @@ void MainLoopBase::AddCallback(const Callback &callback, TimePoint point) { m_callbacks.emplace(point, callback); } if (interrupt_needed) - Interrupt(); + interrupt_succeeded = Interrupt(); + return interrupt_succeeded; } void MainLoopBase::ProcessCallbacks() { diff --git a/lldb/source/Host/posix/MainLoopPosix.cpp b/lldb/source/Host/posix/MainLoopPosix.cpp index 19a7128fbe407..c6fe7814bd22e 100644 --- a/lldb/source/Host/posix/MainLoopPosix.cpp +++ b/lldb/source/Host/posix/MainLoopPosix.cpp @@ -387,10 +387,11 @@ void MainLoopPosix::ProcessSignal(int signo) { } } -void MainLoopPosix::Interrupt() { +bool MainLoopPosix::Interrupt() { if (m_interrupting.exchange(true)) - return; + return true; char c = '.'; - cantFail(m_interrupt_pipe.Write(&c, 1)); + llvm::Expected result = m_interrupt_pipe.Write(&c, 1); + return result && *result != 0; } diff --git a/lldb/source/Host/windows/MainLoopWindows.cpp b/lldb/source/Host/windows/MainLoopWindows.cpp index 9b7df10258bcd..5e5888aee2181 100644 --- a/lldb/source/Host/windows/MainLoopWindows.cpp +++ b/lldb/source/Host/windows/MainLoopWindows.cpp @@ -272,4 +272,6 @@ Status MainLoopWindows::Run() { return Status(); } -void MainLoopWindows::Interrupt() { WSASetEvent(m_interrupt_event); } +bool MainLoopWindows::Interrupt() { + return WSASetEvent(m_interrupt_event); +} diff --git a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp index 390cf3eeb16a5..77a3ba6574cde 100644 --- a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp +++ b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp @@ -133,11 +133,12 @@ llvm::Error ProtocolServerMCP::Stop() { } // Stop the main loop. - m_loop.AddPendingCallback( + bool addition_succeeded = m_loop.AddPendingCallback( [](lldb_private::MainLoopBase &loop) { loop.RequestTermination(); }); - // Wait for the main loop to exit. - if (m_loop_thread.joinable()) + // Wait for the main loop to exit, but not if we didn't succeed in inserting + // our pending callback or we'll wait forever. + if (addition_succeeded && m_loop_thread.joinable()) m_loop_thread.join(); m_accept_handles.clear(); diff --git a/lldb/test/API/driver/stdio_closed/TestDriverWithClosedSTDIO.py b/lldb/test/API/driver/stdio_closed/TestDriverWithClosedSTDIO.py new file mode 100644 index 0000000000000..cff97b822db81 --- /dev/null +++ b/lldb/test/API/driver/stdio_closed/TestDriverWithClosedSTDIO.py @@ -0,0 +1,51 @@ +""" +Test that if you exec lldb with the stdio file handles +closed, it is able to exit without hanging. +""" + + +import lldb +import os +import sys +import socket +import fcntl + +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.lldbtest import * + + +class TestDriverWithClosedSTDIO(TestBase): + # If your test case doesn't stress debug info, then + # set this to true. That way it won't be run once for + # each debug info format. + NO_DEBUG_INFO_TESTCASE = True + + def test_run_lldb_and_wait(self): + """This test forks, closes the stdio channels and exec's lldb. + Then it waits for it to exit and asserts it did that successfully""" + pid = os.fork() + if pid == 0: + fcntl.fcntl(sys.stdin, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + fcntl.fcntl(sys.stdout, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + fcntl.fcntl(sys.stderr, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + lldb = lldbtest_config.lldbExec + print(f"About to run: {lldb}") + os.execlp( + lldb, + lldb, + "-x", + "-o", + "script print(lldb.debugger.GetNumTargets())", + "--batch", + ) + else: + if pid == -1: + print("Couldn't fork a process.") + return + ret_pid, status = os.waitpid(pid, 0) + # We're really just checking that lldb doesn't stall. + # At the time this test was written, if you close stdin + # in an asserts build, lldb aborts. So handle both + # of those cases. The failure will just be that the + # waitpid doesn't return, and the test times out. + self.assertFalse(os.WIFSTOPPED(status), "We either exited or crashed.") diff --git a/lldb/test/API/functionalities/vtable/Makefile b/lldb/test/API/functionalities/vtable/Makefile index 99998b20bcb05..cbd7d472fb768 100644 --- a/lldb/test/API/functionalities/vtable/Makefile +++ b/lldb/test/API/functionalities/vtable/Makefile @@ -1,3 +1,9 @@ CXX_SOURCES := main.cpp +ifeq "$(OS)" "Darwin" + # Make vtables writable for test_overwrite_vtable test + # The -no_data_const flag prevents vtables from being placed in __DATA_CONST + LD_EXTRAS := -Wl,-no_data_const +endif + include Makefile.rules diff --git a/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py b/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py index 0f40dfd09c958..3633701833220 100644 --- a/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py +++ b/lldb/test/API/macosx/posix_spawn/TestLaunchProcessPosixSpawn.py @@ -18,6 +18,12 @@ def apple_silicon(): def rosetta_debugserver_installed(): + import platform + version = platform.mac_ver() + # Workaround for an undiagnosed problem on green dragon. + print(version) + if version[0] and version[0][0] == '15' and version[0][1] == '5': + return False return exists("/Library/Apple/usr/libexec/oah/debugserver") diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index ba0041111045b..733331f4ddac0 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -902,9 +902,10 @@ int main(int argc, char const *argv[]) { } #if !defined(_WIN32) - signal_loop.AddPendingCallback( - [](MainLoopBase &loop) { loop.RequestTermination(); }); - signal_thread.join(); + // Try to interrupt the signal thread. If that succeeds, wait for it to exit. + if (signal_loop.AddPendingCallback( + [](MainLoopBase &loop) { loop.RequestTermination(); })) + signal_thread.join(); #endif return exit_code; diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp index 3c4f2253d1ad5..f009a902f79e7 100644 --- a/lldb/tools/lldb-dap/DAP.cpp +++ b/lldb/tools/lldb-dap/DAP.cpp @@ -1104,9 +1104,11 @@ llvm::Error DAP::Loop() { "unhandled packet"); } - m_loop.AddPendingCallback( - [](MainLoopBase &loop) { loop.RequestTermination(); }); - thread.join(); + // Don't wait to join the mainloop thread if our callback wasn't added + // successfully, or we'll wait forever. + if (m_loop.AddPendingCallback( + [](MainLoopBase &loop) { loop.RequestTermination(); })) + thread.join(); if (m_error_occurred) return llvm::createStringError(llvm::inconvertibleErrorCode(), diff --git a/lldb/unittests/DAP/TestBase.cpp b/lldb/unittests/DAP/TestBase.cpp index 83a303554ad6b..8cb459964f7d8 100644 --- a/lldb/unittests/DAP/TestBase.cpp +++ b/lldb/unittests/DAP/TestBase.cpp @@ -55,8 +55,9 @@ void TransportBase::SetUp() { } void TransportBase::Run() { - loop.AddPendingCallback( + bool addition_succeeded = loop.AddPendingCallback( [](lldb_private::MainLoopBase &loop) { loop.RequestTermination(); }); + EXPECT_TRUE(addition_succeeded); EXPECT_THAT_ERROR(loop.Run().takeError(), llvm::Succeeded()); } diff --git a/lldb/unittests/Host/JSONTransportTest.cpp b/lldb/unittests/Host/JSONTransportTest.cpp index 54f1372ca0fff..e90ab8e85a105 100644 --- a/lldb/unittests/Host/JSONTransportTest.cpp +++ b/lldb/unittests/Host/JSONTransportTest.cpp @@ -269,12 +269,13 @@ template class JSONTransportTest : public PipePairTest { loop.RequestTermination(); }); } - loop.AddCallback( + bool addition_succeeded = loop.AddCallback( [](MainLoopBase &loop) { loop.RequestTermination(); FAIL() << "timeout"; }, timeout); + EXPECT_TRUE(addition_succeeded); auto handle = transport->RegisterMessageHandler(loop, message_handler); if (!handle) return handle.takeError(); @@ -367,7 +368,9 @@ class TransportBinderTest : public testing::Test { } void Run() { - loop.AddPendingCallback([](auto &loop) { loop.RequestTermination(); }); + bool addition_succeeded = + loop.AddPendingCallback([](auto &loop) { loop.RequestTermination(); }); + EXPECT_TRUE(addition_succeeded); EXPECT_THAT_ERROR(loop.Run().takeError(), Succeeded()); } }; @@ -435,8 +438,9 @@ TEST_F(HTTPDelimitedJSONTransportTest, ReadPartialMessage) { EXPECT_CALL(message_handler, Received(Request{5, "foo", std::nullopt})); ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded()); - loop.AddPendingCallback( + bool addition_succeeded = loop.AddPendingCallback( [](MainLoopBase &loop) { loop.RequestTermination(); }); + EXPECT_TRUE(addition_succeeded); ASSERT_THAT_ERROR(Run(/*close_stdin=*/false), Succeeded()); ASSERT_THAT_EXPECTED(input.Write(part2.data(), part2.size()), Succeeded()); input.CloseWriteFileDescriptor(); @@ -454,15 +458,17 @@ TEST_F(HTTPDelimitedJSONTransportTest, ReadWithZeroByteWrites) { ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded()); // Run the main loop once for the initial read. - loop.AddPendingCallback( + bool addition_succeeded = loop.AddPendingCallback( [](MainLoopBase &loop) { loop.RequestTermination(); }); + EXPECT_TRUE(addition_succeeded); ASSERT_THAT_ERROR(Run(/*close_stdin=*/false), Succeeded()); // zero-byte write. ASSERT_THAT_EXPECTED(input.Write(part1.data(), 0), Succeeded()); // zero-byte write. - loop.AddPendingCallback( + addition_succeeded = loop.AddPendingCallback( [](MainLoopBase &loop) { loop.RequestTermination(); }); + EXPECT_TRUE(addition_succeeded); ASSERT_THAT_ERROR(Run(/*close_stdin=*/false), Succeeded()); // Write the remaining part of the message. @@ -569,8 +575,9 @@ TEST_F(JSONRPCTransportTest, ReadPartialMessage) { EXPECT_CALL(message_handler, Received(Request{42, "foo", std::nullopt})); ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded()); - loop.AddPendingCallback( + bool addition_succeeded = loop.AddPendingCallback( [](MainLoopBase &loop) { loop.RequestTermination(); }); + EXPECT_TRUE(addition_succeeded); ASSERT_THAT_ERROR(Run(/*close_input=*/false), Succeeded()); ASSERT_THAT_EXPECTED(input.Write(part2.data(), part2.size()), Succeeded()); diff --git a/lldb/unittests/Host/MainLoopTest.cpp b/lldb/unittests/Host/MainLoopTest.cpp index ae16d02101819..8a248100c936a 100644 --- a/lldb/unittests/Host/MainLoopTest.cpp +++ b/lldb/unittests/Host/MainLoopTest.cpp @@ -179,9 +179,13 @@ TEST_F(MainLoopTest, PipeDelayBetweenRegisterAndRun) { ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1)); }; // Add a write that triggers a read events. - loop.AddCallback(cb, std::chrono::milliseconds(500)); - loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); }, - std::chrono::milliseconds(1000)); + bool addition_succeeded = + loop.AddCallback(cb, std::chrono::milliseconds(500)); + ASSERT_TRUE(addition_succeeded); + addition_succeeded = + loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); }, + std::chrono::milliseconds(1000)); + ASSERT_TRUE(addition_succeeded); ASSERT_TRUE(error.Success()); ASSERT_TRUE(handle); @@ -310,8 +314,10 @@ TEST_F(MainLoopTest, NoSpuriousSocketReads) { error); ASSERT_THAT_ERROR(error.ToError(), llvm::Succeeded()); // Terminate the loop after one second. - loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); }, - std::chrono::seconds(1)); + bool addition_succeeded = + loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); }, + std::chrono::seconds(1)); + ASSERT_TRUE(addition_succeeded); ASSERT_THAT_ERROR(loop.Run().ToError(), llvm::Succeeded()); // Make sure the callback was called only once. @@ -388,10 +394,11 @@ TEST_F(MainLoopTest, PendingCallbackTrigger) { MainLoop loop; std::promise add_callback2; bool callback1_called = false; - loop.AddPendingCallback([&](MainLoopBase &loop) { + bool addition_succeeded = loop.AddPendingCallback([&](MainLoopBase &loop) { callback1_called = true; add_callback2.set_value(); }); + EXPECT_TRUE(addition_succeeded); Status error; ASSERT_THAT_ERROR(error.ToError(), llvm::Succeeded()); bool callback2_called = false; @@ -416,9 +423,11 @@ TEST_F(MainLoopTest, ManyPendingCallbacks) { // caused a deadlock when the pipe filled up (either because the main loop was // not running, because it was slow, or because it was busy/blocked doing // something else). - for (int i = 0; i < 65536; ++i) - loop.AddPendingCallback( + for (int i = 0; i < 65536; ++i) { + bool addition_succeeded = loop.AddPendingCallback( [&](MainLoopBase &loop) { loop.RequestTermination(); }); + EXPECT_TRUE(addition_succeeded); + } ASSERT_TRUE(loop.Run().Success()); } @@ -444,8 +453,10 @@ TEST_F(MainLoopTest, TimedCallbacksRunInOrder) { add_cb(2); add_cb(4); add_cb(1); - loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); }, - start + 5 * epsilon); + bool addition_succeeded = + loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); }, + start + 5 * epsilon); + EXPECT_TRUE(addition_succeeded); ASSERT_THAT_ERROR(loop.Run().takeError(), llvm::Succeeded()); EXPECT_GE(std::chrono::steady_clock::now() - start, 5 * epsilon); ASSERT_THAT(order, testing::ElementsAre(1, 2, 3, 4)); @@ -455,22 +466,24 @@ TEST_F(MainLoopTest, TimedCallbackShortensSleep) { MainLoop loop; auto start = std::chrono::steady_clock::now(); bool long_callback_called = false; - loop.AddCallback( + bool addition_succeeded = loop.AddCallback( [&](MainLoopBase &loop) { long_callback_called = true; loop.RequestTermination(); }, std::chrono::seconds(30)); + EXPECT_TRUE(addition_succeeded); std::future async_run = std::async(std::launch::async, &MainLoop::Run, std::ref(loop)); std::this_thread::sleep_for(std::chrono::milliseconds(100)); bool short_callback_called = false; - loop.AddCallback( + addition_succeeded = loop.AddCallback( [&](MainLoopBase &loop) { short_callback_called = true; loop.RequestTermination(); }, std::chrono::seconds(1)); + EXPECT_TRUE(addition_succeeded); ASSERT_THAT_ERROR(async_run.get().takeError(), llvm::Succeeded()); EXPECT_LT(std::chrono::steady_clock::now() - start, std::chrono::seconds(10)); EXPECT_TRUE(short_callback_called); diff --git a/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp b/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp index 45464db958e04..97f32e2fbb1bf 100644 --- a/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp +++ b/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp @@ -150,8 +150,9 @@ class ProtocolServerMCPTest : public testing::Test { /// Runs the MainLoop a single time, executing any pending callbacks. void Run() { - loop.AddPendingCallback( + bool addition_succeeded = loop.AddPendingCallback( [](MainLoopBase &loop) { loop.RequestTermination(); }); + EXPECT_TRUE(addition_succeeded); EXPECT_THAT_ERROR(loop.Run().takeError(), Succeeded()); } diff --git a/llvm/include/llvm/ADT/RadixTree.h b/llvm/include/llvm/ADT/RadixTree.h index 9e2ab9753d50c..87e2a3ebecc06 100644 --- a/llvm/include/llvm/ADT/RadixTree.h +++ b/llvm/include/llvm/ADT/RadixTree.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -92,7 +93,7 @@ template class RadixTree { /// If this node does not have a value (i.e., it's an internal node that /// only serves as a path to other values), this iterator will be equal /// to default constructed `ContainerType::iterator()`. - typename ContainerType::iterator Value; + std::optional Value; /// The first character of the Key. Used for fast child lookup. KeyValueType KeyFront; @@ -215,7 +216,7 @@ template class RadixTree { KeyConstIteratorType{}}; void findNextValid() { - while (Curr && Curr->Value == typename ContainerType::iterator()) + while (Curr && !Curr->Value.has_value()) advance(); } @@ -249,7 +250,7 @@ template class RadixTree { public: IteratorImpl() = default; - MappedType &operator*() const { return *Curr->Value; } + MappedType &operator*() const { return **Curr->Value; } IteratorImpl &operator++() { advance(); @@ -315,12 +316,12 @@ template class RadixTree { const value_type &NewValue = KeyValuePairs.emplace_front( std::move(Key), T(std::forward(Args)...)); Node &Node = findOrCreate(NewValue.first); - bool HasValue = Node.Value != typename ContainerType::iterator(); + bool HasValue = Node.Value.has_value(); if (!HasValue) Node.Value = KeyValuePairs.begin(); else KeyValuePairs.pop_front(); - return {Node.Value, !HasValue}; + return {*Node.Value, !HasValue}; } /// diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h index 8c8ca769c7d16..663af6302d6ff 100644 --- a/llvm/include/llvm/Remarks/Remark.h +++ b/llvm/include/llvm/Remarks/Remark.h @@ -51,12 +51,21 @@ struct Argument { // If set, the debug location corresponding to the value. std::optional Loc; + Argument() = default; + Argument(StringRef Key, StringRef Val) : Key(Key), Val(Val) {} + /// Implement operator<< on Argument. LLVM_ABI void print(raw_ostream &OS) const; - /// Return the value of argument as int. - LLVM_ABI std::optional getValAsInt() const; - /// Check if the argument value can be parsed as int. - LLVM_ABI bool isValInt() const; + + /// Return the value of argument as an integer of type T. + template + std::optional getValAsInt(unsigned Radix = 10) const { + StringRef Str = Val; + T Res; + if (Str.consumeInteger(Radix, Res) || !Str.empty()) + return std::nullopt; + return Res; + } }; // Create wrappers for C Binding types (see CBindingWrapping.h). @@ -127,6 +136,10 @@ struct Remark { /// Return a message composed from the arguments as a string. LLVM_ABI std::string getArgsAsMsg() const; + /// Return the first argument with the specified key or nullptr if no such + /// argument was found. + LLVM_ABI Argument *getArgByKey(StringRef Key); + /// Clone this remark to explicitly ask for a copy. Remark clone() const { return *this; } diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h index 054016622a577..63c5990a41741 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h +++ b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h @@ -13,6 +13,7 @@ #define LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERBINARYMETADATA_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -20,6 +21,9 @@ #include "llvm/Transforms/Utils/Instrumentation.h" namespace llvm { +namespace vfs { +class FileSystem; +} // namespace vfs struct SanitizerBinaryMetadataOptions { bool Covered = false; @@ -53,12 +57,14 @@ class SanitizerBinaryMetadataPass public: LLVM_ABI explicit SanitizerBinaryMetadataPass( SanitizerBinaryMetadataOptions Opts = {}, + IntrusiveRefCntPtr VFS = nullptr, ArrayRef IgnorelistFiles = {}); LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); static bool isRequired() { return true; } private: const SanitizerBinaryMetadataOptions Options; + IntrusiveRefCntPtr VFS; const ArrayRef IgnorelistFiles; }; diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h index f14f5b90a5cc9..a8a09fb95c4bd 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h +++ b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h @@ -15,14 +15,17 @@ #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H #define LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/SpecialCaseList.h" -#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Transforms/Utils/Instrumentation.h" namespace llvm { class Module; +namespace vfs { +class FileSystem; +} // namespace vfs /// This is the ModuleSanitizerCoverage pass used in the new pass manager. The /// pass instruments functions for coverage, adds initialization calls to the @@ -32,24 +35,15 @@ class SanitizerCoveragePass : public PassInfoMixin { public: explicit SanitizerCoveragePass( SanitizerCoverageOptions Options = SanitizerCoverageOptions(), - const std::vector &AllowlistFiles = - std::vector(), - const std::vector &BlocklistFiles = - std::vector()) - : Options(Options) { - if (AllowlistFiles.size() > 0) - Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, - *vfs::getRealFileSystem()); - if (BlocklistFiles.size() > 0) - Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, - *vfs::getRealFileSystem()); - } + IntrusiveRefCntPtr VFS = nullptr, + const std::vector &AllowlistFiles = {}, + const std::vector &BlocklistFiles = {}); LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); static bool isRequired() { return true; } private: SanitizerCoverageOptions Options; - + IntrusiveRefCntPtr VFS; std::unique_ptr Allowlist; std::unique_ptr Blocklist; }; diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index a572eefddd20e..84ee8c0bf3e18 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -1131,9 +1131,14 @@ bool DependenceInfo::haveSameSD(const Loop *SrcLoop, if (SE->hasLoopInvariantBackedgeTakenCount(DstLoop)) DstUP = SE->getBackedgeTakenCount(DstLoop); - if (SrcUB != nullptr && DstUP != nullptr && - SE->isKnownPredicate(ICmpInst::ICMP_EQ, SrcUB, DstUP)) - return true; + if (SrcUB != nullptr && DstUP != nullptr) { + Type *WiderType = SE->getWiderType(SrcUB->getType(), DstUP->getType()); + SrcUB = SE->getNoopOrZeroExtend(SrcUB, WiderType); + DstUP = SE->getNoopOrZeroExtend(DstUP, WiderType); + + if (SE->isKnownPredicate(ICmpInst::ICMP_EQ, SrcUB, DstUP)) + return true; + } return false; } diff --git a/llvm/lib/Remarks/Remark.cpp b/llvm/lib/Remarks/Remark.cpp index 0e98cad8e9045..09f24e93255e0 100644 --- a/llvm/lib/Remarks/Remark.cpp +++ b/llvm/lib/Remarks/Remark.cpp @@ -13,6 +13,7 @@ #include "llvm/Remarks/Remark.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include using namespace llvm; @@ -26,16 +27,13 @@ std::string Remark::getArgsAsMsg() const { return Str; } -/// Returns the value of a specified key parsed from StringRef. -std::optional Argument::getValAsInt() const { - APInt KeyVal; - if (Val.getAsInteger(10, KeyVal)) - return std::nullopt; - return KeyVal.getSExtValue(); +Argument *Remark::getArgByKey(StringRef Key) { + auto *It = find_if(Args, [&](auto &Arg) { return Arg.Key == Key; }); + if (It == Args.end()) + return nullptr; + return &*It; } -bool Argument::isValInt() const { return getValAsInt().has_value(); } - void RemarkLocation::print(raw_ostream &OS) const { OS << "{ " << "File: " << SourceFilePath << ", Line: " << SourceLine diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 14fb2477f3ccb..ce35e87c86d5b 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1955,9 +1955,13 @@ void SIRegisterInfo::buildSpillLoadStore( } bool IsSrcDstDef = SrcDstRegState & RegState::Define; + bool PartialReloadCopy = (RemEltSize != EltSize) && !IsStore; if (NeedSuperRegImpOperand && - (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef))) + (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef))) { MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState); + if (PartialReloadCopy) + MIB.addReg(ValueReg, RegState::Implicit); + } // The epilog restore of a wwm-scratch register can cause undesired // optimization during machine-cp post PrologEpilogInserter if the same diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 98c5f09260811..1a77b00588311 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -850,24 +850,34 @@ class XForm_45 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = 0; } -class XForm_RSB5_UIMM2_2UIMM1 opcode, bits<10> xo, dag OOL, dag IOL, +class XForm_RSB5_UIMM2 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, list pattern> : I { bits<5> RS; bits<5> RB; bits<2> RIC; - bits<1> PRS; - bits<1> R; let Pattern = pattern; let Inst{6...10} = RS; + let Inst{11} = 0; let Inst{12...13} = RIC; - let Inst{14} = PRS; - let Inst{15} = R; + let Inst{14...15} = 0; let Inst{16...20} = RB; let Inst{21...30} = xo; + let Inst{31} = 0; +} + +class XForm_RSB5_UIMM2_2UIMM1 opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, list pattern> + : XForm_RSB5_UIMM2 { + + bits<1> PRS; + bits<1> R; + + let Inst{14} = PRS; + let Inst{15} = R; } class X_FRT5_XO2_XO3_XO10 opcode, bits<2> xo1, bits<3> xo2, bits<10> xo, diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index 1aefea1a1c498..b0bed71c6755f 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -11,6 +11,18 @@ // //===----------------------------------------------------------------------===// +class XForm_RS5 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + list pattern> : I { + bits<5> RS; + + let Pattern = pattern; + + let Inst{6...10} = RS; + let Inst{11...20} = 0; + let Inst{21...30} = xo; + let Inst{31} = 0; +} + class XOForm_RTAB5_L1 opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, list pattern> : I { @@ -294,6 +306,24 @@ let Predicates = [IsISAFuture] in { defm SUBFUS : XOForm_RTAB5_L1r<31, 72, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB, u1imm:$L), "subfus", "$RT, $L, $RA, $RB", []>; + def TLBSYNCIO + : XForm_RS5<31, 564, (outs), (ins g8rc:$RS), "tlbsyncio $RS", []>; + def PTESYNCIO + : XForm_RS5<31, 596, (outs), (ins g8rc:$RS), "ptesyncio $RS", []>; + def TLBIEP : XForm_RSB5_UIMM2_2UIMM1<31, 50, (outs), + (ins gprc:$RB, gprc:$RS, u2imm:$RIC, + u1imm:$PRS, u1imm:$R), + "tlbiep $RB, $RS, $RIC, $PRS, $R", []>; + def TLBIEIO + : XForm_RSB5_UIMM2<31, 18, (outs), (ins g8rc:$RB, g8rc:$RS, u2imm:$RIC), + "tlbieio $RB, $RS, $RIC", []>; + let Interpretation64Bit = 1, isCodeGenOnly = 1 in { + def TLBIEP8 + : XForm_RSB5_UIMM2_2UIMM1<31, 50, (outs), + (ins g8rc:$RB, g8rc:$RS, u2imm:$RIC, + u1imm:$PRS, u1imm:$R), + "tlbiep $RB, $RS, $RIC, $PRS, $R", []>; + } } let Predicates = [HasVSX, IsISAFuture] in { diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index de28faf4908e9..3da720f54e6ab 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1714,7 +1714,7 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( } if (Constraint[1] == '@') { if (StringRef("{@cc}").compare(Constraint) == 0) - return std::make_pair(0u, &SystemZ::GR32BitRegClass); + return std::make_pair(SystemZ::CC, &SystemZ::CCRRegClass); } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); @@ -1766,10 +1766,6 @@ SDValue SystemZTargetLowering::LowerAsmOutputForConstraint( OpInfo.ConstraintVT.getSizeInBits() < 8) report_fatal_error("Glue output operand is of invalid type"); - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - MRI.addLiveIn(SystemZ::CC); - if (Glue.getNode()) { Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue); Chain = Glue.getValue(1); diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp index 4801ac75f8572..210b1266de23c 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp @@ -481,15 +481,18 @@ StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { } // namespace SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass( - SanitizerBinaryMetadataOptions Opts, ArrayRef IgnorelistFiles) - : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {} + SanitizerBinaryMetadataOptions Opts, + IntrusiveRefCntPtr VFS, + ArrayRef IgnorelistFiles) + : Options(std::move(Opts)), + VFS(VFS ? std::move(VFS) : vfs::getRealFileSystem()), + IgnorelistFiles(std::move(IgnorelistFiles)) {} PreservedAnalyses SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager &AM) { std::unique_ptr Ignorelist; if (!IgnorelistFiles.empty()) { - Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, - *vfs::getRealFileSystem()); + Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, *VFS); if (Ignorelist->inSection("metadata", "src", M.getSourceFileName())) return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index b74a0708b67ae..09abf6a33648c 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -318,6 +318,18 @@ class ModuleSanitizerCoverage { }; } // namespace +SanitizerCoveragePass::SanitizerCoveragePass( + SanitizerCoverageOptions Options, IntrusiveRefCntPtr VFS, + const std::vector &AllowlistFiles, + const std::vector &BlocklistFiles) + : Options(std::move(Options)), + VFS(VFS ? std::move(VFS) : vfs::getRealFileSystem()) { + if (AllowlistFiles.size() > 0) + Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, *this->VFS); + if (BlocklistFiles.size() > 0) + Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, *this->VFS); +} + PreservedAnalyses SanitizerCoveragePass::run(Module &M, ModuleAnalysisManager &MAM) { auto &FAM = MAM.getResult(M).getManager(); diff --git a/llvm/test/Analysis/DependenceAnalysis/same-sd-for-diff-becount-type-loops.ll b/llvm/test/Analysis/DependenceAnalysis/same-sd-for-diff-becount-type-loops.ll new file mode 100644 index 0000000000000..66880b5a553ec --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/same-sd-for-diff-becount-type-loops.ll @@ -0,0 +1,68 @@ +; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 | FileCheck %s + +define void @f1() { +; CHECK-LABEL: 'f1' +; CHECK-NEXT: Src: store i32 0, ptr null, align 4 --> Dst: store i32 0, ptr null, align 4 +; CHECK-NEXT: da analyze - consistent output [S]! +; CHECK-NEXT: Src: store i32 0, ptr null, align 4 --> Dst: %2 = load i32, ptr null, align 4 +; CHECK-NEXT: da analyze - consistent flow [|<]! +; CHECK-NEXT: Src: %2 = load i32, ptr null, align 4 --> Dst: %2 = load i32, ptr null, align 4 +; CHECK-NEXT: da analyze - consistent input [S]! +; +entry: + br label %for.1.header + +for.1.header: ; preds = %for.2.end, %entry + br label %for.1.body + +for.1.body: ; preds = %for.1.body, %whiledo + %0 = phi i32 [ 0, %for.1.header ], [ 1, %for.1.body ] + store i32 0, ptr null, align 4 + %1 = icmp ult i32 %0, 1 + br i1 %1, label %for.1.body, label %for.1.end + +for.1.end: ; preds = %for.1.body + br label %for.2.body + +for.2.body: ; preds = %for.2.body, %for.1.end + %2 = load i32, ptr null, align 4 + br i1 false, label %for.2.body, label %exit + +exit: ; preds = %for.2.body + ret void +} + +define void @f2() { +; CHECK-LABEL: 'f2' +; CHECK-NEXT: Src: store i32 0, ptr null, align 4 --> Dst: store i32 0, ptr null, align 4 +; CHECK-NEXT: da analyze - consistent output [S]! +; CHECK-NEXT: Src: store i32 0, ptr null, align 4 --> Dst: %3 = load i32, ptr null, align 4 +; CHECK-NEXT: da analyze - flow [|<] / assuming 1 loop level(s) fused: [S|<]! +; CHECK-NEXT: Src: %3 = load i32, ptr null, align 4 --> Dst: %3 = load i32, ptr null, align 4 +; CHECK-NEXT: da analyze - consistent input [S]! +; +entry: + br label %for.1.header + +for.1.header: ; preds = %for.2.end, %entry + br label %for.1.body + +for.1.body: ; preds = %for.1.body, %whiledo + %0 = phi i32 [ 0, %for.1.header ], [ 1, %for.1.body ] + store i32 0, ptr null, align 4 + %1 = icmp ult i32 %0, 1 + br i1 %1, label %for.1.body, label %for.1.end + +for.1.end: ; preds = %for.1.body + br label %for.2.body + +for.2.body: ; preds = %for.2.body, %for.1.end + %2 = phi i64 [ 0, %for.1.end ], [ %4, %for.2.body ] + %3 = load i32, ptr null, align 4 + %4 = add nuw nsw i64 %2, 1 + %5 = icmp ult i64 %4, 2 + br i1 %5, label %for.2.body, label %exit + +exit: ; preds = %for.2.body + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/loop-vector-sink.ll b/llvm/test/CodeGen/AMDGPU/loop-vector-sink.ll new file mode 100644 index 0000000000000..670e2c5b2c9e0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/loop-vector-sink.ll @@ -0,0 +1,236 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes='require,function(codegenprepare)' -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefix=OPT %s + +; testing insert case +define amdgpu_kernel void @runningSum(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %inputElement1, i32 %inputIter) { +; OPT-LABEL: define amdgpu_kernel void @runningSum( +; OPT-SAME: ptr addrspace(1) [[OUT0:%.*]], ptr addrspace(1) [[OUT1:%.*]], i32 [[INPUTELEMENT1:%.*]], i32 [[INPUTITER:%.*]]) #[[ATTR0:[0-9]+]] { +; OPT-NEXT: [[PREHEADER:.*]]: +; OPT-NEXT: [[VECELEMENT1:%.*]] = insertelement <2 x i32> poison, i32 [[INPUTELEMENT1]], i64 0 +; OPT-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VECELEMENT1]], <2 x i32> poison, <2 x i32> zeroinitializer +; OPT-NEXT: br label %[[LOOPBODY:.*]] +; OPT: [[LOOPBODY]]: +; OPT-NEXT: [[PREVIOUSSUM:%.*]] = phi <2 x i32> [ [[TMP1]], %[[PREHEADER]] ], [ [[RUNNINGSUM:%.*]], %[[LOOPBODY]] ] +; OPT-NEXT: [[ITERCOUNT:%.*]] = phi i32 [ [[INPUTITER]], %[[PREHEADER]] ], [ [[ITERSLEFT:%.*]], %[[LOOPBODY]] ] +; OPT-NEXT: [[RUNNINGSUM]] = add <2 x i32> [[TMP1]], [[PREVIOUSSUM]] +; OPT-NEXT: [[ITERSLEFT]] = sub i32 [[ITERCOUNT]], 1 +; OPT-NEXT: [[COND:%.*]] = icmp eq i32 [[ITERSLEFT]], 0 +; OPT-NEXT: br i1 [[COND]], label %[[LOOPEXIT:.*]], label %[[LOOPBODY]] +; OPT: [[LOOPEXIT]]: +; OPT-NEXT: [[SUMELEMENT0:%.*]] = extractelement <2 x i32> [[RUNNINGSUM]], i64 0 +; OPT-NEXT: [[SUMELEMENT1:%.*]] = extractelement <2 x i32> [[RUNNINGSUM]], i64 1 +; OPT-NEXT: store i32 [[SUMELEMENT0]], ptr addrspace(1) [[OUT0]], align 4 +; OPT-NEXT: store i32 [[SUMELEMENT1]], ptr addrspace(1) [[OUT1]], align 4 +; OPT-NEXT: ret void +; +preheader: + %vecElement1 = insertelement <2 x i32> poison, i32 %inputElement1, i64 0 + %broadcast1 = shufflevector <2 x i32> %vecElement1, <2 x i32> poison, <2 x i32> zeroinitializer + br label %loopBody + +loopBody: + %previousSum = phi <2 x i32> [ %broadcast1, %preheader ], [ %runningSum, %loopBody ] + %iterCount = phi i32 [ %inputIter, %preheader ], [ %itersLeft, %loopBody ] + %runningSum = add <2 x i32> %broadcast1, %previousSum + %itersLeft = sub i32 %iterCount, 1 + %cond = icmp eq i32 %itersLeft, 0 + br i1 %cond, label %loopExit, label %loopBody + +loopExit: + %sumElement0 = extractelement <2 x i32> %runningSum, i64 0 + %sumElement1 = extractelement <2 x i32> %runningSum, i64 1 + store i32 %sumElement0, ptr addrspace(1) %out0 + store i32 %sumElement1, ptr addrspace(1) %out1 + ret void +} + +; testing extract case with single use - with divergent control flow +; The vector has SINGLE use (extractelement), both sink into if.then +define amdgpu_kernel void @test_sink_extract_single_use_operands(ptr addrspace(1) %out0, <2 x i32> %inputVec, i32 %tid, i32 %cond) { +; OPT-LABEL: define amdgpu_kernel void @test_sink_extract_single_use_operands( +; OPT-SAME: ptr addrspace(1) [[OUT0:%.*]], <2 x i32> [[INPUTVEC:%.*]], i32 [[TID:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[ENTRY:.*:]] +; OPT-NEXT: [[RUNNINGSUM:%.*]] = add <2 x i32> [[INPUTVEC]], splat (i32 1) +; OPT-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[RUNNINGSUM]], i64 0 +; OPT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TID]], [[COND]] +; OPT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; OPT: [[IF_THEN]]: +; OPT-NEXT: [[RESULT:%.*]] = add i32 [[TMP0]], 100 +; OPT-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT0]], align 4 +; OPT-NEXT: br label %[[IF_END]] +; OPT: [[IF_END]]: +; OPT-NEXT: ret void +; +entry: + %runningSum = add <2 x i32> %inputVec, + %sumElement0 = extractelement <2 x i32> %runningSum, i64 0 + %cmp = icmp slt i32 %tid, %cond + br i1 %cmp, label %if.then, label %if.end + +if.then: + %result = add i32 %sumElement0, 100 + store i32 %result, ptr addrspace(1) %out0 + br label %if.end + +if.end: + ret void +} + +; testing extract case - extracting two elements with divergent control flow +; The vector has TWO uses (two extractelements), all sink into if.then +define amdgpu_kernel void @test_sink_extract_operands(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %input_vec, i32 %tid, i32 %cond) { +; OPT-LABEL: define amdgpu_kernel void @test_sink_extract_operands( +; OPT-SAME: ptr addrspace(1) [[OUT0:%.*]], ptr addrspace(1) [[OUT1:%.*]], <4 x i32> [[INPUT_VEC:%.*]], i32 [[TID:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[ENTRY:.*:]] +; OPT-NEXT: [[VEC_FULL:%.*]] = add <4 x i32> [[INPUT_VEC]], +; OPT-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[VEC_FULL]], i64 0 +; OPT-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[VEC_FULL]], i64 1 +; OPT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TID]], [[COND]] +; OPT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; OPT: [[IF_THEN]]: +; OPT-NEXT: [[RESULT0:%.*]] = add i32 [[TMP0]], 100 +; OPT-NEXT: [[RESULT1:%.*]] = add i32 [[TMP1]], 200 +; OPT-NEXT: store i32 [[RESULT0]], ptr addrspace(1) [[OUT0]], align 4 +; OPT-NEXT: store i32 [[RESULT1]], ptr addrspace(1) [[OUT1]], align 4 +; OPT-NEXT: br label %[[IF_END]] +; OPT: [[IF_END]]: +; OPT-NEXT: ret void +; +entry: + %vec_full = add <4 x i32> %input_vec, + %extract0 = extractelement <4 x i32> %vec_full, i64 0 + %extract1 = extractelement <4 x i32> %vec_full, i64 1 + %cmp = icmp slt i32 %tid, %cond + br i1 %cmp, label %if.then, label %if.end + +if.then: + %result0 = add i32 %extract0, 100 + %result1 = add i32 %extract1, 200 + store i32 %result0, ptr addrspace(1) %out0 + store i32 %result1, ptr addrspace(1) %out1 + br label %if.end + +if.end: + ret void +} + +; testing shuffle case with divergent control flow - shuffles sink into if.then +define amdgpu_kernel void @test_shuffle_insert_subvector(ptr addrspace(1) %ptr, <4 x i16> %vec1, <4 x i16> %vec2, i32 %tid, i32 %cond) { +; OPT-LABEL: define amdgpu_kernel void @test_shuffle_insert_subvector( +; OPT-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x i16> [[VEC1:%.*]], <4 x i16> [[VEC2:%.*]], i32 [[TID:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[ENTRY:.*:]] +; OPT-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC1]], <4 x i16> [[VEC2]], <4 x i32> +; OPT-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i16> [[VEC1]], <4 x i16> [[VEC2]], <4 x i32> +; OPT-NEXT: [[SHUFFLE3:%.*]] = shufflevector <4 x i16> [[VEC1]], <4 x i16> poison, <4 x i32> +; OPT-NEXT: [[SHUFFLE4:%.*]] = shufflevector <4 x i16> [[VEC2]], <4 x i16> poison, <4 x i32> +; OPT-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x i16> [[SHUFFLE]], <4 x i16> [[SHUFFLE2]], <4 x i32> +; OPT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TID]], [[COND]] +; OPT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; OPT: [[IF_THEN]]: +; OPT-NEXT: [[RESULT_VEC:%.*]] = add <4 x i16> [[SHUFFLE5]], +; OPT-NEXT: [[OTHER_RESULT:%.*]] = mul <4 x i16> [[SHUFFLE3]], splat (i16 2) +; OPT-NEXT: [[MORE_RESULT:%.*]] = sub <4 x i16> [[SHUFFLE4]], splat (i16 5) +; OPT-NEXT: store <4 x i16> [[RESULT_VEC]], ptr addrspace(1) [[PTR]], align 8 +; OPT-NEXT: store <4 x i16> [[OTHER_RESULT]], ptr addrspace(1) [[PTR]], align 8 +; OPT-NEXT: store <4 x i16> [[MORE_RESULT]], ptr addrspace(1) [[PTR]], align 8 +; OPT-NEXT: br label %[[IF_END]] +; OPT: [[IF_END]]: +; OPT-NEXT: ret void +; +entry: + %shuffle = shufflevector <4 x i16> %vec1, <4 x i16> %vec2, <4 x i32> + %shuffle2 = shufflevector <4 x i16> %vec1, <4 x i16> %vec2, <4 x i32> + %shuffle3 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <4 x i32> + %shuffle4 = shufflevector <4 x i16> %vec2, <4 x i16> poison, <4 x i32> + %shuffle5 = shufflevector <4 x i16> %shuffle, <4 x i16> %shuffle2, <4 x i32> + %cmp = icmp slt i32 %tid, %cond + br i1 %cmp, label %if.then, label %if.end + +if.then: + %result_vec = add <4 x i16> %shuffle5, + %other_result = mul <4 x i16> %shuffle3, + %more_result = sub <4 x i16> %shuffle4, + store <4 x i16> %result_vec, ptr addrspace(1) %ptr + store <4 x i16> %other_result, ptr addrspace(1) %ptr + store <4 x i16> %more_result, ptr addrspace(1) %ptr + br label %if.end + +if.end: + ret void +} + +; testing shuffle extract subvector with divergent control flow - shuffles sink into if.then +define amdgpu_kernel void @test_shuffle_extract_subvector(ptr addrspace(1) %ptr, <4 x i16> %input_vec, i32 %tid, i32 %cond) { +; OPT-LABEL: define amdgpu_kernel void @test_shuffle_extract_subvector( +; OPT-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x i16> [[INPUT_VEC:%.*]], i32 [[TID:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[ENTRY:.*:]] +; OPT-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[INPUT_VEC]], <4 x i16> poison, <2 x i32> +; OPT-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i16> [[INPUT_VEC]], <4 x i16> poison, <2 x i32> +; OPT-NEXT: [[SHUFFLE3:%.*]] = shufflevector <4 x i16> [[INPUT_VEC]], <4 x i16> poison, <4 x i32> +; OPT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TID]], [[COND]] +; OPT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; OPT: [[IF_THEN]]: +; OPT-NEXT: [[RESULT_VEC:%.*]] = add <2 x i16> [[SHUFFLE]], +; OPT-NEXT: [[RESULT_VEC2:%.*]] = mul <2 x i16> [[SHUFFLE2]], splat (i16 3) +; OPT-NEXT: [[RESULT_VEC3:%.*]] = sub <4 x i16> [[SHUFFLE3]], splat (i16 10) +; OPT-NEXT: store <2 x i16> [[RESULT_VEC]], ptr addrspace(1) [[PTR]], align 4 +; OPT-NEXT: store <2 x i16> [[RESULT_VEC2]], ptr addrspace(1) [[PTR]], align 4 +; OPT-NEXT: store <4 x i16> [[RESULT_VEC3]], ptr addrspace(1) [[PTR]], align 8 +; OPT-NEXT: br label %[[IF_END]] +; OPT: [[IF_END]]: +; OPT-NEXT: ret void +; +entry: + %shuffle = shufflevector <4 x i16> %input_vec, <4 x i16> poison, <2 x i32> + %shuffle2 = shufflevector <4 x i16> %input_vec, <4 x i16> poison, <2 x i32> + %shuffle3 = shufflevector <4 x i16> %input_vec, <4 x i16> poison, <4 x i32> + %cmp = icmp slt i32 %tid, %cond + br i1 %cmp, label %if.then, label %if.end + +if.then: + %result_vec = add <2 x i16> %shuffle, + %result_vec2 = mul <2 x i16> %shuffle2, + %result_vec3 = sub <4 x i16> %shuffle3, + store <2 x i16> %result_vec, ptr addrspace(1) %ptr + store <2 x i16> %result_vec2, ptr addrspace(1) %ptr + store <4 x i16> %result_vec3, ptr addrspace(1) %ptr + br label %if.end + +if.end: + ret void +} + +; testing shuffle sink with widening operations and divergent control flow +define amdgpu_kernel void @test_shuffle_sink_operands(ptr addrspace(1) %ptr, <2 x i16> %input_vec, <2 x i16> %input_vec2, i32 %tid, i32 %cond) { +; OPT-LABEL: define amdgpu_kernel void @test_shuffle_sink_operands( +; OPT-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x i16> [[INPUT_VEC:%.*]], <2 x i16> [[INPUT_VEC2:%.*]], i32 [[TID:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[ENTRY:.*:]] +; OPT-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[INPUT_VEC]], <2 x i16> poison, <4 x i32> +; OPT-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x i16> [[INPUT_VEC2]], <2 x i16> poison, <4 x i32> +; OPT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TID]], [[COND]] +; OPT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; OPT: [[IF_THEN]]: +; OPT-NEXT: [[RESULT_VEC:%.*]] = add <4 x i16> [[SHUFFLE]], +; OPT-NEXT: [[RESULT_VEC2:%.*]] = mul <4 x i16> [[SHUFFLE2]], splat (i16 5) +; OPT-NEXT: store <4 x i16> [[RESULT_VEC]], ptr addrspace(1) [[PTR]], align 8 +; OPT-NEXT: store <4 x i16> [[RESULT_VEC2]], ptr addrspace(1) [[PTR]], align 8 +; OPT-NEXT: br label %[[IF_END]] +; OPT: [[IF_END]]: +; OPT-NEXT: ret void +; +entry: + %shuffle = shufflevector <2 x i16> %input_vec, <2 x i16> poison, <4 x i32> + %shuffle2 = shufflevector <2 x i16> %input_vec2, <2 x i16> poison, <4 x i32> + %cmp = icmp slt i32 %tid, %cond + br i1 %cmp, label %if.then, label %if.end + +if.then: + %result_vec = add <4 x i16> %shuffle, + %result_vec2 = mul <4 x i16> %shuffle2, + store <4 x i16> %result_vec, ptr addrspace(1) %ptr + store <4 x i16> %result_vec2, ptr addrspace(1) %ptr + br label %if.end + +if.end: + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir index 603aa92f1b27a..55914bacb0dc1 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir @@ -81,7 +81,7 @@ body: | ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 - ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) + ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1 = IMPLICIT_DEF SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -130,7 +130,7 @@ body: | ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5) @@ -187,7 +187,7 @@ body: | ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5) + ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) @@ -247,7 +247,7 @@ body: | ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF @@ -315,7 +315,7 @@ body: | ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5) + ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir new file mode 100644 index 0000000000000..cd9a4d07b870d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir @@ -0,0 +1,506 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx950 -run-pass prologepilog,machine-cp -o - %s | FileCheck -check-prefix=GFX950 %s + +--- | + define amdgpu_kernel void @full_copy() #0 { ret void } + + define amdgpu_kernel void @partial_copy() #0 { ret void } + + define amdgpu_kernel void @full_spill() #0 { ret void } + + attributes #0 = { "amdgpu-waves-per-eu"="8,8" } +... + +--- +name: full_copy +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 2, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 4, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 5, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 6, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } +machineFunctionInfo: + stackPtrOffsetReg: '$sgpr32' + hasSpilledVGPRs: true +body: | + bb.0: + ; GFX950-LABEL: name: full_copy + ; GFX950: liveins: $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29 + ; GFX950-NEXT: {{ $}} + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX950-NEXT: renamable $agpr0_agpr1 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF + ; GFX950-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit-def $vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit killed $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr21, implicit $exec, implicit $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr20, implicit $exec, implicit killed $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: $vgpr0 = IMPLICIT_DEF + ; GFX950-NEXT: $agpr5 = COPY $agpr6, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr7, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr8, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr9, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr10, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr11, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr12, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr13, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr14, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr15, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr16, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr17, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr18, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr19, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr20, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr21, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr22, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr23, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr24, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr25, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr26, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr27, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr28, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr29, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + ; GFX950-NEXT: S_ENDPGM 0 + renamable $agpr0_agpr1 = IMPLICIT_DEF + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF + SI_SPILL_AV128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr4_vgpr5_vgpr6_vgpr7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr8_vgpr9_vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr12_vgpr13_vgpr14_vgpr15, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr16_vgpr17_vgpr18_vgpr19, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr20_vgpr21_vgpr22_vgpr23, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) + $vgpr0 = IMPLICIT_DEF + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + S_ENDPGM 0 +... + + +# We need to add implicit operand as well as implicit-def operand to the scratch_load, otherwise, MachineCopyPropagation will think the preceeding copies are dead, and will delete them. + +--- +name: partial_copy +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 2, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 4, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 5, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 6, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } +machineFunctionInfo: + stackPtrOffsetReg: '$sgpr32' + hasSpilledVGPRs: true +body: | + bb.0: + ; GFX950-LABEL: name: partial_copy + ; GFX950: liveins: $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27 + ; GFX950-NEXT: {{ $}} + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX950-NEXT: renamable $agpr0_agpr1 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF + ; GFX950-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit-def $vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit killed $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr20_vgpr21_vgpr22_vgpr23 + ; GFX950-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr20_vgpr21, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr20_vgpr21_vgpr22_vgpr23 :: (store (s64) into %stack.5, align 4, addrspace 5) + ; GFX950-NEXT: $vgpr0 = IMPLICIT_DEF + ; GFX950-NEXT: $agpr5 = COPY $agpr6, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr7, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr8, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr9, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr10, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr11, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr12, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr13, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr14, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr15, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr16, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr17, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr18, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr19, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr20, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr21, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr22, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr23, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr24, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr25, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr26, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr27, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2_agpr3 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr2_agpr3_agpr4_agpr5 :: (load (s64) from %stack.5, align 4, addrspace 5) + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + ; GFX950-NEXT: S_ENDPGM 0 + renamable $agpr0_agpr1 = IMPLICIT_DEF + renamable $agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF + SI_SPILL_AV128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr4_vgpr5_vgpr6_vgpr7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr8_vgpr9_vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr12_vgpr13_vgpr14_vgpr15, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr16_vgpr17_vgpr18_vgpr19, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr20_vgpr21_vgpr22_vgpr23, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) + $vgpr0 = IMPLICIT_DEF + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + S_ENDPGM 0 +... + +# Since there are no preceeding copies , we do not need to add implicit operand, as the implicit-def operand does not clobber. + +--- +name: full_spill +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 2, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 4, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 5, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } + - { id: 6, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 } +machineFunctionInfo: + stackPtrOffsetReg: '$sgpr32' + hasSpilledVGPRs: true +body: | + bb.0: + ; GFX950-LABEL: name: full_spill + ; GFX950: liveins: $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25 + ; GFX950-NEXT: {{ $}} + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX950-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX950-NEXT: renamable $agpr0_agpr1 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $agpr26_agpr27 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; GFX950-NEXT: renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF + ; GFX950-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX950-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX950-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr8, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX950-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit-def $vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX950-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr19, implicit $exec, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit killed $vgpr16_vgpr17_vgpr18_vgpr19 + ; GFX950-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.5, align 4, addrspace 5) + ; GFX950-NEXT: $vgpr0 = IMPLICIT_DEF + ; GFX950-NEXT: $agpr5 = COPY $agpr6, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr7, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr8, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr9, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr10, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr11, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr12, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr13, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr14, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr15, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr16, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr17, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr18, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr19, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr20, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr21, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + ; GFX950-NEXT: $agpr5 = COPY $agpr22, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr4 = COPY $agpr23, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr3 = COPY $agpr24, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: $agpr2 = COPY $agpr25, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + ; GFX950-NEXT: $agpr2_agpr3_agpr4_agpr5 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.5, align 4, addrspace 5) + ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + ; GFX950-NEXT: S_ENDPGM 0 + renamable $agpr0_agpr1 = IMPLICIT_DEF + renamable $agpr26_agpr27 = IMPLICIT_DEF + renamable $agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF + SI_SPILL_AV128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr4_vgpr5_vgpr6_vgpr7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr8_vgpr9_vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr12_vgpr13_vgpr14_vgpr15, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr16_vgpr17_vgpr18_vgpr19, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) + SI_SPILL_AV128_SAVE killed $vgpr20_vgpr21_vgpr22_vgpr23, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) + $vgpr0 = IMPLICIT_DEF + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 0, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 1024, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 2048, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 3072, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec + renamable $agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) + DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir index da80320bc1af1..a5d029a532f63 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir @@ -25,7 +25,7 @@ body: | ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s96) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s96) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29, implicit $agpr30 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -58,7 +58,7 @@ body: | ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -93,7 +93,7 @@ body: | ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -163,7 +163,7 @@ body: | ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s96) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (load (s96) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53, implicit $vgpr54 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -196,7 +196,7 @@ body: | ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -231,7 +231,7 @@ body: | ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GCN-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir index 613963403cc67..0316c35128087 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir +++ b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir @@ -28,7 +28,7 @@ body: | ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $vgpr51 = COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN-NEXT: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s96) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s96) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr54, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -61,7 +61,7 @@ body: | ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $vgpr51 = COPY $vgpr54, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 ; GCN-NEXT: $vgpr50 = COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN-NEXT: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -96,7 +96,7 @@ body: | ; GCN-NEXT: $vgpr51 = COPY $vgpr53, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 ; GCN-NEXT: $vgpr50 = COPY $vgpr54, implicit $vgpr48_vgpr49_vgpr50_vgpr51 ; GCN-NEXT: $vgpr49 = COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51 - ; GCN-NEXT: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -170,7 +170,7 @@ body: | ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 - ; GCN-NEXT: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s96) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: (load (s96) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -203,7 +203,7 @@ body: | ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 ; GCN-NEXT: $agpr28 = COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29 - ; GCN-NEXT: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -238,7 +238,7 @@ body: | ; GCN-NEXT: $agpr29 = COPY $agpr25, implicit-def $agpr26_agpr27_agpr28_agpr29 ; GCN-NEXT: $agpr28 = COPY $agpr30, implicit $agpr26_agpr27_agpr28_agpr29 ; GCN-NEXT: $agpr27 = COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29 - ; GCN-NEXT: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24 SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll index 6b8746e05704c..a86420ef6ffa6 100644 --- a/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll +++ b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll @@ -736,3 +736,40 @@ exit: ret void } +; Test INLINEASM defines CC. +@wait_fence = global i32 0, align 4 +@bit_cc = global i32 0, align 4 +define void @test_inlineasm_define_cc() { +; CHECK-LABEL: test_inlineasm_define_cc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgrl %r1, wait_fence@GOT +; CHECK-NEXT: chsi 0(%r1), 0 +; CHECK-NEXT: ber %r14 +; CHECK-NEXT: .LBB29_1: # %while.body.lr.ph +; CHECK-NEXT: lgrl %r1, bit_cc@GOT +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: srl %r0, 28 +; CHECK-NEXT: st %r0, 0(%r1) +; CHECK-NEXT: .LBB29_2: # %while.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: j .LBB29_2 +entry: + %0 = load i32, ptr @wait_fence, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: + %1 = tail call i32 asm "", "={@cc}"() + %2 = icmp ult i32 %1, 4 + tail call void @llvm.assume(i1 %2) + store i32 %1, ptr @bit_cc, align 4 + br label %while.body + +while.body: + br label %while.body + +while.end: + ret void +} diff --git a/llvm/test/Instrumentation/InstrProfiling/debug-info-correlate-coverage.ll b/llvm/test/Instrumentation/InstrProfiling/debug-info-correlate-coverage.ll index 192bac6e503a0..dd64615338170 100644 --- a/llvm/test/Instrumentation/InstrProfiling/debug-info-correlate-coverage.ll +++ b/llvm/test/Instrumentation/InstrProfiling/debug-info-correlate-coverage.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=instrprof -debug-info-correlate -S | opt -O2 -S | FileCheck %s +; RUN: opt < %s -passes=instrprof -profile-correlate=debug-info -S | opt -O2 -S | FileCheck %s @__profn_foo = private constant [3 x i8] c"foo" ; CHECK: @__profc_foo diff --git a/llvm/test/Instrumentation/InstrProfiling/debug-info-correlate.ll b/llvm/test/Instrumentation/InstrProfiling/debug-info-correlate.ll index fd868ead5b78d..84eaab33701a4 100644 --- a/llvm/test/Instrumentation/InstrProfiling/debug-info-correlate.ll +++ b/llvm/test/Instrumentation/InstrProfiling/debug-info-correlate.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=instrprof -debug-info-correlate -S > %t.ll +; RUN: opt < %s -passes=instrprof -profile-correlate=debug-info -S > %t.ll ; RUN: FileCheck < %t.ll --implicit-check-not "{{__llvm_prf_data|__llvm_prf_names}}" %s ; RUN: %llc_dwarf -O0 -filetype=obj < %t.ll | llvm-dwarfdump - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s --check-prefix CHECK-DWARF diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt index cdfc8ce9e0ca5..054489ce51a60 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt @@ -7,6 +7,18 @@ # RUN: llvm-mc --disassemble %s -triple powerpc-unknown-aix-gnu \ # RUN: -mcpu=future | FileCheck %s +#CHECK: tlbiep 8, 10, 2, 1, 0 +0x7d 0x4a 0x40 0x64 + +#CHECK: tlbieio 8, 10, 2 +0x7d 0x48 0x40 0x24 + +#CHECK: tlbsyncio 15 +0x7d 0xe0 0x04 0x68 + +#CHECK: ptesyncio 15 +0x7d 0xe0 0x04 0xa8 + #CHECK: dmxxextfdmr512 2, 34, 1, 0 0xf0 0x82 0x17 0x12 diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt index f7e314fc819e4..17d1413bacc3a 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt @@ -1,6 +1,18 @@ # RUN: llvm-mc --disassemble %s -triple powerpc64le-unknown-unknown \ # RUN: -mcpu=future | FileCheck %s +#CHECK: tlbiep 8, 10, 2, 1, 0 +0x64 0x40 0x4a 0x7d + +#CHECK: tlbieio 8, 10, 2 +0x24 0x40 0x48 0x7d + +#CHECK: tlbsyncio 15 +0x68 0x04 0xe0 0x7d + +#CHECK: ptesyncio 15 +0xa8 0x04 0xe0 0x7d + #CHECK: dmxxextfdmr512 2, 34, 1, 0 0x12 0x17 0x82 0xf0 diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s index 29fedd7c20646..e5bc1f47bf666 100644 --- a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s +++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s @@ -5,6 +5,22 @@ # RUN: llvm-mc -triple powerpc-unknown-aix-gnu --show-encoding %s | \ # RUN: FileCheck -check-prefix=CHECK-BE %s +#CHECK-BE: tlbiep 8, 10, 2, 1, 0 # encoding: [0x7d,0x4a,0x40,0x64] +#CHECK-LE: tlbiep 8, 10, 2, 1, 0 # encoding: [0x64,0x40,0x4a,0x7d] + tlbiep 8, 10, 2, 1, 0 + +# CHECK-BE: tlbieio 8, 10, 2 # encoding: [0x7d,0x48,0x40,0x24] +# CHECK-LE: tlbieio 8, 10, 2 # encoding: [0x24,0x40,0x48,0x7d] + tlbieio 8, 10, 2 + +# CHECK-BE: tlbsyncio 15 # encoding: [0x7d,0xe0,0x04,0x68] +# CHECK-LE: tlbsyncio 15 # encoding: [0x68,0x04,0xe0,0x7d] + tlbsyncio 15 + +# CHECK-BE: ptesyncio 15 # encoding: [0x7d,0xe0,0x04,0xa8] +# CHECK-LE: ptesyncio 15 # encoding: [0xa8,0x04,0xe0,0x7d] + ptesyncio 15 + # CHECK-BE: dmxxextfdmr512 2, 34, 1, 0 # encoding: [0xf0,0x82,0x17,0x12] # CHECK-LE: dmxxextfdmr512 2, 34, 1, 0 # encoding: [0x12,0x17,0x82,0xf0] dmxxextfdmr512 2, 34, 1, 0 diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/target-triple-mismatch.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/target-triple-mismatch.ll deleted file mode 100644 index 3da27cbacd172..0000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/target-triple-mismatch.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s - -define i64 @foo(i64 %a) { -entry: - %b = add i64 %a, 1 - ret i64 %b -} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/target-triple-mismatch.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/target-triple-mismatch.test deleted file mode 100644 index 3bbf14d469d4b..0000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/target-triple-mismatch.test +++ /dev/null @@ -1,11 +0,0 @@ -# REQUIRES: aarch64-registered-target -## Check that arm64-apple-darwin target triple is wrongly captured as arm64 (non-Apple) - -# RUN: cp -f %S/Inputs/target-triple-mismatch.ll %t.ll -# RUN: %update_llc_test_checks %t.ll 2>&1 | FileCheck %s --check-prefix=LOG -# RUN: FileCheck --input-file=%t.ll %s --check-prefix=AUTOGEN - -# LOG: WARNING: Couldn't match any function. Possibly the wrong target triple has been provided - -# AUTOGEN: ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -# AUTOGEN-NEXT: ; CHECK: {{.*}} diff --git a/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test b/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test index c21dbd72a2a18..9d64201cc071e 100644 --- a/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test +++ b/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test @@ -3,5 +3,6 @@ RUN: not llvm-remarkutil instruction-mix %p/Inputs/broken-remark-magic.bitstream RUN: not llvm-remarkutil annotation-count --annotation-type=remark %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s RUN: not llvm-remarkutil count %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s RUN: not llvm-remarkutil filter %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s +RUN: not llvm-remarkutil summary %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s CHECK: error: Automatic detection of remark format failed. Unknown magic number: '1234' diff --git a/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark.test b/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark.test index 339f082d4825b..0a668131c801c 100644 --- a/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark.test +++ b/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark.test @@ -3,5 +3,6 @@ RUN: not llvm-remarkutil instruction-count --parser=bitstream %p/Inputs/broken-r RUN: not llvm-remarkutil annotation-count --parser=bitstream --annotation-type=remark %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s RUN: not llvm-remarkutil count --parser=bitstream %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s RUN: not llvm-remarkutil filter --parser=bitstream %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s +RUN: not llvm-remarkutil summary --parser=bitstream %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s CHECK: error: Unknown magic number: expecting RMRK, got --- . diff --git a/llvm/test/tools/llvm-remarkutil/broken-yaml-remark.test b/llvm/test/tools/llvm-remarkutil/broken-yaml-remark.test index 9da3de4034b0f..76b2d5610d8cd 100644 --- a/llvm/test/tools/llvm-remarkutil/broken-yaml-remark.test +++ b/llvm/test/tools/llvm-remarkutil/broken-yaml-remark.test @@ -4,5 +4,6 @@ RUN: not llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/broken-remark - RUN: not llvm-remarkutil annotation-count --parser=yaml --annotation-type=remark %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s RUN: not llvm-remarkutil count --parser=yaml %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s RUN: not llvm-remarkutil filter --parser=yaml %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s +RUN: not llvm-remarkutil summary --parser=yaml %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s CHECK: error: Type, Pass, Name or Function missing diff --git a/llvm/test/tools/llvm-remarkutil/empty-file.test b/llvm/test/tools/llvm-remarkutil/empty-file.test index 9b2b000e9c24b..53f04f36226a5 100644 --- a/llvm/test/tools/llvm-remarkutil/empty-file.test +++ b/llvm/test/tools/llvm-remarkutil/empty-file.test @@ -4,18 +4,21 @@ RUN: not llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/empty-file -o - RUN: not llvm-remarkutil annotation-count --parser=yaml --annotation-type=remark %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --check-prefix=YAMLPARSER RUN: not llvm-remarkutil count --parser=yaml %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --check-prefix=YAMLPARSER RUN: not llvm-remarkutil filter --parser=yaml %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --check-prefix=YAMLPARSER +RUN: not llvm-remarkutil summary --parser=yaml %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --check-prefix=YAMLPARSER RUN: llvm-remarkutil bitstream2yaml %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=BITSTREAM2YAML RUN: llvm-remarkutil instruction-count --parser=bitstream %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=SIZEBITSTREAM RUN: llvm-remarkutil instruction-mix --parser=bitstream %p/Inputs/empty-file --report_style=csv -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=MIXBITSTREAM RUN: llvm-remarkutil annotation-count --parser=bitstream --annotation-type=remark %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=ANNOTATIONBITSTREAM RUN: llvm-remarkutil count --parser=bitstream %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=COUNTBITSTREAM -RUN: llvm-remarkutil filter --parser=bitstream %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=FILTERBITSTREAM +RUN: llvm-remarkutil filter --parser=bitstream %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=EMPTYBITSTREAM +RUN: llvm-remarkutil summary --parser=bitstream %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=EMPTYBITSTREAM ; Parser format auto-detection should treat empty files as bitstream files RUN: llvm-remarkutil instruction-count %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=SIZEBITSTREAM RUN: llvm-remarkutil instruction-mix %p/Inputs/empty-file --report_style=csv -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=MIXBITSTREAM RUN: llvm-remarkutil annotation-count --annotation-type=remark %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=ANNOTATIONBITSTREAM RUN: llvm-remarkutil count %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=COUNTBITSTREAM -RUN: llvm-remarkutil filter %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=FILTERBITSTREAM +RUN: llvm-remarkutil filter %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=EMPTYBITSTREAM +RUN: llvm-remarkutil summary %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=EMPTYBITSTREAM ; YAMLPARSER: error: document root is not of mapping type. @@ -34,4 +37,4 @@ RUN: llvm-remarkutil filter %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allo ; MIXBITSTREAM-LABEL: Instruction,Count ; MIXBITSTREAM-EMPTY: -; FILTERBITSTREAM-NOT: {{.}} +; EMPTYBITSTREAM-NOT: {{.}} diff --git a/llvm/test/tools/llvm-remarkutil/summary/Inputs/inline.yaml b/llvm/test/tools/llvm-remarkutil/summary/Inputs/inline.yaml new file mode 100644 index 0000000000000..efb8cd6ecf5a9 --- /dev/null +++ b/llvm/test/tools/llvm-remarkutil/summary/Inputs/inline.yaml @@ -0,0 +1,50 @@ +--- !Missed +Pass: inline +Name: TooCostly +DebugLoc: { File: 'foo.cpp', Line: 21, Column: 6 } +Function: fooCaller +Args: + - Callee: fooCallee + DebugLoc: { File: 'foo.cpp', Line: 10, Column: 0 } + - Caller: fooCaller + DebugLoc: { File: 'foo.cpp', Line: 20, Column: 0 } + - Cost: '125' + - Threshold: '100' +... +--- !Passed +Pass: inline +Name: Inlined +DebugLoc: { File: 'foo.cpp', Line: 21, Column: 6 } +Function: fooCaller2 +Args: + - Callee: fooCallee + DebugLoc: { File: 'foo.cpp', Line: 10, Column: 0 } + - Caller: fooCaller + DebugLoc: { File: 'foo.cpp', Line: 20, Column: 0 } + - Cost: '-15' + - Threshold: '100' + - Line: '1' + - Column: '6' +... +--- !Passed +Pass: inline +Name: AlwaysInline +DebugLoc: { File: 'bar.cpp', Line: 23, Column: 10 } +Function: barCaller +Args: + - Callee: barCallee + DebugLoc: { File: 'bar.cpp', Line: 5, Column: 0 } + - Caller: barCaller + DebugLoc: { File: 'bar.cpp', Line: 22, Column: 0 } + - Reason: always inline attribute + - Line: '23' + - Column: '10' +... +--- !Missed +Pass: inline +Name: NoDefinition +Function: bazCaller +Args: + - Callee: bazCallee + - Caller: bazCaller +... diff --git a/llvm/test/tools/llvm-remarkutil/summary/inline.test b/llvm/test/tools/llvm-remarkutil/summary/inline.test new file mode 100644 index 0000000000000..57473186e63e3 --- /dev/null +++ b/llvm/test/tools/llvm-remarkutil/summary/inline.test @@ -0,0 +1,54 @@ +RUN: llvm-remarkutil summary --inline-callees %p/Inputs/inline.yaml | FileCheck -strict-whitespace %s + +; CHECK: --- !Analysis +; CHECK-NEXT: Pass: inline +; CHECK-NEXT: Name: Summary +; CHECK-NEXT: DebugLoc: { File: bar.cpp, Line: 5, Column: 0 } +; CHECK-NEXT: Function: barCallee +; CHECK-NEXT: Args: +; CHECK-NEXT: - String: 'Incoming Calls (' +; CHECK-NEXT: - String: AlwaysInline +; CHECK-NEXT: - String: ': ' +; CHECK-NEXT: - AlwaysInline: '1' +; CHECK-NEXT: - String: ')' +; CHECK-NEXT: ... +; CHECK-NEXT: --- !Analysis +; CHECK-NEXT: Pass: inline +; CHECK-NEXT: Name: Summary +; CHECK-NEXT: Function: bazCallee +; CHECK-NEXT: Args: +; CHECK-NEXT: - String: 'Incoming Calls (' +; CHECK-NEXT: - String: NoDefinition +; CHECK-NEXT: - String: ': ' +; CHECK-NEXT: - NoDefinition: '1' +; CHECK-NEXT: - String: ')' +; CHECK-NEXT: ... +; CHECK-NEXT: --- !Analysis +; CHECK-NEXT: Pass: inline +; CHECK-NEXT: Name: Summary +; CHECK-NEXT: DebugLoc: { File: foo.cpp, Line: 10, Column: 0 } +; CHECK-NEXT: Function: fooCallee +; CHECK-NEXT: Args: +; CHECK-NEXT: - String: 'Incoming Calls (' +; CHECK-NEXT: - String: Inlined +; CHECK-NEXT: - String: ': ' +; CHECK-NEXT: - Inlined: '1' +; CHECK-NEXT: - String: ', ' +; CHECK-NEXT: - String: TooCostly +; CHECK-NEXT: - String: ': ' +; CHECK-NEXT: - TooCostly: '1' +; CHECK-NEXT: - String: ')' +; CHECK-NEXT: - String: "\nLeast profitable (cost=" +; CHECK-NEXT: - LeastProfitCost: '125' +; CHECK-NEXT: DebugLoc: { File: foo.cpp, Line: 21, Column: 6 } +; CHECK-NEXT: - String: ', threshold=' +; CHECK-NEXT: - LeastProfitThreshold: '100' +; CHECK-NEXT: - String: ')' +; CHECK-NEXT: - String: "\nMost profitable (cost=" +; CHECK-NEXT: - MostProfitCost: '-15' +; CHECK-NEXT: DebugLoc: { File: foo.cpp, Line: 21, Column: 6 } +; CHECK-NEXT: - String: ', threshold=' +; CHECK-NEXT: - MostProfitThreshold: '100' +; CHECK-NEXT: - String: ')' +; CHECK-NEXT: ... +; CHECK-NOT: {{.}} diff --git a/llvm/tools/llvm-remarkutil/CMakeLists.txt b/llvm/tools/llvm-remarkutil/CMakeLists.txt index c6e9334d87c04..3f0a4360266e1 100644 --- a/llvm/tools/llvm-remarkutil/CMakeLists.txt +++ b/llvm/tools/llvm-remarkutil/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_tool(llvm-remarkutil RemarkFilter.cpp RemarkInstructionMix.cpp RemarkSizeDiff.cpp + RemarkSummary.cpp RemarkUtil.cpp RemarkUtilHelpers.cpp RemarkUtilRegistry.cpp diff --git a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp index 2e842c8c2d72e..4e429b75e3c2d 100644 --- a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp +++ b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp @@ -70,11 +70,11 @@ static cl::opt GroupByOpt( /// integer value or 0 if it is has no integer value. static unsigned getValForKey(StringRef Key, const Remark &Remark) { auto *RemarkArg = find_if(Remark.Args, [&Key](const Argument &Arg) { - return Arg.Key == Key && Arg.isValInt(); + return Arg.Key == Key && Arg.getValAsInt(); }); if (RemarkArg == Remark.Args.end()) return 0; - return *RemarkArg->getValAsInt(); + return *RemarkArg->getValAsInt(); } Error ArgumentCounter::getAllMatchingArgumentsInRemark( @@ -91,7 +91,7 @@ Error ArgumentCounter::getAllMatchingArgumentsInRemark( continue; for (auto &Key : Arguments) { for (Argument Arg : Remark.Args) - if (Key.match(Arg.Key) && Arg.isValInt()) + if (Key.match(Arg.Key) && Arg.getValAsInt()) ArgumentSetIdxMap.insert({Arg.Key, ArgumentSetIdxMap.size()}); } } diff --git a/llvm/tools/llvm-remarkutil/RemarkSummary.cpp b/llvm/tools/llvm-remarkutil/RemarkSummary.cpp new file mode 100644 index 0000000000000..124bd51720d17 --- /dev/null +++ b/llvm/tools/llvm-remarkutil/RemarkSummary.cpp @@ -0,0 +1,254 @@ +//===- RemarkSummary.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Specialized tool to summarize remarks +// +//===----------------------------------------------------------------------===// + +#include "RemarkUtilHelpers.h" +#include "RemarkUtilRegistry.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/WithColor.h" +#include + +using namespace llvm; +using namespace remarks; +using namespace llvm::remarkutil; + +namespace summary { + +static cl::SubCommand + SummarySub("summary", "Summarize remarks using different strategies."); + +INPUT_FORMAT_COMMAND_LINE_OPTIONS(SummarySub) +OUTPUT_FORMAT_COMMAND_LINE_OPTIONS(SummarySub) +INPUT_OUTPUT_COMMAND_LINE_OPTIONS(SummarySub) + +static cl::OptionCategory SummaryStrategyCat("Strategy options"); + +enum class KeepMode { None, Used, All }; + +static cl::opt KeepInputOpt( + "keep", cl::desc("Keep input remarks in output"), cl::init(KeepMode::None), + cl::values(clEnumValN(KeepMode::None, "none", + "Don't keep input remarks (default)"), + clEnumValN(KeepMode::Used, "used", + "Keep only remarks used for summary"), + clEnumValN(KeepMode::All, "all", "Keep all input remarks")), + cl::sub(SummarySub)); + +static cl::opt + IgnoreMalformedOpt("ignore-malformed", + cl::desc("Ignore remarks that fail to process"), + cl::init(false), cl::Hidden, cl::sub(SummarySub)); + +// Use one cl::opt per Strategy, because future strategies might need to take +// per-strategy parameters. +static cl::opt EnableInlineSummaryOpt( + "inline-callees", cl::desc("Summarize per-callee inling statistics"), + cl::cat(SummaryStrategyCat), cl::init(false), cl::sub(SummarySub)); + +/// An interface to implement different strategies for creating remark +/// summaries. Override this class to develop new strategies. +class SummaryStrategy { +public: + virtual ~SummaryStrategy() = default; + + /// Strategy should return true if it wants to process the remark \p R. + virtual bool filter(Remark &R) = 0; + + /// Hook to process the remark \p R (i.e. collect the necessary data for + /// producing summary remarks). This will only be called with remarks + /// accepted by filter(). Can return an error if \p R is malformed or + /// unexpected. + virtual Error process(Remark &R) = 0; + + /// Hook to emit new remarks based on the collected data. + virtual void emit(RemarkSerializer &Serializer) = 0; +}; + +/// Check if any summary strategy options are explicitly enabled. +static bool isAnyStrategyRequested() { + StringMap Opts = cl::getRegisteredOptions(SummarySub); + for (auto &[_, Opt] : Opts) { + if (!is_contained(Opt->Categories, &SummaryStrategyCat)) + continue; + if (!Opt->getNumOccurrences()) + continue; + return true; + } + return false; +} + +class InlineCalleeSummary : public SummaryStrategy { + struct CallsiteCost { + int Cost = 0; + int Threshold = 0; + std::optional Loc; + + int getProfit() const { return Threshold - Cost; } + + friend bool operator==(const CallsiteCost &A, const CallsiteCost &B) { + return A.Cost == B.Cost && A.Threshold == B.Threshold && A.Loc == B.Loc; + } + + friend bool operator!=(const CallsiteCost &A, const CallsiteCost &B) { + return !(A == B); + } + }; + + struct CalleeSummary { + SmallDenseMap Stats; + std::optional Loc; + std::optional LeastProfit; + std::optional MostProfit; + + void updateCost(CallsiteCost NewCost) { + if (!LeastProfit || NewCost.getProfit() < LeastProfit->getProfit()) + LeastProfit = NewCost; + if (!MostProfit || NewCost.getProfit() > MostProfit->getProfit()) + MostProfit = NewCost; + } + }; + + DenseMap Callees; + + Error malformed() { return createStringError("Malformed inline remark."); } + + bool filter(Remark &R) override { + return R.PassName == "inline" && R.RemarkName != "Summary"; + } + + Error process(Remark &R) override { + auto *CalleeArg = R.getArgByKey("Callee"); + if (!CalleeArg) + return Error::success(); + auto &Callee = Callees[CalleeArg->Val]; + ++Callee.Stats[R.RemarkName]; + if (!Callee.Loc) + Callee.Loc = CalleeArg->Loc; + + Argument *CostArg = R.getArgByKey("Cost"); + Argument *ThresholdArg = R.getArgByKey("Threshold"); + if (!CostArg || !ThresholdArg) + return Error::success(); + auto CostVal = CostArg->getValAsInt(); + auto ThresholdVal = ThresholdArg->getValAsInt(); + if (!CostVal || !ThresholdVal) + return malformed(); + Callee.updateCost({*CostVal, *ThresholdVal, R.Loc}); + return Error::success(); + } + + void emit(RemarkSerializer &Serializer) override { + SmallVector SortedKeys(Callees.keys()); + llvm::sort(SortedKeys); + for (StringRef K : SortedKeys) { + auto &V = Callees[K]; + RemarkBuilder RB(Type::Analysis, "inline", "Summary", K); + if (V.Stats.empty()) + continue; + RB.R.Loc = V.Loc; + RB << "Incoming Calls ("; + SmallVector StatKeys(V.Stats.keys()); + llvm::sort(StatKeys); + bool First = true; + for (StringRef StatK : StatKeys) { + if (!First) + RB << ", "; + RB << StatK << ": " << NV(StatK, V.Stats[StatK]); + First = false; + } + RB << ")"; + if (V.LeastProfit && V.MostProfit != V.LeastProfit) { + RB << "\nLeast profitable (cost=" + << NV("LeastProfitCost", V.LeastProfit->Cost, V.LeastProfit->Loc) + << ", threshold=" + << NV("LeastProfitThreshold", V.LeastProfit->Threshold) << ")"; + } + if (V.MostProfit) { + RB << "\nMost profitable (cost=" + << NV("MostProfitCost", V.MostProfit->Cost, V.MostProfit->Loc) + << ", threshold=" + << NV("MostProfitThreshold", V.MostProfit->Threshold) << ")"; + } + Serializer.emit(RB.R); + } + } +}; + +static Error trySummary() { + auto MaybeBuf = getInputMemoryBuffer(InputFileName); + if (!MaybeBuf) + return MaybeBuf.takeError(); + auto MaybeParser = createRemarkParser(InputFormat, (*MaybeBuf)->getBuffer()); + if (!MaybeParser) + return MaybeParser.takeError(); + auto &Parser = **MaybeParser; + + Format SerializerFormat = + getSerializerFormat(OutputFileName, OutputFormat, Parser.ParserFormat); + + auto MaybeOF = getOutputFileForRemarks(OutputFileName, SerializerFormat); + if (!MaybeOF) + return MaybeOF.takeError(); + auto OF = std::move(*MaybeOF); + + auto MaybeSerializer = createRemarkSerializer(SerializerFormat, OF->os()); + if (!MaybeSerializer) + return MaybeSerializer.takeError(); + auto &Serializer = **MaybeSerializer; + + bool UseDefaultStrategies = !isAnyStrategyRequested(); + SmallVector> Strategies; + if (EnableInlineSummaryOpt || UseDefaultStrategies) + Strategies.push_back(std::make_unique()); + + auto MaybeRemark = Parser.next(); + for (; MaybeRemark; MaybeRemark = Parser.next()) { + Remark &Remark = **MaybeRemark; + bool UsedRemark = false; + for (auto &Strategy : Strategies) { + if (!Strategy->filter(Remark)) + continue; + UsedRemark = true; + if (auto E = Strategy->process(Remark)) { + if (IgnoreMalformedOpt) { + WithColor::warning() << "Ignored error: " << E << "\n"; + consumeError(std::move(E)); + continue; + } + return E; + } + } + if (KeepInputOpt == KeepMode::All || + (KeepInputOpt == KeepMode::Used && UsedRemark)) + Serializer.emit(Remark); + } + + auto E = MaybeRemark.takeError(); + if (!E.isA()) + return E; + consumeError(std::move(E)); + + for (auto &Strategy : Strategies) + Strategy->emit(Serializer); + + OF->keep(); + return Error::success(); +} + +static CommandRegistration SummaryReg(&SummarySub, trySummary); + +} // namespace summary diff --git a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h index 73867fe35f06c..39e7b423c4dc0 100644 --- a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h +++ b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h @@ -9,6 +9,7 @@ // Helpers for remark utilites // //===----------------------------------------------------------------------===// +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkFormat.h" @@ -19,6 +20,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/ToolOutputFile.h" // Keep input + output help + names consistent across the various modes via a @@ -205,5 +207,54 @@ struct Filters { bool filterRemark(const Remark &Remark); }; +/// Helper to construct Remarks using an API similar to DiagnosticInfo. +/// Once this is more fully featured, consider implementing DiagnosticInfo using +/// RemarkBuilder. +class RemarkBuilder { + BumpPtrAllocator Alloc; + UniqueStringSaver Strs; + +public: + Remark R; + struct Argument { + std::string Key; + std::string Val; + std::optional Loc; + Argument(StringRef Key, StringRef Val, + std::optional Loc = std::nullopt) + : Key(Key), Val(Val), Loc(Loc) {} + Argument(StringRef Key, int Val, + std::optional Loc = std::nullopt) + : Key(Key), Val(itostr(Val)), Loc(Loc) {} + }; + + RemarkBuilder(Type RemarkType, StringRef PassName, StringRef RemarkName, + StringRef FunctionName) + : Strs(Alloc) { + R.RemarkType = RemarkType; + R.PassName = Strs.save(PassName); + R.RemarkName = Strs.save(RemarkName); + R.FunctionName = Strs.save(FunctionName); + } + + RemarkBuilder &operator<<(Argument &&Arg) { + auto &RArg = R.Args.emplace_back(Strs.save(Arg.Key), Strs.save(Arg.Val)); + RArg.Loc = Arg.Loc; + return *this; + } + + RemarkBuilder &operator<<(const char *Str) { + R.Args.emplace_back("String", Str); + return *this; + } + + RemarkBuilder &operator<<(StringRef Str) { + R.Args.emplace_back("String", Strs.save(Str)); + return *this; + } +}; + +using NV = RemarkBuilder::Argument; + } // namespace remarks } // namespace llvm diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py index 457b23f4a5f59..469e27facedb0 100644 --- a/llvm/utils/UpdateTestChecks/asm.py +++ b/llvm/utils/UpdateTestChecks/asm.py @@ -570,6 +570,7 @@ def get_run_handler(triple): "arm64": (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_RE), "arm64e": (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_DARWIN_RE), "arm64ec": (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_RE), + "arm64-apple-darwin": (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_DARWIN_RE), "arm64-apple-ios": (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_DARWIN_RE), "arm64-apple-macosx": (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_DARWIN_RE), "armv7-apple-ios": (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_IOS_RE), diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index f88314547bb3f..a48df097403c7 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -612,6 +612,10 @@ def executeBuiltinUlimit(cmd, shenv): shenv.ulimit["RLIMIT_AS"] = new_limit * 1024 elif cmd.args[1] == "-n": shenv.ulimit["RLIMIT_NOFILE"] = new_limit + elif cmd.args[1] == "-s": + shenv.ulimit["RLIMIT_STACK"] = new_limit * 1024 + elif cmd.args[1] == "-f": + shenv.ulimit["RLIMIT_FSIZE"] = new_limit else: raise InternalShellError( cmd, "'ulimit' does not support option: %s" % cmd.args[1] diff --git a/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py b/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py index 33d2d59ff0dbe..a9dc2595497e7 100644 --- a/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py +++ b/llvm/utils/lit/lit/builtin_commands/_launch_with_limit.py @@ -17,6 +17,10 @@ def main(argv): resource.setrlimit(resource.RLIMIT_AS, limit) elif limit_str == "RLIMIT_NOFILE": resource.setrlimit(resource.RLIMIT_NOFILE, limit) + elif limit_str == "RLIMIT_STACK": + resource.setrlimit(resource.RLIMIT_STACK, limit) + elif limit_str == "RLIMIT_FSIZE": + resource.setrlimit(resource.RLIMIT_FSIZE, limit) process_output = subprocess.run(command_args) sys.exit(process_output.returncode) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py b/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py index 632f954fa8fde..c732c0429e661 100644 --- a/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/print_limits.py @@ -2,3 +2,5 @@ print("RLIMIT_AS=" + str(resource.getrlimit(resource.RLIMIT_AS)[0])) print("RLIMIT_NOFILE=" + str(resource.getrlimit(resource.RLIMIT_NOFILE)[0])) +print("RLIMIT_STACK=" + str(resource.getrlimit(resource.RLIMIT_STACK)[0])) +print("RLIMIT_FSIZE=" + str(resource.getrlimit(resource.RLIMIT_FSIZE)[0])) diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt index 4edf1c303a092..d38dc44fa033d 100644 --- a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_okay.txt @@ -1,4 +1,6 @@ # RUN: ulimit -n 50 +# RUN: ulimit -s 256 +# RUN: ulimit -f 5 # RUN: %{python} %S/print_limits.py # Fail the test so that we can assert on the output. # RUN: not echo return diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py index 09cd475b737c1..ba3de8b1bfced 100644 --- a/llvm/utils/lit/tests/shtest-ulimit.py +++ b/llvm/utils/lit/tests/shtest-ulimit.py @@ -19,7 +19,11 @@ # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}}) # CHECK: ulimit -n 50 +# CHECK: ulimit -s 256 +# CHECK: ulimit -f 5 # CHECK: RLIMIT_NOFILE=50 +# CHECK: RLIMIT_STACK=262144 +# CHECK: RLIMIT_FSIZE=5 # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_reset.txt ({{[^)]*}}) # CHECK: RLIMIT_NOFILE=[[BASE_NOFILE_LIMIT]] diff --git a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp index 291da1f76ca9b..14152c5a1af0c 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp @@ -15,6 +15,7 @@ #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h" +#include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Interfaces/RuntimeVerifiableOpInterface.h" using namespace mlir; @@ -273,7 +274,9 @@ struct SubViewOpInterface Value one = arith::ConstantIndexOp::create(builder, loc, 1); auto metadataOp = ExtractStridedMetadataOp::create(builder, loc, subView.getSource()); - for (int64_t i = 0, e = sourceType.getRank(); i < e; ++i) { + for (int64_t i : llvm::seq(0, sourceType.getRank())) { + // Reset insertion point to before the operation for each dimension + builder.setInsertionPoint(subView); Value offset = getValueOrCreateConstantIndexOp( builder, loc, subView.getMixedOffsets()[i]); Value size = getValueOrCreateConstantIndexOp(builder, loc, @@ -290,6 +293,16 @@ struct SubViewOpInterface std::to_string(i) + " is out-of-bounds")); + // Only verify if size > 0 + Value sizeIsNonZero = arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::sgt, size, zero); + + auto ifOp = scf::IfOp::create(builder, loc, builder.getI1Type(), + sizeIsNonZero, /*withElseRegion=*/true); + + // Populate the "then" region (for size > 0). + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + // Verify that slice does not run out-of-bounds. Value sizeMinusOne = arith::SubIOp::create(builder, loc, size, one); Value sizeMinusOneTimesStride = @@ -298,8 +311,20 @@ struct SubViewOpInterface arith::AddIOp::create(builder, loc, offset, sizeMinusOneTimesStride); Value lastPosInBounds = generateInBoundsCheck(builder, loc, lastPos, zero, dimSize); + + scf::YieldOp::create(builder, loc, lastPosInBounds); + + // Populate the "else" region (for size == 0). + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + Value trueVal = + arith::ConstantOp::create(builder, loc, builder.getBoolAttr(true)); + scf::YieldOp::create(builder, loc, trueVal); + + builder.setInsertionPointAfter(ifOp); + Value finalCondition = ifOp.getResult(0); + cf::AssertOp::create( - builder, loc, lastPosInBounds, + builder, loc, finalCondition, generateErrorMessage(op, "subview runs out-of-bounds along dimension " + std::to_string(i))); diff --git a/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp index c031118606823..753cb95b1c906 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/RuntimeOpVerification.cpp @@ -12,6 +12,7 @@ #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Interfaces/RuntimeVerifiableOpInterface.h" @@ -158,7 +159,11 @@ struct ExtractSliceOpInterface // 0 <= offset + (size - 1) * stride < dim_size Value zero = arith::ConstantIndexOp::create(builder, loc, 0); Value one = arith::ConstantIndexOp::create(builder, loc, 1); - for (int64_t i = 0, e = sourceType.getRank(); i < e; ++i) { + + for (int64_t i : llvm::seq(0, sourceType.getRank())) { + // Reset insertion point to before the operation for each dimension + builder.setInsertionPoint(extractSliceOp); + Value offset = getValueOrCreateConstantIndexOp( builder, loc, extractSliceOp.getMixedOffsets()[i]); Value size = getValueOrCreateConstantIndexOp( @@ -176,6 +181,16 @@ struct ExtractSliceOpInterface std::to_string(i) + " is out-of-bounds")); + // Only verify if size > 0 + Value sizeIsNonZero = arith::CmpIOp::create( + builder, loc, arith::CmpIPredicate::sgt, size, zero); + + auto ifOp = scf::IfOp::create(builder, loc, builder.getI1Type(), + sizeIsNonZero, /*withElseRegion=*/true); + + // Populate the "then" region (for size > 0). + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + // Verify that slice does not run out-of-bounds. Value sizeMinusOne = arith::SubIOp::create(builder, loc, size, one); Value sizeMinusOneTimesStride = @@ -184,8 +199,19 @@ struct ExtractSliceOpInterface arith::AddIOp::create(builder, loc, offset, sizeMinusOneTimesStride); Value lastPosInBounds = generateInBoundsCheck(builder, loc, lastPos, zero, dimSize); + scf::YieldOp::create(builder, loc, lastPosInBounds); + + // Populate the "else" region (for size == 0). + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + Value trueVal = + arith::ConstantOp::create(builder, loc, builder.getBoolAttr(true)); + scf::YieldOp::create(builder, loc, trueVal); + + builder.setInsertionPointAfter(ifOp); + Value finalCondition = ifOp.getResult(0); + cf::AssertOp::create( - builder, loc, lastPosInBounds, + builder, loc, finalCondition, generateErrorMessage( op, "extract_slice runs out-of-bounds along dimension " + std::to_string(i))); diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 24e909548fe0b..f9aa28d5203db 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -113,9 +113,12 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef shape, if (layout.size() != shape.size()) return std::nullopt; auto ratio = computeShapeRatio(shape, layout); - if (!ratio.has_value()) + if (ratio.has_value()) { + newShape = ratio.value(); + } else if (!rr || !computeShapeRatio(layout, shape).has_value()) { return std::nullopt; - newShape = ratio.value(); + } + // Round-robin case: continue with original newShape } if (data.size()) { diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp index 2c37140ad9c76..ec5feb8bc8c4a 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp @@ -344,6 +344,13 @@ void XeGPUBlockingPass::runOnOperation() { xegpu::doSCFStructuralTypeConversionWithTensorType(op, converter); + // Remove leading unit dimensions from vector ops and then + // do the unrolling. + { + RewritePatternSet patterns(ctx); + vector::populateCastAwayVectorLeadingOneDimPatterns(patterns); + (void)applyPatternsGreedily(op, std::move(patterns)); + } xegpu::UnrollOptions options; options.setFilterConstraint( [&](Operation *op) -> LogicalResult { return success(needsUnroll(op)); }); diff --git a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp index 52162a43aeae3..2255633c746b3 100644 --- a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp @@ -239,6 +239,8 @@ ExecutionEngine::create(Operation *m, const ExecutionEngineOptions &options, // Remember all entry-points if object dumping is enabled. if (options.enableObjectDump) { for (auto funcOp : m->getRegion(0).getOps()) { + if (funcOp.getBlocks().empty()) + continue; StringRef funcName = funcOp.getSymName(); engine->functionNames.push_back(funcName.str()); } diff --git a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir index 7e742af754fbe..d61908b422194 100644 --- a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir +++ b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir @@ -715,7 +715,8 @@ gpu.module @test_kernel { gpu.module @test_kernel { // CHECK-LABEL: load_store_nd_with_offsets // CHECK-SAME: [[arg0:%.+]]: memref<1024x1024xf32>, [[arg1:%.+]]: memref<1024x1024xf32>, [[arg2:%.+]]: memref<1024x1024xf32> - // CHECK-DAG: [[cst:%.+]] = arith.constant dense<0.000000e+00> : vector<1x32xf32> + // CHECK-DAG: [[cst:%.+]] = arith.constant dense<0.000000e+00> : vector<32xf32> + // CHECK-DAG: [[cst_0:%.+]] = arith.constant dense<0.000000e+00> : vector<1x32xf32> // CHECK-DAG: [[c16:%.+]] = arith.constant 16 : index // CHECK-DAG: [[c0:%.+]] = arith.constant 0 : index // CHECK: [[tdesc_a:%.+]] = xegpu.create_nd_tdesc [[arg0]] : memref<1024x1024xf32> -> !xegpu.tensor_desc<1x16xf32> @@ -723,20 +724,27 @@ gpu.module @test_kernel { // CHECK: [[tdesc_c:%.+]] = xegpu.create_nd_tdesc [[arg2]] : memref<1024x1024xf32> -> !xegpu.tensor_desc<1x16xf32> // CHECK: [[ld_a0:%.+]] = xegpu.load_nd [[tdesc_a]][[[c0]], [[c0]]] : !xegpu.tensor_desc<1x16xf32> -> vector<1x16xf32> // CHECK: [[ld_a1:%.+]] = xegpu.load_nd [[tdesc_a]][[[c0]], [[c16]]] : !xegpu.tensor_desc<1x16xf32> -> vector<1x16xf32> + // CHECK: [[ins_a0:%.+]] = vector.insert_strided_slice [[ld_a0]], [[cst_0]] {offsets = [0, 0], strides = [1, 1]} : vector<1x16xf32> into vector<1x32xf32> + // CHECK: [[ins_a1:%.+]] = vector.insert_strided_slice [[ld_a1]], [[ins_a0]] {offsets = [0, 16], strides = [1, 1]} : vector<1x16xf32> into vector<1x32xf32> // CHECK: [[ld_b0:%.+]] = xegpu.load_nd [[tdesc_b]][[[c0]], [[c0]]] : !xegpu.tensor_desc<1x16xf32> -> vector<1x16xf32> // CHECK: [[ld_b1:%.+]] = xegpu.load_nd [[tdesc_b]][[[c0]], [[c16]]] : !xegpu.tensor_desc<1x16xf32> -> vector<1x16xf32> - // CHECK: [[cast_a0:%.+]] = vector.shape_cast [[ld_a0]] : vector<1x16xf32> to vector<16xf32> - // CHECK: [[cast_b0:%.+]] = vector.shape_cast [[ld_b0]] : vector<1x16xf32> to vector<16xf32> - // CHECK: [[add0:%.+]] = arith.addf [[cast_a0]], [[cast_b0]] : vector<16xf32> - // CHECK: [[ins0:%.+]] = vector.insert_strided_slice [[add0]], [[cst]] {offsets = [0, 0], strides = [1]} : vector<16xf32> into vector<1x32xf32> - // CHECK: [[cast_a1:%.+]] = vector.shape_cast [[ld_a1]] : vector<1x16xf32> to vector<16xf32> - // CHECK: [[cast_b1:%.+]] = vector.shape_cast [[ld_b1]] : vector<1x16xf32> to vector<16xf32> - // CHECK: [[add1:%.+]] = arith.addf [[cast_a1]], [[cast_b1]] : vector<16xf32> - // CHECK: [[ins1:%.+]] = vector.insert_strided_slice [[add1]], [[ins0]] {offsets = [0, 16], strides = [1]} : vector<16xf32> into vector<1x32xf32> - // CHECK: [[ext0:%.+]] = vector.extract_strided_slice [[ins1]] {offsets = [0, 0], sizes = [1, 16], strides = [1, 1]} : vector<1x32xf32> to vector<1x16xf32> - // CHECK: [[ext1:%.+]] = vector.extract_strided_slice [[ins1]] {offsets = [0, 16], sizes = [1, 16], strides = [1, 1]} : vector<1x32xf32> to vector<1x16xf32> - // CHECK: xegpu.store_nd [[ext0]], [[tdesc_c]][[[c0]], [[c0]]] : vector<1x16xf32>, !xegpu.tensor_desc<1x16xf32> - // CHECK: xegpu.store_nd [[ext1]], [[tdesc_c]][[[c0]], [[c16]]] : vector<1x16xf32>, !xegpu.tensor_desc<1x16xf32> + // CHECK: [[ins_b0:%.+]] = vector.insert_strided_slice [[ld_b0]], [[cst_0]] {offsets = [0, 0], strides = [1, 1]} : vector<1x16xf32> into vector<1x32xf32> + // CHECK: [[ins_b1:%.+]] = vector.insert_strided_slice [[ld_b1]], [[ins_b0]] {offsets = [0, 16], strides = [1, 1]} : vector<1x16xf32> into vector<1x32xf32> + // CHECK: [[ext_a:%.+]] = vector.extract [[ins_a1]][0] : vector<32xf32> from vector<1x32xf32> + // CHECK: [[ext_b:%.+]] = vector.extract [[ins_b1]][0] : vector<32xf32> from vector<1x32xf32> + // CHECK: [[slice_a0:%.+]] = vector.extract_strided_slice [[ext_a]] {offsets = [0], sizes = [16], strides = [1]} : vector<32xf32> to vector<16xf32> + // CHECK: [[slice_b0:%.+]] = vector.extract_strided_slice [[ext_b]] {offsets = [0], sizes = [16], strides = [1]} : vector<32xf32> to vector<16xf32> + // CHECK: [[add0:%.+]] = arith.addf [[slice_a0]], [[slice_b0]] : vector<16xf32> + // CHECK: [[ins_add0:%.+]] = vector.insert_strided_slice [[add0]], [[cst]] {offsets = [0], strides = [1]} : vector<16xf32> into vector<32xf32> + // CHECK: [[slice_a1:%.+]] = vector.extract_strided_slice [[ext_a]] {offsets = [16], sizes = [16], strides = [1]} : vector<32xf32> to vector<16xf32> + // CHECK: [[slice_b1:%.+]] = vector.extract_strided_slice [[ext_b]] {offsets = [16], sizes = [16], strides = [1]} : vector<32xf32> to vector<16xf32> + // CHECK: [[add1:%.+]] = arith.addf [[slice_a1]], [[slice_b1]] : vector<16xf32> + // CHECK: [[ins_add1:%.+]] = vector.insert_strided_slice [[add1]], [[ins_add0]] {offsets = [16], strides = [1]} : vector<16xf32> into vector<32xf32> + // CHECK: [[broadcast:%.+]] = vector.broadcast [[ins_add1]] : vector<32xf32> to vector<1x32xf32> + // CHECK: [[ext_result0:%.+]] = vector.extract_strided_slice [[broadcast]] {offsets = [0, 0], sizes = [1, 16], strides = [1, 1]} : vector<1x32xf32> to vector<1x16xf32> + // CHECK: [[ext_result1:%.+]] = vector.extract_strided_slice [[broadcast]] {offsets = [0, 16], sizes = [1, 16], strides = [1, 1]} : vector<1x32xf32> to vector<1x16xf32> + // CHECK: xegpu.store_nd [[ext_result0]], [[tdesc_c]][[[c0]], [[c0]]] : vector<1x16xf32>, !xegpu.tensor_desc<1x16xf32> + // CHECK: xegpu.store_nd [[ext_result1]], [[tdesc_c]][[[c0]], [[c16]]] : vector<1x16xf32>, !xegpu.tensor_desc<1x16xf32> gpu.func @load_store_nd_with_offsets(%A: memref<1024x1024xf32>, %B: memref<1024x1024xf32>, %C: memref<1024x1024xf32>) { %c0 = arith.constant 0 : index @@ -752,3 +760,28 @@ gpu.module @test_kernel { gpu.return } } + +// ----- +#inst_data = #xegpu.layout +gpu.module @test_kernel { + // CHECK-LABEL: load_add_store_leading_unit_dims + // CHECK-SAME: [[arg0:%.+]]: ui64, [[arg1:%.+]]: ui64, [[arg2:%.+]]: ui64 + // CHECK: [[mask:%.+]] = arith.constant dense : vector<32xi1> + // CHECK: [[offsets:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<32xindex> + // CHECK: [[a:%.+]] = xegpu.load [[arg0]][[[offsets]]], [[mask]] <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint}> : ui64, vector<32xindex>, vector<32xi1> -> vector<32xf32> + // CHECK: [[b:%.+]] = xegpu.load [[arg1]][[[offsets]]], [[mask]] <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint}> : ui64, vector<32xindex>, vector<32xi1> -> vector<32xf32> + // CHECK: [[add:%.+]] = arith.addf [[a]], [[b]] : vector<32xf32> + // CHECK: xegpu.store [[add]], [[arg2]][[[offsets]]], [[mask]] <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint}> : vector<32xf32>, ui64, vector<32xindex>, vector<32xi1> + gpu.func @load_add_store_leading_unit_dims(%A: ui64, %B: ui64, %C: ui64) { + %cst = arith.constant {layout_result_0 = #inst_data} dense<[ + [[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, + 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]] + ]> : vector<1x1x32xindex> + %mask = arith.constant {layout_result_0 = #inst_data} dense : vector<1x1x32xi1> + %a = xegpu.load %A[%cst], %mask {chunk_size = 1, layout_result_0 = #inst_data, l1_hint = #xegpu.cache_hint} : ui64, vector<1x1x32xindex>, vector<1x1x32xi1> -> vector<1x1x32xf32> + %b = xegpu.load %B[%cst], %mask {chunk_size = 1, layout_result_0 = #inst_data, l1_hint = #xegpu.cache_hint} : ui64, vector<1x1x32xindex>, vector<1x1x32xi1> -> vector<1x1x32xf32> + %addf = arith.addf %a, %b {layout_result_0 = #inst_data} : vector<1x1x32xf32> + xegpu.store %addf, %C[%cst], %mask {chunk_size = 1, layout_operand_0 = #inst_data, layout_operand_2 = #inst_data, layout_operand_3 = #inst_data, l1_hint = #xegpu.cache_hint} : vector<1x1x32xf32>, ui64, vector<1x1x32xindex>, vector<1x1x32xi1> + gpu.return + } +} diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir index 742d11f8052ec..52acde4dffc2e 100644 --- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir +++ b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir @@ -527,4 +527,11 @@ gpu.module @test_distribution { %cst_1 = arith.constant {layout_result_0 = #xegpu.layout} dense<[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]> : vector<1x16xindex> gpu.return } + + // CHECK-LABEL: scalar_broadcast + gpu.func @scalar_broadcast(%arg0: index) { + // CHECK: vector.broadcast {{.*}} : index to vector<1x1x1xindex> + %broadcast = vector.broadcast %arg0 {layout_result_0 = #xegpu.layout} : index to vector<4x1x1xindex> + gpu.return + } } diff --git a/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir index 71e813c0a6300..84875675ac3d0 100644 --- a/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir @@ -2,6 +2,7 @@ // RUN: -expand-strided-metadata \ // RUN: -lower-affine \ // RUN: -test-cf-assert \ +// RUN: -convert-scf-to-cf \ // RUN: -convert-to-llvm | \ // RUN: mlir-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils 2>&1 | \ @@ -11,6 +12,7 @@ // RUN: -expand-strided-metadata \ // RUN: -lower-affine \ // RUN: -test-cf-assert \ +// RUN: -convert-scf-to-cf \ // RUN: -convert-to-llvm="allow-pattern-rollback=0" \ // RUN: -reconcile-unrealized-casts | \ // RUN: mlir-runner -e main -entry-point-result=void \ @@ -38,6 +40,17 @@ func.func @subview_dynamic_rank_reduce(%memref: memref, %offset: index, return } +func.func @subview_zero_size_dim(%memref: memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>>, + %dim_0: index, + %dim_1: index, + %dim_2: index) { + %subview = memref.subview %memref[0, 0, 0] [%dim_0, %dim_1, %dim_2] [1, 1, 1] : + memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>> to + memref> + return +} + + func.func @main() { %0 = arith.constant 0 : index %1 = arith.constant 1 : index @@ -105,6 +118,14 @@ func.func @main() { // CHECK-NOT: ERROR: Runtime op verification failed func.call @subview_dynamic_rank_reduce(%alloca_4_dyn, %0, %1, %0) : (memref, index, index, index) -> () + %alloca_10x4x1 = memref.alloca() : memref<10x4x1xf32> + %alloca_10x4x1_dyn_stride = memref.cast %alloca_10x4x1 : memref<10x4x1xf32> to memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>> + // CHECK-NOT: ERROR: Runtime op verification failed + %dim_0 = arith.constant 0 : index + %dim_1 = arith.constant 4 : index + %dim_2 = arith.constant 1 : index + func.call @subview_zero_size_dim(%alloca_10x4x1_dyn_stride, %dim_0, %dim_1, %dim_2) + : (memref<10x4x1xf32, strided<[?, ?, ?], offset: ?>>, index, index, index) -> () return } diff --git a/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir b/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir index 0c7c4a6cb2d6f..a77fa310a3699 100644 --- a/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir +++ b/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir @@ -34,6 +34,12 @@ func.func @extract_slice_dynamic_rank_reduce(%tensor: tensor, %offset: return } +func.func @extract_slice_zero_size_dim(%arg0: tensor<10x4x1xf32>, %dim_0: index, %dim_1: index, %dim_2: index) { + tensor.extract_slice %arg0[0, 0, 0] [%dim_0, %dim_1, %dim_2] [1, 1, 1] : tensor<10x4x1xf32> to tensor + return +} + + func.func @main() { %0 = arith.constant 0 : index %1 = arith.constant 1 : index @@ -101,6 +107,13 @@ func.func @main() { // CHECK-NOT: ERROR: Runtime op verification failed func.call @extract_slice_dynamic_rank_reduce(%alloca_4_dyn, %0, %1, %0) : (tensor, index, index, index) -> () + %cst10x4x1xf32 = arith.constant dense<1.0> : tensor<10x4x1xf32> + + // CHECK-NOT: ERROR: Runtime op verification failed + %dim_0 = arith.constant 0 : index + %dim_1 = arith.constant 4 : index + %dim_2 = arith.constant 1 : index + func.call @extract_slice_zero_size_dim(%cst10x4x1xf32, %dim_0, %dim_1, %dim_2) : (tensor<10x4x1xf32>, index, index, index) -> () return } diff --git a/mlir/test/python/CMakeLists.txt b/mlir/test/python/CMakeLists.txt index e1e82ef367b1e..2c123811c2998 100644 --- a/mlir/test/python/CMakeLists.txt +++ b/mlir/test/python/CMakeLists.txt @@ -11,7 +11,7 @@ add_public_tablegen_target(MLIRPythonTestIncGen) add_subdirectory(lib) -set(MLIR_PYTHON_TEST_DEPENDS MLIRPythonModules) +set(MLIR_PYTHON_TEST_DEPENDS MLIRPythonModules mlir-runner) if(NOT MLIR_STANDALONE_BUILD) list(APPEND MLIR_PYTHON_TEST_DEPENDS FileCheck count not) endif() diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py index d569fcef32bfd..146e213a9229e 100644 --- a/mlir/test/python/execution_engine.py +++ b/mlir/test/python/execution_engine.py @@ -1,6 +1,7 @@ # RUN: env MLIR_RUNNER_UTILS=%mlir_runner_utils MLIR_C_RUNNER_UTILS=%mlir_c_runner_utils %PYTHON %s 2>&1 | FileCheck %s # REQUIRES: host-supports-jit import gc, sys, os, tempfile +from textwrap import dedent from mlir.ir import * from mlir.passmanager import * from mlir.execution_engine import * @@ -21,6 +22,7 @@ "MLIR_C_RUNNER_UTILS", "../../../../lib/libmlir_c_runner_utils.so" ) + # Log everything to stderr and flush so that we have a unified stream to match # errors/info emitted by MLIR to stderr. def log(*args): @@ -337,6 +339,7 @@ def callback(a): ctypes.pointer(ctypes.pointer(get_ranked_memref_descriptor(inp_arr))), ) + run(testUnrankedMemRefWithOffsetCallback) @@ -785,15 +788,25 @@ def testDumpToObjectFile(): try: with Context(): module = Module.parse( - """ - module { - func.func @main() attributes { llvm.emit_c_interface } { - return - } - }""" + dedent( + """ + func.func private @printF32(f32) + func.func @main(%arg0: f32) attributes { llvm.emit_c_interface } { + call @printF32(%arg0) : (f32) -> () + return + } + """ + ) ) - execution_engine = ExecutionEngine(lowerToLLVM(module), opt_level=3) + execution_engine = ExecutionEngine( + lowerToLLVM(module), + opt_level=3, + # Loading MLIR_C_RUNNER_UTILS is necessary even though we don't actually run the code (i.e., call printF32) + # because RTDyldObjectLinkingLayer::emit will try to resolve symbols before dumping + # (see the jitLinkForORC call at the bottom there). + shared_libs=[MLIR_C_RUNNER_UTILS], + ) # CHECK: Object file exists: True print(f"Object file exists: {os.path.exists(object_path)}")