diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp index fe8869d237b4d..a2ab58f96b2b5 100644 --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -370,7 +370,8 @@ static uint64_t scanCortexA53Errata843419(InputSection *isec, uint64_t &off, class elf::Patch843419Section final : public SyntheticSection { public: - Patch843419Section(Ctx &, InputSection *p, uint64_t off); + Patch843419Section(Ctx &, InputSection *p, uint64_t off, + Defined *patcheeCodeSym); void writeTo(uint8_t *buf) override; @@ -390,7 +391,8 @@ class elf::Patch843419Section final : public SyntheticSection { Symbol *patchSym; }; -Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off) +Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off, + Defined *patcheeCodeSym) : SyntheticSection(ctx, ".text.patch", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, 4), patchee(p), patcheeOffset(off) { @@ -399,6 +401,9 @@ Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off) ctx, ctx.saver.save("__CortexA53843419_" + utohexstr(getLDSTAddr())), STT_FUNC, 0, getSize(), *this); addSyntheticLocal(ctx, ctx.saver.save("$x"), STT_NOTYPE, 0, 0, *this); + int64_t retToPatcheeSymOffset = + getLDSTAddr() - p->getVA(patcheeCodeSym->value) + 4; + addReloc({R_PC, R_AARCH64_JUMP26, 4, retToPatcheeSymOffset, patcheeCodeSym}); } uint64_t Patch843419Section::getLDSTAddr() const { @@ -410,13 +415,8 @@ void Patch843419Section::writeTo(uint8_t *buf) { // patchee Section. write32le(buf, read32le(patchee->content().begin() + patcheeOffset)); - // Apply any relocation transferred from the original patchee section. + // Apply relocations ctx.target->relocateAlloc(*this, buf); - - // Return address is the next instruction after the one we have just copied. - uint64_t s = getLDSTAddr() + 4; - uint64_t p = patchSym->getVA(ctx) + 4; - ctx.target->relocateNoSym(buf + 4, R_AARCH64_JUMP26, s - p); } void AArch64Err843419Patcher::init() { @@ -443,11 +443,14 @@ void AArch64Err843419Patcher::init() { auto *def = dyn_cast(b); if (!def) continue; - if (!isCodeMapSymbol(def) && !isDataMapSymbol(def)) + if (!def->isSection() && !isCodeMapSymbol(def) && !isDataMapSymbol(def)) continue; - if (auto *sec = dyn_cast_or_null(def->section)) - if (sec->flags & SHF_EXECINSTR) - sectionMap[sec].push_back(def); + if (auto *sec = dyn_cast_or_null(def->section)) { + if (def->isSection()) + sectionMap[sec].first = def; + else if (sec->flags & SHF_EXECINSTR) + sectionMap[sec].second.push_back(def); + } } } // For each InputSection make sure the mapping symbols are in sorted in @@ -455,7 +458,7 @@ void AArch64Err843419Patcher::init() { // the same type. For example we must remove the redundant $d.1 from $x.0 // $d.0 $d.1 $x.1. for (auto &kv : sectionMap) { - std::vector &mapSyms = kv.second; + auto &mapSyms = kv.second.second; llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) { return a->value < b->value; }); @@ -529,7 +532,8 @@ void AArch64Err843419Patcher::insertPatches( // Patches that we need to insert. static void implementPatch(Ctx &ctx, uint64_t adrpAddr, uint64_t patcheeOffset, InputSection *isec, - std::vector &patches) { + std::vector &patches, + Defined *patcheeCodeSym) { // There may be a relocation at the same offset that we are patching. There // are four cases that we need to consider. // Case 1: R_AARCH64_JUMP26 branch relocation. We have already patched this @@ -554,7 +558,7 @@ static void implementPatch(Ctx &ctx, uint64_t adrpAddr, uint64_t patcheeOffset, Log(ctx) << "detected cortex-a53-843419 erratum sequence starting at " << utohexstr(adrpAddr) << " in unpatched output."; - auto *ps = make(ctx, isec, patcheeOffset); + auto *ps = make(ctx, isec, patcheeOffset, patcheeCodeSym); patches.push_back(ps); auto makeRelToPatch = [](uint64_t offset, Symbol *patchSym) { @@ -584,7 +588,11 @@ AArch64Err843419Patcher::patchInputSectionDescription( // mapping symbols of the same type. Our range of executable instructions to // scan is therefore [codeSym->value, dataSym->value) or [codeSym->value, // section size). - std::vector &mapSyms = sectionMap[isec]; + auto [it, inserted] = sectionMap.try_emplace( + isec, std::make_pair(nullptr, SmallVector{})); + auto &[sectionSym, mapSyms] = it->second; + if (inserted || sectionSym == nullptr) + sectionSym = addSyntheticLocal(ctx, "", STT_SECTION, 0, 0, *isec); auto codeSym = mapSyms.begin(); while (codeSym != mapSyms.end()) { @@ -597,7 +605,8 @@ AArch64Err843419Patcher::patchInputSectionDescription( uint64_t startAddr = isec->getVA(off); if (uint64_t patcheeOffset = scanCortexA53Errata843419(isec, off, limit)) - implementPatch(ctx, startAddr, patcheeOffset, isec, patches); + implementPatch(ctx, startAddr, patcheeOffset, isec, patches, + sectionSym); } if (dataSym == mapSyms.end()) break; diff --git a/lld/ELF/AArch64ErrataFix.h b/lld/ELF/AArch64ErrataFix.h index cab0b04336982..8e0d305eb9ccd 100644 --- a/lld/ELF/AArch64ErrataFix.h +++ b/lld/ELF/AArch64ErrataFix.h @@ -36,10 +36,13 @@ class AArch64Err843419Patcher { void init(); Ctx &ctx; - // A cache of the mapping symbols defined by the InputSection sorted in order - // of ascending value with redundant symbols removed. These describe - // the ranges of code and data in an executable InputSection. - llvm::DenseMap> sectionMap; + // A cache mapping InputSections to pairs of section symbols (first) and + // the mapping symbols (second) defined by the InputSection sorted in order + // of ascending value with redundant symbols removed. These describe the + // ranges of code and data in an executable InputSection. + llvm::DenseMap>> + sectionMap; bool initialized = false; }; diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 2a97df4785ecb..d9171a4cca006 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -48,6 +48,11 @@ bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) { if (off >= isec->getSize()) return true; const uint8_t *buf = isec->content().begin(); + // Synthetic sections may have a size but empty data - Assume that they won't + // contain a landing pad + if (buf == nullptr && isa(isec)) + return false; + const uint32_t instr = read32le(buf + off); // All BTI instructions are HINT instructions which all have same encoding // apart from bits [11:5] diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index d21376fd3ee47..4f4853fa43e99 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1922,7 +1922,7 @@ ThunkSection *ThunkCreator::getISThunkSec(InputSection *isec) { if (isec->outSecOff < first->outSecOff || last->outSecOff < isec->outSecOff) continue; - ts = addThunkSection(tos, isd, isec->outSecOff); + ts = addThunkSection(tos, isd, isec->outSecOff, /*isPrefix=*/true); thunkedSections[isec] = ts; return ts; } @@ -1981,11 +1981,11 @@ void ThunkCreator::createInitialThunkSections( ThunkSection *ThunkCreator::addThunkSection(OutputSection *os, InputSectionDescription *isd, - uint64_t off) { + uint64_t off, bool isPrefix) { auto *ts = make(ctx, os, off); ts->partition = os->partition; if ((ctx.arg.fixCortexA53Errata843419 || ctx.arg.fixCortexA8) && - !isd->sections.empty()) { + !isd->sections.empty() && !isPrefix) { // The errata fixes are sensitive to addresses modulo 4 KiB. When we add // thunks we disturb the base addresses of sections placed after the thunks // this makes patches we have generated redundant, and may cause us to @@ -2009,6 +2009,12 @@ ThunkSection *ThunkCreator::addThunkSection(OutputSection *os, // 2.) The InputSectionDescription is larger than 4 KiB. This will prevent // any assertion failures that an InputSectionDescription is < 4 KiB // in size. + // + // isPrefix is a ThunkSection explicitly inserted before its target + // section. We suppress the rounding up of the size of these ThunkSections + // as unlike normal ThunkSections, they are small in size, but when BTI is + // enabled very frequent. This can bloat code-size and push the errata + // patches out of branch range. uint64_t isdSize = isd->sections.back()->outSecOff + isd->sections.back()->getSize() - isd->sections.front()->outSecOff; diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 86ca298cd7a56..1ed39bc5f64ac 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -203,7 +203,7 @@ class ThunkCreator { std::pair getSyntheticLandingPad(Defined &d, int64_t a); ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *, - uint64_t off); + uint64_t off, bool isPrefix = false); bool normalizeExistingThunk(Relocation &rel, uint64_t src); diff --git a/lld/test/ELF/aarch64-cortex-a53-843419-thunk-relocation-crash.s b/lld/test/ELF/aarch64-cortex-a53-843419-thunk-relocation-crash.s new file mode 100644 index 0000000000000..94477c3312d0c --- /dev/null +++ b/lld/test/ELF/aarch64-cortex-a53-843419-thunk-relocation-crash.s @@ -0,0 +1,107 @@ +// REQUIRES: aarch64 +// RUN: rm -rf %t && split-file %s %t && cd %t +// RUN: llvm-mc -mattr=+bti -filetype=obj -triple=aarch64 asm -o a.o +// RUN: ld.lld --script lds -fix-cortex-a53-843419 -verbose a.o -o exe \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-PRINT %s +// RUN: llvm-objdump --no-print-imm-hex --no-show-raw-insn --triple=aarch64-linux-gnu -d exe | FileCheck %s + +/// Test case for specific crash wrt interaction between thunks and errata +/// patches where the size of the added thunks meant that a range-extension +/// thunk to the patch was required. We need to check that a BTI Thunk is +/// generated for the patch, and that the patch's direct branch return is also +/// range extended, possibly needing another BTI Thunk. +/// +/// The asm below is based on a crash that was happening in Chromium. +/// For more information see https://issues.chromium.org/issues/440019454 + +//--- asm +.section .note.gnu.property,"a" +.p2align 3 +.long 4 +.long 0x10 // descriptor length +.long 0x5 // GNU property type +.asciz "GNU" +.long 0xc0000000 // GNU_PROPERTY_AARCH64_FEATURE_1_AND +.long 4 +.long 1 // GNU_PROPERTY_AARCH64_FEATURE_1_BTI +.long 0 + + .section .text.01, "ax", %progbits + .balign 4096 + .globl _start + .type _start, %function +_start: + bl far_away_no_bti + + .section .text.far, "ax", %progbits + .globl far_away_no_bti + .type far_away, function +far_away_no_bti: + .space 4096 - 28, 0 + adrp x0, dat + ldr x1, [x1, #0] + ldr x0, [x0, :got_lo12:dat] + .space 0x8000000, 0 + ret + + .section .data + .globl dat +dat: .quad 0 + +// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 8010FFC in unpatched output. + +// CHECK: 0000000000010000 <_start>: +// CHECK-NEXT: 10000: bl 0x10008 <__AArch64AbsLongThunk_far_away_no_bti> + +// CHECK: <__AArch64AbsLongThunk_far_away_no_bti>: +// CHECK-NEXT: 10008: ldr x16, 0x10010 +// CHECK-NEXT: br x16 +// CHECK-NEXT: 10010: 18 00 01 08 .word 0x08010018 + +// Check that the BTI thunks do NOT have their size rounded up to 4 KiB. +// They precede the patch and they contain the landing pad. +// CHECK: <__AArch64BTIThunk_far_away_no_bti>: +// CHECK-NEXT: 8010018: bti c +// CHECK-NEXT: b 0x8010038 + +// CHECK: <__AArch64AbsLongThunk___CortexA53843419_8011004>: +// CHECK-NEXT: 8010020: ldr x16, 0x8010028 +// CHECK-NEXT: br x16 +// CHECK-NEXT: 8010028: 34 10 01 10 .word 0x10011034 + +// CHECK: <__AArch64BTIThunk_>: +// CHECK-NEXT: 8010030: bti c +// CHECK-NEXT: b 0x8011028 + +// CHECK: 8010038 : +// CHECK-NEXT: ... +// CHECK-NEXT: 801101c: adrp x0, 0x10012000 +// CHECK-NEXT: ldr x1, [x1] +// CHECK-NEXT: b 0x8010020 <__AArch64AbsLongThunk___CortexA53843419_8011004> +// CHECK-NEXT: ... +// CHECK-NEXT: 10011028: ret + +// Check that the errata thunk does NOT contain a landing pad +// CHECK: <__CortexA53843419_8011004>: +// CHECK-NEXT: 1001102c: ldr x0, [x0, #64] +// CHECK-NEXT: b 0x10011040 <__AArch64AbsLongThunk_> + +// Rest of generated code for readability +// CHECK: <__AArch64BTIThunk___CortexA53843419_8011004>: +// CHECK-NEXT: 10011034: bti c +// CHECK-NEXT: b 0x1001102c <__CortexA53843419_8011004> + +// CHECK: <__AArch64AbsLongThunk_> +// CHECK-NEXT: 10011040: ldr x16, 0x10011048 +// CHECK-NEXT: br x16 +// CHECK-NEXT: 10011048: 30 00 01 08 .word 0x08010030 + +//--- lds +SECTIONS { + .text 0x10000 : { + *(.text.01); + . += 0x8000000; + *(.text.far); + } +} +