Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4869,6 +4869,32 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
emitArraySubscriptGEP(*this, Int8Ty, Addr.emitRawPointer(*this),
ScaledIdx, false, SignedIndices, E->getExprLoc());
Addr = Address(EltPtr, OrigBaseElemTy, EltAlign);
} else if (getLangOpts().HLSL &&
E->getType().getAddressSpace() == LangAS::hlsl_constant) {
// This is an array inside of a cbuffer.
Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);

SmallVector<llvm::Value *, 2> Indices;
Indices.push_back(EmitIdxAfterBase(/*Promote*/true));

CharUnits ElementSize = getContext().getTypeSizeInChars(E->getType());
CharUnits RowAlignedSize = ElementSize.alignTo(CharUnits::fromQuantity(16));

llvm::Type *EltTyToIndex = Addr.getElementType();
if (RowAlignedSize > ElementSize) {
llvm::Type *Padding = CGM.getTargetCodeGenInfo().getHLSLPadding(
CGM, RowAlignedSize - ElementSize);
EltTyToIndex = llvm::StructType::get(
getLLVMContext(), {EltTyToIndex, Padding}, /*isPacked=*/true);
Indices.push_back(llvm::ConstantInt::get(Indices[0]->getType(), 0));
}

CharUnits EltAlign =
getArrayElementAlign(Addr.getAlignment(), Indices[0], RowAlignedSize);
llvm::Value *EltPtr =
emitArraySubscriptGEP(*this, EltTyToIndex, Addr.emitRawPointer(*this),
Indices, false, SignedIndices, E->getExprLoc());
Addr = Address(EltPtr, Addr.getElementType(), EltAlign);
} else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) {
// If this is A[i] where A is an array, the frontend will have decayed the
// base to be an ArrayToPointerDecay implicit cast. While correct, it is
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CGExprAgg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2279,6 +2279,10 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
}
}

if (getLangOpts().HLSL && Ty.getAddressSpace() == LangAS::hlsl_constant)
if (CGM.getHLSLRuntime().emitBufferCopy(*this, DestPtr, SrcPtr, Ty))
return;

// Aggregate assignment turns into llvm.memcpy. This is almost valid per
// C99 6.5.16.1p3, which states "If the value being stored in an object is
// read from another object that overlaps in any way the storage of the first
Expand Down
172 changes: 159 additions & 13 deletions clang/lib/CodeGen/CGHLSLRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
//===----------------------------------------------------------------------===//

#include "CGHLSLRuntime.h"
#include "Address.h"
#include "CGDebugInfo.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "HLSLBufferLayoutBuilder.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attrs.inc"
Expand All @@ -26,6 +26,7 @@
#include "clang/AST/Type.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Frontend/HLSL/RootSignatureMetadata.h"
Expand All @@ -43,6 +44,8 @@
#include <cstdint>
#include <optional>

#define DEBUG_TYPE "cghlslruntime"

using namespace clang;
using namespace CodeGen;
using namespace clang::hlsl;
Expand Down Expand Up @@ -265,9 +268,9 @@ CGHLSLRuntime::convertHLSLSpecificType(const Type *T,
assert(T->isHLSLSpecificType() && "Not an HLSL specific type!");

// Check if the target has a specific translation for this type first.
if (llvm::Type *TargetTy =
if (llvm::Type *LayoutTy =
CGM.getTargetCodeGenInfo().getHLSLType(CGM, T, Packoffsets))
return TargetTy;
return LayoutTy;

llvm_unreachable("Generic handling of HLSL types is not supported.");
}
Expand All @@ -284,10 +287,8 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,

// get the layout struct from constant buffer target type
llvm::Type *BufType = BufGV->getValueType();
llvm::Type *BufLayoutType =
cast<llvm::TargetExtType>(BufType)->getTypeParameter(0);
llvm::StructType *LayoutStruct = cast<llvm::StructType>(
cast<llvm::TargetExtType>(BufLayoutType)->getTypeParameter(0));
cast<llvm::TargetExtType>(BufType)->getTypeParameter(0));

// Start metadata list associating the buffer global variable with its
// constants
Expand Down Expand Up @@ -326,6 +327,9 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
continue;
}

if (CGM.getTargetCodeGenInfo().isHLSLPadding(*ElemIt))
++ElemIt;

assert(ElemIt != LayoutStruct->element_end() &&
"number of elements in layout struct does not match");
llvm::Type *LayoutType = *ElemIt++;
Expand Down Expand Up @@ -423,12 +427,11 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) {
if (BufDecl->hasValidPackoffset())
fillPackoffsetLayout(BufDecl, Layout);

llvm::TargetExtType *TargetTy =
cast<llvm::TargetExtType>(convertHLSLSpecificType(
ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr));
llvm::Type *LayoutTy = convertHLSLSpecificType(
ResHandleTy, BufDecl->hasValidPackoffset() ? &Layout : nullptr);
llvm::GlobalVariable *BufGV = new GlobalVariable(
TargetTy, /*isConstant*/ false,
GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(TargetTy),
LayoutTy, /*isConstant*/ false,
GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(LayoutTy),
llvm::formatv("{0}{1}", BufDecl->getName(),
BufDecl->isCBuffer() ? ".cb" : ".tb"),
GlobalValue::NotThreadLocal);
Expand All @@ -454,7 +457,7 @@ void CGHLSLRuntime::addRootSignature(
SignatureDecl->getRootElements(), nullptr, M);
}

llvm::TargetExtType *
llvm::StructType *
CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
const auto Entry = LayoutTypes.find(StructType);
if (Entry != LayoutTypes.end())
Expand All @@ -463,7 +466,7 @@ CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) {
}

void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType,
llvm::TargetExtType *LayoutTy) {
llvm::StructType *LayoutTy) {
assert(getHLSLBufferLayoutType(StructType) == nullptr &&
"layout type for this struct already exist");
LayoutTypes[StructType] = LayoutTy;
Expand Down Expand Up @@ -997,3 +1000,146 @@ std::optional<LValue> CGHLSLRuntime::emitResourceArraySubscriptExpr(
}
return CGF.MakeAddrLValue(TmpVar, ResultTy, AlignmentSource::Decl);
}

namespace {
/// Emits a field-by-field copy of a value that lives in an HLSL constant
/// buffer.
///
/// The source is addressed through the buffer layout type produced by
/// HLSLBufferLayoutBuilder, which may contain explicit padding elements. The
/// destination is addressed through DestPtr's own element type. While the
/// layout type is walked recursively, two GEP index lists are kept in step:
/// one describing the path on the load (layout) side and one describing the
/// path on the store (destination) side.
class HLSLBufferCopyEmitter {
  CodeGenFunction &CGF;
  Address DestPtr;
  Address SrcPtr;
  /// Layout type of the value being copied; set by emitCopy().
  llvm::Type *LayoutTy = nullptr;

  /// GEP indices of the element currently being visited, relative to DestPtr.
  SmallVector<llvm::Value *> CurStoreIndices;
  /// GEP indices of the element currently being visited, relative to SrcPtr
  /// when indexed as LayoutTy.
  SmallVector<llvm::Value *> CurLoadIndices;

  /// Copies the element of layout type \p FieldTy found at \p LoadIndex on the
  /// source side to \p StoreIndex on the destination side.
  ///
  /// Both indices are appended to the current index paths for the duration of
  /// the call. Aggregates are expanded recursively; anything else is copied
  /// with a single load/store pair.
  void emitCopyAtIndices(llvm::Type *FieldTy, llvm::ConstantInt *StoreIndex,
                         llvm::ConstantInt *LoadIndex) {
    CurStoreIndices.push_back(StoreIndex);
    CurLoadIndices.push_back(LoadIndex);
    // Pop the indices again on every exit path, including the early returns
    // taken after an aggregate has been handled recursively.
    auto RestoreIndices = llvm::make_scope_exit([&]() {
      CurStoreIndices.pop_back();
      CurLoadIndices.pop_back();
    });

    // processBufferLayoutArray must be tried before processStruct: a padded
    // array is itself represented as a struct.
    if (processArray(FieldTy))
      return;
    if (processBufferLayoutArray(FieldTy))
      return;
    if (processStruct(FieldTy))
      return;

    // We have a scalar or vector element - emit a copy.
    CharUnits Align = CharUnits::fromQuantity(
        CGF.CGM.getDataLayout().getABITypeAlign(FieldTy));
    Address SrcGEP = RawAddress(
        CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(),
                                      CurLoadIndices, "cbuf.src"),
        FieldTy, Align, SrcPtr.isKnownNonNull());
    Address DestGEP = CGF.Builder.CreateInBoundsGEP(
        DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest");
    llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load");
    CGF.Builder.CreateStore(Load, DestGEP);
  }

  /// Handles a plain LLVM array type.
  ///
  /// \returns false if \p FieldTy is not an llvm::ArrayType, true once every
  /// element has been copied.
  bool processArray(llvm::Type *FieldTy) {
    auto *AT = dyn_cast<llvm::ArrayType>(FieldTy);
    if (!AT)
      return false;

    // If we have an array then there isn't any padding
    // between elements. We just need to copy each element over.
    for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I)
      emitCopyAtIndices(AT->getElementType(),
                        llvm::ConstantInt::get(CGF.SizeTy, I),
                        llvm::ConstantInt::get(CGF.SizeTy, I));
    return true;
  }

  /// Handles an array whose layout carries per-element padding.
  ///
  /// The shape recognised here is a two-element struct
  ///   { [N x { Elt, Padding }], Elt }
  /// i.e. N padded elements followed by one final unpadded element, which is
  /// copied to a destination array of N + 1 elements.
  ///
  /// \returns false if \p FieldTy does not have exactly this shape, true once
  /// all elements have been copied.
  bool processBufferLayoutArray(llvm::Type *FieldTy) {
    auto *ST = dyn_cast<llvm::StructType>(FieldTy);
    if (!ST || ST->getNumElements() != 2)
      return false;

    auto *PaddedEltsTy = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
    if (!PaddedEltsTy)
      return false;

    auto *PaddedTy = dyn_cast<llvm::StructType>(PaddedEltsTy->getElementType());
    if (!PaddedTy || PaddedTy->getNumElements() != 2)
      return false;

    if (!CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(
            PaddedTy->getElementType(1)))
      return false;

    llvm::Type *ElementTy = ST->getElementType(1);
    if (PaddedTy->getElementType(0) != ElementTy)
      return false;

    // All but the last of the logical array elements are in the padded array.
    unsigned NumElts = PaddedEltsTy->getNumElements() + 1;

    // Add an extra indirection to the load for the struct and walk the
    // array prefix.
    CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 0));
    for (unsigned I = 0; I < NumElts - 1; ++I) {
      // We need to copy the element itself, without the padding: the load
      // path becomes [..., 0, I, 0] while the store path is just [..., I].
      CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, I));
      emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.SizeTy, I),
                        llvm::ConstantInt::get(CGF.Int32Ty, 0));
      CurLoadIndices.pop_back();
    }
    CurLoadIndices.pop_back();

    // Now copy the last element, which is field 1 of the outer struct.
    emitCopyAtIndices(ElementTy,
                      llvm::ConstantInt::get(CGF.SizeTy, NumElts - 1),
                      llvm::ConstantInt::get(CGF.Int32Ty, 1));

    return true;
  }

  /// Handles a struct in the layout type, skipping explicit padding elements.
  ///
  /// \returns false if \p FieldTy is not an llvm::StructType, true once every
  /// non-padding field has been copied.
  bool processStruct(llvm::Type *FieldTy) {
    auto *ST = dyn_cast<llvm::StructType>(FieldTy);
    if (!ST)
      return false;

    // I walks the layout struct, so it is the load-side field index. Padding
    // elements are skipped, so the store-side field index trails I by the
    // number of padding elements seen so far. Skipped never exceeds I at a
    // non-padding field, so the subtraction cannot wrap.
    // NOTE(review): this assumes the destination struct has no padding
    // elements, matching how processBufferLayoutArray treats the store side.
    unsigned Skipped = 0;
    for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) {
      llvm::Type *ElementTy = ST->getElementType(I);
      if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy)) {
        ++Skipped;
        continue;
      }
      emitCopyAtIndices(ElementTy,
                        llvm::ConstantInt::get(CGF.Int32Ty, I - Skipped),
                        llvm::ConstantInt::get(CGF.Int32Ty, I));
    }
    return true;
  }

public:
  HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr)
      : CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {}

  /// Emits the copy of a value of type \p CType from SrcPtr to DestPtr.
  ///
  /// \returns true if the copy was emitted, or false if the layout type is
  /// not an aggregate, in which case nothing is emitted and the caller is
  /// expected to copy the value some other way.
  bool emitCopy(QualType CType) {
    LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType);

    LLVM_DEBUG({
      dbgs() << "Emitting copy of ";
      LayoutTy->print(dbgs());
      dbgs() << "\n";
    });

    // If we don't have an aggregate, we can just fall back to normal memcpy.
    if (!LayoutTy->isAggregateType())
      return false;

    // The leading zero on both paths is the GEP's pointer index.
    emitCopyAtIndices(LayoutTy, llvm::ConstantInt::get(CGF.SizeTy, 0),
                      llvm::ConstantInt::get(CGF.SizeTy, 0));
    return true;
  }
};
} // namespace

/// Copies a value of type \p CType from \p SrcPtr to \p DestPtr by delegating
/// to HLSLBufferCopyEmitter. Returns that emitter's result: true if the copy
/// was emitted, false if the caller still has to perform the copy itself.
bool CGHLSLRuntime::emitBufferCopy(CodeGenFunction &CGF, Address DestPtr,
                                   Address SrcPtr, QualType CType) {
  HLSLBufferCopyEmitter Emitter(CGF, DestPtr, SrcPtr);
  const bool Emitted = Emitter.emitCopy(CType);
  return Emitted;
}
23 changes: 12 additions & 11 deletions clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,19 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/IntrinsicsSPIRV.h"

#include "Address.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/HLSLRuntime.h"

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Frontend/HLSL/HLSLResource.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/IntrinsicsSPIRV.h"

#include <optional>
#include <vector>
Expand Down Expand Up @@ -187,24 +186,26 @@ class CGHLSLRuntime {

llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB);

llvm::TargetExtType *
getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
llvm::StructType *getHLSLBufferLayoutType(const RecordType *LayoutStructTy);
void addHLSLBufferLayoutType(const RecordType *LayoutStructTy,
llvm::TargetExtType *LayoutTy);
llvm::StructType *LayoutTy);
void emitInitListOpaqueValues(CodeGenFunction &CGF, InitListExpr *E);

std::optional<LValue>
emitResourceArraySubscriptExpr(const ArraySubscriptExpr *E,
CodeGenFunction &CGF);

bool emitBufferCopy(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr,
QualType CType);

private:
void emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl,
llvm::GlobalVariable *BufGV);
void initializeBufferFromBinding(const HLSLBufferDecl *BufDecl,
llvm::GlobalVariable *GV);
llvm::Triple::ArchType getArch();

llvm::DenseMap<const clang::RecordType *, llvm::TargetExtType *> LayoutTypes;
llvm::DenseMap<const clang::RecordType *, llvm::StructType *> LayoutTypes;
};

} // namespace CodeGen
Expand Down
Loading
Loading