Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions include/circt/Dialect/HW/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,26 @@ def FlattenModules : Pass<"hw-flatten-modules", "mlir::ModuleOp"> {
degenerate into a purely cosmetic construct, at which point it is beneficial
to fully flatten the module hierarchy to simplify further analysis and
optimization of state transfer arcs.

By default, all private modules are inlined. The pass supports heuristics to
control which modules are inlined based on their characteristics.
}];
let options = [
Option<"inlineEmpty", "inline-empty", "bool", "true",
"Inline modules that are empty (only contain hw.output)">,
Option<"inlineNoOutputs", "inline-no-outputs", "bool", "true",
"Inline modules that have no output ports">,
Option<"inlineSingleUse", "inline-single-use", "bool", "true",
"Inline modules that have only one use">,
Option<"inlineSmall", "inline-small", "bool", "true",
"Inline modules that are small (fewer than smallThreshold operations)">,
Option<"smallThreshold", "small-threshold", "unsigned", "8",
"Maximum number of operations for a module to be considered small">,
Comment on lines +64 to +65
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Gating this behind --inline-small-threshold would be more obvious to me as a user.

Option<"inlineWithState", "inline-with-state", "bool", "false",
"Allow inlining of modules that contain state (seq.firreg operations)">,
Option<"inlineAll", "inline-all", "bool", "true",
"Inline all private modules regardless of heuristics (default behavior)">
];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Maybe prefixing these with --hw would be good. I've grown to liking having any option that is only for specific dialects is useful to indicate which dialect it works on. E.g., --dedup-classes in firtool would be better as --firrtl-dedup-classes. The same thing holds here, too.

}

def HWSpecialize : Pass<"hw-specialize", "mlir::ModuleOp"> {
Expand Down
216 changes: 205 additions & 11 deletions lib/Dialect/HW/Transforms/FlattenModules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
#include "circt/Dialect/HW/HWInstanceGraph.h"
#include "circt/Dialect/HW/HWOps.h"
#include "circt/Dialect/HW/HWPasses.h"
#include "circt/Dialect/HW/InnerSymbolNamespace.h"
#include "circt/Dialect/Seq/SeqOps.h"
#include "circt/Support/BackedgeBuilder.h"
#include "mlir/IR/AttrTypeSubElements.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/Inliner.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "hw-flatten-modules"

Expand All @@ -32,19 +35,48 @@ using namespace igraph;
using mlir::InlinerConfig;
using mlir::InlinerInterface;

using HierPathTable = DenseMap<hw::InnerRefAttr, SmallVector<hw::HierPathOp>>;

namespace {

// Cache the inner symbol attribute name to avoid repeated lookups
static const StringRef innerSymAttrName =
InnerSymbolTable::getInnerSymbolAttrName();
struct FlattenModulesPass
: public circt::hw::impl::FlattenModulesBase<FlattenModulesPass> {
using FlattenModulesBase::FlattenModulesBase;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I think the following will work:

Suggested change
using FlattenModulesBase::FlattenModulesBase;
using Base::Base;

I've seen this in upstream MLIR and started using it a lot of places internally. 😉


void runOnOperation() override;

private:
/// Determine if a module should be inlined based on various heuristics.
bool shouldInline(HWModuleOp module, igraph::InstanceGraphNode *instanceNode,
size_t bodySize);
};

/// A simple implementation of the `InlinerInterface` that marks all inlining as
/// legal since we know that we only ever attempt to inline `HWModuleOp` bodies
/// at `InstanceOp` sites.
struct PrefixingInliner : public InlinerInterface {
StringRef prefix;
PrefixingInliner(MLIRContext *context, StringRef prefix)
: InlinerInterface(context), prefix(prefix) {}
InnerSymbolNamespace *ns;
HWModuleOp parentModule;
HWModuleOp sourceModule;
DenseMap<StringAttr, StringAttr> *symMapping;
mlir::AttrTypeReplacer *replacer;
HierPathTable *pathsTable;
hw::InnerRefAttr instanceRef;

PrefixingInliner(MLIRContext *context, StringRef prefix,
InnerSymbolNamespace *ns, HWModuleOp parentModule,
HWModuleOp sourceModule,
DenseMap<StringAttr, StringAttr> *symMapping,
mlir::AttrTypeReplacer *replacer, HierPathTable *pathsTable,
hw::InnerRefAttr instanceRef)
: InlinerInterface(context), prefix(prefix), ns(ns),
parentModule(parentModule), sourceModule(sourceModule),
symMapping(symMapping), replacer(replacer), pathsTable(pathsTable),
instanceRef(instanceRef) {}
Comment on lines +71 to +79
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This constructor may be unnecessary if you use struct initialization. E.g., PrefixingInliner{a, b, c}. I don't know if there's any utility in adding a one-to-one constructor here as opposed to the {} one.


bool isLegalToInline(Region *dest, Region *src, bool wouldBeCloned,
IRMapping &valueMapping) const override {
Expand All @@ -64,7 +96,35 @@ struct PrefixingInliner : public InlinerInterface {
void processInlinedBlocks(
iterator_range<Region::iterator> inlinedBlocks) override {
for (Block &block : inlinedBlocks)
block.walk([&](Operation *op) { updateNames(op); });
block.walk([&](Operation *op) {
updateNames(op);
updateInnerSymbols(op);
});

// Update hierarchical paths that reference the inlined instance
updateHierPaths();
}

void updateHierPaths() const {
// If the instance has an inner symbol, update any hierarchical paths
// that reference it
if (!instanceRef)
return;

auto it = pathsTable->find(instanceRef);
if (it == pathsTable->end())
return;

// For each hierarchical path that references this instance
for (hw::HierPathOp path : it->second) {
SmallVector<Attribute, 4> newPath;
for (auto elem : path.getNamepath()) {
// Skip the instance reference being inlined
if (elem != instanceRef)
newPath.push_back(replacer->replace(elem));
}
path.setNamepathAttr(ArrayAttr::get(path.getContext(), newPath));
}
}

StringAttr updateName(StringAttr attr) const {
Expand All @@ -88,19 +148,89 @@ struct PrefixingInliner : public InlinerInterface {
}
}

void updateInnerSymbols(Operation *op) const {
// Rename inner symbols to avoid conflicts
if (auto innerSymAttr =
op->getAttrOfType<hw::InnerSymAttr>(innerSymAttrName)) {
StringAttr symName = innerSymAttr.getSymName();
auto it = symMapping->find(symName);
if (it != symMapping->end())
op->setAttr(innerSymAttrName, hw::InnerSymAttr::get(it->second));
}

// Apply attribute replacements for InnerRefAttr
replacer->replaceElementsIn(op);
}

bool allowSingleBlockOptimization(
iterator_range<Region::iterator> inlinedBlocks) const final {
return true;
}
};
} // namespace

bool FlattenModulesPass::shouldInline(HWModuleOp module,
igraph::InstanceGraphNode *instanceNode,
size_t bodySize) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is bodySize defined? Is this number of ops or something else?

// If inlineAll is enabled, inline everything (default behavior)
if (this->inlineAll)
return true;

// Check whether the module should be inlined based on heuristics.
bool isEmpty = bodySize == 1;
bool hasNoOutputs = module.getNumOutputPorts() == 0;
bool hasOneUse = instanceNode->getNumUses() == 1;
bool hasState = !module.getOps<seq::FirRegOp>().empty();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about memories or other kinds of state?

Comment on lines +179 to +183
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I would move these closer to their usage sites as opposed to declaring them up top.

bool isSmall = bodySize < this->smallThreshold;

// Don't inline modules with state unless explicitly allowed
if (hasState && !this->inlineWithState)
return false;

// Inline if any of the enabled conditions are met:
bool shouldInlineModule = false;

if (this->inlineEmpty && isEmpty)
shouldInlineModule = true;
Comment on lines +193 to +194
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kind of a micro-optimization, but all of these can early return as shouldInlineModule can never be unset after being set in one of these checks. I think just:

Suggested change
if (this->inlineEmpty && isEmpty)
shouldInlineModule = true;
if (this->inlineEmpty && isEmpty)
return shouldInlineModule = true;


if (this->inlineNoOutputs && hasNoOutputs)
shouldInlineModule = true;

if (this->inlineSingleUse && hasOneUse)
shouldInlineModule = true;

if (this->inlineSmall && isSmall)
shouldInlineModule = true;

return shouldInlineModule;
}

void FlattenModulesPass::runOnOperation() {
auto &instanceGraph = getAnalysis<hw::InstanceGraph>();
DenseSet<Operation *> handled;

InlinerConfig config;

// Build a mapping of hierarchical path ops.
DenseSet<StringAttr> leafModules;
HierPathTable pathsTable;
llvm::for_each(getOperation().getOps<hw::HierPathOp>(),
[&](hw::HierPathOp path) {
// Record leaf modules to be banned from inlining.
if (path.isModule())
leafModules.insert(path.leafMod());

// For each instance in the path, record the path
for (auto name : path.getNamepath()) {
if (auto ref = dyn_cast<hw::InnerRefAttr>(name))
pathsTable[ref].push_back(path);
}
});
Comment on lines +217 to +228
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm usually partial to for_each, however, this is some wonky indentation. Consider a simpler for loop to save a lot of indentation:

for (auto path : getOperation().getOps<hw::HierPathOp>()) {
 /* ... */
}


// Cache InnerSymbolNamespace objects per parent module to avoid
// recreating them for each instance in the same parent.
DenseMap<HWModuleOp, std::unique_ptr<InnerSymbolNamespace>> nsCache;

// Iterate over all instances in the instance graph. This ensures we visit
// every module, even private top modules (private and never instantiated).
for (auto *startNode : instanceGraph) {
Expand All @@ -118,13 +248,37 @@ void FlattenModulesPass::runOnOperation() {
if (numUsesLeft == 0)
continue;

for (auto *instRecord : node->uses()) {
// Only inline private `HWModuleOp`s (no extern or generated modules).
auto module =
dyn_cast_or_null<HWModuleOp>(node->getModule().getOperation());
if (!module || !module.isPrivate())
continue;
// Only inline private `HWModuleOp`s (no extern or generated modules).
auto module =
dyn_cast_or_null<HWModuleOp>(node->getModule().getOperation());
if (!module || !module.isPrivate())
continue;

// Do not inline a module if it is targeted by a module NLA.
if (leafModules.count(module.getNameAttr()))
continue;

// Check if module should be inlined based on heuristics
auto *body = module.getBodyBlock();
size_t bodySize = std::distance(body->begin(), body->end());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I think this is number of ops, right, as this is iterator distance? That seems to make sense.

if (!shouldInline(module, node, bodySize))
continue;

// Build symbol mapping for the module before inlining any instances
DenseMap<StringAttr, StringAttr> inlineModuleInnerSyms;
mlir::AttrTypeReplacer innerRefReplacer;

// Scan the module body to collect all inner symbols that need renaming
for (Operation &oldOp : *body) {
oldOp.walk([&](Operation *op) {
if (auto innerSymAttr =
op->getAttrOfType<hw::InnerSymAttr>(innerSymAttrName))
inlineModuleInnerSyms.insert(
{innerSymAttr.getSymName(), StringAttr()});
});
}
Comment on lines +271 to +279
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like this could walk the body as opposed to iterating over the body and walking the ops it finds.


for (auto *instRecord : node->uses()) {
// Only inline at plain old HW `InstanceOp`s.
auto inst = dyn_cast_or_null<InstanceOp>(
instRecord->getInstance().getOperation());
Expand All @@ -133,7 +287,47 @@ void FlattenModulesPass::runOnOperation() {

bool isLastModuleUse = --numUsesLeft == 0;

PrefixingInliner inliner(&getContext(), inst.getInstanceName());
// Get the parent module
HWModuleOp parentModule = inst->getParentOfType<HWModuleOp>();

// Get or create the InnerSymbolNamespace for the parent module
auto &nsPtr = nsCache[parentModule];
if (!nsPtr)
nsPtr = std::make_unique<InnerSymbolNamespace>(parentModule);

// Create fresh symbol names for this instance
DenseMap<StringAttr, StringAttr> oldToNewInnerSyms;
for (auto [oldSym, _] : inlineModuleInnerSyms)
oldToNewInnerSyms.insert(
{oldSym, StringAttr::get(&getContext(),
nsPtr->newName(oldSym.getValue()))});

// Setup the replacer for InnerRefAttr
mlir::AttrTypeReplacer instanceReplacer;
instanceReplacer.addReplacement(
[&](InnerRefAttr attr) -> std::pair<Attribute, WalkResult> {
if (attr.getModule() != module.getModuleNameAttr())
return {attr, WalkResult::skip()};

auto it = oldToNewInnerSyms.find(attr.getName());
if (it == oldToNewInnerSyms.end())
return {attr, WalkResult::skip()};

auto newAttr = InnerRefAttr::get(parentModule.getModuleNameAttr(),
it->second);
return {newAttr, WalkResult::skip()};
});

// Get the instance's inner reference if it has one
hw::InnerRefAttr instanceRef;
if (auto sym = inst.getInnerSymAttr())
instanceRef = inst.getInnerRef();

PrefixingInliner inliner(&getContext(), inst.getInstanceName(),
nsPtr.get(), parentModule, module,
&oldToNewInnerSyms, &instanceReplacer,
&pathsTable, instanceRef);

if (failed(mlir::inlineRegion(inliner, config.getCloneCallback(),
&module.getBody(), inst,
inst.getOperands(), inst.getResults(),
Expand Down
66 changes: 66 additions & 0 deletions test/Dialect/HW/hw-inliner-options.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// RUN: circt-opt %s --pass-pipeline='builtin.module(hw-flatten-modules{inline-all=false inline-single-use=false inline-small=false inline-empty=false inline-no-outputs=false})' | FileCheck %s --check-prefix=NONE
// RUN: circt-opt %s --pass-pipeline='builtin.module(hw-flatten-modules{inline-all=false inline-single-use=true inline-small=false inline-empty=false inline-no-outputs=false})' | FileCheck %s --check-prefix=SINGLE
// RUN: circt-opt %s --pass-pipeline='builtin.module(hw-flatten-modules{inline-all=false inline-single-use=false inline-small=true inline-empty=false inline-no-outputs=false})' | FileCheck %s --check-prefix=SMALL
// RUN: circt-opt %s --pass-pipeline='builtin.module(hw-flatten-modules{inline-all=false small-threshold=3 inline-single-use=false})' | FileCheck %s --check-prefix=THRESHOLD
// RUN: circt-opt %s --pass-pipeline='builtin.module(hw-flatten-modules{inline-with-state=true})' | FileCheck %s --check-prefix=STATE
Comment on lines +1 to +5
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice iteration over all the options here. 👍


// Test that all inlining heuristics can be controlled via command line options

// NONE-LABEL: hw.module @TestSingleUse
// SINGLE-LABEL: hw.module @TestSingleUse
hw.module @TestSingleUse(in %x: i4, out y: i4) {
// NONE-NEXT: hw.instance "small" @SmallModule
// SINGLE-NEXT: %[[V0:.+]] = comb.add %x, %x
// SINGLE-NEXT: hw.output %[[V0]]
%0 = hw.instance "small" @SmallModule(a: %x: i4) -> (b: i4)
hw.output %0 : i4
}
Comment on lines +11 to +17
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: this may want to use a @LargeModule to definitely avoid this getting inlined with the small option. I.e., there could be a bug where small inlining is turned on and it would be good if this was test was not susceptible to that.


hw.module private @SmallModule(in %a: i4, out b: i4) {
%0 = comb.add %a, %a : i4
hw.output %0 : i4
}

// SMALL-LABEL: hw.module @TestSmall
hw.module @TestSmall(in %x: i4, out y: i4) {
// SMALL-NEXT: %[[V0:.+]] = comb.add %x, %x
// SMALL-NEXT: hw.output %[[V0]]
%0 = hw.instance "tiny" @TinyModule(a: %x: i4) -> (b: i4)
hw.output %0 : i4
}

hw.module private @TinyModule(in %a: i4, out b: i4) {
%0 = comb.add %a, %a : i4
hw.output %0 : i4
}
Comment on lines +32 to +35
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: This is just @SmallModule. Though, going off of the comment above, I think you want small to be "large" and tiny to be "small".


// THRESHOLD-LABEL: hw.module @TestThreshold
hw.module @TestThreshold(in %x: i4, out y: i4) {
// THRESHOLD-NEXT: hw.instance "medium" @MediumModule
%0 = hw.instance "medium" @MediumModule(a: %x: i4) -> (b: i4)
hw.output %0 : i4
}

// This module has 5 operations (4 comb ops + 1 hw.output)
// With threshold=3, it should NOT be inlined
hw.module private @MediumModule(in %a: i4, out b: i4) {
%0 = comb.add %a, %a : i4
%1 = comb.mul %0, %a : i4
%2 = comb.xor %1, %a : i4
%3 = comb.or %2, %a : i4
hw.output %3 : i4
}

// STATE-LABEL: hw.module @TestState
hw.module @TestState(in %clk: !seq.clock, in %x: i4, out y: i4) {
// STATE-NEXT: %[[REG:.+]] = seq.firreg %x clock %clk
// STATE-NEXT: hw.output %[[REG]]
%0 = hw.instance "reg" @RegModule(clk: %clk: !seq.clock, a: %x: i4) -> (b: i4)
hw.output %0 : i4
}

hw.module private @RegModule(in %clk: !seq.clock, in %a: i4, out b: i4) {
%0 = seq.firreg %a clock %clk : i4
hw.output %0 : i4
}

Loading