Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
214 changes: 200 additions & 14 deletions llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "AIEMachineScheduler.h"
#include "AIEMaxLatencyFinder.h"
#include "AIEMultiSlotInstrMaterializer.h"
#include "AIERegDefUseTracker.h"
#include "Utils/AIELoopUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
Expand All @@ -37,6 +38,7 @@
// --debug-only=sched-blocks,machine-scheduler
#define DEBUG_LOOPAWARE(X) DEBUG_WITH_TYPE("loop-aware", X)
#define DEBUG_BLOCKS(X) DEBUG_WITH_TYPE("sched-blocks", X)
#define DEBUG_REGALLOC(X) DEBUG_WITH_TYPE("aie-reg-liverange", X)

using namespace llvm;

Expand Down Expand Up @@ -75,8 +77,44 @@ static cl::opt<int> PostPipelinerMaxTryII(
"aie-postpipeliner-maxtry-ii", cl::init(20),
cl::desc("[AIE] Maximum II steps to be tried in the post-ra pipeliner"));

// Test-only switch (hidden, off by default). When set,
// BlockState::initInterBlock performs the multi-slot materialization (if
// enabled) and the register live range analysis for a single-region block
// even when the block is not a postpipeliner candidate or when both
// postpipeliner modes are disabled.
static cl::opt<bool> TestRegDefUseTracker(
"aie-test-regdefuse-tracker", cl::Hidden, cl::init(false),
cl::desc("[AIE] TEST MODE: Run RegDefUseTracker analysis on all loops "
"(for testing only)"));
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is accommodating a dump for the early stages of live range analysis.


namespace llvm::AIE {

// Map a PostPipelinerMode onto a printable, NUL-terminated name.
// Any value that is not one of the enumerators yields "Unknown".
const char *getPostPipelinerModeName(PostPipelinerMode Mode) {
  // Start from the fallback; each enumerator overrides it below. The switch
  // deliberately has no default so the compiler can flag new enumerators.
  const char *Name = "Unknown";
  switch (Mode) {
  case PostPipelinerMode::None:
    Name = "None";
    break;
  case PostPipelinerMode::Physical:
    Name = "Physical";
    break;
  case PostPipelinerMode::Virtual:
    Name = "Virtual";
    break;
  }
  return Name;
}

// Option for enabling virtual register mode in the postpipeliner.
// Enabled by default. At a given II, virtual mode is tried after physical
// mode when that is enabled, and first otherwise (see firstPipelinerMode()
// and nextPipelinerMode()).
static cl::opt<bool> PostPipelinerVRegMode(
"aie-postpipeliner-vreg-mode", cl::Hidden, cl::init(true),
cl::desc("[AIE] Enable virtual register mode for the postpipeliner "
"(replaces filtered physical registers with virtual registers)"));

// Option for enabling physical register mode in the postpipeliner.
// Enabled by default. When enabled, physical mode is the first mode tried at
// each II (see firstPipelinerMode()).
static cl::opt<bool> PostPipelinerPhysMode(
"aie-postpipeliner-phys-mode", cl::Hidden, cl::init(true),
cl::desc("[AIE] Enable physical register mode for the postpipeliner "
"(use physical registers without virtualization)"));

// Option for filtering live ranges with no register choice.
// Disabled by default. When set, BlockState::initInterBlock calls
// RegTracker->filterByRegisterAvailability() once, right after the initial
// live range analysis.
static cl::opt<bool> FilterNoChoiceRegs(
"aie-postpipeliner-filter-no-choice", cl::Hidden, cl::init(false),
cl::desc("[AIE] Filter out live ranges with only one available physical "
"register to prevent pipeliner invalidation"));

void dumpInterBlock(const InterBlockEdges &Edges) {
for (const SUnit &SU : Edges) {
dbgs() << "SU" << SU.NodeNum << ": " << *SU.getInstr();
Expand Down Expand Up @@ -392,8 +430,7 @@ bool InterBlockScheduling::leaveBlock() {
BS.clearSchedule();
PipelineExtractor GenSchedule(*this, BS, *TII);
auto &PostSWP = BS.getPostSWP();
PostSWP.visitPipelineSchedule(GenSchedule);
PostSWP.updateTripCount();
PostSWP.materializePipeline(GenSchedule);
break;
}
case SchedulingStage::SchedulingDone:
Expand Down Expand Up @@ -538,6 +575,30 @@ SchedulingStage InterBlockScheduling::updateFixPoint(BlockState &BS) {
return updatePipelining(BS);
}

// Select the pipeliner mode that starts the mode sequence, as dictated by the
// command line options. Physical mode takes precedence over virtual mode;
// None is returned when both modes are switched off.
static PostPipelinerMode firstPipelinerMode() {
  if (PostPipelinerPhysMode)
    return PostPipelinerMode::Physical;
  return PostPipelinerVRegMode ? PostPipelinerMode::Virtual
                               : PostPipelinerMode::None;
}

// Advance from \p Current to the following pipeliner mode in the sequence.
// The only possible step is Physical -> Virtual, and only when virtual mode
// is enabled; in every other case the sequence is exhausted and None is
// returned.
static PostPipelinerMode nextPipelinerMode(PostPipelinerMode Current) {
  const bool StepToVirtual =
      Current == PostPipelinerMode::Physical && PostPipelinerVRegMode;
  return StepToVirtual ? PostPipelinerMode::Virtual : PostPipelinerMode::None;
}

SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) {
if (BS.FixPoint.NumIters >
MaxExpensiveIterations + 2 * HR->getConflictHorizon()) {
Expand Down Expand Up @@ -608,10 +669,16 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) {
<< "\n");

// The loop schedule has converged, so we could declare our work done.
// But first try SWP
// But first try SWP if we have a single region and pipelining is enabled
if (BS.getRegions().size() == 1) {
auto &PostSWP = BS.getPostSWP();
if (PostSWP.isPostPipelineCandidate(*BS.TheBlock)) {
// Determine which pipelining mode to use
BS.FixPoint.PipelinerMode = firstPipelinerMode();
if (BS.FixPoint.PipelinerMode == PostPipelinerMode::None) {
return SchedulingStage::SchedulingDone;
}

BS.FixPoint.II = PostSWP.getResMII(*BS.TheBlock);
BS.FixPoint.IITries = 1;
return SchedulingStage::Pipelining;
Expand All @@ -623,14 +690,36 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) {
SchedulingStage InterBlockScheduling::updatePipelining(BlockState &BS) {
// We have been pipelining. Check whether we were successful.
if (BS.FixPoint.Stage == SchedulingStage::PipeliningDone) {
return BS.FixPoint.Stage;
return SchedulingStage::PipeliningDone;
}

// If pipelining is disabled, we shouldn't be here
if (BS.FixPoint.PipelinerMode == PostPipelinerMode::None) {
return SchedulingStage::PipeliningFailed;
}

// Otherwise try a larger II.
// We failed. undo all changes that were required for this attempt.
BS.restorePipelining();

// Try the next mode at the same II.
const PostPipelinerMode NextMode =
nextPipelinerMode(BS.FixPoint.PipelinerMode);
if (NextMode != PostPipelinerMode::None) {
BS.FixPoint.PipelinerMode = NextMode;
DEBUG_LOOPAWARE(dbgs() << "Trying next mode at II=" << BS.FixPoint.II
<< "\n");
return SchedulingStage::Pipelining;
}

// We progressed through all pipeliner modes and failed.
// Try a larger II.
// We cut off at larger IIs to prevent excessive compilation time.
if (++BS.FixPoint.II <= PostPipelinerMaxII &&
++BS.FixPoint.IITries <= PostPipelinerMaxTryII) {
return SchedulingStage::Pipelining;
BS.FixPoint.PipelinerMode = firstPipelinerMode();
if (BS.FixPoint.PipelinerMode != PostPipelinerMode::None) {
return SchedulingStage::Pipelining;
}
Copy link
Collaborator Author

@martien-de-jong martien-de-jong Jan 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks a bit weird: we have been pipelining and are trying to restore to the first allowed pipelinermode for the next II. This should be invariant, so I don't think we can get None here. Perhaps assert.

}

auto *BB = BS.TheBlock;
Expand Down Expand Up @@ -1124,6 +1213,47 @@ void BlockState::setPipelined() {
FixPoint.Stage = SchedulingStage::PipeliningDone;
}

void BlockState::initPipelining() {
  const auto Mode = FixPoint.PipelinerMode;

  // A concrete pipeliner mode must have been selected before we get here.
  assert(Mode != PostPipelinerMode::None &&
         "initPipelining called when not pipelining");

  DEBUG_REGALLOC(dbgs() << "initPipelining called with mode="
                        << getPostPipelinerModeName(Mode)
                        << " II=" << FixPoint.II << "\n");

  // Only virtual mode needs any preparation.
  if (Mode != PostPipelinerMode::Virtual)
    return;

  // The tracker is created and analyzed in initInterBlock, so it must be
  // available whenever virtual mode is selected.
  assert(RegTracker && "RegTracker must exist in virtual mode");

  // The live range analysis has already run once; this attempt only has to
  // virtualize the filtered physical registers. The tracker treats the state
  // as virtualized even when no live ranges were found.
  RegTracker->virtualizeFilteredPhysRegs();
  DEBUG_REGALLOC(dbgs() << "Virtualized for pipelining attempt at II="
                        << FixPoint.II << "\n");
}

void BlockState::restorePipelining() {
  // Nothing was virtualized unless we were pipelining in virtual mode.
  if (FixPoint.PipelinerMode != PostPipelinerMode::Virtual)
    return;

  // Virtual mode always comes with a tracker.
  assert(RegTracker && "RegTracker must exist in virtual mode");

  // The registers are no longer virtualized if register allocation succeeded
  // and committed; in that case there is nothing to undo.
  if (!RegTracker->areRegistersVirtualized())
    return;

  // Put the original physical registers back. The analysis results are kept,
  // since they are invariant and get reused by the next attempt.
  RegTracker->restoreOriginalPhysRegs();
}

int BlockState::getScheduleLength() const {
int Length = 0;
for (auto &R : Regions) {
Expand Down Expand Up @@ -1184,15 +1314,71 @@ void BlockState::initInterBlock(const MachineSchedContext &Context,
}) &&
"Loop cannot have fixed instructions");
BoundaryEdges = std::make_unique<InterBlockEdges>(Context);

// Start with None - we'll determine the actual mode after scheduling
// converges
FixPoint.PipelinerMode = PostPipelinerMode::None;

if (Regions.size() == 1) {
// Don't worry, this just constructs a mostly empty container class
auto NumInstrs = getTop().getFreeInstructions().size();
PostSWP = std::make_unique<PostPipeliner>(HR, NumInstrs);

// perform static assignment of multi-slot pseudos
if (EnableMultiSlotInstrMaterialization &&
PostSWP->isPostPipelineCandidate(*TheBlock)) {
staticallyMaterializeMultiSlotInstructions(*TheBlock, HR, MaterializeAll);
// Create the persistent tracker that will be used throughout pipelining
RegTracker = std::make_unique<RegLiveRangeTracker>(*TheBlock);

// Create PostSWP with the persistent tracker
const auto NumInstrs = getTop().getFreeInstructions().size();
PostSWP = std::make_unique<PostPipeliner>(HR, NumInstrs, *RegTracker,
*TheBlock->getParent());

// Check if isPostPipelineCandidate, if so, perform materialization and
// register tracking.
// Also run analysis if TestRegDefUseTracker is enabled (for testing).
// Only proceed if at least one pipelining mode is enabled.
const bool PipeliningEnabled =
PostPipelinerVRegMode || PostPipelinerPhysMode;
if ((PipeliningEnabled && PostSWP->isPostPipelineCandidate(*TheBlock)) ||
TestRegDefUseTracker) {
// Perform static assignment of multi-slot pseudos
if (EnableMultiSlotInstrMaterialization) {
staticallyMaterializeMultiSlotInstructions(*TheBlock, HR,
MaterializeAll);
}

// Run register live range analysis ONCE using the invariant semantic
// order. This analysis is done after static MSP materialization to
// analyze the materialized state. The semantic order and physical
// register state are invariant across all pipelining attempts, so we
// only need to analyze once.
RegTracker->analyze(*TheBlock, getTop().getFreeInstructions());
DEBUG_REGALLOC(dbgs() << "Initial analysis (performed once):\n");
DEBUG_REGALLOC(RegTracker->dump());

// Optionally filter out live ranges with no register choice.
// This is also done once since the available registers don't change.
if (FilterNoChoiceRegs) {
RegTracker->filterByRegisterAvailability();
DEBUG_REGALLOC(dbgs() << "After filtering by register availability:\n");
DEBUG_REGALLOC(RegTracker->dump());
}

// Find and dump the most promising scarce range set.
const auto &ScarceRanges = RegTracker->getMostPromisingScarceRanges();
DEBUG_REGALLOC({
dbgs() << "Most promising scarce range set: " << ScarceRanges.size()
<< " ranges\n";
if (!ScarceRanges.empty()) {
const TargetRegisterInfo *TRI =
TheBlock->getParent()->getSubtarget().getRegisterInfo();
dbgs() << "Register class: "
<< TRI->getRegClassName(ScarceRanges[0]->getRegisterClass())
<< "\n";
for (size_t I = 0; I < ScarceRanges.size(); ++I) {
const auto *LR = ScarceRanges[I];
dbgs() << " [" << I
<< "] BaseReg=" << TRI->getName(LR->getBaseReg())
<< " Defs=" << LR->getNumDefs()
<< " Uses=" << LR->getNumUses() << "\n";
}
}
});
}
}

Expand Down
23 changes: 22 additions & 1 deletion llvm/lib/Target/AIE/AIEInterBlockScheduling.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
Expand All @@ -22,6 +22,7 @@
#include "AIEDataDependenceHelper.h"
#include "AIEHazardRecognizer.h"
#include "AIEPostPipeliner.h"
#include "AIERegDefUseTracker.h"
#include "Utils/AIELoopUtils.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
Expand Down Expand Up @@ -82,6 +83,13 @@ class InterBlockEdges {
// handling.
enum class BlockType { Regular, Loop, Epilogue };

// PostPipelinerMode determines whether the postpipeliner operates on physical
// registers or virtualizes them for better scheduling opportunities.
// None is the default value of FixedpointState::PipelinerMode. It is also
// returned by firstPipelinerMode()/nextPipelinerMode() in
// AIEInterBlockScheduling.cpp when there is no (further) mode to try, i.e. it
// acts as the end marker when stepping through the modes.
enum class PostPipelinerMode { None, Physical, Virtual };
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does None mean?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

None is just a default value. It can also be used as a sentinel to indicate end() when iterating through pipeliner modes.


// Helper function to get the name of a PostPipelinerMode as a string
const char *getPostPipelinerModeName(PostPipelinerMode Mode);

// These are states in the state machine that drives scheduling
enum class SchedulingStage {
// We are gathering all regions in the block to initialize the BlockState.
Expand Down Expand Up @@ -114,6 +122,8 @@ enum class SchedulingStage {
class FixedpointState {
public:
SchedulingStage Stage = SchedulingStage::Scheduling;
// PostPipeliner mode - physical or virtual register mode
PostPipelinerMode PipelinerMode = PostPipelinerMode::None;
// Parameters of the loop-aware convergence
int LatencyMargin = 0;
SmallMapVector<MachineInstr *, int, 8> PerMILatencyMargin;
Expand Down Expand Up @@ -207,6 +217,9 @@ class BlockState {
// This holds an instance of the PostPipeliner for candidate loops.
std::unique_ptr<PostPipeliner> PostSWP;

// This holds an instance of the RegLiveRangeTracker for loops.
std::unique_ptr<llvm::RegLiveRangeTracker> RegTracker;

public:
BlockState(MachineBasicBlock *Block);
MachineBasicBlock *TheBlock = nullptr;
Expand Down Expand Up @@ -271,6 +284,14 @@ class BlockState {
void clearSchedule();

void setPipelined();

/// Initialize for pipelining - virtualizes the filtered physical registers
/// when the pipeliner mode is Virtual
void initPipelining();

/// Restore after failed pipelining - restores physical registers if
/// virtualized
void restorePipelining();

bool isScheduled() const {
return FixPoint.Stage == SchedulingStage::SchedulingDone || isPipelined() ||
pipeliningFailed();
Expand Down
Loading
Loading