Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 105 additions & 90 deletions llvm/lib/Target/AIE/AIEBaseSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
Expand Down Expand Up @@ -371,27 +371,17 @@ class RegionEndEdges : public ScheduleDAGMutation {
class EmitFixedSUnits : public ScheduleDAGMutation {
AAResults *AA;

public:
EmitFixedSUnits(AAResults *AA) : AA(AA) {}

void apply(ScheduleDAGInstrs *DAG) override {
AIEPostRASchedStrategy *Scheduler =
static_cast<AIEScheduleDAGMI *>(DAG)->getSchedImpl();
auto *TII = static_cast<const AIEBaseInstrInfo *>(DAG->TII);
auto *ItinData = DAG->MF.getSubtarget().getInstrItineraryData();
const TargetRegisterInfo *TRI = DAG->MF.getSubtarget().getRegisterInfo();
const BlockState &BS =
Scheduler->getInterBlock().getBlockState(DAG->getBB());
const Region &CurRegion = BS.getCurrentRegion();
AIERegMemEventTracker RET{ItinData, TRI, TII, AA};

private:
void createFixedSUDAGNodes(ScheduleDAGInstrs *DAG,
AIEPostRASchedStrategy *Scheduler,
const Region &CurRegion) {
// First, create SUnits for all "fixed" instructions
// Those will be chained from/to the EntrySU/ExitSU to ensure they are
// placed in the correct cycle. The scheduler will enforce that these fixed
// SUnits get placed exactly at their depth (for the Top zone) or height
// (for the Bot zone).
SUnit *Pred = &DAG->EntrySU;
// We itarate over BUNDLEs or standalone instructions.
// We iterate over BUNDLEs or standalone instructions.
for (MachineInstr &MI : CurRegion.top_fixed_instrs()) {
SUnit &FixedSU = Scheduler->addFixedSUnit(MI, /*IsTop=*/true);
SDep Dep(Pred, SDep::Artificial);
Expand All @@ -409,107 +399,105 @@ class EmitFixedSUnits : public ScheduleDAGMutation {
Succ = &FixedSU;
}
DAG->makeMaps();
}

void establishSafeFreeSUToPrologueDistances(
ScheduleDAGInstrs *DAG, AIEPostRASchedStrategy *Scheduler,
const TargetRegisterInfo *TRI, const AIEBaseInstrInfo *TII,
const InstrItineraryData *ItinData) {

MachineBasicBlock *LoopSucc = nullptr;
for (MachineBasicBlock *Succ : DAG->getBB()->successors()) {
const BlockState &SuccBS = Scheduler->getInterBlock().getBlockState(Succ);
if (SuccBS.Kind == BlockType::Loop && SuccBS.isPipelined()) {
LoopSucc = Succ;
break;
}
}

if (!LoopSucc)
return;

const BlockState &LoopBS =
Scheduler->getInterBlock().getBlockState(LoopSucc);

const BlockState &BS =
Scheduler->getInterBlock().getBlockState(DAG->getBB());

const Region &CurRegion = BS.getCurrentRegion();

// Then, create dependencies between "free" and "fixed" instructions
auto IsFreeSU = [Scheduler](const SUnit &SU) {
return Scheduler->isFreeSU(SU);
};
ArrayRef<AIE::MachineBundle> BotFixedBundles =
CurRegion.getBotFixedBundles();
ArrayRef<AIE::MachineBundle> TopFixedBundles =
CurRegion.getTopFixedBundles();

for (SUnit &FreeSU : make_filter_range(DAG->SUnits, IsFreeSU)) {
const MachineInstr &MI = *FreeSU.getInstr();
ArrayRef<AIE::MachineBundle> LoopTimedBundles = LoopBS.getTop().Bundles;

auto UseOrDefReg = [](const MachineInstr &MI) {
return llvm::any_of(
MI.operands(), [](const MachineOperand &MO) { return MO.isReg(); });
};
AIERegMemEventTracker BackwardRET{ItinData, TRI, TII, AA};

if (MI.hasUnmodeledSideEffects() && !MI.mayLoadOrStore() &&
!TII->isLock(MI.getOpcode()) && !UseOrDefReg(MI)) {
// We are in front of an instruction with side effects, but with no
// memory deps and also no data dependency. Such instruction can be
// responsible for event signaling, for example. In this case, we should
// not interleave this instruction with fixed and already scheduled
// instructions. If the instruction does not meet the requirements, it
// will be handled by the subsequent code.
// This instruction should be scheduled before the first bot-fixed
// instruction.
if (!BotFixedBundles.empty()) {
SUnit *FixedDepSU = DAG->getSUnit(&*getBundleStart(
BotFixedBundles.front().getInstrs().front()->getIterator()));
SDep Dep(&FreeSU, SDep::Artificial);
Dep.setLatency(1);
FixedDepSU->addPred(Dep, /*Required=*/true);
}
// This instruction should also be scheduled after the first top-fixed
// instruction.
if (!TopFixedBundles.empty()) {
SUnit *FixedDepSU = DAG->getSUnit(&*getBundleStart(
TopFixedBundles.back().getInstrs().front()->getIterator()));
SDep Dep(FixedDepSU, SDep::Artificial);
Dep.setLatency(1);
FreeSU.addPred(Dep, /*Required=*/true);
}
continue;
}
// Track bot-fixed bundles backward (this sets BotFixedRegionSize)
BackwardRET.computeUseDefBackward(BotFixedBundles,
/*InSeparateRegion=*/false);

MachineInstr *FixedDepMI =
AIE::findEarliestRef(MI, BotFixedBundles, BotFixedBundles.size(), AA)
.MI;
if (!FixedDepMI)
continue;
BackwardRET.computeUseDefBackward(LoopTimedBundles,
/*InSeparateRegion=*/true);

// Create dependencies from free instructions to ExitSU
// Side-effect instructions are now handled automatically by
// getSafeOperandsDistanceFromEnd()
auto IsNonBotFixedSU = [Scheduler](const SUnit &SU) {
return !Scheduler->isFixedSU(SU, /*IsTop*/ false);
};

SUnit *FixedDepSU =
DAG->getSUnit(&*getBundleStart(FixedDepMI->getIterator()));
assert(FixedDepSU && "Fixed Bundle has no corresponding SU.");
SDep Dep(&FreeSU, SDep::Artificial);
auto Latency =
AIE::maxLatency(&MI, *TII, *ItinData, /*IncludeStages=*/true);
if (TII->isLock(MI.getOpcode())) {
Dep.setLatency(std::max(
TII->getCoreResumeCycleAfterLock() -
*TII->getFirstMemoryCycle(FixedDepMI->getDesc().SchedClass) + 1,
Latency));
} else if (TII->isLock(FixedDepMI->getOpcode())) {
Dep.setLatency(
std::max(*TII->getLastMemoryCycle(MI.getDesc().SchedClass) -
TII->getCoreStallCycleAfterLock() + 1,
Latency));
} else {
for (SUnit &SU : make_filter_range(DAG->SUnits, IsNonBotFixedSU)) {
const MachineInstr &MI = *SU.getInstr();
if (const unsigned Latency =
BackwardRET.getSafeOperandsDistanceFromBottom(MI)) {
LLVM_DEBUG(dbgs() << "Prologue: SU(" << SU.NodeNum << ") needs latency "
<< Latency << " to ExitSU: " << MI);
LLVM_DEBUG(dbgs() << " Adding new edge\n");
SDep Dep(&SU, SDep::Artificial);
Dep.setLatency(Latency);
DAG->ExitSU.addPred(Dep, /*Required=*/true);
}
FixedDepSU->addPred(Dep, /*Required=*/true);
}
}

void establishSafeFreeSUToEpilogueDistances(
ScheduleDAGInstrs *DAG, AIEPostRASchedStrategy *Scheduler,
const TargetRegisterInfo *TRI, const AIEBaseInstrInfo *TII,
const InstrItineraryData *ItinData) {

const BlockState &BS =
Scheduler->getInterBlock().getBlockState(DAG->getBB());

// We only need to focus on top-fixed instructions when there is an Epilogue
// block.
if (BS.Kind != BlockType::Epilogue)
return;

MachineBasicBlock *Loop = AIELoopUtils::getLoopPredecessor(*DAG->getBB());
assert(Loop);
const BlockState &LBS = Scheduler->getInterBlock().getBlockState(Loop);
assert(LBS.Kind == BlockType::Loop);

if (!LBS.isPipelined()) {
assert(CurRegion.getTopFixedBundles().empty());
if (!LBS.isPipelined())
return;
}

const Region &CurRegion = BS.getCurrentRegion();

ArrayRef<AIE::MachineBundle> TopFixedBundles =
CurRegion.getTopFixedBundles();

ArrayRef<AIE::MachineBundle> LoopTimedBundles = LBS.getTop().Bundles;

RET.computeUseDefForward(TopFixedBundles, /*InSeparateRegion=*/false);
AIERegMemEventTracker ForwardRET{ItinData, TRI, TII, AA};

ForwardRET.computeUseDefForward(TopFixedBundles,
/*InSeparateRegion=*/false);
// It is more cost-effective to reuse the RET to establish individual safety
// margins between the pipelined loop and the free instructions. This
// approach allows us to manage all dependencies related to EntrySU in one
// centralized location. While it is possible to implement this as a
// separate mutator, doing so could be costly, as it would prevent the
// creation of multiple edges from EntrySU to each free instruction that
// depends on both timed regions (TopFixed and LoopTimed).
RET.computeUseDefForward(LoopTimedBundles, /*InSeparateRegion=*/true);
ForwardRET.computeUseDefForward(LoopTimedBundles,
/*InSeparateRegion=*/true);

auto IsNonTopFixedSU = [Scheduler](const SUnit &SU) {
return !Scheduler->isFixedSU(SU, /*IsTop*/ true);
Expand All @@ -519,17 +507,22 @@ class EmitFixedSUnits : public ScheduleDAGMutation {
// account the def/use cycle of each operand.
for (SUnit &SU : make_filter_range(DAG->SUnits, IsNonTopFixedSU)) {
const MachineInstr &MI = *SU.getInstr();
if (const unsigned Latency = RET.getSafeOperandsDistance(MI)) {
if (const unsigned Latency =
ForwardRET.getSafeOperandsDistanceFromTop(MI)) {
SDep Dep(&DAG->EntrySU, SDep::Artificial);
Dep.setLatency(Latency);
SU.addPred(Dep, /*Required=*/true);
}
}
}

void establishSafeFixedSUToExitSUDistances(
ScheduleDAGInstrs *DAG, AIEPostRASchedStrategy *Scheduler,
const AIEBaseInstrInfo *TII, const InstrItineraryData *ItinData) {

auto IsTopFixedSU = [Scheduler](const SUnit &SU) {
return Scheduler->isFixedSU(SU, true);
};

// TODO: this is pessimistic, we can handle this in RegionEndEdges after
// a mutation reordering.
// Establish dependencies to ExitSU for each top-fixed sched. unit by taking
Expand All @@ -542,6 +535,28 @@ class EmitFixedSUnits : public ScheduleDAGMutation {
DAG->ExitSU.addPred(Dep, /*Required=*/true);
}
}

public:
/// Builds the mutation.
/// \param AA pointer stored unchanged in the AA member; the helpers pass it
/// to the AIERegMemEventTracker instances they construct.
EmitFixedSUnits(AAResults *AA) : AA(AA) {}

/// Entry point of the mutation. Collects the scheduling strategy, the target
/// hooks and the current region of DAG's basic block, then runs the private
/// helpers of this class in a fixed order.
/// \param DAG the scheduling DAG to mutate.
void apply(ScheduleDAGInstrs *DAG) override {
// Unchecked downcast: the strategy is only reachable through
// AIEScheduleDAGMI, so DAG is expected to be of that type.
AIEPostRASchedStrategy *Scheduler =
static_cast<AIEScheduleDAGMI *>(DAG)->getSchedImpl();
auto *TII = static_cast<const AIEBaseInstrInfo *>(DAG->TII);
auto *ItinData = DAG->MF.getSubtarget().getInstrItineraryData();
const TargetRegisterInfo *TRI = DAG->MF.getSubtarget().getRegisterInfo();
// State recorded by the inter-block scheduler for the block being scheduled.
const BlockState &BS =
Scheduler->getInterBlock().getBlockState(DAG->getBB());
const Region &CurRegion = BS.getCurrentRegion();

// Step 1: create the SUnits for the region's fixed instructions and chain
// them from EntrySU / to ExitSU. The later steps classify SUnits through
// Scheduler->isFixedSU(), so this presumably has to run first.
createFixedSUDAGNodes(DAG, Scheduler, CurRegion);

// Step 2: no-op unless a successor block is a pipelined loop; otherwise adds
// latency edges from non-bot-fixed SUnits to ExitSU.
establishSafeFreeSUToPrologueDistances(DAG, Scheduler, TRI, TII, ItinData);

// Step 3: no-op unless this block is an Epilogue whose loop predecessor is
// pipelined; otherwise adds latency edges from EntrySU to non-top-fixed
// SUnits.
establishSafeFreeSUToEpilogueDistances(DAG, Scheduler, TRI, TII, ItinData);

// Step 4: adds edges from the top-fixed SUnits to ExitSU.
establishSafeFixedSUToExitSUDistances(DAG, Scheduler, TII, ItinData);
}
};

/// Collect all "weak" edges in a separate vector. This allows modifying
Expand Down
Loading