Skip to content

Commit 7778d6c

Browse files
authored
Kl max bsp update (#89)
* added staleness to IBspSchedule * active schedule staleness * write contracted orbit graph * added policy unit tests * lazy comm policy revision * lazy policy * buffered sending fix * added fastDeltatracker, added kl_max_bsp_test * revision kl_max_bsp_affinity_test * max bsp affinity tests * added max bsp comm affinity * bsp affinity fix * added bsp comm affinities unit test * test all policies * update comm cost policy * bugfix * fix kl_max_bsp_comm * more unit tests * staleness fix * staleness fix * large tests * update validation test * bugfix node selection * bugfixes * added bestIsPostRemoval * more total unit tests * cross step unit test * lambda unit tests * more unit tests * more unit tests * added more unit test * unit test cleanup * revision kl_improver * bugfix * remove step sentinel, klmove struct move type * comm data swap * more unit tests * update * bugfix node updates * unit tests * update kl sp comm cost policy * bsp unit tests * unit tests * unit test * debug * lambda map update * ds divergence tracker * update * update * imp debug * kl_profile * debug * working! * kl bsp * first split kl improver * update comm cost after split * split bsp cost function * kl_total/kl_lambda * bugfix * update * update * update * commDelta after stop removal fix * step removal unit test * removed updated affinities from bsp cost functions * max bsp integration * enable unit tests * max bsp unit tests * cmake * kl_max_bsp improver * update bsp max cost * max bsp! * max bsp! * improver mt * removed kl_include, kl_include_mt * enable debug cost, disable mt test * presolve fix and naming bug * remove getpremove data * simplification * update mt1 * update thread ranges * comment test * mt fixed? 
* update mt * confine remove step in MT mode * thread pool for synch parallel * synch schedule bugfix * update * bugfix staleness * debug setup * updates * update tests * cmake, removed kl_profile * removed unused macro * removed unused macro * remove perf.data, added comments * caps
1 parent 2055d28 commit 7778d6c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+18046
-5811
lines changed

apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ limitations under the License.
4040
#include "osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp"
4141
#include "osp/bsp/scheduler/ImprovementScheduler.hpp"
4242
#include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp"
43-
#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include.hpp"
44-
#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include_mt.hpp"
43+
#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver.hpp"
44+
#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver_mt.hpp"
4545
#include "osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp"
4646
#include "osp/bsp/scheduler/Scheduler.hpp"
4747
#include "osp/bsp/scheduler/Serial.hpp"

include/osp/bsp/model/BspSchedule.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ class BspSchedule : public IBspSchedule<GraphT>, public IBspScheduleEval<GraphT>
254254
*
255255
* @return The staleness of the schedule.
256256
*/
257-
[[nodiscard]] virtual unsigned GetStaleness() const { return 1; }
257+
// Overrides the interface accessor and always reports a staleness of 1.
// `override` already implies `virtual`, so only `override` is spelled out.
[[nodiscard]] unsigned GetStaleness() const override { return 1; }
258258

259259
/**
260260
* @brief Sets the superstep assigned to the specified node.

include/osp/bsp/model/BspScheduleRecomp.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ void BspScheduleRecomp<Graph_t>::CleanSchedule()
341341
for (auto const &[key, val] : commSchedule_) {
342342
arrivesAt[std::get<0>(key)][std::get<2>(key)].insert(val);
343343
}
344-
344+
345345
// - computation steps
346346
for (const auto &node : instance_->GetComputationalDag().Vertices()) {
347347
for (unsigned index = 0; index < nodeToProcessorAndSupertepAssignment_[node].size(); ) {
@@ -386,7 +386,7 @@ void BspScheduleRecomp<Graph_t>::CleanSchedule()
386386
usedAt[std::get<0>(key)][std::get<1>(key)].insert(val);
387387
}
388388

389-
// - computation steps
389+
// - computation steps
390390
for (const auto &node : instance_->GetComputationalDag().Vertices()) {
391391
for (unsigned index = 0; index < nodeToProcessorAndSupertepAssignment_[node].size(); ) {
392392
const auto &procAndStep = nodeToProcessorAndSupertepAssignment_[node][index];

include/osp/bsp/model/IBspSchedule.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,17 @@ class IBspSchedule {
9393
* @return The number of supersteps in the schedule.
9494
*/
9595
[[nodiscard]] virtual unsigned NumberOfSupersteps() const = 0;
96+
97+
/**
98+
* @brief Get the staleness of the schedule.
99+
*
100+
* The staleness determines the minimum number of supersteps that must elapse between the
101+
* assignment of a node to a processor and the assignment of one of its dependent neighbors
102+
* to a different processor. For a standard BSP schedule, the staleness is 1.
103+
*
104+
* @return The staleness of the schedule.
105+
*/
106+
// Default implementation returns 1; derived schedules may override it.
// Marked [[nodiscard]] for consistency with NumberOfSupersteps() in this interface.
[[nodiscard]] virtual unsigned GetStaleness() const { return 1; }
96107
};
97108

98109
} // namespace osp

include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ ReturnStatus GreedyRecomputer<GraphT>::ComputeRecompScheduleAdvanced(BspSchedule
107107

108108
// add further methods, if desired
109109
}
110-
110+
111111
return ReturnStatus::OSP_SUCCESS;
112112
}
113113

@@ -182,7 +182,7 @@ bool GreedyRecomputer<GraphT>::GreedyImprove(BspScheduleRecomp<GraphT> &schedule
182182
CostType increase = workCost_[toProc][compStep] + G.VertexWorkWeight(node) > maxWork_[compStep]
183183
? workCost_[toProc][compStep] + G.VertexWorkWeight(node) - maxWork_[compStep]
184184
: 0;
185-
185+
186186
if (increase < smallestIncrease) {
187187
bestStep = compStep;
188188
smallestIncrease = increase;
@@ -242,15 +242,15 @@ bool GreedyRecomputer<GraphT>::MergeEntireSupersteps(BspScheduleRecomp<GraphT> &
242242
std::vector<bool> stepRemoved(schedule.NumberOfSupersteps(), false);
243243

244244
const GraphT &G = schedule.GetInstance().GetComputationalDag();
245-
245+
246246
unsigned previousStep = 0;
247247
for (unsigned step = 0; step < schedule.NumberOfSupersteps() - 1; ++step) {
248248
if (stepRemoved[step]) {
249249
continue;
250250
}
251251

252252
for (unsigned nextStep = step + 1; nextStep < schedule.NumberOfSupersteps(); ++nextStep) {
253-
253+
254254
// TRY TO MERGE step AND nextStep
255255
std::set<KeyTriple> newCommStepsBefore, newCommStepsAfter;
256256
std::set<std::pair<VertexIdx, unsigned> > newWorkSteps;
@@ -267,7 +267,7 @@ bool GreedyRecomputer<GraphT>::MergeEntireSupersteps(BspScheduleRecomp<GraphT> &
267267
newCommStepsAfter.insert(entry);
268268
continue;
269269
}
270-
270+
271271
if (step > 0 && firstPresent_[node][fromProc] <= previousStep) {
272272
newCommStepsBefore.insert(entry);
273273
} else {
@@ -290,7 +290,7 @@ bool GreedyRecomputer<GraphT>::MergeEntireSupersteps(BspScheduleRecomp<GraphT> &
290290
if (firstPresent_[pred][proc] <= step) {
291291
continue;
292292
}
293-
293+
294294
unsigned sendFromProcBefore = std::numeric_limits<unsigned>::max();
295295
for (unsigned procOffset = 0; procOffset < schedule.GetInstance().NumberOfProcessors(); ++procOffset) {
296296
unsigned fromProc = (proc + procOffset) % schedule.GetInstance().NumberOfProcessors();
@@ -320,9 +320,9 @@ bool GreedyRecomputer<GraphT>::MergeEntireSupersteps(BspScheduleRecomp<GraphT> &
320320
break;
321321
}
322322

323-
// EVALUATE COST
323+
// EVALUATE COST
324324
int costChange = 0;
325-
325+
326326
// work cost in merged step
327327
std::vector<CostType> newWorkCost(schedule.GetInstance().NumberOfProcessors());
328328
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
@@ -337,7 +337,7 @@ bool GreedyRecomputer<GraphT>::MergeEntireSupersteps(BspScheduleRecomp<GraphT> &
337337
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
338338
newMax = std::max(newMax, newWorkCost[proc]);
339339
}
340-
340+
341341
costChange += static_cast<int>(newMax) - static_cast<int>(maxWork_[step] + maxWork_[nextStep]);
342342

343343
// comm cost before merged step
@@ -348,12 +348,12 @@ bool GreedyRecomputer<GraphT>::MergeEntireSupersteps(BspScheduleRecomp<GraphT> &
348348
newRecCost[proc] = recCost_[proc][previousStep];
349349
}
350350
for (const KeyTriple &newComm : newCommStepsBefore) {
351-
CostType commCost = G.VertexCommWeight(std::get<0>(newComm)) *
351+
CostType commCost = G.VertexCommWeight(std::get<0>(newComm)) *
352352
schedule.GetInstance().GetArchitecture().CommunicationCosts(std::get<1>(newComm), std::get<2>(newComm));
353353
newSendCost[std::get<1>(newComm)] += commCost;
354354
newRecCost[std::get<2>(newComm)] += commCost;
355355
}
356-
356+
357357
newMax = 0;
358358
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
359359
newMax = std::max(newMax, newSendCost[proc]);
@@ -378,7 +378,7 @@ bool GreedyRecomputer<GraphT>::MergeEntireSupersteps(BspScheduleRecomp<GraphT> &
378378
newSendCost[std::get<1>(newComm)] += commCost;
379379
newRecCost[std::get<2>(newComm)] += commCost;
380380
}
381-
381+
382382
newMax = 0;
383383
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
384384
newMax = std::max(newMax, newSendCost[proc]);
@@ -424,11 +424,11 @@ bool GreedyRecomputer<GraphT>::MergeEntireSupersteps(BspScheduleRecomp<GraphT> &
424424
for (const KeyTriple &entry : commSteps_[step]) {
425425
neededOnProc_[std::get<0>(entry)][std::get<1>(entry)].erase(neededOnProc_[std::get<0>(entry)][std::get<1>(entry)].lower_bound(step));
426426
}
427-
427+
428428
for (const KeyTriple &entry : commSteps_[nextStep]) {
429429
neededOnProc_[std::get<0>(entry)][std::get<1>(entry)].erase(neededOnProc_[std::get<0>(entry)][std::get<1>(entry)].lower_bound(nextStep));
430430
}
431-
431+
432432
commSteps_[step].clear();
433433
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
434434
sendCost_[proc][step] = 0;
@@ -447,7 +447,7 @@ bool GreedyRecomputer<GraphT>::MergeEntireSupersteps(BspScheduleRecomp<GraphT> &
447447
}
448448

449449
maxComm_[nextStep] = 0;
450-
450+
451451
maxComm_[step] = 0;
452452
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
453453
maxComm_[step] = std::max(maxComm_[step], sendCost_[proc][step]);
@@ -459,7 +459,7 @@ bool GreedyRecomputer<GraphT>::MergeEntireSupersteps(BspScheduleRecomp<GraphT> &
459459
for (const KeyTriple &newComm : newCommStepsBefore) {
460460
AddCommStep(schedule, newComm, previousStep);
461461
}
462-
462+
463463
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
464464
maxComm_[previousStep] = std::max(maxComm_[previousStep], sendCost_[proc][previousStep]);
465465
maxComm_[previousStep] = std::max(maxComm_[previousStep], recCost_[proc][previousStep]);
@@ -499,7 +499,7 @@ bool GreedyRecomputer<GraphT>::RecomputeEntireSupersteps(BspScheduleRecomp<Graph
499499
commStepPerNodeAndReceiver[std::make_pair(std::get<0>(entry), std::get<2>(entry))].emplace_back(std::get<1>(entry), step);
500500
}
501501
}
502-
502+
503503
for (unsigned step = 0; step < schedule.NumberOfSupersteps(); ++step) {
504504
for (unsigned fromProc = 0; fromProc < schedule.GetInstance().NumberOfProcessors(); ++fromProc) {
505505
for (unsigned toProc = 0; toProc < schedule.GetInstance().NumberOfProcessors(); ++toProc) {
@@ -533,7 +533,7 @@ bool GreedyRecomputer<GraphT>::RecomputeEntireSupersteps(BspScheduleRecomp<Graph
533533
internalOutDegree[pred] += 1;
534534
}
535535
}
536-
536+
537537
std::set<VertexIdx> checkIfDisposable;
538538
for (const VertexIdx node : mustReplicate) {
539539
if (internalOutDegree.at(node) == 0) {
@@ -602,14 +602,14 @@ bool GreedyRecomputer<GraphT>::RecomputeEntireSupersteps(BspScheduleRecomp<Graph
602602
// EVALUATE COST
603603

604604
int costChange = 0;
605-
605+
606606
// work cost
607607
CostType newWorkCost = workCost_[toProc][step];
608608
for (const VertexIdx node : mustReplicate) {
609609
newWorkCost += G.VertexWorkWeight(node);
610610
}
611611
CostType newMax = std::max(maxWork_[step], newWorkCost);
612-
612+
613613
costChange += static_cast<int>(newMax) - static_cast<int>(maxWork_[step]);
614614

615615
// comm cost before merged step
@@ -626,7 +626,7 @@ bool GreedyRecomputer<GraphT>::RecomputeEntireSupersteps(BspScheduleRecomp<Graph
626626
newSendCost[std::get<1>(newComm)] += commCost;
627627
newRecCost += commCost;
628628
}
629-
629+
630630
newMax = std::max(maxComm_[step - 1], newRecCost);
631631
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
632632
newMax = std::max(newMax, newSendCost[proc]);
@@ -658,7 +658,7 @@ bool GreedyRecomputer<GraphT>::RecomputeEntireSupersteps(BspScheduleRecomp<Graph
658658
}
659659
for (const auto &changingStep : changedStepsRec) {
660660
unsigned stepChanged = changingStep.first;
661-
661+
662662
std::vector<CostType> newSendCost(schedule.GetInstance().NumberOfProcessors());
663663
CostType newRecCost = recCost_[toProc][stepChanged] - changingStep.second;
664664
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
@@ -667,7 +667,7 @@ bool GreedyRecomputer<GraphT>::RecomputeEntireSupersteps(BspScheduleRecomp<Graph
667667
for (const auto &procAndChange : changedStepsSent[stepChanged]) {
668668
newSendCost[procAndChange.first] -= procAndChange.second;
669669
}
670-
670+
671671
newMax = 0;
672672
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
673673
newMax = std::max(newMax, newSendCost[proc]);
@@ -705,7 +705,7 @@ bool GreedyRecomputer<GraphT>::RecomputeEntireSupersteps(BspScheduleRecomp<Graph
705705
for (const KeyTriple &newComm : newCommStepsBefore) {
706706
AddCommStep(schedule, newComm, step - 1);
707707
}
708-
708+
709709
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
710710
maxComm_[step - 1] = std::max(maxComm_[step - 1], sendCost_[proc][step - 1]);
711711
maxComm_[step - 1] = std::max(maxComm_[step - 1], recCost_[proc][step - 1]);
@@ -734,7 +734,7 @@ bool GreedyRecomputer<GraphT>::RecomputeEntireSupersteps(BspScheduleRecomp<Graph
734734

735735
schedule.GetCommunicationSchedule().clear();
736736
for (unsigned step = 0; step < schedule.NumberOfSupersteps(); ++step) {
737-
for (const KeyTriple &entry : commSteps_[step]) {
737+
for (const KeyTriple &entry : commSteps_[step]) {
738738
schedule.AddCommunicationScheduleEntry(entry, step);
739739
}
740740
}
@@ -759,9 +759,9 @@ bool GreedyRecomputer<GraphT>::BatchRemoveSteps(BspScheduleRecomp<GraphT> &sched
759759
}
760760
}
761761
}
762-
762+
763763
for (unsigned step = 0; step < schedule.NumberOfSupersteps(); ++step) {
764-
764+
765765
bool canReduce = (maxComm_[step] > 0);
766766
while (canReduce) {
767767

@@ -792,8 +792,8 @@ bool GreedyRecomputer<GraphT>::BatchRemoveSteps(BspScheduleRecomp<GraphT> &sched
792792

793793
for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) {
794794
for (unsigned sendOrRec = 0; sendOrRec < 2; ++sendOrRec) {
795-
796-
std::set<KeyTriple> *currentCommSteps;
795+
796+
std::set<KeyTriple> *currentCommSteps;
797797
if (sendOrRec == 0) {
798798
if (!sendSaturated[proc]) {
799799
continue;
@@ -864,11 +864,11 @@ bool GreedyRecomputer<GraphT>::BatchRemoveSteps(BspScheduleRecomp<GraphT> &sched
864864
if (skipStep) {
865865
// weird edge case if all comm steps have weight 0 (can be removed?)
866866
break;
867-
}
867+
}
868868
}
869869
if (skipStep) {
870870
continue;
871-
}
871+
}
872872

873873
if (maxComm_[step] > 0 && commSteps_[step].size() == removedCommSteps.size()) {
874874
commDecrease += schedule.GetInstance().GetArchitecture().SynchronisationCosts();
@@ -898,7 +898,7 @@ bool GreedyRecomputer<GraphT>::BatchRemoveSteps(BspScheduleRecomp<GraphT> &sched
898898
maxComm_[step] = std::max(maxComm_[step], sendCost_[proc][step]);
899899
maxComm_[step] = std::max(maxComm_[step], recCost_[proc][step]);
900900
}
901-
901+
902902
canReduce = true;
903903
improved = true;
904904
}
@@ -936,17 +936,17 @@ void GreedyRecomputer<GraphT>::RefreshAuxData(const BspScheduleRecomp<GraphT> &s
936936

937937
nodesPerProcAndStep_.clear();
938938
nodesPerProcAndStep_.resize(P, std::vector<std::vector<VertexIdx> >(S));
939-
939+
940940
neededOnProc_.clear();
941941
neededOnProc_.resize(N, std::vector<std::multiset<unsigned> >(P, {S}));
942-
942+
943943
maxWork_.clear();
944944
maxComm_.clear();
945945
maxWork_.resize(S, 0);
946946
maxComm_.resize(S, 0);
947947

948948
commSteps_.clear();
949-
commSteps_.resize(S);
949+
commSteps_.resize(S);
950950

951951
for (VertexIdx node = 0; node < N; ++node) {
952952
for (const std::pair<unsigned, unsigned> &procAndStep : schedule.Assignments(node)) {

0 commit comments

Comments
 (0)