OpenASIP  2.0
ResourceConstraintAnalyzer.cc
Go to the documentation of this file.
1 /*
2  Copyright (c) 2002-2015 Tampere University.
3 
4  This file is part of TTA-Based Codesign Environment (TCE).
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23  */
24 /**
25  * @file ResourceConstraintAnalyzer.cc
26  *
27  * Definition of ResourceConstraintAnalyzer class.
28  *
29  * @author Pekka Jääskeläinen 2010-2015
30  * @note rating: red
31  */
32 
34 #include "Machine.hh"
35 #include "ProgramOperation.hh"
36 #include "MoveNode.hh"
37 #include "HWOperation.hh"
38 #include "Instruction.hh"
39 #include "Guard.hh"
40 #include "TerminalFUPort.hh"
41 #include "MoveGuard.hh"
42 #include "Operation.hh"
43 #include "DataDependenceGraph.hh"
44 #include "SimpleResourceManager.hh"
45 #include "Move.hh"
46 #include "MoveNodeSet.hh"
47 #include "DDGMoveNodeSelector.hh"
48 
49 void
53 }
54 
55 /**
56  * Analyzes the resource constraints in the schedule.
57  *
58  * @return true in case at least one resource or dep constrained move found.
59  * Thus always true in case the schedule is "perfect".
60  */
61 bool
63 
66 
69  criticalPath->setProgramOperationNodes(true);
71 
72  optimalScheduleResourceUsage(*criticalPath, graphName_ + ".critical_path");
73  delete criticalPath; criticalPath = NULL;
74 
76  memDeps->setProgramOperationNodes(true);
77  memDeps->writeToDotFile(graphName_ + ".mem_deps.dot");
78  delete memDeps; memDeps = NULL;
79 
80  return true;
81 #if 0
82  // TODO: finish the more intelligent iterative search for resource bottlenecks
83 
84  busConstrained_ = 0;
93  // only sensible basic block with 1 cycle is one with only
94  // reg moves or a store, ignore those
95  minScheduleLength_ = std::max(origDDG__.height(), 2);
97 
98 
99 
100  const int cycleCount = ddg_.largestCycle() + 1;
101  const int nodes = ddg_.nodeCount();
102 
103  /*
104 
105  Collect the resource constrained moves that are lengthening the
106  schedule over the critical path length.
107 
108  Picks up all moves in the instructions > critical path length and
109  "climbs" up the dependency tree and collects the first resource
110  constrained move found (we use top-down scheduling) that is
111  above the minimum schedule length.
112 
113  The idea is try to "lift the resource constrained DDG tree
114  upwards" to try to bring the late moves within the minimum
115  schedule length.
116 
117  What if there's no such move? If all the moves in the tree
118  are after the minimum schedule? In that case some other path is
119  limiting the schedule thus we should just ignore the move and look
120  at other moves.
121 
122  Should we scan all cycles or just the first cycle after the min
123  cycle? We might add too many resources in case the bottleneck is
124  resolved at the first cycle. I guess we should scan until we find
125  *one* such move? Solving its resource constraint could lead to
126  removing a bottleneck so whenever we scan more than one move there's
127  a risk of adding extra resources that do not really contribute to the
128  schedule length as the constraint might have been solved by the
129  single move's bottleneck. This leads to many evaluation iterations but
130  I guess it's better than to waste resources. Might add some kind of
131  parameter for controlling the maximum number of constrained moves
132  to find per iteration.
133 
134  */
135  int maxMovesToFind = 4;
136  foundMoves_.clear();
137  resourceConstrained_ = 0;
138  for (int cycle = minScheduleLength_;
139  cycle < cycleCount && resourceConstrained_ < maxMovesToFind; ++cycle) {
140  DataDependenceGraph::NodeSet moves = ddg_.movesAtCycle(cycle);
141  for (DataDependenceGraph::NodeSet::iterator m = moves.begin();
142  m != moves.end() && resourceConstrained_ < maxMovesToFind; ++m) {
144  if (node != NULL) {
146  << "found a schedule constraining move: "
147  << node->move().toString() << " at cycle "
148  << node->cycle() << " earliest DDG cycle "
149  << ddg_.earliestCycle(*node) << std::endl;
150  ++resourceConstrained_;
151  }
152  }
153  }
154 
155  if (true || resourceConstrained_ > 0) {
156 
157 #if 0
158  ddg_.setEdgeWeightHeuristics(DataDependenceGraph::EWH_REAL);
159 
160  // print out the instructions
161  for (int c = 0; c < cycleCount; ++c) {
162  Application::logStream() << c << ": ";
163  DataDependenceGraph::NodeSet moves = ddg_.movesAtCycle(c);
164  for (DataDependenceGraph::NodeSet::iterator m = moves.begin();
165  m != moves.end(); ++m) {
166  MoveNode& mn = **m;
167 // if (!ddg_.isInCriticalPath(mn) && !mn.move().isJump())
168 // continue;
170  Application::logStream() << " ";
171  }
172  Application::logStream() << std::endl;
173  }
174  ddg_.setEdgeWeightHeuristics(DataDependenceGraph::EWH_DEFAULT);
175 #endif
176 
177  Application::logStream() << "cycles: " << cycleCount << std::endl;
178 
180  << "minimum: " << minScheduleLength_ << " (from DDG longest path)"
181  << std::endl;
182 
184  << "at least "
185  << resourceConstrained_ << " of " << nodes
186  << " were resource constrained:" << std::endl;
187  if (busConstrained_ > 0) {
189  << busConstrained_ << " bus (move slot) constrained"
190  << std::endl;
191  }
192 
193  if (rfReadPortConstrained_ > 0) {
195  << rfReadPortConstrained_ << " RF read port constrained ";
196  for (RFCountMap::const_iterator rfi =
197  readPortConstrainedRfs_.begin();
198  rfi != readPortConstrainedRfs_.end(); ++rfi) {
200  << (*rfi).first << ": " << (*rfi).second << " ";
201  }
202  Application::logStream() << std::endl;
203  }
204 
205  if (rfWritePortConstrained_ > 0) {
207  << std::endl
208  << rfWritePortConstrained_ << " RF write port constrained ";
209  for (RFCountMap::const_iterator rfi =
210  writePortConstrainedRfs_.begin();
211  rfi != writePortConstrainedRfs_.end(); ++rfi) {
213  << (*rfi).first << ": " << (*rfi).second << " ";
214  }
215  Application::logStream() << std::endl;
216  }
217 
218  if (operationConstrained_ > 0) {
220  << std::endl
222  << " operation (FU) constrained ";
223  for (OperationCountMap::const_iterator rfi =
225  rfi != operationConstrainedMoves_.end(); ++rfi) {
227  << (*rfi).first << ": " << (*rfi).second << " ";
228  }
229  Application::logStream() << std::endl;
230  }
231  Application::logStream() << std::endl;
232  return true;
233  }
234 
235  return regAntidepsFound;
236 #endif
237 }
238 
239 
240 void
242  DataDependenceGraph& ddg, TCEString dotFileName,
243  const std::map<int, std::list<MoveNode*> >& schedule) {
244 
245  std::ofstream s(dotFileName.c_str());
246 
247  s << "digraph ddg {" << std::endl;
248 
249 
250  s << "/*" << std::endl << "### dependence constraints: " << std::endl << std::endl;
251 
252  analyzeRegisterAntideps(ddg, s);
253 
254  int smallestCycle = (*schedule.begin()).first;
255  int largestCycle = (*schedule.rbegin()).first;
256 
257  // record stats of operationA -> operationB bypasses to guide
258  // IC customization
259  typedef std::map<std::pair<TCEString, TCEString>, int> BypassMap;
260 
261  typedef std::map<TCEString, int> OpCountMap;
262  OpCountMap maxParallelOps;
263  // The operation mix. I.e., the static occurence of operations
264  // in the code.
265  OpCountMap operationMix;
266  BypassMap bypasses;
267 
268  /* In the critical path DDGs, the trigger nodes might be
269  missing. In that case we use the operand node to record
270  the operation usage cycle. This set is used to ensure the
271  same operation is not recorded more than once. */
272  std::set<ProgramOperation*> recordedOps;
273 
274  for (int c = smallestCycle; c <= largestCycle; ++c) {
275  if (schedule.find(c) == schedule.end()) continue;
276  std::list<MoveNode*> moves = (*schedule.find(c)).second;
277  if (moves.size() == 0) continue;
278 
279  std::map<TCEString, int> parallelOpsAtCycle;
280  for (std::list<MoveNode*>::iterator i = moves.begin();
281  i != moves.end(); ++i) {
282  MoveNode& n = **i;
283  TCEString opName = "";
284 
285  if (n.isBypass()) {
286  std::pair<TCEString, TCEString> key =
287  std::make_pair(
290  bypasses[key]++;
291  }
292 
293  if (n.isDestinationOperation()) {
295  if (!AssocTools::containsKey(recordedOps, &op)) {
296  opName = op.operation().name();
297  operationMix[opName]++;
298  parallelOpsAtCycle[opName]++;
299  recordedOps.insert(&op);
300  }
301  }
302  }
303 
304  for (OpCountMap::const_iterator i = parallelOpsAtCycle.begin();
305  i != parallelOpsAtCycle.end(); ++i) {
306  TCEString opName = (*i).first;
307  int count = (*i).second;
308  maxParallelOps[opName] =
309  std::max(maxParallelOps[opName], count);
310  }
311  }
312 
313  s << std::endl;
314  s << "### resource statistics: " << std::endl << std::endl;
315 
316  const int COL_WIDTH = 14;
317  // print statistics of the graph as a comment
318  s << std::setw(COL_WIDTH) << std::right << "operation stats: ";
319  s << std::endl << std::endl;
320 
321  for (OpCountMap::const_iterator i = maxParallelOps.begin();
322  i != maxParallelOps.end(); ++i) {
323  TCEString opName = (*i).first;
324  int parCount = (*i).second;
325  int total = operationMix[opName];
326  s << std::setw(COL_WIDTH) << std::right
327  << opName + ": ";
328  s << std::setw(COL_WIDTH) << std::right
329  << total;
330  s << " total, " << std::setw(COL_WIDTH) << std::right
331  << parCount << " at most in parallel" << std::endl;
332  }
333 
334  s << std::endl;
335  // print statistics of the graph as a comment
336  s << std::setw(COL_WIDTH) << std::right << "bypass stats: ";
337  s << std::endl << std::endl;
338 
339  for (BypassMap::const_iterator i = bypasses.begin();
340  i != bypasses.end(); ++i) {
341  std::pair<TCEString, TCEString> opPair = (*i).first;
342  int count = (*i).second;
343  s << std::setw(COL_WIDTH) << std::right
344  << opPair.first + " -> " + opPair.second + ": ";
345  s << std::setw(COL_WIDTH) << std::right
346  << count << std::endl;
347  }
348 
350 
351  s << std::endl << "### moves not at the earliest cycles:" << std::endl << std::endl;
352  for (int i = 0; i < ddg.nodeCount(); ++i) {
353  MoveNode& n = ddg.node(i);
354  // Constant writes always report 0 for the src distance.
355  if (n.isSourceConstant()) continue;
356  int ddgEarliest = ddg.maxSourceDistance(n);
357  if (n.cycle() > ddgEarliest)
358  s << n.toString() << " (" << n.cycle() << " > " << ddgEarliest << ")" << std::endl;
359  }
361 
362  s << std::endl << "*/" << std::endl << std::endl;
363 
364  // print the DDG itself
365 
366  // print the "time line" to visualize the schedule
367  s << "\t{" << std::endl
368  << "\t\tnode [shape=plaintext];" << std::endl
369  << "\t\t";
370  for (int c = smallestCycle; c <= largestCycle; ++c) {
371  s << "\"cycle " << c << "\" -> ";
372  }
373  s << "\"cycle " << largestCycle + 1 << "\"; "
374  << std::endl << "\t}" << std::endl;
375 
376  // print the nodes
377  for (int c = smallestCycle; c <= largestCycle; ++c) {
378  if (schedule.find(c) == schedule.end()) continue;
379  std::list<MoveNode*> moves = (*schedule.find(c)).second;
380  if (moves.size() == 0) continue;
381  s << "\t{ rank = same; \"cycle " << c << "\"; ";
382  for (std::list<MoveNode*>::iterator i = moves.begin();
383  i != moves.end(); ++i) {
384  MoveNode& n = **i;
385  s << "n" << n.nodeID() << "; ";
386  }
387  s << "}" << std::endl;
388  }
389 
390  // first print all the nodes and their properties
391  for (int i = 0; i < ddg.nodeCount(); ++i) {
392  MoveNode& n = ddg.node(i);
393  s << "\tn" << n.nodeID()
394  << " [" << n.dotString() << "]; "
395  << std::endl;
396  }
397 
398  // edges
399  for (int count = ddg.edgeCount(), i = 0; i < count ; ++i) {
400  DataDependenceEdge& e = ddg.edge(i);
401  MoveNode& tail = ddg.tailNode(e);
402  MoveNode& head = ddg.headNode(e);
403 
404  s << "\tn" << tail.nodeID() << " -> n"
405  << head.nodeID() << "["
406  << e.dotString() << "];" << std::endl;
407  }
408 
409  s << "}" << std::endl;
410 
411  s.close();
412 }
413 
414 /**
415  * Analyses the program based on a non-resource constrained schedule.
416  *
417  * Produces statistics of maximum number of parallel operations that
418  * can be potentially utilized.
419  *
420  * @todo this is incomplete as the operand to trigger edges are missing,
421  * thus the analysis results are likely to be false.
422  */
423 void
425  DataDependenceGraph& ddg, TCEString graphName) {
426 
427  ddg.writeToDotFile(graphName + ".dot");
428 
429  TCEString dotFileName = graphName + ".optimal_schedule.dot";
430 
431  std::map<int, std::list<MoveNode*> > schedule;
432 
434  for (int nc = 0; nc < ddg.nodeCount(); ++nc) {
435  MoveNode& n = ddg.node(nc);
436  int cycle = ddg.maxSourceDistance(n);
437  // immediate operand moves always get cycle 0, fix this by
438  // assuming the cycle is the same as the latest other operand
439  // move (at least one of them must be a non-immediate move)
440  if (n.isDestinationOperation() &&
441  n.move().source().isImmediate()) {
442  for (int input = 0;
443  input < n.destinationOperation().inputMoveCount();
444  ++input) {
445  MoveNode& inputMove =
446  n.destinationOperation().inputMove(input);
447  if (&inputMove == &n) continue;
448  cycle = std::max(cycle, ddg.maxSourceDistance(inputMove));
449  }
450  }
451  schedule[cycle].push_back(&n);
452  }
454 
455  dumpGraphWithStats(ddg, dotFileName, schedule);
456 }
457 
458 /**
459  * Analyses the program based on a memory resource constrained schedule.
460  *
461  * Produces statistics of maximum number of parallel operations that
462  * can be potentially utilized given that there's only a fixed number of
463  * parallel memory operations allowed.
464  */
465 void
467  DataDependenceGraph& ddg, TCEString graphName) {
468 
469  TCEString dotFileName = graphName + ".memory_bound_optimal_schedule.dot";
470 
471  int maxMemOpsPerCycle = 2;
472  std::map<int, std::list<MoveNode*> > schedule;
473  std::map<int, std::list<ProgramOperationPtr> > operationSchedule;
474 
475  // Maximum number of parallel operations of the given name.
476  std::map<TCEString, unsigned> maxParallelOps;
477 
479 
480  CriticalPathBBMoveNodeSelector selector(ddg, ddg.machine());
481 
483  << "### analyzing " << graphName << std::endl;
484 
485  // Produce the "operation schedule" where we assume there are enough
486  // interconnection and RF port resources to invoke the required operations
487  // in parallel.
488  MoveNodeGroup cands = selector.candidates();
489 
490  while (cands.nodeCount() > 0) {
491  if (cands.nodeCount() > 1) {
493  const Operation& op = po->operation();
494  // Schedule the triggering move first and at the earliest position,
495  // rest of the nodes can fall in to cycles based on the DDG. Assume
496  // operand 1 is always the triggering.
497  for (auto mn : po->inputNode(1)) {
498  int cycle = ddg.earliestCycle(
499  *mn, UINT_MAX, false, false, false, false, false, true);
500  int memOpsInCycle = 0;
501  do {
502  if (!op.usesMemory()) break;
503  // Assuming placing the current mem OP to the cycle.
504  memOpsInCycle = 1;
505  // Count the number of memory operations already triggered
506  // on the planned cycle.
507  for (auto otherOp : operationSchedule[cycle]) {
508  if (otherOp->operation().usesMemory())
509  memOpsInCycle++;
510  }
511  if (memOpsInCycle > maxMemOpsPerCycle) {
513  << "would get " << memOpsInCycle
514  << " mem operations in cycle " << cycle
515  << std::endl;
516  ++cycle;
517  }
518  } while (memOpsInCycle > maxMemOpsPerCycle);
519  mn->setCycle(cycle);
520  schedule[cycle].push_back(mn);
521  operationSchedule[cycle].push_back(po);
523  << "placed operation trigger " << mn->toString() << std::endl;
524  selector.notifyScheduled(*mn);
525  }
526  }
527  for (int n = 0; n < cands.nodeCount(); ++n) {
528  MoveNode& mn = cands.node(n);
529  if (mn.isPlaced()) continue;
530  int cycle = ddg.earliestCycle(
531  mn, UINT_MAX, false, false, false, false, false, true);
532  mn.setCycle(cycle);
533  schedule[cycle].push_back(&mn);
535  << "placed mn " << mn.toString() << std::endl;
536  selector.notifyScheduled(mn);
537  }
538 
539  cands = selector.candidates();
540  }
541  dumpGraphWithStats(ddg, dotFileName, schedule);
543 
544  // Unschedule the nodes so the actual scheduler that might be ran
545  // after this analysis does not get confused.
546  for (auto n : ddg.scheduledMoves()) {
547  n->unsetCycle();
548  }
549 }
550 
551 
552 /**
553  * Analyzes the register antideps in the schedule.
554  *
555  * @return true in case at least one move that was constrained by a
556  * register antidep which could be avoided with additional registers or
557  * better register assignment strategy was found.
558  */
559 bool
561  DataDependenceGraph& ddg, std::ostream& s) {
562 
563  s << "schedule length: " << ddg.largestCycle() - ddg.smallestCycle() << std::endl;
564 
566  s << "DDG height: " << ddg.height() << std::endl;
568 
569  DataDependenceGraph* trueDepGraph = ddg.trueDependenceGraph(false);
571  s << "DDG height without register antideps: "
572  << trueDepGraph->height() << std::endl;
573 
574  trueDepGraph->writeToDotFile((boost::format("%s.tddg_noreg.dot") % graphName_).str());
575 
576  DataDependenceGraph* trueDepGraph2 = ddg.trueDependenceGraph(true);
578  s << "DDG height without any antideps: "
579  << trueDepGraph2->height() << std::endl;
580 
581  trueDepGraph2->writeToDotFile(
582  (boost::format("%s.tddg_noantidep_.dot") % graphName_).str());
583  delete trueDepGraph2;
584 
585  DataDependenceGraph* trueDepGraph3 = ddg.trueDependenceGraph(true, true);
587  s << "DDG height without any antideps and memory deps: "
588  << trueDepGraph3->height() << std::endl;
589 
590  trueDepGraph3->writeToDotFile(
591  (boost::format("%s.tddg_noantidep_nonmemdep.dot") % graphName_).str());
592 
593  delete trueDepGraph3;
594 
595  std::map<int, int> foundCounts;
596 
597  bool restrictingEdgeFound = false;
598 
599  for (int i = 0; i < ddg.nodeCount(); i++) {
600  MoveNode& tail = ddg.node(i);
601  for (int e = 0; e < ddg.outDegree(tail); ++e) {
602  DataDependenceEdge& edge = ddg.outEdge(tail,e);
603  // only consider non-loop register false deps here
604  if (!edge.isFalseDep() ||
606  edge.loopDepth() > 0)
607  continue;
608  MoveNode& head = ddg.headNode(edge);
609  // if there is a *real* dependency path from the tail to head
610  // than the given antidep then it does not help if we rename
611  // the register
612  if (trueDepGraph->hasPath(tail, head))
613  continue;
614 
615  // TODO: figure out why the exception gets thrown (see trunk
616  // Bug 517) and remove the try catch
617  try {
618 
620  if (tail.move().source().isGPR()) {
621  foundCounts[tail.move().source().registerFile().width()]++;
622  } else if (dynamic_cast<const TTAMachine::RegisterGuard*>(
623  &tail.move().guard().guard())) {
624  const TTAMachine::RegisterGuard* g =
625  dynamic_cast<const TTAMachine::RegisterGuard*>(
626  &tail.move().guard().guard());
627  foundCounts[g->registerFile()->width()]++;
628  } else {
630  << "WARNING: not a GPR: "
631  << tail.move().toString() << std::endl;
632  }
633  } else {
634  if (tail.move().destination().isGPR()) {
635  foundCounts[tail.move().destination().registerFile().width()]++;
636  } else {
638  << "WARNING: not a GPR: "
639  << tail.move().toString() << std::endl;
640  }
641  }
642 
643  s << "Limiting antidep: from "
644  << tail.toString() << " to " << head.toString() << " ("
645  << edge.toString() << ")"
646  << std::endl;
647 
648 
649  } catch (const Exception& e) {
650  // just skip this for now, have to take a look why
651  // there are some extra R_G_war edges, example:
652  // ERROR: Move is not predicated. Edge R_G_war:bool.0
653  // between moves: 32 -> add_sub_2.in1t.add eq_gt_gtu.out1 -> bool.0
654  if (Application::verboseLevel() > 0) {
656  << "ERROR: " << e.errorMessage() << " "
657  << "Edge " << edge.toString() << " between moves: "
658  << tail.move().toString() << " "
659  << head.move().toString() << std::endl;
660  }
661  continue;
662 
663  }
664  }
665  }
666  for (std::map<int, int>::iterator i = foundCounts.begin();
667  i != foundCounts.end(); ++i) {
668  int foundCount = (*i).second;
669  int bitWidth = (*i).first;
670  s << "found " << foundCount << " " << bitWidth << " bit "
671  << " register antideps that restrict parallelism" << std::endl;
672 
673  }
674  delete trueDepGraph;
675  return restrictingEdgeFound;
676 }
677 
678 /**
679  * Finds a move that is a (grand)parent of the given move (or the move itself)
680  * and constraints the schedule, that is, could be scheduled before
681  * the minimum schedule length in case a resource constraint was removed.
682  *
683  * A recursive function.
684  *
685  * @return a found parent or NULL if not found in that subtree.
686  */
687 MoveNode*
689 
690 
691  if (child == NULL || !child->isMove())
692  return NULL;
693 
694  int ddgEarliestCycle = origDDG_.earliestCycle(*child);
695  if (ddgEarliestCycle < minScheduleLength_ &&
696  foundMoves_.find(child) == foundMoves_.end() &&
697  analyzeMoveNode(*child)) {
698  return child;
699  } else {
701  for (DataDependenceGraph::NodeSet::const_iterator p = parents.begin();
702  p != parents.end(); ++p) {
703  MoveNode* mn = *p;
704  if (findResourceConstrainedParent(mn) != NULL)
705  return mn;
706  }
707  }
708  return NULL;
709 }
710 
711 /**
712  * Analyzes the resource constraints of a single move in the schedule.
713  *
714  * @return true in case the move is resource constrained.
715  */
716 bool
718 
719  // moves at cycle 0 cannot be scheduled earlier
720  if (node.cycle() == 0)
721  return false;
722 
723  int ddgEarliestCycle = origDDG_.earliestCycle(node);
724  if (ddgEarliestCycle == node.cycle())
725  return false;
726 
727  if (node.move().isJump()) {
729  << "earliest cycle for JUMP: " << ddgEarliestCycle << std::endl;
730  // JUMP cannot be scheduled earlier than the
731  // schedule length - delay slots, they always have a fixed position
732  // in the BB
733  return false;
734  }
735 
736  // operation latency
737  // look for the triggering operand and see if it's less
738  // than the latency away
739  if (node.isSourceOperation()) {
740  ProgramOperation& op = node.sourceOperation();
741  MoveNode& trigger = *op.triggeringMove();
742  MoveNode& opcodeSetting = op.opcodeSettingNode();
743  int latency =
744  (dynamic_cast<TTAProgram::TerminalFUPort&>(
745  opcodeSetting.move().destination())).hwOperation()->
746  latency();
747  if (node.cycle() == trigger.cycle() + latency) {
748  return false;
749  }
750  }
751 
752  const TTAMachine::Machine& targetMachine = origDDG_.machine();
753 
754  // check the instruction the move should have been scheduled at,
755  // resources permitting
756  TTAProgram::Instruction* prevInstr = rm_.instruction(ddgEarliestCycle);
757 
758  const int busCount = targetMachine.busNavigator().count();
759  // is it bus constrained?
760  if (prevInstr->moveCount() == busCount) {
761  // it might be true or we just managed to find a different move
762  // to fill the unused slot later during scheduling
763  ++busConstrained_;
764  return true;
765  }
766 
767  // RF read port constrained?
768  if (node.move().source().isGPR()) {
769  int readsFound = 0;
770  for (int m = 0; m < prevInstr->moveCount(); ++m) {
771  TTAProgram::Move& prevMove = prevInstr->move(m);
772  if (prevMove.source().isGPR() &&
773  &node.move().source().registerFile() ==
774  &prevMove.source().registerFile()) {
775  ++readsFound;
776  }
777  }
778  if (readsFound ==
779  node.move().source().registerFile().maxReads()) {
782  node.move().source().registerFile().name()]++;
783  return true;
784  }
785  }
786 
787  // RF read port constrained?
788  if (node.move().destination().isGPR()) {
789  int writesFound = 0;
790  for (int m = 0; m < prevInstr->moveCount(); ++m) {
791  TTAProgram::Move& prevMove = prevInstr->move(m);
792  if (prevMove.destination().isGPR() &&
793  &node.move().destination().registerFile() ==
794  &prevMove.destination().registerFile()) {
795  ++writesFound;
796  }
797  }
798  if (writesFound ==
799  node.move().destination().registerFile().maxWrites()) {
802  node.move().destination().registerFile().name()]++;
803  return true;
804  }
805  }
806 
807  // assume rest are FU constrained somehow (might be untrue in general,
808  // but with a fully connected machine a good approximation)
809  if (node.isDestinationOperation()) {
810  // if it's a trigger node, it can be contrained by an operand move,
811  // a dependency which is not recorded by the DDG
812  bool operandConstrained = false;
813  if (node.move().isTriggering()) {
815  for (int input = 0; (input < po.inputMoveCount()) &&
816  !operandConstrained; ++input) {
817 
818  const MoveNode& operandMove = po.inputMove(input);
819 
820  if (&operandMove == &node)
821  continue;
822 
823  if (operandMove.cycle() == node.cycle()) {
824  operandConstrained = true;
825  break;
826  }
827  }
828  }
829 
830  if (!operandConstrained) {
832  TCEString opName = node.destinationOperation().operation().name();
833 #if 0
835  << opName << " constrained: "
836  << node.move().toString() << std::endl;
837 #endif
838  operationConstrainedMoves_[opName]++;
839  return true;
840  } else if (operandConstrained){
841  return false; // operand depedency is constrained by the trigger
842  }
843  }
844 
847  << std::endl
848  << "resource set constrained move of unknown reason: "
849  << node.toString() << std::endl;
850  if (node.isSourceOperation()) {
851  ProgramOperation& op = node.sourceOperation();
853  << "ProgramOperation: " << op.toString() << std::endl;
854  }
856  << "earliest cycle: " << origDDG_.earliestCycle(node)
857  << std::endl;
858 
859 
860  for (int c = 3; c >= 0; --c) {
861  int printCycle = node.cycle() - c;
862  if (printCycle > 0) {
864  << printCycle << ": "
865  << rm_.instruction(printCycle)->toString()
866  << std::endl;
867  }
868  }
869  return true;
870 }
DataDependenceGraph::EWH_DEFAULT
@ EWH_DEFAULT
Definition: DataDependenceGraph.hh:94
ResourceConstraintAnalyzer::dumpGraphWithStats
void dumpGraphWithStats(DataDependenceGraph &ddg, TCEString dotFileName, const std::map< int, std::list< MoveNode * > > &schedule)
Definition: ResourceConstraintAnalyzer.cc:241
ProgramOperation::operation
const Operation & operation() const
Definition: ProgramOperation.cc:590
CriticalPathBBMoveNodeSelector
Definition: CriticalPathBBMoveNodeSelector.hh:70
BoostGraph::predecessors
virtual NodeSet predecessors(const Node &node, bool ignoreBackEdges=false, bool ignoreForwardEdges=false) const
BoostGraph::outEdge
virtual Edge & outEdge(const Node &node, const int index) const
TTAProgram::Move::isTriggering
bool isTriggering() const
Definition: Move.cc:284
MoveNode::dotString
std::string dotString() const
Definition: MoveNode.cc:602
DataDependenceGraph::machine
const TTAMachine::Machine & machine() const
Definition: DataDependenceGraph.hh:260
ResourceConstraintAnalyzer::rfReadPortConstrained_
int rfReadPortConstrained_
Definition: ResourceConstraintAnalyzer.hh:85
BoostGraph::tailNode
virtual Node & tailNode(const Edge &edge) const
TTAMachine::Component::name
virtual TCEString name() const
Definition: MachinePart.cc:125
MoveNode::toString
std::string toString() const
Definition: MoveNode.cc:576
TTAProgram::Instruction::move
Move & move(int i) const
Definition: Instruction.cc:193
MoveNodeGroup::node
MoveNode & node(int index) const
Definition: MoveNodeGroup.cc:152
BoostGraph::headNode
virtual Node & headNode(const Edge &edge) const
BoostGraph::node
Node & node(const int index) const
BoostGraph::hasPath
bool hasPath(GraphNode &src, const GraphNode &dest) const
BoostGraph::maxSourceDistance
int maxSourceDistance(const GraphNode &node) const
TTAProgram::Terminal::registerFile
virtual const TTAMachine::RegisterFile & registerFile() const
Definition: Terminal.cc:225
MoveNode::isDestinationOperation
bool isDestinationOperation() const
ResourceConstraintAnalyzer::writePortConstrainedRfs_
RFCountMap writePortConstrainedRfs_
Definition: ResourceConstraintAnalyzer.hh:89
AssocTools::containsKey
static bool containsKey(const ContainerType &aContainer, const KeyType &aKey)
BoostGraph< MoveNode, DataDependenceEdge >::NodeSet
std::set< MoveNode *, typename MoveNode ::Comparator > NodeSet
Definition: BoostGraph.hh:86
TTAProgram::Instruction
Definition: Instruction.hh:57
TTAProgram::Move::destination
Terminal & destination() const
Definition: Move.cc:323
DataDependenceGraph::scheduledMoves
NodeSet scheduledMoves() const
Definition: DataDependenceGraph.cc:1375
GraphNode::nodeID
int nodeID() const
DataDependenceEdge::isFalseDep
bool isFalseDep() const
Definition: DataDependenceEdge.hh:95
ResourceConstraintAnalyzer::rm_
SimpleResourceManager & rm_
Definition: ResourceConstraintAnalyzer.hh:95
TTAProgram::Instruction::toString
std::string toString() const
Definition: Instruction.cc:576
DataDependenceGraph.hh
ProgramOperation
Definition: ProgramOperation.hh:70
TTAProgram::Move::toString
std::string toString() const
Definition: Move.cc:436
MoveNode
Definition: MoveNode.hh:65
MoveNode::isSourceConstant
bool isSourceConstant() const
Definition: MoveNode.cc:238
Application::verboseLevel
static int verboseLevel()
Definition: Application.hh:176
DataDependenceEdge::EDGE_REGISTER
@ EDGE_REGISTER
Definition: DataDependenceEdge.hh:53
Application::logStream
static std::ostream & logStream()
Definition: Application.cc:155
ProgramOperation::opcodeSettingNode
MoveNode & opcodeSettingNode()
Definition: ProgramOperation.cc:487
TTAMachine::Machine::Navigator::count
int count() const
MoveNodeGroup::nodeCount
int nodeCount() const
Definition: MoveNodeGroup.cc:140
BoostGraph::edgeCount
int edgeCount() const
Operation::name
virtual TCEString name() const
Definition: Operation.cc:93
DataDependenceEdge::dependenceType
DependenceType dependenceType() const
Definition: DataDependenceEdge.hh:88
TTAMachine::RegisterFile::maxWrites
virtual int maxWrites() const
Definition: RegisterFile.cc:135
ProgramOperation::triggeringMove
MoveNode * triggeringMove() const
Definition: ProgramOperation.cc:643
ResourceConstraintAnalyzer::analyzeRegisterAntideps
bool analyzeRegisterAntideps(DataDependenceGraph &ddg, std::ostream &s)
Definition: ResourceConstraintAnalyzer.cc:560
ProgramOperationPtr
std::shared_ptr< ProgramOperation > ProgramOperationPtr
Definition: MoveNode.hh:52
BoostGraph::outDegree
virtual int outDegree(const Node &node) const
ResourceConstraintAnalyzer::memoryBoundScheduleResourceUsage
void memoryBoundScheduleResourceUsage(DataDependenceGraph &ddg, TCEString graphName)
Definition: ResourceConstraintAnalyzer.cc:466
MoveNode::isPlaced
bool isPlaced() const
Definition: MoveNode.cc:352
MoveNode::sourceOperation
ProgramOperation & sourceOperation() const
Definition: MoveNode.cc:453
MoveNodeGroup::programOperationPtr
ProgramOperationPtr programOperationPtr() const
Definition: MoveNodeGroup.hh:75
MoveNode::isMove
bool isMove() const
MoveNode::isBypass
bool isBypass() const
Definition: MoveNode.cc:280
DataDependenceGraph::setProgramOperationNodes
virtual void setProgramOperationNodes(bool flag)
Definition: DataDependenceGraph.hh:224
ResourceConstraintAnalyzer::optimalScheduleResourceUsage
void optimalScheduleResourceUsage(DataDependenceGraph &ddg, TCEString graphName)
Definition: ResourceConstraintAnalyzer.cc:424
TTAMachine::RegisterFile::maxReads
virtual int maxReads() const
Definition: RegisterFile.cc:123
MoveNode::cycle
int cycle() const
Definition: MoveNode.cc:421
HWOperation.hh
DataDependenceGraph::largestCycle
int largestCycle() const
Definition: DataDependenceGraph.cc:1838
DataDependenceEdge::loopDepth
int loopDepth() const
Definition: DataDependenceEdge.hh:121
Instruction.hh
TTAMachine::RegisterGuard
Definition: Guard.hh:137
TTAProgram::Move::guard
MoveGuard & guard() const
Definition: Move.cc:345
TTAProgram::Terminal::isGPR
virtual bool isGPR() const
Definition: Terminal.cc:107
Guard.hh
CriticalPathBBMoveNodeSelector::candidates
virtual MoveNodeGroup candidates()
Definition: CriticalPathBBMoveNodeSelector.cc:148
Operation.hh
DataDependenceEdge::toString
TCEString toString() const
Definition: DataDependenceEdge.cc:201
MoveNode::isSourceOperation
bool isSourceOperation() const
Definition: MoveNode.cc:168
DataDependenceGraph::smallestCycle
int smallestCycle() const
Definition: DataDependenceGraph.cc:1818
ResourceConstraintAnalyzer::operationConstrainedMoves_
OperationCountMap operationConstrainedMoves_
Definition: ResourceConstraintAnalyzer.hh:92
TerminalFUPort.hh
ResourceConstraintAnalyzer.hh
ResourceConstraintAnalyzer::findResourceConstrainedParent
MoveNode * findResourceConstrainedParent(MoveNode *child)
Definition: ResourceConstraintAnalyzer.cc:688
TTAProgram::Move
Definition: Move.hh:55
Machine.hh
Exception
Definition: Exception.hh:54
ProgramOperation::inputMoveCount
int inputMoveCount() const
Definition: ProgramOperation.cc:600
Operation
Definition: Operation.hh:59
BoostGraph::height
int height() const
Exception::errorMessage
std::string errorMessage() const
Definition: Exception.cc:123
CriticalPathBBMoveNodeSelector::notifyScheduled
virtual void notifyScheduled(MoveNode &node)
Definition: CriticalPathBBMoveNodeSelector.cc:202
GraphBase::writeToDotFile
virtual void writeToDotFile(const TCEString &fileName) const
DataDependenceGraph::earliestCycle
int earliestCycle(const MoveNode &moveNode, unsigned int ii=UINT_MAX, bool ignoreRegWaRs=false, bool ignoreRegWaWs=false, bool ignoreGuards=false, bool ignoreFUDeps=false, bool ignoreSameOperationEdges=false, bool assumeBypassing=false) const
Definition: DataDependenceGraph.cc:388
TTAProgram::TerminalFUPort
Definition: TerminalFUPort.hh:56
ResourceConstraintAnalyzer::analyzeMoveNode
bool analyzeMoveNode(const MoveNode &node)
Definition: ResourceConstraintAnalyzer.cc:717
ProgramOperation.hh
ResourceConstraintAnalyzer::graphName_
TCEString graphName_
Definition: ResourceConstraintAnalyzer.hh:99
Operation::usesMemory
virtual bool usesMemory() const
Definition: Operation.cc:232
BoostGraph::edge
virtual Edge & edge(const int index) const
GraphEdge::dotString
virtual TCEString dotString() const
Definition: GraphEdge.cc:81
MoveNode::destinationOperation
ProgramOperation & destinationOperation(unsigned int index=0) const
MoveNode::move
TTAProgram::Move & move()
ProgramOperation::toString
std::string toString() const
Definition: ProgramOperation.cc:746
MoveNode::setCycle
void setCycle(const int newcycle)
Definition: MoveNode.cc:503
DataDependenceGraph::trueDependenceGraph
DataDependenceGraph * trueDependenceGraph(bool removeMemAntideps=true, bool ignoreMemDeps=false)
Definition: DataDependenceGraph.cc:3417
ResourceConstraintAnalyzer::analyzePreSchedule
void analyzePreSchedule()
Definition: ResourceConstraintAnalyzer.cc:50
ResourceConstraintAnalyzer::analyze
bool analyze()
Definition: ResourceConstraintAnalyzer.cc:62
MoveNodeSet.hh
TCEString
Definition: TCEString.hh:53
ResourceConstraintAnalyzer::origDDG_
DataDependenceGraph & origDDG_
Definition: ResourceConstraintAnalyzer.hh:94
DataDependenceGraph
Definition: DataDependenceGraph.hh:67
TTAMachine::Machine::busNavigator
virtual BusNavigator busNavigator() const
Definition: Machine.cc:356
DataDependenceGraph::criticalPathGraph
DataDependenceGraph * criticalPathGraph()
Definition: DataDependenceGraph.cc:3458
DataDependenceGraph::memoryDependenceGraph
DataDependenceGraph * memoryDependenceGraph()
Definition: DataDependenceGraph.cc:3482
DataDependenceGraph::EWH_REAL
@ EWH_REAL
Height returns the minimum cycle count for the schedule given enough resources.
Definition: DataDependenceGraph.hh:92
ResourceConstraintAnalyzer::unknownConstrained_
int unknownConstrained_
Definition: ResourceConstraintAnalyzer.hh:87
SimpleResourceManager.hh
DataDependenceEdge
Definition: DataDependenceEdge.hh:43
TTAProgram::Move::source
Terminal & source() const
Definition: Move.cc:302
ResourceConstraintAnalyzer::rfWritePortConstrained_
int rfWritePortConstrained_
Definition: ResourceConstraintAnalyzer.hh:84
TTAProgram::MoveGuard::guard
const TTAMachine::Guard & guard() const
Definition: MoveGuard.cc:86
TTAProgram::Move::isJump
bool isJump() const
Definition: Move.cc:164
MoveNodeGroup
Definition: MoveNodeGroup.hh:48
Move.hh
ResourceConstraintAnalyzer::minScheduleLength_
int minScheduleLength_
Definition: ResourceConstraintAnalyzer.hh:86
TTAProgram::Terminal::isImmediate
virtual bool isImmediate() const
Definition: Terminal.cc:63
DataDependenceEdge::DEP_WAR
@ DEP_WAR
Definition: DataDependenceEdge.hh:48
MoveNode.hh
TTAMachine::BaseRegisterFile::width
virtual int width() const
ResourceConstraintAnalyzer::readPortConstrainedRfs_
RFCountMap readPortConstrainedRfs_
Definition: ResourceConstraintAnalyzer.hh:90
BoostGraph::nodeCount
int nodeCount() const
TTAMachine::RegisterGuard::registerFile
const RegisterFile * registerFile() const
TTAProgram::Instruction::moveCount
int moveCount() const
Definition: Instruction.cc:176
ResourceConstraintAnalyzer::foundMoves_
std::set< MoveNode * > foundMoves_
Definition: ResourceConstraintAnalyzer.hh:97
ResourceConstraintAnalyzer::busConstrained_
int busConstrained_
Definition: ResourceConstraintAnalyzer.hh:82
ResourceConstraintAnalyzer::operationConstrained_
int operationConstrained_
Definition: ResourceConstraintAnalyzer.hh:83
DataDependenceEdge::edgeReason
EdgeReason edgeReason() const
Definition: DataDependenceEdge.hh:91
DDGMoveNodeSelector.hh
ProgramOperation::inputMove
MoveNode & inputMove(int index) const
Definition: ProgramOperation.cc:621
TTAMachine::Machine
Definition: Machine.hh:73
DataDependenceGraph::setEdgeWeightHeuristics
void setEdgeWeightHeuristics(EdgeWeightHeuristics ewh)
Definition: DataDependenceGraph.cc:5716
SimpleResourceManager::instruction
virtual TTAProgram::Instruction * instruction(int cycle) const override
Definition: SimpleResourceManager.cc:442
MoveGuard.hh