TCE  1.21
ProgramPartitioner.cc
Go to the documentation of this file.
1 /*
2  Copyright (c) 2012 Tampere University.
3 
4  This file is part of TTA-Based Codesign Environment (TCE).
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23  */
24 /**
25  * @file ProgramPartitioner.cc
26  *
27  * Declaration of ProgramPartitioner class.
28  *
29  * @author Pekka Jääskeläinen 2012
30  */
31 
32 #include <iostream>
33 
34 #include "CompilerWarnings.hh"
35 
36 IGNORE_COMPILER_WARNING("-Wunused-parameter")
37 
38 #include "ProgramPartitioner.hh"
40 #include "TCETargetMachine.hh"
41 #include "hash_map.hh"
42 #include "Application.hh"
43 #include "tce_config.h"
44 #include <llvm/IR/Instruction.h>
45 
47 
48 char ProgramPartitioner::ID = 0;
49 
50 using namespace llvm;
51 
52 //#define DEBUG_PROGRAM_PARTITIONER
53 // define this if in case the DLP-partitioner does not find
54 // a node id it should assign the registers to the EXTRAS node.
55 #define ASSIGN_UNKNOWN_TO_EXTRAS
56 
57 llvm::Pass*
59  return new ProgramPartitioner();
60 }
61 
62 bool
64  return false;
65 }
66 
67 bool
68 ProgramPartitioner::runOnMachineFunction(llvm::MachineFunction& MF) {
69 
70 #if (!(defined(LLVM_3_5)))
71 #ifdef DEBUG_PROGRAM_PARTITIONER
72  std::cerr << "### ProgramPartitioner disabled for llvm 3.6+ " << std::endl;
73 #endif
74  return true;
75 #endif
76 
77 #ifdef DEBUG_PROGRAM_PARTITIONER
78  std::cerr << "### Running ProgramPartitioner for "
79  << MF.getFunction()->getName().str() << std::endl;
80 #endif
81  const TCETargetMachine& targetMach =
82  dynamic_cast<const TCETargetMachine&>(
83  MF.getTarget());
84 
85  /* Partition only clustered machines with vector backend support for now. */
86  if (targetMach.maxVectorSize() == 0) return false;
87 
88 #ifdef DEBUG_PROGRAM_PARTITIONER
89  PRINT_VAR(targetMach.maxVectorSize());
90 #endif
91 
92  const TCETargetMachinePlugin& tmPlugin =
93  dynamic_cast<const TCETargetMachinePlugin&>(
94  targetMach.targetPlugin());
95 
96  llvm::MachineRegisterInfo& MRI = MF.getRegInfo();
97 
98  hash_map<const llvm::MachineInstr*, unsigned> partitions;
99 
100 
101  for (MachineFunction::const_iterator i = MF.begin();
102  i != MF.end(); i++) {
103  bool changedT, changedD = false;
104  // Loop through instruction in basic blocks looking for parent
105  // or metadata information about line assignment until there is
106  // no new information added.
107  do {
108  changedT = false;
109  changedD = false;
110  for (MachineBasicBlock::const_iterator j = i->begin();
111  j != i->end(); j++) {
112 
113  const llvm::MachineInstr& mi = *j;
114 
115  if (partitions.find(&mi) != partitions.end())
116  continue; /* Already partitioned. */
117  unsigned nodeIndex = tmPlugin.extractElementLane(mi);
118 
119  changedT |= findNodeIndex(mi, partitions, MF, nodeIndex);
120 
121  }
122 
123 #if 1
124  // Same as before but starting from the bottom of basic block to the
125  // top.
126 
127  for (MachineBasicBlock::const_iterator j = i->end();
128  j != i->begin();) {
129  j--;
130  const llvm::MachineInstr& mi = *j;
131 
132  if (partitions.find(&mi) != partitions.end())
133  continue; /* Already partitioned. */
134  unsigned nodeIndex = tmPlugin.extractElementLane(mi);
135 
136  changedD |= findNodeIndex(mi, partitions, MF, nodeIndex);
137 
138  }
139 #endif
140  } while (changedT || changedD);
141  }
142 #ifdef ASSIGN_UNKNOWN_TO_EXTRAS
143 #ifdef DEBUG_PROGRAM_PARTITIONER
144  std::cerr << "[setting the rest to EX]" << std::endl;
145 #endif
146  /* Force the non-partitioned instructions to the
147  extras node. This should include sequential C code and
148  multi-WI address computations, branch condition code,
149  etc. for OpenCL C code. */
150  for (MachineFunction::const_iterator i = MF.begin();
151  i != MF.end(); i++) {
152 
153  for (MachineBasicBlock::const_iterator j = i->begin();
154  j != i->end(); j++) {
155  const llvm::MachineInstr& mi = *j;
156 
157 #ifdef LLVM_OLDER_THAN_10
158  if (mi.getNumOperands() == 0 || !mi.getOperand(0).isReg() ||
159  !mi.getOperand(0).isDef() ||
160  llvm::TargetRegisterInfo::isPhysicalRegister(
161  mi.getOperand(0).getReg()))
162 #else
163  if (mi.getNumOperands() == 0 || !mi.getOperand(0).isReg() ||
164  !mi.getOperand(0).isDef() ||
165  Register::isPhysicalRegister(
166  mi.getOperand(0).getReg()))
167 #endif
168  continue;
169 
170  if (partitions.find(&mi) != partitions.end())
171  continue; /* Partitioned. */
172  const llvm::MachineOperand& result = mi.getOperand(0);
173  const llvm::TargetRegisterClass* newRegClass =
174  tmPlugin.extrasRegClass(MRI.getRegClass(result.getReg()));
175 #ifdef DEBUG_PROGRAM_PARTITIONER
176  std::cerr << "[ORIGINAL REG CLASS "
177  << MRI.getRegClass(result.getReg())->getName()
178  << "]" << std::endl;
179 #endif
180  MRI.setRegClass(result.getReg(), newRegClass);
181 #ifdef DEBUG_PROGRAM_PARTITIONER
182  std::cerr << "[ASSIGNED REG CLASS "
183  << newRegClass->getName() << "]" << std::endl;
184 #endif
185  }
186  }
187 #endif
188  return true;
189 }
190 
191 bool
193  return false;
194 }
195 
196 /**
197  * Attemts to find an index of lane to which the instruction belongs
198  * based on metadata or parent instruction assignment.
199  *
200  * @param mi Machine instruction to analyze
201  * @param paritions map between machine instruction and the lane indexes
202  * @param MF machine function to which the mi belongs
203  * @param nodeIndex index found
204  */
205 bool
207  const llvm::MachineInstr &mi,
208  hash_map<const llvm::MachineInstr*, unsigned>& partitions,
209  llvm::MachineFunction& MF,
210  unsigned int& nodeIndex) {
211 
212  const TCETargetMachine& targetMach =
213  dynamic_cast<const TCETargetMachine&>(
214  MF.getTarget());
215 
216  const TCETargetMachinePlugin& tmPlugin =
217  dynamic_cast<const TCETargetMachinePlugin&>(
218  targetMach.targetPlugin());
219 
220  llvm::MachineRegisterInfo& MRI = MF.getRegInfo();
221 
222  if (nodeIndex != UINT_MAX) {
223 #ifdef DEBUG_PROGRAM_PARTITIONER
224  std::cerr << "[EXTRACT lane " << nodeIndex << "] " << std::endl;
225 #endif
226  }
227 
228  if (nodeIndex == UINT_MAX) {
229  /* Check if one of the parents of this instruction is already
230  partitioned.
231 
232  TODO: do it recursively until a root instruction or
233  a partitioned instruction is found. */
234  for (unsigned opr = 0; opr < mi.getNumOperands(); ++opr) {
235  const llvm::MachineOperand& operand = mi.getOperand(opr);
236  if (!operand.isReg()) continue;
237 
238  llvm::MachineRegisterInfo::def_iterator di =
239  MRI.def_begin(operand.getReg());
240  if (di.atEnd()) continue;
241 
242  const llvm::MachineInstr* parent = (*di).getParent();
243 
244  // TODO: this NULL check not needed anymore?
245  if (parent == NULL) continue;
246  if (partitions.find(parent) == partitions.end()) continue;
247  nodeIndex = partitions[parent];
248  break;
249  }
250  }
251  if (nodeIndex == UINT_MAX &&
252  mi.memoperands_begin() != mi.memoperands_end()) {
253  /* Check if one of the parents of this is a instruction for
254  which we can track the OpenCL work item id metadata. */
255  for (llvm::MachineInstr::mmo_iterator mmoIter =
256  mi.memoperands_begin();
257  mmoIter != mi.memoperands_end(); ++mmoIter) {
258 
259  llvm::MachineMemOperand* mo = *mmoIter;
260 
261  if (mo->getValue() == NULL ||
262  !llvm::isa<const llvm::Instruction>(mo->getValue()))
263  continue;
264  // The matching instruction, if known.
265  const llvm::Instruction* instruction =
266  llvm::cast<const llvm::Instruction>(mo->getValue());
267 
268  if (!instruction->getMetadata("wi")) {
269  continue;
270  }
271  MDNode *md = instruction->getMetadata("wi");
272  ConstantInt* id_x = NULL;
273  if (llvm::isa<MDNode>(md->getOperand(2))) {
274  // new wi metadata format is:
275  // operand 0 - WI_id
276  // operand 1 - MDNode with region id
277  // operand 2 - MDNode with XYZ coordinates
278  MDNode *xyz = dyn_cast<MDNode>(md->getOperand(2));
279  // New XYZ metadata format from pocl
280  // operand 0 - WI_xyz
281  // operand 1 - x coordinate
282  // operand 2 - y coordinate
283  // operand 3 - z coordinate
284  // pick X coordinate
285 #ifdef LLVM_OLDER_THAN_3_6
286  id_x = cast<llvm::ConstantInt>(xyz->getOperand(1));
287 #else
288  id_x = cast<llvm::ConstantInt>(
289  dyn_cast<llvm::ConstantAsMetadata>(
290  xyz->getOperand(1))->getValue());
291 #endif
292  } else {
293  // old metadata format was
294  // operand 0 - WI_id
295  // operand 1 - region id
296  // operand 2 - x coordinate
297  // operand 3 - y coordinate
298  // operand 4 - z coordinate
299  // operand 5 - instruction number
300 #ifdef LLVM_OLDER_THAN_3_6
301  id_x = cast<llvm::ConstantInt>(md->getOperand(2));
302 #else
303  id_x = cast<llvm::ConstantInt>(
304  dyn_cast<llvm::ConstantAsMetadata>(
305  md->getOperand(2))->getValue());
306 #endif
307  }
308  if (id_x == NULL)
309  continue;
310  nodeIndex =
311  (unsigned)id_x->getValue().getZExtValue() %
312  targetMach.maxVectorSize();
313 #ifdef DEBUG_PROGRAM_PARTITIONER
314  std::cerr << "[FOUND OCL WI METADATA: " <<
315  nodeIndex << "]" << std::endl;
316 #endif
317  }
318  }
319 #ifdef DEBUG_PROGRAM_PARTITIONER
320  if (nodeIndex != UINT_MAX) {
321  std::cerr << "[NODE INDEX " << nodeIndex << "]: ";
322  for (int pad = 0; pad < nodeIndex; ++pad)
323  for (int ws = 0; ws < 30; ++ws) std::cerr << " ";
324  } else {
325  std::cerr << "[NODE INDEX UNKNOWN]: ";
326  }
327 #endif
328  if (nodeIndex == UINT_MAX) return false;
329  partitions[&mi] = nodeIndex;
330 
331  /* TODO: Use the partition's register class instead of the super class to force
332  the instruction's regs to be allocated from the partition. */
333  for (unsigned int i = 0; i < mi.getNumOperands(); i++) {
334  if (mi.getOperand(i).isReg() &&
335  mi.getOperand(i).isDef()) {
336 
337  const llvm::MachineOperand& result = mi.getOperand(i);
338 #ifdef LLVM_OLDER_THAN_10
339  if (llvm::TargetRegisterInfo::isPhysicalRegister(
340  mi.getOperand(i).getReg())) continue;
341 #else
342  if (Register::isPhysicalRegister(
343  mi.getOperand(i).getReg())) continue;
344 #endif
345 
346  const llvm::TargetRegisterClass* nodeRegClass =
347  tmPlugin.nodeRegClass(nodeIndex, MRI.getRegClass(result.getReg()));
348 #ifdef DEBUG_PROGRAM_PARTITIONER
349  std::cerr << "[ORIGINAL REG CLASS "
350  << MRI.getRegClass(result.getReg())->getName()
351  << "]" << std::endl;
352 #endif
353  if (nodeRegClass == NULL) {
354 #ifdef DEBUG_PROGRAM_PARTITIONER
355  std::cerr << "[NO REG CLASS FOUND FOR THE NODE]" << std::endl;
356 #endif
357  } else {
358 #ifdef DEBUG_PROGRAM_PARTITIONER
359  std::cerr << "[ASSIGNED REG CLASS "
360  << nodeRegClass->getName() << "]" << std::endl;
361 #endif
362  MRI.setRegClass(result.getReg(), nodeRegClass);
363  }
364  }
365  }
366  return true;
367 }
virtual bool runOnMachineFunction(llvm::MachineFunction &MF) override
virtual bool doInitialization(llvm::Module &M) override
#define POP_COMPILER_DIAGS
virtual unsigned int extractElementLane(const MachineInstr &mi) const =0
#define PRINT_VAR(VARIABLE__)
Definition: Application.hh:110
virtual bool doFinalization(llvm::Module &M) override
llvm::Pass * createProgramPartitionerPass()
virtual TCETargetMachinePlugin & targetPlugin() const
virtual bool findNodeIndex(const llvm::MachineInstr &I, hash_map< const llvm::MachineInstr *, unsigned > &partitions, llvm::MachineFunction &MF, unsigned int &index)
#define IGNORE_COMPILER_WARNING(X)
virtual const llvm::TargetRegisterClass * nodeRegClass(unsigned nodeId, const llvm::TargetRegisterClass *current) const =0