内容简介:LLVM学习笔记(12)
3.3.5. 信息的完善
在RegisterInfoEmitter::run,接下来执行CodeGenRegBank::computeDerivedInfo方法。
1802 void CodeGenRegBank::computeDerivedInfo (){
1803 ();
1804 computeSubRegLaneMasks();
1805
1806 // Compute aweight for each register unit created during getSubRegs.
1807 // This maycreate adopted register units (with unit # >= NumNativeRegUnits).
1808 computeRegUnitWeights();
1809
1810 // Compute aunique set of RegUnitSets. One for each RegClass and inferred
1811 // supersets forthe union of overlapping sets.
1812 ();
1813
1814 computeRegUnitLaneMasks();
1815
1816 // Computeregister class HasDisjunctSubRegs flag.
1817 for (CodeGenRegisterClass &RC : RegClasses) {
1818 RC.HasDisjunctSubRegs = false;
1819 for ( const CodeGenRegister *Reg : RC.getMembers())
1820 RC.HasDisjunctSubRegs |= Reg->HasDisjunctSubRegs;
1821 }
1822
1823 // Get the weightof each set.
1824 for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx)
1825 RegUnitSets[Idx].Weight =getRegUnitSetWeight(RegUnitSets[Idx].Units);
1826
1827 // Find the orderof each set.
1828 RegUnitSetOrder.reserve(RegUnitSets.size());
1829 for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx)
1830 RegUnitSetOrder.push_back(Idx);
1831
1832 std::stable_sort(RegUnitSetOrder.begin(),RegUnitSetOrder.end(),
1833 [ this ](unsignedID1, unsigned ID2) {
1834 return getRegPressureSet(ID1).Units.size() <
1835 getRegPressureSet(ID2).Units.size();
1836 });
1837 for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) {
1838 RegUnitSets[RegUnitSetOrder[Idx]].Order =Idx;
1839 }
1840 }
CodeGenRegBank::computeDerivedInfo方法计算的所谓衍生信息包括以下内容。
3.3.5.1. 完整的索引信息
前面我们已经建立了复合索引,但这些索引不一定完整,比如可用孙子寄存器的复合索引就没有认真构建过。下面就要为所有可用的寄存器建立起完整的索引系统。
1117 void CodeGenRegBank::computeComposites (){
1118 // Keep track ofTopoSigs visited. We only need to visit each TopoSig once,
1119 // and manyregisters will share TopoSigs on regular architectures.
1120 BitVector TopoSigs(getNumTopoSigs());
1121
1122 for ( const auto &Reg1: Registers) {
1123 // Skipidentical subreg structures already processed.
1124 if (TopoSigs.test(Reg1.getTopoSig()))
1125 continue ;
1126 TopoSigs.set(Reg1.getTopoSig());
1127
1128 const CodeGenRegister::SubRegMap &SRM1 = Reg1.getSubRegs();
1129 for (CodeGenRegister::SubRegMap::const_iterator i1 = SRM1.begin(),
1130 e1 = SRM1.end(); i1 != e1; ++i1) {
1131 CodeGenSubRegIndex *Idx1 = i1->first;
1132 CodeGenRegister *Reg2 = i1->second;
1133 // Ignoreidentity compositions.
1134 if (&Reg1 == Reg2)
1135 continue ;
1136 const CodeGenRegister::SubRegMap &SRM2 = Reg2->getSubRegs();
1137 // Trycomposing Idx1 with another SubRegIndex.
1138 for (CodeGenRegister::SubRegMap::const_iterator i2 = SRM2.begin(),
1139 e2 = SRM2.end(); i2 != e2; ++i2) {
1140 CodeGenSubRegIndex *Idx2 =i2->first;
1141 CodeGenRegister *Reg3 = i2->second;
1142 // Ignoreidentity compositions.
1143 if (Reg2 == Reg3)
1144 continue ;
1145 // OKReg1:IdxPair == Reg3. Find the index with Reg:Idx == Reg3.
1146 CodeGenSubRegIndex *Idx3 =Reg1.getSubRegIndex(Reg3);
1147 assert (Idx3&& "Sub-register doesn't have an index");
1148
1149 //Conflicting composition? Emit a warning but allow it.
1150 if (CodeGenSubRegIndex *Prev =Idx1->addComposite(Idx2,Idx3))
1151 PrintWarning(Twine("SubRegIndex") + Idx1->getQualifiedName() +
1152 " and " +Idx2->getQualifiedName() +
1153 " composeambiguously as " + Prev->getQualifiedName() +
1154 " or " +Idx3->getQualifiedName());
1155 }
1156 }
1157 }
1158 }
这里的处理是遍历所有的CodeGenRegister实例,如果已知AàB,BàC的索引(C是B的子寄存器,B是A的子寄存器),建立AàC的(复合)索引。如此一来,绝不会有漏网之鱼。
接下来,我们需要让TableGen能有办法高效、简单地表示这些关系。比如,A覆盖了C。
CodeGenSubRegIndex定义里设置了LaneMask域。大致上LaneMask表示寄存器中可分割的部分。叶子索引(Composed为空)在LaneMask中会设置一个比特,位置是它自己在SubRegIndices容器里出现的次序(1174行),序号超过31的索引将共享同一个LaneMask比特(1187行),表示寄存器中一个特定的部分。而Composed非空索引的LaneMask则是它构成的复合索引的LaneMask的集合(1189行首先将它们的LaneMask置0),表示它所代表的寄存器部分覆盖了哪些更小的部分。
1167 void CodeGenRegBank::computeSubRegLaneMasks (){
1168 // First assignindividual bits to all the leaf indices.
1169 unsigned Bit = 0;
1170 // Determine maskof lanes that cover their registers.
1171 CoveringLanes = ~0u;
1172 for ( auto &Idx : SubRegIndices) {
1173 if (Idx.getComposites().empty()) {
1174 Idx.LaneMask = 1u << Bit;
1175 // Share bit31 in the unlikely case there are more than 32 leafs.
1176 //
1177 // Sharingbits is harmless; it allows graceful degradation in targets
1178 // with morethan 32 vector lanes. They simply get a limited resolution
1179 // view oflanes beyond the 32nd.
1180 //
1181 // See alsothe comment for getSubRegIndexLaneMask().
1182 if (Bit < 31)
1183 ++Bit;
1184 else
1185 // Once bit31 is shared among multiple leafs, the 'lane' it represents
1186 // is nolonger covering its registers.
1187 CoveringLanes &= ~(1u <<Bit);
1188 } else {
1189 Idx.LaneMask = 0;
1190 }
1191 }
1192
1193 // Computetransformation sequences for composeSubRegIndexLaneMask. The idea
1194 // here is thatfor each possible target subregister we look at the leafs
1195 // in thesubregister graph that compose for this target and create
1196 // transformationsequences for the lanemasks. Each step in the sequence
1197 // consists of abitmask and a bitrotate operation. As the rotation amounts
1198 // are usuallythe same for many subregisters we can easily combine the steps
1199 // by combiningthe masks.
1200 for ( const auto &Idx :SubRegIndices) {
1201 const auto &Composites = Idx.getComposites();
1202 auto &LaneTransforms = Idx.CompositionLaneMaskTransform;
1203 // Go throughall leaf subregisters and find the ones that compose with Idx.
1204 // These makeout all possible valid bits in the lane mask we want to
1205 // transform.Looking only at the leafs ensure that only a single bit in
1206 // the mask isset.
1207 unsigned NextBit = 0;
1208 for ( auto &Idx2 : SubRegIndices) {
1209 // Skipnon-leaf subregisters.
1210 if (!Idx2.getComposites().empty())
1211 continue ;
1212 // Replicatethe behaviour from the lane mask generation loop above.
1213 unsigned SrcBit = NextBit;
1214 unsigned SrcMask = 1u << SrcBit;
1215 if (NextBit < 31)
1216 ++NextBit;
1217 assert (Idx2.LaneMask== SrcMask);
1218
1219 // Get thecomposed subregister if there is any.
1220 auto C =Composites.find(&Idx2);
1221 if (C == Composites.end())
1222 continue ;
1223 const CodeGenSubRegIndex *Composite = C->second;
1224 // TheComposed subreg should be a leaf subreg too
1225 assert (Composite->getComposites().empty());
1226
1227 // CreateMask+Rotate operation and merge with existing ops if possible.
1228 unsigned DstBit =Log2_32(Composite->LaneMask);
1229 int Shift = DstBit - SrcBit;
1230 uint8_t RotateLeft = Shift >= 0 ?(uint8_t)Shift : 32+Shift;
1231 for ( auto &I : LaneTransforms) {
1232 if (I.RotateLeft == RotateLeft) {
1233 I.Mask |= SrcMask;
1234 SrcMask = 0;
1235 }
1236 }
1237 if (SrcMask != 0) {
1238 MaskRolPair MaskRol = { SrcMask,RotateLeft };
1239 LaneTransforms.push_back(MaskRol);
1240 }
1241 }
1242 // Optimize ifthe transformation consists of one step only: Set mask to
1243 // 0xffffffff(including some irrelevant invalid bits) so that it should
1244 // merge withmore entries later while compressing the table.
1245 if (LaneTransforms.size() == 1)
1246 LaneTransforms[0].Mask = ~0u;
1247
1248 // Furthercompression optimization: For invalid compositions resulting
1249 // in asequence with 0 entries we can just pick any other. Choose
1250 // Mask0xffffffff with Rotation 0.
1251 if (LaneTransforms.size() == 0) {
1252 MaskRolPair P = { ~0u, 0 };
1253 LaneTransforms.push_back(P);
1254 }
1255 }
1256
1257 // FIXME: What ifad-hoc aliasing introduces overlaps that aren't represented
1258 // by thesub-register graph? This doesn't occur in any known targets.
1259
1260 // Inherit lanesfrom composites.
1261 for ( const auto &Idx :SubRegIndices) {
1262 unsigned Mask = Idx.();
1263 // If somesuper-registers without CoveredBySubRegs use this index, we can
1264 // no longerassume that the lanes are covering their registers.
1265 if (!Idx.AllSuperRegsCovered)
1266 CoveringLanes &= ~Mask;
1267 }
1268
1269 // Compute lane maskcombinations for register classes.
1270 for ( auto &RegClass : RegClasses) {
1271 unsigned LaneMask = 0;
1272 for ( const auto &SubRegIndex : SubRegIndices) {
1273 if(RegClass.getSubClassWithSubReg(&SubRegIndex) == nullptr)
1274 continue ;
1275 LaneMask |= SubRegIndex.LaneMask;
1276 }
1277 RegClass.LaneMask = LaneMask;
1278 }
1279 }
CodeGenSubRegIndex的另一个域CompositionLaneMaskTransform是类型为mutableSmallVector< MaskRolPair,1>的容器,其中MaskRolPair是这样定义的:
42 struct MaskRolPair {
43 unsigned Mask;
44 uint8_t RotateLeft;
45 bool operator ==( const MaskRolPair Other) const {
46 return Mask == Other.Mask && RotateLeft == Other.RotateLeft;
47 }
48 bool operator !=( const MaskRolPair Other) const {
49 return Mask != Other.Mask || RotateLeft != Other.RotateLeft;
50 }
51 };
对Composed为空的索引,CompositionLaneMaskTransform没有用。但对Composed非空的索引,以A+B=C为例,A要记录这样一项:“ (B的LaneMask, 从B的LaneMask得到C的LaneMask所需的向左偏转比特数)”。显然,这种情形下,B和C的Composed都必须为空。
1262行循环开始对所有Composed非空的寄存器索引计算LaneMask。这时Composed为空的索引的LaneMask已经设置好了,Composed非空的寄存器的LaneMask则是0。我们希望Composed非空的索引的LaneMask是所有从它得到的复合索引LaneMask的合集,这由computeLaneMask方法实现。
85 unsigned CodeGenSubRegIndex::computeLaneMask () const {
86 // Alreadycomputed?
87 if (LaneMask)
88 return LaneMask;
89
90 // Recursionguard, shouldn't be required.
91 LaneMask = ~0u;
92
93 // The lane maskis simply the union of all sub-indices.
94 unsigned M = 0;
95 for ( const auto &C :Composed)
96 M |= C.second->computeLaneMask();
97 assert (M&& "Missing lane mask, sub-register cycle?");
98 LaneMask = M;
99 return LaneMask;
100 }
CodeGenRegisterClass也有一个LaneMask成员。它是该类所有寄存器包含的所有寄存器索引的LaneMask的合集。最后,在1270行循环设置它们。
3.3.5.2. 寄存器单元的权重
前面为每个最小可援引的寄存器配了一个RegUnit实例,而这些寄存器的父寄存器则使用子寄存器的RegUnit实例。这些RegUnit实例将产生用于寄存器相干与压力模型的数据。其中RegUnit的Weight域表示该单元的权重,无特殊情况Weight是1,包含多个寄存器单元的寄存器的权重就是它们的总和。RegUnit的Weight值还用于计算寄存器类的权重。不过有些寄存器属于多个寄存器类,因此这些类要一起考虑,其中一个重要的考量是属于同一个寄存器类的寄存器权重应该一致(不过下面可以看到,只能做到尽量一致),并由此调整RegUnit的权重为后面构建RegUnit同类集做好准备。
1489 void CodeGenRegBank::computeRegUnitWeights (){
1490 std::vector<UberRegSet> UberSets;
1491 std::vector<UberRegSet*>RegSets(Registers.size());
1492 (UberSets,RegSets, * this );
1493 // UberSets andRegSets are now immutable.
1494
1495 (UberSets,* this );
1496
1497 // Iterate overeach Register, normalizing the unit weights until reaching
1498 // a fix point.
1499 unsigned NumIters = 0;
1500 for (boolChanged = true; Changed; ++NumIters) {
1501 assert (NumIters<= NumNativeRegUnits && "Runaway register unit weights");
1502 Changed = false;
1503 for ( auto &Reg : Registers) {
1504 CodeGenRegister::RegUnitList NormalUnits;
1505 SparseBitVector<> NormalRegs;
1506 Changed |=(&Reg,UberSets, RegSets, NormalRegs,
1507 NormalUnits, * this );
1508 }
1509 }
1510 }
因此,首先对寄存器进行划分(以寄存器类为依据),分为若干不相关的寄存器集合。然后,对每个集合中的寄存器计算其权重,选出其中最大的权重作为集合的权重。最后,将集合中寄存器的权重尽量调整为集合的权重一致。
在1490行出现的UberRegSet定义如下:
1298 struct UberRegSet {
1299 CodeGenRegister::Vec Regs;
1300 unsigned Weight;
1301 CodeGenRegister::RegUnitList SingularDeterminants;
1302
1303 UberRegSet(): Weight(0) {}
1304 };
每个UberRegSet是包含相同寄存器的寄存器类的联合传递闭包。所有UberRegSet合在一起构成了所有寄存器的一个划分。每个UberRegSet实例也称为一个同类集,computeUberSets方法使用同类集算法进行寄存器划分。
1311 static void computeUberSets (std::vector<UberRegSet>&UberSets,
1312 std::vector<UberRegSet*>&RegSets,
1313 CodeGenRegBank&RegBank) {
1314
1315 const auto &Registers = RegBank.getRegisters();
1316
1317 // The RegisterEnumValue is one greater than its index into Registers.
1318 assert (Registers.size()== Registers.back().EnumValue &&
1319 "register enum valuemismatch");
1320
1321 // Forsimplicitly make the SetID the same as EnumValue.
1322 IntEqClasses UberSetIDs(Registers.size()+1);
1323 std::set<unsigned> AllocatableRegs;
1324 for ( auto &RegClass : RegBank.getRegClasses()) {
1325 if (!RegClass.Allocatable)
1326 continue ;
1327
1328 const CodeGenRegister::Vec &Regs = RegClass.getMembers();
1329 if (Regs.empty())
1330 continue ;
1331
1332 unsigned USetID = UberSetIDs.((*Regs.begin())->EnumValue);
1333 assert (USetID&& "register number 0 is invalid");
1334
1335 AllocatableRegs.insert((*Regs.begin())->EnumValue);
1336 for ( auto I = std::next(Regs.begin()), E = Regs.end(); I!= E; ++I) {
1337 AllocatableRegs.insert((*I)->EnumValue);
1338 UberSetIDs.(USetID,(*I)->EnumValue);
1339 }
1340 }
1341 // Combinenon-allocatable regs.
1342 for ( const auto &Reg :Registers) {
1343 unsigned RegNum = Reg.EnumValue;
1344 if (AllocatableRegs.count(RegNum))
1345 continue ;
1346
1347 UberSetIDs.(0,RegNum);
1348 }
1349 UberSetIDs.();
1350
1351 // Make the firstUberSet a special unallocatable set.
1352 unsigned ZeroID = UberSetIDs[0];
1353
1354 // InsertRegisters into the UberSets formed by union-find.
1355 // Do not resizeafter this.
1356 UberSets.resize(UberSetIDs.getNumClasses());
1357 unsigned i = 0;
1358 for ( const CodeGenRegister &Reg : Registers) {
1359 unsigned USetID =UberSetIDs[Reg.EnumValue];
1360 if (!USetID)
1361 USetID = ZeroID;
1362 else if (USetID == ZeroID)
1363 USetID = 0;
1364
1365 UberRegSet *USet = &UberSets[USetID];
1366 USet->Regs.push_back(&Reg);
1367 sortAndUniqueRegisters(USet->Regs);
1368 RegSets[i++] = USet;
1369 }
1370 }
在1322行,调用IntEqClasses构造函数来初始化一个IntEqClasses实例。这也是一个同类集,但保存的是寄存器对象的EnumValue,寄存器的EnumValue对这个同类集算法是重要的,因为我们需要知道一个同类集中最小的寄存器,而恰好它的EnumValue也是最小的(这是我们在TD文件里必须遵循的规则:按从小到大的次序定义寄存器)。
42 IntEqClasses(unsigned N = 0) : NumClasses(0){(N); }
IntEqClasses在容器EC里保存同类集,这个容器的类型是SmallVector<unsigned,8>。在同类集中存在一个元素作为这个同类集的代表,EC这样记录同类集,以某个元素的值为下标,EC[elem]就是该同类集的代表元素,代表元素的EC项映射回自己。
在同类集扩展期间,EC的代表元素是同类集中较小的成员。在同类集成型后,代表元素将是同类集中最小的成员(即最小的寄存器)。
25 void IntEqClasses::grow (unsignedN) {
26 assert (NumClasses== 0 && "grow() called after compress().");
27 EC.reserve(N);
28 while (EC.size() < N)
29 EC.push_back(EC.size());
30 }
方法grow准备初始同类集,一开始每个寄存器划归一个独立的同类集。接下来,以寄存器类为依据,合并属于同一个寄存器类的寄存器的同类集。一个同类集至少对应一个寄存器类,在寄存器类有重叠时,会对应多个寄存器类。
1332行的findLeader方法查找这个同类集的代表元素:
46 unsigned IntEqClasses::findLeader (unsigneda) const {
47 assert (NumClasses== 0 && "findLeader() called after compress().");
48 while (a !=EC[a])
49 a = EC[a];
50 return a;
51 }
在1336行的循环里,join方法将两个同类集合二为一。假设一开始有两个同类集:EC[5]= 5,EC[3]= 3。那么下面39行的循环会产生这样的EC:EC[5] =3,EC[3]= 3。接下来,如果这个同类集(现在有两个成员)要与同类集EC[4]= 4合并,那么39行的循环会产生这样的结果:EC[5]= 3,EC[4]= 3,EC[3]= 3。
32 void IntEqClasses::join (unsigneda, unsigned b) {
33 assert (NumClasses== 0 && "join() called after compress().");
34 unsigned eca = EC[a];
35 unsigned ecb = EC[b];
36 // Updatepointers while searching for the leaders, compressing the paths
37 // incrementally.The larger leader will eventually be updated, joining the
38 // classes.
39 while (eca !=ecb)
40 if (eca < ecb)
41 EC[b] = eca, b = ecb, ecb = EC[b];
42 else
43 EC[a] = ecb, a = eca, eca = EC[a];
44 }
最后,1342行循环将不可分配的寄存器归入同类集0(EC[0])。
前面的处理不能保证同类集的元素都映射到代表,比如同类集EC[5] = 3,EC[4] =3,EC[3]= 3在与同类集EC[1]= 1合并时,只会产生结果:EC[5]= 3,EC[4]= 3,EC[3]= 1,EC[1]= 1。EC[5]与EC[4]并没有映射到代表,因此需要compress方法予以更正。注意,“EC[i]== i”是一个同类集代表,该表达式成立的次数就是同类集的个数。
53 void IntEqClasses::compress (){
54 if (NumClasses)
55 return ;
56 for (unsignedi = 0, e = EC.size(); i != e; ++i)
57 EC[i] = (EC[i] == i) ? NumClasses++ :EC[EC[i]];
58 }
1358行循环生成以CodeGenRegister对象为内容的同类集对象——UberRegSet实例,容器RegSets将寄存器关联到所属同类集。接下来调用computeUberWeights计算这些UberRegSet的权重数据。
1373 static void computeUberWeights (std::vector<UberRegSet>&UberSets,
1374 CodeGenRegBank&RegBank) {
1375 // Skip the firstunallocatable set.
1376 for (std::vector<UberRegSet>::iterator I = std::next(UberSets.begin()),
1377 E = UberSets.end(); I != E; ++I) {
1378
1379 // Initializeall unit weights in this set, and remember the max units/reg.
1380 const CodeGenRegister *Reg = nullptr;
1381 unsigned MaxWeight = 0, Weight = 0;
1382 for (RegUnitIterator UnitI(I->Regs); UnitI.isValid(); ++UnitI) {
1383 if (Reg != UnitI.getReg()) {
1384 if (Weight > MaxWeight)
1385 MaxWeight = Weight;
1386 Reg = UnitI.getReg();
1387 Weight = 0;
1388 }
1389 unsigned UWeight =RegBank.getRegUnit(*UnitI).Weight;
1390 if (!UWeight) {
1391 UWeight = 1;
1392 RegBank.increaseRegUnitWeight(*UnitI,UWeight);
1393 }
1394 Weight += UWeight;
1395 }
1396 if (Weight > MaxWeight)
1397 MaxWeight = Weight;
1398 if (I->Weight != MaxWeight) {
1399 DEBUG(
1400 dbgs() << "UberSet "<< I - UberSets.begin() << " Weight " << MaxWeight;
1401 for ( auto &Unit : I->Regs)
1402 dbgs() << " "<< Unit->getName();
1403 dbgs() << "\n");
1404 // Update theset weight.
1405 I->Weight = MaxWeight;
1406 }
1407
1408 // Findsingular determinants.
1409 for ( const auto R :I->Regs) {
1410 if (R->getRegUnits().count() == 1&& R-> getWeight (RegBank) ==I->Weight) {
1411 I->SingularDeterminants |=R->getRegUnits();
1412 }
1413 }
1414 }
1415 }
1376行循环遍历同类集的组成寄存器,查找其中最大的寄存器权重,将其设为同类集权重。寄存器权重是其组成RegUnit对象Weight值的总和。但在创建之初,RegUnit对象的Weight都初始化为0。现在,在1390~1393行将RegUnit对象的Weight都设为1。
1382行的迭代器RegUnitIterator的构造函数如下。在上面的调用里,它使用UberRegSet的Regs容器作为参数,RegI及RegE分别是这个容器的首尾迭代器,而UnitI与UnitE则指向RegI所援引的CodeGenRegister对象的RegUnits容器(它记录了这些RegUnit对象在CodeGenRegBank::RegUnits容器中的序号),advance方法确保跳过空的RegUnits容器。
160 RegUnitIterator( const CodeGenRegister::Vec &Regs):
161 RegI(Regs.begin()), RegE(Regs.end()),UnitI(), UnitE() {
162
163 if (RegI != RegE) {
164 UnitI = (*RegI)->getRegUnits().begin();
165 UnitE = (*RegI)->getRegUnits().end();
166 advance();
167 }
168 }
很显然,1382行循环完成对一个寄存器的RegUnit集遍历,计算其RegUnit的Weight总和(即寄存器权重,参见下面的getWeight方法)。同类集的Weight则是其中最大的寄存器权重。如果一个仅有单个RegUnit的寄存器具有同类集中最大的权重(1410行条件),在后面的权重调整中其权重不允许改变,因此通过UberRegSet的SingularDeterminants容器记录它。同一行上的getWeight方法是这样定义的:
524 unsigned CodeGenRegister::getWeight ( const CodeGenRegBank &RegBank) const {
525 unsigned Weight = 0;
526 for (RegUnitList::iterator I = RegUnits.begin(), E = RegUnits.end();
527 I != E; ++I) {
528 Weight += RegBank.getRegUnit(*I).Weight;
529 }
530 return Weight;
531 }
接下来将一个同类集的寄存器的权重调整为一致。方式是调整寄存器组成RegUnit的Weight。normalizeWeight是对寄存器DAG的一个后序(post-order)遍历(1439行循环),即从寄存器的最小可援引部分开始。
1427 static bool normalizeWeight (CodeGenRegister *Reg,
1428 std::vector<UberRegSet> &UberSets,
1429 std::vector<UberRegSet*> &RegSets,
1430 SparseBitVector<> &NormalRegs,
1431 CodeGenRegister::RegUnitList&NormalUnits,
1432 CodeGenRegBank&RegBank) {
1433 if (NormalRegs.test(Reg->EnumValue))
1434 return false;
1435 NormalRegs.set(Reg->EnumValue);
1436
1437 bool Changed = false;
1438 const CodeGenRegister::SubRegMap &SRM = Reg->getSubRegs();
1439 for (CodeGenRegister::SubRegMap::const_iterator SRI = SRM.begin(),
1440 SRE = SRM.end(); SRI != SRE; ++SRI) {
1441 if (SRI->second == Reg)
1442 continue ; // self-cycles happen
1443
1444 Changed |= normalizeWeight(SRI->second,UberSets, RegSets,
1445 NormalRegs,NormalUnits, RegBank);
1446 }
1447 // Postorderregister normalization.
1448
1449 // Inheritregister units newly adopted by subregisters.
1450 if (Reg->(RegBank))
1451 (UberSets,RegBank);
1452
1453 // Check if thisregister is too skinny for its UberRegSet.
1454 UberRegSet *UberSet =RegSets[RegBank.getRegIndex(Reg)];
1455
1456 unsigned RegWeight = Reg->(RegBank);
1457 if (UberSet->Weight > RegWeight) {
1458 // A registerunit's weight can be adjusted only if it is the singular unit
1459 // for thisregister, has not been used to normalize a subregister's set,
1460 // and has notalready been used to singularly determine this UberRegSet.
1461 unsigned AdjustUnit =*Reg->getRegUnits().begin();
1462 if (Reg->getRegUnits().count() != 1
1463 || hasRegUnit(NormalUnits, AdjustUnit)
1464 ||hasRegUnit(UberSet->SingularDeterminants, AdjustUnit)) {
1465 // We don'thave an adjustable unit, so adopt a new one.
1466 AdjustUnit = RegBank.newRegUnit(UberSet->Weight- RegWeight);
1467 Reg->adoptRegUnit(AdjustUnit);
1468 // Adopting aunit does not immediately require recomputing set weights.
1469 }
1470 else {
1471 // Adjust the existing single unit.
1472 RegBank.increaseRegUnitWeight(AdjustUnit,UberSet->Weight - RegWeight);
1473 // The unitmay be shared among sets and registers within this set.
1474 (UberSets,RegBank);
1475 }
1476 Changed = true;
1477 }
1478
1479 // Mark theseunits normalized so superregisters can't change their weights.
1480 NormalUnits |= Reg->getRegUnits();
1481
1482 return Changed;
1483 }
到达1450行,当前寄存器的子寄存器已经由normalizeWeight处理,这时调用CodeGenRegister::inheritRegUnits将寄存器的RegUnits设置为子寄存器RegUnits的合集,只要存在子寄存器,changed就一定是true。显然,如果寄存器RegUnits发生变化,需要重新调用computeUberWeights来重新计算该寄存器的Weight值。
202 bool CodeGenRegister::inheritRegUnits (CodeGenRegBank&RegBank) {
203 bool changed = false;
204 for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end();
205 I != E; ++I) {
206 CodeGenRegister *SR = I->second;
207 // Merge thesubregister's units into this register's RegUnits.
208 changed |= (RegUnits |= SR->RegUnits);
209 }
210
211 return changed;
212 }
1454行的getRegIndex实际上返回该寄存器的EnumValue-1,因此得到的UberSet就是这个寄存器所属的同类集。NormalUnits与NormalRegs都是从外部传进来、一开始为空的容器,用于控制这个调整的过程。
如果寄存器的权重小于其同类集的权重(1457行),满足以下所有条件才可以调整其RegUnit的Weight值,然后调用computeUberWeights重新计算该寄存器权重:1.仅包含一个RegUnit,2. 还没调整过,3.不在UberSet的SingularDeterminants容器(仅包含一个RegUnit且权重就是同类集权重)中。否则,只能向这个寄存器多加一个RegUnit来记录这个差值。
由于上述严格的条件,实际上权重的调整只是一个尽力而为的过程。为调整权重而加入的RegUnit实例只是为了使上述算法尽快收敛,这些RegUnit实例在后续处理里根本没有作用。
3.3.5.3. 寄存器单元的同类集
前面以寄存器为粒度构建了同类集信息,完善了寄存器单元(RegUnit)的Weight值设置。现在接着以RegUnit为单位构建同类集,这个同类集由RegUnitSet来表示:
461 struct RegUnitSet {
462 typedef std::vector<unsigned>::const_iterator iterator;
463
464 std::string Name;
465 std::vector<unsigned> Units;
466 unsigned Weight; //Cache the sum of all unit weights.
467 unsigned Order; // Cache the sortkey.
468
469 RegUnitSet() : Weight(0), Order(0) {}
470 };
类似的,在CodeGenRegBank中定义了容器RegUnitSets(std::vector<RegUnitSet>)来保存这些同类集。这些同类集由方法CodeGenRegBank::computeRegUnitSets构造。
1594 void CodeGenRegBank::computeRegUnitSets (){
1595 assert (RegUnitSets.empty()&& "dirty RegUnitSets");
1596
1597 // Compute aunique RegUnitSet for each RegClass.
1598 auto &RegClasses = getRegClasses();
1599 for ( auto &RC : RegClasses) {
1600 if (!RC.Allocatable)
1601 continue ;
1602
1603 //Speculatively grow the RegUnitSets to hold the new set.
1604 RegUnitSets.resize(RegUnitSets.size() + 1);
1605 RegUnitSets.back().Name = RC.getName();
1606
1607 // Compute asorted list of units in this class.
1608 RC.(RegUnitSets.back().Units);
1609
1610 // Find anexisting RegUnitSet.
1611 std::vector<RegUnitSet>::const_iterator SetI =
1612 (RegUnitSets,RegUnitSets.back());
1613 if (SetI != std::prev(RegUnitSets.end()))
1614 RegUnitSets.pop_back();
1615 }
1616
1617 DEBUG(dbgs() << "\nBeforepruning:\n";
1618 for (unsigned USIdx = 0, USEnd = RegUnitSets.size();
1619 USIdx < USEnd; ++USIdx) {
1620 dbgs() << "UnitSet "<< USIdx << " " << RegUnitSets[USIdx].Name
1621 << ":";
1622 for ( auto &U : RegUnitSets[USIdx].Units)
1623 dbgs() << " "<< RegUnits[U].Roots[0]->getName();
1624 dbgs() << "\n";
1625 });
1626
1627 // Iterativelyprune unit sets.
1628 ();
1629
1630 DEBUG(dbgs() << "\nBeforeunion:\n";
1631 for (unsigned USIdx = 0, USEnd = RegUnitSets.size();
1632 USIdx < USEnd; ++USIdx) {
1633 dbgs() << "UnitSet "<< USIdx << " " << RegUnitSets[USIdx].Name
1634 << ":";
1635 for ( auto &U : RegUnitSets[USIdx].Units)
1636 dbgs() << " "<< RegUnits[U].Roots[0]->getName();
1637 dbgs() << "\n";
1638 }
1639 dbgs() << "\nUnionsets:\n");
1640
1641 // Iterate overall unit sets, including new ones added by this loop.
1642 unsigned NumRegUnitSubSets =RegUnitSets.size();
1643 for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) {
1644 // In theory,this is combinatorial. In practice, it needs to be bounded
1645 // by a smallnumber of sets for regpressure to be efficient.
1646 // If theassert is hit, we need to implement pruning.
1647 assert (Idx< (2*NumRegUnitSubSets) && "runaway unit set inference");
1648
1649 // Compare newsets with all original classes.
1650 for (unsigned SearchIdx = (Idx >= NumRegUnitSubSets) ? 0 : Idx+1;
1651 SearchIdx != EndIdx; ++SearchIdx) {
1652 std::set<unsigned> Intersection;
1653 std::set_intersection(RegUnitSets[Idx].Units.begin(),
1654 RegUnitSets[Idx].Units.end(),
1655 RegUnitSets[SearchIdx].Units.begin(),
1656 RegUnitSets[SearchIdx].Units.end(),
1657 std::inserter(Intersection, Intersection.begin()));
1658 if (Intersection.empty())
1659 continue ;
1660
1661 //Speculatively grow the RegUnitSets to hold the new set.
1662 RegUnitSets.resize(RegUnitSets.size() +1);
1663 RegUnitSets.back().Name =
1664 RegUnitSets[Idx].Name + "+" +RegUnitSets[SearchIdx].Name;
1665
1666 std::set_union(RegUnitSets[Idx].Units.begin(),
1667 RegUnitSets[Idx].Units.end(),
1668 RegUnitSets[SearchIdx].Units.begin(),
1669 RegUnitSets[SearchIdx].Units.end(),
1670 std::inserter(RegUnitSets.back().Units,
1671 RegUnitSets.back().Units.begin()));
1672
1673 // Find anexisting RegUnitSet, or add the union to the unique sets.
1674 std::vector<RegUnitSet>::const_iterator SetI =
1675 (RegUnitSets,RegUnitSets.back());
1676 if (SetI != std::prev(RegUnitSets.end()))
1677 RegUnitSets.pop_back();
1678 else {
1679 DEBUG(dbgs() << "UnitSet" << RegUnitSets.size()-1
1680 << " " <<RegUnitSets.back().Name << ":";
1681 for ( auto &U : RegUnitSets.back().Units)
1682 dbgs() << " "<< RegUnits[U].Roots[0]->getName();
1683 dbgs() << "\n";);
1684 }
1685 }
1686 }
1687
1688 // Iterativelyprune unit sets after inferring supersets.
1689 ();
1690
1691 DEBUG(dbgs() << "\n";
1692 for (unsigned USIdx = 0, USEnd = RegUnitSets.size();
1693 USIdx < USEnd; ++USIdx) {
1694 dbgs() << "UnitSet "<< USIdx << " " << RegUnitSets[USIdx].Name
1695 << ":";
1696 for ( auto &U : RegUnitSets[USIdx].Units)
1697 dbgs() << " "<< RegUnits[U].Roots[0]->getName();
1698 dbgs() << "\n";
1699 });
类似的,首先也是将属于一个可分配寄存器类的寄存器的RegUnit构建为一个同类集,因为同一个寄存器类中的寄存器往往要相同的RegUnit,所以还要确保它们不重复,且以序号升序排序。这就是在1608行的CodeGenRegisterClass::buildRegUnitSet方法所要完成的工作。
909 void CodeGenRegisterClass::buildRegUnitSet (
910 std::vector<unsigned> &RegUnits) const {
911 std::vector<unsigned> TmpUnits;
912 for (RegUnitIterator UnitI(Members); UnitI.isValid(); ++UnitI)
913 TmpUnits.push_back(*UnitI);
914 std::sort(TmpUnits.begin(), TmpUnits.end());
915 std::unique_copy(TmpUnits.begin(),TmpUnits.end(),
916 std::back_inserter(RegUnits));
917 }
因为在去除了重复的RegUnit后,不排除两个寄存器类形成两个相同的RegUnit同类集,因此还需要在1612行调用findRegUnitSet方法删除重复的同类集。
1514 static std::vector<RegUnitSet>::const_iterator
( const std::vector<RegUnitSet> &UniqueSets,
1516 const RegUnitSet &Set) {
1517 std::vector<RegUnitSet>::const_iterator
1518 I = UniqueSets.begin(), E =UniqueSets.end();
1519 for (;I != E;++I) {
1530 if (I->Units == Set.Units)
1531 break ;
1532 }
1533 return I;
1534 }
接下来在1628行调用CodeGenRegBank::pruneUnitSets方法裁剪掉某些同类集。裁剪的准则是:对两个同类集,如果其中一个RegUnit的数量比另一个少1或2,而且这两个同类集所有的RegUnit权重都相同。CodeGenRegBank::pruneUnitSets的注释谈到了这个方法的必要性: 我们偶尔会将类似 APSR 与 PC 和通用寄存器混在一起。我们也会看到许多特殊寄存器的子集,比如尾调用以及 Thumb 编码。所有可能的重叠集的数量是组合数,全部生成它们对寄存器压力建模而言太多了。理想地,我们可以在 TableGen 中通过以下方面静态地避免这个问题。 1. 目标机器定义仅包含可分配寄存器的寄存器类,将其他类标记为不可分配。 2. 将特殊寄存器标记为寄存器压力“不在意的”类。不过,我们尝试通过静态地将几乎一样的 RegUnitSet 合并,来处理没有很好定义的目标机器 。
1550 void CodeGenRegBank::pruneUnitSets (){
1551 assert (RegClassUnitSets.empty()&& "this invalidates RegClassUnitSets");
1552
1553 // Form anequivalence class of UnitSets with no significant difference.
1554 std::vector<unsigned> SuperSetIDs;
1555 for (unsignedSubIdx = 0, EndIdx = RegUnitSets.size();
1556 SubIdx != EndIdx; ++SubIdx) {
1557 const RegUnitSet &SubSet = RegUnitSets[SubIdx];
1558 unsigned SuperIdx = 0;
1559 for (;SuperIdx != EndIdx; ++SuperIdx) {
1560 if (SuperIdx == SubIdx)
1561 continue ;
1562
1563 unsigned UnitWeight =RegUnits[SubSet.Units[0]].Weight;
1564 const RegUnitSet &SuperSet = RegUnitSets[SuperIdx];
1565 if (isRegUnitSubSet(SubSet.Units,SuperSet.Units)
1566 && (SubSet.Units.size() + 3> SuperSet.Units.size())
1567 && UnitWeight ==RegUnits[SuperSet.Units[0]].Weight
1568 && UnitWeight ==RegUnits[SuperSet.Units.back()].Weight) {
1569 DEBUG(dbgs() << "UnitSet" << SubIdx << " subsumed by " << SuperIdx
1570 << "\n");
1571 break ;
1572 }
1573 }
1574 if (SuperIdx == EndIdx)
1575 SuperSetIDs.push_back(SubIdx);
1576 }
1577 // PopulatePrunedUnitSets with each equivalence class's superset.
1578 std::vector<RegUnitSet>PrunedUnitSets(SuperSetIDs.size());
1579 for (unsignedi = 0, e = SuperSetIDs.size(); i != e; ++i) {
1580 unsigned SuperIdx = SuperSetIDs[i];
1581 PrunedUnitSets[i].Name =RegUnitSets[SuperIdx].Name;
1582 PrunedUnitSets[i].Units.swap(RegUnitSets[SuperIdx].Units);
1583 }
1584 RegUnitSets.swap(PrunedUnitSets);
1585 }
执行裁剪之后,在1643行循环创建这些同类集的联合闭包,从1647行的断言可以看到,我们不希望同类集的膨胀超过2倍。之后在1689行再次调用pruneUnitSets来再次裁剪同类集。
得到这些同类集之后,我们还需要知道,就RegUnit同类集而言,寄存器类间的包含关系。这就是CodeGenRegBank的容器RegClassUnitSets(std::vector<std::vector<unsigned>>)的作用。这个容器将记录一个指定寄存器类的RegUnit同类集的超集的集合。
CodeGenRegBank::computeRegUnitSets(续)
1701 // For each register class, list the UnitSets that aresupersets.
1702 RegClassUnitSets.resize(RegClasses.size());
1703 int RCIdx = -1;
1704 for ( auto &RC : RegClasses) {
1705 ++RCIdx;
1706 if (!RC.Allocatable)
1707 continue ;
1708
1709 // Recomputethe sorted list of units in this class.
1710 std::vector<unsigned> RCRegUnits;
1711 RC.(RCRegUnits);
1712
1713 // Don'tincrease pressure for unallocatable regclasses.
1714 if (RCRegUnits.empty())
1715 continue ;
1716
1717 DEBUG(dbgs() << "RC "<< RC.getName() << " Units: \n";
1718 for ( auto &U : RCRegUnits)
1719 dbgs() << RegUnits[U].getRoots()[0]->getName()<< " ";
1720 dbgs() << "\n UnitSetIDs:");
1721
1722 // Find allsupersets.
1723 for (unsigned USIdx = 0, USEnd = RegUnitSets.size();
1724 USIdx != USEnd; ++USIdx) {
1725 if (isRegUnitSubSet(RCRegUnits,RegUnitSets[USIdx].Units)) {
1726 DEBUG(dbgs() << " "<< USIdx);
1727 RegClassUnitSets[RCIdx].push_back(USIdx);
1728 }
1729 }
1730 DEBUG(dbgs() << "\n");
1731 assert (!RegClassUnitSets[RCIdx].empty()&& "missing unit set for regclass");
1732 }
1733
1734 // For eachregister unit, ensure that we have the list of UnitSets that
1735 // contain theunit. Normally, this matches an existing list of UnitSets for a
1736 // registerclass. If not, we create a new entry in RegClassUnitSets as a
1737 //"fake" register class.
1738 for (unsignedUnitIdx = 0, UnitEnd = NumNativeRegUnits;
1739 UnitIdx < UnitEnd; ++UnitIdx) {
1740 std::vector<unsigned> RUSets;
1741 for (unsigned i = 0, e = RegUnitSets.size(); i != e; ++i) {
1742 RegUnitSet &RUSet = RegUnitSets[i];
1743 if (std::find(RUSet.Units.begin(),RUSet.Units.end(), UnitIdx)
1744 == RUSet.Units.end())
1745 continue ;
1746 RUSets.push_back(i);
1747 }
1748 unsigned RCUnitSetsIdx = 0;
1749 for (unsigned e = RegClassUnitSets.size();
1750 RCUnitSetsIdx != e; ++RCUnitSetsIdx) {
1751 if (RegClassUnitSets[RCUnitSetsIdx] ==RUSets) {
1752 break ;
1753 }
1754 }
1755 RegUnits[UnitIdx].RegClassUnitSetsIdx =RCUnitSetsIdx;
1756 if (RCUnitSetsIdx ==RegClassUnitSets.size()) {
1757 // Create anew list of UnitSets as a "fake" register class.
1758 RegClassUnitSets.resize(RCUnitSetsIdx +1);
1759 RegClassUnitSets[RCUnitSetsIdx].swap(RUSets);
1760 }
1761 }
1762 }
在1738行,NumNativeRegUnits是原生(即非为调整权重加入的)RegUnit数量。在1738行这个循环里,我们将RegUnit的RegClassUnitSetsIdx域设置为RegClassUnitSets中同类集都包含该RegUnit的项的下标。
3.3.5.4. 寄存器的包含关系
前面我们已经为每个寄存器索引计算的Lane掩码。这个Lane掩码指出从该寄存器索引援引的部分可以覆盖哪些更小的可援引部分。对于叶子寄存器索引,Lane掩码就是它在SubRegIndices容器里的序号。但寄存器索引是一个抽象的概念,脱离了具体的寄存器。很显然,对应具体的寄存器也存在相应的包含关系。因此CodeGenRegister里有一个类型为SmallVector<unsigned,16>的容器RegUnitLaneMasks来记录它的子寄存器的Lane掩码,也就不足为奇了。
1764 void CodeGenRegBank::computeRegUnitLaneMasks (){
1765 for ( auto &Register : Registers) {
1766 // Create aninitial lane mask for all register units.
1767 const auto &RegUnits = Register.getRegUnits();
1768 CodeGenRegister::RegUnitLaneMaskListRegUnitLaneMasks(RegUnits.count(), 0);
1769 // Iteratethrough SubRegisters.
1770 typedef CodeGenRegister::SubRegMap SubRegMap;
1771 const SubRegMap &SubRegs = Register.getSubRegs();
1772 for (SubRegMap::const_iterator S = SubRegs.begin(),
1773 SE = SubRegs.end(); S != SE; ++S) {
1774 CodeGenRegister *SubReg = S->second;
1775 // Ignore non-leafsubregisters, their lane masks are fully covered by
1776 // the leafsubregisters anyway.
1777 if (SubReg->getSubRegs().size() != 0)
1778 continue ;
1779 CodeGenSubRegIndex *SubRegIndex =S->first;
1780 const CodeGenRegister *SubRegister = S->second;
1781 unsigned LaneMask =SubRegIndex->LaneMask;
1782 // DistributeLaneMask to Register Units touched.
1783 for (unsigned SUI : SubRegister->getRegUnits()) {
1784 bool Found = false;
1785 unsigned u = 0;
1786 for (unsigned RU : RegUnits) {
1787 if (SUI == RU) {
1788 RegUnitLaneMasks[u] |= LaneMask;
1789 assert (!Found);
1790 Found = true;
1791 }
1792 ++u;
1793 }
1794 (void)Found;
1795 assert (Found);
1796 }
1797 }
1798 Register.setRegUnitLaneMasks(RegUnitLaneMasks);
1799 }
1800 }
1765行的循环表明,这个过程是对每个寄存器分别进行的。1772行的循环进入当前寄存器的子寄存器集合,1777行滤掉非叶子寄存器。1783行的循环遍历该叶子寄存器的RegUnit组成位图(注意该位图中只能有一位是1),而1786行的循环则遍历当前寄存器的RegUnit组成位图。如果1787行条件满足,循环变量u记录了这是当前寄存器RegUnit组成位图的第几位(当前寄存器一定包含这个叶子寄存器),在RegUnitLaneMasks中u所给出的位置记录该叶子寄存器索引的Lane掩码。这样,RegUnit组成位图与RegUnitLaneMasks也就关联起来了。
3.3.5.5. 其他信息的设置
回到CodeGenRegBank::computeDerivedInfo。1817行循环对寄存器类别设置HasDisjunctSubRegs,这个标记用于显示是否至少有两个子寄存器是互不不干扰的。这个标记的内容来自于寄存器对象中同名标记,在寄存器对象里,如果它包含1个以上的子寄存器,它的HasDisjunctSubRegs标记就是true。1824行循环设置RegUnitSets的Weight(即指定寄存器类所包含寄存器单元的总数)。余下的代码设置RegUnitSets成员的Order值,以及对RegUnitSetOrder容器(类型std::vector<unsigned>)的填充。1832行是对RegUnitSets成员以集合大小排序,结果保存在RegUnitSetOrder容器。那么当给出RegUnitSetOrder[RegUnit.Order]时,我们就能知道这个RegUnit的优先级。
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持 码农网
猜你喜欢:- 【每日笔记】【Go学习笔记】2019-01-04 Codis笔记
- 【每日笔记】【Go学习笔记】2019-01-02 Codis笔记
- 【每日笔记】【Go学习笔记】2019-01-07 Codis笔记
- Golang学习笔记-调度器学习
- Vue学习笔记(二)------axios学习
- 算法/NLP/深度学习/机器学习面试笔记
本站部分资源来源于网络,本站转载出于传递更多信息之目的,版权归原作者或者来源机构所有,如转载稿涉及版权问题,请联系我们。