LLVM学习笔记(12)

栏目: 服务器 · 编程工具 · 发布时间: 7年前

内容简介:LLVM学习笔记(12)

3.3.5.  信息的完善

在RegisterInfoEmitter::run,接下来执行CodeGenRegBank::computeDerivedInfo方法。

1802   void CodeGenRegBank::computeDerivedInfo (){

1803   ();

1804   computeSubRegLaneMasks();

1805  

1806   // Compute aweight for each register unit created during getSubRegs.

1807     // This maycreate adopted register units (with unit # >= NumNativeRegUnits).

1808   computeRegUnitWeights();

1809  

1810   // Compute aunique set of RegUnitSets. One for each RegClass and inferred

1811     // supersets forthe union of overlapping sets.

1812   ();

1813  

1814   computeRegUnitLaneMasks();

1815  

1816   // Computeregister class HasDisjunctSubRegs flag.

1817   for (CodeGenRegisterClass &RC : RegClasses) {

1818   RC.HasDisjunctSubRegs = false;

1819   for ( const CodeGenRegister *Reg : RC.getMembers())

1820   RC.HasDisjunctSubRegs |= Reg->HasDisjunctSubRegs;

1821   }

1822  

1823   // Get the weightof each set.

1824   for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx)

1825   RegUnitSets[Idx].Weight =getRegUnitSetWeight(RegUnitSets[Idx].Units);

1826  

1827   // Find the orderof each set.

1828   RegUnitSetOrder.reserve(RegUnitSets.size());

1829   for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx)

1830   RegUnitSetOrder.push_back(Idx);

1831  

1832   std::stable_sort(RegUnitSetOrder.begin(),RegUnitSetOrder.end(),

1833   [ this ](unsignedID1, unsigned ID2) {

1834   return getRegPressureSet(ID1).Units.size() <

1835   getRegPressureSet(ID2).Units.size();

1836   });

1837   for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) {

1838   RegUnitSets[RegUnitSetOrder[Idx]].Order =Idx;

1839   }

1840   }

CodeGenRegBank::computeDerivedInfo方法计算的所谓衍生信息包括以下内容。

3.3.5.1.       完整的索引信息

前面我们已经建立了复合索引,但这些索引不一定完整,比如可用孙子寄存器的复合索引就没有认真构建过。下面就要为所有可用的寄存器建立起完整的索引系统。

1117   void CodeGenRegBank::computeComposites (){

1118   // Keep track ofTopoSigs visited. We only need to visit each TopoSig once,

1119     // and manyregisters will share TopoSigs on regular architectures.

1120   BitVector TopoSigs(getNumTopoSigs());

1121  

1122   for ( const auto &Reg1: Registers) {

1123   // Skipidentical subreg structures already processed.

1124   if (TopoSigs.test(Reg1.getTopoSig()))

1125   continue ;

1126   TopoSigs.set(Reg1.getTopoSig());

1127  

1128   const CodeGenRegister::SubRegMap &SRM1 = Reg1.getSubRegs();

1129   for (CodeGenRegister::SubRegMap::const_iterator i1 = SRM1.begin(),

1130   e1 = SRM1.end(); i1 != e1; ++i1) {

1131   CodeGenSubRegIndex *Idx1 = i1->first;

1132   CodeGenRegister *Reg2 = i1->second;

1133         // Ignoreidentity compositions.

1134   if (&Reg1 == Reg2)

1135   continue ;

1136   const CodeGenRegister::SubRegMap &SRM2 = Reg2->getSubRegs();

1137   // Trycomposing Idx1 with another SubRegIndex.

1138   for (CodeGenRegister::SubRegMap::const_iterator i2 = SRM2.begin(),

1139   e2 = SRM2.end(); i2 != e2; ++i2) {

1140   CodeGenSubRegIndex *Idx2 =i2->first;

1141   CodeGenRegister *Reg3 = i2->second;

1142   // Ignoreidentity compositions.

1143   if (Reg2 == Reg3)

1144   continue ;

1145   // OKReg1:IdxPair == Reg3. Find the index with Reg:Idx == Reg3.

1146   CodeGenSubRegIndex *Idx3 =Reg1.getSubRegIndex(Reg3);

1147   assert (Idx3&& "Sub-register doesn't have an index");

1148  

1149   //Conflicting composition? Emit a warning but allow it.

1150   if (CodeGenSubRegIndex *Prev =Idx1->addComposite(Idx2,Idx3))

1151   PrintWarning(Twine("SubRegIndex") + Idx1->getQualifiedName() +

1152   " and " +Idx2->getQualifiedName() +

1153   " composeambiguously as " + Prev->getQualifiedName() +

1154   " or " +Idx3->getQualifiedName());

1155   }

1156   }

1157   }

1158   }

这里的处理是遍历所有的CodeGenRegister实例,如果已知AàB,BàC的索引(C是B的子寄存器,B是A的子寄存器),建立AàC的(复合)索引。如此一来,绝不会有漏网之鱼。

接下来,我们需要让TableGen能有办法高效、简单地表示这些关系。比如,A覆盖了C。

CodeGenSubRegIndex定义里设置了LaneMask域。大致上LaneMask表示寄存器中可分割的部分。叶子索引(Composed为空)在LaneMask中会设置一个比特,位置是它自己在SubRegIndices容器里出现的次序(1174行),序号超过31的索引将共享同一个LaneMask比特(1187行),表示寄存器中一个特定的部分。而Composed非空索引的LaneMask则是它构成的复合索引的LaneMask的集合(1189行首先将它们的LaneMask置0),表示它所代表的寄存器部分覆盖了哪些更小的部分。

1167   void CodeGenRegBank::computeSubRegLaneMasks (){

1168   // First assignindividual bits to all the leaf indices.

1169   unsigned Bit = 0;

1170   // Determine maskof lanes that cover their registers.

1171   CoveringLanes = ~0u;

1172   for ( auto &Idx : SubRegIndices) {

1173   if (Idx.getComposites().empty()) {

1174   Idx.LaneMask = 1u << Bit;

1175   // Share bit31 in the unlikely case there are more than 32 leafs.

1176         //

1177         // Sharingbits is harmless; it allows graceful degradation in targets

1178         // with morethan 32 vector lanes. They simply get a limited resolution

1179         // view oflanes beyond the 32nd.

1180         //

1181         // See alsothe comment for getSubRegIndexLaneMask().

1182   if (Bit < 31)

1183   ++Bit;

1184   else

1185   // Once bit31 is shared among multiple leafs, the 'lane' it represents

1186           // is nolonger covering its registers.

1187   CoveringLanes &= ~(1u <<Bit);

1188   } else {

1189   Idx.LaneMask = 0;

1190   }

1191   }

1192  

1193   // Computetransformation sequences for composeSubRegIndexLaneMask. The idea

1194     // here is thatfor each possible target subregister we look at the leafs

1195     // in thesubregister graph that compose for this target and create

1196     // transformationsequences for the lanemasks. Each step in the sequence

1197     // consists of abitmask and a bitrotate operation. As the rotation amounts

1198     // are usuallythe same for many subregisters we can easily combine the steps

1199     // by combiningthe masks.

1200   for ( const auto &Idx :SubRegIndices) {

1201   const auto &Composites = Idx.getComposites();

1202   auto &LaneTransforms = Idx.CompositionLaneMaskTransform;

1203   // Go throughall leaf subregisters and find the ones that compose with Idx.

1204       // These makeout all possible valid bits in the lane mask we want to

1205       // transform.Looking only at the leafs ensure that only a single bit in

1206       // the mask isset.

1207   unsigned NextBit = 0;

1208   for ( auto &Idx2 : SubRegIndices) {

1209   // Skipnon-leaf subregisters.

1210   if (!Idx2.getComposites().empty())

1211   continue ;

1212   // Replicatethe behaviour from the lane mask generation loop above.

1213   unsigned SrcBit = NextBit;

1214   unsigned SrcMask = 1u << SrcBit;

1215   if (NextBit < 31)

1216   ++NextBit;

1217   assert (Idx2.LaneMask== SrcMask);

1218  

1219   // Get thecomposed subregister if there is any.

1220   auto C =Composites.find(&Idx2);

1221   if (C == Composites.end())

1222   continue ;

1223   const CodeGenSubRegIndex *Composite = C->second;

1224   // TheComposed subreg should be a leaf subreg too

1225   assert (Composite->getComposites().empty());

1226  

1227   // CreateMask+Rotate operation and merge with existing ops if possible.

1228   unsigned DstBit =Log2_32(Composite->LaneMask);

1229   int Shift = DstBit - SrcBit;

1230   uint8_t RotateLeft = Shift >= 0 ?(uint8_t)Shift : 32+Shift;

1231   for ( auto &I : LaneTransforms) {

1232   if (I.RotateLeft == RotateLeft) {

1233   I.Mask |= SrcMask;

1234   SrcMask = 0;

1235   }

1236   }

1237   if (SrcMask != 0) {

1238   MaskRolPair MaskRol = { SrcMask,RotateLeft };

1239   LaneTransforms.push_back(MaskRol);

1240   }

1241   }

1242   // Optimize ifthe transformation consists of one step only: Set mask to

1243       // 0xffffffff(including some irrelevant invalid bits) so that it should

1244       // merge withmore entries later while compressing the table.

1245   if (LaneTransforms.size() == 1)

1246   LaneTransforms[0].Mask = ~0u;

1247  

1248   // Furthercompression optimization: For invalid compositions resulting

1249       // in asequence with 0 entries we can just pick any other. Choose

1250       // Mask0xffffffff with Rotation 0.

1251   if (LaneTransforms.size() == 0) {

1252   MaskRolPair P = { ~0u, 0 };

1253   LaneTransforms.push_back(P);

1254   }

1255   }

1256  

1257   // FIXME: What ifad-hoc aliasing introduces overlaps that aren't represented

1258     // by thesub-register graph? This doesn't occur in any known targets.

1259  

1260   // Inherit lanesfrom composites.

1261   for ( const auto &Idx :SubRegIndices) {

1262   unsigned Mask = Idx.();

1263   // If somesuper-registers without CoveredBySubRegs use this index, we can

1264       // no longerassume that the lanes are covering their registers.

1265   if (!Idx.AllSuperRegsCovered)

1266   CoveringLanes &= ~Mask;

1267   }

1268  

1269   // Compute lane maskcombinations for register classes.

1270   for ( auto &RegClass : RegClasses) {

1271   unsigned LaneMask = 0;

1272   for ( const auto &SubRegIndex : SubRegIndices) {

1273   if(RegClass.getSubClassWithSubReg(&SubRegIndex) == nullptr)

1274   continue ;

1275   LaneMask |= SubRegIndex.LaneMask;

1276   }

1277   RegClass.LaneMask = LaneMask;

1278   }

1279   }

CodeGenSubRegIndex的另一个域CompositionLaneMaskTransform是类型为mutableSmallVector< MaskRolPair,1>的容器,其中MaskRolPair是这样定义的:

42        struct MaskRolPair {

43        unsigned Mask;

44        uint8_t RotateLeft;

45        bool operator ==( const MaskRolPair Other) const {

46        return Mask == Other.Mask && RotateLeft == Other.RotateLeft;

47        }

48        bool operator !=( const MaskRolPair Other) const {

49        return Mask != Other.Mask || RotateLeft != Other.RotateLeft;

50        }

51        };

对Composed为空的索引,CompositionLaneMaskTransform没有用。但对Composed非空的索引,以A+B=C为例,A要记录这样一项:“ (B的LaneMask, 从B的LaneMask得到C的LaneMask所需的向左偏转比特数)”。显然,这种情形下,B和C的Composed都必须为空。

1262行循环开始对所有Composed非空的寄存器索引计算LaneMask。这时Composed为空的索引的LaneMask已经设置好了,Composed非空的寄存器的LaneMask则是0。我们希望Composed非空的索引的LaneMask是所有从它得到的复合索引LaneMask的合集,这由computeLaneMask方法实现。

85        unsigned CodeGenSubRegIndex::computeLaneMask () const {

86        // Alreadycomputed?

87        if (LaneMask)

88        return LaneMask;

89       

90        // Recursionguard, shouldn't be required.

91        LaneMask = ~0u;

92       

93        // The lane maskis simply the union of all sub-indices.

94        unsigned M = 0;

95        for ( const auto &C :Composed)

96        M |= C.second->computeLaneMask();

97        assert (M&& "Missing lane mask, sub-register cycle?");

98        LaneMask = M;

99        return LaneMask;

100      }

CodeGenRegisterClass也有一个LaneMask成员。它是该类所有寄存器包含的所有寄存器索引的LaneMask的合集。最后,在1270行循环设置它们。

3.3.5.2.       寄存器单元的权重

前面为每个最小可援引的寄存器配了一个RegUnit实例,而这些寄存器的父寄存器则使用子寄存器的RegUnit实例。这些RegUnit实例将产生用于寄存器相干与压力模型的数据。其中RegUnit的Weight域表示该单元的权重,无特殊情况Weight是1,包含多个寄存器单元的寄存器的权重就是它们的总和。RegUnit的Weight值还用于计算寄存器类的权重。不过有些寄存器属于多个寄存器类,因此这些类要一起考虑,其中一个重要的考量是属于同一个寄存器类的寄存器权重应该一致(不过下面可以看到,只能做到尽量一致),并由此调整RegUnit的权重为后面构建RegUnit同类集做好准备。

1489   void CodeGenRegBank::computeRegUnitWeights (){

1490   std::vector<UberRegSet> UberSets;

1491   std::vector<UberRegSet*>RegSets(Registers.size());

1492   (UberSets,RegSets, * this );

1493   // UberSets andRegSets are now immutable.

1494  

1495   (UberSets,* this );

1496  

1497   // Iterate overeach Register, normalizing the unit weights until reaching

1498     // a fix point.

1499   unsigned NumIters = 0;

1500   for (boolChanged = true; Changed; ++NumIters) {

1501   assert (NumIters<= NumNativeRegUnits && "Runaway register unit weights");

1502   Changed = false;

1503   for ( auto &Reg : Registers) {

1504   CodeGenRegister::RegUnitList NormalUnits;

1505   SparseBitVector<> NormalRegs;

1506   Changed |=(&Reg,UberSets, RegSets, NormalRegs,

1507   NormalUnits, * this );

1508   }

1509   }

1510   }

因此,首先对寄存器进行划分(以寄存器类为依据),分为若干不相关的寄存器集合。然后,对每个集合中的寄存器计算其权重,选出其中最大的权重作为集合的权重。最后,将集合中寄存器的权重尽量调整为集合的权重一致。

在1490行出现的UberRegSet定义如下:

1298   struct UberRegSet {

1299   CodeGenRegister::Vec Regs;

1300   unsigned Weight;

1301   CodeGenRegister::RegUnitList SingularDeterminants;

1302  

1303   UberRegSet(): Weight(0) {}

1304   };

每个UberRegSet是包含相同寄存器的寄存器类的联合传递闭包。所有UberRegSet合在一起构成了所有寄存器的一个划分。每个UberRegSet实例也称为一个同类集,computeUberSets方法使用同类集算法进行寄存器划分。

1311   static void computeUberSets (std::vector<UberRegSet>&UberSets,

1312   std::vector<UberRegSet*>&RegSets,

1313   CodeGenRegBank&RegBank) {

1314  

1315   const auto &Registers = RegBank.getRegisters();

1316  

1317   // The RegisterEnumValue is one greater than its index into Registers.

1318   assert (Registers.size()== Registers.back().EnumValue &&

1319   "register enum valuemismatch");

1320  

1321   // Forsimplicitly make the SetID the same as EnumValue.

1322   IntEqClasses UberSetIDs(Registers.size()+1);

1323   std::set<unsigned> AllocatableRegs;

1324   for ( auto &RegClass : RegBank.getRegClasses()) {

1325   if (!RegClass.Allocatable)

1326   continue ;

1327  

1328   const CodeGenRegister::Vec &Regs = RegClass.getMembers();

1329   if (Regs.empty())

1330   continue ;

1331  

1332   unsigned USetID = UberSetIDs.((*Regs.begin())->EnumValue);

1333   assert (USetID&& "register number 0 is invalid");

1334  

1335   AllocatableRegs.insert((*Regs.begin())->EnumValue);

1336   for ( auto I = std::next(Regs.begin()), E = Regs.end(); I!= E; ++I) {

1337   AllocatableRegs.insert((*I)->EnumValue);

1338   UberSetIDs.(USetID,(*I)->EnumValue);

1339   }

1340   }

1341   // Combinenon-allocatable regs.

1342   for ( const auto &Reg :Registers) {

1343   unsigned RegNum = Reg.EnumValue;

1344   if (AllocatableRegs.count(RegNum))

1345   continue ;

1346  

1347   UberSetIDs.(0,RegNum);

1348   }

1349   UberSetIDs.();

1350  

1351   // Make the firstUberSet a special unallocatable set.

1352   unsigned ZeroID = UberSetIDs[0];

1353  

1354   // InsertRegisters into the UberSets formed by union-find.

1355     // Do not resizeafter this.

1356   UberSets.resize(UberSetIDs.getNumClasses());

1357   unsigned i = 0;

1358   for ( const CodeGenRegister &Reg : Registers) {

1359   unsigned USetID =UberSetIDs[Reg.EnumValue];

1360   if (!USetID)

1361   USetID = ZeroID;

1362   else if (USetID == ZeroID)

1363   USetID = 0;

1364  

1365   UberRegSet *USet = &UberSets[USetID];

1366   USet->Regs.push_back(&Reg);

1367   sortAndUniqueRegisters(USet->Regs);

1368   RegSets[i++] = USet;

1369   }

1370   }

在1322行,调用IntEqClasses构造函数来初始化一个IntEqClasses实例。这也是一个同类集,但保存的是寄存器对象的EnumValue,寄存器的EnumValue对这个同类集算法是重要的,因为我们需要知道一个同类集中最小的寄存器,而恰好它的EnumValue也是最小的(这是我们在TD文件里必须遵循的规则:按从小到大的次序定义寄存器)。

42        IntEqClasses(unsigned N = 0) : NumClasses(0){(N); }

IntEqClasses在容器EC里保存同类集,这个容器的类型是SmallVector<unsigned,8>。在同类集中存在一个元素作为这个同类集的代表,EC这样记录同类集,以某个元素的值为下标,EC[elem]就是该同类集的代表元素,代表元素的EC项映射回自己。

在同类集扩展期间,EC的代表元素是同类集中较小的成员。在同类集成型后,代表元素将是同类集中最小的成员(即最小的寄存器)。

25        void IntEqClasses::grow (unsignedN) {

26        assert (NumClasses== 0 && "grow() called after compress().");

27        EC.reserve(N);

28        while (EC.size() < N)

29        EC.push_back(EC.size());

30        }

方法grow准备初始同类集,一开始每个寄存器划归一个独立的同类集。接下来,以寄存器类为依据,合并属于同一个寄存器类的寄存器的同类集。一个同类集至少对应一个寄存器类,在寄存器类有重叠时,会对应多个寄存器类。

1332行的findLeader方法查找这个同类集的代表元素:

46        unsigned IntEqClasses::findLeader (unsigneda) const {

47        assert (NumClasses== 0 && "findLeader() called after compress().");

48        while (a !=EC[a])

49        a = EC[a];

50        return a;

51        }

在1336行的循环里,join方法将两个同类集合二为一。假设一开始有两个同类集:EC[5]= 5,EC[3]= 3。那么下面39行的循环会产生这样的EC:EC[5] =3,EC[3]= 3。接下来,如果这个同类集(现在有两个成员)要与同类集EC[4]= 4合并,那么39行的循环会产生这样的结果:EC[5]= 3,EC[4]= 3,EC[3]= 3。

32        void IntEqClasses::join (unsigneda, unsigned b) {

33        assert (NumClasses== 0 && "join() called after compress().");

34        unsigned eca = EC[a];

35        unsigned ecb = EC[b];

36        // Updatepointers while searching for the leaders, compressing the paths

37          // incrementally.The larger leader will eventually be updated, joining the

38          // classes.

39        while (eca !=ecb)

40        if (eca < ecb)

41        EC[b] = eca, b = ecb, ecb = EC[b];

42        else

43        EC[a] = ecb, a = eca, eca = EC[a];

44        }

最后,1342行循环将不可分配的寄存器归入同类集0(EC[0])。

前面的处理不能保证同类集的元素都映射到代表,比如同类集EC[5] = 3,EC[4] =3,EC[3]= 3在与同类集EC[1]= 1合并时,只会产生结果:EC[5]= 3,EC[4]= 3,EC[3]= 1,EC[1]= 1。EC[5]与EC[4]并没有映射到代表,因此需要compress方法予以更正。注意,“EC[i]== i”是一个同类集代表,该表达式成立的次数就是同类集的个数。

53        void IntEqClasses::compress (){

54        if (NumClasses)

55        return ;

56        for (unsignedi = 0, e = EC.size(); i != e; ++i)

57        EC[i] = (EC[i] == i) ? NumClasses++ :EC[EC[i]];

58        }

1358行循环生成以CodeGenRegister对象为内容的同类集对象——UberRegSet实例,容器RegSets将寄存器关联到所属同类集。接下来调用computeUberWeights计算这些UberRegSet的权重数据。

1373   static void computeUberWeights (std::vector<UberRegSet>&UberSets,

1374   CodeGenRegBank&RegBank) {

1375   // Skip the firstunallocatable set.

1376   for (std::vector<UberRegSet>::iterator I = std::next(UberSets.begin()),

1377   E = UberSets.end(); I != E; ++I) {

1378  

1379   // Initializeall unit weights in this set, and remember the max units/reg.

1380   const CodeGenRegister *Reg = nullptr;

1381   unsigned MaxWeight = 0, Weight = 0;

1382   for (RegUnitIterator UnitI(I->Regs); UnitI.isValid(); ++UnitI) {

1383   if (Reg != UnitI.getReg()) {

1384   if (Weight > MaxWeight)

1385   MaxWeight = Weight;

1386   Reg = UnitI.getReg();

1387   Weight = 0;

1388   }

1389   unsigned UWeight =RegBank.getRegUnit(*UnitI).Weight;

1390   if (!UWeight) {

1391   UWeight = 1;

1392   RegBank.increaseRegUnitWeight(*UnitI,UWeight);

1393   }

1394   Weight += UWeight;

1395   }

1396   if (Weight > MaxWeight)

1397   MaxWeight = Weight;

1398   if (I->Weight != MaxWeight) {

1399   DEBUG(

1400   dbgs() << "UberSet "<< I - UberSets.begin() << " Weight " << MaxWeight;

1401   for ( auto &Unit : I->Regs)

1402   dbgs() << " "<< Unit->getName();

1403   dbgs() << "\n");

1404   // Update theset weight.

1405   I->Weight = MaxWeight;

1406   }

1407  

1408   // Findsingular determinants.

1409   for ( const auto R :I->Regs) {

1410   if (R->getRegUnits().count() == 1&& R-> getWeight (RegBank) ==I->Weight) {

1411   I->SingularDeterminants |=R->getRegUnits();

1412   }

1413   }

1414   }

1415   }

1376行循环遍历同类集的组成寄存器,查找其中最大的寄存器权重,将其设为同类集权重。寄存器权重是其组成RegUnit对象Weight值的总和。但在创建之初,RegUnit对象的Weight都初始化为0。现在,在1390~1393行将RegUnit对象的Weight都设为1。

1382行的迭代器RegUnitIterator的构造函数如下。在上面的调用里,它使用UberRegSet的Regs容器作为参数,RegI及RegE分别是这个容器的首尾迭代器,而UnitI与UnitE则指向RegI所援引的CodeGenRegister对象的RegUnits容器(它记录了这些RegUnit对象在CodeGenRegBank::RegUnits容器中的序号),advance方法确保跳过空的RegUnits容器。

160      RegUnitIterator( const CodeGenRegister::Vec &Regs):

161      RegI(Regs.begin()), RegE(Regs.end()),UnitI(), UnitE() {

162     

163      if (RegI != RegE) {

164      UnitI = (*RegI)->getRegUnits().begin();

165      UnitE = (*RegI)->getRegUnits().end();

166      advance();

167      }

168      }

很显然,1382行循环完成对一个寄存器的RegUnit集遍历,计算其RegUnit的Weight总和(即寄存器权重,参见下面的getWeight方法)。同类集的Weight则是其中最大的寄存器权重。如果一个仅有单个RegUnit的寄存器具有同类集中最大的权重(1410行条件),在后面的权重调整中其权重不允许改变,因此通过UberRegSet的SingularDeterminants容器记录它。同一行上的getWeight方法是这样定义的:

524      unsigned CodeGenRegister::getWeight ( const CodeGenRegBank &RegBank) const {

525      unsigned Weight = 0;

526      for (RegUnitList::iterator I = RegUnits.begin(), E = RegUnits.end();

527      I != E; ++I) {

528      Weight += RegBank.getRegUnit(*I).Weight;

529      }

530      return Weight;

531      }

接下来将一个同类集的寄存器的权重调整为一致。方式是调整寄存器组成RegUnit的Weight。normalizeWeight是对寄存器DAG的一个后序(post-order)遍历(1439行循环),即从寄存器的最小可援引部分开始。

1427   static bool normalizeWeight (CodeGenRegister *Reg,

1428   std::vector<UberRegSet> &UberSets,

1429   std::vector<UberRegSet*> &RegSets,

1430   SparseBitVector<> &NormalRegs,

1431   CodeGenRegister::RegUnitList&NormalUnits,

1432   CodeGenRegBank&RegBank) {

1433   if (NormalRegs.test(Reg->EnumValue))

1434   return false;

1435   NormalRegs.set(Reg->EnumValue);

1436  

1437   bool Changed = false;

1438   const CodeGenRegister::SubRegMap &SRM = Reg->getSubRegs();

1439   for (CodeGenRegister::SubRegMap::const_iterator SRI = SRM.begin(),

1440   SRE = SRM.end(); SRI != SRE; ++SRI) {

1441   if (SRI->second == Reg)

1442   continue ; // self-cycles happen

1443  

1444   Changed |= normalizeWeight(SRI->second,UberSets, RegSets,

1445   NormalRegs,NormalUnits, RegBank);

1446   }

1447   // Postorderregister normalization.

1448  

1449     // Inheritregister units newly adopted by subregisters.

1450   if (Reg->(RegBank))

1451   (UberSets,RegBank);

1452  

1453   // Check if thisregister is too skinny for its UberRegSet.

1454   UberRegSet *UberSet =RegSets[RegBank.getRegIndex(Reg)];

1455  

1456   unsigned RegWeight = Reg->(RegBank);

1457   if (UberSet->Weight > RegWeight) {

1458   // A registerunit's weight can be adjusted only if it is the singular unit

1459       // for thisregister, has not been used to normalize a subregister's set,

1460       // and has notalready been used to singularly determine this UberRegSet.

1461   unsigned AdjustUnit =*Reg->getRegUnits().begin();

1462   if (Reg->getRegUnits().count() != 1

1463   || hasRegUnit(NormalUnits, AdjustUnit)

1464   ||hasRegUnit(UberSet->SingularDeterminants, AdjustUnit)) {

1465   // We don'thave an adjustable unit, so adopt a new one.

1466   AdjustUnit = RegBank.newRegUnit(UberSet->Weight- RegWeight);

1467   Reg->adoptRegUnit(AdjustUnit);

1468   // Adopting aunit does not immediately require recomputing set weights.

1469   }

1470   else {

1471   // Adjust the existing single unit.

1472   RegBank.increaseRegUnitWeight(AdjustUnit,UberSet->Weight - RegWeight);

1473   // The unitmay be shared among sets and registers within this set.

1474   (UberSets,RegBank);

1475   }

1476   Changed = true;

1477   }

1478  

1479     // Mark theseunits normalized so superregisters can't change their weights.

1480   NormalUnits |= Reg->getRegUnits();

1481  

1482   return Changed;

1483   }

到达1450行,当前寄存器的子寄存器已经由normalizeWeight处理,这时调用CodeGenRegister::inheritRegUnits将寄存器的RegUnits设置为子寄存器RegUnits的合集,只要存在子寄存器,changed就一定是true。显然,如果寄存器RegUnits发生变化,需要重新调用computeUberWeights来重新计算该寄存器的Weight值。

202      bool CodeGenRegister::inheritRegUnits (CodeGenRegBank&RegBank) {

203      bool changed = false;

204      for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end();

205      I != E; ++I) {

206      CodeGenRegister *SR = I->second;

207      // Merge thesubregister's units into this register's RegUnits.

208      changed |= (RegUnits |= SR->RegUnits);

209      }

210     

211      return changed;

212      }

1454行的getRegIndex实际上返回该寄存器的EnumValue-1,因此得到的UberSet就是这个寄存器所属的同类集。NormalUnits与NormalRegs都是从外部传进来、一开始为空的容器,用于控制这个调整的过程。

如果寄存器的权重小于其同类集的权重(1457行),满足以下所有条件才可以调整其RegUnit的Weight值,然后调用computeUberWeights重新计算该寄存器权重:1.仅包含一个RegUnit,2. 还没调整过,3.不在UberSet的SingularDeterminants容器(仅包含一个RegUnit且权重就是同类集权重)中。否则,只能向这个寄存器多加一个RegUnit来记录这个差值。

由于上述严格的条件,实际上权重的调整只是一个尽力而为的过程。为调整权重而加入的RegUnit实例只是为了使上述算法尽快收敛,这些RegUnit实例在后续处理里根本没有作用。

3.3.5.3.       寄存器单元的同类集

前面以寄存器为粒度构建了同类集信息,完善了寄存器单元(RegUnit)的Weight值设置。现在接着以RegUnit为单位构建同类集,这个同类集由RegUnitSet来表示:

461      struct RegUnitSet {

462      typedef std::vector<unsigned>::const_iterator iterator;

463     

464      std::string Name;

465      std::vector<unsigned> Units;

466      unsigned Weight; //Cache the sum of all unit weights.

467      unsigned Order;  // Cache the sortkey.

468     

469      RegUnitSet() : Weight(0), Order(0) {}

470      };

类似的,在CodeGenRegBank中定义了容器RegUnitSets(std::vector<RegUnitSet>)来保存这些同类集。这些同类集由方法CodeGenRegBank::computeRegUnitSets构造。

1594   void CodeGenRegBank::computeRegUnitSets (){

1595   assert (RegUnitSets.empty()&& "dirty RegUnitSets");

1596  

1597   // Compute aunique RegUnitSet for each RegClass.

1598   auto &RegClasses = getRegClasses();

1599   for ( auto &RC : RegClasses) {

1600   if (!RC.Allocatable)

1601   continue ;

1602  

1603   //Speculatively grow the RegUnitSets to hold the new set.

1604   RegUnitSets.resize(RegUnitSets.size() + 1);

1605   RegUnitSets.back().Name = RC.getName();

1606  

1607   // Compute asorted list of units in this class.

1608   RC.(RegUnitSets.back().Units);

1609  

1610   // Find anexisting RegUnitSet.

1611   std::vector<RegUnitSet>::const_iterator SetI =

1612   (RegUnitSets,RegUnitSets.back());

1613   if (SetI != std::prev(RegUnitSets.end()))

1614   RegUnitSets.pop_back();

1615   }

1616  

1617   DEBUG(dbgs() << "\nBeforepruning:\n";

1618   for (unsigned USIdx = 0, USEnd = RegUnitSets.size();

1619   USIdx < USEnd; ++USIdx) {

1620   dbgs() << "UnitSet "<< USIdx << " " << RegUnitSets[USIdx].Name

1621   << ":";

1622   for ( auto &U : RegUnitSets[USIdx].Units)

1623   dbgs() << " "<< RegUnits[U].Roots[0]->getName();

1624   dbgs() << "\n";

1625   });

1626  

1627   // Iterativelyprune unit sets.

1628   ();

1629  

1630   DEBUG(dbgs() << "\nBeforeunion:\n";

1631   for (unsigned USIdx = 0, USEnd = RegUnitSets.size();

1632   USIdx < USEnd; ++USIdx) {

1633   dbgs() << "UnitSet "<< USIdx << " " << RegUnitSets[USIdx].Name

1634   << ":";

1635   for ( auto &U : RegUnitSets[USIdx].Units)

1636   dbgs() << " "<< RegUnits[U].Roots[0]->getName();

1637   dbgs() << "\n";

1638   }

1639   dbgs() << "\nUnionsets:\n");

1640  

1641   // Iterate overall unit sets, including new ones added by this loop.

1642   unsigned NumRegUnitSubSets =RegUnitSets.size();

1643   for (unsignedIdx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) {

1644   // In theory,this is combinatorial. In practice, it needs to be bounded

1645       // by a smallnumber of sets for regpressure to be efficient.

1646       // If theassert is hit, we need to implement pruning.

1647   assert (Idx< (2*NumRegUnitSubSets) && "runaway unit set inference");

1648  

1649       // Compare newsets with all original classes.

1650   for (unsigned SearchIdx = (Idx >= NumRegUnitSubSets) ? 0 : Idx+1;

1651   SearchIdx != EndIdx; ++SearchIdx) {

1652   std::set<unsigned> Intersection;

1653   std::set_intersection(RegUnitSets[Idx].Units.begin(),

1654   RegUnitSets[Idx].Units.end(),

1655   RegUnitSets[SearchIdx].Units.begin(),

1656   RegUnitSets[SearchIdx].Units.end(),

1657   std::inserter(Intersection, Intersection.begin()));

1658   if (Intersection.empty())

1659   continue ;

1660  

1661   //Speculatively grow the RegUnitSets to hold the new set.

1662   RegUnitSets.resize(RegUnitSets.size() +1);

1663   RegUnitSets.back().Name =

1664   RegUnitSets[Idx].Name + "+" +RegUnitSets[SearchIdx].Name;

1665  

1666   std::set_union(RegUnitSets[Idx].Units.begin(),

1667   RegUnitSets[Idx].Units.end(),

1668   RegUnitSets[SearchIdx].Units.begin(),

1669   RegUnitSets[SearchIdx].Units.end(),

1670   std::inserter(RegUnitSets.back().Units,

1671   RegUnitSets.back().Units.begin()));

1672  

1673   // Find anexisting RegUnitSet, or add the union to the unique sets.

1674   std::vector<RegUnitSet>::const_iterator SetI =

1675   (RegUnitSets,RegUnitSets.back());

1676   if (SetI != std::prev(RegUnitSets.end()))

1677   RegUnitSets.pop_back();

1678   else {

1679   DEBUG(dbgs() << "UnitSet" << RegUnitSets.size()-1

1680   << " " <<RegUnitSets.back().Name << ":";

1681   for ( auto &U : RegUnitSets.back().Units)

1682   dbgs() << " "<< RegUnits[U].Roots[0]->getName();

1683   dbgs() << "\n";);

1684   }

1685   }

1686   }

1687  

1688   // Iterativelyprune unit sets after inferring supersets.

1689   ();

1690  

1691   DEBUG(dbgs() << "\n";

1692   for (unsigned USIdx = 0, USEnd = RegUnitSets.size();

1693   USIdx < USEnd; ++USIdx) {

1694   dbgs() << "UnitSet "<< USIdx << " " << RegUnitSets[USIdx].Name

1695   << ":";

1696   for ( auto &U : RegUnitSets[USIdx].Units)

1697   dbgs() << " "<< RegUnits[U].Roots[0]->getName();

1698   dbgs() << "\n";

1699   });

类似的,首先也是将属于一个可分配寄存器类的寄存器的RegUnit构建为一个同类集,因为同一个寄存器类中的寄存器往往要相同的RegUnit,所以还要确保它们不重复,且以序号升序排序。这就是在1608行的CodeGenRegisterClass::buildRegUnitSet方法所要完成的工作。

909      void CodeGenRegisterClass::buildRegUnitSet (

910      std::vector<unsigned> &RegUnits) const {

911      std::vector<unsigned> TmpUnits;

912      for (RegUnitIterator UnitI(Members); UnitI.isValid(); ++UnitI)

913      TmpUnits.push_back(*UnitI);

914      std::sort(TmpUnits.begin(), TmpUnits.end());

915      std::unique_copy(TmpUnits.begin(),TmpUnits.end(),

916      std::back_inserter(RegUnits));

917      }

因为在去除了重复的RegUnit后,不排除两个寄存器类形成两个相同的RegUnit同类集,因此还需要在1612行调用findRegUnitSet方法删除重复的同类集。

1514   static std::vector<RegUnitSet>::const_iterator

( const std::vector<RegUnitSet> &UniqueSets,

1516   const RegUnitSet &Set) {

1517   std::vector<RegUnitSet>::const_iterator

1518   I = UniqueSets.begin(), E =UniqueSets.end();

1519   for (;I != E;++I) {

1530   if (I->Units == Set.Units)

1531   break ;

1532   }

1533   return I;

1534   }

接下来在1628行调用CodeGenRegBank::pruneUnitSets方法裁剪掉某些同类集。裁剪的准则是:对两个同类集,如果其中一个RegUnit的数量比另一个少1或2,而且这两个同类集所有的RegUnit权重都相同。CodeGenRegBank::pruneUnitSets的注释谈到了这个方法的必要性: 我们偶尔会将类似 APSR PC 和通用寄存器混在一起。我们也会看到许多特殊寄存器的子集,比如尾调用以及 Thumb 编码。所有可能的重叠集的数量是组合数,全部生成它们对寄存器压力建模而言太多了。理想地,我们可以在 TableGen 中通过以下方面静态地避免这个问题。 1. 目标机器定义仅包含可分配寄存器的寄存器类,将其他类标记为不可分配。 2. 将特殊寄存器标记为寄存器压力“不在意的”类。不过,我们尝试通过静态地将几乎一样的 RegUnitSet 合并,来处理没有很好定义的目标机器

1550   void CodeGenRegBank::pruneUnitSets (){

1551   assert (RegClassUnitSets.empty()&& "this invalidates RegClassUnitSets");

1552  

1553   // Form anequivalence class of UnitSets with no significant difference.

1554   std::vector<unsigned> SuperSetIDs;

1555   for (unsignedSubIdx = 0, EndIdx = RegUnitSets.size();

1556   SubIdx != EndIdx; ++SubIdx) {

1557   const RegUnitSet &SubSet = RegUnitSets[SubIdx];

1558   unsigned SuperIdx = 0;

1559   for (;SuperIdx != EndIdx; ++SuperIdx) {

1560   if (SuperIdx == SubIdx)

1561   continue ;

1562  

1563   unsigned UnitWeight =RegUnits[SubSet.Units[0]].Weight;

1564   const RegUnitSet &SuperSet = RegUnitSets[SuperIdx];

1565   if (isRegUnitSubSet(SubSet.Units,SuperSet.Units)

1566   && (SubSet.Units.size() + 3> SuperSet.Units.size())

1567   && UnitWeight ==RegUnits[SuperSet.Units[0]].Weight

1568   && UnitWeight ==RegUnits[SuperSet.Units.back()].Weight) {

1569   DEBUG(dbgs() << "UnitSet" << SubIdx << " subsumed by " << SuperIdx

1570   << "\n");

1571   break ;

1572   }

1573   }

1574   if (SuperIdx == EndIdx)

1575   SuperSetIDs.push_back(SubIdx);

1576   }

1577   // PopulatePrunedUnitSets with each equivalence class's superset.

1578   std::vector<RegUnitSet>PrunedUnitSets(SuperSetIDs.size());

1579   for (unsignedi = 0, e = SuperSetIDs.size(); i != e; ++i) {

1580   unsigned SuperIdx = SuperSetIDs[i];

1581   PrunedUnitSets[i].Name =RegUnitSets[SuperIdx].Name;

1582   PrunedUnitSets[i].Units.swap(RegUnitSets[SuperIdx].Units);

1583   }

1584   RegUnitSets.swap(PrunedUnitSets);

1585   }

执行裁剪之后,在1643行循环创建这些同类集的联合闭包,从1647行的断言可以看到,我们不希望同类集的膨胀超过2倍。之后在1689行再次调用pruneUnitSets来再次裁剪同类集。

得到这些同类集之后,我们还需要知道,就RegUnit同类集而言,寄存器类间的包含关系。这就是CodeGenRegBank的容器RegClassUnitSets(std::vector<std::vector<unsigned>>)的作用。这个容器将记录一个指定寄存器类的RegUnit同类集的超集的集合。

CodeGenRegBank::computeRegUnitSets(续)

1701   // For each register class, list the UnitSets that aresupersets.

1702   RegClassUnitSets.resize(RegClasses.size());

1703   int RCIdx = -1;

1704   for ( auto &RC : RegClasses) {

1705   ++RCIdx;

1706   if (!RC.Allocatable)

1707   continue ;

1708  

1709       // Recomputethe sorted list of units in this class.

1710   std::vector<unsigned> RCRegUnits;

1711   RC.(RCRegUnits);

1712  

1713   // Don'tincrease pressure for unallocatable regclasses.

1714   if (RCRegUnits.empty())

1715   continue ;

1716  

1717   DEBUG(dbgs() << "RC "<< RC.getName() << " Units: \n";

1718   for ( auto &U : RCRegUnits)

1719   dbgs() << RegUnits[U].getRoots()[0]->getName()<< " ";

1720   dbgs() << "\n  UnitSetIDs:");

1721  

1722   // Find allsupersets.

1723   for (unsigned USIdx = 0, USEnd = RegUnitSets.size();

1724   USIdx != USEnd; ++USIdx) {

1725   if (isRegUnitSubSet(RCRegUnits,RegUnitSets[USIdx].Units)) {

1726   DEBUG(dbgs() << " "<< USIdx);

1727   RegClassUnitSets[RCIdx].push_back(USIdx);

1728   }

1729   }

1730   DEBUG(dbgs() << "\n");

1731   assert (!RegClassUnitSets[RCIdx].empty()&& "missing unit set for regclass");

1732   }

1733  

1734   // For eachregister unit, ensure that we have the list of UnitSets that

1735     // contain theunit. Normally, this matches an existing list of UnitSets for a

1736     // registerclass. If not, we create a new entry in RegClassUnitSets as a

1737     //"fake" register class.

1738   for (unsignedUnitIdx = 0, UnitEnd = NumNativeRegUnits;

1739   UnitIdx < UnitEnd; ++UnitIdx) {

1740   std::vector<unsigned> RUSets;

1741   for (unsigned i = 0, e = RegUnitSets.size(); i != e; ++i) {

1742   RegUnitSet &RUSet = RegUnitSets[i];

1743   if (std::find(RUSet.Units.begin(),RUSet.Units.end(), UnitIdx)

1744   == RUSet.Units.end())

1745   continue ;

1746   RUSets.push_back(i);

1747   }

1748   unsigned RCUnitSetsIdx = 0;

1749   for (unsigned e = RegClassUnitSets.size();

1750   RCUnitSetsIdx != e; ++RCUnitSetsIdx) {

1751   if (RegClassUnitSets[RCUnitSetsIdx] ==RUSets) {

1752   break ;

1753   }

1754   }

1755   RegUnits[UnitIdx].RegClassUnitSetsIdx =RCUnitSetsIdx;

1756   if (RCUnitSetsIdx ==RegClassUnitSets.size()) {

1757   // Create anew list of UnitSets as a "fake" register class.

1758   RegClassUnitSets.resize(RCUnitSetsIdx +1);

1759   RegClassUnitSets[RCUnitSetsIdx].swap(RUSets);

1760   }

1761   }

1762   }

在1738行,NumNativeRegUnits是原生(即非为调整权重加入的)RegUnit数量。在1738行这个循环里,我们将RegUnit的RegClassUnitSetsIdx域设置为RegClassUnitSets中同类集都包含该RegUnit的项的下标。

3.3.5.4.       寄存器的包含关系

前面我们已经为每个寄存器索引计算的Lane掩码。这个Lane掩码指出从该寄存器索引援引的部分可以覆盖哪些更小的可援引部分。对于叶子寄存器索引,Lane掩码就是它在SubRegIndices容器里的序号。但寄存器索引是一个抽象的概念,脱离了具体的寄存器。很显然,对应具体的寄存器也存在相应的包含关系。因此CodeGenRegister里有一个类型为SmallVector<unsigned,16>的容器RegUnitLaneMasks来记录它的子寄存器的Lane掩码,也就不足为奇了。

1764   void CodeGenRegBank::computeRegUnitLaneMasks (){

1765   for ( auto &Register : Registers) {

1766   // Create aninitial lane mask for all register units.

1767   const auto &RegUnits = Register.getRegUnits();

1768   CodeGenRegister::RegUnitLaneMaskListRegUnitLaneMasks(RegUnits.count(), 0);

1769   // Iteratethrough SubRegisters.

1770   typedef CodeGenRegister::SubRegMap SubRegMap;

1771   const SubRegMap &SubRegs = Register.getSubRegs();

1772   for (SubRegMap::const_iterator S = SubRegs.begin(),

1773   SE = SubRegs.end(); S != SE; ++S) {

1774   CodeGenRegister *SubReg = S->second;

1775   // Ignore non-leafsubregisters, their lane masks are fully covered by

1776         // the leafsubregisters anyway.

1777   if (SubReg->getSubRegs().size() != 0)

1778   continue ;

1779   CodeGenSubRegIndex *SubRegIndex =S->first;

1780   const CodeGenRegister *SubRegister = S->second;

1781   unsigned LaneMask =SubRegIndex->LaneMask;

1782   // DistributeLaneMask to Register Units touched.

1783   for (unsigned SUI : SubRegister->getRegUnits()) {

1784   bool Found = false;

1785   unsigned u = 0;

1786   for (unsigned RU : RegUnits) {

1787   if (SUI == RU) {

1788   RegUnitLaneMasks[u] |= LaneMask;

1789   assert (!Found);

1790   Found = true;

1791   }

1792   ++u;

1793   }

1794   (void)Found;

1795   assert (Found);

1796   }

1797   }

1798   Register.setRegUnitLaneMasks(RegUnitLaneMasks);

1799   }

1800   }

1765行的循环表明,这个过程是对每个寄存器分别进行的。1772行的循环进入当前寄存器的子寄存器集合,1777行滤掉非叶子寄存器。1783行的循环遍历该叶子寄存器的RegUnit组成位图(注意该位图中只能有一位是1),而1786行的循环则遍历当前寄存器的RegUnit组成位图。如果1787行条件满足,循环变量u记录了这是当前寄存器RegUnit组成位图的第几位(当前寄存器一定包含这个叶子寄存器),在RegUnitLaneMasks中u所给出的位置记录该叶子寄存器索引的Lane掩码。这样,RegUnit组成位图与RegUnitLaneMasks也就关联起来了。

3.3.5.5.       其他信息的设置

回到CodeGenRegBank::computeDerivedInfo。1817行循环对寄存器类别设置HasDisjunctSubRegs,这个标记用于显示是否至少有两个子寄存器是互不不干扰的。这个标记的内容来自于寄存器对象中同名标记,在寄存器对象里,如果它包含1个以上的子寄存器,它的HasDisjunctSubRegs标记就是true。1824行循环设置RegUnitSets的Weight(即指定寄存器类所包含寄存器单元的总数)。余下的代码设置RegUnitSets成员的Order值,以及对RegUnitSetOrder容器(类型std::vector<unsigned>)的填充。1832行是对RegUnitSets成员以集合大小排序,结果保存在RegUnitSetOrder容器。那么当给出RegUnitSetOrder[RegUnit.Order]时,我们就能知道这个RegUnit的优先级。


以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持 码农网

查看所有标签

猜你喜欢:

本站部分资源来源于网络,本站转载出于传递更多信息之目的,版权归原作者或者来源机构所有,如转载稿涉及版权问题,请联系我们

C语言接口与实现

C语言接口与实现

(美)David R. Hanson / 人民邮电出版社 / 2010-8 / 79.00元

可重用的软件模块是构建大规模可靠应用程序的基石,创建可重用的软件模块是每个程序员和项目经理必须掌握的技能。C语言对创建可重用的API提供的语言和功能支持非常少,虽然C程序员写应用时都会用到API和库,但却很少有人去创建和发布新的能广泛应用的API。本书介绍用一种基于接口的设计方法创建可重用的API,这一方法将接口与实现分离开来,且与语言无关。书中详细描述了24个接口及其实现,便于读者深入了解此方法......一起来看看 《C语言接口与实现》 这本书的介绍吧!

HTML 编码/解码
HTML 编码/解码

HTML 编码/解码

XML 在线格式化
XML 在线格式化

在线 XML 格式化压缩工具