内容简介:SchedTables保存在WriteProcResources,WriteLatencies,ReadAdvanceEntries以及WriterNames容器里的数据是所有处理器公用的,因此下面的方法首先输出包含这些公用数据的数组。
3.6.2.3. 输出代码与数据结构
3.6.2.3.1. 资源使用与时延
SchedTables保存在WriteProcResources,WriteLatencies,ReadAdvanceEntries以及WriterNames容器里的数据是所有处理器公用的,因此下面的方法首先输出包含这些公用数据的数组。
1070 void SubtargetEmitter::EmitSchedClassTables (SchedClassTables &SchedTables,
1071 raw_ostream &OS) {
1072 // Emit global WriteProcResTable.
1073 OS << "\n// {ProcResourceIdx, Cycles}\n"
1074 << "extern const llvm::MCWriteProcResEntry "
1075 << Target << "WriteProcResTable[] = {\n"
1076 << " { 0, 0}, // Invalid\n";
1077 for (unsigned WPRIdx = 1, WPREnd = SchedTables.WriteProcResources.size();
1078 WPRIdx != WPREnd; ++WPRIdx) {
1079 MCWriteProcResEntry &WPREntry = SchedTables.WriteProcResources[WPRIdx];
1080 OS << " {" << format("%2d", WPREntry.ProcResourceIdx) << ", "
1081 << format("%2d", WPREntry.Cycles) << "}";
1082 if (WPRIdx + 1 < WPREnd)
1083 OS << ',';
1084 OS << " // #" << WPRIdx << '\n';
1085 }
1086 OS << "}; // " << Target << "WriteProcResTable\n";
1087
1088 // Emit global WriteLatencyTable.
1089 OS << "\n// {Cycles, WriteResourceID}\n"
1090 << "extern const llvm::MCWriteLatencyEntry "
1091 << Target << "WriteLatencyTable[] = {\n"
1092 << " { 0, 0}, // Invalid\n";
1093 for (unsigned WLIdx = 1, WLEnd = SchedTables.WriteLatencies.size();
1094 WLIdx != WLEnd; ++WLIdx) {
1095 MCWriteLatencyEntry &WLEntry = SchedTables.WriteLatencies[WLIdx];
1096 OS << " {" << format("%2d", WLEntry.Cycles) << ", "
1097 << format("%2d", WLEntry.WriteResourceID) << "}";
1098 if (WLIdx + 1 < WLEnd)
1099 OS << ',';
1100 OS << " // #" << WLIdx << " " << SchedTables.WriterNames[WLIdx] << '\n';
1101 }
1102 OS << "}; // " << Target << "WriteLatencyTable\n";
1103
1104 // Emit global ReadAdvanceTable.
1105 OS << "\n// {UseIdx, WriteResourceID, Cycles}\n"
1106 << "extern const llvm::MCReadAdvanceEntry "
1107 << Target << "ReadAdvanceTable[] = {\n"
1108 << " {0, 0, 0}, // Invalid\n";
1109 for (unsigned RAIdx = 1, RAEnd = SchedTables.ReadAdvanceEntries.size();
1110 RAIdx != RAEnd; ++RAIdx) {
1111 MCReadAdvanceEntry &RAEntry = SchedTables.ReadAdvanceEntries[RAIdx];
1112 OS << " {" << RAEntry.UseIdx << ", "
1113 << format("%2d", RAEntry.WriteResourceID) << ", "
1114 << format("%2d", RAEntry.Cycles) << "}";
1115 if (RAIdx + 1 < RAEnd)
1116 OS << ',';
1117 OS << " // #" << RAIdx << '\n';
1118 }
1119 OS << "}; // " << Target << "ReadAdvanceTable\n";
1120
1121 // Emit a SchedClass table for each processor.
1122 for (CodeGenSchedModels::ProcIter PI = SchedModels.procModelBegin(),
1123 PE = SchedModels.procModelEnd(); PI != PE; ++PI) {
1124 if (!PI->hasInstrSchedModel())
1125 continue ;
1126
1127 std::vector<MCSchedClassDesc> &SCTab =
1128 SchedTables.ProcSchedClasses[1 + (PI - SchedModels.procModelBegin())];
1129
1130 OS << "\n// {Name, NumMicroOps, BeginGroup, EndGroup,"
1131 << " WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#}\n";
1132 OS << "static const llvm::MCSchedClassDesc "
1133 << PI->ModelName << "SchedClasses[] = {\n";
1134
1135 // The first class is always invalid. We no way to distinguish it except by
1136 // name and position.
1137 assert (SchedModels.getSchedClass(0).Name == "NoInstrModel"
1138 && "invalid class not first");
1139 OS << " {DBGFIELD(\"InvalidSchedClass\") "
1140 << MCSchedClassDesc::InvalidNumMicroOps
1141 << ", 0, 0, 0, 0, 0, 0, 0, 0},\n";
1142
1143 for (unsigned SCIdx = 1, SCEnd = SCTab.size(); SCIdx != SCEnd; ++SCIdx) {
1144 MCSchedClassDesc &MCDesc = SCTab[SCIdx];
1145 const CodeGenSchedClass &SchedClass = SchedModels.getSchedClass(SCIdx);
1146 OS << " {DBGFIELD(\"" << SchedClass.Name << "\") ";
1147 if (SchedClass.Name.size() < 18)
1148 OS.indent(18 - SchedClass.Name.size());
1149 OS << MCDesc.NumMicroOps
1150 << ", " << MCDesc.BeginGroup << ", " << MCDesc.EndGroup
1151 << ", " << format("%2d", MCDesc.WriteProcResIdx)
1152 << ", " << MCDesc.NumWriteProcResEntries
1153 << ", " << format("%2d", MCDesc.WriteLatencyIdx)
1154 << ", " << MCDesc.NumWriteLatencyEntries
1155 << ", " << format("%2d", MCDesc.ReadAdvanceIdx)
1156 << ", " << MCDesc.NumReadAdvanceEntries << "}";
1157 if (SCIdx + 1 < SCEnd)
1158 OS << ',';
1159 OS << " // #" << SCIdx << '\n';
1160 }
1161 OS << "}; // " << PI->ModelName << "SchedClasses\n";
1162 }
1163 }
首先在1073~1086行输出X86家族处理器公用的MCWriteProcResEntry数组:
// {ProcResourceIdx, Cycles}
extern const llvm::MCWriteProcResEntry X86WriteProcResTable[] = {
{ 0, 0}, // Invalid
{18, 1}, // #1
{19, 1}, // #2
{ 4, 1}, // #3
{ 8, 1}, // #4
…
{ 4, 1}, // #1217
{ 6, 1}, // #1218
{ 7, 2} // #1219
}; // X86WriteProcResTable
接着1088~1102行输出X86家族目标机器公用的MCWriteLatencyEntry数组:
// {Cycles, WriteResourceID}
extern const llvm::MCWriteLatencyEntry X86WriteLatencyTable[] = {
{ 0, 0}, // Invalid
{100, 0}, // #1 WriteMicrocoded_WriteSystem
{ 1, 0}, // #2 WriteALU_WriteVecLogic_WriteZero_WriteFBlend_WriteShift_WriteJump_WriteFShuffle_WriteStore_WriteMove_WriteLEA_WriteFence_WriteShuffle_WriteVecALU_WriteVecShift_WriteNop_WriteBlend_Write2P237_P4_WritePushF_WritePushA_WritePopF_WritePopA_WriteP06_WriteBSwap32_WriteBSwap64_WriteMoveBE32rm_WriteMoveBE16mr_WriteMoveBE32mr_WriteMoveBE64mr_Write2P0156_2P237_P4_Write3P0156_2P237_P4_WriteP0156_2P237_P4_WriteShiftRMW_WriteShiftClLdRMW_Write2P06_WriteRotateRMW_WriteRotateRMWCL_WriteRCm1_WriteRCmi_WriteShDmr_WriteShDmrCL_WriteBTmr_WriteBTRSCmr_WriteSetCCm_WriteCldStd_WriteP15_WriteJCXZ_WriteLOOP_WriteCALLr_WriteCALLm_WriteRET_WriteRETI_WriteBOUND_WriteINTO_Write2P0156_P23_WriteP0156_P23_WriteSTOS_WriteXADD_WriteCMPXCHG_WriteCMPXCHG8B_WriteCMPXCHG16B_WritePAUSE_WriteXGETBV_WriteRDTSC_WriteRDPMC_WriteRDRAND_WriteST_FP80m_WriteFBSTP_WriteFNSTSW_WriteFNSTCW_WriteFNSAVE_WriteFRSTOR_WriteP1_P23_Write2P1_P23_Write5P0156_WriteFNINIT_WriteP5_WriteP01_P5_WritePBLENDWr_WriteVPBLENDDr_WritePEXTRm_WriteVPGATHERDD128_WriteVPGATHERDD256_WriteVPGATHERQD128_WriteVPGATHERQD256_WriteVPGATHERDQ128_WriteVPGATHERDQ256_WriteVPGATHERQQ128_WriteVPGATHERQQ256_WriteEXTRACTPSr_WriteVGATHERDPS128_WriteVGATHERDPS256_WriteVGATHERQPS128_WriteVGATHERQPS256_WriteVGATHERDPD128_WriteVGATHERDPD256_WriteVGATHERQPD128_WriteVGATHERQPD256_WriteVZEROUPPER_WriteVZEROALL_WriteFShuffle256_WriteShuffle256_WriteVarVecShift_WriteVarBlend_WriteFVarBlend
{ 5, 0}, // #3 WriteALULd_WriteVecLogicLd_WriteFBlendLd_WriteShiftLd_WriteJumpLd_WriteFShuffleLd_WriteVecALULd_WriteShuffleLd_WriteVecIMul_WriteVecShiftLd_WriteFMul_WriteBlendLd_WriteFRcp_WriteFRsqrt_WriteVPBROADCAST128Ld_WritePCMPGTQr_WritePCMPGTQm_WriteCVTPD2PSYrr_WriteCVTPS2PDrm_WriteVCVTPS2PDYrr_WriteCVTSS2SDrm_WriteHADDSUBPr_WriteMULr_WriteFMADDr_WriteRSQRTr_WriteP5Ld_WriteLoad_WriteShuffle256Ld_WriteFShuffle256Ld_WriteVarVecShiftLd
{ 1, 0}, // #4 WriteRMW
…
{ 3, 0}, // #54 WriteIMul
{ 1, 0}, // #55 WriteIMulH
{17, 0} // #56 WritePCmpEStrMLd_WritePCmpIStrILd
}; // X86WriteLatencyTable
旁边输出的注释是具有相同时延数据的SchedWrite定义(具体资源的使用则记录在调度类的MCSchedClassDesc对象里,GenSchedClassTables的938~940行)。另外,WriteResourceID域都是0,表明X86目标机器使用了适用于所有SchedWrite定义的ReadAdvance或SchedReadAdvance定义,或者没有使用ReadAdvance及SchedReadAdvance定义(因为随后输出了X86ReadAdvanceTable,因此显然是前者)。
1104~1119行输出X86家族目标机器公用的MCReadAdvanceEntry数组:
// {UseIdx, WriteResourceID, Cycles}
extern const llvm::MCReadAdvanceEntry X86ReadAdvanceTable[] = {
{0, 0, 0}, // Invalid
{0, 0, 4}, // #1
{5, 0, 4}, // #2
{6, 0, 4}, // #3
{0, 0, 3}, // #4
{5, 0, 3}, // #5
{6, 0, 3} // #6
}; // X86ReadAdvanceTable
因为WriteResourceID域都是0,因此X86目标机器使用了适用于所有SchedWrite定义的ReadAdvance或SchedReadAdvance定义。另外,UseIdx是读操作数的索引,实际上是X86目标机器只定义了ReadAdvance<ReadAfterLd, 4>与ReadAdvance<ReadAfterLd, 3>,但由于ReadAfterLd在指令定义中可作为第0、5及6个读操作数出现,所以构建出上面的数组。
接下来的代码开始输出描述处理器调度类型的数组。
对X86目标机器,这些数组有:HaswellModelSchedClasses,BtVer2ModelSchedClasses,SandyBridgeModelSchedClasses,及SLMModelSchedClasses。其中SandyBridgeModelSchedClasses相关的数组是这样的:
// {Name, NumMicroOps, BeginGroup, EndGroup, WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#}
static const llvm::MCSchedClassDesc SandyBridgeModelSchedClasses[] = {
{DBGFIELD("InvalidSchedClass") 65535, 0, 0, 0, 0, 0, 0, 0, 0},
{DBGFIELD("IIC_AAA_WriteMicrocoded") 1, 0, 0, 108, 2, 1, 1, 0, 0}, // #1
{DBGFIELD("IIC_AAD_WriteMicrocoded") 1, 0, 0, 108, 2, 1, 1, 0, 0}, // #2
{DBGFIELD("IIC_AAM_WriteMicrocoded") 1, 0, 0, 108, 2, 1, 1, 0, 0}, // #3
{DBGFIELD("IIC_AAS_WriteMicrocoded") 1, 0, 0, 108, 2, 1, 1, 0, 0}, // #4
…
{DBGFIELD("VZEROALL") 0, 0, 0, 0, 0, 0, 0, 0, 0}, // #947
{DBGFIELD("LDMXCSR_VLDMXCSR") 1, 0, 0, 279, 2, 9, 1, 0, 0}, // #948
{DBGFIELD("STMXCSR_VSTMXCSR") 1, 0, 0, 439, 3, 2, 1, 0, 0} // #949
}; // SandyBridgeModelSchedClasses
DBGFIELD声明的域用于调试目的。所有的BeginGroup与EndGroup域都是0(false),表示这些调度类没有组成调度组(LLVM目前没有任何调度组)。剩下的(Idx,number)组给出了对前面生成数组的引用情况。以LDMXCSR_VLDMXCSR为例,它援引X86WriteLatencyTable的第10项:{4, 0}——这个SchedWrite有4周期时延,以及X86WriteProcResTable的第280、281项:{8, 1}, {10, 1}——占用编号为8及10的资源1周期。
3.6.2.3.2. 处理器资源模型
有了所有处理器调度类型的数组后,就该轮到输出描述处理器的数据结构了。同样,类似Atom的处理器与类似SandyBridge的处理器的处理方法是不一样的,因为在.td文件的处理器描述上,它们就有很大的区别。
1165 void SubtargetEmitter::EmitProcessorModels (raw_ostream &OS) {
1166 // For each processor model.
1167 for (CodeGenSchedModels::ProcIter PI = SchedModels.procModelBegin(),
1168 PE = SchedModels.procModelEnd(); PI != PE; ++PI) {
1169 // Emit processor resource table.
1170 if (PI->hasInstrSchedModel())
1171 EmitProcessorResources (*PI, OS);
1172 else if(!PI->ProcResourceDefs.empty())
1173 PrintFatalError(PI->ModelDef->getLoc(), "SchedMachineModel defines "
1174 "ProcResources without defining WriteRes SchedWriteRes");
1175
1176 // Begin processor itinerary properties
1177 OS << "\n";
1178 OS << "static const llvm::MCSchedModel " << PI->ModelName << " = {\n";
1179 EmitProcessorProp(OS, PI->ModelDef, "IssueWidth", ',');
1180 EmitProcessorProp(OS, PI->ModelDef, "MicroOpBufferSize", ',');
1181 EmitProcessorProp(OS, PI->ModelDef, "LoopMicroOpBufferSize", ',');
1182 EmitProcessorProp(OS, PI->ModelDef, "LoadLatency", ',');
1183 EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ',');
1184 EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ',');
1185
1186 OS << " " << (bool)(PI->ModelDef ?
1187 PI->ModelDef->getValueAsBit("PostRAScheduler") : 0)
1188 << ", // " << "PostRAScheduler\n";
1189
1190 OS << " " << (bool)(PI->ModelDef ?
1191 PI->ModelDef->getValueAsBit("CompleteModel") : 0)
1192 << ", // " << "CompleteModel\n";
1193
1194 OS << " " << PI->Index << ", // Processor ID\n";
1195 if (PI->hasInstrSchedModel())
1196 OS << " " << PI->ModelName << "ProcResources" << ",\n"
1197 << " " << PI->ModelName << "SchedClasses" << ",\n"
1198 << " " << PI->ProcResourceDefs.size()+1 << ",\n"
1199 << " " << (SchedModels.schedClassEnd()
1200 - SchedModels.schedClassBegin()) << ",\n";
1201 else
1202 OS << " 0, 0, 0, 0, // No instruction-level machine model.\n";
1203 if (PI->hasItineraries())
1204 OS << " " << PI->ItinsDef->getName() << "};\n";
1205 else
1206 OS << " nullptr}; // No Itinerary\n";
1207 }
1208 }
对类似SandyBridge的处理器,首先调用下面的EmitProcessorResources方法输出描述资源的数据结构。因为这样的处理器需要另外描述资源,而类似Atom的处理器则是在ProcessorItineraries派生定义里给出资源描述。
605 void SubtargetEmitter::EmitProcessorResources( const CodeGenProcModel &ProcModel,
606 raw_ostream &OS) {
607 char Sep = ProcModel.ProcResourceDefs.empty() ? ' ' : ',';
608
609 OS << "\n// {Name, NumUnits, SuperIdx, IsBuffered}\n";
610 OS << "static const llvm::MCProcResourceDesc "
611 << ProcModel.ModelName << "ProcResources" << "[] = {\n"
612 << " {DBGFIELD(\"InvalidUnit\") 0, 0, 0}" << Sep << "\n";
613
614 for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) {
615 Record *PRDef = ProcModel.ProcResourceDefs[i];
616
617 Record *SuperDef = nullptr;
618 unsigned SuperIdx = 0;
619 unsigned NumUnits = 0;
620 int BufferSize = PRDef->getValueAsInt("BufferSize");
621 if (PRDef->isSubClassOf("ProcResGroup")) {
622 RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources");
623 for (RecIter RUI = ResUnits.begin(), RUE = ResUnits.end();
624 RUI != RUE; ++RUI) {
625 NumUnits += (*RUI)->getValueAsInt("NumUnits");
626 }
627 }
628 else {
629 // Find the SuperIdx
630 if (PRDef->getValueInit("Super")->isComplete()) {
631 SuperDef = SchedModels.findProcResUnits(
632 PRDef->getValueAsDef("Super"), ProcModel);
633 SuperIdx = ProcModel.getProcResourceIdx(SuperDef);
634 }
635 NumUnits = PRDef->getValueAsInt("NumUnits");
636 }
637 // Emit the ProcResourceDesc
638 if (i+1 == e)
639 Sep = ' ';
640 OS << " {DBGFIELD(\"" << PRDef->getName() << "\") ";
641 if (PRDef->getName().size() < 15)
642 OS.indent(15 - PRDef->getName().size());
643 OS << NumUnits << ", " << SuperIdx << ", "
644 << BufferSize << "}" << Sep << " // #" << i+1;
645 if (SuperDef)
646 OS << ", Super=" << SuperDef->getName();
647 OS << "\n";
648 }
649 OS << "};\n";
650 }
每个类似SandyBridge的处理器都要输出一个资源数组,描述SandyBridge处理器资源的数组是:
// {Name, NumUnits, SuperIdx, IsBuffered}
static const llvm::MCProcResourceDesc SandyBridgeModelProcResources [] = {
{DBGFIELD("InvalidUnit") 0, 0, 0},
{DBGFIELD("SBDivider") 1, 0, -1}, // #1
{DBGFIELD("SBPort0") 1, 0, -1}, // #2
{DBGFIELD("SBPort1") 1, 0, -1}, // #3
{DBGFIELD("SBPort4") 1, 0, -1}, // #4
{DBGFIELD("SBPort5") 1, 0, -1}, // #5
{DBGFIELD("SBPort05") 2, 0, -1}, // #6
{DBGFIELD("SBPort15") 2, 0, -1}, // #7
{DBGFIELD("SBPort23") 2, 0, -1}, // #8
{DBGFIELD("SBPort015") 3, 0, -1}, // #9
{DBGFIELD("SBPortAny") 6, 0, 54} // #10
};
MCProcResourceDesc的定义与ProcResourceUnits类似。其中SuperIdx为0,表示不存在上级资源。注释里称为IsBuffered的域,实际上是BufferSize,-1表示发布口由统一的保留站(即SandyBridge处理器的硬件调度器)来供给。最后一行的54,表示SandyBridge处理器的硬件调度器最多可以缓存54条等待发布的微操作,这是Sandy Bridge保留站的深度(reservation station,保留站的作用是排队微操作,直到所有的源操作数就绪,将就绪的微操作调度并分发到可用的执行单元)。
前面X86WriteProcResTable数组元素ProcResourceIdx成员的内容就是这个数组的索引。
方法EmitProcessorProp则对所有的处理器都适用。它辅助生成处理器的MCSchedModel数组。注意,对所有的目标机器家族,描述的第一个处理器总是NoSchedModel。
594 void SubtargetEmitter::EmitProcessorProp (raw_ostream &OS, const Record *R,
595 const char *Name, char Separator) {
596 OS << " ";
597 int V = R ? R->getValueAsInt(Name) : -1;
598 if (V >= 0)
599 OS << V << Separator << " // " << Name;
600 else
601 OS << "MCSchedModel::Default" << Name << Separator;
602 OS << '\n';
603 }
参考TargetSchedule.td中的定义,缺省的LoadLatency,MicroOpBufferSize,MinLatency,LoopMicroOpBufferSize,IssueWidth,HighLatency,MispredictPenalty都是-1,在输出数组时,这些缺省值被输出为MCSchedModel::DefaultXXX(XXX为相应的属性名)。因此,对X86目标机器,NoSchedModel的输出是这样的:
static const llvm::MCSchedModel NoSchedModel = {
MCSchedModel::DefaultIssueWidth,
MCSchedModel::DefaultMicroOpBufferSize,
MCSchedModel::DefaultLoopMicroOpBufferSize,
MCSchedModel::DefaultLoadLatency,
MCSchedModel::DefaultHighLatency,
MCSchedModel::DefaultMispredictPenalty,
0, // PostRAScheduler
1, // CompleteModel
0, // Processor ID
0, 0, 0, 0, // No instruction-level machine model.
nullptr}; // No Itinerary
在X86家族里,指令调度得到良好描述的只有基于Atom,BtVer2,SLM,Haswell,SandyBridge架构的处理器,其他像i386,i686,pentium4m,k6,athlon系列,opteron系列等处理器LLVM并没有给出有关指令调度的细节(估计是找不到相关的文档)。对这些处理器,LLVM给出了一个通用的描述(当然也没有什么战力☺)——GenericModel,它的输出是这样的:
static const llvm::MCSchedModel GenericModel = {
4, // IssueWidth
32, // MicroOpBufferSize
MCSchedModel::DefaultLoopMicroOpBufferSize,
4, // LoadLatency
10, // HighLatency
MCSchedModel::DefaultMispredictPenalty,
0, // PostRAScheduler
1, // CompleteModel
1, // Processor ID
0, 0, 0, 0, // No instruction-level machine model.
nullptr}; // No Itinerary
对于我们前面看过的Atom与SandyBridge处理器,它们的输出则是:
static const llvm::MCSchedModel AtomModel = {
2, // IssueWidth
0, // MicroOpBufferSize
10, // LoopMicroOpBufferSize
3, // LoadLatency
30, // HighLatency
MCSchedModel::DefaultMispredictPenalty,
1, // PostRAScheduler
1, // CompleteModel
2, // Processor ID
0, 0, 0, 0, // No instruction-level machine model.
AtomItineraries};
static const llvm::MCSchedModel SandyBridgeModel = {
4, // IssueWidth
168, // MicroOpBufferSize
28, // LoopMicroOpBufferSize
4, // LoadLatency
MCSchedModel::DefaultHighLatency,
16, // MispredictPenalty
0, // PostRAScheduler
0, // CompleteModel
5, // Processor ID
SandyBridgeModelProcResources ,
SandyBridgeModelSchedClasses ,
11,
950,
nullptr}; // No Itinerary
对Atom处理器,MCSchedModel实例的ProcResourceTable(const MCProcResourceDesc*类型),SchedClassTable(const MCSchedClassDesc*类型),NumProcResourceKinds与NumSchedClasses都是0。而SandyBridge则指向前面生成的数组。
接下来,我们还要生成一个查找表,根据处理器的名字,给出对应的MCSchedModel实例。
1213 void SubtargetEmitter::EmitProcessorLookup (raw_ostream &OS) {
1214 // Gather and sort processor information
1215 std::vector<Record*> ProcessorList =
1216 Records.getAllDerivedDefinitions("Processor");
1217 std::sort(ProcessorList.begin(), ProcessorList.end(), LessRecordFieldName());
1218
1219 // Begin processor table
1220 OS << "\n";
1221 OS << "// Sorted (by key) array of itineraries for CPU subtype.\n"
1222 << "extern const llvm::SubtargetInfoKV "
1223 << Target << "ProcSchedKV[] = {\n";
1224
1225 // For each processor
1226 for (unsigned i = 0, N = ProcessorList.size(); i < N;) {
1227 // Next processor
1228 Record *Processor = ProcessorList[i];
1229
1230 const std::string &Name = Processor->getValueAsString("Name");
1231 const std::string &ProcModelName =
1232 SchedModels.getModelForProc(Processor).ModelName;
1233
1234 // Emit as { "cpu", procinit },
1235 OS << " { \"" << Name << "\", (const void *)&" << ProcModelName << " }";
1236
1237 // Depending on ''if more in the list'' emit comma
1238 if (++i < N) OS << ",";
1239
1240 OS << "\n";
1241 }
1242
1243 // End processor table
1244 OS << "};\n";
1245 }
这个查找表的类型是SubtargetInfoKV:
69 struct SubtargetInfoKV {
70 const char *Key; // K-V key string
71 const void *Value; // K-V pointer value
72
73 // Compare routine for std::lower_bound
74 bool operator <(StringRef S) const {
75 return StringRef(Key) < S;
76 }
77 };
X86家族的这张表不算太大,下面给出它完整的定义:
// Sorted (by key) array of itineraries for CPU subtype.
extern const llvm::SubtargetInfoKV X86ProcSchedKV[] = {
{ "amdfam10", ( const void *)&GenericModel },
{ "athlon", ( const void *)&GenericModel },
{ "athlon-4", ( const void *)&GenericModel },
{ "athlon-fx", ( const void *)&GenericModel },
{ "athlon-mp", ( const void *)&GenericModel },
{ "athlon-tbird", ( const void *)&GenericModel },
{ "athlon-xp", ( const void *)&GenericModel },
{ "athlon64", ( const void *)&GenericModel },
{ "athlon64-sse3", ( const void *)&GenericModel },
{ "atom", ( const void *)&AtomModel },
{ "barcelona", ( const void *)&GenericModel },
{ "bdver1", ( const void *)&GenericModel },
{ "bdver2", ( const void *)&GenericModel },
{ "bdver3", ( const void *)&GenericModel },
{ "bdver4", ( const void *)&GenericModel },
{ "bonnell", ( const void *)&AtomModel },
{ "broadwell", ( const void *)&HaswellModel },
{ "btver1", ( const void *)&GenericModel },
{ "btver2", ( const void *)&BtVer2Model },
{ "c3", ( const void *)&GenericModel },
{ "c3-2", ( const void *)&GenericModel },
{ "core-avx-i", ( const void *)&SandyBridgeModel },
{ "core-avx2", ( const void *)&HaswellModel },
{ "core2", ( const void *)&SandyBridgeModel },
{ "corei7", ( const void *)&SandyBridgeModel },
{ "corei7-avx", ( const void *)&SandyBridgeModel },
{ "generic", ( const void *)&GenericModel },
{ "geode", ( const void *)&GenericModel },
{ "haswell", ( const void *)&HaswellModel },
{ "i386", ( const void *)&GenericModel },
{ "i486", ( const void *)&GenericModel },
{ "i586", ( const void *)&GenericModel },
{ "i686", ( const void *)&GenericModel },
{ "ivybridge", ( const void *)&SandyBridgeModel },
{ "k6", ( const void *)&GenericModel },
{ "k6-2", ( const void *)&GenericModel },
{ "k6-3", ( const void *)&GenericModel },
{ "k8", ( const void *)&GenericModel },
{ "k8-sse3", ( const void *)&GenericModel },
{ "knl", ( const void *)&HaswellModel },
{ "nehalem", ( const void *)&SandyBridgeModel },
{ "nocona", ( const void *)&GenericModel },
{ "opteron", ( const void *)&GenericModel },
{ "opteron-sse3", ( const void *)&GenericModel },
{ "penryn", ( const void *)&SandyBridgeModel },
{ "pentium", ( const void *)&GenericModel },
{ "pentium-m", ( const void *)&GenericModel },
{ "pentium-mmx", ( const void *)&GenericModel },
{ "pentium2", ( const void *)&GenericModel },
{ "pentium3", ( const void *)&GenericModel },
{ "pentium3m", ( const void *)&GenericModel },
{ "pentium4", ( const void *)&GenericModel },
{ "pentium4m", ( const void *)&GenericModel },
{ "pentiumpro", ( const void *)&GenericModel },
{ "prescott", ( const void *)&GenericModel },
{ "sandybridge", ( const void *)&SandyBridgeModel },
{ "silvermont", ( const void *)&SLMModel },
{ "skx", ( const void *)&HaswellModel },
{ "skylake", ( const void *)&HaswellModel },
{ "slm", ( const void *)&SLMModel },
{ "westmere", ( const void *)&SandyBridgeModel },
{ "winchip-c6", ( const void *)&GenericModel },
{ "winchip2", ( const void *)&GenericModel },
{ "x86-64", ( const void *)&SandyBridgeModel },
{ "yonah", ( const void *)&SandyBridgeModel }
};
因此,为了得到尽可能好的性能,应该通过命令行选项告诉LLVM目标机器是什么处理器。
回到SubtargetEmitter::run,下面的代码输出一个重要的方法:InitX86MCSubtargetInfo。
SubtargetEmitter::run(续)
1437 // MCInstrInfo initialization routine.
1438 OS << "static inline void Init" << Target
1439 << "MCSubtargetInfo(MCSubtargetInfo *II, "
1440 << "const Triple &TT, StringRef CPU, StringRef FS) {\n";
1441 OS << " II->InitMCSubtargetInfo(TT, CPU, FS, ";
1442 if (NumFeatures)
1443 OS << Target << "FeatureKV, ";
1444 else
1445 OS << "None, ";
1446 if (NumProcs)
1447 OS << Target << "SubTypeKV, ";
1448 else
1449 OS << "None, ";
1450 OS << '\n'; OS.indent(22);
1451 OS << Target << "ProcSchedKV, "
1452 << Target << "WriteProcResTable, "
1453 << Target << "WriteLatencyTable, "
1454 << Target << "ReadAdvanceTable, ";
1455 if (SchedModels.hasItineraries()) {
1456 OS << '\n'; OS.indent(22);
1457 OS << Target << "Stages, "
1458 << Target << "OperandCycles, "
1459 << Target << "ForwardingPaths";
1460 } else
1461 OS << "0, 0, 0";
1462 OS << ");\n}\n\n";
1463
1464 OS << "} // End llvm namespace \n";
1465
1466 OS << "#endif // GET_SUBTARGETINFO_MC_DESC\n\n";
生成的InitX86MCSubtargetInfo方法的定义如下(连带收尾代码):
#undef DBGFIELD
static inline void InitX86MCSubtargetInfo (MCSubtargetInfo *II, const Triple &TT, StringRef CPU, StringRef FS) {
II->InitMCSubtargetInfo(TT, CPU, FS, X86FeatureKV, X86SubTypeKV,
X86ProcSchedKV, X86WriteProcResTable, X86WriteLatencyTable, X86ReadAdvanceTable,
X86Stages, X86OperandCycles, X86ForwardingPaths);
}
} // End llvm namespace
#endif // GET_SUBTARGETINFO_MC_DESC
通过这个方法,X86目标机器的数据就与机器无关的MC框架挂上钩。
以上所述就是小编给大家介绍的《LLVM学习笔记(43)》,希望对大家有所帮助,如果大家有任何疑问请给我留言,小编会及时回复大家的。在此也非常感谢大家对 码农网 的支持!
猜你喜欢:- 【每日笔记】【Go学习笔记】2019-01-04 Codis笔记
- 【每日笔记】【Go学习笔记】2019-01-02 Codis笔记
- 【每日笔记】【Go学习笔记】2019-01-07 Codis笔记
- Golang学习笔记-调度器学习
- Vue学习笔记(二)------axios学习
- 算法/NLP/深度学习/机器学习面试笔记
本站部分资源来源于网络,本站转载出于传递更多信息之目的,版权归原作者或者来源机构所有,如转载稿涉及版权问题,请联系我们。
CSS3专业网页开发指南
Peter Gasston / 李景媛、吴晓嘉 / 人民邮电出版社 / 2014-3-1 / 45.00元
《CSS3专业网页开发指南》是英国著名Web前端开发工程师Peter Gasston对CSS3高级技术的全面介绍。书中既有CSS3的发展历史、基本语法等入门知识介绍,也涵盖了媒体查询、选择器、伪类与伪元素、网页字体、文本排版、图形处理、动画、布局等CSS3前端开发必不可少的知识,还介绍了CSS3的未来发展方向。全书共分为17章,作者在每一章的讲解中都结合了大量的实例,同时也不忘介绍每一项技术的发展......一起来看看 《CSS3专业网页开发指南》 这本书的介绍吧!