|
| 1 | +# 《Chrome V8源码》22.编译链3:Bytecode的秘密——常量池 |
| 2 | + |
| 3 | +# 1 摘要 |
| 4 | +本篇是编译链专题的第三篇,详细讲解AST到字节码的转换过程。本文组织方式:字节码生成(章节2);延期常量池(章节3);编译收尾工作(章节4)。 |
| 5 | +# 2 字节码生成 |
| 6 | +把AST树翻译成字节码,入口函数源码如下: |
| 7 | +```c++ |
| 8 | +1. MaybeHandle<SharedFunctionInfo> GenerateUnoptimizedCodeForToplevel( |
| 9 | +2. Isolate* isolate, ParseInfo* parse_info, AccountingAllocator* allocator, |
| 10 | +3. IsCompiledScope* is_compiled_scope) { |
| 11 | +4. //省略............ |
| 12 | +5. while (!functions_to_compile.empty()) { |
| 13 | +6. //省略............ |
| 14 | +7. if (job->ExecuteJob() == CompilationJob::FAILED || |
| 15 | +8. FinalizeUnoptimizedCompilationJob(job.get(), shared_info, isolate) == |
| 16 | +9. CompilationJob::FAILED) { |
| 17 | +10. return MaybeHandle<SharedFunctionInfo>(); |
| 18 | +11. } |
| 19 | +12. if (FLAG_stress_lazy_source_positions) { |
| 20 | +13. SharedFunctionInfo::EnsureSourcePositionsAvailable(isolate, shared_info); |
| 21 | +14. } |
| 22 | +15. if (shared_info.is_identical_to(top_level)) { |
| 23 | +16. *is_compiled_scope = shared_info->is_compiled_scope(); |
| 24 | +17. DCHECK(is_compiled_scope->is_compiled()); |
| 25 | +18. } |
| 26 | +19. } |
| 27 | +20. parse_info->ResetCharacterStream(); |
| 28 | +21. return top_level; |
| 29 | +22. } |
| 30 | +``` |
| 31 | +上面第7行代码`job->ExecuteJob()`的作用是生成字节码,它与AST一样以函数为单位生成字节码,最终进入下面的源码: |
| 32 | +```c++ |
| 33 | +1. void BytecodeGenerator::GenerateBytecodeBody() { |
| 34 | +2. VisitArgumentsObject(closure_scope()->arguments()); |
| 35 | +3. VisitNewTargetVariable(closure_scope()->new_target_var()); |
| 36 | +4. FunctionLiteral* literal = info()->literal(); |
| 37 | +5. if (IsResumableFunction(literal->kind())) { |
| 38 | +6. BuildGeneratorObjectVariableInitialization(); |
| 39 | +7. } |
| 40 | +8. if (FLAG_trace) builder()->CallRuntime(Runtime::kTraceEnter); |
| 41 | +9. //省略......................... |
| 42 | +10. BuildIncrementBlockCoverageCounterIfEnabled(literal, SourceRangeKind::kBody); |
| 43 | +11. VisitDeclarations(closure_scope()->declarations()); |
| 44 | +12. VisitModuleNamespaceImports(); |
| 45 | +13. builder()->StackCheck(literal->start_position()); |
| 46 | +14. if (IsBaseConstructor(function_kind())) { |
| 47 | +15. if (literal->requires_brand_initialization()) { |
| 48 | +16. BuildPrivateBrandInitialization(builder()->Receiver()); |
| 49 | +17. } |
| 50 | +18. if (literal->requires_instance_members_initializer()) { |
| 51 | +19. BuildInstanceMemberInitialization(Register::function_closure(), |
| 52 | +20. builder()->Receiver()); |
| 53 | +21. } |
| 54 | +22. } |
| 55 | +23. VisitStatements(literal->body()); |
| 56 | +24. if (!builder()->RemainderOfBlockIsDead()) { |
| 57 | +25. builder()->LoadUndefined(); |
| 58 | +26. BuildReturn(); |
| 59 | +27. } |
| 60 | +28. } |
| 61 | +``` |
| 62 | +上面第11行代码`VisitDeclarations()`的作用是分析全局变量声明并生成对应的bytecode。处理样例代码的全局变量时会调用以下函数: |
| 63 | +```c++ |
| 64 | +1. BytecodeArrayBuilder& BytecodeArrayBuilder::LoadConstantPoolEntry( |
| 65 | +2. size_t entry) { |
| 66 | +3. OutputLdaConstant(entry); |
| 67 | +4. return *this; |
| 68 | +5. } |
| 69 | +6. //分隔线============================ |
| 70 | +7. #define DEFINE_BYTECODE_OUTPUT(name, ...) |
| 71 | +``` |
| 72 | +上面第3行代码的作用是生成`LdaConstant`字节码,其操作数`entry`是常量池(稍后解释)中的下标,全局变量的相关信息保存在该下标对应的常量池条目中。在Bytecode运行期间,通过`LdaGlobal`指令取出常量值。`OutputLdaConstant()`方法由宏模板`DEFINE_BYTECODE_OUTPUT`实现,请读者自行展开。最终进入`EmitBytecode()`方法,该方法会生成字节码,源码如下: |
| 73 | +```c++ |
| 74 | +1. void BytecodeArrayWriter::EmitBytecode(const BytecodeNode* const node) { |
| 75 | +2. //省略.......... |
| 76 | +3. for (int i = 0; i < operand_count; ++i) { |
| 77 | +4. switch (operand_sizes[i]) { |
| 78 | +5. case OperandSize::kNone: |
| 79 | +6. UNREACHABLE(); |
| 80 | +7. break; |
| 81 | +8. case OperandSize::kByte: |
| 82 | +9. bytecodes()->push_back(static_cast<uint8_t>(operands[i])); |
| 83 | +10. break; |
| 84 | +11. case OperandSize::kShort: { |
| 85 | +12. uint16_t operand = static_cast<uint16_t>(operands[i]); |
| 86 | +13. const uint8_t* raw_operand = reinterpret_cast<const uint8_t*>(&operand); |
| 87 | +14. bytecodes()->push_back(raw_operand[0]); |
| 88 | +15. bytecodes()->push_back(raw_operand[1]); |
| 89 | +16. break; |
| 90 | +17. } |
| 91 | +18. case OperandSize::kQuad: { |
| 92 | +19. const uint8_t* raw_operand = |
| 93 | +20. reinterpret_cast<const uint8_t*>(&operands[i]); |
| 94 | +21. bytecodes()->push_back(raw_operand[0]); |
| 95 | +22. bytecodes()->push_back(raw_operand[1]); |
| 96 | +23. bytecodes()->push_back(raw_operand[2]); |
| 97 | +24. bytecodes()->push_back(raw_operand[3]); |
| 98 | +25. break; |
| 99 | +26. } |
| 100 | +27. } |
| 101 | +28. } |
| 102 | +29. } |
| 103 | +``` |
| 104 | +根据`case`条件(即操作数的宽度:kByte、kShort、kQuad)生成不同长度的Bytecode操作数,并用`bytecodes()->push_back()`逐字节写入BytecodeArray。 |
| 105 | +返回到GenerateBytecodeBody()的第23行代码,`VisitStatements()`的作用是分析AST并生成Bytecode,源码如下: |
| 106 | +```c++ |
| 107 | +void BytecodeGenerator::VisitStatements( |
| 108 | + const ZonePtrList<Statement>* statements) { |
| 109 | + for (int i = 0; i < statements->length(); i++) { |
| 110 | + // Allocate an outer register allocations scope for the statement. |
| 111 | + RegisterAllocationScope allocation_scope(this); |
| 112 | + Statement* stmt = statements->at(i); |
| 113 | + Visit(stmt); |
| 114 | + if (builder()->RemainderOfBlockIsDead()) break; |
| 115 | + } |
| 116 | +} |
| 117 | +``` |
| 118 | +遍历AST生成Bytecode时,会递归调用`VisitStatements()`方法,图1给出了调用堆栈,供读者分析。 |
| 119 | + |
| 120 | +# 3 延期常量池 |
| 121 | +**注意** `常量`不是编程语言中的常量,而是编译器生成的包括各种标识符在内的信息表,即延期常量池,请注意区分! |
| 122 | +`延期`的含义是要为在编译阶段无法确定的执行路径预留位置,待后面确定时再填充,举例说明如下: |
| 123 | +```c++ |
| 124 | +function test(x) |
| 125 | +{ |
| 126 | + if(x>0) |
| 127 | + add3(x);//x=x+3 |
| 128 | + dosomething........ |
| 129 | +} |
| 130 | +``` |
| 131 | +生成`test()`的字节码时,要把`add3()`的调用地址填充到常量池中。如果此时`add3()`还没有被编译,就预留一个位置,等到`add3()`有调用地址时再填充,这就是延期填充。 |
| 132 | +生成字节码的入口函数如下: |
| 133 | +```c++ |
| 134 | +1. Handle<BytecodeArray> BytecodeGenerator::FinalizeBytecode( |
| 135 | +2. Isolate* isolate, Handle<Script> script) { |
| 136 | +3. DCHECK_EQ(ThreadId::Current(), isolate->thread_id()); |
| 137 | +4. AllocateDeferredConstants(isolate, script); |
| 138 | +5. if (block_coverage_builder_) { |
| 139 | +6. info()->set_coverage_info( |
| 140 | +7. isolate->factory()->NewCoverageInfo(block_coverage_builder_->slots())); |
| 141 | +8. if (FLAG_trace_block_coverage) { |
| 142 | +9. info()->coverage_info()->Print(info()->literal()->GetDebugName()); |
| 143 | +10. } |
| 144 | +11. } |
| 145 | +12. if (HasStackOverflow()) return Handle<BytecodeArray>(); |
| 146 | +13. Handle<BytecodeArray> bytecode_array = builder()->ToBytecodeArray(isolate); |
| 147 | +14. if (incoming_new_target_or_generator_.is_valid()) { |
| 148 | +15. bytecode_array->set_incoming_new_target_or_generator_register( |
| 149 | +16. incoming_new_target_or_generator_); |
| 150 | +17. } |
| 151 | +18. return bytecode_array; |
| 152 | +19. } |
| 153 | +``` |
| 154 | +上面第4行代码的作用是构建延期常量池(DeferredConstants),源码如下: |
| 155 | +```c++ |
| 156 | +1. void BytecodeGenerator::AllocateDeferredConstants(Isolate* isolate, |
| 157 | +2. Handle<Script> script) { |
| 158 | +3. // Build global declaration pair arrays. |
| 159 | +4. for (GlobalDeclarationsBuilder* globals_builder : global_declarations_) { |
| 160 | +5. Handle<FixedArray> declarations = |
| 161 | +6. globals_builder->AllocateDeclarations(info(), script, isolate); |
| 162 | +7. if (declarations.is_null()) return SetStackOverflow(); |
| 163 | +8. builder()->SetDeferredConstantPoolEntry( |
| 164 | +9. globals_builder->constant_pool_entry(), declarations); |
| 165 | +10. } |
| 166 | +11. // Find or build shared function infos. |
| 167 | +12. for (std::pair<FunctionLiteral*, size_t> literal : function_literals_) { |
| 168 | +13. //省略.................. |
| 169 | +14. } |
| 170 | +15. // Find or build shared function infos for the native function templates. |
| 171 | +16. for (std::pair<NativeFunctionLiteral*, size_t> literal : |
| 172 | +17. native_function_literals_) { |
| 173 | +18. //省略.................. |
| 174 | +19. } |
| 175 | +20. // Build object literal constant properties |
| 176 | +21. for (std::pair<ObjectLiteral*, size_t> literal : object_literals_) { |
| 177 | +22. ObjectLiteral* object_literal = literal.first; |
| 178 | +23. //省略.................. |
| 179 | +24. } |
| 180 | +25. } |
| 181 | +26. // Build array literal constant elements |
| 182 | +27. for (std::pair<ArrayLiteral*, size_t> literal : array_literals_) { |
| 183 | +28. //省略.................. |
| 184 | +29. } |
| 185 | +30. // Build class literal boilerplates. |
| 186 | +31. for (std::pair<ClassLiteral*, size_t> literal : class_literals_) { |
| 187 | +32. //省略.................. |
| 188 | +33. } |
| 189 | +34. // Build template literals. |
| 190 | +35. for (std::pair<GetTemplateObject*, size_t> literal : template_objects_) { |
| 191 | +36. //省略.................. |
| 192 | +37. } |
| 193 | +38. } |
| 194 | +``` |
| 195 | +上面第4~10行代码:取出全局变量声明信息并存入常量池。其中第9行代码`constant_pool_entry()`的作用是获取该声明在常量池中预留的条目下标,第8行的`SetDeferredConstantPoolEntry()`把声明数组填充到这个预留位置。其余代码负责把引用的sharedfunction、constant property等添加到常量池。常量池的类型是FixedArray,请读者自行分析。 |
| 196 | +# 4 收尾工作 |
| 197 | +返回到`FinalizeBytecode()`函数的第13行,源码如下: |
| 198 | +```c++ |
| 199 | +1. Handle<BytecodeArray> BytecodeArrayBuilder::ToBytecodeArray(Isolate* isolate) { |
| 200 | +2. DCHECK(RemainderOfBlockIsDead()); |
| 201 | +3. DCHECK(!bytecode_generated_); |
| 202 | +4. bytecode_generated_ = true; |
| 203 | +5. int register_count = total_register_count(); |
| 204 | +6. if (register_optimizer_) { |
| 205 | +7. //寄存器优化,请自行分析 |
| 206 | +8. } |
| 207 | +9. Handle<ByteArray> handler_table = |
| 208 | +10. handler_table_builder()->ToHandlerTable(isolate); |
| 209 | +11. return bytecode_array_writer_.ToBytecodeArray( |
| 210 | +12. isolate, register_count, parameter_count(), handler_table); |
| 211 | +13. } |
| 212 | +``` |
| 213 | +上面第9行代码:获取handler table(异常处理表),它记录了字节码中try/catch等异常处理范围及其对应处理程序的偏移,类型是ByteArray。第11行代码进入ToBytecodeArray(),源码如下: |
| 214 | +```c++ |
| 215 | +1. Handle<BytecodeArray> BytecodeArrayWriter::ToBytecodeArray( |
| 216 | +2. Isolate* isolate, int register_count, int parameter_count, |
| 217 | +3. Handle<ByteArray> handler_table) { |
| 218 | +4. DCHECK_EQ(0, unbound_jumps_); |
| 219 | +5. int bytecode_size = static_cast<int>(bytecodes()->size()); |
| 220 | +6. int frame_size = register_count * kSystemPointerSize; |
| 221 | +7. Handle<FixedArray> constant_pool = |
| 222 | +8. constant_array_builder()->ToFixedArray(isolate); |
| 223 | +9. Handle<BytecodeArray> bytecode_array = isolate->factory()->NewBytecodeArray( |
| 224 | +10. bytecode_size, &bytecodes()->front(), frame_size, parameter_count, |
| 225 | +11. constant_pool); |
| 226 | +12. bytecode_array->set_handler_table(*handler_table); |
| 227 | +13. return bytecode_array; |
| 228 | +14. } |
| 229 | +``` |
| 230 | +上面第7行代码生成常量池,第9行代码进入NewBytecodeArray()方法,源码如下: |
| 231 | +```c++ |
| 232 | +1. Handle<BytecodeArray> Factory::NewBytecodeArray( |
| 233 | +2. int length, const byte* raw_bytecodes, int frame_size, int parameter_count, |
| 234 | +3. Handle<FixedArray> constant_pool) { |
| 235 | +4. DCHECK_LE(0, length); |
| 236 | +5. if (length > BytecodeArray::kMaxLength) { |
| 237 | +6. isolate()->heap()->FatalProcessOutOfMemory("invalid array length"); |
| 238 | +7. } |
| 239 | +8. // Bytecode array is AllocationType::kOld, so constant pool array should be |
| 240 | +9. // too. |
| 241 | +10. DCHECK(!Heap::InYoungGeneration(*constant_pool)); |
| 242 | +11. int size = BytecodeArray::SizeFor(length); |
| 243 | +12. HeapObject result = AllocateRawWithImmortalMap(size, AllocationType::kOld, |
| 244 | +13. *bytecode_array_map()); |
| 245 | +14. Handle<BytecodeArray> instance(BytecodeArray::cast(result), isolate()); |
| 246 | +15. instance->set_length(length); |
| 247 | +16. instance->set_frame_size(frame_size); |
| 248 | +17. instance->set_parameter_count(parameter_count); |
| 249 | +18. instance->set_incoming_new_target_or_generator_register( |
| 250 | +19. interpreter::Register::invalid_value()); |
| 251 | +20. instance->set_osr_loop_nesting_level(0); |
| 252 | +21. instance->set_bytecode_age(BytecodeArray::kNoAgeBytecodeAge); |
| 253 | +22. instance->set_constant_pool(*constant_pool); |
| 254 | +23. instance->set_handler_table(*empty_byte_array()); |
| 255 | +24. instance->set_source_position_table(*undefined_value()); |
| 256 | +25. CopyBytes(reinterpret_cast<byte*>(instance->GetFirstBytecodeAddress()), |
| 257 | +26. raw_bytecodes, length); |
| 258 | +27. instance->clear_padding(); |
| 259 | +28. return instance; |
| 260 | +29. } |
| 261 | +``` |
| 262 | +上面第12行代码的作用是申请一段heap内存(堆对象),该内存用于存储生成的字节码,15~27行代码把生成的字节码存储到刚创建的堆对象中。 |
| 263 | +**技术总结** |
| 264 | +**(1)** 常量池的作用是提供属性名称。在运行期间此名称可以找到对应的属性,提供被调用函数的地址等等,起到了辅助字节码执行的作用。常量池存储在字节码数组的0位置; |
| 265 | +**(2)** 字节码生成后存储在堆对象中,该对象的类型是BytecodeArray(常量池的类型才是FixedArray); |
| 266 | +**(3)** 字节码生成的本质是遍历AST树; |
| 267 | +**(4)** 看懂并理解AST树节点的类型再分析字节码的生成会十分容易。 |
| 268 | +好了,今天到这里,下次见。 |
| 269 | +
|
| 270 | +**个人能力有限,有不足与纰漏,欢迎批评指正** |
| 271 | +**微信:qq9123013 备注:v8交流 邮箱:v8blink@outlook.com** |
| 272 | +本文由灰豆原创发布 |
| 273 | +转载出处: https://www.anquanke.com/post/id/259229 |
| 274 | +安全客 - 有思想的安全新媒体 |
| 275 | +
|
| 276 | +
|
0 commit comments