From c3c4adcba6b276180d6b4528d7b88b3f6618f59b Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Fri, 10 May 2024 17:47:17 +0800 Subject: [PATCH] Several updates for the function signature based labeling scheme Changes: - Rename complex labeling scheme to function signature based labeling scheme - Fix the PLT stubs - Add labeling rule for `main` and `_dl_runtime_resolve`. - Clarify the rule for virtual functions inherited from more than one base class. --- riscv-elf.adoc | 67 +++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/riscv-elf.adoc b/riscv-elf.adoc index 4f1b06fc..98572782 100644 --- a/riscv-elf.adoc +++ b/riscv-elf.adoc @@ -662,7 +662,7 @@ using all other PLT sytle. |=== | Default PLT | - | Simple landing pad PLT | Must use this PLT style when `GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_SIMPLE` is set. -| Complex landing pad PLT | Must use this PLT style when `GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_COMPLEX` is set. +| Function signature based landing pad PLT | Must use this PLT style when `GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_FUNC_SIG` is set. |=== The first entry of a shared object PLT is a special entry that calls @@ -675,7 +675,7 @@ entries is intended to speed up program loading by deferring symbol resolution to the first time the function is called. The PLT entry is 16 bytes for the default PLT style and the simple landing pad -PLT style, and 32 bytes for the complex landing pad PLT style. +PLT style, and 32 bytes for the function signature based landing pad PLT style.
The first entry in the PLT occupies two 16 byte entries for the default PLT style: @@ -708,36 +708,21 @@ And occupies three 16 byte entries for the simple landing pad PLT style: nop ---- -The complex landing pad PLT style occupies two 32 byte entries: +The function signature based landing pad PLT style occupies two 32 byte entries: [,asm] ---- 1: lpad 0 sub t1, t1, t3 # shifted .got.plt offset + hdr size + 24 - auipc t2, %pcrel_hi(.got.plt) - addi t0, t2, %pcrel_lo(1b) # &.got.plt - l[w|d] t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve + auipc t3, %pcrel_hi(.got.plt) + addi t0, t3, %pcrel_lo(1b) # &.got.plt + l[w|d] t3, %pcrel_lo(1b)(t3) # _dl_runtime_resolve addi t1, t1, -(hdr size + 24) # shifted .got.plt offset srli t1, t1, log2(32/PTRSIZE) # .got.plt offset l[w|d] t0, PTRSIZE(t0) # link map jr t3 nop nop ----- - - -[,asm] ----- -1: lpad 0 - auipc t2, %pcrel_hi(.got.plt) - sub t1, t1, t3 # shifted .got.plt offset + hdr size + 24 - l[w|d] t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve - addi t1, t1, -(hdr size + 24) # shifted .got.plt offset - addi t0, t2, %pcrel_lo(1b) # &.got.plt - srli t1, t1, log2(32/PTRSIZE) # .got.plt offset - l[w|d] t0, PTRSIZE(t0) # link map - jr t3 - nop nop ---- @@ -765,7 +750,7 @@ The code sequences of the PLT entry for the the simple landing pad PLT style: jalr t1, t3 ---- -The code sequences of the PLT entry for the the complex landing pad PLT style: +The code sequences of the PLT entry for the function signature based landing pad PLT style: [,asm] ---- 1: lpad @@ -1520,7 +1505,7 @@ a different features.
| Bit | Bit Name | 0 | GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_SIMPLE | 1 | GNU_PROPERTY_RISCV_FEATURE_1_CFI_SS -| 2 | GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_COMPLEX +| 2 | GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_FUNC_SIG |=== `GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_SIMPLE` This bit indicate that all executable @@ -1538,7 +1523,7 @@ compressed instructions then loading an executable with this bit set requires the execution environment to provide the `Zicfiss` extension or to provide both the `Zcmop` and `Zimop` extensions. -`GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_COMPLEX` This bit indicate that all executable +`GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_FUNC_SIG` This bit indicates that all executable sections are built to be compatible with the landing pad mechanism provided by the `Zicfilp` extension. An executable or shared library with this bit set is required to generate PLTs with the landing pad (`lpad`) instruction, and all @@ -1587,21 +1572,27 @@ attribute is recording for minimal execution environment requirements, so the ISA information from arch attribute is not enough for the disassembler to disassemble the `rv64gcv` version correctly. -== Label Value Compuatation for Complex Labeling Scheme Landing Pad +== Label Value Computation for Function Signature Based Labeling Scheme Landing Pad -The label value for the complex labeling scheme landing pad is computed from the +The label value for the function signature based labeling scheme landing pad is computed from the hash of the function signature string, which uses the same scheme as the "Function types" mangling rule defined in the _Itanium {Cpp} ABI_ -<>, the value is taken from the lower 20 bits of the MD5 -hash result of the function signature string. +<>, and the function signature uses the "Compression" rule +defined in the _Itanium {Cpp} ABI_. The value is taken from the lower 20 bits of +the MD5 hash result of the function signature string.
-Additionally, here are a few specific rules for {Cpp} member functions: +Additionally, here are a few specific rules: +- The `main` function uses the signature + `(int, pointer to pointer to char) returning int` (`FiiPPcE`). +- `_dl_runtime_resolve` uses zero for the landing pad label. - {Cpp} member functions should use the "Pointer-to-member types" mangling rule defined in the _Itanium {Cpp} ABI_ <>. - Virtual functions in {Cpp} should use the member function type of the base class that first defined the virtual function. - +- If a virtual function is inherited from more than one base class, it should + use the type of the first base class. Thunk functions will use the type of + the corresponding base class. Example: @@ -1625,7 +1616,13 @@ public: void memfunc4(); }; -class DerivedDerived : public Derived +class OtherBase +{ +public: + virtual void memfunc2(int); +}; + +class DerivedDerived : public Derived, OtherBase { public: virtual void memfunc2(int); @@ -1638,9 +1635,13 @@ The function signatures for the above functions are described below: - `foo` is encoded as `FdiPfE`. - `Base::memfunc1` and `Derived::memfunc1` are both encoded as `M4BaseFvvE`. -- `Base::memfunc2` and `DerivedDerived::memfunc2` are both encoded as `M4BaseFviE`. -- `Derived::memfunc3` and `DerivedDerived::memfunc3` are both encoded as `M7DerivedFvdE`. +- `Base::memfunc2` is encoded as `M4BaseFviE`. +- `OtherBase::memfunc2` is encoded as `M9OtherBaseFviE`. +- `Derived::memfunc3` and `DerivedDerived::memfunc3` are both encoded as + `M7DerivedFvdE`. - `Derived::memfunc4` is encoded as `M7DerivedFvvE`. +- `DerivedDerived::memfunc2` is encoded as `M4BaseFviE`, and the thunk function + for `OtherBase::memfunc2` is encoded as `M9OtherBaseFviE`. == Linker Relaxation