Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[lld][LoongArch] Relax TLS LE/GD/LD. #123600

Open
wants to merge 5 commits into
base: users/ylzsx/r-call36
Choose a base branch
from

Conversation

ylzsx
Copy link
Contributor

@ylzsx ylzsx commented Jan 20, 2025

In local-exec form, the code sequence is converted as follows:

From:
  lu12i.w $rd, %le_hi20_r(sym)
    R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX
  add.w/d $rd, $rd, $tp, %le_add_r(sym)
    R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX
  addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
    R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX
To:
  addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
    R_LARCH_TLS_LE_LO12_R

In global-dynamic or local-dynamic, the code sequence is converted as follows:

From:
  pcalau12i     $a0, %ld_pc_hi20(sym)  | %gd_pc_hi20(sym)
    R_LARCH_TLS_LD_PC_HI20 | R_LARCH_TLS_GD_PC_HI20, R_LARCH_RELAX
  addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
    R_LARCH_GOT_PC_LO12, R_LARCH_RELAX
To:
  pcaddi        $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
    R_LARCH_TLS_LD_PCREL20_S2 | R_LARCH_TLS_GD_PCREL20_S2

Note: For initial-exec form, since it involves the conversion from IE to LE, we will implement it in a future patch.

ylzsx added 5 commits January 20, 2025 17:02
In local-exec form, the code sequence is converted as follows:
```
From:
lu12i.w $rd, %le_hi20_r(sym)
  R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX
add.w/d $rd, $rd, $tp, %le_add_r(sym)
  R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX
addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
  R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX
To:
addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
  R_LARCH_TLS_LE_LO12_R
```

In global-dynamic or local-dynamic, the code sequence is converted as
follows:
```
From:
pcalau12i     $a0, %ld_pc_hi20(sym)  | %gd_pc_hi20(sym)
  R_LARCH_TLS_LD_PC_HI20 | R_LARCH_TLS_GD_PC_HI20, R_LARCH_RELAX
addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
  R_LARCH_GOT_PC_LO12, R_LARCH_RELAX
To:
pcaddi        $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
  R_LARCH_TLS_LD_PCREL20_S2 | R_LARCH_TLS_GD_PCREL20_S2
```

Note: For initial-exec form, since it involves the conversion from IE to
LE, we will implement it in a future patch.
@llvmbot
Copy link
Member

llvmbot commented Jan 20, 2025

@llvm/pr-subscribers-backend-loongarch
@llvm/pr-subscribers-lld-elf

@llvm/pr-subscribers-lld

Author: Zhaoxin Yang (ylzsx)

Changes

In local-exec form, the code sequence is converted as follows:

From:
  lu12i.w $rd, %le_hi20_r(sym)
    R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX
  add.w/d $rd, $rd, $tp, %le_add_r(sym)
    R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX
  addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
    R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX
To:
  addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
    R_LARCH_TLS_LE_LO12_R

In global-dynamic or local-dynamic, the code sequence is converted as follows:

From:
  pcalau12i     $a0, %ld_pc_hi20(sym)  | %gd_pc_hi20(sym)
    R_LARCH_TLS_LD_PC_HI20 | R_LARCH_TLS_GD_PC_HI20, R_LARCH_RELAX
  addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
    R_LARCH_GOT_PC_LO12, R_LARCH_RELAX
To:
  pcaddi        $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
    R_LARCH_TLS_LD_PCREL20_S2 | R_LARCH_TLS_GD_PCREL20_S2

Note: For initial-exec form, since it involves the conversion from IE to LE, we will implement it in a future patch.


Patch is 24.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123600.diff

6 Files Affected:

  • (modified) lld/ELF/Arch/LoongArch.cpp (+64-4)
  • (modified) lld/test/ELF/loongarch-relax-emit-relocs.s (+107-5)
  • (added) lld/test/ELF/loongarch-relax-tls-le.s (+115)
  • (modified) lld/test/ELF/loongarch-tls-gd.s (+41-2)
  • (modified) lld/test/ELF/loongarch-tls-ld.s (+38-2)
  • (modified) lld/test/ELF/loongarch-tls-le.s (+16)
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 0aa0cf5b657a0f..2d6d86d2ca63b2 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -761,10 +761,10 @@ static bool isPairRelaxable(ArrayRef<Relocation> relocs, size_t i) {
 
 // Relax code sequence.
 // From:
-//   pcalau12i $a0, %pc_hi20(sym)
-//   addi.w/d $a0, $a0, %pc_lo12(sym)
+//   pcalau12i     $a0, %pc_hi20(sym) | %ld_pc_hi20(sym)  | %gd_pc_hi20(sym)
+//   addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
 // To:
-//   pcaddi $a0, %pc_lo12(sym)
+//   pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
 //
 // From:
 //   pcalau12i $a0, %got_pc_hi20(sym_got)
@@ -778,6 +778,10 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
   if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
          rLo12.type == R_LARCH_PCALA_LO12) ||
         (rHi20.type == R_LARCH_GOT_PC_HI20 &&
+         rLo12.type == R_LARCH_GOT_PC_LO12) ||
+        (rHi20.type == R_LARCH_TLS_GD_PC_HI20 &&
+         rLo12.type == R_LARCH_GOT_PC_LO12) ||
+        (rHi20.type == R_LARCH_TLS_LD_PC_HI20 &&
          rLo12.type == R_LARCH_GOT_PC_LO12)))
     return;
 
@@ -798,6 +802,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
   else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
            rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
     symBase = rHi20.sym->getVA(ctx);
+  else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC)
+    symBase = ctx.in.got->getGlobalDynAddr(*rHi20.sym);
   else {
     Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
              << rHi20.expr << ") against symbol " << rHi20.sym
@@ -827,7 +833,12 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
     return;
 
   sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
-  sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
+  if (rHi20.type == R_LARCH_TLS_GD_PC_HI20)
+    sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2;
+  else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20)
+    sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2;
+  else
+    sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
   sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
   remove = 4;
 }
@@ -863,6 +874,35 @@ static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i,
   }
 }
 
+// Relax code sequence.
+// From:
+//   lu12i.w $rd, %le_hi20_r(sym)
+//   add.w/d $rd, $rd, $tp, %le_add_r(sym)
+//   addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
+// To:
+//   addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
+static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i,
+                       uint64_t loc, Relocation &r, uint32_t &remove) {
+  uint64_t val = r.sym->getVA(ctx, r.addend);
+  // Check if the val exceeds the range of addi/ld/st.
+  if (!isInt<12>(val))
+    return;
+  uint32_t currInsn = read32le(sec.content().data() + r.offset);
+  switch (r.type) {
+  case R_LARCH_TLS_LE_HI20_R:
+  case R_LARCH_TLS_LE_ADD_R:
+    sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
+    remove = 4;
+    break;
+  case R_LARCH_TLS_LE_LO12_R:
+    currInsn =
+        insn(extractBits(currInsn, 31, 22) << 22, getD5(currInsn), R_TP, 0);
+    sec.relaxAux->writes.push_back(currInsn);
+    sec.relaxAux->relocTypes[i] = R_LARCH_TLS_LE_LO12_R;
+    break;
+  }
+}
+
 static bool relax(Ctx &ctx, InputSection &sec) {
   const uint64_t secAddr = sec.getVA();
   const MutableArrayRef<Relocation> relocs = sec.relocs();
@@ -903,6 +943,8 @@ static bool relax(Ctx &ctx, InputSection &sec) {
     }
     case R_LARCH_PCALA_HI20:
     case R_LARCH_GOT_PC_HI20:
+    case R_LARCH_TLS_GD_PC_HI20:
+    case R_LARCH_TLS_LD_PC_HI20:
       // The overflow check for i+2 will be carried out in isPairRelaxable.
       if (isPairRelaxable(relocs, i))
         relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
@@ -911,6 +953,12 @@ static bool relax(Ctx &ctx, InputSection &sec) {
       if (relaxable(relocs, i))
         relaxCall36(ctx, sec, i, loc, r, remove);
       break;
+    case R_LARCH_TLS_LE_HI20_R:
+    case R_LARCH_TLS_LE_ADD_R:
+    case R_LARCH_TLS_LE_LO12_R:
+      if (relaxable(relocs, i))
+        relaxTlsLe(ctx, sec, i, loc, r, remove);
+      break;
     }
 
     // For all anchors whose offsets are <= r.offset, they are preceded by
@@ -1015,8 +1063,20 @@ void LoongArch::finalizeRelax(int passes) const {
             r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC;
             break;
           case R_LARCH_B26:
+          case R_LARCH_TLS_LE_LO12_R:
+            skip = 4;
+            write32le(p, aux.writes[writesIdx++]);
+            break;
+          case R_LARCH_TLS_GD_PCREL20_S2:
+            // Note: R_LARCH_TLS_LD_PCREL20_S2 must also use R_TLSGD_PC rather
+            // than R_TLSLD_PC, because R_LARCH_TLS_LD_PC_HI20 is processed the
+            // same way as R_LARCH_TLS_GD_PC_HI20. Otherwise, the value
+            // obtained from getRelocTargetVA will be unexpected and lead to an
+            // error.
+          case R_LARCH_TLS_LD_PCREL20_S2:
             skip = 4;
             write32le(p, aux.writes[writesIdx++]);
+            r.expr = R_TLSGD_PC;
             break;
           default:
             llvm_unreachable("unsupported type");
diff --git a/lld/test/ELF/loongarch-relax-emit-relocs.s b/lld/test/ELF/loongarch-relax-emit-relocs.s
index a02cd272aba5bf..5bb445dcaff50a 100644
--- a/lld/test/ELF/loongarch-relax-emit-relocs.s
+++ b/lld/test/ELF/loongarch-relax-emit-relocs.s
@@ -1,7 +1,7 @@
 # REQUIRES: loongarch
 ## Test that we can handle --emit-relocs while relaxing.
 
-# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax --defsym ELF32=1 %s -o %t.32.o
 # RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o
 # RUN: ld.lld -Ttext=0x10000 -section-start=.got=0x20000 --emit-relocs --relax %t.32.o -o %t.32
 # RUN: ld.lld -Ttext=0x10000 -section-start=.got=0x20000 --emit-relocs --relax %t.64.o -o %t.64
@@ -17,19 +17,39 @@
 # RUN: llvm-objdump -dr %t.64.norelax | FileCheck %s --check-prefix=NORELAX
 
 # RELAX:      00010000 <_start>:
-# RELAX-NEXT:   pcaddi $a0, 0
+# RELAX-NEXT:   pcaddi    $a0, 0
 # RELAX-NEXT:     R_LARCH_RELAX _start
 # RELAX-NEXT:     R_LARCH_RELAX *ABS*
 # RELAX-NEXT:     R_LARCH_PCREL20_S2 _start
 # RELAX-NEXT:     R_LARCH_RELAX *ABS*
-# RELAX-NEXT:   pcaddi $a0, -1
+# RELAX-NEXT:   pcaddi    $a0, -1
 # RELAX-NEXT:     R_LARCH_RELAX _start
 # RELAX-NEXT:     R_LARCH_RELAX *ABS*
 # RELAX-NEXT:     R_LARCH_PCREL20_S2 _start
 # RELAX-NEXT:     R_LARCH_RELAX *ABS*
+# RELAX-NEXT:   lu12i.w   $a0, 0
+# RELAX-NEXT:     R_LARCH_TLS_LE_HI20 a
+# RELAX-NEXT:   ori       $a0, $a0, 0
+# RELAX-NEXT:     R_LARCH_TLS_LE_LO12 a
+# RELAX-NEXT:   pcaddi    $a0, {{[0-9]+}}
+# RELAX-NEXT:     R_LARCH_RELAX a
+# RELAX-NEXT:     R_LARCH_RELAX *ABS*
+# RELAX-NEXT:     R_LARCH_TLS_GD_PCREL20_S2 a
+# RELAX-NEXT:     R_LARCH_RELAX *ABS*
+# RELAX-NEXT:   pcaddi    $a0, {{[0-9]+}}
+# RELAX-NEXT:     R_LARCH_RELAX a
+# RELAX-NEXT:     R_LARCH_RELAX *ABS*
+# RELAX-NEXT:     R_LARCH_TLS_LD_PCREL20_S2 a
+# RELAX-NEXT:     R_LARCH_RELAX *ABS*
+# RELAX-NEXT:   addi.{{[dw]}} $a0, $tp, 0
+# RELAX-NEXT:     R_LARCH_RELAX a
+# RELAX-NEXT:     R_LARCH_RELAX *ABS*
+# RELAX-NEXT:     R_LARCH_RELAX a
+# RELAX-NEXT:     R_LARCH_RELAX *ABS*
+# RELAX-NEXT:     R_LARCH_TLS_LE_LO12_R a
+# RELAX-NEXT:     R_LARCH_RELAX *ABS*
 # RELAX-NEXT:   nop
 # RELAX-NEXT:     R_LARCH_ALIGN *ABS*+0xc
-# RELAX-NEXT:   nop
 # RELAX-NEXT:   ret
 
 # NORELAX:      <_start>:
@@ -45,8 +65,36 @@
 # NORELAX-NEXT:   ld.d      $a0, $a0, 0
 # NORELAX-NEXT:     R_LARCH_GOT_PC_LO12 _start
 # NORELAX-NEXT:     R_LARCH_RELAX *ABS*
-# NORELAX-NEXT:   ret
+# NORELAX-NEXT:   lu12i.w   $a0, 0
+# NORELAX-NEXT:     R_LARCH_TLS_LE_HI20 a
+# NORELAX-NEXT:   ori       $a0, $a0, 0
+# NORELAX-NEXT:     R_LARCH_TLS_LE_LO12 a
+# NORELAX-NEXT:   pcalau12i $a0, 16
+# NORELAX-NEXT:     R_LARCH_TLS_GD_PC_HI20 a
+# NORELAX-NEXT:     R_LARCH_RELAX *ABS*
+# NORELAX-NEXT:   addi.d    $a0, $a0, 8
+# NORELAX-NEXT:     R_LARCH_GOT_PC_LO12 a
+# NORELAX-NEXT:     R_LARCH_RELAX *ABS*
+# NORELAX-NEXT:   pcalau12i $a0, 16
+# NORELAX-NEXT:     R_LARCH_TLS_LD_PC_HI20 a
+# NORELAX-NEXT:     R_LARCH_RELAX *ABS*
+# NORELAX-NEXT:   addi.d    $a0, $a0, 8
+# NORELAX-NEXT:     R_LARCH_GOT_PC_LO12 a
+# NORELAX-NEXT:     R_LARCH_RELAX *ABS*
+# NORELAX-NEXT:   lu12i.w   $a0, 0
+# NORELAX-NEXT:     R_LARCH_TLS_LE_HI20_R a
+# NORELAX-NEXT:     R_LARCH_RELAX *ABS*
+# NORELAX-NEXT:   add.d     $a0, $a0, $tp
+# NORELAX-NEXT:     R_LARCH_TLS_LE_ADD_R a
+# NORELAX-NEXT:     R_LARCH_RELAX *ABS*
+# NORELAX-NEXT:   addi.d    $a0, $a0, 0
+# NORELAX-NEXT:     R_LARCH_TLS_LE_LO12_R a
+# NORELAX-NEXT:     R_LARCH_RELAX *ABS*
+# NORELAX-NEXT:   nop
 # NORELAX-NEXT:     R_LARCH_ALIGN *ABS*+0xc
+# NORELAX-NEXT:   nop
+# NORELAX-NEXT:   nop
+# NORELAX-NEXT:   ret
 
 # CHECKR:      <_start>:
 # CHECKR-NEXT:   pcalau12i $a0, 0
@@ -61,15 +109,69 @@
 # CHECKR-NEXT:   ld.d      $a0, $a0, 0
 # CHECKR-NEXT:     R_LARCH_GOT_PC_LO12 _start
 # CHECKR-NEXT:     R_LARCH_RELAX *ABS*
+# CHECKR-NEXT:   lu12i.w   $a0, 0
+# CHECKR-NEXT:     R_LARCH_TLS_LE_HI20 a
+# CHECKR-NEXT:   ori       $a0, $a0, 0
+# CHECKR-NEXT:     R_LARCH_TLS_LE_LO12 a
+# CHECKR-NEXT:   pcalau12i $a0, 0
+# CHECKR-NEXT:     R_LARCH_TLS_GD_PC_HI20 a
+# CHECKR-NEXT:     R_LARCH_RELAX *ABS*
+# CHECKR-NEXT:   addi.d    $a0, $a0, 0
+# CHECKR-NEXT:     R_LARCH_GOT_PC_LO12 a
+# CHECKR-NEXT:     R_LARCH_RELAX *ABS*
+# CHECKR-NEXT:   pcalau12i $a0, 0
+# CHECKR-NEXT:     R_LARCH_TLS_LD_PC_HI20 a
+# CHECKR-NEXT:     R_LARCH_RELAX *ABS*
+# CHECKR-NEXT:   addi.d    $a0, $a0, 0
+# CHECKR-NEXT:     R_LARCH_GOT_PC_LO12 a
+# CHECKR-NEXT:     R_LARCH_RELAX *ABS*
+# CHECKR-NEXT:   lu12i.w   $a0, 0
+# CHECKR-NEXT:     R_LARCH_TLS_LE_HI20_R a
+# CHECKR-NEXT:     R_LARCH_RELAX *ABS*
+# CHECKR-NEXT:   add.d     $a0, $a0, $tp
+# CHECKR-NEXT:     R_LARCH_TLS_LE_ADD_R a
+# CHECKR-NEXT:     R_LARCH_RELAX *ABS*
+# CHECKR-NEXT:   addi.d    $a0, $a0, 0
+# CHECKR-NEXT:     R_LARCH_TLS_LE_LO12_R a
+# CHECKR-NEXT:     R_LARCH_RELAX *ABS*
 # CHECKR-NEXT:   nop
 # CHECKR-NEXT:     R_LARCH_ALIGN *ABS*+0xc
 # CHECKR-NEXT:   nop
 # CHECKR-NEXT:   nop
 # CHECKR-NEXT:   ret
 
+.macro add dst, src1, src2, src3
+.ifdef ELF32
+add.w \dst, \src1, \src2, \src3
+.else
+add.d \dst, \src1, \src2, \src3
+.endif
+.endm
+.macro addi dst, src1, src2
+.ifdef ELF32
+addi.w \dst, \src1, \src2
+.else
+addi.d \dst, \src1, \src2
+.endif
+.endm
+
 .global _start
 _start:
   la.pcrel $a0, _start
   la.got   $a0, _start
+
+  la.tls.le $a0, a  # without R_LARCH_RELAX relocation
+  la.tls.gd $a0, a
+  la.tls.ld $a0, a
+
+  lu12i.w $a0, %le_hi20_r(a)
+  add $a0, $a0, $tp, %le_add_r(a)
+  addi $a0, $a0, %le_lo12_r(a)
+
   .p2align 4
   ret
+
+.section .tbss,"awT",@nobits
+.globl a
+a:
+.zero 4
diff --git a/lld/test/ELF/loongarch-relax-tls-le.s b/lld/test/ELF/loongarch-relax-tls-le.s
new file mode 100644
index 00000000000000..b55f284f32cb82
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-tls-le.s
@@ -0,0 +1,115 @@
+# REQUIRES: loongarch
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 -mattr=+relax --defsym ELF32=1 %s -o %t.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t.64.o
+
+# RUN: ld.lld --relax %t.32.o -o %t.32
+# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=RELAX32 %s
+
+# RUN: ld.lld --relax %t.64.o -o %t.64
+# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=RELAX64 %s
+
+# RELAX32-LABEL: <_start>:
+## .LANCHOR0@tprel = 8
+# RELAX32-NEXT:    addi.w  $a0, $tp, 8 
+# RELAX32-NEXT:    ld.w    $a1, $a0, 0
+# RELAX32-NEXT:    ld.w    $a2, $tp, 8
+## .a@tprel - 4 = 0x7fc
+# RELAX32-NEXT:    addi.w  $a1, $zero, 1
+# RELAX32-NEXT:    addi.w $a1, $a1, 2
+# RELAX32-NEXT:    st.w   $a1, $tp, 2044
+## .a@tprel = 0x800
+# RELAX32-NEXT:    lu12i.w $a0, 1
+# RELAX32-NEXT:    add.w   $a0, $a0, $tp
+# RELAX32-NEXT:    addi.w  $a0, $a0, -2048
+
+# RELAX64-LABEL: <_start>:
+## .LANCHOR0@tprel = 8
+# RELAX64-NEXT:    addi.d  $a0, $tp, 8 
+# RELAX64-NEXT:    ld.d    $a1, $a0, 0
+# RELAX64-NEXT:    ld.d    $a2, $tp, 8
+## .a@tprel - 4 = 0x7fc
+# RELAX64-NEXT:    addi.d  $a1, $zero, 1
+# RELAX64-NEXT:    addi.d $a1, $a1, 2
+# RELAX64-NEXT:    st.d   $a1, $tp, 2044
+## .a@tprel = 0x800
+# RELAX64-NEXT:    lu12i.w $a0, 1
+# RELAX64-NEXT:    add.d   $a0, $a0, $tp
+# RELAX64-NEXT:    addi.d  $a0, $a0, -2048
+
+.macro add dst, src1, src2, src3
+.ifdef ELF32
+add.w \dst, \src1, \src2, \src3
+.else
+add.d \dst, \src1, \src2, \src3
+.endif
+.endm
+.macro inst op dst, src1, src2
+.ifdef ELF32
+  .ifc      \op, addi
+    addi.w  \dst, \src1, \src2
+  .else;    .ifc   \op, ld
+    ld.w    \dst, \src1, \src2
+  .else;    .ifc   \op, st
+    st.w    \dst, \src1, \src2
+  .else;    .ifc   \op, ldptr
+    ldptr.w \dst, \src1, \src2
+  .else
+    .error "Unknown op in ELF32 mode"
+  .endif; .endif; .endif; .endif
+.else
+  .ifc      \op, addi
+    addi.d  \dst, \src1, \src2
+  .else;    .ifc   \op, ld
+    ld.d    \dst, \src1, \src2
+  .else;    .ifc   \op, st
+    st.d    \dst, \src1, \src2
+  .else;    .ifc   \op, ldptr
+    ldptr.d \dst, \src1, \src2
+  .else
+    .error "Unknown op in ELF64 mode"
+  .endif; .endif; .endif; .endif
+.endif
+.endm
+
+.macro addi dst, src1, src2
+inst addi \dst, \src1, \src2
+.endm
+.macro ld dst, src1, src2
+inst ld \dst, \src1, \src2
+.endm
+.macro st dst, src1, src2
+inst st \dst, \src1, \src2
+.endm
+.macro ldptr dst, src1, src2
+inst ldptr \dst, \src1, \src2
+.endm
+
+_start:
+## Test instructions not in pairs.
+lu12i.w $a0, %le_hi20_r(.LANCHOR0)
+add $a0, $a0, $tp, %le_add_r(.LANCHOR0)
+addi $a0, $a0, %le_lo12_r(.LANCHOR0)
+ld $a1, $a0, 0
+ld $a2, $a0, %le_lo12_r(.LANCHOR0)
+
+## hi20(a-4) = hi20(0x7fc) = 0. relaxable
+## Test non-adjacent instructions.
+lu12i.w $a0, %le_hi20_r(a-4)
+addi $a1, $zero, 0x1
+add $a0, $a0, $tp, %le_add_r(a-4)
+addi $a1, $a1, 0x2
+st $a1, $a0, %le_lo12_r(a-4)
+
+## hi20(a) = hi20(0x800) = 1. not relaxable
+lu12i.w $a0, %le_hi20_r(a)
+add $a0, $a0, $tp, %le_add_r(a)
+addi $a0, $a0, %le_lo12_r(a)
+
+.section .tbss,"awT",@nobits
+.space 8
+.LANCHOR0:
+.space 0x800-8
+.globl a
+a:
+.zero 4
diff --git a/lld/test/ELF/loongarch-tls-gd.s b/lld/test/ELF/loongarch-tls-gd.s
index 2aecb44c17a343..4cfed41d70f386 100644
--- a/lld/test/ELF/loongarch-tls-gd.s
+++ b/lld/test/ELF/loongarch-tls-gd.s
@@ -1,14 +1,17 @@
 # REQUIRES: loongarch
 # RUN: rm -rf %t && split-file %s %t
 
-## LoongArch psABI doesn't specify TLS relaxation. Though the code sequences are not
-## relaxed, dynamic relocations can be omitted for GD->LE relaxation.
+## LoongArch psABI doesn't specify TLS relaxation. It can be handled the same way as gcc:
+## (a) code sequence can be converted from `pcalau12i+addi.[wd]` to `pcaddi`.
+## (b) dynamic relocations can be omitted for GD->LE relaxation.
 
 # RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/a.s -o %t/a.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 -mattr=+relax %t/a.s -o %t/a.32.relax.o
 # RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/bc.s -o %t/bc.32.o
 # RUN: ld.lld -shared -soname=bc.so %t/bc.32.o -o %t/bc.32.so
 # RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/tga.s -o %t/tga.32.o
 # RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/a.s -o %t/a.64.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/a.s -mattr=+relax -o %t/a.64.relax.o
 # RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/bc.s -o %t/bc.64.o
 # RUN: ld.lld -shared -soname=bc.so %t/bc.64.o -o %t/bc.64.so
 # RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/tga.s -o %t/tga.64.o
@@ -17,6 +20,9 @@
 # RUN: ld.lld -shared %t/a.32.o %t/bc.32.o -o %t/gd.32.so
 # RUN: llvm-readobj -r %t/gd.32.so | FileCheck --check-prefix=GD32-REL %s
 # RUN: llvm-objdump -d --no-show-raw-insn %t/gd.32.so | FileCheck --check-prefix=GD32 %s
+# RUN: ld.lld --relax -shared %t/a.32.relax.o %t/bc.32.o -o %t/gd.32.relax.so
+# RUN: llvm-readobj -r %t/gd.32.relax.so | FileCheck --check-prefix=GD32-REL-RELAX %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t/gd.32.relax.so | FileCheck --check-prefix=GD32-RELAX %s
 
 ## LA32 GD -> LE
 # RUN: ld.lld %t/a.32.o %t/bc.32.o %t/tga.32.o -o %t/le.32
@@ -35,6 +41,9 @@
 # RUN: ld.lld -shared %t/a.64.o %t/bc.64.o -o %t/gd.64.so
 # RUN: llvm-readobj -r %t/gd.64.so | FileCheck --check-prefix=GD64-REL %s
 # RUN: llvm-objdump -d --no-show-raw-insn %t/gd.64.so | FileCheck --check-prefix=GD64 %s
+# RUN: ld.lld --relax -shared %t/a.64.relax.o %t/bc.64.o -o %t/gd.64.relax.so
+# RUN: llvm-readobj -r %t/gd.64.relax.so | FileCheck --check-prefix=GD64-REL-RELAX %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t/gd.64.relax.so | FileCheck --check-prefix=GD64-RELAX %s
 
 ## LA64 GD -> LE
 # RUN: ld.lld %t/a.64.o %t/bc.64.o %t/tga.64.o -o %t/le.64
@@ -66,6 +75,21 @@
 # GD32-NEXT:        addi.w $a0, $a0, 792
 # GD32-NEXT:        bl 44
 
+# GD32-REL-RELAX:      .rela.dyn {
+# GD32-REL-RELAX-NEXT:   0x20300 R_LARCH_TLS_DTPMOD32 a 0x0
+# GD32-REL-RELAX-NEXT:   0x20304 R_LARCH_TLS_DTPREL32 a 0x0
+# GD32-REL-RELAX-NEXT:   0x20308 R_LARCH_TLS_DTPMOD32 b 0x0
+# GD32-REL-RELAX-NEXT:   0x2030C R_LARCH_TLS_DTPREL32 b 0x0
+# GD32-REL-RELAX-NEXT: }
+
+## &DTPMOD(a) - . = 0x20300 - 0x10250 = 16428<<2
+# GD32-RELAX:      10250: pcaddi $a0, 16428
+# GD32-RELAX-NEXT:        bl 44
+
+## &DTPMOD(b) - . = 0x20308 - 0x10258 = 16428<<2
+# GD32-RELAX:      10258: pcaddi $a0, 16428
+# GD32-RELAX-NEXT:        bl 36
+
 # GD64-REL:      .rela.dyn {
 # GD64-REL-NEXT:   0x204C0 R_LARCH_TLS_DTPMOD64 a 0x0
 # GD64-REL-NEXT:   0x204C8 R_LARCH_TLS_DTPREL64 a 0x0
@@ -83,6 +107,21 @@
 # GD64-NEXT:        addi.d $a0, $a0, 1232
 # GD64-NEXT:        bl 36
 
+# GD64-REL-RELAX:      .rela.dyn {
+# GD64-REL-RELAX-NEXT:   0x204C0 R_LARCH_TLS_DTPMOD64 a 0x0
+# GD64-REL-RELAX-NEXT:   0x204C8 R_LARCH_TLS_DTPREL64 a 0x0
+# GD64-REL-RELAX-NEXT:   0x204D0 R_LARCH_TLS_DTPMOD64 b 0x0
+# GD64-REL-RELAX-NEXT:   0x204D8 R_LARCH_TLS_DTPREL64 b 0x0
+# GD64-REL-RELAX-NEXT: }
+
+## &DTPMOD(a) - . = 0x204c0 - 0x10398 = 16458<<2
+# GD64-RELAX:      10398: pcaddi $a0, 16458
+# GD64-RELAX-NEXT:        bl 52
+
+## &DTPMOD(b) - . = 0x204d0 - 0x103a0 = 16460<<2
+# GD64-RELAX:      103a0: pcaddi $a0, 16460
+# GD64-RELAX-NEXT:        bl 44
+
 # NOREL: no relocations
 
 ## .got contains pre-populated values: [a@dtpmod, a@dtprel, b@dtpmod, b@dtprel]
diff --git a/lld/test/ELF/loongarch-tls-ld.s b/lld/test/ELF/loongarch-tls-ld.s
index a5be3ad905b764..6cf6fa92939d52 100644
--- a/lld/test/ELF/loongarch-tls-ld.s
+++ b/lld/test/ELF/loongarch-tls-ld.s
@@ -1,12 +1,15 @@
 # REQUIRES: loongarch
 # RUN: rm -rf %t && split-file %s %t
 
-## LoongArch psABI doesn't specify TLS relaxation. Though the code sequences are not
-## relaxed, dynamic relocations can be omitted for LD->LE relaxation.
+## LoongArch psABI doesn't specify TLS relaxation. It can be handled the same way as gcc:
+## (a) code sequence can be converted from `pcalau12i+addi.[wd]` to `pcaddi`.
+## (b) dynamic relocations can be omitted for LD->LE relaxation.
 
 # RUN: llvm-mc --filetype=obj --triple=loongarch32 --position-independent %t/a.s -o %t/a.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 --position-independent -mattr=+relax %t/a.s -o %t/a.32.relax.o
 # RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/tga.s -o %t/tga.32.o
 # RUN: llvm-mc --filetype=obj --triple=loongarch64 --position-independent %t/a.s -o %t/a.64.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --position-independent -mattr=+relax %t/a.s -o %t/a.64.relax.o
 # RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/tga.s -o %t/tga.64.o
 
 ## LA32 LD
@@ -14,24 +17,34 @@
 # RUN: llvm-readobj -r %t/ld.32.so | FileCheck --check-prefix=LD32-REL %s
 # RUN: llvm-readelf -x .got %t/ld.32.so | FileCheck --check-prefix=LD32-GOT %s
 # RUN: llvm-objdump -d --no-show-raw-insn %t/ld.32.so | FileCheck --check-prefixes=LD32 %s
+# RUN: ld.lld --relax -shared %t/a.32.relax.o -o %t/ld.32.relax.so
+# RUN: llvm-objdump -d --no-show-raw-insn %t/ld.32.relax.so | FileCheck --check-prefixes=LD32-RELAX %s
 
 ## LA32 LD -> LE
 # RUN: ld.lld %t/a.32.o %t/tga.32.o -o %t/le.32
 # RUN: llvm-readelf -r %t/le.32 | FileCheck --check-prefix=NOREL %s
 # RUN: llvm-readelf -x .got %t/le.32 | FileCheck --check-prefix=LE32-GOT %s
 # RUN: llvm-objdump -d --...
[truncated]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants