lightning
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Lightning register liveliness


From: Paul Cercueil
Subject: Re: Lightning register liveliness
Date: Mon, 27 Jan 2020 17:12:18 -0300

Hi Paulo,

Here is a new example program that shows incorrect behaviour with the latest master. My JIT_R1 (== r10) is set at the beginning of the program and read back at the end, but it's used in between as a temporary register by Lightning.

Thanks,
-Paul


C code:
-------------------------------
#include <lightning.h>

#define ADDR_REG JIT_V(JIT_V_NUM - 1)
#define CYCLE_REG JIT_V(JIT_V_NUM - 2)

/*
 * Reproducer for the register-liveness problem described in this message.
 *
 * Builds one JIT-compiled function with GNU Lightning, emits it and prints
 * its disassembly. The generated function is never called; the disassembly
 * output is what demonstrates the problem. JIT_R1 is written at the start
 * of the sequence and read back at the end, and the report is that Lightning
 * uses the same hardware register as a scratch register in between.
 *
 * Always returns 0.
 */
int main(int argc, char **argv)
{
jit_state_t *_jit;
// node1..node3 are declared but not referenced anywhere in this program.
jit_node_t *node1, *node2, *node3;

init_jit(argv[0]);

_jit = jit_new_state();

jit_prolog();
jit_tramp(256);

// Registers are read here without being set inside this function; the
// sequence appears to be a fragment of a larger code generator, so the
// incoming values are presumably supplied by the caller -- TODO confirm.
jit_extr_i(JIT_V1, JIT_V1);
jit_addr(JIT_R1, JIT_V4, JIT_V1); // <-- JIT_R1 is written here

jit_extr_us(JIT_V1, JIT_V3);
jit_addr(JIT_R2, JIT_V4, JIT_V1);
jit_rshi_u(JIT_V1, JIT_V3, 0x10);
jit_andi(JIT_V1, JIT_V1, 0x7fff);
jit_lshi(JIT_V1, JIT_V1, 2);

jit_ldxi_i(JIT_V4, ADDR_REG, 0x30);
jit_extr_i(JIT_V1, JIT_V1);
jit_stxi_i(0x3c, ADDR_REG, JIT_V0);
jit_addr(JIT_V0, JIT_V4, JIT_V1);
jit_stxi_i(0x7c, ADDR_REG, JIT_V1);

jit_andi(JIT_V4, JIT_R0, 0x10000000);
jit_rshi_u(JIT_V4, JIT_V4, 6);
jit_ori(JIT_V4, JIT_V4, 0x1f9fffff);
// NOTE(review): jit_andi is given the register name JIT_V4 as its third
// argument. The generated listing shows "mov edi,0x7 / and rdi,rax" for
// this statement, i.e. a constant is ANDed in rather than the register's
// contents. jit_andr was probably intended -- confirm with the author.
jit_andi(JIT_V1, JIT_R0, JIT_V4);
jit_movi(JIT_V4, 0x30000000);
jit_addr(JIT_V1, JIT_V1, JIT_V4);
jit_ldxi_us(JIT_V1, JIT_V1, 0);

jit_addi(JIT_V4, JIT_R0, 2);
jit_stxi_i(0x48, ADDR_REG, JIT_R2);
jit_andi(JIT_R2, JIT_V4, 0x10000000);
jit_rshi_u(JIT_R2, JIT_R2, 6);
jit_ori(JIT_R2, JIT_R2, 0x1f9fffff);
// NOTE(review): same pattern as above, with JIT_R2 passed to jit_andi;
// the listing shows "and r13,0x2" here -- confirm whether jit_andr was meant.
jit_andi(JIT_V4, JIT_V4, JIT_R2);
jit_movi(JIT_R2, 0x30000000);
jit_addr(JIT_V4, JIT_V4, JIT_R2);
jit_ldxi_us(JIT_V4, JIT_V4, 0);

jit_rshi_u(JIT_V3, JIT_V1, 0x8);

jit_extr_i(JIT_V3, JIT_V3);
jit_ldxi_i(JIT_R2, ADDR_REG, 0x8);
jit_addr(JIT_V3, JIT_V3, JIT_R2);

jit_extr_i(JIT_V3, JIT_V3);
jit_lshi(JIT_V3, JIT_V3, 0x12);

// This is the statement that triggers the problem: in the generated
// listing it becomes "mov r10,rdi / movzx rsi,r10b", so r10 (JIT_R1) is
// used as a temporary and the value written at the top is lost.
jit_extr_uc(JIT_V2, JIT_V1);

jit_ldxi_i(JIT_R2, ADDR_REG, 0x4);
jit_addr(JIT_V2, JIT_V2, JIT_R2);

jit_extr_i(JIT_V2, JIT_V2);
jit_lshi(JIT_V2, JIT_V2, 2);

jit_extr_us(JIT_V2, JIT_V2);

jit_extr_i(JIT_V3, JIT_V3);
jit_orr(JIT_V3, JIT_V3, JIT_V2);

// First indirect call: the target is loaded from ADDR_REG + 0xf8 and
// JIT_R0 is set to a constant immediately before the call.
jit_stxi_i(0x40, ADDR_REG, JIT_R0);
jit_ldxi(JIT_R2, ADDR_REG, 0xf8);
jit_stxi_i(0x78, ADDR_REG, JIT_V3);
jit_movi(JIT_R0, 0x489e0000);
jit_callr(JIT_R2);

// jit_live() hints that these registers still hold needed values after
// the call. They come after the clobbering statement above and did not
// prevent it.
jit_live(JIT_R1);
jit_live(JIT_V4);

// For comparison: after the call the same kind of byte extension uses
// rax as its temporary ("mov rax,r13 / movzx rsi,al" in the listing).
jit_extr_uc(JIT_V2, JIT_V4);
jit_stxi_i(0x4c, ADDR_REG, JIT_V4);
jit_ldxi_i(JIT_V4, ADDR_REG, 0xc);
jit_addr(JIT_V2, JIT_V2, JIT_V4);

jit_extr_i(JIT_V2, JIT_V2);
jit_lshi(JIT_V2, JIT_V2, 2);

// Second indirect call through the same pointer slot, with a different
// constant in JIT_R0.
jit_ldxi(JIT_R2, ADDR_REG, 0xf8);
jit_stxi_i(0x74, ADDR_REG, JIT_V2);
jit_movi(JIT_R0, 0x489d0800);
jit_callr(JIT_R2);

jit_live(JIT_R1);

jit_andi(JIT_R2, JIT_R1, 0x10000000); // <--- JIT_R1 is read back here


jit_ret();
jit_epilog();

// Generate the machine code and print it; the code is not executed.
jit_emit();
jit_disassemble();
jit_clear_state();
jit_destroy_state();

// NOTE(review): init_jit() is called above but no matching finish_jit()
// call is visible here -- confirm whether that is intentional.
return 0;
}
------------------------------


Generated code:
------------------------------
        0x1350000 movsxd rdi,edi
        0x1350003 lea r10,[r13+rdi*1+0x0]       <---- r10 is set here
        0x1350008 movzx rdi,r12w
        0x135000c lea r11,[r13+rdi*1+0x0]
        0x1350011 mov rdi,r12
        0x1350014 shr rdi,0x10
        0x1350018 and rdi,0x7fff
        0x135001f lea rdi,[rdi*4+0x0]
        0x1350027 movsxd r13,DWORD PTR [r15+0x30]
        0x135002b movsxd rdi,edi
        0x135002e mov DWORD PTR [r15+0x3c],ebx
        0x1350032 lea rbx,[r13+rdi*1+0x0]
        0x1350037 mov DWORD PTR [r15+0x7c],edi
        0x135003b mov r13d,0x10000000
        0x1350041 and r13,rax
        0x1350044 shr r13,0x6
        0x1350048 or r13,0x1f9fffff
        0x135004f mov edi,0x7
        0x1350054 and rdi,rax
        0x1350057 mov r13d,0x30000000
        0x135005d add rdi,r13
        0x1350060 movzx rdi,WORD PTR [rdi]
        0x1350064 lea r13,[rax+0x2]
        0x1350068 mov DWORD PTR [r15+0x48],r11d
        0x135006c mov r11d,0x10000000
        0x1350072 and r11,r13
        0x1350075 shr r11,0x6
        0x1350079 or r11,0x1f9fffff
        0x1350080 and r13,0x2
        0x1350084 mov r11d,0x30000000
        0x135008a add r13,r11
        0x135008d movzx r13,WORD PTR [r13+0x0]
        0x1350092 mov r12,rdi
        0x1350095 shr r12,0x8
        0x1350099 movsxd r12,r12d
        0x135009c movsxd r11,DWORD PTR [r15+0x8]
        0x13500a0 add r12,r11
        0x13500a3 movsxd r12,r12d
        0x13500a6 shl r12,0x12
0x13500aa mov r10,rdi <------------ r10 is overwritten here!!!
        0x13500ad movzx rsi,r10b
        0x13500b1 movsxd r11,DWORD PTR [r15+0x4]
        0x13500b5 add rsi,r11
        0x13500b8 movsxd rsi,esi
        0x13500bb lea rsi,[rsi*4+0x0]
        0x13500c3 movzx rsi,si
        0x13500c7 movsxd r12,r12d
        0x13500ca or r12,rsi
        0x13500cd mov DWORD PTR [r15+0x40],eax
        0x13500d1 mov r11,QWORD PTR [r15+0xf8]
        0x13500d8 mov DWORD PTR [r15+0x78],r12d
        0x13500dc mov eax,0x489e0000
        0x13500e1 call r11
        0x13500e4 mov rax,r13
        0x13500e7 movzx rsi,al
        0x13500eb mov DWORD PTR [r15+0x4c],r13d
        0x13500ef movsxd r13,DWORD PTR [r15+0xc]
        0x13500f3 add rsi,r13
        0x13500f6 movsxd rsi,esi
        0x13500f9 lea rsi,[rsi*4+0x0]
        0x1350101 mov r11,QWORD PTR [r15+0xf8]
        0x1350108 mov DWORD PTR [r15+0x74],esi
        0x135010c mov eax,0x489d0800
        0x1350111 call r11
        0x1350114 mov r11d,0x10000000
0x135011a and r11,r10 <--------- r10 is read back here
----------------------


Le mer., janv. 22, 2020 at 17:16, Paulo César Pereira de Andrade <address@hidden> a écrit :
Em qua., 22 de jan. de 2020 às 15:23, Paulo César Pereira de Andrade
<address@hidden> escreveu:

 Em qui., 9 de jan. de 2020 às 00:26, Paul Cercueil
 <address@hidden> escreveu:
 >
 > Hi Paulo,

   Hi Paul,

   Sorry for the delay. Just found this message I previously missed.

 > I am facing a problem related to register liveliness. I could only
 > reproduce it on MinGW, I never faced it on Linux x86_64 or MIPS.
 >
 > Here is an example code to reproduce the issue (sorry for the length).
 > It is not important to understand what the code does; what is important
 > to see is that my JIT_V0 (== rbx), which is set at the very beginning
 > of the code and read back at the last opcode (provided that the node2
 > beqi branch is taken), is overwritten by the call to jit_ltr_u.
 >
> I know that Lightning assumes that the caller-saved registers are dead
 > after a function call, but there's no function call here.
 >
 > Could you shed some light on why Lightning uses my registers? Do I
 > have to use jit_live() somewhere here?

I understand what is happening. It is a bug in the logic of live registers, that does not mark JIT_V0 live, and then it is used in the ltr_u translation, when it needs a special kind of register. Due to the way it handles the live state, it thinks it is dead in 'jit_movi(JIT_V0, 0x80065ff8);' and does not properly follow the previous branch in 'node2 = jit_beqi(JIT_V1, 0);'. I will work on a patch, and let you know about it when it is ready.

An ugly hack, as a temporary solution, would be to add a jit_live as below:

    jit_subi(CYCLE_REG, CYCLE_REG, 0x14);
 +  jit_live(JIT_V0);
   node2 = jit_beqi(JIT_V1, 0);

  A patch was now pushed. This code should be revisited, but
the added patch should correct any variant of the problem you
experienced.

 > Thanks,
 > -Paul
 >

 Thanks,
 Paulo





reply via email to

[Prev in Thread] Current Thread [Next in Thread]