From: Nikos Mavrogiannopoulos
Subject: [SCM] GNU gnutls branch, master, updated. gnutls_3_0_15-43-g080f43e
Date: Thu, 15 Mar 2012 17:47:27 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU gnutls".

http://git.savannah.gnu.org/cgit/gnutls.git/commit/?id=080f43e1485e36addcaf4279a39848cf8b66a4df

The branch, master has been updated
       via  080f43e1485e36addcaf4279a39848cf8b66a4df (commit)
      from  1133c9d652fb700d4330ed398429407013643bc3 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 080f43e1485e36addcaf4279a39848cf8b66a4df
Author: Nikos Mavrogiannopoulos <address@hidden>
Date:   Thu Mar 15 18:34:53 2012 +0100

    Added assembly for macosx

-----------------------------------------------------------------------
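
The copied files differ from their ELF counterparts only in assembler
conventions, not in the instruction stream. As a rough side-by-side
sketch (illustrative only, not part of the commit), the header of
gcm_gmult_4bit under the two syntaxes:

    # ELF (GNU as), lib/accelerated/x86/asm/:
    .globl gcm_gmult_4bit
    .type  gcm_gmult_4bit,@function
    .align 16                     # operand is a byte count
    gcm_gmult_4bit:
    .Lgmult_prologue:             # ".L" prefix marks an assembler-local label

    # Mach-O (Apple as), lib/accelerated/x86/asm-macosx/:
    .globl _gcm_gmult_4bit        # C-visible symbols get a leading underscore
    .p2align 4                    # operand is an exponent: 2^4 = 16 bytes
    _gcm_gmult_4bit:
    L$gmult_prologue:             # a plain "L" prefix keeps the label local;
                                  # .type/.size have no Mach-O equivalent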

Summary of changes:
 cfg.mk                                             |   42 +-
 configure.ac                                       |    4 +
 lib/accelerated/x86/Makefile.am                    |   38 +-
 .../appro-aes-gcm-x86-64-macosx.s}                 |  128 ++--
 .../appro-aes-x86-64-macosx.s}                     |  936 ++++++++++----------
 .../appro-aes-x86-macosx.s}                        |  730 ++++++++--------
 .../cpuid-x86-64-macosx.s}                         |    8 +-
 .../cpuid-x86-macosx.s}                            |   12 +-
 .../padlock-x86-64-macosx.s}                       |  278 +++---
 .../padlock-x86-macosx.s}                          |  238 +++---
 10 files changed, 1208 insertions(+), 1206 deletions(-)
 copy lib/accelerated/x86/{asm/appro-aes-gcm-x86-64.s => asm-macosx/appro-aes-gcm-x86-64-macosx.s} (94%)
 copy lib/accelerated/x86/{asm/appro-aes-x86-64.s => asm-macosx/appro-aes-x86-64-macosx.s} (81%)
 copy lib/accelerated/x86/{asm-coff/appro-aes-x86-coff.s => asm-macosx/appro-aes-x86-macosx.s} (82%)
 copy lib/accelerated/x86/{asm-coff/cpuid-x86-64-coff.s => asm-macosx/cpuid-x86-64-macosx.s} (90%)
 copy lib/accelerated/x86/{asm-coff/cpuid-x86-coff.s => asm-macosx/cpuid-x86-macosx.s} (86%)
 copy lib/accelerated/x86/{asm/padlock-x86-64.s => asm-macosx/padlock-x86-64-macosx.s} (71%)
 copy lib/accelerated/x86/{asm-coff/padlock-x86-coff.s => asm-macosx/padlock-x86-macosx.s} (76%)
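
The high similarity percentages above reflect how mechanical the
translation is: beyond the symbol and label renaming, every ".align N"
becomes ".p2align log2(N)", and the ELF-only trailer marking the stack
non-executable is dropped. For example (a sketch mirroring the end of
the first copied file below):

    # ELF copy ends with:
    .align 64
    .section .note.GNU-stack,"",%progbits   # ELF-only non-executable-stack note

    # Mach-O copy ends with:
    .p2align 6                              # 2^6 = 64; Mach-O has no such note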

diff --git a/cfg.mk b/cfg.mk
index 21b5973..ac64acf 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -157,20 +157,28 @@ upload-web:
                cvs commit -m "Update." manual/ reference/ \
                        doxygen/ devel/ cyclo/
 
-ASM_SOURCES:= lib/accelerated/x86/asm-coff/cpuid-x86-64-coff.s \
+ASM_SOURCES:= \
        lib/accelerated/x86/asm/cpuid-x86-64.s \
-       lib/accelerated/x86/asm-coff/cpuid-x86-coff.s \
        lib/accelerated/x86/asm/cpuid-x86.s \
        lib/accelerated/x86/asm/appro-aes-gcm-x86-64.s \
        lib/accelerated/x86/asm/appro-aes-x86-64.s \
        lib/accelerated/x86/asm/appro-aes-x86.s \
        lib/accelerated/x86/asm/padlock-x86-64.s \
        lib/accelerated/x86/asm/padlock-x86.s \
+       lib/accelerated/x86/asm-coff/cpuid-x86-coff.s \
+       lib/accelerated/x86/asm-coff/cpuid-x86-64-coff.s \
        lib/accelerated/x86/asm-coff/appro-aes-gcm-x86-64-coff.s \
        lib/accelerated/x86/asm-coff/appro-aes-x86-64-coff.s \
        lib/accelerated/x86/asm-coff/appro-aes-x86-coff.s \
        lib/accelerated/x86/asm-coff/padlock-x86-64-coff.s \
-       lib/accelerated/x86/asm-coff/padlock-x86-coff.s
+       lib/accelerated/x86/asm-coff/padlock-x86-coff.s \
+       lib/accelerated/x86/asm-macosx/cpuid-x86-64-macosx.s \
+       lib/accelerated/x86/asm-macosx/cpuid-x86-macosx.s \
+       lib/accelerated/x86/asm-macosx/appro-aes-gcm-x86-64-macosx.s \
+       lib/accelerated/x86/asm-macosx/appro-aes-x86-64-macosx.s \
+       lib/accelerated/x86/asm-macosx/appro-aes-x86-macosx.s \
+       lib/accelerated/x86/asm-macosx/padlock-x86-64-macosx.s \
+       lib/accelerated/x86/asm-macosx/padlock-x86-macosx.s
 
 asm-sources: $(ASM_SOURCES)
 
@@ -247,3 +255,31 @@ lib/accelerated/x86/asm-coff/cpuid-x86-64-coff.s: devel/perlasm/cpuid-x86_64.pl
 lib/accelerated/x86/asm-coff/cpuid-x86-coff.s: devel/perlasm/cpuid-x86.pl
        cat devel/perlasm/license-gnutls.txt > $@
        perl $< coff >> $@
+
+lib/accelerated/x86/asm-macosx/appro-aes-gcm-x86-64-macosx.s: devel/perlasm/ghash-x86_64.pl
+       cat devel/perlasm/license.txt > $@
+       perl $< macosx >> $@
+
+lib/accelerated/x86/asm-macosx/appro-aes-x86-64-macosx.s: devel/perlasm/aesni-x86_64.pl
+       cat devel/perlasm/license.txt > $@
+       perl $< macosx >> $@
+
+lib/accelerated/x86/asm-macosx/appro-aes-x86-macosx.s: devel/perlasm/aesni-x86.pl
+       cat devel/perlasm/license.txt > $@
+       perl $< macosx >> $@
+
+lib/accelerated/x86/asm-macosx/padlock-x86-64-macosx.s: devel/perlasm/e_padlock-x86_64.pl
+       cat devel/perlasm/license.txt > $@
+       perl $< macosx >> $@
+
+lib/accelerated/x86/asm-macosx/padlock-x86-macosx.s: devel/perlasm/e_padlock-x86.pl
+       cat devel/perlasm/license.txt > $@
+       perl $< macosx >> $@
+
+lib/accelerated/x86/asm-macosx/cpuid-x86-64-macosx.s: devel/perlasm/cpuid-x86_64.pl
+       cat devel/perlasm/license-gnutls.txt > $@
+       perl $< macosx >> $@
+
+lib/accelerated/x86/asm-macosx/cpuid-x86-macosx.s: devel/perlasm/cpuid-x86.pl
+       cat devel/perlasm/license-gnutls.txt > $@
+       perl $< macosx >> $@
diff --git a/configure.ac b/configure.ac
index 6fdf1d4..f1a602e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -67,11 +67,15 @@ case "$host" in
   *mingw32* | *mingw64*)
     have_win=yes
   ;;
+  *darwin*)
+    have_macosx=yes
+  ;;
   *)
   ;;
 esac
 
 AM_CONDITIONAL(WINDOWS, test "$have_win" = yes)
+AM_CONDITIONAL(MACOSX, test "$have_macosx" = yes)
 
 dnl Hardware Acceleration
 AC_ARG_ENABLE(hardware-acceleration,
diff --git a/lib/accelerated/x86/Makefile.am b/lib/accelerated/x86/Makefile.am
index 3ddd066..7040847 100644
--- a/lib/accelerated/x86/Makefile.am
+++ b/lib/accelerated/x86/Makefile.am
@@ -38,22 +38,34 @@ noinst_LTLIBRARIES = libx86.la
 libx86_la_SOURCES = sha-padlock.c hmac-padlock.c aes-x86.c aes-padlock.c aes-gcm-padlock.c aes-padlock.h aes-x86.h x86.h sha-padlock.h
 
 if ASM_X86_64
-AM_CFLAGS += -DASM_X86_64 -DASM_X86
-libx86_la_SOURCES += aes-gcm-x86.c
+ AM_CFLAGS += -DASM_X86_64 -DASM_X86
+ libx86_la_SOURCES += aes-gcm-x86.c
 
-if WINDOWS
-libx86_la_SOURCES += asm-coff/appro-aes-x86-64-coff.s asm-coff/padlock-x86-64-coff.s asm-coff/cpuid-x86-64-coff.s asm-coff/appro-aes-gcm-x86-64-coff.s
-else
-libx86_la_SOURCES += asm/appro-aes-x86-64.s asm/appro-aes-gcm-x86-64.s asm/padlock-x86-64.s asm/cpuid-x86-64.s
-endif
+ if WINDOWS
+  libx86_la_SOURCES += asm-coff/appro-aes-x86-64-coff.s asm-coff/padlock-x86-64-coff.s asm-coff/cpuid-x86-64-coff.s asm-coff/appro-aes-gcm-x86-64-coff.s
+ endif
 
-else
-AM_CFLAGS += -DASM_X86_32 -DASM_X86
+ if MACOSX
+  libx86_la_SOURCES += asm-macosx/appro-aes-x86-64-macosx.s asm-macosx/padlock-x86-64-macosx.s asm-macosx/cpuid-x86-64-macosx.s asm-macosx/appro-aes-gcm-x86-64-macosx.s
+ endif
+
+ if ELF
+  libx86_la_SOURCES += asm/appro-aes-x86-64.s asm/appro-aes-gcm-x86-64.s asm/padlock-x86-64.s asm/cpuid-x86-64.s
+ endif
 
-if WINDOWS
-libx86_la_SOURCES += asm-coff/appro-aes-x86-coff.s asm-coff/padlock-x86-coff.s asm-coff/cpuid-x86-coff.s
 else
-libx86_la_SOURCES += asm/appro-aes-x86.s asm/padlock-x86.s asm/cpuid-x86.s
-endif
+ AM_CFLAGS += -DASM_X86_32 -DASM_X86
+
+ if WINDOWS
+  libx86_la_SOURCES += asm-coff/appro-aes-x86-coff.s asm-coff/padlock-x86-coff.s asm-coff/cpuid-x86-coff.s
+ endif
+
+ if MACOSX
+  libx86_la_SOURCES += asm-macosx/appro-aes-x86-macosx.s asm-macosx/padlock-x86-macosx.s asm-macosx/cpuid-x86-macosx.s
+ endif
+
+ if ELF
+  libx86_la_SOURCES += asm/appro-aes-x86.s asm/padlock-x86.s asm/cpuid-x86.s
+ endif
 
 endif
diff --git a/lib/accelerated/x86/asm/appro-aes-gcm-x86-64.s b/lib/accelerated/x86/asm-macosx/appro-aes-gcm-x86-64-macosx.s
similarity index 94%
copy from lib/accelerated/x86/asm/appro-aes-gcm-x86-64.s
copy to lib/accelerated/x86/asm-macosx/appro-aes-gcm-x86-64-macosx.s
index 55da343..732332b 100644
--- a/lib/accelerated/x86/asm/appro-aes-gcm-x86-64.s
+++ b/lib/accelerated/x86/asm-macosx/appro-aes-gcm-x86-64-macosx.s
@@ -37,17 +37,17 @@
 
 .text  
 
-.globl gcm_gmult_4bit
-.type  gcm_gmult_4bit,@function
-.align 16
-gcm_gmult_4bit:
+.globl _gcm_gmult_4bit
+
+.p2align       4
+_gcm_gmult_4bit:
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
-.Lgmult_prologue:
+L$gmult_prologue:
 
        movzbq  15(%rdi),%r8
-       leaq    .Lrem_4bit(%rip),%r11
+       leaq    L$rem_4bit(%rip),%r11
        xorq    %rax,%rax
        xorq    %rbx,%rbx
        movb    %r8b,%al
@@ -58,10 +58,10 @@ gcm_gmult_4bit:
        movq    (%rsi,%rax,1),%r9
        andb    $240,%bl
        movq    %r8,%rdx
-       jmp     .Loop1
+       jmp     L$oop1
 
-.align 16
-.Loop1:
+.p2align       4
+L$oop1:
        shrq    $4,%r8
        andq    $15,%rdx
        movq    %r9,%r10
@@ -76,7 +76,7 @@ gcm_gmult_4bit:
        shlb    $4,%al
        xorq    %r10,%r8
        decq    %rcx
-       js      .Lbreak1
+       js      L$break1
 
        shrq    $4,%r8
        andq    $15,%rdx
@@ -89,10 +89,10 @@ gcm_gmult_4bit:
        xorq    (%r11,%rdx,8),%r9
        movq    %r8,%rdx
        xorq    %r10,%r8
-       jmp     .Loop1
+       jmp     L$oop1
 
-.align 16
-.Lbreak1:
+.p2align       4
+L$break1:
        shrq    $4,%r8
        andq    $15,%rdx
        movq    %r9,%r10
@@ -122,13 +122,13 @@ gcm_gmult_4bit:
 
        movq    16(%rsp),%rbx
        leaq    24(%rsp),%rsp
-.Lgmult_epilogue:
+L$gmult_epilogue:
        .byte   0xf3,0xc3
-.size  gcm_gmult_4bit,.-gcm_gmult_4bit
-.globl gcm_ghash_4bit
-.type  gcm_ghash_4bit,@function
-.align 16
-gcm_ghash_4bit:
+
+.globl _gcm_ghash_4bit
+
+.p2align       4
+_gcm_ghash_4bit:
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
@@ -136,7 +136,7 @@ gcm_ghash_4bit:
        pushq   %r14
        pushq   %r15
        subq    $280,%rsp
-.Lghash_prologue:
+L$ghash_prologue:
        movq    %rdx,%r14
        movq    %rcx,%r15
        subq    $-128,%rsi
@@ -338,10 +338,10 @@ gcm_ghash_4bit:
        movq    8(%rdi),%r8
        movq    0(%rdi),%r9
        addq    %r14,%r15
-       leaq    .Lrem_8bit(%rip),%r11
-       jmp     .Louter_loop
-.align 16
-.Louter_loop:
+       leaq    L$rem_8bit(%rip),%r11
+       jmp     L$outer_loop
+.p2align       4
+L$outer_loop:
        xorq    (%r14),%r9
        movq    8(%r14),%rdx
        leaq    16(%r14),%r14
@@ -676,7 +676,7 @@ gcm_ghash_4bit:
        xorq    %r13,%r9
        bswapq  %r9
        cmpq    %r15,%r14
-       jb      .Louter_loop
+       jb      L$outer_loop
        movq    %r8,8(%rdi)
        movq    %r9,(%rdi)
 
@@ -688,13 +688,13 @@ gcm_ghash_4bit:
        movq    32(%rsi),%rbp
        movq    40(%rsi),%rbx
        leaq    48(%rsi),%rsp
-.Lghash_epilogue:
+L$ghash_epilogue:
        .byte   0xf3,0xc3
-.size  gcm_ghash_4bit,.-gcm_ghash_4bit
-.globl gcm_init_clmul
-.type  gcm_init_clmul,@function
-.align 16
-gcm_init_clmul:
+
+.globl _gcm_init_clmul
+
+.p2align       4
+_gcm_init_clmul:
        movdqu  (%rsi),%xmm2
        pshufd  $78,%xmm2,%xmm2
 
@@ -709,7 +709,7 @@ gcm_init_clmul:
        por     %xmm3,%xmm2
 
 
-       pand    .L0x1c2_polynomial(%rip),%xmm5
+       pand    L$0x1c2_polynomial(%rip),%xmm5
        pxor    %xmm5,%xmm2
 
 
@@ -755,13 +755,13 @@ gcm_init_clmul:
        movdqu  %xmm2,(%rdi)
        movdqu  %xmm0,16(%rdi)
        .byte   0xf3,0xc3
-.size  gcm_init_clmul,.-gcm_init_clmul
-.globl gcm_gmult_clmul
-.type  gcm_gmult_clmul,@function
-.align 16
-gcm_gmult_clmul:
+
+.globl _gcm_gmult_clmul
+
+.p2align       4
+_gcm_gmult_clmul:
        movdqu  (%rdi),%xmm0
-       movdqa  .Lbswap_mask(%rip),%xmm5
+       movdqa  L$bswap_mask(%rip),%xmm5
        movdqu  (%rsi),%xmm2
 .byte  102,15,56,0,197
        movdqa  %xmm0,%xmm1
@@ -805,19 +805,19 @@ gcm_gmult_clmul:
 .byte  102,15,56,0,197
        movdqu  %xmm0,(%rdi)
        .byte   0xf3,0xc3
-.size  gcm_gmult_clmul,.-gcm_gmult_clmul
-.globl gcm_ghash_clmul
-.type  gcm_ghash_clmul,@function
-.align 16
-gcm_ghash_clmul:
-       movdqa  .Lbswap_mask(%rip),%xmm5
+
+.globl _gcm_ghash_clmul
+
+.p2align       4
+_gcm_ghash_clmul:
+       movdqa  L$bswap_mask(%rip),%xmm5
 
        movdqu  (%rdi),%xmm0
        movdqu  (%rsi),%xmm2
 .byte  102,15,56,0,197
 
        subq    $16,%rcx
-       jz      .Lodd_tail
+       jz      L$odd_tail
 
        movdqu  16(%rsi),%xmm8
 
@@ -854,9 +854,9 @@ gcm_ghash_clmul:
 
        leaq    32(%rdx),%rdx
        subq    $32,%rcx
-       jbe     .Leven_tail
+       jbe     L$even_tail
 
-.Lmod_loop:
+L$mod_loop:
 .byte  102,65,15,58,68,192,0
 .byte  102,65,15,58,68,200,17
 .byte  102,15,58,68,220,0
@@ -923,9 +923,9 @@ gcm_ghash_clmul:
 
        leaq    32(%rdx),%rdx
        subq    $32,%rcx
-       ja      .Lmod_loop
+       ja      L$mod_loop
 
-.Leven_tail:
+L$even_tail:
 .byte  102,65,15,58,68,192,0
 .byte  102,65,15,58,68,200,17
 .byte  102,15,58,68,220,0
@@ -962,9 +962,9 @@ gcm_ghash_clmul:
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0
        testq   %rcx,%rcx
-       jnz     .Ldone
+       jnz     L$done
 
-.Lodd_tail:
+L$odd_tail:
        movdqu  (%rdx),%xmm3
 .byte  102,15,56,0,221
        pxor    %xmm3,%xmm0
@@ -1006,26 +1006,26 @@ gcm_ghash_clmul:
        pxor    %xmm1,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm0
-.Ldone:
+L$done:
 .byte  102,15,56,0,197
        movdqu  %xmm0,(%rdi)
        .byte   0xf3,0xc3
-.LSEH_end_gcm_ghash_clmul:
-.size  gcm_ghash_clmul,.-gcm_ghash_clmul
-.align 64
-.Lbswap_mask:
+L$SEH_end_gcm_ghash_clmul:
+
+.p2align       6
+L$bswap_mask:
 .byte  15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
-.L0x1c2_polynomial:
+L$0x1c2_polynomial:
 .byte  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
-.align 64
-.type  .Lrem_4bit,@object
-.Lrem_4bit:
+.p2align       6
+
+L$rem_4bit:
 .long  0,0,0,471859200,0,943718400,0,610271232
 .long  0,1887436800,0,1822425088,0,1220542464,0,1423966208
 .long  0,3774873600,0,4246732800,0,3644850176,0,3311403008
 .long  0,2441084928,0,2376073216,0,2847932416,0,3051356160
-.type  .Lrem_8bit,@object
-.Lrem_8bit:
+
+L$rem_8bit:
 .value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
 .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
 .value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
@@ -1060,6 +1060,4 @@ gcm_ghash_clmul:
 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
 
 .byte  71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 64
-
-.section .note.GNU-stack,"",%progbits
+.p2align       6
diff --git a/lib/accelerated/x86/asm/appro-aes-x86-64.s b/lib/accelerated/x86/asm-macosx/appro-aes-x86-64-macosx.s
similarity index 81%
copy from lib/accelerated/x86/asm/appro-aes-x86-64.s
copy to lib/accelerated/x86/asm-macosx/appro-aes-x86-64-macosx.s
index 73c3798..de7ca03 100644
--- a/lib/accelerated/x86/asm/appro-aes-x86-64.s
+++ b/lib/accelerated/x86/asm-macosx/appro-aes-x86-64-macosx.s
@@ -36,49 +36,49 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 .text  
-.globl aesni_encrypt
-.type  aesni_encrypt,@function
-.align 16
-aesni_encrypt:
+.globl _aesni_encrypt
+
+.p2align       4
+_aesni_encrypt:
        movups  (%rdi),%xmm2
        movl    240(%rdx),%eax
        movups  (%rdx),%xmm0
        movups  16(%rdx),%xmm1
        leaq    32(%rdx),%rdx
        xorps   %xmm0,%xmm2
-.Loop_enc1_1:
+L$oop_enc1_1:
 .byte  102,15,56,220,209
        decl    %eax
        movups  (%rdx),%xmm1
        leaq    16(%rdx),%rdx
-       jnz     .Loop_enc1_1    
+       jnz     L$oop_enc1_1    
 .byte  102,15,56,221,209
        movups  %xmm2,(%rsi)
        .byte   0xf3,0xc3
-.size  aesni_encrypt,.-aesni_encrypt
 
-.globl aesni_decrypt
-.type  aesni_decrypt,@function
-.align 16
-aesni_decrypt:
+
+.globl _aesni_decrypt
+
+.p2align       4
+_aesni_decrypt:
        movups  (%rdi),%xmm2
        movl    240(%rdx),%eax
        movups  (%rdx),%xmm0
        movups  16(%rdx),%xmm1
        leaq    32(%rdx),%rdx
        xorps   %xmm0,%xmm2
-.Loop_dec1_2:
+L$oop_dec1_2:
 .byte  102,15,56,222,209
        decl    %eax
        movups  (%rdx),%xmm1
        leaq    16(%rdx),%rdx
-       jnz     .Loop_dec1_2    
+       jnz     L$oop_dec1_2    
 .byte  102,15,56,223,209
        movups  %xmm2,(%rsi)
        .byte   0xf3,0xc3
-.size  aesni_decrypt, .-aesni_decrypt
-.type  _aesni_encrypt3,@function
-.align 16
+
+
+.p2align       4
 _aesni_encrypt3:
        movups  (%rcx),%xmm0
        shrl    $1,%eax
@@ -89,7 +89,7 @@ _aesni_encrypt3:
        xorps   %xmm0,%xmm4
        movups  (%rcx),%xmm0
 
-.Lenc_loop3:
+L$enc_loop3:
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
        decl    %eax
@@ -100,7 +100,7 @@ _aesni_encrypt3:
        leaq    32(%rcx),%rcx
 .byte  102,15,56,220,224
        movups  (%rcx),%xmm0
-       jnz     .Lenc_loop3
+       jnz     L$enc_loop3
 
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
@@ -109,9 +109,9 @@ _aesni_encrypt3:
 .byte  102,15,56,221,216
 .byte  102,15,56,221,224
        .byte   0xf3,0xc3
-.size  _aesni_encrypt3,.-_aesni_encrypt3
-.type  _aesni_decrypt3,@function
-.align 16
+
+
+.p2align       4
 _aesni_decrypt3:
        movups  (%rcx),%xmm0
        shrl    $1,%eax
@@ -122,7 +122,7 @@ _aesni_decrypt3:
        xorps   %xmm0,%xmm4
        movups  (%rcx),%xmm0
 
-.Ldec_loop3:
+L$dec_loop3:
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
        decl    %eax
@@ -133,7 +133,7 @@ _aesni_decrypt3:
        leaq    32(%rcx),%rcx
 .byte  102,15,56,222,224
        movups  (%rcx),%xmm0
-       jnz     .Ldec_loop3
+       jnz     L$dec_loop3
 
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
@@ -142,9 +142,9 @@ _aesni_decrypt3:
 .byte  102,15,56,223,216
 .byte  102,15,56,223,224
        .byte   0xf3,0xc3
-.size  _aesni_decrypt3,.-_aesni_decrypt3
-.type  _aesni_encrypt4,@function
-.align 16
+
+
+.p2align       4
 _aesni_encrypt4:
        movups  (%rcx),%xmm0
        shrl    $1,%eax
@@ -156,7 +156,7 @@ _aesni_encrypt4:
        xorps   %xmm0,%xmm5
        movups  (%rcx),%xmm0
 
-.Lenc_loop4:
+L$enc_loop4:
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
        decl    %eax
@@ -169,7 +169,7 @@ _aesni_encrypt4:
 .byte  102,15,56,220,224
 .byte  102,15,56,220,232
        movups  (%rcx),%xmm0
-       jnz     .Lenc_loop4
+       jnz     L$enc_loop4
 
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
@@ -180,9 +180,9 @@ _aesni_encrypt4:
 .byte  102,15,56,221,224
 .byte  102,15,56,221,232
        .byte   0xf3,0xc3
-.size  _aesni_encrypt4,.-_aesni_encrypt4
-.type  _aesni_decrypt4,@function
-.align 16
+
+
+.p2align       4
 _aesni_decrypt4:
        movups  (%rcx),%xmm0
        shrl    $1,%eax
@@ -194,7 +194,7 @@ _aesni_decrypt4:
        xorps   %xmm0,%xmm5
        movups  (%rcx),%xmm0
 
-.Ldec_loop4:
+L$dec_loop4:
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
        decl    %eax
@@ -207,7 +207,7 @@ _aesni_decrypt4:
 .byte  102,15,56,222,224
 .byte  102,15,56,222,232
        movups  (%rcx),%xmm0
-       jnz     .Ldec_loop4
+       jnz     L$dec_loop4
 
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
@@ -218,9 +218,9 @@ _aesni_decrypt4:
 .byte  102,15,56,223,224
 .byte  102,15,56,223,232
        .byte   0xf3,0xc3
-.size  _aesni_decrypt4,.-_aesni_decrypt4
-.type  _aesni_encrypt6,@function
-.align 16
+
+
+.p2align       4
 _aesni_encrypt6:
        movups  (%rcx),%xmm0
        shrl    $1,%eax
@@ -240,9 +240,9 @@ _aesni_encrypt6:
 .byte  102,15,56,220,241
        movups  (%rcx),%xmm0
 .byte  102,15,56,220,249
-       jmp     .Lenc_loop6_enter
-.align 16
-.Lenc_loop6:
+       jmp     L$enc_loop6_enter
+.p2align       4
+L$enc_loop6:
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
        decl    %eax
@@ -250,7 +250,7 @@ _aesni_encrypt6:
 .byte  102,15,56,220,233
 .byte  102,15,56,220,241
 .byte  102,15,56,220,249
-.Lenc_loop6_enter:
+L$enc_loop6_enter:
        movups  16(%rcx),%xmm1
 .byte  102,15,56,220,208
 .byte  102,15,56,220,216
@@ -260,7 +260,7 @@ _aesni_encrypt6:
 .byte  102,15,56,220,240
 .byte  102,15,56,220,248
        movups  (%rcx),%xmm0
-       jnz     .Lenc_loop6
+       jnz     L$enc_loop6
 
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
@@ -275,9 +275,9 @@ _aesni_encrypt6:
 .byte  102,15,56,221,240
 .byte  102,15,56,221,248
        .byte   0xf3,0xc3
-.size  _aesni_encrypt6,.-_aesni_encrypt6
-.type  _aesni_decrypt6,@function
-.align 16
+
+
+.p2align       4
 _aesni_decrypt6:
        movups  (%rcx),%xmm0
        shrl    $1,%eax
@@ -297,9 +297,9 @@ _aesni_decrypt6:
 .byte  102,15,56,222,241
        movups  (%rcx),%xmm0
 .byte  102,15,56,222,249
-       jmp     .Ldec_loop6_enter
-.align 16
-.Ldec_loop6:
+       jmp     L$dec_loop6_enter
+.p2align       4
+L$dec_loop6:
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
        decl    %eax
@@ -307,7 +307,7 @@ _aesni_decrypt6:
 .byte  102,15,56,222,233
 .byte  102,15,56,222,241
 .byte  102,15,56,222,249
-.Ldec_loop6_enter:
+L$dec_loop6_enter:
        movups  16(%rcx),%xmm1
 .byte  102,15,56,222,208
 .byte  102,15,56,222,216
@@ -317,7 +317,7 @@ _aesni_decrypt6:
 .byte  102,15,56,222,240
 .byte  102,15,56,222,248
        movups  (%rcx),%xmm0
-       jnz     .Ldec_loop6
+       jnz     L$dec_loop6
 
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
@@ -332,9 +332,9 @@ _aesni_decrypt6:
 .byte  102,15,56,223,240
 .byte  102,15,56,223,248
        .byte   0xf3,0xc3
-.size  _aesni_decrypt6,.-_aesni_decrypt6
-.type  _aesni_encrypt8,@function
-.align 16
+
+
+.p2align       4
 _aesni_encrypt8:
        movups  (%rcx),%xmm0
        shrl    $1,%eax
@@ -359,9 +359,9 @@ _aesni_encrypt8:
 .byte  102,68,15,56,220,193
 .byte  102,68,15,56,220,201
        movups  16(%rcx),%xmm1
-       jmp     .Lenc_loop8_enter
-.align 16
-.Lenc_loop8:
+       jmp     L$enc_loop8_enter
+.p2align       4
+L$enc_loop8:
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
        decl    %eax
@@ -372,7 +372,7 @@ _aesni_encrypt8:
 .byte  102,68,15,56,220,193
 .byte  102,68,15,56,220,201
        movups  16(%rcx),%xmm1
-.Lenc_loop8_enter:
+L$enc_loop8_enter:
 .byte  102,15,56,220,208
 .byte  102,15,56,220,216
        leaq    32(%rcx),%rcx
@@ -383,7 +383,7 @@ _aesni_encrypt8:
 .byte  102,68,15,56,220,192
 .byte  102,68,15,56,220,200
        movups  (%rcx),%xmm0
-       jnz     .Lenc_loop8
+       jnz     L$enc_loop8
 
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
@@ -402,9 +402,9 @@ _aesni_encrypt8:
 .byte  102,68,15,56,221,192
 .byte  102,68,15,56,221,200
        .byte   0xf3,0xc3
-.size  _aesni_encrypt8,.-_aesni_encrypt8
-.type  _aesni_decrypt8,@function
-.align 16
+
+
+.p2align       4
 _aesni_decrypt8:
        movups  (%rcx),%xmm0
        shrl    $1,%eax
@@ -429,9 +429,9 @@ _aesni_decrypt8:
 .byte  102,68,15,56,222,193
 .byte  102,68,15,56,222,201
        movups  16(%rcx),%xmm1
-       jmp     .Ldec_loop8_enter
-.align 16
-.Ldec_loop8:
+       jmp     L$dec_loop8_enter
+.p2align       4
+L$dec_loop8:
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
        decl    %eax
@@ -442,7 +442,7 @@ _aesni_decrypt8:
 .byte  102,68,15,56,222,193
 .byte  102,68,15,56,222,201
        movups  16(%rcx),%xmm1
-.Ldec_loop8_enter:
+L$dec_loop8_enter:
 .byte  102,15,56,222,208
 .byte  102,15,56,222,216
        leaq    32(%rcx),%rcx
@@ -453,7 +453,7 @@ _aesni_decrypt8:
 .byte  102,68,15,56,222,192
 .byte  102,68,15,56,222,200
        movups  (%rcx),%xmm0
-       jnz     .Ldec_loop8
+       jnz     L$dec_loop8
 
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
@@ -472,23 +472,23 @@ _aesni_decrypt8:
 .byte  102,68,15,56,223,192
 .byte  102,68,15,56,223,200
        .byte   0xf3,0xc3
-.size  _aesni_decrypt8,.-_aesni_decrypt8
-.globl aesni_ecb_encrypt
-.type  aesni_ecb_encrypt,@function
-.align 16
-aesni_ecb_encrypt:
+
+.globl _aesni_ecb_encrypt
+
+.p2align       4
+_aesni_ecb_encrypt:
        andq    $-16,%rdx
-       jz      .Lecb_ret
+       jz      L$ecb_ret
 
        movl    240(%rcx),%eax
        movups  (%rcx),%xmm0
        movq    %rcx,%r11
        movl    %eax,%r10d
        testl   %r8d,%r8d
-       jz      .Lecb_decrypt
+       jz      L$ecb_decrypt
 
        cmpq    $128,%rdx
-       jb      .Lecb_enc_tail
+       jb      L$ecb_enc_tail
 
        movdqu  (%rdi),%xmm2
        movdqu  16(%rdi),%xmm3
@@ -500,9 +500,9 @@ aesni_ecb_encrypt:
        movdqu  112(%rdi),%xmm9
        leaq    128(%rdi),%rdi
        subq    $128,%rdx
-       jmp     .Lecb_enc_loop8_enter
-.align 16
-.Lecb_enc_loop8:
+       jmp     L$ecb_enc_loop8_enter
+.p2align       4
+L$ecb_enc_loop8:
        movups  %xmm2,(%rsi)
        movq    %r11,%rcx
        movdqu  (%rdi),%xmm2
@@ -523,12 +523,12 @@ aesni_ecb_encrypt:
        leaq    128(%rsi),%rsi
        movdqu  112(%rdi),%xmm9
        leaq    128(%rdi),%rdi
-.Lecb_enc_loop8_enter:
+L$ecb_enc_loop8_enter:
 
        call    _aesni_encrypt8
 
        subq    $128,%rdx
-       jnc     .Lecb_enc_loop8
+       jnc     L$ecb_enc_loop8
 
        movups  %xmm2,(%rsi)
        movq    %r11,%rcx
@@ -542,24 +542,24 @@ aesni_ecb_encrypt:
        movups  %xmm9,112(%rsi)
        leaq    128(%rsi),%rsi
        addq    $128,%rdx
-       jz      .Lecb_ret
+       jz      L$ecb_ret
 
-.Lecb_enc_tail:
+L$ecb_enc_tail:
        movups  (%rdi),%xmm2
        cmpq    $32,%rdx
-       jb      .Lecb_enc_one
+       jb      L$ecb_enc_one
        movups  16(%rdi),%xmm3
-       je      .Lecb_enc_two
+       je      L$ecb_enc_two
        movups  32(%rdi),%xmm4
        cmpq    $64,%rdx
-       jb      .Lecb_enc_three
+       jb      L$ecb_enc_three
        movups  48(%rdi),%xmm5
-       je      .Lecb_enc_four
+       je      L$ecb_enc_four
        movups  64(%rdi),%xmm6
        cmpq    $96,%rdx
-       jb      .Lecb_enc_five
+       jb      L$ecb_enc_five
        movups  80(%rdi),%xmm7
-       je      .Lecb_enc_six
+       je      L$ecb_enc_six
        movdqu  96(%rdi),%xmm8
        call    _aesni_encrypt8
        movups  %xmm2,(%rsi)
@@ -569,46 +569,46 @@ aesni_ecb_encrypt:
        movups  %xmm6,64(%rsi)
        movups  %xmm7,80(%rsi)
        movups  %xmm8,96(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_enc_one:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_enc_one:
        movups  (%rcx),%xmm0
        movups  16(%rcx),%xmm1
        leaq    32(%rcx),%rcx
        xorps   %xmm0,%xmm2
-.Loop_enc1_3:
+L$oop_enc1_3:
 .byte  102,15,56,220,209
        decl    %eax
        movups  (%rcx),%xmm1
        leaq    16(%rcx),%rcx
-       jnz     .Loop_enc1_3    
+       jnz     L$oop_enc1_3    
 .byte  102,15,56,221,209
        movups  %xmm2,(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_enc_two:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_enc_two:
        xorps   %xmm4,%xmm4
        call    _aesni_encrypt3
        movups  %xmm2,(%rsi)
        movups  %xmm3,16(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_enc_three:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_enc_three:
        call    _aesni_encrypt3
        movups  %xmm2,(%rsi)
        movups  %xmm3,16(%rsi)
        movups  %xmm4,32(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_enc_four:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_enc_four:
        call    _aesni_encrypt4
        movups  %xmm2,(%rsi)
        movups  %xmm3,16(%rsi)
        movups  %xmm4,32(%rsi)
        movups  %xmm5,48(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_enc_five:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_enc_five:
        xorps   %xmm7,%xmm7
        call    _aesni_encrypt6
        movups  %xmm2,(%rsi)
@@ -616,9 +616,9 @@ aesni_ecb_encrypt:
        movups  %xmm4,32(%rsi)
        movups  %xmm5,48(%rsi)
        movups  %xmm6,64(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_enc_six:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_enc_six:
        call    _aesni_encrypt6
        movups  %xmm2,(%rsi)
        movups  %xmm3,16(%rsi)
@@ -626,12 +626,12 @@ aesni_ecb_encrypt:
        movups  %xmm5,48(%rsi)
        movups  %xmm6,64(%rsi)
        movups  %xmm7,80(%rsi)
-       jmp     .Lecb_ret
+       jmp     L$ecb_ret
 
-.align 16
-.Lecb_decrypt:
+.p2align       4
+L$ecb_decrypt:
        cmpq    $128,%rdx
-       jb      .Lecb_dec_tail
+       jb      L$ecb_dec_tail
 
        movdqu  (%rdi),%xmm2
        movdqu  16(%rdi),%xmm3
@@ -643,9 +643,9 @@ aesni_ecb_encrypt:
        movdqu  112(%rdi),%xmm9
        leaq    128(%rdi),%rdi
        subq    $128,%rdx
-       jmp     .Lecb_dec_loop8_enter
-.align 16
-.Lecb_dec_loop8:
+       jmp     L$ecb_dec_loop8_enter
+.p2align       4
+L$ecb_dec_loop8:
        movups  %xmm2,(%rsi)
        movq    %r11,%rcx
        movdqu  (%rdi),%xmm2
@@ -666,13 +666,13 @@ aesni_ecb_encrypt:
        leaq    128(%rsi),%rsi
        movdqu  112(%rdi),%xmm9
        leaq    128(%rdi),%rdi
-.Lecb_dec_loop8_enter:
+L$ecb_dec_loop8_enter:
 
        call    _aesni_decrypt8
 
        movups  (%r11),%xmm0
        subq    $128,%rdx
-       jnc     .Lecb_dec_loop8
+       jnc     L$ecb_dec_loop8
 
        movups  %xmm2,(%rsi)
        movq    %r11,%rcx
@@ -686,24 +686,24 @@ aesni_ecb_encrypt:
        movups  %xmm9,112(%rsi)
        leaq    128(%rsi),%rsi
        addq    $128,%rdx
-       jz      .Lecb_ret
+       jz      L$ecb_ret
 
-.Lecb_dec_tail:
+L$ecb_dec_tail:
        movups  (%rdi),%xmm2
        cmpq    $32,%rdx
-       jb      .Lecb_dec_one
+       jb      L$ecb_dec_one
        movups  16(%rdi),%xmm3
-       je      .Lecb_dec_two
+       je      L$ecb_dec_two
        movups  32(%rdi),%xmm4
        cmpq    $64,%rdx
-       jb      .Lecb_dec_three
+       jb      L$ecb_dec_three
        movups  48(%rdi),%xmm5
-       je      .Lecb_dec_four
+       je      L$ecb_dec_four
        movups  64(%rdi),%xmm6
        cmpq    $96,%rdx
-       jb      .Lecb_dec_five
+       jb      L$ecb_dec_five
        movups  80(%rdi),%xmm7
-       je      .Lecb_dec_six
+       je      L$ecb_dec_six
        movups  96(%rdi),%xmm8
        movups  (%rcx),%xmm0
        call    _aesni_decrypt8
@@ -714,46 +714,46 @@ aesni_ecb_encrypt:
        movups  %xmm6,64(%rsi)
        movups  %xmm7,80(%rsi)
        movups  %xmm8,96(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_dec_one:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_dec_one:
        movups  (%rcx),%xmm0
        movups  16(%rcx),%xmm1
        leaq    32(%rcx),%rcx
        xorps   %xmm0,%xmm2
-.Loop_dec1_4:
+L$oop_dec1_4:
 .byte  102,15,56,222,209
        decl    %eax
        movups  (%rcx),%xmm1
        leaq    16(%rcx),%rcx
-       jnz     .Loop_dec1_4    
+       jnz     L$oop_dec1_4    
 .byte  102,15,56,223,209
        movups  %xmm2,(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_dec_two:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_dec_two:
        xorps   %xmm4,%xmm4
        call    _aesni_decrypt3
        movups  %xmm2,(%rsi)
        movups  %xmm3,16(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_dec_three:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_dec_three:
        call    _aesni_decrypt3
        movups  %xmm2,(%rsi)
        movups  %xmm3,16(%rsi)
        movups  %xmm4,32(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_dec_four:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_dec_four:
        call    _aesni_decrypt4
        movups  %xmm2,(%rsi)
        movups  %xmm3,16(%rsi)
        movups  %xmm4,32(%rsi)
        movups  %xmm5,48(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_dec_five:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_dec_five:
        xorps   %xmm7,%xmm7
        call    _aesni_decrypt6
        movups  %xmm2,(%rsi)
@@ -761,9 +761,9 @@ aesni_ecb_encrypt:
        movups  %xmm4,32(%rsi)
        movups  %xmm5,48(%rsi)
        movups  %xmm6,64(%rsi)
-       jmp     .Lecb_ret
-.align 16
-.Lecb_dec_six:
+       jmp     L$ecb_ret
+.p2align       4
+L$ecb_dec_six:
        call    _aesni_decrypt6
        movups  %xmm2,(%rsi)
        movups  %xmm3,16(%rsi)
@@ -772,17 +772,17 @@ aesni_ecb_encrypt:
        movups  %xmm6,64(%rsi)
        movups  %xmm7,80(%rsi)
 
-.Lecb_ret:
+L$ecb_ret:
        .byte   0xf3,0xc3
-.size  aesni_ecb_encrypt,.-aesni_ecb_encrypt
-.globl aesni_ccm64_encrypt_blocks
-.type  aesni_ccm64_encrypt_blocks,@function
-.align 16
-aesni_ccm64_encrypt_blocks:
+
+.globl _aesni_ccm64_encrypt_blocks
+
+.p2align       4
+_aesni_ccm64_encrypt_blocks:
        movl    240(%rcx),%eax
        movdqu  (%r8),%xmm9
-       movdqa  .Lincrement64(%rip),%xmm6
-       movdqa  .Lbswap_mask(%rip),%xmm7
+       movdqa  L$increment64(%rip),%xmm6
+       movdqa  L$bswap_mask(%rip),%xmm7
 
        shrl    $1,%eax
        leaq    0(%rcx),%r11
@@ -790,9 +790,9 @@ aesni_ccm64_encrypt_blocks:
        movdqa  %xmm9,%xmm2
        movl    %eax,%r10d
 .byte  102,68,15,56,0,207
-       jmp     .Lccm64_enc_outer
-.align 16
-.Lccm64_enc_outer:
+       jmp     L$ccm64_enc_outer
+.p2align       4
+L$ccm64_enc_outer:
        movups  (%r11),%xmm0
        movl    %r10d,%eax
        movups  (%rdi),%xmm8
@@ -804,7 +804,7 @@ aesni_ccm64_encrypt_blocks:
        xorps   %xmm0,%xmm3
        movups  (%rcx),%xmm0
 
-.Lccm64_enc2_loop:
+L$ccm64_enc2_loop:
 .byte  102,15,56,220,209
        decl    %eax
 .byte  102,15,56,220,217
@@ -813,7 +813,7 @@ aesni_ccm64_encrypt_blocks:
        leaq    32(%rcx),%rcx
 .byte  102,15,56,220,216
        movups  0(%rcx),%xmm0
-       jnz     .Lccm64_enc2_loop
+       jnz     L$ccm64_enc2_loop
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
        paddq   %xmm6,%xmm9
@@ -827,20 +827,20 @@ aesni_ccm64_encrypt_blocks:
        movups  %xmm8,(%rsi)
        leaq    16(%rsi),%rsi
 .byte  102,15,56,0,215
-       jnz     .Lccm64_enc_outer
+       jnz     L$ccm64_enc_outer
 
        movups  %xmm3,(%r9)
        .byte   0xf3,0xc3
-.size  aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
-.globl aesni_ccm64_decrypt_blocks
-.type  aesni_ccm64_decrypt_blocks,@function
-.align 16
-aesni_ccm64_decrypt_blocks:
+
+.globl _aesni_ccm64_decrypt_blocks
+
+.p2align       4
+_aesni_ccm64_decrypt_blocks:
        movl    240(%rcx),%eax
        movups  (%r8),%xmm9
        movdqu  (%r9),%xmm3
-       movdqa  .Lincrement64(%rip),%xmm6
-       movdqa  .Lbswap_mask(%rip),%xmm7
+       movdqa  L$increment64(%rip),%xmm6
+       movdqa  L$bswap_mask(%rip),%xmm7
 
        movaps  %xmm9,%xmm2
        movl    %eax,%r10d
@@ -850,19 +850,19 @@ aesni_ccm64_decrypt_blocks:
        movups  16(%rcx),%xmm1
        leaq    32(%rcx),%rcx
        xorps   %xmm0,%xmm2
-.Loop_enc1_5:
+L$oop_enc1_5:
 .byte  102,15,56,220,209
        decl    %eax
        movups  (%rcx),%xmm1
        leaq    16(%rcx),%rcx
-       jnz     .Loop_enc1_5    
+       jnz     L$oop_enc1_5    
 .byte  102,15,56,221,209
        movups  (%rdi),%xmm8
        paddq   %xmm6,%xmm9
        leaq    16(%rdi),%rdi
-       jmp     .Lccm64_dec_outer
-.align 16
-.Lccm64_dec_outer:
+       jmp     L$ccm64_dec_outer
+.p2align       4
+L$ccm64_dec_outer:
        xorps   %xmm2,%xmm8
        movdqa  %xmm9,%xmm2
        movl    %r10d,%eax
@@ -871,7 +871,7 @@ aesni_ccm64_decrypt_blocks:
 .byte  102,15,56,0,215
 
        subq    $1,%rdx
-       jz      .Lccm64_dec_break
+       jz      L$ccm64_dec_break
 
        movups  (%r11),%xmm0
        shrl    $1,%eax
@@ -882,7 +882,7 @@ aesni_ccm64_decrypt_blocks:
        xorps   %xmm8,%xmm3
        movups  (%rcx),%xmm0
 
-.Lccm64_dec2_loop:
+L$ccm64_dec2_loop:
 .byte  102,15,56,220,209
        decl    %eax
 .byte  102,15,56,220,217
@@ -891,7 +891,7 @@ aesni_ccm64_decrypt_blocks:
        leaq    32(%rcx),%rcx
 .byte  102,15,56,220,216
        movups  0(%rcx),%xmm0
-       jnz     .Lccm64_dec2_loop
+       jnz     L$ccm64_dec2_loop
        movups  (%rdi),%xmm8
        paddq   %xmm6,%xmm9
 .byte  102,15,56,220,209
@@ -899,35 +899,35 @@ aesni_ccm64_decrypt_blocks:
        leaq    16(%rdi),%rdi
 .byte  102,15,56,221,208
 .byte  102,15,56,221,216
-       jmp     .Lccm64_dec_outer
+       jmp     L$ccm64_dec_outer
 
-.align 16
-.Lccm64_dec_break:
+.p2align       4
+L$ccm64_dec_break:
 
        movups  (%r11),%xmm0
        movups  16(%r11),%xmm1
        xorps   %xmm0,%xmm8
        leaq    32(%r11),%r11
        xorps   %xmm8,%xmm3
-.Loop_enc1_6:
+L$oop_enc1_6:
 .byte  102,15,56,220,217
        decl    %eax
        movups  (%r11),%xmm1
        leaq    16(%r11),%r11
-       jnz     .Loop_enc1_6    
+       jnz     L$oop_enc1_6    
 .byte  102,15,56,221,217
        movups  %xmm3,(%r9)
        .byte   0xf3,0xc3
-.size  aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
-.globl aesni_ctr32_encrypt_blocks
-.type  aesni_ctr32_encrypt_blocks,@function
-.align 16
-aesni_ctr32_encrypt_blocks:
+
+.globl _aesni_ctr32_encrypt_blocks
+
+.p2align       4
+_aesni_ctr32_encrypt_blocks:
        cmpq    $1,%rdx
-       je      .Lctr32_one_shortcut
+       je      L$ctr32_one_shortcut
 
        movdqu  (%r8),%xmm14
-       movdqa  .Lbswap_mask(%rip),%xmm15
+       movdqa  L$bswap_mask(%rip),%xmm15
        xorl    %eax,%eax
 .byte  102,69,15,58,22,242,3
 .byte  102,68,15,58,34,240,3
@@ -956,15 +956,15 @@ aesni_ctr32_encrypt_blocks:
        pshufd  $128,%xmm12,%xmm3
        pshufd  $64,%xmm12,%xmm4
        cmpq    $6,%rdx
-       jb      .Lctr32_tail
+       jb      L$ctr32_tail
        shrl    $1,%eax
        movq    %rcx,%r11
        movl    %eax,%r10d
        subq    $6,%rdx
-       jmp     .Lctr32_loop6
+       jmp     L$ctr32_loop6
 
-.align 16
-.Lctr32_loop6:
+.p2align       4
+L$ctr32_loop6:
        pshufd  $192,%xmm13,%xmm5
        por     %xmm14,%xmm2
        movups  (%r11),%xmm0
@@ -986,7 +986,7 @@ aesni_ctr32_encrypt_blocks:
        leaq    32(%r11),%rcx
        pxor    %xmm0,%xmm4
 .byte  102,15,56,220,217
-       movdqa  .Lincrement32(%rip),%xmm13
+       movdqa  L$increment32(%rip),%xmm13
        pxor    %xmm0,%xmm5
 .byte  102,15,56,220,225
        movdqa  -40(%rsp),%xmm12
@@ -997,9 +997,9 @@ aesni_ctr32_encrypt_blocks:
        decl    %eax
 .byte  102,15,56,220,241
 .byte  102,15,56,220,249
-       jmp     .Lctr32_enc_loop6_enter
-.align 16
-.Lctr32_enc_loop6:
+       jmp     L$ctr32_enc_loop6_enter
+.p2align       4
+L$ctr32_enc_loop6:
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
        decl    %eax
@@ -1007,7 +1007,7 @@ aesni_ctr32_encrypt_blocks:
 .byte  102,15,56,220,233
 .byte  102,15,56,220,241
 .byte  102,15,56,220,249
-.Lctr32_enc_loop6_enter:
+L$ctr32_enc_loop6_enter:
        movups  16(%rcx),%xmm1
 .byte  102,15,56,220,208
 .byte  102,15,56,220,216
@@ -1017,7 +1017,7 @@ aesni_ctr32_encrypt_blocks:
 .byte  102,15,56,220,240
 .byte  102,15,56,220,248
        movups  (%rcx),%xmm0
-       jnz     .Lctr32_enc_loop6
+       jnz     L$ctr32_enc_loop6
 
 .byte  102,15,56,220,209
        paddd   %xmm13,%xmm12
@@ -1064,33 +1064,33 @@ aesni_ctr32_encrypt_blocks:
        leaq    96(%rsi),%rsi
        movl    %r10d,%eax
        subq    $6,%rdx
-       jnc     .Lctr32_loop6
+       jnc     L$ctr32_loop6
 
        addq    $6,%rdx
-       jz      .Lctr32_done
+       jz      L$ctr32_done
        movq    %r11,%rcx
        leal    1(%rax,%rax,1),%eax
 
-.Lctr32_tail:
+L$ctr32_tail:
        por     %xmm14,%xmm2
        movups  (%rdi),%xmm8
        cmpq    $2,%rdx
-       jb      .Lctr32_one
+       jb      L$ctr32_one
 
        por     %xmm14,%xmm3
        movups  16(%rdi),%xmm9
-       je      .Lctr32_two
+       je      L$ctr32_two
 
        pshufd  $192,%xmm13,%xmm5
        por     %xmm14,%xmm4
        movups  32(%rdi),%xmm10
        cmpq    $4,%rdx
-       jb      .Lctr32_three
+       jb      L$ctr32_three
 
        pshufd  $128,%xmm13,%xmm6
        por     %xmm14,%xmm5
        movups  48(%rdi),%xmm11
-       je      .Lctr32_four
+       je      L$ctr32_four
 
        por     %xmm14,%xmm6
        xorps   %xmm7,%xmm7
@@ -1108,41 +1108,41 @@ aesni_ctr32_encrypt_blocks:
        xorps   %xmm6,%xmm1
        movups  %xmm11,48(%rsi)
        movups  %xmm1,64(%rsi)
-       jmp     .Lctr32_done
+       jmp     L$ctr32_done
 
-.align 16
-.Lctr32_one_shortcut:
+.p2align       4
+L$ctr32_one_shortcut:
        movups  (%r8),%xmm2
        movups  (%rdi),%xmm8
        movl    240(%rcx),%eax
-.Lctr32_one:
+L$ctr32_one:
        movups  (%rcx),%xmm0
        movups  16(%rcx),%xmm1
        leaq    32(%rcx),%rcx
        xorps   %xmm0,%xmm2
-.Loop_enc1_7:
+L$oop_enc1_7:
 .byte  102,15,56,220,209
        decl    %eax
        movups  (%rcx),%xmm1
        leaq    16(%rcx),%rcx
-       jnz     .Loop_enc1_7    
+       jnz     L$oop_enc1_7    
 .byte  102,15,56,221,209
        xorps   %xmm2,%xmm8
        movups  %xmm8,(%rsi)
-       jmp     .Lctr32_done
+       jmp     L$ctr32_done
 
-.align 16
-.Lctr32_two:
+.p2align       4
+L$ctr32_two:
        xorps   %xmm4,%xmm4
        call    _aesni_encrypt3
        xorps   %xmm2,%xmm8
        xorps   %xmm3,%xmm9
        movups  %xmm8,(%rsi)
        movups  %xmm9,16(%rsi)
-       jmp     .Lctr32_done
+       jmp     L$ctr32_done
 
-.align 16
-.Lctr32_three:
+.p2align       4
+L$ctr32_three:
        call    _aesni_encrypt3
        xorps   %xmm2,%xmm8
        xorps   %xmm3,%xmm9
@@ -1150,10 +1150,10 @@ aesni_ctr32_encrypt_blocks:
        xorps   %xmm4,%xmm10
        movups  %xmm9,16(%rsi)
        movups  %xmm10,32(%rsi)
-       jmp     .Lctr32_done
+       jmp     L$ctr32_done
 
-.align 16
-.Lctr32_four:
+.p2align       4
+L$ctr32_four:
        call    _aesni_encrypt4
        xorps   %xmm2,%xmm8
        xorps   %xmm3,%xmm9
@@ -1164,13 +1164,13 @@ aesni_ctr32_encrypt_blocks:
        movups  %xmm10,32(%rsi)
        movups  %xmm11,48(%rsi)
 
-.Lctr32_done:
+L$ctr32_done:
        .byte   0xf3,0xc3
-.size  aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
-.globl aesni_xts_encrypt
-.type  aesni_xts_encrypt,@function
-.align 16
-aesni_xts_encrypt:
+
+.globl _aesni_xts_encrypt
+
+.p2align       4
+_aesni_xts_encrypt:
        leaq    -104(%rsp),%rsp
        movups  (%r9),%xmm15
        movl    240(%r8),%eax
@@ -1179,19 +1179,19 @@ aesni_xts_encrypt:
        movups  16(%r8),%xmm1
        leaq    32(%r8),%r8
        xorps   %xmm0,%xmm15
-.Loop_enc1_8:
+L$oop_enc1_8:
 .byte  102,68,15,56,220,249
        decl    %eax
        movups  (%r8),%xmm1
        leaq    16(%r8),%r8
-       jnz     .Loop_enc1_8    
+       jnz     L$oop_enc1_8    
 .byte  102,68,15,56,221,249
        movq    %rcx,%r11
        movl    %r10d,%eax
        movq    %rdx,%r9
        andq    $-16,%rdx
 
-       movdqa  .Lxts_magic(%rip),%xmm8
+       movdqa  L$xts_magic(%rip),%xmm8
        pxor    %xmm14,%xmm14
        pcmpgtd %xmm15,%xmm14
        pshufd  $19,%xmm14,%xmm9
@@ -1223,15 +1223,15 @@ aesni_xts_encrypt:
        pcmpgtd %xmm15,%xmm14
        pxor    %xmm9,%xmm15
        subq    $96,%rdx
-       jc      .Lxts_enc_short
+       jc      L$xts_enc_short
 
        shrl    $1,%eax
        subl    $1,%eax
        movl    %eax,%r10d
-       jmp     .Lxts_enc_grandloop
+       jmp     L$xts_enc_grandloop
 
-.align 16
-.Lxts_enc_grandloop:
+.p2align       4
+L$xts_enc_grandloop:
        pshufd  $19,%xmm14,%xmm9
        movdqa  %xmm15,%xmm14
        paddq   %xmm15,%xmm15
@@ -1279,10 +1279,10 @@ aesni_xts_encrypt:
 .byte  102,15,56,220,249
        pxor    %xmm14,%xmm14
        pcmpgtd %xmm15,%xmm14
-       jmp     .Lxts_enc_loop6_enter
+       jmp     L$xts_enc_loop6_enter
 
-.align 16
-.Lxts_enc_loop6:
+.p2align       4
+L$xts_enc_loop6:
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
        decl    %eax
@@ -1290,7 +1290,7 @@ aesni_xts_encrypt:
 .byte  102,15,56,220,233
 .byte  102,15,56,220,241
 .byte  102,15,56,220,249
-.Lxts_enc_loop6_enter:
+L$xts_enc_loop6_enter:
        movups  16(%rcx),%xmm1
 .byte  102,15,56,220,208
 .byte  102,15,56,220,216
@@ -1300,7 +1300,7 @@ aesni_xts_encrypt:
 .byte  102,15,56,220,240
 .byte  102,15,56,220,248
        movups  (%rcx),%xmm0
-       jnz     .Lxts_enc_loop6
+       jnz     L$xts_enc_loop6
 
        pshufd  $19,%xmm14,%xmm9
        pxor    %xmm14,%xmm14
@@ -1382,23 +1382,23 @@ aesni_xts_encrypt:
        movups  %xmm7,80(%rsi)
        leaq    96(%rsi),%rsi
        subq    $96,%rdx
-       jnc     .Lxts_enc_grandloop
+       jnc     L$xts_enc_grandloop
 
        leal    3(%rax,%rax,1),%eax
        movq    %r11,%rcx
        movl    %eax,%r10d
 
-.Lxts_enc_short:
+L$xts_enc_short:
        addq    $96,%rdx
-       jz      .Lxts_enc_done
+       jz      L$xts_enc_done
 
        cmpq    $32,%rdx
-       jb      .Lxts_enc_one
-       je      .Lxts_enc_two
+       jb      L$xts_enc_one
+       je      L$xts_enc_two
 
        cmpq    $64,%rdx
-       jb      .Lxts_enc_three
-       je      .Lxts_enc_four
+       jb      L$xts_enc_three
+       je      L$xts_enc_four
 
        pshufd  $19,%xmm14,%xmm9
        movdqa  %xmm15,%xmm14
@@ -1432,10 +1432,10 @@ aesni_xts_encrypt:
        movdqu  %xmm5,48(%rsi)
        movdqu  %xmm6,64(%rsi)
        leaq    80(%rsi),%rsi
-       jmp     .Lxts_enc_done
+       jmp     L$xts_enc_done
 
-.align 16
-.Lxts_enc_one:
+.p2align       4
+L$xts_enc_one:
        movups  (%rdi),%xmm2
        leaq    16(%rdi),%rdi
        xorps   %xmm10,%xmm2
@@ -1443,21 +1443,21 @@ aesni_xts_encrypt:
        movups  16(%rcx),%xmm1
        leaq    32(%rcx),%rcx
        xorps   %xmm0,%xmm2
-.Loop_enc1_9:
+L$oop_enc1_9:
 .byte  102,15,56,220,209
        decl    %eax
        movups  (%rcx),%xmm1
        leaq    16(%rcx),%rcx
-       jnz     .Loop_enc1_9    
+       jnz     L$oop_enc1_9    
 .byte  102,15,56,221,209
        xorps   %xmm10,%xmm2
        movdqa  %xmm11,%xmm10
        movups  %xmm2,(%rsi)
        leaq    16(%rsi),%rsi
-       jmp     .Lxts_enc_done
+       jmp     L$xts_enc_done
 
-.align 16
-.Lxts_enc_two:
+.p2align       4
+L$xts_enc_two:
        movups  (%rdi),%xmm2
        movups  16(%rdi),%xmm3
        leaq    32(%rdi),%rdi
@@ -1472,10 +1472,10 @@ aesni_xts_encrypt:
        movups  %xmm2,(%rsi)
        movups  %xmm3,16(%rsi)
        leaq    32(%rsi),%rsi
-       jmp     .Lxts_enc_done
+       jmp     L$xts_enc_done
 
-.align 16
-.Lxts_enc_three:
+.p2align       4
+L$xts_enc_three:
        movups  (%rdi),%xmm2
        movups  16(%rdi),%xmm3
        movups  32(%rdi),%xmm4
@@ -1494,10 +1494,10 @@ aesni_xts_encrypt:
        movups  %xmm3,16(%rsi)
        movups  %xmm4,32(%rsi)
        leaq    48(%rsi),%rsi
-       jmp     .Lxts_enc_done
+       jmp     L$xts_enc_done
 
-.align 16
-.Lxts_enc_four:
+.p2align       4
+L$xts_enc_four:
        movups  (%rdi),%xmm2
        movups  16(%rdi),%xmm3
        movups  32(%rdi),%xmm4
@@ -1520,15 +1520,15 @@ aesni_xts_encrypt:
        movups  %xmm4,32(%rsi)
        movups  %xmm5,48(%rsi)
        leaq    64(%rsi),%rsi
-       jmp     .Lxts_enc_done
+       jmp     L$xts_enc_done
 
-.align 16
-.Lxts_enc_done:
+.p2align       4
+L$xts_enc_done:
        andq    $15,%r9
-       jz      .Lxts_enc_ret
+       jz      L$xts_enc_ret
        movq    %r9,%rdx
 
-.Lxts_enc_steal:
+L$xts_enc_steal:
        movzbl  (%rdi),%eax
        movzbl  -16(%rsi),%ecx
        leaq    1(%rdi),%rdi
@@ -1536,7 +1536,7 @@ aesni_xts_encrypt:
        movb    %cl,0(%rsi)
        leaq    1(%rsi),%rsi
        subq    $1,%rdx
-       jnz     .Lxts_enc_steal
+       jnz     L$xts_enc_steal
 
        subq    %r9,%rsi
        movq    %r11,%rcx
@@ -1548,25 +1548,25 @@ aesni_xts_encrypt:
        movups  16(%rcx),%xmm1
        leaq    32(%rcx),%rcx
        xorps   %xmm0,%xmm2
-.Loop_enc1_10:
+L$oop_enc1_10:
 .byte  102,15,56,220,209
        decl    %eax
        movups  (%rcx),%xmm1
        leaq    16(%rcx),%rcx
-       jnz     .Loop_enc1_10   
+       jnz     L$oop_enc1_10   
 .byte  102,15,56,221,209
        xorps   %xmm10,%xmm2
        movups  %xmm2,-16(%rsi)
 
-.Lxts_enc_ret:
+L$xts_enc_ret:
        leaq    104(%rsp),%rsp
-.Lxts_enc_epilogue:
+L$xts_enc_epilogue:
        .byte   0xf3,0xc3
-.size  aesni_xts_encrypt,.-aesni_xts_encrypt
-.globl aesni_xts_decrypt
-.type  aesni_xts_decrypt,@function
-.align 16
-aesni_xts_decrypt:
+
+.globl _aesni_xts_decrypt
+
+.p2align       4
+_aesni_xts_decrypt:
        leaq    -104(%rsp),%rsp
        movups  (%r9),%xmm15
        movl    240(%r8),%eax
@@ -1575,12 +1575,12 @@ aesni_xts_decrypt:
        movups  16(%r8),%xmm1
        leaq    32(%r8),%r8
        xorps   %xmm0,%xmm15
-.Loop_enc1_11:
+L$oop_enc1_11:
 .byte  102,68,15,56,220,249
        decl    %eax
        movups  (%r8),%xmm1
        leaq    16(%r8),%r8
-       jnz     .Loop_enc1_11   
+       jnz     L$oop_enc1_11   
 .byte  102,68,15,56,221,249
        xorl    %eax,%eax
        testq   $15,%rdx
@@ -1593,7 +1593,7 @@ aesni_xts_decrypt:
        movq    %rdx,%r9
        andq    $-16,%rdx
 
-       movdqa  .Lxts_magic(%rip),%xmm8
+       movdqa  L$xts_magic(%rip),%xmm8
        pxor    %xmm14,%xmm14
        pcmpgtd %xmm15,%xmm14
        pshufd  $19,%xmm14,%xmm9
@@ -1625,15 +1625,15 @@ aesni_xts_decrypt:
        pcmpgtd %xmm15,%xmm14
        pxor    %xmm9,%xmm15
        subq    $96,%rdx
-       jc      .Lxts_dec_short
+       jc      L$xts_dec_short
 
        shrl    $1,%eax
        subl    $1,%eax
        movl    %eax,%r10d
-       jmp     .Lxts_dec_grandloop
+       jmp     L$xts_dec_grandloop
 
-.align 16
-.Lxts_dec_grandloop:
+.p2align       4
+L$xts_dec_grandloop:
        pshufd  $19,%xmm14,%xmm9
        movdqa  %xmm15,%xmm14
        paddq   %xmm15,%xmm15
@@ -1681,10 +1681,10 @@ aesni_xts_decrypt:
 .byte  102,15,56,222,249
        pxor    %xmm14,%xmm14
        pcmpgtd %xmm15,%xmm14
-       jmp     .Lxts_dec_loop6_enter
+       jmp     L$xts_dec_loop6_enter
 
-.align 16
-.Lxts_dec_loop6:
+.p2align       4
+L$xts_dec_loop6:
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
        decl    %eax
@@ -1692,7 +1692,7 @@ aesni_xts_decrypt:
 .byte  102,15,56,222,233
 .byte  102,15,56,222,241
 .byte  102,15,56,222,249
-.Lxts_dec_loop6_enter:
+L$xts_dec_loop6_enter:
        movups  16(%rcx),%xmm1
 .byte  102,15,56,222,208
 .byte  102,15,56,222,216
@@ -1702,7 +1702,7 @@ aesni_xts_decrypt:
 .byte  102,15,56,222,240
 .byte  102,15,56,222,248
        movups  (%rcx),%xmm0
-       jnz     .Lxts_dec_loop6
+       jnz     L$xts_dec_loop6
 
        pshufd  $19,%xmm14,%xmm9
        pxor    %xmm14,%xmm14
@@ -1784,23 +1784,23 @@ aesni_xts_decrypt:
        movups  %xmm7,80(%rsi)
        leaq    96(%rsi),%rsi
        subq    $96,%rdx
-       jnc     .Lxts_dec_grandloop
+       jnc     L$xts_dec_grandloop
 
        leal    3(%rax,%rax,1),%eax
        movq    %r11,%rcx
        movl    %eax,%r10d
 
-.Lxts_dec_short:
+L$xts_dec_short:
        addq    $96,%rdx
-       jz      .Lxts_dec_done
+       jz      L$xts_dec_done
 
        cmpq    $32,%rdx
-       jb      .Lxts_dec_one
-       je      .Lxts_dec_two
+       jb      L$xts_dec_one
+       je      L$xts_dec_two
 
        cmpq    $64,%rdx
-       jb      .Lxts_dec_three
-       je      .Lxts_dec_four
+       jb      L$xts_dec_three
+       je      L$xts_dec_four
 
        pshufd  $19,%xmm14,%xmm9
        movdqa  %xmm15,%xmm14
@@ -1837,16 +1837,16 @@ aesni_xts_decrypt:
        leaq    80(%rsi),%rsi
        pshufd  $19,%xmm14,%xmm11
        andq    $15,%r9
-       jz      .Lxts_dec_ret
+       jz      L$xts_dec_ret
 
        movdqa  %xmm15,%xmm10
        paddq   %xmm15,%xmm15
        pand    %xmm8,%xmm11
        pxor    %xmm15,%xmm11
-       jmp     .Lxts_dec_done2
+       jmp     L$xts_dec_done2
 
-.align 16
-.Lxts_dec_one:
+.p2align       4
+L$xts_dec_one:
        movups  (%rdi),%xmm2
        leaq    16(%rdi),%rdi
        xorps   %xmm10,%xmm2
@@ -1854,22 +1854,22 @@ aesni_xts_decrypt:
        movups  16(%rcx),%xmm1
        leaq    32(%rcx),%rcx
        xorps   %xmm0,%xmm2
-.Loop_dec1_12:
+L$oop_dec1_12:
 .byte  102,15,56,222,209
        decl    %eax
        movups  (%rcx),%xmm1
        leaq    16(%rcx),%rcx
-       jnz     .Loop_dec1_12   
+       jnz     L$oop_dec1_12   
 .byte  102,15,56,223,209
        xorps   %xmm10,%xmm2
        movdqa  %xmm11,%xmm10
        movups  %xmm2,(%rsi)
        movdqa  %xmm12,%xmm11
        leaq    16(%rsi),%rsi
-       jmp     .Lxts_dec_done
+       jmp     L$xts_dec_done
 
-.align 16
-.Lxts_dec_two:
+.p2align       4
+L$xts_dec_two:
        movups  (%rdi),%xmm2
        movups  16(%rdi),%xmm3
        leaq    32(%rdi),%rdi
@@ -1885,10 +1885,10 @@ aesni_xts_decrypt:
        movups  %xmm2,(%rsi)
        movups  %xmm3,16(%rsi)
        leaq    32(%rsi),%rsi
-       jmp     .Lxts_dec_done
+       jmp     L$xts_dec_done
 
-.align 16
-.Lxts_dec_three:
+.p2align       4
+L$xts_dec_three:
        movups  (%rdi),%xmm2
        movups  16(%rdi),%xmm3
        movups  32(%rdi),%xmm4
@@ -1908,10 +1908,10 @@ aesni_xts_decrypt:
        movups  %xmm3,16(%rsi)
        movups  %xmm4,32(%rsi)
        leaq    48(%rsi),%rsi
-       jmp     .Lxts_dec_done
+       jmp     L$xts_dec_done
 
-.align 16
-.Lxts_dec_four:
+.p2align       4
+L$xts_dec_four:
        pshufd  $19,%xmm14,%xmm9
        movdqa  %xmm15,%xmm14
        paddq   %xmm15,%xmm15
@@ -1941,13 +1941,13 @@ aesni_xts_decrypt:
        movups  %xmm4,32(%rsi)
        movups  %xmm5,48(%rsi)
        leaq    64(%rsi),%rsi
-       jmp     .Lxts_dec_done
+       jmp     L$xts_dec_done
 
-.align 16
-.Lxts_dec_done:
+.p2align       4
+L$xts_dec_done:
        andq    $15,%r9
-       jz      .Lxts_dec_ret
-.Lxts_dec_done2:
+       jz      L$xts_dec_ret
+L$xts_dec_done2:
        movq    %r9,%rdx
        movq    %r11,%rcx
        movl    %r10d,%eax
@@ -1958,17 +1958,17 @@ aesni_xts_decrypt:
        movups  16(%rcx),%xmm1
        leaq    32(%rcx),%rcx
        xorps   %xmm0,%xmm2
-.Loop_dec1_13:
+L$oop_dec1_13:
 .byte  102,15,56,222,209
        decl    %eax
        movups  (%rcx),%xmm1
        leaq    16(%rcx),%rcx
-       jnz     .Loop_dec1_13   
+       jnz     L$oop_dec1_13   
 .byte  102,15,56,223,209
        xorps   %xmm11,%xmm2
        movups  %xmm2,(%rsi)
 
-.Lxts_dec_steal:
+L$xts_dec_steal:
        movzbl  16(%rdi),%eax
        movzbl  (%rsi),%ecx
        leaq    1(%rdi),%rdi
@@ -1976,7 +1976,7 @@ aesni_xts_decrypt:
        movb    %cl,16(%rsi)
        leaq    1(%rsi),%rsi
        subq    $1,%rdx
-       jnz     .Lxts_dec_steal
+       jnz     L$xts_dec_steal
 
        subq    %r9,%rsi
        movq    %r11,%rcx
@@ -1988,41 +1988,41 @@ aesni_xts_decrypt:
        movups  16(%rcx),%xmm1
        leaq    32(%rcx),%rcx
        xorps   %xmm0,%xmm2
-.Loop_dec1_14:
+L$oop_dec1_14:
 .byte  102,15,56,222,209
        decl    %eax
        movups  (%rcx),%xmm1
        leaq    16(%rcx),%rcx
-       jnz     .Loop_dec1_14   
+       jnz     L$oop_dec1_14   
 .byte  102,15,56,223,209
        xorps   %xmm10,%xmm2
        movups  %xmm2,(%rsi)
 
-.Lxts_dec_ret:
+L$xts_dec_ret:
        leaq    104(%rsp),%rsp
-.Lxts_dec_epilogue:
+L$xts_dec_epilogue:
        .byte   0xf3,0xc3
-.size  aesni_xts_decrypt,.-aesni_xts_decrypt
-.globl aesni_cbc_encrypt
-.type  aesni_cbc_encrypt,@function
-.align 16
-aesni_cbc_encrypt:
+
+.globl _aesni_cbc_encrypt
+
+.p2align       4
+_aesni_cbc_encrypt:
        testq   %rdx,%rdx
-       jz      .Lcbc_ret
+       jz      L$cbc_ret
 
        movl    240(%rcx),%r10d
        movq    %rcx,%r11
        testl   %r9d,%r9d
-       jz      .Lcbc_decrypt
+       jz      L$cbc_decrypt
 
        movups  (%r8),%xmm2
        movl    %r10d,%eax
        cmpq    $16,%rdx
-       jb      .Lcbc_enc_tail
+       jb      L$cbc_enc_tail
        subq    $16,%rdx
-       jmp     .Lcbc_enc_loop
-.align 16
-.Lcbc_enc_loop:
+       jmp     L$cbc_enc_loop
+.p2align       4
+L$cbc_enc_loop:
        movups  (%rdi),%xmm3
        leaq    16(%rdi),%rdi
 
@@ -2031,25 +2031,25 @@ aesni_cbc_encrypt:
        xorps   %xmm0,%xmm3
        leaq    32(%rcx),%rcx
        xorps   %xmm3,%xmm2
-.Loop_enc1_15:
+L$oop_enc1_15:
 .byte  102,15,56,220,209
        decl    %eax
        movups  (%rcx),%xmm1
        leaq    16(%rcx),%rcx
-       jnz     .Loop_enc1_15   
+       jnz     L$oop_enc1_15   
 .byte  102,15,56,221,209
        movl    %r10d,%eax
        movq    %r11,%rcx
        movups  %xmm2,0(%rsi)
        leaq    16(%rsi),%rsi
        subq    $16,%rdx
-       jnc     .Lcbc_enc_loop
+       jnc     L$cbc_enc_loop
        addq    $16,%rdx
-       jnz     .Lcbc_enc_tail
+       jnz     L$cbc_enc_tail
        movups  %xmm2,(%r8)
-       jmp     .Lcbc_ret
+       jmp     L$cbc_ret
 
-.Lcbc_enc_tail:
+L$cbc_enc_tail:
        movq    %rdx,%rcx
        xchgq   %rdi,%rsi
 .long  0x9066A4F3      
@@ -2062,25 +2062,25 @@ aesni_cbc_encrypt:
        movq    %rdi,%rsi
        movq    %r11,%rcx
        xorq    %rdx,%rdx
-       jmp     .Lcbc_enc_loop  
+       jmp     L$cbc_enc_loop  
 
-.align 16
-.Lcbc_decrypt:
+.p2align       4
+L$cbc_decrypt:
        movups  (%r8),%xmm9
        movl    %r10d,%eax
        cmpq    $112,%rdx
-       jbe     .Lcbc_dec_tail
+       jbe     L$cbc_dec_tail
        shrl    $1,%r10d
        subq    $112,%rdx
        movl    %r10d,%eax
        movaps  %xmm9,-24(%rsp)
-       jmp     .Lcbc_dec_loop8_enter
-.align 16
-.Lcbc_dec_loop8:
+       jmp     L$cbc_dec_loop8_enter
+.p2align       4
+L$cbc_dec_loop8:
        movaps  %xmm0,-24(%rsp)
        movups  %xmm9,(%rsi)
        leaq    16(%rsi),%rsi
-.Lcbc_dec_loop8_enter:
+L$cbc_dec_loop8_enter:
        movups  (%rcx),%xmm0
        movups  (%rdi),%xmm2
        movups  16(%rdi),%xmm3
@@ -2113,7 +2113,7 @@ aesni_cbc_encrypt:
 .byte  102,68,15,56,222,201
        movups  16(%rcx),%xmm1
 
-       call    .Ldec_loop8_enter
+       call    L$dec_loop8_enter
 
        movups  (%rdi),%xmm1
        movups  16(%rdi),%xmm0
@@ -2143,42 +2143,42 @@ aesni_cbc_encrypt:
        movups  %xmm8,96(%rsi)
        leaq    112(%rsi),%rsi
        subq    $128,%rdx
-       ja      .Lcbc_dec_loop8
+       ja      L$cbc_dec_loop8
 
        movaps  %xmm9,%xmm2
        movaps  %xmm0,%xmm9
        addq    $112,%rdx
-       jle     .Lcbc_dec_tail_collected
+       jle     L$cbc_dec_tail_collected
        movups  %xmm2,(%rsi)
        leal    1(%r10,%r10,1),%eax
        leaq    16(%rsi),%rsi
-.Lcbc_dec_tail:
+L$cbc_dec_tail:
        movups  (%rdi),%xmm2
        movaps  %xmm2,%xmm8
        cmpq    $16,%rdx
-       jbe     .Lcbc_dec_one
+       jbe     L$cbc_dec_one
 
        movups  16(%rdi),%xmm3
        movaps  %xmm3,%xmm7
        cmpq    $32,%rdx
-       jbe     .Lcbc_dec_two
+       jbe     L$cbc_dec_two
 
        movups  32(%rdi),%xmm4
        movaps  %xmm4,%xmm6
        cmpq    $48,%rdx
-       jbe     .Lcbc_dec_three
+       jbe     L$cbc_dec_three
 
        movups  48(%rdi),%xmm5
        cmpq    $64,%rdx
-       jbe     .Lcbc_dec_four
+       jbe     L$cbc_dec_four
 
        movups  64(%rdi),%xmm6
        cmpq    $80,%rdx
-       jbe     .Lcbc_dec_five
+       jbe     L$cbc_dec_five
 
        movups  80(%rdi),%xmm7
        cmpq    $96,%rdx
-       jbe     .Lcbc_dec_six
+       jbe     L$cbc_dec_six
 
        movups  96(%rdi),%xmm8
        movaps  %xmm9,-24(%rsp)
@@ -2206,26 +2206,26 @@ aesni_cbc_encrypt:
        leaq    96(%rsi),%rsi
        movaps  %xmm8,%xmm2
        subq    $112,%rdx
-       jmp     .Lcbc_dec_tail_collected
-.align 16
-.Lcbc_dec_one:
+       jmp     L$cbc_dec_tail_collected
+.p2align       4
+L$cbc_dec_one:
        movups  (%rcx),%xmm0
        movups  16(%rcx),%xmm1
        leaq    32(%rcx),%rcx
        xorps   %xmm0,%xmm2
-.Loop_dec1_16:
+L$oop_dec1_16:
 .byte  102,15,56,222,209
        decl    %eax
        movups  (%rcx),%xmm1
        leaq    16(%rcx),%rcx
-       jnz     .Loop_dec1_16   
+       jnz     L$oop_dec1_16   
 .byte  102,15,56,223,209
        xorps   %xmm9,%xmm2
        movaps  %xmm8,%xmm9
        subq    $16,%rdx
-       jmp     .Lcbc_dec_tail_collected
-.align 16
-.Lcbc_dec_two:
+       jmp     L$cbc_dec_tail_collected
+.p2align       4
+L$cbc_dec_two:
        xorps   %xmm4,%xmm4
        call    _aesni_decrypt3
        xorps   %xmm9,%xmm2
@@ -2235,9 +2235,9 @@ aesni_cbc_encrypt:
        movaps  %xmm3,%xmm2
        leaq    16(%rsi),%rsi
        subq    $32,%rdx
-       jmp     .Lcbc_dec_tail_collected
-.align 16
-.Lcbc_dec_three:
+       jmp     L$cbc_dec_tail_collected
+.p2align       4
+L$cbc_dec_three:
        call    _aesni_decrypt3
        xorps   %xmm9,%xmm2
        xorps   %xmm8,%xmm3
@@ -2248,9 +2248,9 @@ aesni_cbc_encrypt:
        movaps  %xmm4,%xmm2
        leaq    32(%rsi),%rsi
        subq    $48,%rdx
-       jmp     .Lcbc_dec_tail_collected
-.align 16
-.Lcbc_dec_four:
+       jmp     L$cbc_dec_tail_collected
+.p2align       4
+L$cbc_dec_four:
        call    _aesni_decrypt4
        xorps   %xmm9,%xmm2
        movups  48(%rdi),%xmm9
@@ -2263,9 +2263,9 @@ aesni_cbc_encrypt:
        movaps  %xmm5,%xmm2
        leaq    48(%rsi),%rsi
        subq    $64,%rdx
-       jmp     .Lcbc_dec_tail_collected
-.align 16
-.Lcbc_dec_five:
+       jmp     L$cbc_dec_tail_collected
+.p2align       4
+L$cbc_dec_five:
        xorps   %xmm7,%xmm7
        call    _aesni_decrypt6
        movups  16(%rdi),%xmm1
@@ -2284,9 +2284,9 @@ aesni_cbc_encrypt:
        leaq    64(%rsi),%rsi
        movaps  %xmm6,%xmm2
        subq    $80,%rdx
-       jmp     .Lcbc_dec_tail_collected
-.align 16
-.Lcbc_dec_six:
+       jmp     L$cbc_dec_tail_collected
+.p2align       4
+L$cbc_dec_six:
        call    _aesni_decrypt6
        movups  16(%rdi),%xmm1
        movups  32(%rdi),%xmm0
@@ -2307,16 +2307,16 @@ aesni_cbc_encrypt:
        leaq    80(%rsi),%rsi
        movaps  %xmm7,%xmm2
        subq    $96,%rdx
-       jmp     .Lcbc_dec_tail_collected
-.align 16
-.Lcbc_dec_tail_collected:
+       jmp     L$cbc_dec_tail_collected
+.p2align       4
+L$cbc_dec_tail_collected:
        andq    $15,%rdx
        movups  %xmm9,(%r8)
-       jnz     .Lcbc_dec_tail_partial
+       jnz     L$cbc_dec_tail_partial
        movups  %xmm2,(%rsi)
-       jmp     .Lcbc_dec_ret
-.align 16
-.Lcbc_dec_tail_partial:
+       jmp     L$cbc_dec_ret
+.p2align       4
+L$cbc_dec_tail_partial:
        movaps  %xmm2,-24(%rsp)
        movq    $16,%rcx
        movq    %rsi,%rdi
@@ -2324,19 +2324,19 @@ aesni_cbc_encrypt:
        leaq    -24(%rsp),%rsi
 .long  0x9066A4F3      
 
-.Lcbc_dec_ret:
-.Lcbc_ret:
+L$cbc_dec_ret:
+L$cbc_ret:
        .byte   0xf3,0xc3
-.size  aesni_cbc_encrypt,.-aesni_cbc_encrypt
-.globl aesni_set_decrypt_key
-.type  aesni_set_decrypt_key,@function
-.align 16
-aesni_set_decrypt_key:
+
+.globl _aesni_set_decrypt_key
+
+.p2align       4
+_aesni_set_decrypt_key:
 .byte  0x48,0x83,0xEC,0x08     
        call    __aesni_set_encrypt_key
        shll    $4,%esi
        testl   %eax,%eax
-       jnz     .Ldec_key_ret
+       jnz     L$dec_key_ret
        leaq    16(%rdx,%rsi,1),%rdi
 
        movups  (%rdx),%xmm0
@@ -2346,7 +2346,7 @@ aesni_set_decrypt_key:
        leaq    16(%rdx),%rdx
        leaq    -16(%rdi),%rdi
 
-.Ldec_key_inverse:
+L$dec_key_inverse:
        movups  (%rdx),%xmm0
        movups  (%rdi),%xmm1
 .byte  102,15,56,219,192
@@ -2356,143 +2356,143 @@ aesni_set_decrypt_key:
        movups  %xmm0,16(%rdi)
        movups  %xmm1,-16(%rdx)
        cmpq    %rdx,%rdi
-       ja      .Ldec_key_inverse
+       ja      L$dec_key_inverse
 
        movups  (%rdx),%xmm0
 .byte  102,15,56,219,192
        movups  %xmm0,(%rdi)
-.Ldec_key_ret:
+L$dec_key_ret:
        addq    $8,%rsp
        .byte   0xf3,0xc3
-.LSEH_end_set_decrypt_key:
-.size  aesni_set_decrypt_key,.-aesni_set_decrypt_key
-.globl aesni_set_encrypt_key
-.type  aesni_set_encrypt_key,@function
-.align 16
-aesni_set_encrypt_key:
+L$SEH_end_set_decrypt_key:
+
+.globl _aesni_set_encrypt_key
+
+.p2align       4
+_aesni_set_encrypt_key:
 __aesni_set_encrypt_key:
 .byte  0x48,0x83,0xEC,0x08     
        movq    $-1,%rax
        testq   %rdi,%rdi
-       jz      .Lenc_key_ret
+       jz      L$enc_key_ret
        testq   %rdx,%rdx
-       jz      .Lenc_key_ret
+       jz      L$enc_key_ret
 
        movups  (%rdi),%xmm0
        xorps   %xmm4,%xmm4
        leaq    16(%rdx),%rax
        cmpl    $256,%esi
-       je      .L14rounds
+       je      L$14rounds
        cmpl    $192,%esi
-       je      .L12rounds
+       je      L$12rounds
        cmpl    $128,%esi
-       jne     .Lbad_keybits
+       jne     L$bad_keybits
 
-.L10rounds:
+L$10rounds:
        movl    $9,%esi
        movups  %xmm0,(%rdx)
 .byte  102,15,58,223,200,1
-       call    .Lkey_expansion_128_cold
+       call    L$key_expansion_128_cold
 .byte  102,15,58,223,200,2
-       call    .Lkey_expansion_128
+       call    L$key_expansion_128
 .byte  102,15,58,223,200,4
-       call    .Lkey_expansion_128
+       call    L$key_expansion_128
 .byte  102,15,58,223,200,8
-       call    .Lkey_expansion_128
+       call    L$key_expansion_128
 .byte  102,15,58,223,200,16
-       call    .Lkey_expansion_128
+       call    L$key_expansion_128
 .byte  102,15,58,223,200,32
-       call    .Lkey_expansion_128
+       call    L$key_expansion_128
 .byte  102,15,58,223,200,64
-       call    .Lkey_expansion_128
+       call    L$key_expansion_128
 .byte  102,15,58,223,200,128
-       call    .Lkey_expansion_128
+       call    L$key_expansion_128
 .byte  102,15,58,223,200,27
-       call    .Lkey_expansion_128
+       call    L$key_expansion_128
 .byte  102,15,58,223,200,54
-       call    .Lkey_expansion_128
+       call    L$key_expansion_128
        movups  %xmm0,(%rax)
        movl    %esi,80(%rax)
        xorl    %eax,%eax
-       jmp     .Lenc_key_ret
+       jmp     L$enc_key_ret
 
-.align 16
-.L12rounds:
+.p2align       4
+L$12rounds:
        movq    16(%rdi),%xmm2
        movl    $11,%esi
        movups  %xmm0,(%rdx)
 .byte  102,15,58,223,202,1
-       call    .Lkey_expansion_192a_cold
+       call    L$key_expansion_192a_cold
 .byte  102,15,58,223,202,2
-       call    .Lkey_expansion_192b
+       call    L$key_expansion_192b
 .byte  102,15,58,223,202,4
-       call    .Lkey_expansion_192a
+       call    L$key_expansion_192a
 .byte  102,15,58,223,202,8
-       call    .Lkey_expansion_192b
+       call    L$key_expansion_192b
 .byte  102,15,58,223,202,16
-       call    .Lkey_expansion_192a
+       call    L$key_expansion_192a
 .byte  102,15,58,223,202,32
-       call    .Lkey_expansion_192b
+       call    L$key_expansion_192b
 .byte  102,15,58,223,202,64
-       call    .Lkey_expansion_192a
+       call    L$key_expansion_192a
 .byte  102,15,58,223,202,128
-       call    .Lkey_expansion_192b
+       call    L$key_expansion_192b
        movups  %xmm0,(%rax)
        movl    %esi,48(%rax)
        xorq    %rax,%rax
-       jmp     .Lenc_key_ret
+       jmp     L$enc_key_ret
 
-.align 16
-.L14rounds:
+.p2align       4
+L$14rounds:
        movups  16(%rdi),%xmm2
        movl    $13,%esi
        leaq    16(%rax),%rax
        movups  %xmm0,(%rdx)
        movups  %xmm2,16(%rdx)
 .byte  102,15,58,223,202,1
-       call    .Lkey_expansion_256a_cold
+       call    L$key_expansion_256a_cold
 .byte  102,15,58,223,200,1
-       call    .Lkey_expansion_256b
+       call    L$key_expansion_256b
 .byte  102,15,58,223,202,2
-       call    .Lkey_expansion_256a
+       call    L$key_expansion_256a
 .byte  102,15,58,223,200,2
-       call    .Lkey_expansion_256b
+       call    L$key_expansion_256b
 .byte  102,15,58,223,202,4
-       call    .Lkey_expansion_256a
+       call    L$key_expansion_256a
 .byte  102,15,58,223,200,4
-       call    .Lkey_expansion_256b
+       call    L$key_expansion_256b
 .byte  102,15,58,223,202,8
-       call    .Lkey_expansion_256a
+       call    L$key_expansion_256a
 .byte  102,15,58,223,200,8
-       call    .Lkey_expansion_256b
+       call    L$key_expansion_256b
 .byte  102,15,58,223,202,16
-       call    .Lkey_expansion_256a
+       call    L$key_expansion_256a
 .byte  102,15,58,223,200,16
-       call    .Lkey_expansion_256b
+       call    L$key_expansion_256b
 .byte  102,15,58,223,202,32
-       call    .Lkey_expansion_256a
+       call    L$key_expansion_256a
 .byte  102,15,58,223,200,32
-       call    .Lkey_expansion_256b
+       call    L$key_expansion_256b
 .byte  102,15,58,223,202,64
-       call    .Lkey_expansion_256a
+       call    L$key_expansion_256a
        movups  %xmm0,(%rax)
        movl    %esi,16(%rax)
        xorq    %rax,%rax
-       jmp     .Lenc_key_ret
+       jmp     L$enc_key_ret
 
-.align 16
-.Lbad_keybits:
+.p2align       4
+L$bad_keybits:
        movq    $-2,%rax
-.Lenc_key_ret:
+L$enc_key_ret:
        addq    $8,%rsp
        .byte   0xf3,0xc3
-.LSEH_end_set_encrypt_key:
+L$SEH_end_set_encrypt_key:
 
-.align 16
-.Lkey_expansion_128:
+.p2align       4
+L$key_expansion_128:
        movups  %xmm0,(%rax)
        leaq    16(%rax),%rax
-.Lkey_expansion_128_cold:
+L$key_expansion_128_cold:
        shufps  $16,%xmm0,%xmm4
        xorps   %xmm4,%xmm0
        shufps  $140,%xmm0,%xmm4
@@ -2501,13 +2501,13 @@ __aesni_set_encrypt_key:
        xorps   %xmm1,%xmm0
        .byte   0xf3,0xc3
 
-.align 16
-.Lkey_expansion_192a:
+.p2align       4
+L$key_expansion_192a:
        movups  %xmm0,(%rax)
        leaq    16(%rax),%rax
-.Lkey_expansion_192a_cold:
+L$key_expansion_192a_cold:
        movaps  %xmm2,%xmm5
-.Lkey_expansion_192b_warm:
+L$key_expansion_192b_warm:
        shufps  $16,%xmm0,%xmm4
        movdqa  %xmm2,%xmm3
        xorps   %xmm4,%xmm0
@@ -2521,21 +2521,21 @@ __aesni_set_encrypt_key:
        pxor    %xmm3,%xmm2
        .byte   0xf3,0xc3
 
-.align 16
-.Lkey_expansion_192b:
+.p2align       4
+L$key_expansion_192b:
        movaps  %xmm0,%xmm3
        shufps  $68,%xmm0,%xmm5
        movups  %xmm5,(%rax)
        shufps  $78,%xmm2,%xmm3
        movups  %xmm3,16(%rax)
        leaq    32(%rax),%rax
-       jmp     .Lkey_expansion_192b_warm
+       jmp     L$key_expansion_192b_warm
 
-.align 16
-.Lkey_expansion_256a:
+.p2align       4
+L$key_expansion_256a:
        movups  %xmm2,(%rax)
        leaq    16(%rax),%rax
-.Lkey_expansion_256a_cold:
+L$key_expansion_256a_cold:
        shufps  $16,%xmm0,%xmm4
        xorps   %xmm4,%xmm0
        shufps  $140,%xmm0,%xmm4
@@ -2544,8 +2544,8 @@ __aesni_set_encrypt_key:
        xorps   %xmm1,%xmm0
        .byte   0xf3,0xc3
 
-.align 16
-.Lkey_expansion_256b:
+.p2align       4
+L$key_expansion_256b:
        movups  %xmm0,(%rax)
        leaq    16(%rax),%rax
 
@@ -2556,19 +2556,17 @@ __aesni_set_encrypt_key:
        shufps  $170,%xmm1,%xmm1
        xorps   %xmm1,%xmm2
        .byte   0xf3,0xc3
-.size  aesni_set_encrypt_key,.-aesni_set_encrypt_key
-.size  __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
-.align 64
-.Lbswap_mask:
+
+
+.p2align       6
+L$bswap_mask:
 .byte  15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
-.Lincrement32:
+L$increment32:
 .long  6,6,6,0
-.Lincrement64:
+L$increment64:
 .long  1,0,0,0
-.Lxts_magic:
+L$xts_magic:
 .long  0x87,0,1,0
 
 .byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 64
-
-.section .note.GNU-stack,"",%progbits
+.p2align       6
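
The hunks above are almost entirely mechanical: the same ELF-to-Mach-O
translation applied to every symbol and label in the file. As a rough
sketch of the rules involved (using a hypothetical routine name,
aesni_foo, which does not appear in this commit), the ELF flavour of a
function looks like:

	.globl	aesni_foo
	.type	aesni_foo,@function
	.align	16			# on x86 ELF the argument is a byte count
aesni_foo:
.Lfoo_loop:				# .L marks a local, non-exported label
	jnz	.Lfoo_loop
	.size	aesni_foo,.-aesni_foo

while the Mach-O flavour of the same function becomes:

	.globl	_aesni_foo		# Mach-O C symbols carry a leading underscore
	.p2align	4		# power-of-two form: 2^4 = 16-byte alignment
_aesni_foo:
L$foo_loop:				# labels starting with L stay out of the symbol
	jnz	L$foo_loop		# table; .type/.size/.def have no Mach-O equivalent

The ELF-only .section .note.GNU-stack marker is likewise dropped, since
Mach-O has no counterpart for it.
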
diff --git a/lib/accelerated/x86/asm-coff/appro-aes-x86-coff.s b/lib/accelerated/x86/asm-macosx/appro-aes-x86-macosx.s
similarity index 82%
copy from lib/accelerated/x86/asm-coff/appro-aes-x86-coff.s
copy to lib/accelerated/x86/asm-macosx/appro-aes-x86-macosx.s
index 74e236b..ca05d9e 100644
--- a/lib/accelerated/x86/asm-coff/appro-aes-x86-coff.s
+++ b/lib/accelerated/x86/asm-macosx/appro-aes-x86-macosx.s
@@ -38,10 +38,9 @@
 .file  "devel/perlasm/aesni-x86.s"
 .text
 .globl _aesni_encrypt
-.def   _aesni_encrypt; .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _aesni_encrypt:
-.L_aesni_encrypt_begin:
+L_aesni_encrypt_begin:
        movl    4(%esp),%eax
        movl    12(%esp),%edx
        movups  (%eax),%xmm2
@@ -51,20 +50,19 @@ _aesni_encrypt:
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L000enc1_loop_1:
+L000enc1_loop_1:
 .byte  102,15,56,220,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L000enc1_loop_1
+       jnz     L000enc1_loop_1
 .byte  102,15,56,221,209
        movups  %xmm2,(%eax)
        ret
 .globl _aesni_decrypt
-.def   _aesni_decrypt; .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _aesni_decrypt:
-.L_aesni_decrypt_begin:
+L_aesni_decrypt_begin:
        movl    4(%esp),%eax
        movl    12(%esp),%edx
        movups  (%eax),%xmm2
@@ -74,17 +72,16 @@ _aesni_decrypt:
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L001dec1_loop_2:
+L001dec1_loop_2:
 .byte  102,15,56,222,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L001dec1_loop_2
+       jnz     L001dec1_loop_2
 .byte  102,15,56,223,209
        movups  %xmm2,(%eax)
        ret
-.def   __aesni_encrypt3;       .scl    3;      .type   32;     .endef
-.align 16
+.align 4
 __aesni_encrypt3:
        movups  (%edx),%xmm0
        shrl    $1,%ecx
@@ -94,7 +91,7 @@ __aesni_encrypt3:
        pxor    %xmm0,%xmm3
        pxor    %xmm0,%xmm4
        movups  (%edx),%xmm0
-.L002enc3_loop:
+L002enc3_loop:
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
        decl    %ecx
@@ -105,7 +102,7 @@ __aesni_encrypt3:
        leal    32(%edx),%edx
 .byte  102,15,56,220,224
        movups  (%edx),%xmm0
-       jnz     .L002enc3_loop
+       jnz     L002enc3_loop
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
 .byte  102,15,56,220,225
@@ -113,8 +110,7 @@ __aesni_encrypt3:
 .byte  102,15,56,221,216
 .byte  102,15,56,221,224
        ret
-.def   __aesni_decrypt3;       .scl    3;      .type   32;     .endef
-.align 16
+.align 4
 __aesni_decrypt3:
        movups  (%edx),%xmm0
        shrl    $1,%ecx
@@ -124,7 +120,7 @@ __aesni_decrypt3:
        pxor    %xmm0,%xmm3
        pxor    %xmm0,%xmm4
        movups  (%edx),%xmm0
-.L003dec3_loop:
+L003dec3_loop:
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
        decl    %ecx
@@ -135,7 +131,7 @@ __aesni_decrypt3:
        leal    32(%edx),%edx
 .byte  102,15,56,222,224
        movups  (%edx),%xmm0
-       jnz     .L003dec3_loop
+       jnz     L003dec3_loop
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
 .byte  102,15,56,222,225
@@ -143,8 +139,7 @@ __aesni_decrypt3:
 .byte  102,15,56,223,216
 .byte  102,15,56,223,224
        ret
-.def   __aesni_encrypt4;       .scl    3;      .type   32;     .endef
-.align 16
+.align 4
 __aesni_encrypt4:
        movups  (%edx),%xmm0
        movups  16(%edx),%xmm1
@@ -155,7 +150,7 @@ __aesni_encrypt4:
        pxor    %xmm0,%xmm4
        pxor    %xmm0,%xmm5
        movups  (%edx),%xmm0
-.L004enc4_loop:
+L004enc4_loop:
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
        decl    %ecx
@@ -168,7 +163,7 @@ __aesni_encrypt4:
 .byte  102,15,56,220,224
 .byte  102,15,56,220,232
        movups  (%edx),%xmm0
-       jnz     .L004enc4_loop
+       jnz     L004enc4_loop
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
 .byte  102,15,56,220,225
@@ -178,8 +173,7 @@ __aesni_encrypt4:
 .byte  102,15,56,221,224
 .byte  102,15,56,221,232
        ret
-.def   __aesni_decrypt4;       .scl    3;      .type   32;     .endef
-.align 16
+.align 4
 __aesni_decrypt4:
        movups  (%edx),%xmm0
        movups  16(%edx),%xmm1
@@ -190,7 +184,7 @@ __aesni_decrypt4:
        pxor    %xmm0,%xmm4
        pxor    %xmm0,%xmm5
        movups  (%edx),%xmm0
-.L005dec4_loop:
+L005dec4_loop:
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
        decl    %ecx
@@ -203,7 +197,7 @@ __aesni_decrypt4:
 .byte  102,15,56,222,224
 .byte  102,15,56,222,232
        movups  (%edx),%xmm0
-       jnz     .L005dec4_loop
+       jnz     L005dec4_loop
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
 .byte  102,15,56,222,225
@@ -213,8 +207,7 @@ __aesni_decrypt4:
 .byte  102,15,56,223,224
 .byte  102,15,56,223,232
        ret
-.def   __aesni_encrypt6;       .scl    3;      .type   32;     .endef
-.align 16
+.align 4
 __aesni_encrypt6:
        movups  (%edx),%xmm0
        shrl    $1,%ecx
@@ -234,9 +227,9 @@ __aesni_encrypt6:
 .byte  102,15,56,220,241
        movups  (%edx),%xmm0
 .byte  102,15,56,220,249
-       jmp     .L_aesni_encrypt6_enter
-.align 16
-.L006enc6_loop:
+       jmp     L_aesni_encrypt6_enter
+.align 4,0x90
+L006enc6_loop:
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
        decl    %ecx
@@ -244,8 +237,8 @@ __aesni_encrypt6:
 .byte  102,15,56,220,233
 .byte  102,15,56,220,241
 .byte  102,15,56,220,249
-.align 16
-.L_aesni_encrypt6_enter:
+.align 4,0x90
+L_aesni_encrypt6_enter:
        movups  16(%edx),%xmm1
 .byte  102,15,56,220,208
 .byte  102,15,56,220,216
@@ -255,7 +248,7 @@ __aesni_encrypt6:
 .byte  102,15,56,220,240
 .byte  102,15,56,220,248
        movups  (%edx),%xmm0
-       jnz     .L006enc6_loop
+       jnz     L006enc6_loop
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
 .byte  102,15,56,220,225
@@ -269,8 +262,7 @@ __aesni_encrypt6:
 .byte  102,15,56,221,240
 .byte  102,15,56,221,248
        ret
-.def   __aesni_decrypt6;       .scl    3;      .type   32;     .endef
-.align 16
+.align 4
 __aesni_decrypt6:
        movups  (%edx),%xmm0
        shrl    $1,%ecx
@@ -290,9 +282,9 @@ __aesni_decrypt6:
 .byte  102,15,56,222,241
        movups  (%edx),%xmm0
 .byte  102,15,56,222,249
-       jmp     .L_aesni_decrypt6_enter
-.align 16
-.L007dec6_loop:
+       jmp     L_aesni_decrypt6_enter
+.align 4,0x90
+L007dec6_loop:
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
        decl    %ecx
@@ -300,8 +292,8 @@ __aesni_decrypt6:
 .byte  102,15,56,222,233
 .byte  102,15,56,222,241
 .byte  102,15,56,222,249
-.align 16
-.L_aesni_decrypt6_enter:
+.align 4,0x90
+L_aesni_decrypt6_enter:
        movups  16(%edx),%xmm1
 .byte  102,15,56,222,208
 .byte  102,15,56,222,216
@@ -311,7 +303,7 @@ __aesni_decrypt6:
 .byte  102,15,56,222,240
 .byte  102,15,56,222,248
        movups  (%edx),%xmm0
-       jnz     .L007dec6_loop
+       jnz     L007dec6_loop
 .byte  102,15,56,222,209
 .byte  102,15,56,222,217
 .byte  102,15,56,222,225
@@ -326,10 +318,9 @@ __aesni_decrypt6:
 .byte  102,15,56,223,248
        ret
 .globl _aesni_ecb_encrypt
-.def   _aesni_ecb_encrypt;     .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _aesni_ecb_encrypt:
-.L_aesni_ecb_encrypt_begin:
+L_aesni_ecb_encrypt_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
@@ -340,14 +331,14 @@ _aesni_ecb_encrypt:
        movl    32(%esp),%edx
        movl    36(%esp),%ebx
        andl    $-16,%eax
-       jz      .L008ecb_ret
+       jz      L008ecb_ret
        movl    240(%edx),%ecx
        testl   %ebx,%ebx
-       jz      .L009ecb_decrypt
+       jz      L009ecb_decrypt
        movl    %edx,%ebp
        movl    %ecx,%ebx
        cmpl    $96,%eax
-       jb      .L010ecb_enc_tail
+       jb      L010ecb_enc_tail
        movdqu  (%esi),%xmm2
        movdqu  16(%esi),%xmm3
        movdqu  32(%esi),%xmm4
@@ -356,9 +347,9 @@ _aesni_ecb_encrypt:
        movdqu  80(%esi),%xmm7
        leal    96(%esi),%esi
        subl    $96,%eax
-       jmp     .L011ecb_enc_loop6_enter
-.align 16
-.L012ecb_enc_loop6:
+       jmp     L011ecb_enc_loop6_enter
+.align 4,0x90
+L012ecb_enc_loop6:
        movups  %xmm2,(%edi)
        movdqu  (%esi),%xmm2
        movups  %xmm3,16(%edi)
@@ -373,12 +364,12 @@ _aesni_ecb_encrypt:
        leal    96(%edi),%edi
        movdqu  80(%esi),%xmm7
        leal    96(%esi),%esi
-.L011ecb_enc_loop6_enter:
+L011ecb_enc_loop6_enter:
        call    __aesni_encrypt6
        movl    %ebp,%edx
        movl    %ebx,%ecx
        subl    $96,%eax
-       jnc     .L012ecb_enc_loop6
+       jnc     L012ecb_enc_loop6
        movups  %xmm2,(%edi)
        movups  %xmm3,16(%edi)
        movups  %xmm4,32(%edi)
@@ -387,18 +378,18 @@ _aesni_ecb_encrypt:
        movups  %xmm7,80(%edi)
        leal    96(%edi),%edi
        addl    $96,%eax
-       jz      .L008ecb_ret
-.L010ecb_enc_tail:
+       jz      L008ecb_ret
+L010ecb_enc_tail:
        movups  (%esi),%xmm2
        cmpl    $32,%eax
-       jb      .L013ecb_enc_one
+       jb      L013ecb_enc_one
        movups  16(%esi),%xmm3
-       je      .L014ecb_enc_two
+       je      L014ecb_enc_two
        movups  32(%esi),%xmm4
        cmpl    $64,%eax
-       jb      .L015ecb_enc_three
+       jb      L015ecb_enc_three
        movups  48(%esi),%xmm5
-       je      .L016ecb_enc_four
+       je      L016ecb_enc_four
        movups  64(%esi),%xmm6
        xorps   %xmm7,%xmm7
        call    __aesni_encrypt6
@@ -407,50 +398,50 @@ _aesni_ecb_encrypt:
        movups  %xmm4,32(%edi)
        movups  %xmm5,48(%edi)
        movups  %xmm6,64(%edi)
-       jmp     .L008ecb_ret
-.align 16
-.L013ecb_enc_one:
+       jmp     L008ecb_ret
+.align 4,0x90
+L013ecb_enc_one:
        movups  (%edx),%xmm0
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L017enc1_loop_3:
+L017enc1_loop_3:
 .byte  102,15,56,220,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L017enc1_loop_3
+       jnz     L017enc1_loop_3
 .byte  102,15,56,221,209
        movups  %xmm2,(%edi)
-       jmp     .L008ecb_ret
-.align 16
-.L014ecb_enc_two:
+       jmp     L008ecb_ret
+.align 4,0x90
+L014ecb_enc_two:
        xorps   %xmm4,%xmm4
        call    __aesni_encrypt3
        movups  %xmm2,(%edi)
        movups  %xmm3,16(%edi)
-       jmp     .L008ecb_ret
-.align 16
-.L015ecb_enc_three:
+       jmp     L008ecb_ret
+.align 4,0x90
+L015ecb_enc_three:
        call    __aesni_encrypt3
        movups  %xmm2,(%edi)
        movups  %xmm3,16(%edi)
        movups  %xmm4,32(%edi)
-       jmp     .L008ecb_ret
-.align 16
-.L016ecb_enc_four:
+       jmp     L008ecb_ret
+.align 4,0x90
+L016ecb_enc_four:
        call    __aesni_encrypt4
        movups  %xmm2,(%edi)
        movups  %xmm3,16(%edi)
        movups  %xmm4,32(%edi)
        movups  %xmm5,48(%edi)
-       jmp     .L008ecb_ret
-.align 16
-.L009ecb_decrypt:
+       jmp     L008ecb_ret
+.align 4,0x90
+L009ecb_decrypt:
        movl    %edx,%ebp
        movl    %ecx,%ebx
        cmpl    $96,%eax
-       jb      .L018ecb_dec_tail
+       jb      L018ecb_dec_tail
        movdqu  (%esi),%xmm2
        movdqu  16(%esi),%xmm3
        movdqu  32(%esi),%xmm4
@@ -459,9 +450,9 @@ _aesni_ecb_encrypt:
        movdqu  80(%esi),%xmm7
        leal    96(%esi),%esi
        subl    $96,%eax
-       jmp     .L019ecb_dec_loop6_enter
-.align 16
-.L020ecb_dec_loop6:
+       jmp     L019ecb_dec_loop6_enter
+.align 4,0x90
+L020ecb_dec_loop6:
        movups  %xmm2,(%edi)
        movdqu  (%esi),%xmm2
        movups  %xmm3,16(%edi)
@@ -476,12 +467,12 @@ _aesni_ecb_encrypt:
        leal    96(%edi),%edi
        movdqu  80(%esi),%xmm7
        leal    96(%esi),%esi
-.L019ecb_dec_loop6_enter:
+L019ecb_dec_loop6_enter:
        call    __aesni_decrypt6
        movl    %ebp,%edx
        movl    %ebx,%ecx
        subl    $96,%eax
-       jnc     .L020ecb_dec_loop6
+       jnc     L020ecb_dec_loop6
        movups  %xmm2,(%edi)
        movups  %xmm3,16(%edi)
        movups  %xmm4,32(%edi)
@@ -490,18 +481,18 @@ _aesni_ecb_encrypt:
        movups  %xmm7,80(%edi)
        leal    96(%edi),%edi
        addl    $96,%eax
-       jz      .L008ecb_ret
-.L018ecb_dec_tail:
+       jz      L008ecb_ret
+L018ecb_dec_tail:
        movups  (%esi),%xmm2
        cmpl    $32,%eax
-       jb      .L021ecb_dec_one
+       jb      L021ecb_dec_one
        movups  16(%esi),%xmm3
-       je      .L022ecb_dec_two
+       je      L022ecb_dec_two
        movups  32(%esi),%xmm4
        cmpl    $64,%eax
-       jb      .L023ecb_dec_three
+       jb      L023ecb_dec_three
        movups  48(%esi),%xmm5
-       je      .L024ecb_dec_four
+       je      L024ecb_dec_four
        movups  64(%esi),%xmm6
        xorps   %xmm7,%xmm7
        call    __aesni_decrypt6
@@ -510,54 +501,53 @@ _aesni_ecb_encrypt:
        movups  %xmm4,32(%edi)
        movups  %xmm5,48(%edi)
        movups  %xmm6,64(%edi)
-       jmp     .L008ecb_ret
-.align 16
-.L021ecb_dec_one:
+       jmp     L008ecb_ret
+.align 4,0x90
+L021ecb_dec_one:
        movups  (%edx),%xmm0
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L025dec1_loop_4:
+L025dec1_loop_4:
 .byte  102,15,56,222,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L025dec1_loop_4
+       jnz     L025dec1_loop_4
 .byte  102,15,56,223,209
        movups  %xmm2,(%edi)
-       jmp     .L008ecb_ret
-.align 16
-.L022ecb_dec_two:
+       jmp     L008ecb_ret
+.align 4,0x90
+L022ecb_dec_two:
        xorps   %xmm4,%xmm4
        call    __aesni_decrypt3
        movups  %xmm2,(%edi)
        movups  %xmm3,16(%edi)
-       jmp     .L008ecb_ret
-.align 16
-.L023ecb_dec_three:
+       jmp     L008ecb_ret
+.align 4,0x90
+L023ecb_dec_three:
        call    __aesni_decrypt3
        movups  %xmm2,(%edi)
        movups  %xmm3,16(%edi)
        movups  %xmm4,32(%edi)
-       jmp     .L008ecb_ret
-.align 16
-.L024ecb_dec_four:
+       jmp     L008ecb_ret
+.align 4,0x90
+L024ecb_dec_four:
        call    __aesni_decrypt4
        movups  %xmm2,(%edi)
        movups  %xmm3,16(%edi)
        movups  %xmm4,32(%edi)
        movups  %xmm5,48(%edi)
-.L008ecb_ret:
+L008ecb_ret:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
 .globl _aesni_ccm64_encrypt_blocks
-.def   _aesni_ccm64_encrypt_blocks;    .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _aesni_ccm64_encrypt_blocks:
-.L_aesni_ccm64_encrypt_blocks_begin:
+L_aesni_ccm64_encrypt_blocks_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
@@ -591,7 +581,7 @@ _aesni_ccm64_encrypt_blocks:
        movdqa  %xmm7,%xmm2
        movl    %ecx,%ebx
 .byte  102,15,56,0,253
-.L026ccm64_enc_outer:
+L026ccm64_enc_outer:
        movups  (%ebp),%xmm0
        movl    %ebx,%ecx
        movups  (%esi),%xmm6
@@ -601,7 +591,7 @@ _aesni_ccm64_encrypt_blocks:
        leal    32(%ebp),%edx
        xorps   %xmm0,%xmm3
        movups  (%edx),%xmm0
-.L027ccm64_enc2_loop:
+L027ccm64_enc2_loop:
 .byte  102,15,56,220,209
        decl    %ecx
 .byte  102,15,56,220,217
@@ -610,7 +600,7 @@ _aesni_ccm64_encrypt_blocks:
        leal    32(%edx),%edx
 .byte  102,15,56,220,216
        movups  (%edx),%xmm0
-       jnz     .L027ccm64_enc2_loop
+       jnz     L027ccm64_enc2_loop
 .byte  102,15,56,220,209
 .byte  102,15,56,220,217
        paddq   16(%esp),%xmm7
@@ -623,7 +613,7 @@ _aesni_ccm64_encrypt_blocks:
        movups  %xmm6,(%edi)
        leal    16(%edi),%edi
 .byte  102,15,56,0,213
-       jnz     .L026ccm64_enc_outer
+       jnz     L026ccm64_enc_outer
        movl    48(%esp),%esp
        movl    40(%esp),%edi
        movups  %xmm3,(%edi)
@@ -633,10 +623,9 @@ _aesni_ccm64_encrypt_blocks:
        popl    %ebp
        ret
 .globl _aesni_ccm64_decrypt_blocks
-.def   _aesni_ccm64_decrypt_blocks;    .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _aesni_ccm64_decrypt_blocks:
-.L_aesni_ccm64_decrypt_blocks_begin:
+L_aesni_ccm64_decrypt_blocks_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
@@ -673,19 +662,19 @@ _aesni_ccm64_decrypt_blocks:
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L028enc1_loop_5:
+L028enc1_loop_5:
 .byte  102,15,56,220,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L028enc1_loop_5
+       jnz     L028enc1_loop_5
 .byte  102,15,56,221,209
        movups  (%esi),%xmm6
        paddq   16(%esp),%xmm7
        leal    16(%esi),%esi
-       jmp     .L029ccm64_dec_outer
-.align 16
-.L029ccm64_dec_outer:
+       jmp     L029ccm64_dec_outer
+.align 4,0x90
+L029ccm64_dec_outer:
        xorps   %xmm2,%xmm6
        movdqa  %xmm7,%xmm2
        movl    %ebx,%ecx
@@ -693,7 +682,7 @@ _aesni_ccm64_decrypt_blocks:
        leal    16(%edi),%edi
 .byte  102,15,56,0,213
        subl    $1,%eax
-       jz      .L030ccm64_dec_break
+       jz      L030ccm64_dec_break
        movups  (%ebp),%xmm0
        shrl    $1,%ecx
        movups  16(%ebp),%xmm1
@@ -702,7 +691,7 @@ _aesni_ccm64_decrypt_blocks:
        xorps   %xmm0,%xmm2
        xorps   %xmm6,%xmm3
        movups  (%edx),%xmm0
-.L031ccm64_dec2_loop:
+L031ccm64_dec2_loop:
 .byte  102,15,56,220,209
        decl    %ecx
 .byte  102,15,56,220,217
@@ -711,7 +700,7 @@ _aesni_ccm64_decrypt_blocks:
        leal    32(%edx),%edx
 .byte  102,15,56,220,216
        movups  (%edx),%xmm0
-       jnz     .L031ccm64_dec2_loop
+       jnz     L031ccm64_dec2_loop
        movups  (%esi),%xmm6
        paddq   16(%esp),%xmm7
 .byte  102,15,56,220,209
@@ -719,21 +708,21 @@ _aesni_ccm64_decrypt_blocks:
        leal    16(%esi),%esi
 .byte  102,15,56,221,208
 .byte  102,15,56,221,216
-       jmp     .L029ccm64_dec_outer
-.align 16
-.L030ccm64_dec_break:
+       jmp     L029ccm64_dec_outer
+.align 4,0x90
+L030ccm64_dec_break:
        movl    %ebp,%edx
        movups  (%edx),%xmm0
        movups  16(%edx),%xmm1
        xorps   %xmm0,%xmm6
        leal    32(%edx),%edx
        xorps   %xmm6,%xmm3
-.L032enc1_loop_6:
+L032enc1_loop_6:
 .byte  102,15,56,220,217
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L032enc1_loop_6
+       jnz     L032enc1_loop_6
 .byte  102,15,56,221,217
        movl    48(%esp),%esp
        movl    40(%esp),%edi
@@ -744,10 +733,9 @@ _aesni_ccm64_decrypt_blocks:
        popl    %ebp
        ret
 .globl _aesni_ctr32_encrypt_blocks
-.def   _aesni_ctr32_encrypt_blocks;    .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _aesni_ctr32_encrypt_blocks:
-.L_aesni_ctr32_encrypt_blocks_begin:
+L_aesni_ctr32_encrypt_blocks_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
@@ -762,7 +750,7 @@ _aesni_ctr32_encrypt_blocks:
        andl    $-16,%esp
        movl    %ebp,80(%esp)
        cmpl    $1,%eax
-       je      .L033ctr32_one_shortcut
+       je      L033ctr32_one_shortcut
        movdqu  (%ebx),%xmm7
        movl    $202182159,(%esp)
        movl    $134810123,4(%esp)
@@ -799,15 +787,15 @@ _aesni_ctr32_encrypt_blocks:
        pshufd  $192,%xmm1,%xmm2
        pshufd  $128,%xmm1,%xmm3
        cmpl    $6,%eax
-       jb      .L034ctr32_tail
+       jb      L034ctr32_tail
        movdqa  %xmm7,32(%esp)
        shrl    $1,%ecx
        movl    %edx,%ebp
        movl    %ecx,%ebx
        subl    $6,%eax
-       jmp     .L035ctr32_loop6
-.align 16
-.L035ctr32_loop6:
+       jmp     L035ctr32_loop6
+.align 4,0x90
+L035ctr32_loop6:
        pshufd  $64,%xmm1,%xmm4
        movdqa  32(%esp),%xmm1
        pshufd  $192,%xmm0,%xmm5
@@ -836,7 +824,7 @@ _aesni_ctr32_encrypt_blocks:
 .byte  102,15,56,220,241
        movups  (%edx),%xmm0
 .byte  102,15,56,220,249
-       call    .L_aesni_encrypt6_enter
+       call    L_aesni_encrypt6_enter
        movups  (%esi),%xmm1
        movups  16(%esi),%xmm0
        xorps   %xmm1,%xmm2
@@ -870,26 +858,26 @@ _aesni_ctr32_encrypt_blocks:
        movl    %ebx,%ecx
        pshufd  $128,%xmm1,%xmm3
        subl    $6,%eax
-       jnc     .L035ctr32_loop6
+       jnc     L035ctr32_loop6
        addl    $6,%eax
-       jz      .L036ctr32_ret
+       jz      L036ctr32_ret
        movl    %ebp,%edx
        leal    1(,%ecx,2),%ecx
        movdqa  32(%esp),%xmm7
-.L034ctr32_tail:
+L034ctr32_tail:
        por     %xmm7,%xmm2
        cmpl    $2,%eax
-       jb      .L037ctr32_one
+       jb      L037ctr32_one
        pshufd  $64,%xmm1,%xmm4
        por     %xmm7,%xmm3
-       je      .L038ctr32_two
+       je      L038ctr32_two
        pshufd  $192,%xmm0,%xmm5
        por     %xmm7,%xmm4
        cmpl    $4,%eax
-       jb      .L039ctr32_three
+       jb      L039ctr32_three
        pshufd  $128,%xmm0,%xmm6
        por     %xmm7,%xmm5
-       je      .L040ctr32_four
+       je      L040ctr32_four
        por     %xmm7,%xmm6
        call    __aesni_encrypt6
        movups  (%esi),%xmm1
@@ -907,29 +895,29 @@ _aesni_ctr32_encrypt_blocks:
        movups  %xmm4,32(%edi)
        movups  %xmm5,48(%edi)
        movups  %xmm6,64(%edi)
-       jmp     .L036ctr32_ret
-.align 16
-.L033ctr32_one_shortcut:
+       jmp     L036ctr32_ret
+.align 4,0x90
+L033ctr32_one_shortcut:
        movups  (%ebx),%xmm2
        movl    240(%edx),%ecx
-.L037ctr32_one:
+L037ctr32_one:
        movups  (%edx),%xmm0
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L041enc1_loop_7:
+L041enc1_loop_7:
 .byte  102,15,56,220,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L041enc1_loop_7
+       jnz     L041enc1_loop_7
 .byte  102,15,56,221,209
        movups  (%esi),%xmm6
        xorps   %xmm2,%xmm6
        movups  %xmm6,(%edi)
-       jmp     .L036ctr32_ret
-.align 16
-.L038ctr32_two:
+       jmp     L036ctr32_ret
+.align 4,0x90
+L038ctr32_two:
        call    __aesni_encrypt3
        movups  (%esi),%xmm5
        movups  16(%esi),%xmm6
@@ -937,9 +925,9 @@ _aesni_ctr32_encrypt_blocks:
        xorps   %xmm6,%xmm3
        movups  %xmm2,(%edi)
        movups  %xmm3,16(%edi)
-       jmp     .L036ctr32_ret
-.align 16
-.L039ctr32_three:
+       jmp     L036ctr32_ret
+.align 4,0x90
+L039ctr32_three:
        call    __aesni_encrypt3
        movups  (%esi),%xmm5
        movups  16(%esi),%xmm6
@@ -950,9 +938,9 @@ _aesni_ctr32_encrypt_blocks:
        xorps   %xmm7,%xmm4
        movups  %xmm3,16(%edi)
        movups  %xmm4,32(%edi)
-       jmp     .L036ctr32_ret
-.align 16
-.L040ctr32_four:
+       jmp     L036ctr32_ret
+.align 4,0x90
+L040ctr32_four:
        call    __aesni_encrypt4
        movups  (%esi),%xmm6
        movups  16(%esi),%xmm7
@@ -966,7 +954,7 @@ _aesni_ctr32_encrypt_blocks:
        xorps   %xmm0,%xmm5
        movups  %xmm4,32(%edi)
        movups  %xmm5,48(%edi)
-.L036ctr32_ret:
+L036ctr32_ret:
        movl    80(%esp),%esp
        popl    %edi
        popl    %esi
@@ -974,10 +962,9 @@ _aesni_ctr32_encrypt_blocks:
        popl    %ebp
        ret
 .globl _aesni_xts_encrypt
-.def   _aesni_xts_encrypt;     .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _aesni_xts_encrypt:
-.L_aesni_xts_encrypt_begin:
+L_aesni_xts_encrypt_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
@@ -990,12 +977,12 @@ _aesni_xts_encrypt:
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L042enc1_loop_8:
+L042enc1_loop_8:
 .byte  102,15,56,220,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L042enc1_loop_8
+       jnz     L042enc1_loop_8
 .byte  102,15,56,221,209
        movl    20(%esp),%esi
        movl    24(%esp),%edi
@@ -1019,12 +1006,12 @@ _aesni_xts_encrypt:
        movl    %edx,%ebp
        movl    %ecx,%ebx
        subl    $96,%eax
-       jc      .L043xts_enc_short
+       jc      L043xts_enc_short
        shrl    $1,%ecx
        movl    %ecx,%ebx
-       jmp     .L044xts_enc_loop6
-.align 16
-.L044xts_enc_loop6:
+       jmp     L044xts_enc_loop6
+.align 4,0x90
+L044xts_enc_loop6:
        pshufd  $19,%xmm0,%xmm2
        pxor    %xmm0,%xmm0
        movdqa  %xmm1,(%esp)
@@ -1089,7 +1076,7 @@ _aesni_xts_encrypt:
 .byte  102,15,56,220,241
        movups  (%edx),%xmm0
 .byte  102,15,56,220,249
-       call    .L_aesni_encrypt6_enter
+       call    L_aesni_encrypt6_enter
        movdqa  80(%esp),%xmm1
        pxor    %xmm0,%xmm0
        xorps   (%esp),%xmm2
@@ -1115,23 +1102,23 @@ _aesni_xts_encrypt:
        movl    %ebx,%ecx
        pxor    %xmm2,%xmm1
        subl    $96,%eax
-       jnc     .L044xts_enc_loop6
+       jnc     L044xts_enc_loop6
        leal    1(,%ecx,2),%ecx
        movl    %ebp,%edx
        movl    %ecx,%ebx
-.L043xts_enc_short:
+L043xts_enc_short:
        addl    $96,%eax
-       jz      .L045xts_enc_done6x
+       jz      L045xts_enc_done6x
        movdqa  %xmm1,%xmm5
        cmpl    $32,%eax
-       jb      .L046xts_enc_one
+       jb      L046xts_enc_one
        pshufd  $19,%xmm0,%xmm2
        pxor    %xmm0,%xmm0
        paddq   %xmm1,%xmm1
        pand    %xmm3,%xmm2
        pcmpgtd %xmm1,%xmm0
        pxor    %xmm2,%xmm1
-       je      .L047xts_enc_two
+       je      L047xts_enc_two
        pshufd  $19,%xmm0,%xmm2
        pxor    %xmm0,%xmm0
        movdqa  %xmm1,%xmm6
@@ -1140,7 +1127,7 @@ _aesni_xts_encrypt:
        pcmpgtd %xmm1,%xmm0
        pxor    %xmm2,%xmm1
        cmpl    $64,%eax
-       jb      .L048xts_enc_three
+       jb      L048xts_enc_three
        pshufd  $19,%xmm0,%xmm2
        pxor    %xmm0,%xmm0
        movdqa  %xmm1,%xmm7
@@ -1150,7 +1137,7 @@ _aesni_xts_encrypt:
        pxor    %xmm2,%xmm1
        movdqa  %xmm5,(%esp)
        movdqa  %xmm6,16(%esp)
-       je      .L049xts_enc_four
+       je      L049xts_enc_four
        movdqa  %xmm7,32(%esp)
        pshufd  $19,%xmm0,%xmm7
        movdqa  %xmm1,48(%esp)
@@ -1182,9 +1169,9 @@ _aesni_xts_encrypt:
        movups  %xmm5,48(%edi)
        movups  %xmm6,64(%edi)
        leal    80(%edi),%edi
-       jmp     .L050xts_enc_done
-.align 16
-.L046xts_enc_one:
+       jmp     L050xts_enc_done
+.align 4,0x90
+L046xts_enc_one:
        movups  (%esi),%xmm2
        leal    16(%esi),%esi
        xorps   %xmm5,%xmm2
@@ -1192,20 +1179,20 @@ _aesni_xts_encrypt:
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L051enc1_loop_9:
+L051enc1_loop_9:
 .byte  102,15,56,220,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L051enc1_loop_9
+       jnz     L051enc1_loop_9
 .byte  102,15,56,221,209
        xorps   %xmm5,%xmm2
        movups  %xmm2,(%edi)
        leal    16(%edi),%edi
        movdqa  %xmm5,%xmm1
-       jmp     .L050xts_enc_done
-.align 16
-.L047xts_enc_two:
+       jmp     L050xts_enc_done
+.align 4,0x90
+L047xts_enc_two:
        movaps  %xmm1,%xmm6
        movups  (%esi),%xmm2
        movups  16(%esi),%xmm3
@@ -1220,9 +1207,9 @@ _aesni_xts_encrypt:
        movups  %xmm3,16(%edi)
        leal    32(%edi),%edi
        movdqa  %xmm6,%xmm1
-       jmp     .L050xts_enc_done
-.align 16
-.L048xts_enc_three:
+       jmp     L050xts_enc_done
+.align 4,0x90
+L048xts_enc_three:
        movaps  %xmm1,%xmm7
        movups  (%esi),%xmm2
        movups  16(%esi),%xmm3
@@ -1240,9 +1227,9 @@ _aesni_xts_encrypt:
        movups  %xmm4,32(%edi)
        leal    48(%edi),%edi
        movdqa  %xmm7,%xmm1
-       jmp     .L050xts_enc_done
-.align 16
-.L049xts_enc_four:
+       jmp     L050xts_enc_done
+.align 4,0x90
+L049xts_enc_four:
        movaps  %xmm1,%xmm6
        movups  (%esi),%xmm2
        movups  16(%esi),%xmm3
@@ -1264,28 +1251,28 @@ _aesni_xts_encrypt:
        movups  %xmm5,48(%edi)
        leal    64(%edi),%edi
        movdqa  %xmm6,%xmm1
-       jmp     .L050xts_enc_done
-.align 16
-.L045xts_enc_done6x:
+       jmp     L050xts_enc_done
+.align 4,0x90
+L045xts_enc_done6x:
        movl    112(%esp),%eax
        andl    $15,%eax
-       jz      .L052xts_enc_ret
+       jz      L052xts_enc_ret
        movdqa  %xmm1,%xmm5
        movl    %eax,112(%esp)
-       jmp     .L053xts_enc_steal
-.align 16
-.L050xts_enc_done:
+       jmp     L053xts_enc_steal
+.align 4,0x90
+L050xts_enc_done:
        movl    112(%esp),%eax
        pxor    %xmm0,%xmm0
        andl    $15,%eax
-       jz      .L052xts_enc_ret
+       jz      L052xts_enc_ret
        pcmpgtd %xmm1,%xmm0
        movl    %eax,112(%esp)
        pshufd  $19,%xmm0,%xmm5
        paddq   %xmm1,%xmm1
        pand    96(%esp),%xmm5
        pxor    %xmm1,%xmm5
-.L053xts_enc_steal:
+L053xts_enc_steal:
        movzbl  (%esi),%ecx
        movzbl  -16(%edi),%edx
        leal    1(%esi),%esi
@@ -1293,7 +1280,7 @@ _aesni_xts_encrypt:
        movb    %dl,(%edi)
        leal    1(%edi),%edi
        subl    $1,%eax
-       jnz     .L053xts_enc_steal
+       jnz     L053xts_enc_steal
        subl    112(%esp),%edi
        movl    %ebp,%edx
        movl    %ebx,%ecx
@@ -1303,16 +1290,16 @@ _aesni_xts_encrypt:
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L054enc1_loop_10:
+L054enc1_loop_10:
 .byte  102,15,56,220,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L054enc1_loop_10
+       jnz     L054enc1_loop_10
 .byte  102,15,56,221,209
        xorps   %xmm5,%xmm2
        movups  %xmm2,-16(%edi)
-.L052xts_enc_ret:
+L052xts_enc_ret:
        movl    116(%esp),%esp
        popl    %edi
        popl    %esi
@@ -1320,10 +1307,9 @@ _aesni_xts_encrypt:
        popl    %ebp
        ret
 .globl _aesni_xts_decrypt
-.def   _aesni_xts_decrypt;     .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _aesni_xts_decrypt:
-.L_aesni_xts_decrypt_begin:
+L_aesni_xts_decrypt_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
@@ -1336,12 +1322,12 @@ _aesni_xts_decrypt:
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L055enc1_loop_11:
+L055enc1_loop_11:
 .byte  102,15,56,220,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L055enc1_loop_11
+       jnz     L055enc1_loop_11
 .byte  102,15,56,221,209
        movl    20(%esp),%esi
        movl    24(%esp),%edi
@@ -1370,12 +1356,12 @@ _aesni_xts_decrypt:
        pcmpgtd %xmm1,%xmm0
        andl    $-16,%eax
        subl    $96,%eax
-       jc      .L056xts_dec_short
+       jc      L056xts_dec_short
        shrl    $1,%ecx
        movl    %ecx,%ebx
-       jmp     .L057xts_dec_loop6
-.align 16
-.L057xts_dec_loop6:
+       jmp     L057xts_dec_loop6
+.align 4,0x90
+L057xts_dec_loop6:
        pshufd  $19,%xmm0,%xmm2
        pxor    %xmm0,%xmm0
        movdqa  %xmm1,(%esp)
@@ -1440,7 +1426,7 @@ _aesni_xts_decrypt:
 .byte  102,15,56,222,241
        movups  (%edx),%xmm0
 .byte  102,15,56,222,249
-       call    .L_aesni_decrypt6_enter
+       call    L_aesni_decrypt6_enter
        movdqa  80(%esp),%xmm1
        pxor    %xmm0,%xmm0
        xorps   (%esp),%xmm2
@@ -1466,23 +1452,23 @@ _aesni_xts_decrypt:
        movl    %ebx,%ecx
        pxor    %xmm2,%xmm1
        subl    $96,%eax
-       jnc     .L057xts_dec_loop6
+       jnc     L057xts_dec_loop6
        leal    1(,%ecx,2),%ecx
        movl    %ebp,%edx
        movl    %ecx,%ebx
-.L056xts_dec_short:
+L056xts_dec_short:
        addl    $96,%eax
-       jz      .L058xts_dec_done6x
+       jz      L058xts_dec_done6x
        movdqa  %xmm1,%xmm5
        cmpl    $32,%eax
-       jb      .L059xts_dec_one
+       jb      L059xts_dec_one
        pshufd  $19,%xmm0,%xmm2
        pxor    %xmm0,%xmm0
        paddq   %xmm1,%xmm1
        pand    %xmm3,%xmm2
        pcmpgtd %xmm1,%xmm0
        pxor    %xmm2,%xmm1
-       je      .L060xts_dec_two
+       je      L060xts_dec_two
        pshufd  $19,%xmm0,%xmm2
        pxor    %xmm0,%xmm0
        movdqa  %xmm1,%xmm6
@@ -1491,7 +1477,7 @@ _aesni_xts_decrypt:
        pcmpgtd %xmm1,%xmm0
        pxor    %xmm2,%xmm1
        cmpl    $64,%eax
-       jb      .L061xts_dec_three
+       jb      L061xts_dec_three
        pshufd  $19,%xmm0,%xmm2
        pxor    %xmm0,%xmm0
        movdqa  %xmm1,%xmm7
@@ -1501,7 +1487,7 @@ _aesni_xts_decrypt:
        pxor    %xmm2,%xmm1
        movdqa  %xmm5,(%esp)
        movdqa  %xmm6,16(%esp)
-       je      .L062xts_dec_four
+       je      L062xts_dec_four
        movdqa  %xmm7,32(%esp)
        pshufd  $19,%xmm0,%xmm7
        movdqa  %xmm1,48(%esp)
@@ -1533,9 +1519,9 @@ _aesni_xts_decrypt:
        movups  %xmm5,48(%edi)
        movups  %xmm6,64(%edi)
        leal    80(%edi),%edi
-       jmp     .L063xts_dec_done
-.align 16
-.L059xts_dec_one:
+       jmp     L063xts_dec_done
+.align 4,0x90
+L059xts_dec_one:
        movups  (%esi),%xmm2
        leal    16(%esi),%esi
        xorps   %xmm5,%xmm2
@@ -1543,20 +1529,20 @@ _aesni_xts_decrypt:
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L064dec1_loop_12:
+L064dec1_loop_12:
 .byte  102,15,56,222,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L064dec1_loop_12
+       jnz     L064dec1_loop_12
 .byte  102,15,56,223,209
        xorps   %xmm5,%xmm2
        movups  %xmm2,(%edi)
        leal    16(%edi),%edi
        movdqa  %xmm5,%xmm1
-       jmp     .L063xts_dec_done
-.align 16
-.L060xts_dec_two:
+       jmp     L063xts_dec_done
+.align 4,0x90
+L060xts_dec_two:
        movaps  %xmm1,%xmm6
        movups  (%esi),%xmm2
        movups  16(%esi),%xmm3
@@ -1570,9 +1556,9 @@ _aesni_xts_decrypt:
        movups  %xmm3,16(%edi)
        leal    32(%edi),%edi
        movdqa  %xmm6,%xmm1
-       jmp     .L063xts_dec_done
-.align 16
-.L061xts_dec_three:
+       jmp     L063xts_dec_done
+.align 4,0x90
+L061xts_dec_three:
        movaps  %xmm1,%xmm7
        movups  (%esi),%xmm2
        movups  16(%esi),%xmm3
@@ -1590,9 +1576,9 @@ _aesni_xts_decrypt:
        movups  %xmm4,32(%edi)
        leal    48(%edi),%edi
        movdqa  %xmm7,%xmm1
-       jmp     .L063xts_dec_done
-.align 16
-.L062xts_dec_four:
+       jmp     L063xts_dec_done
+.align 4,0x90
+L062xts_dec_four:
        movaps  %xmm1,%xmm6
        movups  (%esi),%xmm2
        movups  16(%esi),%xmm3
@@ -1614,20 +1600,20 @@ _aesni_xts_decrypt:
        movups  %xmm5,48(%edi)
        leal    64(%edi),%edi
        movdqa  %xmm6,%xmm1
-       jmp     .L063xts_dec_done
-.align 16
-.L058xts_dec_done6x:
+       jmp     L063xts_dec_done
+.align 4,0x90
+L058xts_dec_done6x:
        movl    112(%esp),%eax
        andl    $15,%eax
-       jz      .L065xts_dec_ret
+       jz      L065xts_dec_ret
        movl    %eax,112(%esp)
-       jmp     .L066xts_dec_only_one_more
-.align 16
-.L063xts_dec_done:
+       jmp     L066xts_dec_only_one_more
+.align 4,0x90
+L063xts_dec_done:
        movl    112(%esp),%eax
        pxor    %xmm0,%xmm0
        andl    $15,%eax
-       jz      .L065xts_dec_ret
+       jz      L065xts_dec_ret
        pcmpgtd %xmm1,%xmm0
        movl    %eax,112(%esp)
        pshufd  $19,%xmm0,%xmm2
@@ -1637,7 +1623,7 @@ _aesni_xts_decrypt:
        pand    %xmm3,%xmm2
        pcmpgtd %xmm1,%xmm0
        pxor    %xmm2,%xmm1
-.L066xts_dec_only_one_more:
+L066xts_dec_only_one_more:
        pshufd  $19,%xmm0,%xmm5
        movdqa  %xmm1,%xmm6
        paddq   %xmm1,%xmm1
@@ -1651,16 +1637,16 @@ _aesni_xts_decrypt:
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L067dec1_loop_13:
+L067dec1_loop_13:
 .byte  102,15,56,222,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L067dec1_loop_13
+       jnz     L067dec1_loop_13
 .byte  102,15,56,223,209
        xorps   %xmm5,%xmm2
        movups  %xmm2,(%edi)
-.L068xts_dec_steal:
+L068xts_dec_steal:
        movzbl  16(%esi),%ecx
        movzbl  (%edi),%edx
        leal    1(%esi),%esi
@@ -1668,7 +1654,7 @@ _aesni_xts_decrypt:
        movb    %dl,16(%edi)
        leal    1(%edi),%edi
        subl    $1,%eax
-       jnz     .L068xts_dec_steal
+       jnz     L068xts_dec_steal
        subl    112(%esp),%edi
        movl    %ebp,%edx
        movl    %ebx,%ecx
@@ -1678,16 +1664,16 @@ _aesni_xts_decrypt:
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L069dec1_loop_14:
+L069dec1_loop_14:
 .byte  102,15,56,222,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L069dec1_loop_14
+       jnz     L069dec1_loop_14
 .byte  102,15,56,223,209
        xorps   %xmm6,%xmm2
        movups  %xmm2,(%edi)
-.L065xts_dec_ret:
+L065xts_dec_ret:
        movl    116(%esp),%esp
        popl    %edi
        popl    %esi
@@ -1695,10 +1681,9 @@ _aesni_xts_decrypt:
        popl    %ebp
        ret
 .globl _aesni_cbc_encrypt
-.def   _aesni_cbc_encrypt;     .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _aesni_cbc_encrypt:
-.L_aesni_cbc_encrypt_begin:
+L_aesni_cbc_encrypt_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
@@ -1712,7 +1697,7 @@ _aesni_cbc_encrypt:
        movl    32(%esp),%edx
        movl    36(%esp),%ebp
        testl   %eax,%eax
-       jz      .L070cbc_abort
+       jz      L070cbc_abort
        cmpl    $0,40(%esp)
        xchgl   %esp,%ebx
        movups  (%ebp),%xmm7
@@ -1720,14 +1705,14 @@ _aesni_cbc_encrypt:
        movl    %edx,%ebp
        movl    %ebx,16(%esp)
        movl    %ecx,%ebx
-       je      .L071cbc_decrypt
+       je      L071cbc_decrypt
        movaps  %xmm7,%xmm2
        cmpl    $16,%eax
-       jb      .L072cbc_enc_tail
+       jb      L072cbc_enc_tail
        subl    $16,%eax
-       jmp     .L073cbc_enc_loop
-.align 16
-.L073cbc_enc_loop:
+       jmp     L073cbc_enc_loop
+.align 4,0x90
+L073cbc_enc_loop:
        movups  (%esi),%xmm7
        leal    16(%esi),%esi
        movups  (%edx),%xmm0
@@ -1735,24 +1720,24 @@ _aesni_cbc_encrypt:
        xorps   %xmm0,%xmm7
        leal    32(%edx),%edx
        xorps   %xmm7,%xmm2
-.L074enc1_loop_15:
+L074enc1_loop_15:
 .byte  102,15,56,220,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L074enc1_loop_15
+       jnz     L074enc1_loop_15
 .byte  102,15,56,221,209
        movl    %ebx,%ecx
        movl    %ebp,%edx
        movups  %xmm2,(%edi)
        leal    16(%edi),%edi
        subl    $16,%eax
-       jnc     .L073cbc_enc_loop
+       jnc     L073cbc_enc_loop
        addl    $16,%eax
-       jnz     .L072cbc_enc_tail
+       jnz     L072cbc_enc_tail
        movaps  %xmm2,%xmm7
-       jmp     .L075cbc_ret
-.L072cbc_enc_tail:
+       jmp     L075cbc_ret
+L072cbc_enc_tail:
        movl    %eax,%ecx
 .long  2767451785
        movl    $16,%ecx
@@ -1763,20 +1748,20 @@ _aesni_cbc_encrypt:
        movl    %ebx,%ecx
        movl    %edi,%esi
        movl    %ebp,%edx
-       jmp     .L073cbc_enc_loop
-.align 16
-.L071cbc_decrypt:
+       jmp     L073cbc_enc_loop
+.align 4,0x90
+L071cbc_decrypt:
        cmpl    $80,%eax
-       jbe     .L076cbc_dec_tail
+       jbe     L076cbc_dec_tail
        movaps  %xmm7,(%esp)
        subl    $80,%eax
-       jmp     .L077cbc_dec_loop6_enter
-.align 16
-.L078cbc_dec_loop6:
+       jmp     L077cbc_dec_loop6_enter
+.align 4,0x90
+L078cbc_dec_loop6:
        movaps  %xmm0,(%esp)
        movups  %xmm7,(%edi)
        leal    16(%edi),%edi
-.L077cbc_dec_loop6_enter:
+L077cbc_dec_loop6_enter:
        movdqu  (%esi),%xmm2
        movdqu  16(%esi),%xmm3
        movdqu  32(%esi),%xmm4
@@ -1806,28 +1791,28 @@ _aesni_cbc_encrypt:
        movups  %xmm6,64(%edi)
        leal    80(%edi),%edi
        subl    $96,%eax
-       ja      .L078cbc_dec_loop6
+       ja      L078cbc_dec_loop6
        movaps  %xmm7,%xmm2
        movaps  %xmm0,%xmm7
        addl    $80,%eax
-       jle     .L079cbc_dec_tail_collected
+       jle     L079cbc_dec_tail_collected
        movups  %xmm2,(%edi)
        leal    16(%edi),%edi
-.L076cbc_dec_tail:
+L076cbc_dec_tail:
        movups  (%esi),%xmm2
        movaps  %xmm2,%xmm6
        cmpl    $16,%eax
-       jbe     .L080cbc_dec_one
+       jbe     L080cbc_dec_one
        movups  16(%esi),%xmm3
        movaps  %xmm3,%xmm5
        cmpl    $32,%eax
-       jbe     .L081cbc_dec_two
+       jbe     L081cbc_dec_two
        movups  32(%esi),%xmm4
        cmpl    $48,%eax
-       jbe     .L082cbc_dec_three
+       jbe     L082cbc_dec_three
        movups  48(%esi),%xmm5
        cmpl    $64,%eax
-       jbe     .L083cbc_dec_four
+       jbe     L083cbc_dec_four
        movups  64(%esi),%xmm6
        movaps  %xmm7,(%esp)
        movups  (%esi),%xmm2
@@ -1850,26 +1835,26 @@ _aesni_cbc_encrypt:
        leal    64(%edi),%edi
        movaps  %xmm6,%xmm2
        subl    $80,%eax
-       jmp     .L079cbc_dec_tail_collected
-.align 16
-.L080cbc_dec_one:
+       jmp     L079cbc_dec_tail_collected
+.align 4,0x90
+L080cbc_dec_one:
        movups  (%edx),%xmm0
        movups  16(%edx),%xmm1
        leal    32(%edx),%edx
        xorps   %xmm0,%xmm2
-.L084dec1_loop_16:
+L084dec1_loop_16:
 .byte  102,15,56,222,209
        decl    %ecx
        movups  (%edx),%xmm1
        leal    16(%edx),%edx
-       jnz     .L084dec1_loop_16
+       jnz     L084dec1_loop_16
 .byte  102,15,56,223,209
        xorps   %xmm7,%xmm2
        movaps  %xmm6,%xmm7
        subl    $16,%eax
-       jmp     .L079cbc_dec_tail_collected
-.align 16
-.L081cbc_dec_two:
+       jmp     L079cbc_dec_tail_collected
+.align 4,0x90
+L081cbc_dec_two:
        xorps   %xmm4,%xmm4
        call    __aesni_decrypt3
        xorps   %xmm7,%xmm2
@@ -1879,9 +1864,9 @@ _aesni_cbc_encrypt:
        leal    16(%edi),%edi
        movaps  %xmm5,%xmm7
        subl    $32,%eax
-       jmp     .L079cbc_dec_tail_collected
-.align 16
-.L082cbc_dec_three:
+       jmp     L079cbc_dec_tail_collected
+.align 4,0x90
+L082cbc_dec_three:
        call    __aesni_decrypt3
        xorps   %xmm7,%xmm2
        xorps   %xmm6,%xmm3
@@ -1892,9 +1877,9 @@ _aesni_cbc_encrypt:
        leal    32(%edi),%edi
        movups  32(%esi),%xmm7
        subl    $48,%eax
-       jmp     .L079cbc_dec_tail_collected
-.align 16
-.L083cbc_dec_four:
+       jmp     L079cbc_dec_tail_collected
+.align 4,0x90
+L083cbc_dec_four:
        call    __aesni_decrypt4
        movups  16(%esi),%xmm1
        movups  32(%esi),%xmm0
@@ -1909,77 +1894,76 @@ _aesni_cbc_encrypt:
        leal    48(%edi),%edi
        movaps  %xmm5,%xmm2
        subl    $64,%eax
-.L079cbc_dec_tail_collected:
+L079cbc_dec_tail_collected:
        andl    $15,%eax
-       jnz     .L085cbc_dec_tail_partial
+       jnz     L085cbc_dec_tail_partial
        movups  %xmm2,(%edi)
-       jmp     .L075cbc_ret
-.align 16
-.L085cbc_dec_tail_partial:
+       jmp     L075cbc_ret
+.align 4,0x90
+L085cbc_dec_tail_partial:
        movaps  %xmm2,(%esp)
        movl    $16,%ecx
        movl    %esp,%esi
        subl    %eax,%ecx
 .long  2767451785
-.L075cbc_ret:
+L075cbc_ret:
        movl    16(%esp),%esp
        movl    36(%esp),%ebp
        movups  %xmm7,(%ebp)
-.L070cbc_abort:
+L070cbc_abort:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
-.def   __aesni_set_encrypt_key;        .scl    3;      .type   32;     .endef
-.align 16
+.align 4
 __aesni_set_encrypt_key:
        testl   %eax,%eax
-       jz      .L086bad_pointer
+       jz      L086bad_pointer
        testl   %edx,%edx
-       jz      .L086bad_pointer
+       jz      L086bad_pointer
        movups  (%eax),%xmm0
        xorps   %xmm4,%xmm4
        leal    16(%edx),%edx
        cmpl    $256,%ecx
-       je      .L08714rounds
+       je      L08714rounds
        cmpl    $192,%ecx
-       je      .L08812rounds
+       je      L08812rounds
        cmpl    $128,%ecx
-       jne     .L089bad_keybits
-.align 16
-.L09010rounds:
+       jne     L089bad_keybits
+.align 4,0x90
+L09010rounds:
        movl    $9,%ecx
        movups  %xmm0,-16(%edx)
 .byte  102,15,58,223,200,1
-       call    .L091key_128_cold
+       call    L091key_128_cold
 .byte  102,15,58,223,200,2
-       call    .L092key_128
+       call    L092key_128
 .byte  102,15,58,223,200,4
-       call    .L092key_128
+       call    L092key_128
 .byte  102,15,58,223,200,8
-       call    .L092key_128
+       call    L092key_128
 .byte  102,15,58,223,200,16
-       call    .L092key_128
+       call    L092key_128
 .byte  102,15,58,223,200,32
-       call    .L092key_128
+       call    L092key_128
 .byte  102,15,58,223,200,64
-       call    .L092key_128
+       call    L092key_128
 .byte  102,15,58,223,200,128
-       call    .L092key_128
+       call    L092key_128
 .byte  102,15,58,223,200,27
-       call    .L092key_128
+       call    L092key_128
 .byte  102,15,58,223,200,54
-       call    .L092key_128
+       call    L092key_128
        movups  %xmm0,(%edx)
        movl    %ecx,80(%edx)
        xorl    %eax,%eax
        ret
-.align 16
-.L092key_128:
+.align 4,0x90
+L092key_128:
        movups  %xmm0,(%edx)
        leal    16(%edx),%edx
-.L091key_128_cold:
+L091key_128_cold:
        shufps  $16,%xmm0,%xmm4
        xorps   %xmm4,%xmm0
        shufps  $140,%xmm0,%xmm4
@@ -1987,39 +1971,39 @@ __aesni_set_encrypt_key:
        shufps  $255,%xmm1,%xmm1
        xorps   %xmm1,%xmm0
        ret
-.align 16
-.L08812rounds:
+.align 4,0x90
+L08812rounds:
        movq    16(%eax),%xmm2
        movl    $11,%ecx
        movups  %xmm0,-16(%edx)
 .byte  102,15,58,223,202,1
-       call    .L093key_192a_cold
+       call    L093key_192a_cold
 .byte  102,15,58,223,202,2
-       call    .L094key_192b
+       call    L094key_192b
 .byte  102,15,58,223,202,4
-       call    .L095key_192a
+       call    L095key_192a
 .byte  102,15,58,223,202,8
-       call    .L094key_192b
+       call    L094key_192b
 .byte  102,15,58,223,202,16
-       call    .L095key_192a
+       call    L095key_192a
 .byte  102,15,58,223,202,32
-       call    .L094key_192b
+       call    L094key_192b
 .byte  102,15,58,223,202,64
-       call    .L095key_192a
+       call    L095key_192a
 .byte  102,15,58,223,202,128
-       call    .L094key_192b
+       call    L094key_192b
        movups  %xmm0,(%edx)
        movl    %ecx,48(%edx)
        xorl    %eax,%eax
        ret
-.align 16
-.L095key_192a:
+.align 4,0x90
+L095key_192a:
        movups  %xmm0,(%edx)
        leal    16(%edx),%edx
-.align 16
-.L093key_192a_cold:
+.align 4,0x90
+L093key_192a_cold:
        movaps  %xmm2,%xmm5
-.L096key_192b_warm:
+L096key_192b_warm:
        shufps  $16,%xmm0,%xmm4
        movdqa  %xmm2,%xmm3
        xorps   %xmm4,%xmm0
@@ -2032,57 +2016,57 @@ __aesni_set_encrypt_key:
        pshufd  $255,%xmm0,%xmm3
        pxor    %xmm3,%xmm2
        ret
-.align 16
-.L094key_192b:
+.align 4,0x90
+L094key_192b:
        movaps  %xmm0,%xmm3
        shufps  $68,%xmm0,%xmm5
        movups  %xmm5,(%edx)
        shufps  $78,%xmm2,%xmm3
        movups  %xmm3,16(%edx)
        leal    32(%edx),%edx
-       jmp     .L096key_192b_warm
-.align 16
-.L08714rounds:
+       jmp     L096key_192b_warm
+.align 4,0x90
+L08714rounds:
        movups  16(%eax),%xmm2
        movl    $13,%ecx
        leal    16(%edx),%edx
        movups  %xmm0,-32(%edx)
        movups  %xmm2,-16(%edx)
 .byte  102,15,58,223,202,1
-       call    .L097key_256a_cold
+       call    L097key_256a_cold
 .byte  102,15,58,223,200,1
-       call    .L098key_256b
+       call    L098key_256b
 .byte  102,15,58,223,202,2
-       call    .L099key_256a
+       call    L099key_256a
 .byte  102,15,58,223,200,2
-       call    .L098key_256b
+       call    L098key_256b
 .byte  102,15,58,223,202,4
-       call    .L099key_256a
+       call    L099key_256a
 .byte  102,15,58,223,200,4
-       call    .L098key_256b
+       call    L098key_256b
 .byte  102,15,58,223,202,8
-       call    .L099key_256a
+       call    L099key_256a
 .byte  102,15,58,223,200,8
-       call    .L098key_256b
+       call    L098key_256b
 .byte  102,15,58,223,202,16
-       call    .L099key_256a
+       call    L099key_256a
 .byte  102,15,58,223,200,16
-       call    .L098key_256b
+       call    L098key_256b
 .byte  102,15,58,223,202,32
-       call    .L099key_256a
+       call    L099key_256a
 .byte  102,15,58,223,200,32
-       call    .L098key_256b
+       call    L098key_256b
 .byte  102,15,58,223,202,64
-       call    .L099key_256a
+       call    L099key_256a
        movups  %xmm0,(%edx)
        movl    %ecx,16(%edx)
        xorl    %eax,%eax
        ret
-.align 16
-.L099key_256a:
+.align 4,0x90
+L099key_256a:
        movups  %xmm2,(%edx)
        leal    16(%edx),%edx
-.L097key_256a_cold:
+L097key_256a_cold:
        shufps  $16,%xmm0,%xmm4
        xorps   %xmm4,%xmm0
        shufps  $140,%xmm0,%xmm4
@@ -2090,8 +2074,8 @@ __aesni_set_encrypt_key:
        shufps  $255,%xmm1,%xmm1
        xorps   %xmm1,%xmm0
        ret
-.align 16
-.L098key_256b:
+.align 4,0x90
+L098key_256b:
        movups  %xmm0,(%edx)
        leal    16(%edx),%edx
        shufps  $16,%xmm2,%xmm4
@@ -2101,29 +2085,27 @@ __aesni_set_encrypt_key:
        shufps  $170,%xmm1,%xmm1
        xorps   %xmm1,%xmm2
        ret
-.align 4
-.L086bad_pointer:
+.align 2,0x90
+L086bad_pointer:
        movl    $-1,%eax
        ret
-.align 4
-.L089bad_keybits:
+.align 2,0x90
+L089bad_keybits:
        movl    $-2,%eax
        ret
 .globl _aesni_set_encrypt_key
-.def   _aesni_set_encrypt_key; .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _aesni_set_encrypt_key:
-.L_aesni_set_encrypt_key_begin:
+L_aesni_set_encrypt_key_begin:
        movl    4(%esp),%eax
        movl    8(%esp),%ecx
        movl    12(%esp),%edx
        call    __aesni_set_encrypt_key
        ret
 .globl _aesni_set_decrypt_key
-.def   _aesni_set_decrypt_key; .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _aesni_set_decrypt_key:
-.L_aesni_set_decrypt_key_begin:
+L_aesni_set_decrypt_key_begin:
        movl    4(%esp),%eax
        movl    8(%esp),%ecx
        movl    12(%esp),%edx
@@ -2131,7 +2113,7 @@ _aesni_set_decrypt_key:
        movl    12(%esp),%edx
        shll    $4,%ecx
        testl   %eax,%eax
-       jnz     .L100dec_key_ret
+       jnz     L100dec_key_ret
        leal    16(%edx,%ecx,1),%eax
        movups  (%edx),%xmm0
        movups  (%eax),%xmm1
@@ -2139,7 +2121,7 @@ _aesni_set_decrypt_key:
        movups  %xmm1,(%edx)
        leal    16(%edx),%edx
        leal    -16(%eax),%eax
-.L101dec_key_inverse:
+L101dec_key_inverse:
        movups  (%edx),%xmm0
        movups  (%eax),%xmm1
 .byte  102,15,56,219,192
@@ -2149,12 +2131,12 @@ _aesni_set_decrypt_key:
        movups  %xmm0,16(%eax)
        movups  %xmm1,-16(%edx)
        cmpl    %edx,%eax
-       ja      .L101dec_key_inverse
+       ja      L101dec_key_inverse
        movups  (%edx),%xmm0
 .byte  102,15,56,219,192
        movups  %xmm0,(%edx)
        xorl    %eax,%eax
-.L100dec_key_ret:
+L100dec_key_ret:
        ret
 .byte  65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
 .byte  83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
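[Note on the label and alignment rewrites above: ELF's ".align N" on x86 aligns
to N bytes, whereas Mach-O's ".align N" aligns to 2^N bytes, so ".align 16"
becomes ".align 4" (the ",0x90" requests NOP padding); likewise ".L"-prefixed
ELF local labels become plain "L" labels, which the Darwin assembler keeps out
of the symbol table. A minimal illustrative fragment, with a hypothetical
label name:

    # ELF / GNU as (Linux):
    .align 16                  # align to 16 bytes
    .Lloop:                    # assembler-local label
            decl    %ecx
            jnz     .Lloop

    # Mach-O (Mac OS X) equivalent:
    .align 4,0x90              # align to 2^4 = 16 bytes, pad with NOP (0x90)
    Lloop:                     # "L" labels are local on Mach-O
            decl    %ecx
            jnz     Lloop
]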
diff --git a/lib/accelerated/x86/asm-coff/cpuid-x86-64-coff.s b/lib/accelerated/x86/asm-macosx/cpuid-x86-64-macosx.s
similarity index 90%
copy from lib/accelerated/x86/asm-coff/cpuid-x86-64-coff.s
copy to lib/accelerated/x86/asm-macosx/cpuid-x86-64-macosx.s
index c7e9cc4..b4a4e94 100644
--- a/lib/accelerated/x86/asm-coff/cpuid-x86-64-coff.s
+++ b/lib/accelerated/x86/asm-macosx/cpuid-x86-64-macosx.s
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2011-2012 Free Software Foundation, Inc.
+# Copyright (C) 2011 Free Software Foundation, Inc.
 #
 # Author: Nikos Mavrogiannopoulos
 #
@@ -19,10 +19,10 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 .text  
-.globl gnutls_cpuid
-.def   gnutls_cpuid;   .scl 2; .type 32;       .endef
+.globl _gnutls_cpuid
+
 .p2align       4
-gnutls_cpuid:
+_gnutls_cpuid:
        pushq   %rbp
        movq    %rsp,%rbp
        pushq   %rbx
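[The cpuid hunk above also shows the symbol-level differences between the two
object formats: COFF output carries a ".def/.scl/.type/.endef" record for each
public symbol, which Mach-O does not use, and C-visible symbols gain a leading
underscore on Darwin. A hedged before/after of the one declaration from this
hunk:

    # COFF (Win64) version:
    .globl  gnutls_cpuid
    .def    gnutls_cpuid;   .scl 2; .type 32;       .endef
    .p2align        4
    gnutls_cpuid:

    # Mach-O (Mac OS X) version: no .def record, underscore prefix:
    .globl  _gnutls_cpuid
    .p2align        4
    _gnutls_cpuid:
]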
diff --git a/lib/accelerated/x86/asm-coff/cpuid-x86-coff.s b/lib/accelerated/x86/asm-macosx/cpuid-x86-macosx.s
similarity index 86%
copy from lib/accelerated/x86/asm-coff/cpuid-x86-coff.s
copy to lib/accelerated/x86/asm-macosx/cpuid-x86-macosx.s
index cf5365e..a69a5c7 100644
--- a/lib/accelerated/x86/asm-coff/cpuid-x86-coff.s
+++ b/lib/accelerated/x86/asm-macosx/cpuid-x86-macosx.s
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2011-2012 Free Software Foundation, Inc.
+# Copyright (C) 2011 Free Software Foundation, Inc.
 #
 # Author: Nikos Mavrogiannopoulos
 #
@@ -21,10 +21,9 @@
 .file  "devel/perlasm/cpuid-x86.s"
 .text
 .globl _gnutls_cpuid
-.def   _gnutls_cpuid;  .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _gnutls_cpuid:
-.L_gnutls_cpuid_begin:
+L_gnutls_cpuid_begin:
        pushl   %ebp
        movl    %esp,%ebp
        subl    $12,%esp
@@ -52,10 +51,9 @@ _gnutls_cpuid:
        popl    %ebp
        ret
 .globl _gnutls_have_cpuid
-.def   _gnutls_have_cpuid;     .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _gnutls_have_cpuid:
-.L_gnutls_have_cpuid_begin:
+L_gnutls_have_cpuid_begin:
        pushfl
        popl    %eax
        orl     $2097152,%eax
diff --git a/lib/accelerated/x86/asm/padlock-x86-64.s b/lib/accelerated/x86/asm-macosx/padlock-x86-64-macosx.s
similarity index 71%
copy from lib/accelerated/x86/asm/padlock-x86-64.s
copy to lib/accelerated/x86/asm-macosx/padlock-x86-64-macosx.s
index 020d6e5..a19e824 100644
--- a/lib/accelerated/x86/asm/padlock-x86-64.s
+++ b/lib/accelerated/x86/asm-macosx/padlock-x86-64-macosx.s
@@ -36,91 +36,91 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 .text  
-.globl padlock_capability
-.type  padlock_capability,@function
-.align 16
-padlock_capability:
+.globl _padlock_capability
+
+.p2align       4
+_padlock_capability:
        movq    %rbx,%r8
        xorl    %eax,%eax
        cpuid
        xorl    %eax,%eax
        cmpl    $1953391939,%ebx
-       jne     .Lnoluck
+       jne     L$noluck
        cmpl    $1215460705,%edx
-       jne     .Lnoluck
+       jne     L$noluck
        cmpl    $1936487777,%ecx
-       jne     .Lnoluck
+       jne     L$noluck
        movl    $3221225472,%eax
        cpuid
        movl    %eax,%edx
        xorl    %eax,%eax
        cmpl    $3221225473,%edx
-       jb      .Lnoluck
+       jb      L$noluck
        movl    $3221225473,%eax
        cpuid
        movl    %edx,%eax
        andl    $4294967279,%eax
        orl     $16,%eax
-.Lnoluck:
+L$noluck:
        movq    %r8,%rbx
        .byte   0xf3,0xc3
-.size  padlock_capability,.-padlock_capability
 
-.globl padlock_key_bswap
-.type  padlock_key_bswap,@function
-.align 16
-padlock_key_bswap:
+
+.globl _padlock_key_bswap
+
+.p2align       4
+_padlock_key_bswap:
        movl    240(%rdi),%edx
-.Lbswap_loop:
+L$bswap_loop:
        movl    (%rdi),%eax
        bswapl  %eax
        movl    %eax,(%rdi)
        leaq    4(%rdi),%rdi
        subl    $1,%edx
-       jnz     .Lbswap_loop
+       jnz     L$bswap_loop
        .byte   0xf3,0xc3
-.size  padlock_key_bswap,.-padlock_key_bswap
 
-.globl padlock_verify_context
-.type  padlock_verify_context,@function
-.align 16
-padlock_verify_context:
+
+.globl _padlock_verify_context
+
+.p2align       4
+_padlock_verify_context:
        movq    %rdi,%rdx
        pushf
-       leaq    .Lpadlock_saved_context(%rip),%rax
+       leaq    L$padlock_saved_context(%rip),%rax
        call    _padlock_verify_ctx
        leaq    8(%rsp),%rsp
        .byte   0xf3,0xc3
-.size  padlock_verify_context,.-padlock_verify_context
 
-.type  _padlock_verify_ctx,@function
-.align 16
+
+
+.p2align       4
 _padlock_verify_ctx:
        movq    8(%rsp),%r8
        btq     $30,%r8
-       jnc     .Lverified
+       jnc     L$verified
        cmpq    (%rax),%rdx
-       je      .Lverified
+       je      L$verified
        pushf
        popf
-.Lverified:
+L$verified:
        movq    %rdx,(%rax)
        .byte   0xf3,0xc3
-.size  _padlock_verify_ctx,.-_padlock_verify_ctx
 
-.globl padlock_reload_key
-.type  padlock_reload_key,@function
-.align 16
-padlock_reload_key:
+
+.globl _padlock_reload_key
+
+.p2align       4
+_padlock_reload_key:
        pushf
        popf
        .byte   0xf3,0xc3
-.size  padlock_reload_key,.-padlock_reload_key
 
-.globl padlock_aes_block
-.type  padlock_aes_block,@function
-.align 16
-padlock_aes_block:
+
+.globl _padlock_aes_block
+
+.p2align       4
+_padlock_aes_block:
        movq    %rbx,%r8
        movq    $1,%rcx
        leaq    32(%rdx),%rbx
@@ -128,21 +128,21 @@ padlock_aes_block:
 .byte  0xf3,0x0f,0xa7,0xc8     
        movq    %r8,%rbx
        .byte   0xf3,0xc3
-.size  padlock_aes_block,.-padlock_aes_block
 
-.globl padlock_xstore
-.type  padlock_xstore,@function
-.align 16
-padlock_xstore:
+
+.globl _padlock_xstore
+
+.p2align       4
+_padlock_xstore:
        movl    %esi,%edx
 .byte  0x0f,0xa7,0xc0          
        .byte   0xf3,0xc3
-.size  padlock_xstore,.-padlock_xstore
 
-.globl padlock_sha1_oneshot
-.type  padlock_sha1_oneshot,@function
-.align 16
-padlock_sha1_oneshot:
+
+.globl _padlock_sha1_oneshot
+
+.p2align       4
+_padlock_sha1_oneshot:
        movq    %rdx,%rcx
        movq    %rdi,%rdx
        movups  (%rdi),%xmm0
@@ -159,12 +159,12 @@ padlock_sha1_oneshot:
        movups  %xmm0,(%rdx)
        movl    %eax,16(%rdx)
        .byte   0xf3,0xc3
-.size  padlock_sha1_oneshot,.-padlock_sha1_oneshot
 
-.globl padlock_sha1_blocks
-.type  padlock_sha1_blocks,@function
-.align 16
-padlock_sha1_blocks:
+
+.globl _padlock_sha1_blocks
+
+.p2align       4
+_padlock_sha1_blocks:
        movq    %rdx,%rcx
        movq    %rdi,%rdx
        movups  (%rdi),%xmm0
@@ -181,12 +181,12 @@ padlock_sha1_blocks:
        movups  %xmm0,(%rdx)
        movl    %eax,16(%rdx)
        .byte   0xf3,0xc3
-.size  padlock_sha1_blocks,.-padlock_sha1_blocks
 
-.globl padlock_sha256_oneshot
-.type  padlock_sha256_oneshot,@function
-.align 16
-padlock_sha256_oneshot:
+
+.globl _padlock_sha256_oneshot
+
+.p2align       4
+_padlock_sha256_oneshot:
        movq    %rdx,%rcx
        movq    %rdi,%rdx
        movups  (%rdi),%xmm0
@@ -203,12 +203,12 @@ padlock_sha256_oneshot:
        movups  %xmm0,(%rdx)
        movups  %xmm1,16(%rdx)
        .byte   0xf3,0xc3
-.size  padlock_sha256_oneshot,.-padlock_sha256_oneshot
 
-.globl padlock_sha256_blocks
-.type  padlock_sha256_blocks,@function
-.align 16
-padlock_sha256_blocks:
+
+.globl _padlock_sha256_blocks
+
+.p2align       4
+_padlock_sha256_blocks:
        movq    %rdx,%rcx
        movq    %rdi,%rdx
        movups  (%rdi),%xmm0
@@ -225,12 +225,12 @@ padlock_sha256_blocks:
        movups  %xmm0,(%rdx)
        movups  %xmm1,16(%rdx)
        .byte   0xf3,0xc3
-.size  padlock_sha256_blocks,.-padlock_sha256_blocks
 
-.globl padlock_sha512_blocks
-.type  padlock_sha512_blocks,@function
-.align 16
-padlock_sha512_blocks:
+
+.globl _padlock_sha512_blocks
+
+.p2align       4
+_padlock_sha512_blocks:
        movq    %rdx,%rcx
        movq    %rdi,%rdx
        movups  (%rdi),%xmm0
@@ -254,20 +254,20 @@ padlock_sha512_blocks:
        movups  %xmm2,32(%rdx)
        movups  %xmm3,48(%rdx)
        .byte   0xf3,0xc3
-.size  padlock_sha512_blocks,.-padlock_sha512_blocks
-.globl padlock_ecb_encrypt
-.type  padlock_ecb_encrypt,@function
-.align 16
-padlock_ecb_encrypt:
+
+.globl _padlock_ecb_encrypt
+
+.p2align       4
+_padlock_ecb_encrypt:
        pushq   %rbp
        pushq   %rbx
 
        xorl    %eax,%eax
        testq   $15,%rdx
-       jnz     .Lecb_abort
+       jnz     L$ecb_abort
        testq   $15,%rcx
-       jnz     .Lecb_abort
-       leaq    .Lpadlock_saved_context(%rip),%rax
+       jnz     L$ecb_abort
+       leaq    L$padlock_saved_context(%rip),%rax
        pushf
        cld
        call    _padlock_verify_ctx
@@ -275,15 +275,15 @@ padlock_ecb_encrypt:
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        cmpq    $128,%rcx
-       jbe     .Lecb_short
+       jbe     L$ecb_short
        testl   $32,(%rdx)
-       jnz     .Lecb_aligned
+       jnz     L$ecb_aligned
        testq   $15,%rdi
        setz    %al
        testq   $15,%rsi
        setz    %bl
        testl   %ebx,%eax
-       jnz     .Lecb_aligned
+       jnz     L$ecb_aligned
        negq    %rax
        movq    $512,%rbx
        notq    %rax
@@ -295,9 +295,9 @@ padlock_ecb_encrypt:
        negq    %rax
        andq    $512-1,%rbx
        leaq    (%rax,%rbp,1),%rsp
-       jmp     .Lecb_loop
-.align 16
-.Lecb_loop:
+       jmp     L$ecb_loop
+.p2align       4
+L$ecb_loop:
        cmpq    %rcx,%rbx
        cmovaq  %rcx,%rbx
        movq    %rdi,%r8
@@ -308,13 +308,13 @@ padlock_ecb_encrypt:
        testq   $15,%rdi
        cmovnzq %rsp,%rdi
        testq   $15,%rsi
-       jz      .Lecb_inp_aligned
+       jz      L$ecb_inp_aligned
        shrq    $3,%rcx
 .byte  0xf3,0x48,0xa5          
        subq    %rbx,%rdi
        movq    %rbx,%rcx
        movq    %rdi,%rsi
-.Lecb_inp_aligned:
+L$ecb_inp_aligned:
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
@@ -322,76 +322,76 @@ padlock_ecb_encrypt:
        movq    %r8,%rdi
        movq    %r11,%rbx
        testq   $15,%rdi
-       jz      .Lecb_out_aligned
+       jz      L$ecb_out_aligned
        movq    %rbx,%rcx
        shrq    $3,%rcx
        leaq    (%rsp),%rsi
 .byte  0xf3,0x48,0xa5          
        subq    %rbx,%rdi
-.Lecb_out_aligned:
+L$ecb_out_aligned:
        movq    %r9,%rsi
        movq    %r10,%rcx
        addq    %rbx,%rdi
        addq    %rbx,%rsi
        subq    %rbx,%rcx
        movq    $512,%rbx
-       jnz     .Lecb_loop
+       jnz     L$ecb_loop
 
        cmpq    %rsp,%rbp
-       je      .Lecb_done
+       je      L$ecb_done
 
        pxor    %xmm0,%xmm0
        leaq    (%rsp),%rax
-.Lecb_bzero:
+L$ecb_bzero:
        movaps  %xmm0,(%rax)
        leaq    16(%rax),%rax
        cmpq    %rax,%rbp
-       ja      .Lecb_bzero
+       ja      L$ecb_bzero
 
-.Lecb_done:
+L$ecb_done:
        leaq    (%rbp),%rsp
-       jmp     .Lecb_exit
-.align 16
-.Lecb_short:
+       jmp     L$ecb_exit
+.p2align       4
+L$ecb_short:
        movq    %rsp,%rbp
        subq    %rcx,%rsp
        xorq    %rbx,%rbx
-.Lecb_short_copy:
+L$ecb_short_copy:
        movups  (%rsi,%rbx,1),%xmm0
        leaq    16(%rbx),%rbx
        cmpq    %rbx,%rcx
        movaps  %xmm0,-16(%rsp,%rbx,1)
-       ja      .Lecb_short_copy
+       ja      L$ecb_short_copy
        movq    %rsp,%rsi
        movq    %rcx,%rbx
-       jmp     .Lecb_loop
-.align 16
-.Lecb_aligned:
+       jmp     L$ecb_loop
+.p2align       4
+L$ecb_aligned:
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
 .byte  0xf3,0x0f,0xa7,200      
-.Lecb_exit:
+L$ecb_exit:
        movl    $1,%eax
        leaq    8(%rsp),%rsp
-.Lecb_abort:
+L$ecb_abort:
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3
-.size  padlock_ecb_encrypt,.-padlock_ecb_encrypt
-.globl padlock_cbc_encrypt
-.type  padlock_cbc_encrypt,@function
-.align 16
-padlock_cbc_encrypt:
+
+.globl _padlock_cbc_encrypt
+
+.p2align       4
+_padlock_cbc_encrypt:
        pushq   %rbp
        pushq   %rbx
 
        xorl    %eax,%eax
        testq   $15,%rdx
-       jnz     .Lcbc_abort
+       jnz     L$cbc_abort
        testq   $15,%rcx
-       jnz     .Lcbc_abort
-       leaq    .Lpadlock_saved_context(%rip),%rax
+       jnz     L$cbc_abort
+       leaq    L$padlock_saved_context(%rip),%rax
        pushf
        cld
        call    _padlock_verify_ctx
@@ -399,15 +399,15 @@ padlock_cbc_encrypt:
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        cmpq    $64,%rcx
-       jbe     .Lcbc_short
+       jbe     L$cbc_short
        testl   $32,(%rdx)
-       jnz     .Lcbc_aligned
+       jnz     L$cbc_aligned
        testq   $15,%rdi
        setz    %al
        testq   $15,%rsi
        setz    %bl
        testl   %ebx,%eax
-       jnz     .Lcbc_aligned
+       jnz     L$cbc_aligned
        negq    %rax
        movq    $512,%rbx
        notq    %rax
@@ -419,9 +419,9 @@ padlock_cbc_encrypt:
        negq    %rax
        andq    $512-1,%rbx
        leaq    (%rax,%rbp,1),%rsp
-       jmp     .Lcbc_loop
-.align 16
-.Lcbc_loop:
+       jmp     L$cbc_loop
+.p2align       4
+L$cbc_loop:
        cmpq    %rcx,%rbx
        cmovaq  %rcx,%rbx
        movq    %rdi,%r8
@@ -432,13 +432,13 @@ padlock_cbc_encrypt:
        testq   $15,%rdi
        cmovnzq %rsp,%rdi
        testq   $15,%rsi
-       jz      .Lcbc_inp_aligned
+       jz      L$cbc_inp_aligned
        shrq    $3,%rcx
 .byte  0xf3,0x48,0xa5          
        subq    %rbx,%rdi
        movq    %rbx,%rcx
        movq    %rdi,%rsi
-.Lcbc_inp_aligned:
+L$cbc_inp_aligned:
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
@@ -448,70 +448,68 @@ padlock_cbc_encrypt:
        movq    %r8,%rdi
        movq    %r11,%rbx
        testq   $15,%rdi
-       jz      .Lcbc_out_aligned
+       jz      L$cbc_out_aligned
        movq    %rbx,%rcx
        shrq    $3,%rcx
        leaq    (%rsp),%rsi
 .byte  0xf3,0x48,0xa5          
        subq    %rbx,%rdi
-.Lcbc_out_aligned:
+L$cbc_out_aligned:
        movq    %r9,%rsi
        movq    %r10,%rcx
        addq    %rbx,%rdi
        addq    %rbx,%rsi
        subq    %rbx,%rcx
        movq    $512,%rbx
-       jnz     .Lcbc_loop
+       jnz     L$cbc_loop
 
        cmpq    %rsp,%rbp
-       je      .Lcbc_done
+       je      L$cbc_done
 
        pxor    %xmm0,%xmm0
        leaq    (%rsp),%rax
-.Lcbc_bzero:
+L$cbc_bzero:
        movaps  %xmm0,(%rax)
        leaq    16(%rax),%rax
        cmpq    %rax,%rbp
-       ja      .Lcbc_bzero
+       ja      L$cbc_bzero
 
-.Lcbc_done:
+L$cbc_done:
        leaq    (%rbp),%rsp
-       jmp     .Lcbc_exit
-.align 16
-.Lcbc_short:
+       jmp     L$cbc_exit
+.p2align       4
+L$cbc_short:
        movq    %rsp,%rbp
        subq    %rcx,%rsp
        xorq    %rbx,%rbx
-.Lcbc_short_copy:
+L$cbc_short_copy:
        movups  (%rsi,%rbx,1),%xmm0
        leaq    16(%rbx),%rbx
        cmpq    %rbx,%rcx
        movaps  %xmm0,-16(%rsp,%rbx,1)
-       ja      .Lcbc_short_copy
+       ja      L$cbc_short_copy
        movq    %rsp,%rsi
        movq    %rcx,%rbx
-       jmp     .Lcbc_loop
-.align 16
-.Lcbc_aligned:
+       jmp     L$cbc_loop
+.p2align       4
+L$cbc_aligned:
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
 .byte  0xf3,0x0f,0xa7,208      
        movdqa  (%rax),%xmm0
        movdqa  %xmm0,-16(%rdx)
-.Lcbc_exit:
+L$cbc_exit:
        movl    $1,%eax
        leaq    8(%rsp),%rsp
-.Lcbc_abort:
+L$cbc_abort:
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3
-.size  padlock_cbc_encrypt,.-padlock_cbc_encrypt
+
.byte  86,73,65,32,80,97,100,108,111,99,107,32,120,56,54,95,54,52,32,109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 16
+.p2align       4
 .data  
-.align 8
-.Lpadlock_saved_context:
+.p2align       3
+L$padlock_saved_context:
 .quad  0
-
-.section .note.GNU-stack,"",%progbits
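[For the x86-64 PadLock file the conversion is mostly subtractive: ELF's
".type sym,@function" and ".size" bookkeeping, and the ".note.GNU-stack"
non-executable-stack marker, have no Mach-O counterpart and are simply
dropped, while ".p2align" is already portable. One function header from the
hunk above, shown both ways as a sketch:

    # ELF (Linux):
    .globl  padlock_xstore
    .type   padlock_xstore,@function
    .align  16
    padlock_xstore:
            movl    %esi,%edx
    .byte   0x0f,0xa7,0xc0             # PadLock xstore
            .byte   0xf3,0xc3          # repz ret

    .size   padlock_xstore,.-padlock_xstore

    # Mach-O (Mac OS X):
    .globl  _padlock_xstore
    .p2align        4
    _padlock_xstore:
            movl    %esi,%edx
    .byte   0x0f,0xa7,0xc0             # PadLock xstore
            .byte   0xf3,0xc3          # repz ret
]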
diff --git a/lib/accelerated/x86/asm-coff/padlock-x86-coff.s b/lib/accelerated/x86/asm-macosx/padlock-x86-macosx.s
similarity index 76%
copy from lib/accelerated/x86/asm-coff/padlock-x86-coff.s
copy to lib/accelerated/x86/asm-macosx/padlock-x86-macosx.s
index c9231f1..b3822f4 100644
--- a/lib/accelerated/x86/asm-coff/padlock-x86-coff.s
+++ b/lib/accelerated/x86/asm-macosx/padlock-x86-macosx.s
@@ -38,10 +38,9 @@
 .file  "devel/perlasm/e_padlock-x86.s"
 .text
 .globl _padlock_capability
-.def   _padlock_capability;    .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_capability:
-.L_padlock_capability_begin:
+L_padlock_capability_begin:
        pushl   %ebx
        pushfl
        popl    %eax
@@ -54,21 +53,21 @@ _padlock_capability:
        xorl    %eax,%ecx
        xorl    %eax,%eax
        btl     $21,%ecx
-       jnc     .L000noluck
+       jnc     L000noluck
        .byte   0x0f,0xa2
        xorl    %eax,%eax
        cmpl    $0x746e6543,%ebx
-       jne     .L000noluck
+       jne     L000noluck
        cmpl    $0x48727561,%edx
-       jne     .L000noluck
+       jne     L000noluck
        cmpl    $0x736c7561,%ecx
-       jne     .L000noluck
+       jne     L000noluck
        movl    $3221225472,%eax
        .byte   0x0f,0xa2
        movl    %eax,%edx
        xorl    %eax,%eax
        cmpl    $3221225473,%edx
-       jb      .L000noluck
+       jb      L000noluck
        movl    $1,%eax
        .byte   0x0f,0xa2
        orl     $15,%eax
@@ -84,61 +83,57 @@ _padlock_capability:
        shll    $4,%ebx
        andl    $4294967279,%eax
        orl     %ebx,%eax
-.L000noluck:
+L000noluck:
        popl    %ebx
        ret
 .globl _padlock_key_bswap
-.def   _padlock_key_bswap;     .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_key_bswap:
-.L_padlock_key_bswap_begin:
+L_padlock_key_bswap_begin:
        movl    4(%esp),%edx
        movl    240(%edx),%ecx
-.L001bswap_loop:
+L001bswap_loop:
        movl    (%edx),%eax
        bswap   %eax
        movl    %eax,(%edx)
        leal    4(%edx),%edx
        subl    $1,%ecx
-       jnz     .L001bswap_loop
+       jnz     L001bswap_loop
        ret
 .globl _padlock_verify_context
-.def   _padlock_verify_context;        .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_verify_context:
-.L_padlock_verify_context_begin:
+L_padlock_verify_context_begin:
        movl    4(%esp),%edx
-       leal    .Lpadlock_saved_context,%eax
+       leal    Lpadlock_saved_context-L002verify_pic_point,%eax
        pushfl
        call    __padlock_verify_ctx
-.L002verify_pic_point:
+L002verify_pic_point:
        leal    4(%esp),%esp
        ret
-.def   __padlock_verify_ctx;   .scl    3;      .type   32;     .endef
-.align 16
+.align 4
 __padlock_verify_ctx:
+       addl    (%esp),%eax
        btl     $30,4(%esp)
-       jnc     .L003verified
+       jnc     L003verified
        cmpl    (%eax),%edx
-       je      .L003verified
+       je      L003verified
        pushfl
        popfl
-.L003verified:
+L003verified:
        movl    %edx,(%eax)
        ret
 .globl _padlock_reload_key
-.def   _padlock_reload_key;    .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_reload_key:
-.L_padlock_reload_key_begin:
+L_padlock_reload_key_begin:
        pushfl
        popfl
        ret
 .globl _padlock_aes_block
-.def   _padlock_aes_block;     .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_aes_block:
-.L_padlock_aes_block_begin:
+L_padlock_aes_block_begin:
        pushl   %edi
        pushl   %esi
        pushl   %ebx
@@ -154,10 +149,9 @@ _padlock_aes_block:
        popl    %edi
        ret
 .globl _padlock_ecb_encrypt
-.def   _padlock_ecb_encrypt;   .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_ecb_encrypt:
-.L_padlock_ecb_encrypt_begin:
+L_padlock_ecb_encrypt_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
@@ -167,27 +161,27 @@ _padlock_ecb_encrypt:
        movl    28(%esp),%edx
        movl    32(%esp),%ecx
        testl   $15,%edx
-       jnz     .L004ecb_abort
+       jnz     L004ecb_abort
        testl   $15,%ecx
-       jnz     .L004ecb_abort
-       leal    .Lpadlock_saved_context,%eax
+       jnz     L004ecb_abort
+       leal    Lpadlock_saved_context-L005ecb_pic_point,%eax
        pushfl
        cld
        call    __padlock_verify_ctx
-.L005ecb_pic_point:
+L005ecb_pic_point:
        leal    16(%edx),%edx
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        cmpl    $128,%ecx
-       jbe     .L006ecb_short
+       jbe     L006ecb_short
        testl   $32,(%edx)
-       jnz     .L007ecb_aligned
+       jnz     L007ecb_aligned
        testl   $15,%edi
        setz    %al
        testl   $15,%esi
        setz    %bl
        testl   %ebx,%eax
-       jnz     .L007ecb_aligned
+       jnz     L007ecb_aligned
        negl    %eax
        movl    $512,%ebx
        notl    %eax
@@ -200,9 +194,9 @@ _padlock_ecb_encrypt:
        andl    $511,%ebx
        leal    (%eax,%ebp,1),%esp
        andl    $-16,%esp
-       jmp     .L008ecb_loop
-.align 16
-.L008ecb_loop:
+       jmp     L008ecb_loop
+.align 4,0x90
+L008ecb_loop:
        movl    %edi,(%ebp)
        movl    %esi,4(%ebp)
        movl    %ecx,8(%ebp)
@@ -211,13 +205,13 @@ _padlock_ecb_encrypt:
        testl   $15,%edi
        cmovnzl %esp,%edi
        testl   $15,%esi
-       jz      .L009ecb_inp_aligned
+       jz      L009ecb_inp_aligned
        shrl    $2,%ecx
 .byte  243,165
        subl    %ebx,%edi
        movl    %ebx,%ecx
        movl    %edi,%esi
-.L009ecb_inp_aligned:
+L009ecb_inp_aligned:
        leal    -16(%edx),%eax
        leal    16(%edx),%ebx
        shrl    $4,%ecx
@@ -225,69 +219,68 @@ _padlock_ecb_encrypt:
        movl    (%ebp),%edi
        movl    12(%ebp),%ebx
        testl   $15,%edi
-       jz      .L010ecb_out_aligned
+       jz      L010ecb_out_aligned
        movl    %ebx,%ecx
        shrl    $2,%ecx
        leal    (%esp),%esi
 .byte  243,165
        subl    %ebx,%edi
-.L010ecb_out_aligned:
+L010ecb_out_aligned:
        movl    4(%ebp),%esi
        movl    8(%ebp),%ecx
        addl    %ebx,%edi
        addl    %ebx,%esi
        subl    %ebx,%ecx
        movl    $512,%ebx
-       jnz     .L008ecb_loop
+       jnz     L008ecb_loop
        cmpl    %ebp,%esp
-       je      .L011ecb_done
+       je      L011ecb_done
        pxor    %xmm0,%xmm0
        leal    (%esp),%eax
-.L012ecb_bzero:
+L012ecb_bzero:
        movaps  %xmm0,(%eax)
        leal    16(%eax),%eax
        cmpl    %eax,%ebp
-       ja      .L012ecb_bzero
-.L011ecb_done:
+       ja      L012ecb_bzero
+L011ecb_done:
        leal    24(%ebp),%esp
-       jmp     .L013ecb_exit
-.align 16
-.L006ecb_short:
+       jmp     L013ecb_exit
+.align 4,0x90
+L006ecb_short:
        xorl    %eax,%eax
        leal    -24(%esp),%ebp
        subl    %ecx,%eax
        leal    (%eax,%ebp,1),%esp
        andl    $-16,%esp
        xorl    %ebx,%ebx
-.L014ecb_short_copy:
+L014ecb_short_copy:
        movups  (%esi,%ebx,1),%xmm0
        leal    16(%ebx),%ebx
        cmpl    %ebx,%ecx
        movaps  %xmm0,-16(%esp,%ebx,1)
-       ja      .L014ecb_short_copy
+       ja      L014ecb_short_copy
        movl    %esp,%esi
        movl    %ecx,%ebx
-       jmp     .L008ecb_loop
-.align 16
-.L007ecb_aligned:
+       jmp     L008ecb_loop
+.align 4,0x90
+L007ecb_aligned:
        leal    -16(%edx),%eax
        leal    16(%edx),%ebx
        shrl    $4,%ecx
 .byte  243,15,167,200
-.L013ecb_exit:
+L013ecb_exit:
        movl    $1,%eax
        leal    4(%esp),%esp
-.L004ecb_abort:
+L004ecb_abort:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
 .globl _padlock_cbc_encrypt
-.def   _padlock_cbc_encrypt;   .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_cbc_encrypt:
-.L_padlock_cbc_encrypt_begin:
+L_padlock_cbc_encrypt_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
@@ -297,27 +290,27 @@ _padlock_cbc_encrypt:
        movl    28(%esp),%edx
        movl    32(%esp),%ecx
        testl   $15,%edx
-       jnz     .L015cbc_abort
+       jnz     L015cbc_abort
        testl   $15,%ecx
-       jnz     .L015cbc_abort
-       leal    .Lpadlock_saved_context,%eax
+       jnz     L015cbc_abort
+       leal    Lpadlock_saved_context-L016cbc_pic_point,%eax
        pushfl
        cld
        call    __padlock_verify_ctx
-.L016cbc_pic_point:
+L016cbc_pic_point:
        leal    16(%edx),%edx
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        cmpl    $64,%ecx
-       jbe     .L017cbc_short
+       jbe     L017cbc_short
        testl   $32,(%edx)
-       jnz     .L018cbc_aligned
+       jnz     L018cbc_aligned
        testl   $15,%edi
        setz    %al
        testl   $15,%esi
        setz    %bl
        testl   %ebx,%eax
-       jnz     .L018cbc_aligned
+       jnz     L018cbc_aligned
        negl    %eax
        movl    $512,%ebx
        notl    %eax
@@ -330,9 +323,9 @@ _padlock_cbc_encrypt:
        andl    $511,%ebx
        leal    (%eax,%ebp,1),%esp
        andl    $-16,%esp
-       jmp     .L019cbc_loop
-.align 16
-.L019cbc_loop:
+       jmp     L019cbc_loop
+.align 4,0x90
+L019cbc_loop:
        movl    %edi,(%ebp)
        movl    %esi,4(%ebp)
        movl    %ecx,8(%ebp)
@@ -341,13 +334,13 @@ _padlock_cbc_encrypt:
        testl   $15,%edi
        cmovnzl %esp,%edi
        testl   $15,%esi
-       jz      .L020cbc_inp_aligned
+       jz      L020cbc_inp_aligned
        shrl    $2,%ecx
 .byte  243,165
        subl    %ebx,%edi
        movl    %ebx,%ecx
        movl    %edi,%esi
-.L020cbc_inp_aligned:
+L020cbc_inp_aligned:
        leal    -16(%edx),%eax
        leal    16(%edx),%ebx
        shrl    $4,%ecx
@@ -357,103 +350,97 @@ _padlock_cbc_encrypt:
        movl    (%ebp),%edi
        movl    12(%ebp),%ebx
        testl   $15,%edi
-       jz      .L021cbc_out_aligned
+       jz      L021cbc_out_aligned
        movl    %ebx,%ecx
        shrl    $2,%ecx
        leal    (%esp),%esi
 .byte  243,165
        subl    %ebx,%edi
-.L021cbc_out_aligned:
+L021cbc_out_aligned:
        movl    4(%ebp),%esi
        movl    8(%ebp),%ecx
        addl    %ebx,%edi
        addl    %ebx,%esi
        subl    %ebx,%ecx
        movl    $512,%ebx
-       jnz     .L019cbc_loop
+       jnz     L019cbc_loop
        cmpl    %ebp,%esp
-       je      .L022cbc_done
+       je      L022cbc_done
        pxor    %xmm0,%xmm0
        leal    (%esp),%eax
-.L023cbc_bzero:
+L023cbc_bzero:
        movaps  %xmm0,(%eax)
        leal    16(%eax),%eax
        cmpl    %eax,%ebp
-       ja      .L023cbc_bzero
-.L022cbc_done:
+       ja      L023cbc_bzero
+L022cbc_done:
        leal    24(%ebp),%esp
-       jmp     .L024cbc_exit
-.align 16
-.L017cbc_short:
+       jmp     L024cbc_exit
+.align 4,0x90
+L017cbc_short:
        xorl    %eax,%eax
        leal    -24(%esp),%ebp
        subl    %ecx,%eax
        leal    (%eax,%ebp,1),%esp
        andl    $-16,%esp
        xorl    %ebx,%ebx
-.L025cbc_short_copy:
+L025cbc_short_copy:
        movups  (%esi,%ebx,1),%xmm0
        leal    16(%ebx),%ebx
        cmpl    %ebx,%ecx
        movaps  %xmm0,-16(%esp,%ebx,1)
-       ja      .L025cbc_short_copy
+       ja      L025cbc_short_copy
        movl    %esp,%esi
        movl    %ecx,%ebx
-       jmp     .L019cbc_loop
-.align 16
-.L018cbc_aligned:
+       jmp     L019cbc_loop
+.align 4,0x90
+L018cbc_aligned:
        leal    -16(%edx),%eax
        leal    16(%edx),%ebx
        shrl    $4,%ecx
 .byte  243,15,167,208
        movaps  (%eax),%xmm0
        movaps  %xmm0,-16(%edx)
-.L024cbc_exit:
+L024cbc_exit:
        movl    $1,%eax
        leal    4(%esp),%esp
-.L015cbc_abort:
+L015cbc_abort:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
 .globl _padlock_xstore
-.def   _padlock_xstore;        .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_xstore:
-.L_padlock_xstore_begin:
+L_padlock_xstore_begin:
        pushl   %edi
        movl    8(%esp),%edi
        movl    12(%esp),%edx
 .byte  15,167,192
        popl    %edi
        ret
-.def   __win32_segv_handler;   .scl    3;      .type   32;     .endef
-.align 16
+.align 4
 __win32_segv_handler:
        movl    $1,%eax
        movl    4(%esp),%edx
        movl    12(%esp),%ecx
        cmpl    $3221225477,(%edx)
-       jne     .L026ret
+       jne     L026ret
        addl    $4,184(%ecx)
        movl    $0,%eax
-.L026ret:
+L026ret:
        ret
 .globl _padlock_sha1_oneshot
-.def   _padlock_sha1_oneshot;  .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_sha1_oneshot:
-.L_padlock_sha1_oneshot_begin:
+L_padlock_sha1_oneshot_begin:
        pushl   %edi
        pushl   %esi
        xorl    %eax,%eax
        movl    12(%esp),%edi
        movl    16(%esp),%esi
        movl    20(%esp),%ecx
-       pushl   __win32_segv_handler
-.byte  100,255,48
-.byte  100,137,32
        movl    %esp,%edx
        addl    $-128,%esp
        movups  (%edi),%xmm0
@@ -467,19 +454,16 @@ _padlock_sha1_oneshot:
        movaps  (%esp),%xmm0
        movl    16(%esp),%eax
        movl    %edx,%esp
-.byte  100,143,5,0,0,0,0
-       leal    4(%esp),%esp
-       movl    16(%esp),%edi
+       movl    12(%esp),%edi
        movups  %xmm0,(%edi)
        movl    %eax,16(%edi)
        popl    %esi
        popl    %edi
        ret
 .globl _padlock_sha1_blocks
-.def   _padlock_sha1_blocks;   .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_sha1_blocks:
-.L_padlock_sha1_blocks_begin:
+L_padlock_sha1_blocks_begin:
        pushl   %edi
        pushl   %esi
        movl    12(%esp),%edi
@@ -505,19 +489,15 @@ _padlock_sha1_blocks:
        popl    %edi
        ret
 .globl _padlock_sha256_oneshot
-.def   _padlock_sha256_oneshot;        .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_sha256_oneshot:
-.L_padlock_sha256_oneshot_begin:
+L_padlock_sha256_oneshot_begin:
        pushl   %edi
        pushl   %esi
        xorl    %eax,%eax
        movl    12(%esp),%edi
        movl    16(%esp),%esi
        movl    20(%esp),%ecx
-       pushl   __win32_segv_handler
-.byte  100,255,48
-.byte  100,137,32
        movl    %esp,%edx
        addl    $-128,%esp
        movups  (%edi),%xmm0
@@ -531,19 +511,16 @@ _padlock_sha256_oneshot:
        movaps  (%esp),%xmm0
        movaps  16(%esp),%xmm1
        movl    %edx,%esp
-.byte  100,143,5,0,0,0,0
-       leal    4(%esp),%esp
-       movl    16(%esp),%edi
+       movl    12(%esp),%edi
        movups  %xmm0,(%edi)
        movups  %xmm1,16(%edi)
        popl    %esi
        popl    %edi
        ret
 .globl _padlock_sha256_blocks
-.def   _padlock_sha256_blocks; .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_sha256_blocks:
-.L_padlock_sha256_blocks_begin:
+L_padlock_sha256_blocks_begin:
        pushl   %edi
        pushl   %esi
        movl    12(%esp),%edi
@@ -569,10 +546,9 @@ _padlock_sha256_blocks:
        popl    %edi
        ret
 .globl _padlock_sha512_blocks
-.def   _padlock_sha512_blocks; .scl    2;      .type   32;     .endef
-.align 16
+.align 4
 _padlock_sha512_blocks:
-.L_padlock_sha512_blocks_begin:
+L_padlock_sha512_blocks_begin:
        pushl   %edi
        pushl   %esi
        movl    12(%esp),%edi
@@ -608,8 +584,8 @@ _padlock_sha512_blocks:
 .byte  109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65
 .byte  77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101
 .byte  110,115,115,108,46,111,114,103,62,0
-.align 16
+.align 4,0x90
 .data
-.align 4
-.Lpadlock_saved_context:
+.align 2,0x90
+Lpadlock_saved_context:
 .long  0
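[The 32-bit PadLock conversion has one substantive change beyond renaming:
Mach-O forbids the absolute relocation behind "leal
.Lpadlock_saved_context,%eax", so the converted code loads the label's
displacement from a point just past the "call" and lets __padlock_verify_ctx
add its own return address, which is exactly that point's runtime address,
via "addl (%esp),%eax". (The Mach-O version also drops the Win32 SEH handler
registration around the SHA one-shot routines, which only makes sense under
Windows.) A self-contained sketch of the same call/return-address PIC trick;
all names here are illustrative, not from the diff:

    .text
    .globl  _pic_demo
    .align  4
    _pic_demo:
            leal    Ldata-Lpic,%eax    # link-time displacement of Ldata
            call    Lget               # pushes the address of Lpic
    Lpic:
            movl    (%eax),%eax        # %eax is now the runtime &Ldata
            ret
    Lget:
            addl    (%esp),%eax        # return address == Lpic, so %eax = &Ldata
            ret
    .data
    .align  2
    Ldata:
    .long   42
]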


hooks/post-receive
-- 
GNU gnutls


