summaryrefslogtreecommitdiffstats
path: root/Asm/x86
diff options
context:
space:
mode:
Diffstat (limited to 'Asm/x86')
-rwxr-xr-xAsm/x86/7zAsm.asm93
-rwxr-xr-xAsm/x86/7zCrcOpt.asm147
-rwxr-xr-xAsm/x86/AesOpt.asm237
3 files changed, 477 insertions, 0 deletions
diff --git a/Asm/x86/7zAsm.asm b/Asm/x86/7zAsm.asm
new file mode 100755
index 0000000..5a5f271
--- /dev/null
+++ b/Asm/x86/7zAsm.asm
@@ -0,0 +1,93 @@
+; 7zAsm.asm -- ASM macros
+; 2009-12-12 : Igor Pavlov : Public domain
+
+MY_ASM_START macro
+ ifdef x64
+ .code
+ else
+ .386
+ .model flat
+ _TEXT$00 SEGMENT PARA PUBLIC 'CODE'
+ endif
+endm
+
+MY_PROC macro name:req, numParams:req
+ align 16
+ proc_numParams equ numParams
+ ifdef x64
+ proc_name equ name
+ name PROC
+ else
+ proc_fastcall_name equ @CatStr(@,name,@, %numParams * 4)
+ public proc_fastcall_name
+ proc_fastcall_name:
+ endif
+endm
+
+MY_ENDP macro
+ ifdef x64
+ ret
+ proc_name ENDP
+ else
+ ret (proc_numParams - 2) * 4
+ endif
+endm
+
+ifdef x64
+ REG_SIZE equ 8
+else
+ REG_SIZE equ 4
+endif
+
+ x0 equ EAX
+ x1 equ ECX
+ x2 equ EDX
+ x3 equ EBX
+ x4 equ ESP
+ x5 equ EBP
+ x6 equ ESI
+ x7 equ EDI
+
+ x0_L equ AL
+ x1_L equ CL
+ x2_L equ DL
+ x3_L equ BL
+
+ x0_H equ AH
+ x1_H equ CH
+ x2_H equ DH
+ x3_H equ BH
+
+ifdef x64
+ r0 equ RAX
+ r1 equ RCX
+ r2 equ RDX
+ r3 equ RBX
+ r4 equ RSP
+ r5 equ RBP
+ r6 equ RSI
+ r7 equ RDI
+else
+ r0 equ x0
+ r1 equ x1
+ r2 equ x2
+ r3 equ x3
+ r4 equ x4
+ r5 equ x5
+ r6 equ x6
+ r7 equ x7
+endif
+
+MY_PUSH_4_REGS macro
+ push r3
+ push r5
+ push r6
+ push r7
+endm
+
+MY_POP_4_REGS macro
+ pop r7
+ pop r6
+ pop r5
+ pop r3
+endm
diff --git a/Asm/x86/7zCrcOpt.asm b/Asm/x86/7zCrcOpt.asm
new file mode 100755
index 0000000..2de5171
--- /dev/null
+++ b/Asm/x86/7zCrcOpt.asm
@@ -0,0 +1,147 @@
+; 7zCrcOpt.asm -- CRC32 calculation : optimized version
+; 2009-12-12 : Igor Pavlov : Public domain
+
+include 7zAsm.asm
+
+MY_ASM_START
+
+rD equ r2
+rN equ r7
+
+ifdef x64
+ num_VAR equ r8
+ table_VAR equ r9
+else
+ data_size equ (REG_SIZE * 5)
+ crc_table equ (REG_SIZE + data_size)
+ num_VAR equ [r4 + data_size]
+ table_VAR equ [r4 + crc_table]
+endif
+
+SRCDAT equ rN + rD + 4 *
+
+CRC macro op:req, dest:req, src:req, t:req
+ op dest, DWORD PTR [r5 + src * 4 + 0400h * t]
+endm
+
+CRC_XOR macro dest:req, src:req, t:req
+ CRC xor, dest, src, t
+endm
+
+CRC_MOV macro dest:req, src:req, t:req
+ CRC mov, dest, src, t
+endm
+
+CRC1b macro
+ movzx x6, BYTE PTR [rD]
+ inc rD
+ movzx x3, x0_L
+ xor x6, x3
+ shr x0, 8
+ CRC xor, x0, r6, 0
+ dec rN
+endm
+
+MY_PROLOG macro crc_end:req
+ MY_PUSH_4_REGS
+
+ mov x0, x1
+ mov rN, num_VAR
+ mov r5, table_VAR
+ test rN, rN
+ jz crc_end
+ @@:
+ test rD, 7
+ jz @F
+ CRC1b
+ jnz @B
+ @@:
+ cmp rN, 16
+ jb crc_end
+ add rN, rD
+ mov num_VAR, rN
+ sub rN, 8
+ and rN, NOT 7
+ sub rD, rN
+ xor x0, [SRCDAT 0]
+endm
+
+MY_EPILOG macro crc_end:req
+ xor x0, [SRCDAT 0]
+ mov rD, rN
+ mov rN, num_VAR
+ sub rN, rD
+ crc_end:
+ test rN, rN
+ jz @F
+ CRC1b
+ jmp crc_end
+ @@:
+ MY_POP_4_REGS
+endm
+
+MY_PROC CrcUpdateT8, 4
+ MY_PROLOG crc_end_8
+ mov x1, [SRCDAT 1]
+ align 16
+ main_loop_8:
+ mov x6, [SRCDAT 2]
+ movzx x3, x1_L
+ CRC_XOR x6, r3, 3
+ movzx x3, x1_H
+ CRC_XOR x6, r3, 2
+ shr x1, 16
+ movzx x3, x1_L
+ movzx x1, x1_H
+ CRC_XOR x6, r3, 1
+ movzx x3, x0_L
+ CRC_XOR x6, r1, 0
+
+ mov x1, [SRCDAT 3]
+ CRC_XOR x6, r3, 7
+ movzx x3, x0_H
+ shr x0, 16
+ CRC_XOR x6, r3, 6
+ movzx x3, x0_L
+ CRC_XOR x6, r3, 5
+ movzx x3, x0_H
+ CRC_MOV x0, r3, 4
+ xor x0, x6
+ add rD, 8
+ jnz main_loop_8
+
+ MY_EPILOG crc_end_8
+MY_ENDP
+
+MY_PROC CrcUpdateT4, 4
+ MY_PROLOG crc_end_4
+ align 16
+ main_loop_4:
+ movzx x1, x0_L
+ movzx x3, x0_H
+ shr x0, 16
+ movzx x6, x0_H
+ and x0, 0FFh
+ CRC_MOV x1, r1, 3
+ xor x1, [SRCDAT 1]
+ CRC_XOR x1, r3, 2
+ CRC_XOR x1, r6, 0
+ CRC_XOR x1, r0, 1
+
+ movzx x0, x1_L
+ movzx x3, x1_H
+ shr x1, 16
+ movzx x6, x1_H
+ and x1, 0FFh
+ CRC_MOV x0, r0, 3
+ xor x0, [SRCDAT 2]
+ CRC_XOR x0, r3, 2
+ CRC_XOR x0, r6, 0
+ CRC_XOR x0, r1, 1
+ add rD, 8
+ jnz main_loop_4
+
+ MY_EPILOG crc_end_4
+MY_ENDP
+
+end
diff --git a/Asm/x86/AesOpt.asm b/Asm/x86/AesOpt.asm
new file mode 100755
index 0000000..c32e48f
--- /dev/null
+++ b/Asm/x86/AesOpt.asm
@@ -0,0 +1,237 @@
+; AesOpt.asm -- Intel's AES.
+; 2009-12-12 : Igor Pavlov : Public domain
+
+include 7zAsm.asm
+
+MY_ASM_START
+
+ifndef x64
+ .xmm
+endif
+
+ifdef x64
+ num equ r8
+else
+ num equ [r4 + REG_SIZE * 4]
+endif
+
+rD equ r2
+rN equ r0
+
+MY_PROLOG macro reg:req
+ ifdef x64
+ movdqa [r4 + 8], xmm6
+ movdqa [r4 + 8 + 16], xmm7
+ endif
+
+ push r3
+ push r5
+ push r6
+
+ mov rN, num
+ mov x6, [r1 + 16]
+ shl x6, 5
+
+ movdqa reg, [r1]
+ add r1, 32
+endm
+
+MY_EPILOG macro
+ pop r6
+ pop r5
+ pop r3
+
+ ifdef x64
+ movdqa xmm6, [r4 + 8]
+ movdqa xmm7, [r4 + 8 + 16]
+ endif
+
+ MY_ENDP
+endm
+
+ways equ 4
+ways16 equ (ways * 16)
+
+OP_W macro op, op2
+ i = 0
+ rept ways
+ op @CatStr(xmm,%i), op2
+ i = i + 1
+ endm
+endm
+
+LOAD_OP macro op:req, offs:req
+ op xmm0, [r1 + r3 offs]
+endm
+
+LOAD_OP_W macro op:req, offs:req
+ movdqa xmm7, [r1 + r3 offs]
+ OP_W op, xmm7
+endm
+
+
+; ---------- AES-CBC Decode ----------
+
+CBC_DEC_UPDATE macro reg, offs
+ pxor reg, xmm6
+ movdqa xmm6, [rD + offs]
+ movdqa [rD + offs], reg
+endm
+
+DECODE macro op:req
+ op aesdec, +16
+ @@:
+ op aesdec, +0
+ op aesdec, -16
+ sub x3, 32
+ jnz @B
+ op aesdeclast, +0
+endm
+
+MY_PROC AesCbc_Decode_Intel, 3
+ MY_PROLOG xmm6
+
+ sub x6, 32
+
+ jmp check2
+
+ align 16
+ nextBlocks2:
+ mov x3, x6
+ OP_W movdqa, [rD + i * 16]
+ LOAD_OP_W pxor, +32
+ DECODE LOAD_OP_W
+ OP_W CBC_DEC_UPDATE, i * 16
+ add rD, ways16
+ check2:
+ sub rN, ways
+ jnc nextBlocks2
+
+ add rN, ways
+ jmp check
+
+ nextBlock:
+ mov x3, x6
+ movdqa xmm1, [rD]
+ LOAD_OP movdqa, +32
+ pxor xmm0, xmm1
+ DECODE LOAD_OP
+ pxor xmm0, xmm6
+ movdqa [rD], xmm0
+ movdqa xmm6, xmm1
+ add rD, 16
+ check:
+ sub rN, 1
+ jnc nextBlock
+
+ movdqa [r1 - 32], xmm6
+ MY_EPILOG
+
+
+; ---------- AES-CBC Encode ----------
+
+ENCODE macro op:req
+ op aesenc, -16
+ @@:
+ op aesenc, +0
+ op aesenc, +16
+ add r3, 32
+ jnz @B
+ op aesenclast, +0
+endm
+
+MY_PROC AesCbc_Encode_Intel, 3
+ MY_PROLOG xmm0
+
+ add r1, r6
+ neg r6
+ add r6, 32
+
+ jmp check_e
+
+ align 16
+ nextBlock_e:
+ mov r3, r6
+ pxor xmm0, [rD]
+ pxor xmm0, [r1 + r3 - 32]
+ ENCODE LOAD_OP
+ movdqa [rD], xmm0
+ add rD, 16
+ check_e:
+ sub rN, 1
+ jnc nextBlock_e
+
+ movdqa [r1 + r6 - 64], xmm0
+ MY_EPILOG
+
+
+; ---------- AES-CTR ----------
+
+XOR_UPD_1 macro reg, offs
+ pxor reg, [rD + offs]
+endm
+
+XOR_UPD_2 macro reg, offs
+ movdqa [rD + offs], reg
+endm
+
+MY_PROC AesCtr_Code_Intel, 3
+ MY_PROLOG xmm6
+
+ mov r5, r4
+ shr r5, 4
+ dec r5
+ shl r5, 4
+
+ mov DWORD PTR [r5], 1
+ mov DWORD PTR [r5 + 4], 0
+ mov DWORD PTR [r5 + 8], 0
+ mov DWORD PTR [r5 + 12], 0
+
+ add r1, r6
+ neg r6
+ add r6, 32
+
+ jmp check2_c
+
+ align 16
+ nextBlocks2_c:
+ movdqa xmm7, [r5]
+
+ i = 0
+ rept ways
+ paddq xmm6, xmm7
+ movdqa @CatStr(xmm,%i), xmm6
+ i = i + 1
+ endm
+
+ mov r3, r6
+ LOAD_OP_W pxor, -32
+ ENCODE LOAD_OP_W
+ OP_W XOR_UPD_1, i * 16
+ OP_W XOR_UPD_2, i * 16
+ add rD, ways16
+ check2_c:
+ sub rN, ways
+ jnc nextBlocks2_c
+
+ add rN, ways
+ jmp check_c
+
+ nextBlock_c:
+ paddq xmm6, [r5]
+ mov r3, r6
+ movdqa xmm0, [r1 + r3 - 32]
+ pxor xmm0, xmm6
+ ENCODE LOAD_OP
+ XOR_UPD_1 xmm0, 0
+ XOR_UPD_2 xmm0, 0
+ add rD, 16
+ check_c:
+ sub rN, 1
+ jnc nextBlock_c
+
+ movdqa [r1 + r6 - 64], xmm6
+ MY_EPILOG
+
+end