// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.

// +build !appengine
// +build !noasm
// +build gc

#include "textflag.h"

// func encodeBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
//
// encodeBlockAsm encodes src into dst and returns the number of bytes written
// to dst. It returns 0 instead when the output would reach the limit
//     dst_base + len(src) - len(src)>>5 - 5
// NOTE(review): callers presumably treat 0 as "store uncompressed"; confirm
// against the users of the Go stub.
//
// The first instruction of the search loop loads 8 bytes at src[1] before any
// length check is made, so len(src) must be at least 9.
// NOTE(review): confirm the Go caller guarantees this.
//
// Frame layout (offsets from SP):
//    0(SP)  8 bytes  dst limit pointer (see above)
//    8(SP)  4 bytes  sLimit   = len(src) - 8; searching stops at this index
//   12(SP)  4 bytes  nextEmit = src index of the first byte not yet emitted
//   16(SP)  4 bytes  repeat   = offset of the most recent match (starts at 1)
//   20(SP)  4 bytes  nextS    = src index to continue from when nothing matches
//   24(SP)  64 KiB   hash table of 16384 uint32 src indexes, zeroed on entry
//
// Registers that keep their role for most of the function:
//   AX = dst write pointer, CX = current src index (s), DX = src base pointer.
//
// Hashing: the low 6 bytes of a 64-bit load (SHLQ $0x10) are multiplied by
// 0xcf1bbcdcbf9b and the top 14 bits of the product (SHRQ $0x32) index the table.
//
// Literal runs are written as: one tag byte (length-1)<<2 when length-1 < 60,
// otherwise tag 0xf0/0xf4/0xf8/0xfc followed by length-1 in 1/2/3/4
// little-endian bytes, followed by the literal bytes themselves.
//
// This file is generated; changes made here by hand must be mirrored in gen.go.
TEXT ·encodeBlockAsm(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table: 0x200 iterations of 128 bytes = 64 KiB.
zero_loop_encodeBlockAsm:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm

	// nextEmit = 0; sLimit = len(src)-8; dst limit = dst + len(src)-5 - len(src)>>5;
	// s = 1; repeat = 1; DX = src base.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm:
	// SI = 8 bytes at src[s]. nextS = s + 4 + (s-nextEmit)>>6, so the step grows
	// the longer no match has been found. Stop searching once nextS >= sLimit.
	MOVQ (DX)(CX*1), SI
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x06, BP
	LEAL 4(CX)(BP*1), BP
	CMPL BP, 8(SP)
	JGE emit_remainder_encodeBlockAsm
	MOVL BP, 20(SP)

	// R9 = hash(src[s:]), R10 = hash(src[s+1:]).
	MOVQ $0x0000cf1bbcdcbf9b, R8
	MOVQ SI, R9
	MOVQ SI, R10
	SHRQ $0x08, R10
	SHLQ $0x10, R9
	IMULQ R8, R9
	SHRQ $0x32, R9
	SHLQ $0x10, R10
	IMULQ R8, R10
	SHRQ $0x32, R10

	// BP = candidate for s, DI = candidate for s+1; then record s and s+1.
	MOVL 24(SP)(R9*4), BP
	MOVL 24(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	LEAL 1(CX), R9
	MOVL R9, 24(SP)(R10*4)

	// R9 = hash(src[s+2:]), used later for the third candidate.
	MOVQ SI, R9
	SHRQ $0x10, R9
	SHLQ $0x10, R9
	IMULQ R8, R9
	SHRQ $0x32, R9

	// Repeat check: do the 4 bytes at s+1 equal the 4 bytes at s+1-repeat?
	MOVL CX, R8
	SUBL 16(SP), R8
	MOVL 1(DX)(R8*1), R10
	MOVQ SI, R8
	SHRQ $0x08, R8
	CMPL R8, R10
	JNE no_repeat_found_encodeBlockAsm

	// Repeat found. SI = match start (s+1), DI = nextEmit, BP = SI - repeat.
	LEAL 1(CX), SI
	MOVL 12(SP), DI
	MOVL SI, BP
	SUBL 16(SP), BP
	JZ repeat_extend_back_end_encodeBlockAsm

	// Extend the match backwards while SI > nextEmit, BP > 0 and the preceding
	// bytes are equal.
repeat_extend_back_loop_encodeBlockAsm:
	CMPL SI, DI
	JLE repeat_extend_back_end_encodeBlockAsm
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE repeat_extend_back_end_encodeBlockAsm
	LEAL -1(SI), SI
	DECL BP
	JNZ repeat_extend_back_loop_encodeBlockAsm

repeat_extend_back_end_encodeBlockAsm:
	// Emit src[nextEmit:SI] as literals (skipped when empty).
	// R8 = length, R9 = source pointer, BP = length-1; nextEmit = SI.
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	LEAL -1(R8), BP
	CMPL BP, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm
	CMPL BP, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm
	CMPL BP, $0x00010000
	JLT three_bytes_repeat_emit_encodeBlockAsm
	CMPL BP, $0x01000000
	JLT four_bytes_repeat_emit_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm

four_bytes_repeat_emit_encodeBlockAsm:
	MOVL BP, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm

three_bytes_repeat_emit_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm

two_bytes_repeat_emit_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX

	// length-1 < 64 still fits the short (<= 64 byte) copy.
	CMPL BP, $0x40
	JL memmove_repeat_emit_encodeBlockAsm
	JMP memmove_long_repeat_emit_encodeBlockAsm

one_byte_repeat_emit_encodeBlockAsm:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

	// Short copy of R8 (1..64) bytes from R9 to AX. Each size class loads the head
	// and the tail of the run (they may overlap) before storing. BP = end of output.
memmove_repeat_emit_encodeBlockAsm:
	LEAQ (AX)(R8*1), BP
	CMPQ R8, $0x03
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2
	JE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeBlockAsm:
	MOVQ BP, AX
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

	// Long copy of R8 (>= 65) bytes from R9 to AX.
	// X0..X3 hold the first and last 32 bytes and are stored last. R12 starts at
	// 64 - (AX & 31), so every store in the loop is 32-byte aligned, and each
	// iteration copies src[R12-32, R12) while length >= R12. Every access
	// therefore stays inside the run.
	// Only this 32-byte loop is used: a 128-byte unrolled pass in front of it
	// would, for 128 <= length < 192, read and write up to 64 bytes past the end
	// of the literal run.
memmove_long_repeat_emit_encodeBlockAsm:
	LEAQ (AX)(R8*1), BP
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12

emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R8, R12
	JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ BP, AX

emit_literal_done_repeat_emit_encodeBlockAsm:
	// s += 5 (the match began at s+1 and 4 bytes are known equal). Extend the
	// match forward: R9 = &src[s], BP = &src[s-repeat], R8 = bytes left in src,
	// R11 = number of additional matching bytes.
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(BP*1), BP
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend_encodeBlockAsm

	// Compare 8 bytes at a time; on a difference the index of the lowest set bit
	// of the XOR, divided by 8, is the number of equal bytes.
matchlen_loopback_repeat_extend_encodeBlockAsm:
	MOVQ (R9)(R11*1), R10
	XORQ (BP)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend_encodeBlockAsm
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeBlockAsm

matchlen_loop_repeat_extend_encodeBlockAsm:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend_encodeBlockAsm

	// Fewer than 8 bytes left: compare byte by byte.
matchlen_single_repeat_extend_encodeBlockAsm:
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeBlockAsm

matchlen_single_loopback_repeat_extend_encodeBlockAsm:
	MOVB (R9)(R11*1), R10
	CMPB (BP)(R11*1), R10
	JNE repeat_extend_forward_end_encodeBlockAsm
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm

repeat_extend_forward_end_encodeBlockAsm:
	// s += R11; BP = match length (s - match start); SI = repeat offset.
	// DI still holds nextEmit as it was before the literals were emitted; when it
	// is 0 the match is written as a copy with an explicit offset instead of a
	// repeat code.
	ADDL R11, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	TESTL DI, DI
	JZ repeat_as_copy_encodeBlockAsm

	// Emit a repeat code. DI = length, BP = length-4; the encoding size (2..5
	// bytes) is selected by the length, and by the offset for the 2-byte
	// offset-carrying form (length < 12 and offset < 2048).
emit_repeat_again_match_repeat_encodeBlockAsm:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_repeat_encodeBlockAsm

cant_repeat_two_offset_match_repeat_encodeBlockAsm:
	CMPL BP, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm
	CMPL BP, $0x00010100
	JLT repeat_four_match_repeat_encodeBlockAsm
	CMPL BP, $0x0100ffff
	JLT repeat_five_match_repeat_encodeBlockAsm

	// Too long for a single code: emit the largest 5-byte form and go round again
	// with the remaining length.
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_repeat_encodeBlockAsm

repeat_five_match_repeat_encodeBlockAsm:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_match_repeat_encodeBlockAsm:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_match_repeat_encodeBlockAsm:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_match_repeat_encodeBlockAsm:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_match_repeat_encodeBlockAsm:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

	// Write the match as a copy. SI = offset, BP = length.
	// Offsets below 65536 use the 2-byte-offset forms further down.
repeat_as_copy_encodeBlockAsm:
	CMPL SI, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeBlockAsm

	// 4-byte offset: for length > 64 write tag 0xff + offset (5 bytes, covering
	// 64 bytes of the match), then encode what is left as a repeat.
four_bytes_loop_back_repeat_as_copy_encodeBlockAsm:
	CMPL BP, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeBlockAsm

emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	CMPL BP, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL BP, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL BP, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

	// Unreachable: follows an unconditional JMP (emitted by the generator).
	JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm

	// Remaining length (0..64) with a 4-byte offset: tag = ((length-1)<<2)|3.
	// Only the low byte of the LEAL result is stored, so the undefined upper
	// bits of BX do not matter.
four_bytes_remain_repeat_as_copy_encodeBlockAsm:
	TESTL BP, BP
	JZ repeat_end_emit_encodeBlockAsm
	MOVB $0x03, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

	// 2-byte offset: for length > 64 write tag 0xee + offset (3 bytes, covering
	// 60 bytes of the match), then encode what is left as a repeat.
two_byte_offset_repeat_as_copy_encodeBlockAsm:
	CMPL BP, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX

emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	CMPL BP, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

	// Unreachable: follows an unconditional JMP (emitted by the generator).
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm

	// length <= 64 with a 2-byte offset. When length < 12 and offset < 2048 the
	// 2-byte form is used: tag = ((length-4)<<2) | 1 | (offset>>8)<<5, then the
	// low offset byte. Otherwise the 3-byte form below.
two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
	CMPL BP, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
	CMPL SI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

	// 3-byte form: tag = ((length-1)<<2) | 2, then the 16-bit offset.
emit_copy_three_repeat_as_copy_encodeBlockAsm:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeBlockAsm:
	// nextEmit = s; keep searching.
	MOVL CX, 12(SP)
	JMP search_loop_encodeBlockAsm

	// No repeat. Try the table candidates for s (BP), s+1 (DI) and s+2 in turn,
	// comparing 4 bytes each; SI is shifted right one byte per step. The table
	// slot for s+2 is updated on every path that gets that far.
no_repeat_found_encodeBlockAsm:
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeBlockAsm
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ candidate2_match_encodeBlockAsm
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ candidate3_match_encodeBlockAsm

	// Nothing matched: s = nextS.
	MOVL 20(SP), CX
	JMP search_loop_encodeBlockAsm

candidate3_match_encodeBlockAsm:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm

candidate2_match_encodeBlockAsm:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

	// Match at s against candidate BP. Extend it backwards while s > nextEmit,
	// candidate > 0 and the preceding bytes are equal.
candidate_match_encodeBlockAsm:
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeBlockAsm

match_extend_back_loop_encodeBlockAsm:
	CMPL CX, SI
	JLE match_extend_back_end_encodeBlockAsm
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeBlockAsm
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeBlockAsm
	JMP match_extend_back_loop_encodeBlockAsm

match_extend_back_end_encodeBlockAsm:
	// Return 0 unless AX + 4 + (s - nextEmit) is below the dst limit.
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBlockAsm:
	// Emit src[nextEmit:s] as literals (skipped when empty).
	// R8 = length, SI = source pointer, DI = length-1; nextEmit = s.
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ emit_literal_done_match_emit_encodeBlockAsm
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm
	CMPL DI, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm
	CMPL DI, $0x00010000
	JLT three_bytes_match_emit_encodeBlockAsm
	CMPL DI, $0x01000000
	JLT four_bytes_match_emit_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_match_emit_encodeBlockAsm

four_bytes_match_emit_encodeBlockAsm:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_encodeBlockAsm

three_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBlockAsm

two_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL memmove_match_emit_encodeBlockAsm
	JMP memmove_long_match_emit_encodeBlockAsm

one_byte_match_emit_encodeBlockAsm:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

	// Short copy of R8 (1..64) bytes from SI to AX; DI = end of output.
memmove_match_emit_encodeBlockAsm:
	LEAQ (AX)(R8*1), DI
	CMPQ R8, $0x03
	JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeBlockAsm:
	MOVQ DI, AX
	JMP emit_literal_done_match_emit_encodeBlockAsm

	// Long copy of R8 (>= 65) bytes from SI to AX, 32 bytes per iteration with
	// aligned stores; the first and last 32 bytes (X0..X3) are stored last.
	// R11 starts at 64 - (AX & 31) and the loop runs while length >= R11, so all
	// accesses stay inside the run. No 128-byte unrolled pass is used because it
	// would overrun runs of 128..191 bytes by up to 64 bytes.
memmove_long_match_emit_encodeBlockAsm:
	LEAQ (AX)(R8*1), DI
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ AX, R9
	ANDL $0x0000001f, R9
	MOVQ $0x00000040, R11
	SUBQ R9, R11

emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ $0x20, R11
	CMPQ R8, R11
	JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ DI, AX

emit_literal_done_match_emit_encodeBlockAsm:
match_nolit_loop_encodeBlockAsm:
	// repeat = s - candidate. Skip the 4 bytes already known to match and extend
	// forward: DI = &src[s], BP = &src[candidate], SI = bytes left in src,
	// R9 = number of additional matching bytes.
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm

matchlen_loopback_match_nolit_encodeBlockAsm:
	MOVQ (DI)(R9*1), R8
	XORQ (BP)(R9*1), R8
	TESTQ R8, R8
	JZ matchlen_loop_match_nolit_encodeBlockAsm
	BSFQ R8, R8
	SARQ $0x03, R8
	LEAL (R9)(R8*1), R9
	JMP match_nolit_end_encodeBlockAsm

matchlen_loop_match_nolit_encodeBlockAsm:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm

matchlen_single_match_nolit_encodeBlockAsm:
	TESTL SI, SI
	JZ match_nolit_end_encodeBlockAsm

matchlen_single_loopback_match_nolit_encodeBlockAsm:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE match_nolit_end_encodeBlockAsm
	LEAL 1(R9), R9
	DECL SI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm

match_nolit_end_encodeBlockAsm:
	// s += extra; BP = offset (repeat); R9 = total match length; nextEmit = s.
	// Emit the copy, using the same encodings as the repeat-as-copy path above.
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	MOVL CX, 12(SP)
	CMPL BP, $0x00010000
	JL two_byte_offset_match_nolit_encodeBlockAsm

four_bytes_loop_back_match_nolit_encodeBlockAsm:
	CMPL R9, $0x40
	JLE four_bytes_remain_match_nolit_encodeBlockAsm
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL four_bytes_remain_match_nolit_encodeBlockAsm

emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	CMPL R9, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy
	CMPL R9, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy
	CMPL R9, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy

repeat_five_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy:
	SHLL $0x02, R9
	ORL $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

	// Unreachable: follows an unconditional JMP (emitted by the generator).
	JMP four_bytes_loop_back_match_nolit_encodeBlockAsm

four_bytes_remain_match_nolit_encodeBlockAsm:
	TESTL R9, R9
	JZ match_nolit_emitcopy_end_encodeBlockAsm
	MOVB $0x03, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

two_byte_offset_match_nolit_encodeBlockAsm:
	CMPL R9, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX

emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	CMPL R9, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL R9, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL R9, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short

repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, R9
	ORL $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

	// Unreachable: follows an unconditional JMP (emitted by the generator).
	JMP two_byte_offset_match_nolit_encodeBlockAsm

two_byte_offset_short_match_nolit_encodeBlockAsm:
	CMPL R9, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm
	CMPL BP, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

emit_copy_three_match_nolit_encodeBlockAsm:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeBlockAsm:
	// Finished when s >= sLimit. Otherwise load 8 bytes at src[s-2] and return 0
	// if the dst write pointer has reached the limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm
	MOVQ -2(DX)(CX*1), SI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm:
	// DI = hash(src[s-2:]), BP = hash(src[s:]); SI is shifted to the bytes at s.
	// Record s-2 and s in the table, reading the previous entry for s as the new
	// candidate. If its 4 bytes equal those at s, another match starts right
	// away with no literals in between.
	MOVQ $0x0000cf1bbcdcbf9b, R8
	MOVQ SI, DI
	SHRQ $0x10, SI
	MOVQ SI, BP
	SHLQ $0x10, DI
	IMULQ R8, DI
	SHRQ $0x32, DI
	SHLQ $0x10, BP
	IMULQ R8, BP
	SHRQ $0x32, BP
	LEAL -2(CX), R8
	LEAQ 24(SP)(BP*4), R9
	MOVL (R9), BP
	MOVL R8, 24(SP)(DI*4)
	MOVL CX, (R9)
	CMPL (DX)(BP*1), SI
	JEQ match_nolit_loop_encodeBlockAsm
	INCL CX
	JMP search_loop_encodeBlockAsm

emit_remainder_encodeBlockAsm:
	// Return 0 unless AX + 4 + (len(src) - nextEmit) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBlockAsm:
	// Emit src[nextEmit:] as literals (skipped when empty).
	// BP = length, CX = source pointer, DX = length-1 (the src base pointer is
	// no longer needed from here on).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x01000000
	JLT four_bytes_emit_remainder_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm

four_bytes_emit_remainder_encodeBlockAsm:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm

three_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm

two_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBlockAsm
	JMP memmove_long_emit_remainder_encodeBlockAsm

one_byte_emit_remainder_encodeBlockAsm:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

	// Short copy of BX (1..64) bytes from CX to AX; DX = end of output.
memmove_emit_remainder_encodeBlockAsm:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

	// Long copy of BX (>= 65) bytes from CX to AX, 32 bytes per iteration with
	// aligned stores; the first and last 32 bytes (X0..X3) are stored last.
	// DI starts at 64 - (AX & 31) and the loop runs while length >= DI. This run
	// ends at the end of src, so staying in bounds matters most here: a 128-byte
	// unrolled pass would read past src for runs of 128..191 bytes.
memmove_long_emit_remainder_encodeBlockAsm:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
	SUBQ BP, DI

emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ BX, DI
	JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBlockAsm:
	// Return the number of bytes written: AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBlockAsm12B(SB), $16408-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000080, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBlockAsm12B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm12B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	SHRQ $0x05, CX
	SUBL CX,
DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm12B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeBlockAsm12B MOVL BP, 20(SP) MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 SHLQ $0x18, R10 IMULQ R8, R10 SHRQ $0x34, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm12B LEAL 1(CX), SI MOVL 12(SP), DI MOVL SI, BP SUBL 16(SP), BP JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: CMPL SI, DI JLE repeat_extend_back_end_encodeBlockAsm12B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeBlockAsm12B LEAL -1(SI), SI DECL BP JNZ repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 LEAL -1(R8), BP CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm12B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm12B MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_repeat_emit_encodeBlockAsm12B JMP memmove_long_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm12B: LEAQ (AX)(R8*1), BP CMPQ R8, $0x03 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2 JE 
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, -1(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R9 MOVW R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_repeat_emit_encodeBlockAsm12B: MOVQ BP, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm12B memmove_long_repeat_emit_encodeBlockAsm12B: LEAQ (AX)(R8*1), BP MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R11 SHRQ $0x07, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA 
emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(R9)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOU 32(R10), X6 MOVOU 48(R10), X7 MOVOU 64(R10), X8 MOVOU 80(R10), X9 MOVOU 96(R10), X10 MOVOU 112(R10), X11 MOVOA X4, (R13) MOVOA X5, 16(R13) MOVOA X6, 32(R13) MOVOA X7, 48(R13) MOVOA X8, 64(R13) MOVOA X9, 80(R13) MOVOA X10, 96(R13) MOVOA X11, 112(R13) ADDQ $0x80, R13 ADDQ $0x80, R10 ADDQ $0x80, R12 DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(R9)(R12*1), X4 MOVOU -16(R9)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ BP, AX emit_literal_done_repeat_emit_encodeBlockAsm12B: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(BP*1), BP XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend_encodeBlockAsm12B matchlen_loopback_repeat_extend_encodeBlockAsm12B: MOVQ (R9)(R11*1), R10 XORQ (BP)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend_encodeBlockAsm12B BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_loop_repeat_extend_encodeBlockAsm12B: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend_encodeBlockAsm12B matchlen_single_repeat_extend_encodeBlockAsm12B: TESTL R8, R8 JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_single_loopback_repeat_extend_encodeBlockAsm12B: MOVB (R9)(R11*1), R10 CMPB (BP)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm12B LEAL 1(R11), R11 DECL R8 JNZ 
matchlen_single_loopback_repeat_extend_encodeBlockAsm12B repeat_extend_forward_end_encodeBlockAsm12B: ADDL R11, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm12B MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_match_repeat_encodeBlockAsm12B CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: CMPL BP, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm12B LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_encodeBlockAsm12B: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_encodeBlockAsm12B: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_encodeBlockAsm12B: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: two_byte_offset_repeat_as_copy_encodeBlockAsm12B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B 
repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm12B: MOVL CX, 12(SP) JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBlockAsm12B SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm12B MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeBlockAsm12B MOVL 20(SP), CX JMP search_loop_encodeBlockAsm12B candidate3_match_encodeBlockAsm12B: ADDL $0x02, CX JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeBlockAsm12B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: CMPL CX, SI JLE match_extend_back_end_encodeBlockAsm12B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBlockAsm12B 
LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm12B: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm12B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 LEAL -1(R8), DI CMPL DI, $0x3c JLT one_byte_match_emit_encodeBlockAsm12B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm12B MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX CMPL DI, $0x40 JL memmove_match_emit_encodeBlockAsm12B JMP memmove_long_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm12B: LEAQ (AX)(R8*1), DI CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, 
(AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBlockAsm12B: MOVQ DI, AX JMP emit_literal_done_match_emit_encodeBlockAsm12B memmove_long_match_emit_encodeBlockAsm12B: LEAQ (AX)(R8*1), DI MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVQ R8, R10 SHRQ $0x07, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(SI)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOU 32(R9), X6 MOVOU 48(R9), X7 MOVOU 64(R9), X8 MOVOU 80(R9), X9 MOVOU 96(R9), X10 MOVOU 112(R9), X11 MOVOA X4, (R12) MOVOA X5, 16(R12) MOVOA X6, 32(R12) MOVOA X7, 48(R12) MOVOA X8, 64(R12) MOVOA X9, 80(R12) MOVOA X10, 96(R12) MOVOA X11, 112(R12) ADDQ $0x80, R12 ADDQ $0x80, R9 ADDQ $0x80, R11 DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(SI)(R11*1), X4 MOVOU -16(SI)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 
-32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm12B: match_nolit_loop_encodeBlockAsm12B: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm12B matchlen_loopback_match_nolit_encodeBlockAsm12B: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm12B BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm12B matchlen_loop_match_nolit_encodeBlockAsm12B: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm12B matchlen_single_match_nolit_encodeBlockAsm12B: TESTL SI, SI JZ match_nolit_end_encodeBlockAsm12B matchlen_single_loopback_match_nolit_encodeBlockAsm12B: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm12B LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B match_nolit_end_encodeBlockAsm12B: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) two_byte_offset_match_nolit_encodeBlockAsm12B: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP 
match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B JMP two_byte_offset_match_nolit_encodeBlockAsm12B two_byte_offset_short_match_nolit_encodeBlockAsm12B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm12B CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm12B: CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm12B MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, BP SHLQ $0x18, DI IMULQ R8, DI SHRQ $0x34, DI SHLQ $0x18, BP IMULQ R8, BP SHRQ $0x34, BP LEAL -2(CX), R8 LEAQ 24(SP)(BP*4), R9 MOVL (R9), BP MOVL R8, 24(SP)(DI*4) MOVL CX, (R9) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeBlockAsm12B INCL CX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm12B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT 
one_byte_emit_remainder_encodeBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm12B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBlockAsm12B JMP memmove_long_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm12B: LEAQ (AX)(BP*1), DX MOVL BP, BX CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, 
-16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm12B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeBlockAsm12B memmove_long_emit_remainder_encodeBlockAsm12B: LEAQ (AX)(BP*1), DX MOVL BP, BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, SI SHRQ $0x07, SI MOVQ AX, BP ANDL $0x0000001f, BP MOVQ $0x00000040, DI SUBQ BP, DI DECQ SI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(CX)(DI*1), BP LEAQ -32(AX)(DI*1), R8 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: MOVOU (BP), X4 MOVOU 16(BP), X5 MOVOU 32(BP), X6 MOVOU 48(BP), X7 MOVOU 64(BP), X8 MOVOU 80(BP), X9 MOVOU 96(BP), X10 MOVOU 112(BP), X11 MOVOA X4, (R8) MOVOA X5, 16(R8) MOVOA X6, 32(R8) MOVOA X7, 48(R8) MOVOA X8, 64(R8) MOVOA X9, 80(R8) MOVOA X10, 96(R8) MOVOA X11, 112(R8) ADDQ $0x80, R8 ADDQ $0x80, BP ADDQ $0x80, DI DECQ SI JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(CX)(DI*1), X4 MOVOU -16(CX)(DI*1), X5 MOVOA X4, -32(AX)(DI*1) MOVOA X5, -16(AX)(DI*1) ADDQ $0x20, DI CMPQ BX, DI JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsm12B: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm10B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBlockAsm10B(SB), $4120-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000020, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeBlockAsm10B: 
MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBlockAsm10B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm10B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeBlockAsm10B MOVL BP, 20(SP) MOVQ $0x9e3779b1, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x36, R9 SHLQ $0x20, R10 IMULQ R8, R10 SHRQ $0x36, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x36, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm10B LEAL 1(CX), SI MOVL 12(SP), DI MOVL SI, BP SUBL 16(SP), BP JZ repeat_extend_back_end_encodeBlockAsm10B repeat_extend_back_loop_encodeBlockAsm10B: CMPL SI, DI JLE repeat_extend_back_end_encodeBlockAsm10B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeBlockAsm10B LEAL -1(SI), SI DECL BP JNZ repeat_extend_back_loop_encodeBlockAsm10B repeat_extend_back_end_encodeBlockAsm10B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 LEAL -1(R8), BP CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm10B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm10B MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm10B two_bytes_repeat_emit_encodeBlockAsm10B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_repeat_emit_encodeBlockAsm10B JMP 
memmove_long_repeat_emit_encodeBlockAsm10B one_byte_repeat_emit_encodeBlockAsm10B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm10B: LEAQ (AX)(R8*1), BP CMPQ R8, $0x03 JB emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2 JE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, -1(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R9 MOVW R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_repeat_emit_encodeBlockAsm10B: MOVQ BP, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm10B 
memmove_long_repeat_emit_encodeBlockAsm10B: LEAQ (AX)(R8*1), BP MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R11 SHRQ $0x07, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 LEAQ -32(R9)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOU 32(R10), X6 MOVOU 48(R10), X7 MOVOU 64(R10), X8 MOVOU 80(R10), X9 MOVOU 96(R10), X10 MOVOU 112(R10), X11 MOVOA X4, (R13) MOVOA X5, 16(R13) MOVOA X6, 32(R13) MOVOA X7, 48(R13) MOVOA X8, 64(R13) MOVOA X9, 80(R13) MOVOA X10, 96(R13) MOVOA X11, 112(R13) ADDQ $0x80, R13 ADDQ $0x80, R10 ADDQ $0x80, R12 DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: MOVOU -32(R9)(R12*1), X4 MOVOU -16(R9)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ BP, AX emit_literal_done_repeat_emit_encodeBlockAsm10B: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(BP*1), BP XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend_encodeBlockAsm10B matchlen_loopback_repeat_extend_encodeBlockAsm10B: MOVQ (R9)(R11*1), R10 XORQ (BP)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend_encodeBlockAsm10B BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_loop_repeat_extend_encodeBlockAsm10B: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend_encodeBlockAsm10B matchlen_single_repeat_extend_encodeBlockAsm10B: TESTL R8, R8 JZ 
repeat_extend_forward_end_encodeBlockAsm10B matchlen_single_loopback_repeat_extend_encodeBlockAsm10B: MOVB (R9)(R11*1), R10 CMPB (BP)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm10B LEAL 1(R11), R11 DECL R8 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm10B repeat_extend_forward_end_encodeBlockAsm10B: ADDL R11, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm10B MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_match_repeat_encodeBlockAsm10B CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsm10B cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: CMPL BP, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm10B LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_match_repeat_encodeBlockAsm10B: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_match_repeat_encodeBlockAsm10B: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_match_repeat_encodeBlockAsm10B: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_as_copy_encodeBlockAsm10B: two_byte_offset_repeat_as_copy_encodeBlockAsm10B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: CMPL BP, $0x00000104 JLT 
repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm10B emit_copy_three_repeat_as_copy_encodeBlockAsm10B: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm10B: MOVL CX, 12(SP) JMP search_loop_encodeBlockAsm10B no_repeat_found_encodeBlockAsm10B: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBlockAsm10B SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm10B MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeBlockAsm10B MOVL 20(SP), CX JMP search_loop_encodeBlockAsm10B candidate3_match_encodeBlockAsm10B: ADDL $0x02, CX JMP candidate_match_encodeBlockAsm10B candidate2_match_encodeBlockAsm10B: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeBlockAsm10B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBlockAsm10B 
match_extend_back_loop_encodeBlockAsm10B: CMPL CX, SI JLE match_extend_back_end_encodeBlockAsm10B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBlockAsm10B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBlockAsm10B JMP match_extend_back_loop_encodeBlockAsm10B match_extend_back_end_encodeBlockAsm10B: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm10B: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm10B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 LEAL -1(R8), DI CMPL DI, $0x3c JLT one_byte_match_emit_encodeBlockAsm10B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm10B MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBlockAsm10B two_bytes_match_emit_encodeBlockAsm10B: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX CMPL DI, $0x40 JL memmove_match_emit_encodeBlockAsm10B JMP memmove_long_match_emit_encodeBlockAsm10B one_byte_match_emit_encodeBlockAsm10B: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm10B: LEAQ (AX)(R8*1), DI CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3: MOVW (SI), R9 MOVB 
2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBlockAsm10B: MOVQ DI, AX JMP emit_literal_done_match_emit_encodeBlockAsm10B memmove_long_match_emit_encodeBlockAsm10B: LEAQ (AX)(R8*1), DI MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVQ R8, R10 SHRQ $0x07, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 LEAQ -32(SI)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOU 32(R9), X6 MOVOU 48(R9), X7 MOVOU 64(R9), X8 MOVOU 80(R9), X9 MOVOU 96(R9), X10 MOVOU 112(R9), X11 MOVOA X4, (R12) MOVOA X5, 16(R12) MOVOA X6, 32(R12) MOVOA X7, 48(R12) MOVOA X8, 64(R12) MOVOA X9, 80(R12) MOVOA X10, 96(R12) MOVOA X11, 112(R12) ADDQ $0x80, R12 ADDQ $0x80, R9 ADDQ $0x80, R11 DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: MOVOU -32(SI)(R11*1), X4 MOVOU -16(SI)(R11*1), 
X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm10B: match_nolit_loop_encodeBlockAsm10B: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm10B matchlen_loopback_match_nolit_encodeBlockAsm10B: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm10B BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm10B matchlen_loop_match_nolit_encodeBlockAsm10B: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm10B matchlen_single_match_nolit_encodeBlockAsm10B: TESTL SI, SI JZ match_nolit_end_encodeBlockAsm10B matchlen_single_loopback_match_nolit_encodeBlockAsm10B: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm10B LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm10B match_nolit_end_encodeBlockAsm10B: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) two_byte_offset_match_nolit_encodeBlockAsm10B: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm10B MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short LEAL -256(R9), R9 MOVW $0x0019, (AX) 
MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B JMP two_byte_offset_match_nolit_encodeBlockAsm10B two_byte_offset_short_match_nolit_encodeBlockAsm10B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm10B CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm10B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm10B emit_copy_three_match_nolit_encodeBlockAsm10B: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm10B: CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm10B MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm10B: MOVQ $0x9e3779b1, R8 MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, BP SHLQ $0x20, DI IMULQ R8, DI SHRQ $0x36, DI SHLQ $0x20, BP IMULQ R8, BP SHRQ $0x36, BP LEAL -2(CX), R8 LEAQ 24(SP)(BP*4), R9 MOVL (R9), BP MOVL R8, 24(SP)(DI*4) MOVL CX, (R9) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeBlockAsm10B INCL CX JMP search_loop_encodeBlockAsm10B emit_remainder_encodeBlockAsm10B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm10B: MOVQ src_len+32(FP), CX 
MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm10B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm10B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBlockAsm10B two_bytes_emit_remainder_encodeBlockAsm10B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBlockAsm10B JMP memmove_long_emit_remainder_encodeBlockAsm10B one_byte_emit_remainder_encodeBlockAsm10B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm10B: LEAQ (AX)(BP*1), DX MOVL BP, BX CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP 
memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm10B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeBlockAsm10B memmove_long_emit_remainder_encodeBlockAsm10B: LEAQ (AX)(BP*1), DX MOVL BP, BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, SI SHRQ $0x07, SI MOVQ AX, BP ANDL $0x0000001f, BP MOVQ $0x00000040, DI SUBQ BP, DI DECQ SI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 LEAQ -32(CX)(DI*1), BP LEAQ -32(AX)(DI*1), R8 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: MOVOU (BP), X4 MOVOU 16(BP), X5 MOVOU 32(BP), X6 MOVOU 48(BP), X7 MOVOU 64(BP), X8 MOVOU 80(BP), X9 MOVOU 96(BP), X10 MOVOU 112(BP), X11 MOVOA X4, (R8) MOVOA X5, 16(R8) MOVOA X6, 32(R8) MOVOA X7, 48(R8) MOVOA X8, 64(R8) MOVOA X9, 80(R8) MOVOA X10, 96(R8) MOVOA X11, 112(R8) ADDQ $0x80, R8 ADDQ $0x80, BP ADDQ $0x80, DI DECQ SI JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32: MOVOU -32(CX)(DI*1), X4 MOVOU -16(CX)(DI*1), X5 MOVOA X4, -32(AX)(DI*1) MOVOA X5, -16(AX)(DI*1) ADDQ $0x20, DI CMPQ BX, DI JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsm10B: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET

// func encodeBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written to dst, or 0
// when the output would reach the dst limit computed during setup.
// Match candidates are kept in a 256-entry table of 32-bit src offsets that
// is indexed by an 8-bit multiplicative hash of 4 input bytes.
//
// Stack frame (1048 bytes):
//    0(SP)  8 bytes     dst limit pointer: dst_base + (src_len - 5 - src_len>>5)
//    8(SP)  4 bytes     search limit: src_len - 8
//   12(SP)  4 bytes     nextEmit: src offset of the first byte not yet emitted
//   16(SP)  4 bytes     offset (distance) of the most recent match, starts at 1
//   20(SP)  4 bytes     src offset to probe next when the current one misses
//   24(SP)  1024 bytes  hash table (256 x 4 bytes), zeroed on entry
//
// Long-lived registers:
//   AX  dst write pointer
//   CX  current src offset
//   DX  src_base (reused as scratch once emit_remainder_ok is reached)
//
// NOTE(review): the first search iteration loads 8 bytes at src[1] before the
// search-limit check, and dst_len is never read; presumably the caller
// guarantees a minimum src length and a sufficiently large dst. TODO confirm.
// NOTE(review): the file header marks this file as generated by gen.go, so
// lasting changes belong in the generator rather than here.
TEXT ·encodeBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX // AX = dst write pointer
	MOVQ $0x00000008, CX // 8 iterations x 128 bytes = 1024-byte table
	LEAQ 24(SP), DX // DX = start of the hash table
	PXOR X0, X0

	// Zero the hash table, 128 bytes per iteration.
zero_loop_encodeBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm8B
	MOVL $0x00000000, 12(SP) // nextEmit = 0
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP) // search limit = src_len - 8
	SHRQ $0x05, CX
	SUBL CX, DX // DX = src_len - 5 - src_len/32
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP) // dst limit pointer
	MOVL $0x00000001, CX // start searching at src offset 1
	MOVL CX, 16(SP) // last match offset = 1
	MOVQ src_base+24(FP), DX // DX = src base pointer

	// Main search loop: hash the bytes at CX, CX+1 and CX+2, look up and
	// update the table, then try the previous match offset before the
	// table candidates.
search_loop_encodeBlockAsm8B:
	MOVQ (DX)(CX*1), SI // SI = 8 bytes at src[CX]
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x04, BP
	LEAL 4(CX)(BP*1), BP // BP = CX + 4 + (CX - nextEmit)>>4
	CMPL BP, 8(SP)
	JGE emit_remainder_encodeBlockAsm8B // next probe would pass the search limit
	MOVL BP, 20(SP) // remember the next probe position
	MOVQ $0x9e3779b1, R8 // hash multiplier
	MOVQ SI, R9
	MOVQ SI, R10
	SHRQ $0x08, R10 // R10 = bytes starting at CX+1
	SHLQ $0x20, R9 // keep only the low 4 bytes
	IMULQ R8, R9
	SHRQ $0x38, R9 // R9 = 8-bit hash of src[CX:CX+4]
	SHLQ $0x20, R10
	IMULQ R8, R10
	SHRQ $0x38, R10 // R10 = 8-bit hash of src[CX+1:CX+5]
	MOVL 24(SP)(R9*4), BP // BP = candidate for CX
	MOVL 24(SP)(R10*4), DI // DI = candidate for CX+1
	MOVL CX, 24(SP)(R9*4) // table[hash(CX)] = CX
	LEAL 1(CX), R9
	MOVL R9, 24(SP)(R10*4) // table[hash(CX+1)] = CX+1
	MOVQ SI, R9
	SHRQ $0x10, R9
	SHLQ $0x20, R9
	IMULQ R8, R9
	SHRQ $0x38, R9 // R9 = 8-bit hash of src[CX+2:CX+6]
	MOVL CX, R8
	SUBL 16(SP), R8
	MOVL 1(DX)(R8*1), R10 // 4 bytes at src[CX+1 - last match offset]
	MOVQ SI, R8
	SHRQ $0x08, R8 // low 32 bits = 4 bytes at src[CX+1]
	CMPL R8, R10
	JNE no_repeat_found_encodeBlockAsm8B

	// The 4 bytes at CX+1 match at the previous offset. Extend the match
	// backwards, but not past nextEmit and not below index 0 of the source.
	LEAL 1(CX), SI // SI = start of the match
	MOVL 12(SP), DI // DI = nextEmit (also tested after forward extension)
	MOVL SI, BP
	SUBL 16(SP), BP // BP = index the match refers back to
	JZ repeat_extend_back_end_encodeBlockAsm8B

repeat_extend_back_loop_encodeBlockAsm8B:
	CMPL SI, DI
	JLE repeat_extend_back_end_encodeBlockAsm8B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE repeat_extend_back_end_encodeBlockAsm8B
	LEAL -1(SI), SI
	DECL BP
	JNZ repeat_extend_back_loop_encodeBlockAsm8B

	// Emit the pending literals src[nextEmit:SI], if any.
	// BP = length-1 selects the header: 1 byte when < 60, 2 bytes when
	// < 256, otherwise 3 bytes.
repeat_extend_back_end_encodeBlockAsm8B:
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B
	MOVL SI, R8
	MOVL SI, 12(SP) // nextEmit = SI
	LEAQ (DX)(BP*1), R9 // R9 = literal source pointer
	SUBL BP, R8 // R8 = literal length
	LEAL -1(R8), BP // BP = length - 1
	CMPL BP, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm8B
	CMPL BP, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm8B
	MOVB $0xf4, (AX) // 3-byte header: 0xf4, then 16-bit length-1
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm8B

two_bytes_repeat_emit_encodeBlockAsm8B:
	MOVB $0xf0, (AX) // 2-byte header: 0xf0, then 8-bit length-1
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL memmove_repeat_emit_encodeBlockAsm8B
	JMP memmove_long_repeat_emit_encodeBlockAsm8B

one_byte_repeat_emit_encodeBlockAsm8B:
	SHLB $0x02, BP // 1-byte header: (length-1)<<2
	MOVB BP, (AX)
	ADDQ $0x01, AX

	// Short copy of R8 bytes from (R9) to (AX). Each size class loads the
	// first and last chunk (which may overlap) before storing them.
	// BP = dst pointer after the copy.
memmove_repeat_emit_encodeBlockAsm8B:
	LEAQ (AX)(R8*1), BP
	CMPQ R8, $0x03
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeBlockAsm8B:
	MOVQ BP, AX // advance dst past the copied literals
	JMP emit_literal_done_repeat_emit_encodeBlockAsm8B

	// Long copy of R8 bytes from (R9) to (AX). The first and last 32 bytes
	// are held in X0-X3 and stored after the loops. R12 = 64 - (AX & 31),
	// so AX + R12 - 32 is 32-byte aligned and the loop stores can use MOVOA.
	// NOTE(review): DECQ leaves CF unchanged, so the JA/JNA below test ZF
	// from the DECQ together with CF left by the preceding SUBQ/ADDQ -
	// confirm this matches the intended 128-byte chunk count test.
memmove_long_repeat_emit_encodeBlockAsm8B:
	LEAQ (AX)(R8*1), BP // BP = dst pointer after the copy
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R11
	SHRQ $0x07, R11 // R11 = length / 128
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R9)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

	// 128 bytes per iteration: unaligned loads, aligned stores.
emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOU 32(R10), X6
	MOVOU 48(R10), X7
	MOVOU 64(R10), X8
	MOVOU 80(R10), X9
	MOVOU 96(R10), X10
	MOVOU 112(R10), X11
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	MOVOA X6, 32(R13)
	MOVOA X7, 48(R13)
	MOVOA X8, 64(R13)
	MOVOA X9, 80(R13)
	MOVOA X10, 96(R13)
	MOVOA X11, 112(R13)
	ADDQ $0x80, R13
	ADDQ $0x80, R10
	ADDQ $0x80, R12
	DECQ R11
	JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back

	// 32 bytes per iteration while length (R8) >= running offset (R12).
emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R8, R12
	JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX) // unaligned head
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1) // unaligned tail
	MOVOU X3, -16(AX)(R8*1)
	MOVQ BP, AX

	// Extend the match forward from CX+5 (the 4 compared bytes at CX+1 are
	// already known to match). R11 counts additional matching bytes,
	// R8 = bytes left in src.
emit_literal_done_repeat_emit_encodeBlockAsm8B:
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9 // R9 = pointer to current position
	LEAQ (DX)(BP*1), BP // BP = pointer to the earlier copy
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend_encodeBlockAsm8B

	// Compare 8 bytes at a time; on a difference the lowest set bit of the
	// XOR, divided by 8, gives the number of equal leading bytes.
matchlen_loopback_repeat_extend_encodeBlockAsm8B:
	MOVQ (R9)(R11*1), R10
	XORQ (BP)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend_encodeBlockAsm8B
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeBlockAsm8B

matchlen_loop_repeat_extend_encodeBlockAsm8B:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend_encodeBlockAsm8B

	// Fewer than 8 bytes left: compare byte by byte.
matchlen_single_repeat_extend_encodeBlockAsm8B:
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeBlockAsm8B

matchlen_single_loopback_repeat_extend_encodeBlockAsm8B:
	MOVB (R9)(R11*1), R10
	CMPB (BP)(R11*1), R10
	JNE repeat_extend_forward_end_encodeBlockAsm8B
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm8B

	// CX = end of the match, BP = total length (CX - match start SI),
	// SI = match offset. DI still holds nextEmit as loaded before the
	// backward extension; when it is zero the match is written through the
	// copy encoder instead of the repeat encoder.
repeat_extend_forward_end_encodeBlockAsm8B:
	ADDL R11, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	TESTL DI, DI
	JZ repeat_as_copy_encodeBlockAsm8B

	// Repeat encoder. SI = length, BP = length - 4.
	MOVL BP, SI
	LEAL -4(BP), BP
	CMPL SI, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm8B
	// NOTE(review): the JGE target is the very next instruction, so this
	// compare has no effect on control flow.
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B

cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
	CMPL BP, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm8B
	LEAL -256(BP), BP
	MOVW $0x0019, (AX) // 4 bytes: 0x19, 0x00, then 16-bit length-260
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_three_match_repeat_encodeBlockAsm8B:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX) // 3 bytes: 0x15, 0x00, then 8-bit length-8
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_match_repeat_encodeBlockAsm8B:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX) // 2 bytes: (length-4)<<2 | 1, then 0x00
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B
	// NOTE(review): the following nine instructions are unreachable - they
	// follow an unconditional JMP and carry no label.
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

	// Copy encoder for the repeat path. BP = length, SI = offset.
	// Lengths above 64 first write 0xee plus the 16-bit offset, subtract 60
	// from the length, and encode the rest with the repeat forms.
repeat_as_copy_encodeBlockAsm8B:
two_byte_offset_repeat_as_copy_encodeBlockAsm8B:
	CMPL BP, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX
	MOVL BP, SI // SI = remaining length (offset no longer needed)
	LEAL -4(BP), BP
	CMPL SI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	// NOTE(review): the JGE target is the very next instruction, so this
	// compare has no effect on control flow.
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	CMPL BP, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B
	// NOTE(review): the following ten instructions are unreachable - they
	// follow an unconditional JMP and carry no label.
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8B

	// Length <= 64. Below 12 the 2-byte form is used:
	//   byte 0 = (length-4)<<2 | 1 | (offset>>8)<<5, byte 1 = low offset byte.
	// NOTE(review): no offset range check is made before the 2-byte form;
	// presumably offsets always fit in 11 bits for the inputs this variant
	// receives - TODO confirm against the caller.
	// Only BL is initialised before BX is used in LEAL; just the low byte of
	// the result is stored, so stale upper bits of BX never reach memory.
two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
	CMPL BP, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

	// 3-byte form: byte 0 = (length-1)<<2 | 2, then the 16-bit offset.
emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeBlockAsm8B:
	MOVL CX, 12(SP) // nextEmit = end of the match
	JMP search_loop_encodeBlockAsm8B

	// No match at the previous offset: test the table candidates for CX
	// (BP), CX+1 (DI) and CX+2 in turn, shifting SI right one byte each time.
no_repeat_found_encodeBlockAsm8B:
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeBlockAsm8B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP // BP = candidate for CX+2
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ candidate2_match_encodeBlockAsm8B
	MOVL R8, 24(SP)(R9*4) // table[hash(CX+2)] = CX+2
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ candidate3_match_encodeBlockAsm8B
	MOVL 20(SP), CX // nothing matched: jump to the saved next probe position
	JMP search_loop_encodeBlockAsm8B

candidate3_match_encodeBlockAsm8B:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm8B

candidate2_match_encodeBlockAsm8B:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

	// CX = match position, BP = candidate position. Extend backwards while
	// the preceding bytes are equal, CX stays above nextEmit and BP is
	// non-zero.
candidate_match_encodeBlockAsm8B:
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeBlockAsm8B

match_extend_back_loop_encodeBlockAsm8B:
	CMPL CX, SI
	JLE match_extend_back_end_encodeBlockAsm8B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeBlockAsm8B
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeBlockAsm8B
	JMP match_extend_back_loop_encodeBlockAsm8B

	// Return 0 if writing the pending literals plus 4 bytes would reach the
	// dst limit.
match_extend_back_end_encodeBlockAsm8B:
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Emit the pending literals src[nextEmit:CX], if any. Same header and
	// copy scheme as in the repeat path; DI = length-1, R8 = length,
	// SI = source pointer.
match_dst_size_check_encodeBlockAsm8B:
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ emit_literal_done_match_emit_encodeBlockAsm8B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm8B
	CMPL DI, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBlockAsm8B

two_bytes_match_emit_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL memmove_match_emit_encodeBlockAsm8B
	JMP memmove_long_match_emit_encodeBlockAsm8B

one_byte_match_emit_encodeBlockAsm8B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

	// Short copy of R8 bytes from (SI) to (AX); DI = dst pointer afterwards.
memmove_match_emit_encodeBlockAsm8B:
	LEAQ (AX)(R8*1), DI
	CMPQ R8, $0x03
	JB emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeBlockAsm8B:
	MOVQ DI, AX
	JMP emit_literal_done_match_emit_encodeBlockAsm8B

	// Long copy of R8 bytes from (SI) to (AX): head and tail held in X0-X3,
	// aligned stores in between. R11 = 64 - (AX & 31) is the running offset.
	// NOTE(review): DECQ leaves CF unchanged, so the JA/JNA below test ZF
	// from the DECQ together with CF left by the preceding SUBQ/ADDQ -
	// confirm this matches the intended 128-byte chunk count test.
memmove_long_match_emit_encodeBlockAsm8B:
	LEAQ (AX)(R8*1), DI
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ R8, R10
	SHRQ $0x07, R10
	MOVQ AX, R9
	ANDL $0x0000001f, R9
	MOVQ $0x00000040, R11
	SUBQ R9, R11
	DECQ R10
	JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(SI)(R11*1), R9
	LEAQ -32(AX)(R11*1), R12

emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOU 32(R9), X6
	MOVOU 48(R9), X7
	MOVOU 64(R9), X8
	MOVOU 80(R9), X9
	MOVOU 96(R9), X10
	MOVOU 112(R9), X11
	MOVOA X4, (R12)
	MOVOA X5, 16(R12)
	MOVOA X6, 32(R12)
	MOVOA X7, 48(R12)
	MOVOA X8, 64(R12)
	MOVOA X9, 80(R12)
	MOVOA X10, 96(R12)
	MOVOA X11, 112(R12)
	ADDQ $0x80, R12
	ADDQ $0x80, R9
	ADDQ $0x80, R11
	DECQ R10
	JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ $0x20, R11
	CMPQ R8, R11
	JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ DI, AX

	// Match with no pending literals. CX = position, BP = candidate.
	// Record the offset, skip the 4 bytes already known to match, and
	// measure how far the match continues (R9 = extra matching bytes).
emit_literal_done_match_emit_encodeBlockAsm8B:
match_nolit_loop_encodeBlockAsm8B:
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP) // last match offset = CX - candidate
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI // SI = bytes left in src
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm8B

matchlen_loopback_match_nolit_encodeBlockAsm8B:
	MOVQ (DI)(R9*1), R8
	XORQ (BP)(R9*1), R8
	TESTQ R8, R8
	JZ matchlen_loop_match_nolit_encodeBlockAsm8B
	BSFQ R8, R8
	SARQ $0x03, R8 // bit index / 8 = equal leading bytes
	LEAL (R9)(R8*1), R9
	JMP match_nolit_end_encodeBlockAsm8B

matchlen_loop_match_nolit_encodeBlockAsm8B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm8B

matchlen_single_match_nolit_encodeBlockAsm8B:
	TESTL SI, SI
	JZ match_nolit_end_encodeBlockAsm8B

matchlen_single_loopback_match_nolit_encodeBlockAsm8B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE match_nolit_end_encodeBlockAsm8B
	LEAL 1(R9), R9
	DECL SI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm8B

	// CX = end of the match, BP = offset, R9 = total match length.
match_nolit_end_encodeBlockAsm8B:
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	MOVL CX, 12(SP) // nextEmit = end of the match

	// Copy encoder, same scheme as in the repeat path: lengths above 64
	// write 0xee plus the 16-bit offset, subtract 60, and encode the rest
	// with the repeat forms.
two_byte_offset_match_nolit_encodeBlockAsm8B:
	CMPL R9, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm8B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX
	MOVL R9, BP // BP = remaining length (offset no longer needed)
	LEAL -4(R9), R9
	CMPL BP, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
	// NOTE(review): the JGE target is the very next instruction, so this
	// compare has no effect on control flow.
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
	CMPL R9, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
	SHLL $0x02, R9
	ORL $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B
	// NOTE(review): the following ten instructions are unreachable - they
	// follow an unconditional JMP and carry no label.
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B
	JMP two_byte_offset_match_nolit_encodeBlockAsm8B

	// Length <= 64: 2-byte form below 12, otherwise 3-byte form.
	// NOTE(review): no offset range check is made before the 2-byte form;
	// presumably offsets always fit in 11 bits here - TODO confirm.
two_byte_offset_short_match_nolit_encodeBlockAsm8B:
	CMPL R9, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

emit_copy_three_match_nolit_encodeBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

	// After a copy: stop searching at the search limit, return 0 once the
	// dst pointer has reached the dst limit, otherwise index CX-2 and CX
	// and check for an immediate follow-up match at CX.
match_nolit_emitcopy_end_encodeBlockAsm8B:
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm8B
	MOVQ -2(DX)(CX*1), SI // SI = 8 bytes at src[CX-2]
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm8B:
	MOVQ $0x9e3779b1, R8
	MOVQ SI, DI
	SHRQ $0x10, SI // SI = bytes starting at CX
	MOVQ SI, BP
	SHLQ $0x20, DI
	IMULQ R8, DI
	SHRQ $0x38, DI // DI = hash of src[CX-2:CX+2]
	SHLQ $0x20, BP
	IMULQ R8, BP
	SHRQ $0x38, BP // BP = hash of src[CX:CX+4]
	LEAL -2(CX), R8
	LEAQ 24(SP)(BP*4), R9
	MOVL (R9), BP // BP = candidate for CX
	MOVL R8, 24(SP)(DI*4) // table[hash(CX-2)] = CX-2
	MOVL CX, (R9) // table[hash(CX)] = CX
	CMPL (DX)(BP*1), SI
	JEQ match_nolit_loop_encodeBlockAsm8B
	INCL CX
	JMP search_loop_encodeBlockAsm8B

	// Search is finished: emit src[nextEmit:src_len] as literals, or return
	// 0 if that (plus 4 bytes) would reach the dst limit.
emit_remainder_encodeBlockAsm8B:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// From here DX no longer holds src_base: CX = literal source pointer,
	// BP = literal length, DX = length-1.
emit_remainder_ok_encodeBlockAsm8B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm8B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm8B

two_bytes_emit_remainder_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBlockAsm8B
	JMP memmove_long_emit_remainder_encodeBlockAsm8B

one_byte_emit_remainder_encodeBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

	// Short copy of BX bytes from (CX) to (AX); DX = dst pointer afterwards.
memmove_emit_remainder_encodeBlockAsm8B:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm8B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm8B

	// Long copy of BX bytes from (CX) to (AX): head and tail held in X0-X3,
	// aligned stores in between. DI = 64 - (AX & 31) is the running offset.
	// NOTE(review): DECQ leaves CF unchanged, so the JA/JNA below test ZF
	// from the DECQ together with CF left by the preceding SUBQ/ADDQ -
	// confirm this matches the intended 128-byte chunk count test.
memmove_long_emit_remainder_encodeBlockAsm8B:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, SI
	SHRQ $0x07, SI
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
	SUBQ BP, DI
	DECQ SI
	JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(CX)(DI*1), BP
	LEAQ -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOU 32(BP), X6
	MOVOU 48(BP), X7
	MOVOU 64(BP), X8
	MOVOU 80(BP), X9
	MOVOU 96(BP), X10
	MOVOU 112(BP), X11
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	MOVOA X6, 32(R8)
	MOVOA X7, 48(R8)
	MOVOA X8, 64(R8)
	MOVOA X9, 80(R8)
	MOVOA X10, 96(R8)
	MOVOA X11, 112(R8)
	ADDQ $0x80, R8
	ADDQ $0x80, BP
	ADDQ $0x80, DI
	DECQ SI
	JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ BX, DI
	JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

	// Return the number of bytes written: dst write pointer - dst_base.
emit_literal_done_emit_remainder_encodeBlockAsm8B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeSnappyBlockAsm(SB), $65560-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000200, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsm MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX
SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x06, BP LEAL 4(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm MOVL BP, 20(SP) MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 SHLQ $0x10, R10 IMULQ R8, R10 SHRQ $0x32, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm repeat_extend_back_loop_encodeSnappyBlockAsm: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm repeat_extend_back_end_encodeSnappyBlockAsm: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI LEAL -1(DI), BP CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeSnappyBlockAsm CMPL BP, $0x01000000 JLT four_bytes_repeat_emit_encodeSnappyBlockAsm MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm four_bytes_repeat_emit_encodeSnappyBlockAsm: MOVL BP, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm three_bytes_repeat_emit_encodeSnappyBlockAsm: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX 
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm two_bytes_repeat_emit_encodeSnappyBlockAsm: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_repeat_emit_encodeSnappyBlockAsm JMP memmove_long_repeat_emit_encodeSnappyBlockAsm one_byte_repeat_emit_encodeSnappyBlockAsm: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm: LEAQ (AX)(DI*1), BP CMPQ DI, $0x03 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4through7 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R8 MOVB R9, (AX) MOVB R8, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R8 MOVW R9, (AX) MOVB R8, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R8 MOVL R9, (AX) MOVL R8, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R8 MOVQ R9, (AX) MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 
MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: MOVQ BP, AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm memmove_long_repeat_emit_encodeSnappyBlockAsm: LEAQ (AX)(DI*1), BP MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVQ DI, R10 SHRQ $0x07, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 LEAQ -32(R8)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOU 32(R9), X6 MOVOU 48(R9), X7 MOVOU 64(R9), X8 MOVOU 80(R9), X9 MOVOU 96(R9), X10 MOVOU 112(R9), X11 MOVOA X4, (R12) MOVOA X5, 16(R12) MOVOA X6, 32(R12) MOVOA X7, 48(R12) MOVOA X8, 64(R12) MOVOA X9, 80(R12) MOVOA X10, 96(R12) MOVOA X11, 112(R12) ADDQ $0x80, R12 ADDQ $0x80, R9 ADDQ $0x80, R11 DECQ R10 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: MOVOU -32(R8)(R11*1), X4 MOVOU -16(R8)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ DI, R11 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP 
repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_loop_repeat_extend_encodeSnappyBlockAsm: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm matchlen_single_repeat_extend_encodeSnappyBlockAsm: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm repeat_extend_forward_end_encodeSnappyBlockAsm: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: TESTL BP, BP JZ repeat_end_emit_encodeSnappyBlockAsm MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeSnappyBlockAsm two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: MOVB $0x02, BL LEAL 
-4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm: MOVL CX, 12(SP) JMP search_loop_encodeSnappyBlockAsm no_repeat_found_encodeSnappyBlockAsm: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm candidate3_match_encodeSnappyBlockAsm: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm candidate2_match_encodeSnappyBlockAsm: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsm: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm match_extend_back_loop_encodeSnappyBlockAsm: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsm MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm JMP match_extend_back_loop_encodeSnappyBlockAsm match_extend_back_end_encodeSnappyBlockAsm: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 LEAL -1(R8), DI CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeSnappyBlockAsm CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeSnappyBlockAsm MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm four_bytes_match_emit_encodeSnappyBlockAsm: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB 
R9, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm three_bytes_match_emit_encodeSnappyBlockAsm: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm two_bytes_match_emit_encodeSnappyBlockAsm: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX CMPL DI, $0x40 JL memmove_match_emit_encodeSnappyBlockAsm JMP memmove_long_match_emit_encodeSnappyBlockAsm one_byte_match_emit_encodeSnappyBlockAsm: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm: LEAQ (AX)(R8*1), DI CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP 
memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm: MOVQ DI, AX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm memmove_long_match_emit_encodeSnappyBlockAsm: LEAQ (AX)(R8*1), DI MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVQ R8, R10 SHRQ $0x07, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 LEAQ -32(SI)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOU 32(R9), X6 MOVOU 48(R9), X7 MOVOU 64(R9), X8 MOVOU 80(R9), X9 MOVOU 96(R9), X10 MOVOU 112(R9), X11 MOVOA X4, (R12) MOVOA X5, 16(R12) MOVOA X6, 32(R12) MOVOA X7, 48(R12) MOVOA X8, 64(R12) MOVOA X9, 80(R12) MOVOA X10, 96(R12) MOVOA X11, 112(R12) ADDQ $0x80, R12 ADDQ $0x80, R9 ADDQ $0x80, R11 DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: MOVOU -32(SI)(R11*1), X4 MOVOU -16(SI)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsm: match_nolit_loop_encodeSnappyBlockAsm: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm 
matchlen_loopback_match_nolit_encodeSnappyBlockAsm: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_loop_match_nolit_encodeSnappyBlockAsm: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm matchlen_single_match_nolit_encodeSnappyBlockAsm: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm match_nolit_end_encodeSnappyBlockAsm: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeSnappyBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm four_bytes_remain_match_nolit_encodeSnappyBlockAsm: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm two_byte_offset_match_nolit_encodeSnappyBlockAsm: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) 
ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm emit_copy_three_match_nolit_encodeSnappyBlockAsm: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm: CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm: MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, BP SHLQ $0x10, DI IMULQ R8, DI SHRQ $0x32, DI SHLQ $0x10, BP IMULQ R8, BP SHRQ $0x32, BP LEAL -2(CX), R8 LEAQ 24(SP)(BP*4), R9 MOVL (R9), BP MOVL R8, 24(SP)(DI*4) MOVL CX, (R9) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm INCL CX JMP search_loop_encodeSnappyBlockAsm emit_remainder_encodeSnappyBlockAsm: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeSnappyBlockAsm MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm four_bytes_emit_remainder_encodeSnappyBlockAsm: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm three_bytes_emit_remainder_encodeSnappyBlockAsm: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm two_bytes_emit_remainder_encodeSnappyBlockAsm: MOVB $0xf0, (AX) 
MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeSnappyBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBlockAsm one_byte_emit_remainder_encodeSnappyBlockAsm: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm: LEAQ (AX)(BP*1), DX MOVL BP, BX CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU 
-16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm memmove_long_emit_remainder_encodeSnappyBlockAsm: LEAQ (AX)(BP*1), DX MOVL BP, BX MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, SI SHRQ $0x07, SI MOVQ AX, BP ANDL $0x0000001f, BP MOVQ $0x00000040, DI SUBQ BP, DI DECQ SI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 LEAQ -32(CX)(DI*1), BP LEAQ -32(AX)(DI*1), R8 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: MOVOU (BP), X4 MOVOU 16(BP), X5 MOVOU 32(BP), X6 MOVOU 48(BP), X7 MOVOU 64(BP), X8 MOVOU 80(BP), X9 MOVOU 96(BP), X10 MOVOU 112(BP), X11 MOVOA X4, (R8) MOVOA X5, 16(R8) MOVOA X6, 32(R8) MOVOA X7, 48(R8) MOVOA X8, 64(R8) MOVOA X9, 80(R8) MOVOA X10, 96(R8) MOVOA X11, 112(R8) ADDQ $0x80, R8 ADDQ $0x80, BP ADDQ $0x80, DI DECQ SI JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32: MOVOU -32(CX)(DI*1), X4 MOVOU -16(CX)(DI*1), X5 MOVOA X4, -32(AX)(DI*1) MOVOA X5, -16(AX)(DI*1) ADDQ $0x20, DI CMPQ BX, DI JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeSnappyBlockAsm: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeSnappyBlockAsm12B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000080, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm12B: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ 
zero_loop_encodeSnappyBlockAsm12B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm12B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm12B MOVL BP, 20(SP) MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 SHLQ $0x18, R10 IMULQ R8, R10 SHRQ $0x34, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm12B LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm12B repeat_extend_back_loop_encodeSnappyBlockAsm12B: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm12B MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B repeat_extend_back_end_encodeSnappyBlockAsm12B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI LEAL -1(DI), BP CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm12B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12B MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B two_bytes_repeat_emit_encodeSnappyBlockAsm12B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_repeat_emit_encodeSnappyBlockAsm12B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B 
one_byte_repeat_emit_encodeSnappyBlockAsm12B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm12B: LEAQ (AX)(DI*1), BP CMPQ DI, $0x03 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4through7 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R8 MOVB R9, (AX) MOVB R8, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R8 MOVW R9, (AX) MOVB R8, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R8 MOVL R9, (AX) MOVL R8, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R8 MOVQ R9, (AX) MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: MOVQ BP, 
AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B memmove_long_repeat_emit_encodeSnappyBlockAsm12B: LEAQ (AX)(DI*1), BP MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVQ DI, R10 SHRQ $0x07, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(R8)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOU 32(R9), X6 MOVOU 48(R9), X7 MOVOU 64(R9), X8 MOVOU 80(R9), X9 MOVOU 96(R9), X10 MOVOU 112(R9), X11 MOVOA X4, (R12) MOVOA X5, 16(R12) MOVOA X6, 32(R12) MOVOA X7, 48(R12) MOVOA X8, 64(R12) MOVOA X9, 80(R12) MOVOA X10, 96(R12) MOVOA X11, 112(R12) ADDQ $0x80, R12 ADDQ $0x80, R9 ADDQ $0x80, R11 DECQ R10 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(R8)(R11*1), X4 MOVOU -16(R8)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ DI, R11 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm12B matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE 
matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B matchlen_single_repeat_extend_encodeSnappyBlockAsm12B: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B repeat_extend_forward_end_encodeSnappyBlockAsm12B: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm12B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm12B: MOVL CX, 12(SP) JMP search_loop_encodeSnappyBlockAsm12B no_repeat_found_encodeSnappyBlockAsm12B: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm12B SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm12B MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm12B MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm12B candidate3_match_encodeSnappyBlockAsm12B: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm12B candidate2_match_encodeSnappyBlockAsm12B: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP 
candidate_match_encodeSnappyBlockAsm12B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm12B match_extend_back_loop_encodeSnappyBlockAsm12B: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsm12B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm12B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm12B JMP match_extend_back_loop_encodeSnappyBlockAsm12B match_extend_back_end_encodeSnappyBlockAsm12B: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm12B: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 LEAL -1(R8), DI CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm12B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm12B MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm12B two_bytes_match_emit_encodeSnappyBlockAsm12B: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX CMPL DI, $0x40 JL memmove_match_emit_encodeSnappyBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBlockAsm12B one_byte_match_emit_encodeSnappyBlockAsm12B: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm12B: LEAQ (AX)(R8*1), DI CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 
emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: MOVQ DI, AX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B memmove_long_match_emit_encodeSnappyBlockAsm12B: LEAQ (AX)(R8*1), DI MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVQ R8, R10 SHRQ $0x07, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(SI)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOU 32(R9), X6 MOVOU 48(R9), X7 MOVOU 64(R9), X8 MOVOU 80(R9), X9 MOVOU 96(R9), X10 MOVOU 112(R9), X11 MOVOA X4, (R12) MOVOA X5, 16(R12) 
MOVOA X6, 32(R12) MOVOA X7, 48(R12) MOVOA X8, 64(R12) MOVOA X9, 80(R12) MOVOA X10, 96(R12) MOVOA X11, 112(R12) ADDQ $0x80, R12 ADDQ $0x80, R9 ADDQ $0x80, R11 DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(SI)(R11*1), X4 MOVOU -16(SI)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsm12B: match_nolit_loop_encodeSnappyBlockAsm12B: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm12B matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_loop_match_nolit_encodeSnappyBlockAsm12B: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B matchlen_single_match_nolit_encodeSnappyBlockAsm12B: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm12B matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm12B LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B match_nolit_end_encodeSnappyBlockAsm12B: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ 
$0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm12B MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm12B: MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, BP SHLQ $0x18, DI IMULQ R8, DI SHRQ $0x34, DI SHLQ $0x18, BP IMULQ R8, BP SHRQ $0x34, BP LEAL -2(CX), R8 LEAQ 24(SP)(BP*4), R9 MOVL (R9), BP MOVL R8, 24(SP)(DI*4) MOVL CX, (R9) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm12B INCL CX JMP search_loop_encodeSnappyBlockAsm12B emit_remainder_encodeSnappyBlockAsm12B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm12B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B two_bytes_emit_remainder_encodeSnappyBlockAsm12B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL 
memmove_emit_remainder_encodeSnappyBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B one_byte_emit_remainder_encodeSnappyBlockAsm12B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm12B: LEAQ (AX)(BP*1), DX MOVL BP, BX CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), 
X2
	// (Tail of encodeSnappyBlockAsm12B; that function begins before this span
	// and is reproduced here unchanged.)
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B

memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
	// NOTE(review): this uses the same SHRQ/DECQ/JA sequence that is replaced in
	// encodeSnappyBlockAsm10B below; left untouched because the start of this
	// function is outside the reviewed span.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, SI
	SHRQ $0x07, SI
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
	SUBQ BP, DI
	DECQ SI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	LEAQ -32(CX)(DI*1), BP
	LEAQ -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOU 32(BP), X6
	MOVOU 48(BP), X7
	MOVOU 64(BP), X8
	MOVOU 80(BP), X9
	MOVOU 96(BP), X10
	MOVOU 112(BP), X11
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	MOVOA X6, 32(R8)
	MOVOA X7, 48(R8)
	MOVOA X8, 64(R8)
	MOVOA X9, 80(R8)
	MOVOA X10, 96(R8)
	MOVOA X11, 112(R8)
	ADDQ $0x80, R8
	ADDQ $0x80, BP
	ADDQ $0x80, DI
	DECQ SI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ BX, DI
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst as literal and copy elements and returns the number of
// bytes written, or 0 as soon as the output would reach the limit kept at 0(SP).
//
// Stack frame (4120 bytes):
//    0(SP)  64-bit  dst limit = dst_base + len(src) - 5 - len(src)>>5
//    8(SP)  32-bit  len(src) - 8; searching stops once the next position reaches it
//   12(SP)  32-bit  src index of the first byte not yet written to dst
//   16(SP)  32-bit  offset of the most recent match (starts at 1)
//   20(SP)  32-bit  position to continue searching from when no candidate matches
//   24(SP)  1024 x 32-bit hash table of src positions (10-bit hash)
//
// Long-lived registers: AX = dst write pointer, DX = src base, CX = current
// src index. Everything else is scratch.
//
// NOTE(review): the 8-byte loads at (DX)(CX*1) presumably rely on the caller
// passing a src long enough for them to stay in bounds - confirm against caller.
TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56
	MOVQ dst_base+0(FP), AX

	// Zero the hash table: 32 iterations x 128 bytes = 4096 bytes at 24(SP).
	MOVQ $0x00000020, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBlockAsm10B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBlockAsm10B

	// Initialise the frame slots described in the header.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm10B:
	// SI = 8 src bytes at the current position.
	// Next search position = CX + 4 + (CX - 12(SP))>>5; leave the loop when it
	// reaches the limit in 8(SP).
	MOVQ (DX)(CX*1), SI
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x05, BP
	LEAL 4(CX)(BP*1), BP
	CMPL BP, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm10B
	MOVL BP, 20(SP)

	// Hash the 4 bytes at CX (R9) and at CX+1 (R10): keep the low 32 bits,
	// multiply by 0x9e3779b1 and take the top 10 bits.
	MOVQ $0x9e3779b1, R8
	MOVQ SI, R9
	MOVQ SI, R10
	SHRQ $0x08, R10
	SHLQ $0x20, R9
	IMULQ R8, R9
	SHRQ $0x36, R9
	SHLQ $0x20, R10
	IMULQ R8, R10
	SHRQ $0x36, R10

	// BP / DI = previous table entries for the two hashes; store CX and CX+1.
	MOVL 24(SP)(R9*4), BP
	MOVL 24(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	LEAL 1(CX), R9
	MOVL R9, 24(SP)(R10*4)

	// R9 = hash of the 4 bytes at CX+2 (used in no_repeat_found).
	MOVQ SI, R9
	SHRQ $0x10, R9
	SHLQ $0x20, R9
	IMULQ R8, R9
	SHRQ $0x36, R9

	// Repeat check: compare the 4 bytes at CX+1 with the 4 bytes at
	// CX+1-offset, where offset is the value in 16(SP).
	MOVL CX, R8
	SUBL 16(SP), R8
	MOVL 1(DX)(R8*1), R10
	MOVQ SI, R8
	SHRQ $0x08, R8
	CMPL R8, R10
	JNE no_repeat_found_encodeSnappyBlockAsm10B

	// Repeat found at SI = CX+1. Extend it backwards while the preceding bytes
	// match, without going below 12(SP) or below index 0 of the earlier copy.
	LEAL 1(CX), SI
	MOVL 12(SP), BP
	MOVL SI, DI
	SUBL 16(SP), DI
	JZ repeat_extend_back_end_encodeSnappyBlockAsm10B

repeat_extend_back_loop_encodeSnappyBlockAsm10B:
	CMPL SI, BP
	JLE repeat_extend_back_end_encodeSnappyBlockAsm10B
	MOVB -1(DX)(DI*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE repeat_extend_back_end_encodeSnappyBlockAsm10B
	LEAL -1(SI), SI
	DECL DI
	JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B

repeat_extend_back_end_encodeSnappyBlockAsm10B:
	// Emit src[12(SP):SI] as a literal (skipped when empty).
	// DI = length, BP = length-1, R8 = source pointer.
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
	MOVL SI, DI
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R8
	SUBL BP, DI
	LEAL -1(DI), BP

	// Literal header: length-1 < 60 -> one tag byte (length-1)<<2;
	// < 256 -> tag 0xf0 + 1 length byte; otherwise tag 0xf4 + 2 length bytes.
	CMPL BP, $0x3c
	JLT one_byte_repeat_emit_encodeSnappyBlockAsm10B
	CMPL BP, $0x00000100
	JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B

two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL memmove_repeat_emit_encodeSnappyBlockAsm10B
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B

one_byte_repeat_emit_encodeSnappyBlockAsm10B:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeSnappyBlockAsm10B:
	// Short copy of DI (at most 64) bytes from (R8) to (AX) using a head move
	// and a tail move that may overlap. BP = dst pointer after the copy.
	LEAQ (AX)(DI*1), BP
	CMPQ DI, $0x03
	JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2
	JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3
	CMPQ DI, $0x08
	JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4through7
	CMPQ DI, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
	CMPQ DI, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2:
	MOVB (R8), R9
	MOVB -1(R8)(DI*1), R8
	MOVB R9, (AX)
	MOVB R8, -1(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3:
	MOVW (R8), R9
	MOVB 2(R8), R8
	MOVW R9, (AX)
	MOVB R8, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4through7:
	MOVL (R8), R9
	MOVL -4(R8)(DI*1), R8
	MOVL R9, (AX)
	MOVL R8, -4(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
	MOVQ (R8), R9
	MOVQ -8(R8)(DI*1), R8
	MOVQ R9, (AX)
	MOVQ R8, -8(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
	MOVOU (R8), X0
	MOVOU -16(R8)(DI*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
	MOVQ BP, AX
	JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B

memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
	// Long copy of DI bytes (reached only with DI-1 >= 0x40) from (R8) to (AX).
	// X0..X3 hold the first and last 32 bytes and are stored last; the loop
	// copies the middle in 32-byte steps so that its stores are 32-byte aligned.
	// R11 = 64 - (AX & 31) is the end offset of the first step.
	//
	// FIX: the previous code computed DI>>7 and used DECQ / JA to choose between
	// this loop and an unrolled 128-byte loop. DECQ leaves CF untouched and CF is
	// already 0 after ANDL/SUBQ, so JA reduced to "(DI>>7) != 1": the 128-byte
	// loop ran exactly once for 128 <= DI <= 255 and its JNA never looped back.
	// That pass followed by the unconditional first pass of the 32-byte loop
	// could read and write up to 64 bytes past the end of the literal. The
	// 32-byte loop alone is correct for every DI >= 64, so it is always used now.
	// This file is generated; the generator needs the same change or this edit
	// is lost on regeneration.
	LEAQ (AX)(DI*1), BP
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVQ AX, R9
	ANDL $0x0000001f, R9
	MOVQ $0x00000040, R11
	SUBQ R9, R11

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(R8)(R11*1), X4
	MOVOU -16(R8)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ $0x20, R11
	CMPQ DI, R11
	JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)
	MOVQ BP, AX

emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
	// Extend the repeat forwards. CX += 5 (the repeat started at CX+1 and 4
	// bytes are already known to match). R8 = src+CX, BP = src+CX-offset,
	// DI = bytes left in src, R10 = number of further matching bytes.
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(BP*1), BP
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_repeat_extend_encodeSnappyBlockAsm10B

matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B:
	// Compare 8 bytes at a time; on a difference the index of the lowest set
	// bit of the XOR divided by 8 is the number of equal leading bytes.
	MOVQ (R8)(R10*1), R9
	XORQ (BP)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B

matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B

matchlen_single_repeat_extend_encodeSnappyBlockAsm10B:
	// Fewer than 8 bytes left: compare byte by byte.
	TESTL DI, DI
	JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B

matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B:
	MOVB (R8)(R10*1), R9
	CMPB (BP)(R10*1), R9
	JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B

repeat_extend_forward_end_encodeSnappyBlockAsm10B:
	// CX = end of the repeat, BP = its total length, SI = offset.
	ADDL R10, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI

two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
	// While more than 64 bytes remain, emit a 3-byte copy of length 60
	// (tag 0xee = (60-1)<<2 | 2, followed by the 16-bit offset).
	CMPL BP, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX
	JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
	// Length < 12 and offset < 2048: 2-byte copy with tag
	// (offset>>8)<<5 | (length-4)<<2 | 1 followed by the low offset byte.
	// Only the low byte of the LEAL result is stored, so the upper bits of BX
	// (only BL is written) do not matter.
	CMPL BP, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
	CMPL SI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeSnappyBlockAsm10B

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
	// 3-byte copy: tag (length-1)<<2 | 2 followed by the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm10B:
	MOVL CX, 12(SP)
	JMP search_loop_encodeSnappyBlockAsm10B

no_repeat_found_encodeSnappyBlockAsm10B:
	// Try the table candidates for positions CX, CX+1 and CX+2 in turn by
	// comparing 4 bytes; SI is shifted right one byte per step.
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeSnappyBlockAsm10B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ candidate2_match_encodeSnappyBlockAsm10B
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ candidate3_match_encodeSnappyBlockAsm10B

	// Nothing matched: continue at the position saved in 20(SP).
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBlockAsm10B

candidate3_match_encodeSnappyBlockAsm10B:
	ADDL $0x02, CX
	JMP candidate_match_encodeSnappyBlockAsm10B

candidate2_match_encodeSnappyBlockAsm10B:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

candidate_match_encodeSnappyBlockAsm10B:
	// Match at CX against candidate BP. Extend backwards while the preceding
	// bytes are equal, stopping at 12(SP) or when the candidate reaches 0.
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeSnappyBlockAsm10B

match_extend_back_loop_encodeSnappyBlockAsm10B:
	CMPL CX, SI
	JLE match_extend_back_end_encodeSnappyBlockAsm10B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeSnappyBlockAsm10B
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeSnappyBlockAsm10B
	JMP match_extend_back_loop_encodeSnappyBlockAsm10B

match_extend_back_end_encodeSnappyBlockAsm10B:
	// Return 0 if AX + 4 + pending literal length reaches the dst limit.
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeSnappyBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBlockAsm10B:
	// Emit src[12(SP):CX] as a literal (skipped when empty).
	// R8 = length, DI = length-1, SI = source pointer. Same header rules as
	// in the repeat path above.
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT one_byte_match_emit_encodeSnappyBlockAsm10B
	CMPL DI, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBlockAsm10B

two_bytes_match_emit_encodeSnappyBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL memmove_match_emit_encodeSnappyBlockAsm10B
	JMP memmove_long_match_emit_encodeSnappyBlockAsm10B

one_byte_match_emit_encodeSnappyBlockAsm10B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBlockAsm10B:
	// Short copy of R8 (at most 64) bytes from (SI) to (AX).
	// DI = dst pointer after the copy.
	LEAQ (AX)(R8*1), DI
	CMPQ R8, $0x03
	JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
	MOVQ DI, AX
	JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B

memmove_long_match_emit_encodeSnappyBlockAsm10B:
	// Long copy of R8 bytes (R8-1 >= 0x40) from (SI) to (AX); same scheme and
	// same FIX as memmove_long_repeat_emit above: always use the 32-byte
	// aligned loop, the 128-byte loop and its DECQ/JA selection are removed.
	LEAQ (AX)(R8*1), DI
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ AX, R9
	ANDL $0x0000001f, R9
	MOVQ $0x00000040, R11
	SUBQ R9, R11

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ $0x20, R11
	CMPQ R8, R11
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ DI, AX

emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
match_nolit_loop_encodeSnappyBlockAsm10B:
	// Record offset = CX - BP in 16(SP), skip the 4 bytes already known to
	// match and count further matching bytes in R9 (SI = bytes left in src).
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBlockAsm10B

matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B:
	MOVQ (DI)(R9*1), R8
	XORQ (BP)(R9*1), R8
	TESTQ R8, R8
	JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B
	BSFQ R8, R8
	SARQ $0x03, R8
	LEAL (R9)(R8*1), R9
	JMP match_nolit_end_encodeSnappyBlockAsm10B

matchlen_loop_match_nolit_encodeSnappyBlockAsm10B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B

matchlen_single_match_nolit_encodeSnappyBlockAsm10B:
	TESTL SI, SI
	JZ match_nolit_end_encodeSnappyBlockAsm10B

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE match_nolit_end_encodeSnappyBlockAsm10B
	LEAL 1(R9), R9
	DECL SI
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B

match_nolit_end_encodeSnappyBlockAsm10B:
	// CX = end of match, BP = offset, R9 = total match length; everything up
	// to CX is now accounted for, so 12(SP) = CX.
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	MOVL CX, 12(SP)

two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
	// Same copy encoding as in the repeat path: 60-byte chunks while more than
	// 64 bytes remain, then a 2-byte or 3-byte copy for the rest.
	CMPL R9, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
	CMPL R9, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
	CMPL BP, $0x00000800
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B

emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
	// Stop searching when CX reached the limit; return 0 when dst is full.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm10B
	MOVQ -2(DX)(CX*1), SI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBlockAsm10B:
	// Hash the bytes at CX-2 (DI) and at CX (BP), store both positions in the
	// table and test the previous entry for CX as an immediate next match.
	MOVQ $0x9e3779b1, R8
	MOVQ SI, DI
	SHRQ $0x10, SI
	MOVQ SI, BP
	SHLQ $0x20, DI
	IMULQ R8, DI
	SHRQ $0x36, DI
	SHLQ $0x20, BP
	IMULQ R8, BP
	SHRQ $0x36, BP
	LEAL -2(CX), R8
	LEAQ 24(SP)(BP*4), R9
	MOVL (R9), BP
	MOVL R8, 24(SP)(DI*4)
	MOVL CX, (R9)
	CMPL (DX)(BP*1), SI
	JEQ match_nolit_loop_encodeSnappyBlockAsm10B
	INCL CX
	JMP search_loop_encodeSnappyBlockAsm10B

emit_remainder_encodeSnappyBlockAsm10B:
	// Return 0 if AX + 4 + (len(src) - 12(SP)) reaches the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBlockAsm10B:
	// Emit src[12(SP):len(src)] as the final literal (skipped when empty).
	// BP = length, DX = length-1 (src base is no longer needed), CX = source.
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBlockAsm10B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B

two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBlockAsm10B
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B

one_byte_emit_remainder_encodeSnappyBlockAsm10B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBlockAsm10B:
	// Short copy of BX (at most 64) bytes from (CX) to (AX).
	// DX = dst pointer after the copy.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B

memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
	// Long copy of BX bytes (BX-1 >= 0x40) from (CX) to (AX); same scheme and
	// same FIX as memmove_long_repeat_emit above. This literal ends exactly at
	// the end of src, so the removed 128-byte pass could read past src here.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
	SUBQ BP, DI

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ BX, DI
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
	// Return the number of bytes written: AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000008, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0,
112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsm8B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm8B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x04, BP LEAL 4(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm8B MOVL BP, 20(SP) MOVQ $0x9e3779b1, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x38, R9 SHLQ $0x20, R10 IMULQ R8, R10 SHRQ $0x38, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x38, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm8B LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm8B repeat_extend_back_loop_encodeSnappyBlockAsm8B: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm8B MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm8B LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B repeat_extend_back_end_encodeSnappyBlockAsm8B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI LEAL -1(DI), BP CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm8B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8B MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B two_bytes_repeat_emit_encodeSnappyBlockAsm8B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_repeat_emit_encodeSnappyBlockAsm8B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B 
one_byte_repeat_emit_encodeSnappyBlockAsm8B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm8B: LEAQ (AX)(DI*1), BP CMPQ DI, $0x03 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4through7 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R8 MOVB R9, (AX) MOVB R8, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R8 MOVW R9, (AX) MOVB R8, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R8 MOVL R9, (AX) MOVL R8, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R8 MOVQ R9, (AX) MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B: MOVQ BP, AX JMP 
emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B memmove_long_repeat_emit_encodeSnappyBlockAsm8B: LEAQ (AX)(DI*1), BP MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVQ DI, R10 SHRQ $0x07, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(R8)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOU 32(R9), X6 MOVOU 48(R9), X7 MOVOU 64(R9), X8 MOVOU 80(R9), X9 MOVOU 96(R9), X10 MOVOU 112(R9), X11 MOVOA X4, (R12) MOVOA X5, 16(R12) MOVOA X6, 32(R12) MOVOA X7, 48(R12) MOVOA X8, 64(R12) MOVOA X9, 80(R12) MOVOA X10, 96(R12) MOVOA X11, 112(R12) ADDQ $0x80, R12 ADDQ $0x80, R9 ADDQ $0x80, R11 DECQ R10 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(R8)(R11*1), X4 MOVOU -16(R8)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ DI, R11 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm8B matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE 
matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B matchlen_single_repeat_extend_encodeSnappyBlockAsm8B: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B repeat_extend_forward_end_encodeSnappyBlockAsm8B: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm8B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm8B: MOVL CX, 12(SP) JMP search_loop_encodeSnappyBlockAsm8B no_repeat_found_encodeSnappyBlockAsm8B: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm8B SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm8B MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm8B MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm8B candidate3_match_encodeSnappyBlockAsm8B: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm8B candidate2_match_encodeSnappyBlockAsm8B: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsm8B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm8B 
match_extend_back_loop_encodeSnappyBlockAsm8B: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsm8B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm8B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm8B JMP match_extend_back_loop_encodeSnappyBlockAsm8B match_extend_back_end_encodeSnappyBlockAsm8B: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm8B: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 LEAL -1(R8), DI CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm8B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm8B MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm8B two_bytes_match_emit_encodeSnappyBlockAsm8B: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX CMPL DI, $0x40 JL memmove_match_emit_encodeSnappyBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBlockAsm8B one_byte_match_emit_encodeSnappyBlockAsm8B: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm8B: LEAQ (AX)(R8*1), DI CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP 
memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm8B: MOVQ DI, AX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B memmove_long_match_emit_encodeSnappyBlockAsm8B: LEAQ (AX)(R8*1), DI MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVQ R8, R10 SHRQ $0x07, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(SI)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOU 32(R9), X6 MOVOU 48(R9), X7 MOVOU 64(R9), X8 MOVOU 80(R9), X9 MOVOU 96(R9), X10 MOVOU 112(R9), X11 MOVOA X4, (R12) MOVOA X5, 16(R12) MOVOA X6, 32(R12) MOVOA X7, 48(R12) MOVOA X8, 64(R12) MOVOA X9, 80(R12) MOVOA X10, 96(R12) MOVOA X11, 112(R12) ADDQ $0x80, R12 ADDQ $0x80, R9 ADDQ $0x80, R11 DECQ 
R10 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(SI)(R11*1), X4 MOVOU -16(SI)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsm8B: match_nolit_loop_encodeSnappyBlockAsm8B: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm8B matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_loop_match_nolit_encodeSnappyBlockAsm8B: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B matchlen_single_match_nolit_encodeSnappyBlockAsm8B: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm8B matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm8B LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B match_nolit_end_encodeSnappyBlockAsm8B: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B: CMPL R9, $0x0c JGE 
emit_copy_three_match_nolit_encodeSnappyBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBlockAsm8B: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm8B: CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm8B MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm8B: MOVQ $0x9e3779b1, R8 MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, BP SHLQ $0x20, DI IMULQ R8, DI SHRQ $0x38, DI SHLQ $0x20, BP IMULQ R8, BP SHRQ $0x38, BP LEAL -2(CX), R8 LEAQ 24(SP)(BP*4), R9 MOVL (R9), BP MOVL R8, 24(SP)(DI*4) MOVL CX, (R9) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm8B INCL CX JMP search_loop_encodeSnappyBlockAsm8B emit_remainder_encodeSnappyBlockAsm8B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm8B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm8B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B two_bytes_emit_remainder_encodeSnappyBlockAsm8B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeSnappyBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B one_byte_emit_remainder_encodeSnappyBlockAsm8B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm8B: LEAQ (AX)(BP*1), 
DX MOVL BP, BX CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B 
memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
	// Tail of encodeSnappyBlockAsm8B (the function begins above this range).
	// Copies the BP-byte trailing literal from CX to AX; DX is the output
	// position just past the copied bytes.
	// NOTE(review): when 128 <= BX < 256 the JA below is not taken (SI-1 == 0)
	// and the 128-byte loop runs once, followed by at least one 32-byte
	// iteration. Together they copy the range [DI-32, DI+128) with DI in
	// 33..64, even when BX is smaller, i.e. they can read past the source and
	// write past AX+BX. The instructions are left untouched here because the
	// start of this function is outside the reviewed range; the fix applied to
	// emitLiteral (drop the 128-byte loop) looks applicable - confirm and
	// apply in the generator.
	LEAQ  (AX)(BP*1), DX
	MOVL  BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ  BX, SI
	SHRQ  $0x07, SI
	MOVQ  AX, BP
	ANDL  $0x0000001f, BP
	MOVQ  $0x00000040, DI
	SUBQ  BP, DI
	DECQ  SI
	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	LEAQ  -32(CX)(DI*1), BP
	LEAQ  -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOU 32(BP), X6
	MOVOU 48(BP), X7
	MOVOU 64(BP), X8
	MOVOU 80(BP), X9
	MOVOU 96(BP), X10
	MOVOU 112(BP), X11
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	MOVOA X6, 32(R8)
	MOVOA X7, 48(R8)
	MOVOA X8, 64(R8)
	MOVOA X9, 80(R8)
	MOVOA X10, 96(R8)
	MOVOA X11, 112(R8)
	ADDQ  $0x80, R8
	ADDQ  $0x80, BP
	ADDQ  $0x80, DI
	DECQ  SI
	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ  $0x20, DI
	CMPQ  BX, DI
	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ  DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
	// Return the number of bytes written: output cursor minus dst base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// emitLiteral writes a length header for lit, followed by the bytes of lit,
// to dst and returns the number of bytes written (header size + len(lit)).
// An empty lit writes nothing and returns 0.
//
// The header is selected from n = len(lit)-1 (handled as a 32-bit value):
//   n < 60    : 1 byte   n<<2
//   n < 1<<8  : 2 bytes  0xf0, n
//   n < 1<<16 : 3 bytes  0xf4, n as 16-bit little endian
//   n < 1<<24 : 4 bytes  0xf8, n as 24-bit little endian
//   otherwise : 5 bytes  0xfc, n as 32-bit little endian
//
// The length of dst is never read; the caller must guarantee enough room.
//
// Registers: AX = output cursor, CX = lit base, DX = len(lit),
// BX = return value, R9/SI/DI = scratch.
// R9 is used as scratch instead of BP: the frame size is zero, so the
// assembler does not save BP and writing it would clobber the caller's
// frame pointer.
// NOTE: this file is generated; the same changes belong in the generator.
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
	MOVQ  lit_len+32(FP), DX
	MOVQ  dst_base+0(FP), AX
	MOVQ  lit_base+24(FP), CX
	TESTQ DX, DX
	JZ    emit_literal_end_standalone_skip
	MOVL  DX, BX
	LEAL  -1(DX), R9
	CMPL  R9, $0x3c
	JLT   one_byte_standalone
	CMPL  R9, $0x00000100
	JLT   two_bytes_standalone
	CMPL  R9, $0x00010000
	JLT   three_bytes_standalone
	CMPL  R9, $0x01000000
	JLT   four_bytes_standalone

	// 5-byte header.
	MOVB  $0xfc, (AX)
	MOVL  R9, 1(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   memmove_long_standalone

four_bytes_standalone:
	// 4-byte header: SI carries bits 16..23 of n.
	MOVL  R9, SI
	SHRL  $0x10, SI
	MOVB  $0xf8, (AX)
	MOVW  R9, 1(AX)
	MOVB  SI, 3(AX)
	ADDQ  $0x04, BX
	ADDQ  $0x04, AX
	JMP   memmove_long_standalone

three_bytes_standalone:
	MOVB  $0xf4, (AX)
	MOVW  R9, 1(AX)
	ADDQ  $0x03, BX
	ADDQ  $0x03, AX
	JMP   memmove_long_standalone

two_bytes_standalone:
	MOVB  $0xf0, (AX)
	MOVB  R9, 1(AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX

	// n < 64 means len(lit) <= 64, which the short copy below handles.
	CMPL  R9, $0x40
	JL    memmove_standalone
	JMP   memmove_long_standalone

one_byte_standalone:
	SHLB  $0x02, R9
	MOVB  R9, (AX)
	ADDQ  $0x01, BX
	ADDQ  $0x01, AX

memmove_standalone:
	// Short copy, 1..64 bytes: each size class loads the head and the tail
	// (which may overlap) before storing either.
	CMPQ  DX, $0x03
	JB    emit_lit_memmove_standalone_memmove_move_1or2
	JE    emit_lit_memmove_standalone_memmove_move_3
	CMPQ  DX, $0x08
	JB    emit_lit_memmove_standalone_memmove_move_4through7
	CMPQ  DX, $0x10
	JBE   emit_lit_memmove_standalone_memmove_move_8through16
	CMPQ  DX, $0x20
	JBE   emit_lit_memmove_standalone_memmove_move_17through32
	JMP   emit_lit_memmove_standalone_memmove_move_33through64

emit_lit_memmove_standalone_memmove_move_1or2:
	MOVB  (CX), R9
	MOVB  -1(CX)(DX*1), CL
	MOVB  R9, (AX)
	MOVB  CL, -1(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW  (CX), R9
	MOVB  2(CX), CL
	MOVW  R9, (AX)
	MOVB  CL, 2(AX)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_4through7:
	MOVL  (CX), R9
	MOVL  -4(CX)(DX*1), CX
	MOVL  R9, (AX)
	MOVL  CX, -4(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_8through16:
	MOVQ  (CX), R9
	MOVQ  -8(CX)(DX*1), CX
	MOVQ  R9, (AX)
	MOVQ  CX, -8(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

memmove_long_standalone:
	// Long copy. len(lit) >= 65 here: every branch to this label is taken
	// with n = len(lit)-1 >= 64.
	// The first and last 32 bytes are loaded up front and stored last; they
	// cover whatever the aligned loop leaves out at either end.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3

	// DI = 64 - (AX & 31): 33 <= DI <= 64 and AX+DI-32 is 32-byte aligned,
	// which is what the aligned MOVOA stores below rely on.
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, DI
	SUBQ  R9, DI

	// The 128-byte unrolled loop that used to sit here was removed. It was
	// only entered for 128 <= len < 256 and, together with the first 32-byte
	// iteration, copied up to DI+128 bytes regardless of len, reading past
	// lit and writing past the literal in dst. The 32-byte loop never goes
	// beyond offset len because it stops as soon as DI > len.

emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
	// Copy bytes [DI-32, DI), then advance while DI <= len.
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ  $0x20, DI
	CMPQ  DX, DI
	JAE   emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_literal_end_standalone_skip:
	XORQ BX, BX

emit_literal_end_standalone:
	MOVQ BX, ret+48(FP)
	RET

// func emitRepeat(dst []byte, offset int, length int) int
//
// emitRepeat writes a repeat operation covering length bytes to dst and
// returns the number of bytes written. length and offset are handled as
// 32-bit values; the length of dst is never read.
//
// With r = length-4 the encodings are:
//   length <= 8                      : 2 bytes  r<<2|1 as 16-bit little endian
//   length < 12 and offset < 2048    : 2 bytes  (offset>>8)<<5 | r<<2 | 1, offset low byte
//   r < 0x104                        : 3 bytes  0x15, 0x00, r-4
//   r < 0x10100                      : 4 bytes  0x19, 0x00, r-256 (16 bit)
//   r < 0x100ffff                    : 5 bytes  0x1d, 0x00, r-65536 (24 bit)
//   otherwise                        : 5 bytes  0x1d, 0x00, 0xfb, 0xff, 0xff,
//                                      then r-16842747 is encoded again as a new length
//
// Registers: AX = output cursor, BX = return value, CX = offset,
// DX = length, SI = scratch (SI rather than BP, because BP is not saved in
// a zero-size frame).
TEXT ·emitRepeat(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

emit_repeat_again_standalone:
	// SI = length, DX = length-4.
	MOVL  DX, SI
	LEAL  -4(DX), DX
	CMPL  SI, $0x08
	JLE   repeat_two_standalone
	CMPL  SI, $0x0c
	JGE   cant_repeat_two_offset_standalone
	CMPL  CX, $0x00000800
	JLT   repeat_two_offset_standalone

cant_repeat_two_offset_standalone:
	CMPL  DX, $0x00000104
	JLT   repeat_three_standalone
	CMPL  DX, $0x00010100
	JLT   repeat_four_standalone
	CMPL  DX, $0x0100ffff
	JLT   repeat_five_standalone

	// Too long for a single 5-byte repeat: emit a maximal chunk and loop
	// with what is left.
	LEAL  -16842747(DX), DX
	MOVW  $0x001d, (AX)
	MOVW  $0xfffb, 2(AX)
	MOVB  $0xff, 4(AX)
	ADDQ  $0x05, AX
	ADDQ  $0x05, BX
	JMP   emit_repeat_again_standalone

repeat_five_standalone:
	// CX (offset) is no longer needed and is reused for bits 16..23.
	LEAL  -65536(DX), DX
	MOVL  DX, CX
	MOVW  $0x001d, (AX)
	MOVW  DX, 2(AX)
	SARL  $0x10, CX
	MOVB  CL, 4(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_repeat_end

repeat_four_standalone:
	LEAL  -256(DX), DX
	MOVW  $0x0019, (AX)
	MOVW  DX, 2(AX)
	ADDQ  $0x04, BX
	ADDQ  $0x04, AX
	JMP   gen_emit_repeat_end

repeat_three_standalone:
	LEAL  -4(DX), DX
	MOVW  $0x0015, (AX)
	MOVB  DL, 2(AX)
	ADDQ  $0x03, BX
	ADDQ  $0x03, AX
	JMP   gen_emit_repeat_end

repeat_two_standalone:
	SHLL  $0x02, DX
	ORL   $0x01, DX
	MOVW  DX, (AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX
	JMP   gen_emit_repeat_end

repeat_two_offset_standalone:
	// DX = r<<2 | 1, then the high offset bits are OR-ed into bits 5..7.
	XORQ  SI, SI
	LEAL  1(SI)(DX*4), DX
	MOVB  CL, 1(AX)
	SARL  $0x08, CX
	SHLL  $0x05, CX
	ORL   CX, DX
	MOVB  DL, (AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX

gen_emit_repeat_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopy(dst []byte, offset int, length int) int
//
// emitCopy writes a copy operation for (offset, length) to dst and returns
// the number of bytes written. offset and length are handled as 32-bit
// values; the length of dst is never read.
//
// offset >= 65536:
//   length > 64 : 0xff + 32-bit offset (covers 64 bytes); if at least 4 bytes
//                 remain they are emitted with the repeat encodings (same
//                 layout as emitRepeat), otherwise the 1..3 leftover bytes
//                 get one more 5-byte copy
//   otherwise   : (length-1)<<2|3 + 32-bit offset (nothing when length == 0)
// offset < 65536:
//   length > 64 : 0xee + 16-bit offset (covers 60 bytes), rest as repeats
//   length < 12 and offset < 2048
//               : 2 bytes  (offset>>8)<<5 | (length-4)<<2 | 1, offset low byte
//   otherwise   : 3 bytes  (length-1)<<2|2, 16-bit offset
//
// Registers: AX = output cursor, BX = return value, CX = offset,
// DX = length, SI = scratch (SI rather than BP, because BP is not saved in
// a zero-size frame). Where SI only receives a byte-sized constant, only the
// low byte of the LEAL result (DL) is stored, so the upper bits of SI do
// not matter.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX
	CMPL CX, $0x00010000
	JL   two_byte_offset_standalone

	// Offset needs 4 bytes.
	CMPL DX, $0x40
	JLE  four_bytes_remain_standalone
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAL -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL   four_bytes_remain_standalone

emit_repeat_again_standalone_emit_copy:
	// Remaining length (>= 4) is emitted as repeats. SI = length, DX = length-4.
	MOVL DX, SI
	LEAL -4(DX), DX
	CMPL SI, $0x08
	JLE  repeat_two_standalone_emit_copy
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy:
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy:
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy:
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

four_bytes_remain_standalone:
	// 0..64 bytes left with a 4-byte offset; nothing to do for 0.
	TESTL DX, DX
	JZ    gen_emit_copy_end
	MOVB  $0x03, SI
	LEAL  -4(SI)(DX*4), DX
	MOVB  DL, (AX)
	MOVL  CX, 1(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_copy_end

two_byte_offset_standalone:
	// Offset fits in 16 bits.
	CMPL DX, $0x40
	JLE  two_byte_offset_short_standalone
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAL -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX

emit_repeat_again_standalone_emit_copy_short:
	// Remaining length is emitted as repeats. SI = length, DX = length-4.
	MOVL DX, SI
	LEAL -4(DX), DX
	CMPL SI, $0x08
	JLE  repeat_two_standalone_emit_copy_short
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy_short
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy_short
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy_short
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy_short
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

two_byte_offset_short_standalone:
	// length <= 64: 2-byte form if length < 12 and offset < 2048, else 3-byte.
	CMPL DX, $0x0c
	JGE  emit_copy_three_standalone
	CMPL CX, $0x00000800
	JGE  emit_copy_three_standalone
	MOVB $0x01, SI
	LEAL -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

emit_copy_three_standalone:
	MOVB $0x02, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopyNoRepeat(dst []byte, offset int, length int) int
//
// emitCopyNoRepeat writes a copy operation for (offset, length) to dst using
// copy encodings only (no repeat encodings) and returns the number of bytes
// written. offset and length are handled as 32-bit values; the length of dst
// is never read.
//
// offset >= 65536: 0xff + 32-bit offset per 64 bytes while length > 64 and
//   at least 4 bytes would remain, then (length-1)<<2|3 + 32-bit offset for
//   the rest (nothing when 0 bytes remain).
// offset < 65536: 0xee + 16-bit offset per 60 bytes while length > 64, then
//   the 2-byte form if length < 12 and offset < 2048, else the 3-byte form
//   (length-1)<<2|2 + 16-bit offset.
//
// Registers: AX = output cursor, BX = return value, CX = offset,
// DX = length, SI = scratch (SI rather than BP, because BP is not saved in
// a zero-size frame; only the low byte of each LEAL result is stored).
TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX
	CMPL CX, $0x00010000
	JL   two_byte_offset_standalone_snappy

four_bytes_loop_back_standalone_snappy:
	CMPL DX, $0x40
	JLE  four_bytes_remain_standalone_snappy
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAL -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL   four_bytes_remain_standalone_snappy
	JMP  four_bytes_loop_back_standalone_snappy

four_bytes_remain_standalone_snappy:
	TESTL DX, DX
	JZ    gen_emit_copy_end_snappy
	MOVB  $0x03, SI
	LEAL  -4(SI)(DX*4), DX
	MOVB  DL, (AX)
	MOVL  CX, 1(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_copy_end_snappy

two_byte_offset_standalone_snappy:
	CMPL DX, $0x40
	JLE  two_byte_offset_short_standalone_snappy
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAL -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX
	JMP  two_byte_offset_standalone_snappy

two_byte_offset_short_standalone_snappy:
	CMPL DX, $0x0c
	JGE  emit_copy_three_standalone_snappy
	CMPL CX, $0x00000800
	JGE  emit_copy_three_standalone_snappy
	MOVB $0x01, SI
	LEAL -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end_snappy

emit_copy_three_standalone_snappy:
	MOVB $0x02, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end_snappy:
	MOVQ BX, ret+40(FP)
	RET

// func matchLen(a []byte, b []byte) int
//
// matchLen returns the number of leading bytes that are equal in a and b,
// comparing at most len(a) bytes. len(b) is never read, so b must be at
// least as long as a. The length is handled with 32-bit operations.
//
// Registers: AX = a base, CX = b base, DX = bytes left to compare,
// SI = number of matching bytes so far (also the index), BX = scratch.
// SI is used rather than BP because BP is not saved in a zero-size frame.
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ a_base+0(FP), AX
	MOVQ b_base+24(FP), CX
	MOVQ a_len+8(FP), DX
	XORL SI, SI
	CMPL DX, $0x08
	JL   matchlen_single_standalone

matchlen_loopback_standalone:
	// Compare 8 bytes at a time; a non-zero XOR marks the first difference.
	MOVQ  (AX)(SI*1), BX
	XORQ  (CX)(SI*1), BX
	TESTQ BX, BX
	JZ    matchlen_loop_standalone

	// Index of the lowest set bit / 8 = number of equal bytes in this word.
	BSFQ  BX, BX
	SARQ  $0x03, BX
	LEAL  (SI)(BX*1), SI
	JMP   gen_match_len_end

matchlen_loop_standalone:
	LEAL -8(DX), DX
	LEAL 8(SI), SI
	CMPL DX, $0x08
	JGE  matchlen_loopback_standalone

matchlen_single_standalone:
	// Fewer than 8 bytes left: compare byte by byte.
	TESTL DX, DX
	JZ    gen_match_len_end

matchlen_single_loopback_standalone:
	MOVB (AX)(SI*1), BL
	CMPB (CX)(SI*1), BL
	JNE  gen_match_len_end
	LEAL 1(SI), SI
	DECL DX
	JNZ  matchlen_single_loopback_standalone

gen_match_len_end:
	MOVQ SI, ret+48(FP)
	RET