// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.

// +build !appengine
// +build !noasm
// +build gc

#include "textflag.h"

// func encodeBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written, or 0 when
// the output would pass the dst limit computed below.
// NOTE(review): presumably the Go caller guarantees a minimum len(src) and a
// large enough dst; that is not visible from this file - confirm.
//
// Frame layout:
//    0(SP)  dst limit pointer = dst_base + len(src) - len(src)>>5 - 5
//    8(SP)  sLimit   = len(src) - 8            (32 bit)
//   12(SP)  nextEmit = first src index not yet written to dst
//   16(SP)  repeat   = last match offset, starts at 1
//   20(SP)  nextS    = next search position
//   24(SP)  hash table, 16384 entries of 4 bytes (src indexes)
//
// Registers in the main loop:
//   AX = dst write pointer, DX = src base, CX = s (current src index),
//   SI = 8 bytes loaded from src at s.
//
// Hash used for the table: ((x << 16) * 0xcf1bbcdcbf9b) >> 50.
//
// NOTE: this file is generated. The search-loop load ordering fixed below
// must also be applied in gen.go or it is lost on regeneration.
TEXT ·encodeBlockAsm(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table: 512 iterations x 128 bytes = 65536 bytes.
zero_loop_encodeBlockAsm:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm

	// nextEmit = 0, sLimit = len(src)-8, dst limit, s = 1, repeat = 1.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm:
	// nextS = s + 4 + (s-nextEmit)>>6; leave the loop once nextS >= sLimit.
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x06, BP
	LEAL 4(CX)(BP*1), BP
	CMPL BP, 8(SP)
	JGE emit_remainder_encodeBlockAsm

	// Load the 8 bytes at s only after the limit check. The repeat path
	// jumps back here without a limit check and s can be as large as
	// len(src), so loading first could read up to 8 bytes past src.
	MOVQ (DX)(CX*1), SI
	MOVL BP, 20(SP)

	// Hash the bytes at s and s+1, fetch both candidates, store s and s+1.
	MOVQ $0x0000cf1bbcdcbf9b, R8
	MOVQ SI, R9
	MOVQ SI, R10
	SHRQ $0x08, R10
	SHLQ $0x10, R9
	IMULQ R8, R9
	SHRQ $0x32, R9
	SHLQ $0x10, R10
	IMULQ R8, R10
	SHRQ $0x32, R10
	MOVL 24(SP)(R9*4), BP
	MOVL 24(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	LEAL 1(CX), R9
	MOVL R9, 24(SP)(R10*4)

	// R9 = hash of the bytes at s+2 (used by no_repeat_found).
	MOVQ SI, R9
	SHRQ $0x10, R9
	SHLQ $0x10, R9
	IMULQ R8, R9
	SHRQ $0x32, R9

	// Repeat check: 4 bytes at s+1 against 4 bytes at s+1-repeat.
	MOVL CX, R8
	SUBL 16(SP), R8
	MOVL 1(DX)(R8*1), R10
	MOVQ SI, R8
	SHRQ $0x08, R8
	CMPL R8, R10
	JNE no_repeat_found_encodeBlockAsm

	// Repeat found. SI = base = s+1, DI = nextEmit, BP = base-repeat.
	// Extend backwards while the preceding bytes match.
	LEAL 1(CX), SI
	MOVL 12(SP), DI
	MOVL SI, BP
	SUBL 16(SP), BP
	JZ repeat_extend_back_end_encodeBlockAsm

repeat_extend_back_loop_encodeBlockAsm:
	CMPL SI, DI
	JLE repeat_extend_back_end_encodeBlockAsm
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE repeat_extend_back_end_encodeBlockAsm
	LEAL -1(SI), SI
	DECL BP
	JNZ repeat_extend_back_loop_encodeBlockAsm

repeat_extend_back_end_encodeBlockAsm:
	// Write the pending bytes src[nextEmit:base]: R8 = length,
	// R9 = source pointer, BP = length-1 selects the header size.
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	LEAL -1(R8), BP
	CMPL BP, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm
	CMPL BP, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm
	CMPL BP, $0x00010000
	JLT three_bytes_repeat_emit_encodeBlockAsm
	CMPL BP, $0x01000000
	JLT four_bytes_repeat_emit_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm

four_bytes_repeat_emit_encodeBlockAsm:
	MOVL BP, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm

three_bytes_repeat_emit_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm

two_bytes_repeat_emit_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL memmove_repeat_emit_encodeBlockAsm
	JMP memmove_long_repeat_emit_encodeBlockAsm

one_byte_repeat_emit_encodeBlockAsm:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeBlockAsm:
	// BP = dst pointer after the copy.
	LEAQ (AX)(R8*1), BP

	// genMemMoveShort
	CMPQ R8, $0x03
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2
	JE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeBlockAsm:
	MOVQ BP, AX
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

memmove_long_repeat_emit_encodeBlockAsm:
	LEAQ (AX)(R8*1), BP

	// genMemMoveLong
	// First and last 32 bytes are kept in X0-X3 and stored last; the
	// middle is copied 32 bytes per step with aligned stores to dst.
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(R9)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R8, R12
	JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ BP, AX

emit_literal_done_repeat_emit_encodeBlockAsm:
	// s += 5; extend the repeat forward. R8 = bytes left in src,
	// R9 = src+s, BP = src+s-repeat, R11 = matched length.
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(BP*1), BP

	// matchLen
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend_encodeBlockAsm

matchlen_loopback_repeat_extend_encodeBlockAsm:
	MOVQ (R9)(R11*1), R10
	XORQ (BP)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend_encodeBlockAsm
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeBlockAsm

matchlen_loop_repeat_extend_encodeBlockAsm:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend_encodeBlockAsm

matchlen_single_repeat_extend_encodeBlockAsm:
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeBlockAsm

matchlen_single_loopback_repeat_extend_encodeBlockAsm:
	MOVB (R9)(R11*1), R10
	CMPB (BP)(R11*1), R10
	JNE repeat_extend_forward_end_encodeBlockAsm
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm

repeat_extend_forward_end_encodeBlockAsm:
	// BP = s - base (length), SI = repeat offset. DI still holds the
	// nextEmit value read before the literal emit: when it was 0 the
	// match is written through emitCopy instead of emitRepeat.
	ADDL R11, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	TESTL DI, DI
	JZ repeat_as_copy_encodeBlockAsm

	// emitRepeat
emit_repeat_again_match_repeat_encodeBlockAsm:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_repeat_encodeBlockAsm

cant_repeat_two_offset_match_repeat_encodeBlockAsm:
	CMPL BP, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm
	CMPL BP, $0x00010100
	JLT repeat_four_match_repeat_encodeBlockAsm
	CMPL BP, $0x0100ffff
	JLT repeat_five_match_repeat_encodeBlockAsm
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_repeat_encodeBlockAsm

repeat_five_match_repeat_encodeBlockAsm:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_match_repeat_encodeBlockAsm:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_match_repeat_encodeBlockAsm:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_match_repeat_encodeBlockAsm:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_match_repeat_encodeBlockAsm:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_as_copy_encodeBlockAsm:
	// emitCopy
	// SI = offset, BP = length.
	CMPL SI, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeBlockAsm

four_bytes_loop_back_repeat_as_copy_encodeBlockAsm:
	CMPL BP, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeBlockAsm

	// emitRepeat
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	CMPL BP, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL BP, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL BP, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

	// Not reachable: follows an unconditional jump and has no label.
	JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm

four_bytes_remain_repeat_as_copy_encodeBlockAsm:
	TESTL BP, BP
	JZ repeat_end_emit_encodeBlockAsm

	// Only the low byte of the LEAL result is stored, so the upper
	// bits of BX do not matter here.
	MOVB $0x03, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

two_byte_offset_repeat_as_copy_encodeBlockAsm:
	CMPL BP, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX

	// emitRepeat
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	CMPL BP, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

	// Not reachable: follows an unconditional jump and has no label.
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm

two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
	CMPL BP, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
	CMPL SI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

emit_copy_three_repeat_as_copy_encodeBlockAsm:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeBlockAsm:
	// nextEmit = s. No limit check happens here; the search loop head
	// performs it before touching src.
	MOVL CX, 12(SP)
	JMP search_loop_encodeBlockAsm

no_repeat_found_encodeBlockAsm:
	// BP = candidate for s, DI = candidate for s+1, R9 = hash for s+2.
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeBlockAsm
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ candidate2_match_encodeBlockAsm
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ candidate3_match_encodeBlockAsm
	MOVL 20(SP), CX
	JMP search_loop_encodeBlockAsm

candidate3_match_encodeBlockAsm:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm

candidate2_match_encodeBlockAsm:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

candidate_match_encodeBlockAsm:
	// CX = s, BP = candidate. Extend the match backwards, not below
	// nextEmit (SI) and not below candidate index 0.
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeBlockAsm

match_extend_back_loop_encodeBlockAsm:
	CMPL CX, SI
	JLE match_extend_back_end_encodeBlockAsm
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeBlockAsm
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeBlockAsm
	JMP match_extend_back_loop_encodeBlockAsm

match_extend_back_end_encodeBlockAsm:
	// Return 0 unless dst + (s-nextEmit) + 5 is below the dst limit.
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 5(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBlockAsm:
	// Write the pending bytes src[nextEmit:s]: R8 = length,
	// SI = source pointer, DI = length-1 selects the header size.
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ emit_literal_done_match_emit_encodeBlockAsm
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm
	CMPL DI, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm
	CMPL DI, $0x00010000
	JLT three_bytes_match_emit_encodeBlockAsm
	CMPL DI, $0x01000000
	JLT four_bytes_match_emit_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_match_emit_encodeBlockAsm

four_bytes_match_emit_encodeBlockAsm:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_encodeBlockAsm

three_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBlockAsm

two_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL memmove_match_emit_encodeBlockAsm
	JMP memmove_long_match_emit_encodeBlockAsm

one_byte_match_emit_encodeBlockAsm:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBlockAsm:
	// DI = dst pointer after the copy.
	LEAQ (AX)(R8*1), DI

	// genMemMoveShort
	CMPQ R8, $0x03
	JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeBlockAsm:
	MOVQ DI, AX
	JMP emit_literal_done_match_emit_encodeBlockAsm

memmove_long_match_emit_encodeBlockAsm:
	LEAQ (AX)(R8*1), DI

	// genMemMoveLong
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ R8, R10
	SHRQ $0x05, R10
	MOVQ AX, R9
	ANDL $0x0000001f, R9
	MOVQ $0x00000040, R11
	SUBQ R9, R11
	DECQ R10
	JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(SI)(R11*1), R9
	LEAQ -32(AX)(R11*1), R12

emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOA X4, (R12)
	MOVOA X5, 16(R12)
	ADDQ $0x20, R12
	ADDQ $0x20, R9
	ADDQ $0x20, R11
	DECQ R10
	JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ $0x20, R11
	CMPQ R8, R11
	JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ DI, AX

emit_literal_done_match_emit_encodeBlockAsm:
match_nolit_loop_encodeBlockAsm:
	// repeat = s - candidate; skip the 4 bytes already compared and
	// extend. SI = bytes left in src, DI = src+s, BP = src+candidate.
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP

	// matchLen
	XORL R9, R9
	CMPL SI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm

matchlen_loopback_match_nolit_encodeBlockAsm:
	MOVQ (DI)(R9*1), R8
	XORQ (BP)(R9*1), R8
	TESTQ R8, R8
	JZ matchlen_loop_match_nolit_encodeBlockAsm
	BSFQ R8, R8
	SARQ $0x03, R8
	LEAL (R9)(R8*1), R9
	JMP match_nolit_end_encodeBlockAsm

matchlen_loop_match_nolit_encodeBlockAsm:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm

matchlen_single_match_nolit_encodeBlockAsm:
	TESTL SI, SI
	JZ match_nolit_end_encodeBlockAsm

matchlen_single_loopback_match_nolit_encodeBlockAsm:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE match_nolit_end_encodeBlockAsm
	LEAL 1(R9), R9
	DECL SI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm

match_nolit_end_encodeBlockAsm:
	// s += matched; BP = offset, R9 = total length (matched + 4),
	// nextEmit = s.
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	MOVL CX, 12(SP)

	// emitCopy
	CMPL BP, $0x00010000
	JL two_byte_offset_match_nolit_encodeBlockAsm

four_bytes_loop_back_match_nolit_encodeBlockAsm:
	CMPL R9, $0x40
	JLE four_bytes_remain_match_nolit_encodeBlockAsm
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL four_bytes_remain_match_nolit_encodeBlockAsm

	// emitRepeat
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	CMPL R9, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy
	CMPL R9, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy
	CMPL R9, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy

repeat_five_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy:
	SHLL $0x02, R9
	ORL $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

	// Not reachable: follows an unconditional jump and has no label.
	JMP four_bytes_loop_back_match_nolit_encodeBlockAsm

four_bytes_remain_match_nolit_encodeBlockAsm:
	TESTL R9, R9
	JZ match_nolit_emitcopy_end_encodeBlockAsm
	MOVB $0x03, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

two_byte_offset_match_nolit_encodeBlockAsm:
	CMPL R9, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX

	// emitRepeat
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	CMPL R9, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL R9, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL R9, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short

repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, R9
	ORL $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

	// Not reachable: follows an unconditional jump and has no label.
	JMP two_byte_offset_match_nolit_encodeBlockAsm

two_byte_offset_short_match_nolit_encodeBlockAsm:
	CMPL R9, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm
	CMPL BP, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

emit_copy_three_match_nolit_encodeBlockAsm:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeBlockAsm:
	// Stop at sLimit; otherwise load 8 bytes at s-2 and return 0 if the
	// dst pointer has reached the dst limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm
	MOVQ -2(DX)(CX*1), SI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm:
	// Store s-2 under the hash of the bytes at s-2, look up and replace
	// the entry for the bytes at s, and loop directly on an immediate
	// 4-byte match at s.
	MOVQ $0x0000cf1bbcdcbf9b, R8
	MOVQ SI, DI
	SHRQ $0x10, SI
	MOVQ SI, BP
	SHLQ $0x10, DI
	IMULQ R8, DI
	SHRQ $0x32, DI
	SHLQ $0x10, BP
	IMULQ R8, BP
	SHRQ $0x32, BP
	LEAL -2(CX), R8
	LEAQ 24(SP)(BP*4), R9
	MOVL (R9), BP
	MOVL R8, 24(SP)(DI*4)
	MOVL CX, (R9)
	CMPL (DX)(BP*1), SI
	JEQ match_nolit_loop_encodeBlockAsm
	INCL CX
	JMP search_loop_encodeBlockAsm

emit_remainder_encodeBlockAsm:
	// Return 0 unless dst + (len(src)-nextEmit) + 5 is below the limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 5(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBlockAsm:
	// Write src[nextEmit:len(src)]: BP = length, CX = source pointer,
	// DX = length-1 selects the header size (src base is no longer
	// needed after this point).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x01000000
	JLT four_bytes_emit_remainder_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm

four_bytes_emit_remainder_encodeBlockAsm:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm

three_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm

two_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBlockAsm
	JMP memmove_long_emit_remainder_encodeBlockAsm

one_byte_emit_remainder_encodeBlockAsm:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBlockAsm:
	// DX = dst pointer after the copy, BX = length.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveShort
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

memmove_long_emit_remainder_encodeBlockAsm:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, SI
	SHRQ $0x05, SI
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
	SUBQ BP, DI
	DECQ SI
	JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(CX)(DI*1), BP
	LEAQ -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	ADDQ $0x20, R8
	ADDQ $0x20, BP
	ADDQ $0x20, DI
	DECQ SI
	JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ BX, DI
	JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBlockAsm:
	// Return the number of bytes written: dst pointer - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm4MB(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBlockAsm4MB(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBlockAsm4MB:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm4MB
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm4MB:
	// NOTE(review): this load runs before the limit check below. If a
	// path in this function re-enters the loop with s close to len(src)
	// it can read past src - confirm once the whole function is in view.
	MOVQ (DX)(CX*1), SI
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x06, BP
	LEAL 4(CX)(BP*1), BP
	CMPL BP, 8(SP)
	JGE emit_remainder_encodeBlockAsm4MB
	MOVL BP, 20(SP)
	MOVQ $0x0000cf1bbcdcbf9b, R8
	MOVQ SI, R9
	MOVQ SI, R10
	SHRQ $0x08, R10
	SHLQ $0x10, R9
	IMULQ R8, R9
	SHRQ $0x32, R9
	SHLQ $0x10, R10
	IMULQ R8, R10
	SHRQ $0x32, R10
	MOVL 24(SP)(R9*4), BP
	MOVL 24(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	LEAL 1(CX), R9
	MOVL R9, 24(SP)(R10*4)
MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm4MB LEAL 1(CX), SI MOVL 12(SP), DI MOVL SI, BP SUBL 16(SP), BP JZ repeat_extend_back_end_encodeBlockAsm4MB repeat_extend_back_loop_encodeBlockAsm4MB: CMPL SI, DI JLE repeat_extend_back_end_encodeBlockAsm4MB MOVB -1(DX)(BP*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeBlockAsm4MB LEAL -1(SI), SI DECL BP JNZ repeat_extend_back_loop_encodeBlockAsm4MB repeat_extend_back_end_encodeBlockAsm4MB: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 LEAL -1(R8), BP CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm4MB CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm4MB CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm4MB MOVL BP, R10 SHRL $0x10, R10 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_long_repeat_emit_encodeBlockAsm4MB three_bytes_repeat_emit_encodeBlockAsm4MB: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm4MB two_bytes_repeat_emit_encodeBlockAsm4MB: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_repeat_emit_encodeBlockAsm4MB JMP memmove_long_repeat_emit_encodeBlockAsm4MB one_byte_repeat_emit_encodeBlockAsm4MB: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm4MB: LEAQ (AX)(R8*1), BP // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_1or2 JE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16 CMPQ R8, $0x20 JBE 
emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, -1(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R9 MOVW R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_repeat_emit_encodeBlockAsm4MB: MOVQ BP, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB memmove_long_repeat_emit_encodeBlockAsm4MB: LEAQ (AX)(R8*1), BP // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 LEAQ -32(R9)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R13) MOVOA X5, 16(R13) ADDQ 
$0x20, R13 ADDQ $0x20, R10 ADDQ $0x20, R12 DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: MOVOU -32(R9)(R12*1), X4 MOVOU -16(R9)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ BP, AX emit_literal_done_repeat_emit_encodeBlockAsm4MB: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(BP*1), BP // matchLen XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend_encodeBlockAsm4MB matchlen_loopback_repeat_extend_encodeBlockAsm4MB: MOVQ (R9)(R11*1), R10 XORQ (BP)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend_encodeBlockAsm4MB BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_loop_repeat_extend_encodeBlockAsm4MB: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend_encodeBlockAsm4MB matchlen_single_repeat_extend_encodeBlockAsm4MB: TESTL R8, R8 JZ repeat_extend_forward_end_encodeBlockAsm4MB matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB: MOVB (R9)(R11*1), R10 CMPB (BP)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm4MB LEAL 1(R11), R11 DECL R8 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB repeat_extend_forward_end_encodeBlockAsm4MB: ADDL R11, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm4MB // emitRepeat MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_match_repeat_encodeBlockAsm4MB CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsm4MB cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB: CMPL BP, $0x00000104 JLT 
repeat_three_match_repeat_encodeBlockAsm4MB CMPL BP, $0x00010100 JLT repeat_four_match_repeat_encodeBlockAsm4MB LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_match_repeat_encodeBlockAsm4MB: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_match_repeat_encodeBlockAsm4MB: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_match_repeat_encodeBlockAsm4MB: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_match_repeat_encodeBlockAsm4MB: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_as_copy_encodeBlockAsm4MB: // emitCopy CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm4MB four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB // emitRepeat MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy: 
LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB: TESTL BP, BP JZ repeat_end_emit_encodeBlockAsm4MB MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX // emitRepeat MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short CMPL BP, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short LEAL -65536(BP), BP MOVL BP, SI MOVW $0x001d, (AX) MOVW BP, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm4MB 
repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB JMP two_byte_offset_repeat_as_copy_encodeBlockAsm4MB two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB emit_copy_three_repeat_as_copy_encodeBlockAsm4MB: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm4MB: MOVL CX, 12(SP) JMP search_loop_encodeBlockAsm4MB no_repeat_found_encodeBlockAsm4MB: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBlockAsm4MB SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm4MB MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeBlockAsm4MB MOVL 20(SP), CX JMP search_loop_encodeBlockAsm4MB candidate3_match_encodeBlockAsm4MB: ADDL $0x02, CX JMP candidate_match_encodeBlockAsm4MB candidate2_match_encodeBlockAsm4MB: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeBlockAsm4MB: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBlockAsm4MB match_extend_back_loop_encodeBlockAsm4MB: CMPL CX, SI JLE match_extend_back_end_encodeBlockAsm4MB MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBlockAsm4MB 
LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBlockAsm4MB JMP match_extend_back_loop_encodeBlockAsm4MB match_extend_back_end_encodeBlockAsm4MB: MOVL CX, SI SUBL 12(SP), SI LEAQ 4(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm4MB: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm4MB MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 LEAL -1(R8), DI CMPL DI, $0x3c JLT one_byte_match_emit_encodeBlockAsm4MB CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm4MB CMPL DI, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm4MB MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_encodeBlockAsm4MB three_bytes_match_emit_encodeBlockAsm4MB: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBlockAsm4MB two_bytes_match_emit_encodeBlockAsm4MB: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX CMPL DI, $0x40 JL memmove_match_emit_encodeBlockAsm4MB JMP memmove_long_match_emit_encodeBlockAsm4MB one_byte_match_emit_encodeBlockAsm4MB: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm4MB: LEAQ (AX)(R8*1), DI // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB 
emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_4through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBlockAsm4MB: MOVQ DI, AX JMP emit_literal_done_match_emit_encodeBlockAsm4MB memmove_long_match_emit_encodeBlockAsm4MB: LEAQ (AX)(R8*1), DI // genMemMoveLong MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVQ R8, R10 SHRQ $0x05, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 LEAQ -32(SI)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOA X4, (R12) MOVOA X5, 16(R12) ADDQ $0x20, R12 ADDQ $0x20, R9 ADDQ $0x20, R11 DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: MOVOU -32(SI)(R11*1), X4 MOVOU -16(SI)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ R8, R11 JAE 
emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm4MB: match_nolit_loop_encodeBlockAsm4MB: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP // matchLen XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm4MB matchlen_loopback_match_nolit_encodeBlockAsm4MB: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm4MB BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm4MB matchlen_loop_match_nolit_encodeBlockAsm4MB: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm4MB matchlen_single_match_nolit_encodeBlockAsm4MB: TESTL SI, SI JZ match_nolit_end_encodeBlockAsm4MB matchlen_single_loopback_match_nolit_encodeBlockAsm4MB: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm4MB LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm4MB match_nolit_end_encodeBlockAsm4MB: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm4MB four_bytes_loop_back_match_nolit_encodeBlockAsm4MB: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm4MB MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm4MB // emitRepeat MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: CMPL R9, $0x00000104 JLT 
repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy CMPL R9, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB JMP four_bytes_loop_back_match_nolit_encodeBlockAsm4MB four_bytes_remain_match_nolit_encodeBlockAsm4MB: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeBlockAsm4MB MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_match_nolit_encodeBlockAsm4MB: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm4MB MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX // emitRepeat MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short CMPL R9, $0x00010100 JLT 
repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short LEAL -65536(R9), R9 MOVL R9, BP MOVW $0x001d, (AX) MOVW R9, 2(AX) SARL $0x10, BP MOVB BP, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB JMP two_byte_offset_match_nolit_encodeBlockAsm4MB two_byte_offset_short_match_nolit_encodeBlockAsm4MB: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm4MB CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm4MB MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB emit_copy_three_match_nolit_encodeBlockAsm4MB: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm4MB: CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm4MB MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm4MB: MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, BP SHLQ $0x10, DI IMULQ R8, DI SHRQ $0x32, DI SHLQ $0x10, BP IMULQ R8, BP SHRQ $0x32, BP LEAL -2(CX), R8 LEAQ 24(SP)(BP*4), R9 MOVL (R9), BP MOVL R8, 24(SP)(DI*4) MOVL CX, (R9) CMPL (DX)(BP*1), SI JEQ 
match_nolit_loop_encodeBlockAsm4MB INCL CX JMP search_loop_encodeBlockAsm4MB emit_remainder_encodeBlockAsm4MB: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm4MB: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm4MB CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm4MB CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm4MB MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_long_emit_remainder_encodeBlockAsm4MB three_bytes_emit_remainder_encodeBlockAsm4MB: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBlockAsm4MB two_bytes_emit_remainder_encodeBlockAsm4MB: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBlockAsm4MB JMP memmove_long_emit_remainder_encodeBlockAsm4MB one_byte_emit_remainder_encodeBlockAsm4MB: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm4MB: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveShort CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP 
memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm4MB: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB memmove_long_emit_remainder_encodeBlockAsm4MB: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, SI SHRQ $0x05, SI MOVQ AX, BP ANDL $0x0000001f, BP MOVQ $0x00000040, DI SUBQ BP, DI DECQ SI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 LEAQ -32(CX)(DI*1), BP LEAQ -32(AX)(DI*1), R8 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (BP), X4 MOVOU 16(BP), X5 MOVOA X4, (R8) MOVOA X5, 16(R8) ADDQ $0x20, R8 ADDQ $0x20, BP ADDQ $0x20, DI DECQ SI JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32: MOVOU -32(CX)(DI*1), X4 MOVOU -16(CX)(DI*1), X5 
MOVOA X4, -32(AX)(DI*1) MOVOA X5, -16(AX)(DI*1) ADDQ $0x20, DI CMPQ BX, DI JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsm4MB: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm12B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBlockAsm12B(SB), $16408-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000080, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeBlockAsm12B: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBlockAsm12B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm12B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeBlockAsm12B MOVL BP, 20(SP) MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 SHLQ $0x18, R10 IMULQ R8, R10 SHRQ $0x34, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x18, R9 IMULQ R8, R9 SHRQ $0x34, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeBlockAsm12B LEAL 1(CX), SI MOVL 12(SP), DI MOVL SI, BP SUBL 16(SP), BP JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: CMPL SI, DI JLE repeat_extend_back_end_encodeBlockAsm12B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeBlockAsm12B LEAL -1(SI), SI DECL BP JNZ 
repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 LEAL -1(R8), BP CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm12B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm12B MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_repeat_emit_encodeBlockAsm12B JMP memmove_long_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm12B: LEAQ (AX)(R8*1), BP // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2 JE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, -1(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R9 MOVW R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, 
-8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_repeat_emit_encodeBlockAsm12B: MOVQ BP, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm12B memmove_long_repeat_emit_encodeBlockAsm12B: LEAQ (AX)(R8*1), BP // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(R9)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R13) MOVOA X5, 16(R13) ADDQ $0x20, R13 ADDQ $0x20, R10 ADDQ $0x20, R12 DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(R9)(R12*1), X4 MOVOU -16(R9)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ BP, AX emit_literal_done_repeat_emit_encodeBlockAsm12B: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(BP*1), BP // matchLen XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend_encodeBlockAsm12B matchlen_loopback_repeat_extend_encodeBlockAsm12B: MOVQ (R9)(R11*1), R10 
XORQ (BP)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend_encodeBlockAsm12B BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_loop_repeat_extend_encodeBlockAsm12B: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend_encodeBlockAsm12B matchlen_single_repeat_extend_encodeBlockAsm12B: TESTL R8, R8 JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_single_loopback_repeat_extend_encodeBlockAsm12B: MOVB (R9)(R11*1), R10 CMPB (BP)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm12B LEAL 1(R11), R11 DECL R8 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm12B repeat_extend_forward_end_encodeBlockAsm12B: ADDL R11, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm12B // emitRepeat MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_match_repeat_encodeBlockAsm12B CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B CMPL SI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: CMPL BP, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm12B LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_encodeBlockAsm12B: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_encodeBlockAsm12B: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_encodeBlockAsm12B: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: // emitCopy two_byte_offset_repeat_as_copy_encodeBlockAsm12B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B MOVB $0xee, (AX) MOVW SI, 
1(AX) LEAL -60(BP), BP ADDQ $0x03, AX // emitRepeat MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: LEAL -4(BP), BP MOVW $0x0015, (AX) MOVB BP, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: SHLL $0x02, BP ORL $0x01, BP MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm12B: MOVL CX, 12(SP) JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBlockAsm12B SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeBlockAsm12B MOVL R8, 
24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeBlockAsm12B MOVL 20(SP), CX JMP search_loop_encodeBlockAsm12B candidate3_match_encodeBlockAsm12B: ADDL $0x02, CX JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeBlockAsm12B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: CMPL CX, SI JLE match_extend_back_end_encodeBlockAsm12B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBlockAsm12B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: MOVL CX, SI SUBL 12(SP), SI LEAQ 3(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm12B: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeBlockAsm12B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 LEAL -1(R8), DI CMPL DI, $0x3c JLT one_byte_match_emit_encodeBlockAsm12B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm12B MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX CMPL DI, $0x40 JL memmove_match_emit_encodeBlockAsm12B JMP memmove_long_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm12B: LEAQ (AX)(R8*1), DI // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 CMPQ R8, 
$0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBlockAsm12B: MOVQ DI, AX JMP emit_literal_done_match_emit_encodeBlockAsm12B memmove_long_match_emit_encodeBlockAsm12B: LEAQ (AX)(R8*1), DI // genMemMoveLong MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVQ R8, R10 SHRQ $0x05, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(SI)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOA X4, (R12) MOVOA X5, 16(R12) ADDQ $0x20, R12 ADDQ $0x20, 
R9 ADDQ $0x20, R11 DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(SI)(R11*1), X4 MOVOU -16(SI)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ DI, AX emit_literal_done_match_emit_encodeBlockAsm12B: match_nolit_loop_encodeBlockAsm12B: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP // matchLen XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm12B matchlen_loopback_match_nolit_encodeBlockAsm12B: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeBlockAsm12B BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm12B matchlen_loop_match_nolit_encodeBlockAsm12B: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm12B matchlen_single_match_nolit_encodeBlockAsm12B: TESTL SI, SI JZ match_nolit_end_encodeBlockAsm12B matchlen_single_loopback_match_nolit_encodeBlockAsm12B: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm12B LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B match_nolit_end_encodeBlockAsm12B: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeBlockAsm12B: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX // emitRepeat MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL BP, 
$0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAL -4(R9), R9 MOVW $0x0015, (AX) MOVB R9, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: SHLL $0x02, R9 ORL $0x01, R9 MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) SARL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B JMP two_byte_offset_match_nolit_encodeBlockAsm12B two_byte_offset_short_match_nolit_encodeBlockAsm12B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm12B CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm12B: CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm12B MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: MOVQ $0x000000cf1bbcdcbb, R8 MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, BP SHLQ $0x18, DI IMULQ R8, DI SHRQ $0x34, DI SHLQ $0x18, BP IMULQ R8, BP SHRQ $0x34, BP LEAL -2(CX), R8 LEAQ 24(SP)(BP*4), R9 MOVL (R9), BP MOVL R8, 24(SP)(DI*4) MOVL CX, (R9) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeBlockAsm12B INCL 
CX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm12B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm12B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBlockAsm12B JMP memmove_long_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm12B: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveShort CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B 
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm12B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeBlockAsm12B memmove_long_emit_remainder_encodeBlockAsm12B: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, SI SHRQ $0x05, SI MOVQ AX, BP ANDL $0x0000001f, BP MOVQ $0x00000040, DI SUBQ BP, DI DECQ SI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(CX)(DI*1), BP LEAQ -32(AX)(DI*1), R8 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: MOVOU (BP), X4 MOVOU 16(BP), X5 MOVOA X4, (R8) MOVOA X5, 16(R8) ADDQ $0x20, R8 ADDQ $0x20, BP ADDQ $0x20, DI DECQ SI JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(CX)(DI*1), X4 MOVOU -16(CX)(DI*1), X5 MOVOA X4, -32(AX)(DI*1) MOVOA X5, -16(AX)(DI*1) ADDQ $0x20, DI CMPQ BX, DI JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, 
-16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsm12B: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET

// func encodeBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written to dst, or 0
// when the output would reach the limit dst_base + len(src) - 5 - len(src)>>5.
// Matches are found through a hash table of 1024 32-bit source positions that
// lives in this function's stack frame and is indexed by a 10-bit hash of
// 4 input bytes.
//
// dst_len is never read and src_len is not validated here.
// NOTE(review): presumably the Go caller guarantees a large enough dst and a
// minimum src length - confirm against the call site.
// NOTE(review): tag byte meanings are inferred from the generator's
// emitCopy/emitRepeat labels (package s2); confirm against the format spec.
//
// Frame layout (names below are introduced by these comments only):
//   0(SP)  8 bytes  dstLimit  dst_base + len(src) - 5 - len(src)>>5
//   8(SP)  4 bytes  sLimit    len(src) - 8
//   12(SP) 4 bytes  nextEmit  src index of the first byte not yet emitted
//   16(SP) 4 bytes  repeat    offset of the most recent match (starts at 1)
//   20(SP) 4 bytes  nextS     position to resume searching at after a miss
//   24(SP) 4096 B   table     1024 x uint32 candidate positions
//
// Long-lived registers:
//   AX = dst write cursor, CX = s (current src index), DX = src base pointer.
TEXT ·encodeBlockAsm10B(SB), $4120-56
	MOVQ dst_base+0(FP), AX
	// Zero the hash table: 32 iterations x 128 bytes = 4096 bytes.
	MOVQ $0x00000020, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBlockAsm10B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm10B
	// nextEmit = 0
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	// sLimit = len(src) - 8
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	// dstLimit = dst + (len(src) - 5) - (len(src) >> 5)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	// s = 1, repeat = 1
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

// Main search loop. On entry CX = s.
search_loop_encodeBlockAsm10B:
	// SI = 8 bytes loaded from src[s:]
	MOVQ (DX)(CX*1), SI
	// nextS = s + 4 + ((s - nextEmit) >> 5); stop searching once nextS >= sLimit.
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x05, BP
	LEAL 4(CX)(BP*1), BP
	CMPL BP, 8(SP)
	JGE emit_remainder_encodeBlockAsm10B
	MOVL BP, 20(SP)
	// Hash: ((x << 32) * 0x9e3779b1) >> 54. The left shift discards all but the
	// low 4 bytes of x; the right shift keeps the top 10 bits as table index.
	// R9 = hash of bytes at s, R10 = hash of bytes at s+1.
	MOVQ $0x9e3779b1, R8
	MOVQ SI, R9
	MOVQ SI, R10
	SHRQ $0x08, R10
	SHLQ $0x20, R9
	IMULQ R8, R9
	SHRQ $0x36, R9
	SHLQ $0x20, R10
	IMULQ R8, R10
	SHRQ $0x36, R10
	// BP = candidate for s, DI = candidate for s+1; then record s and s+1.
	MOVL 24(SP)(R9*4), BP
	MOVL 24(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	LEAL 1(CX), R9
	MOVL R9, 24(SP)(R10*4)
	// R9 = hash of bytes at s+2 (used only on the no-repeat path).
	MOVQ SI, R9
	SHRQ $0x10, R9
	SHLQ $0x20, R9
	IMULQ R8, R9
	SHRQ $0x36, R9
	// Repeat check: compare the 4 bytes at s+1 with the 4 bytes at s+1-repeat.
	MOVL CX, R8
	SUBL 16(SP), R8
	MOVL 1(DX)(R8*1), R10
	MOVQ SI, R8
	SHRQ $0x08, R8
	CMPL R8, R10
	JNE no_repeat_found_encodeBlockAsm10B
	// Repeat found. SI = base = s+1, DI = nextEmit, BP = base - repeat.
	LEAL 1(CX), SI
	MOVL 12(SP), DI
	MOVL SI, BP
	SUBL 16(SP), BP
	JZ repeat_extend_back_end_encodeBlockAsm10B

// Extend the repeat backwards while base > nextEmit, the match position is
// above 0 and the preceding bytes are equal.
repeat_extend_back_loop_encodeBlockAsm10B:
	CMPL SI, DI
	JLE repeat_extend_back_end_encodeBlockAsm10B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE repeat_extend_back_end_encodeBlockAsm10B
	LEAL -1(SI), SI
	DECL BP
	JNZ repeat_extend_back_loop_encodeBlockAsm10B

// Emit src[nextEmit:base] as a literal run (skipped when empty).
// R8 = run length n, R9 = source pointer, BP = n-1; nextEmit is set to base.
repeat_extend_back_end_encodeBlockAsm10B:
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	LEAL -1(R8), BP
	// Literal header: n-1 < 60 -> 1 byte; n-1 < 256 -> 2 bytes; else 3 bytes.
	CMPL BP, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm10B
	CMPL BP, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm10B
	// 3-byte header: 0xf4 followed by n-1 as a 16-bit value.
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm10B

// 2-byte header: 0xf0 followed by n-1 as one byte.
two_bytes_repeat_emit_encodeBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	// n-1 < 64 (n <= 64) takes the short copy, otherwise the long copy.
	CMPL BP, $0x40
	JL memmove_repeat_emit_encodeBlockAsm10B
	JMP memmove_long_repeat_emit_encodeBlockAsm10B

// 1-byte header: (n-1) << 2.
one_byte_repeat_emit_encodeBlockAsm10B:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

// Short copy of R8 bytes (1..64) from (R9) to (AX). BP = AX + n is the new
// dst cursor. Each size class uses a head load/store plus a tail load/store
// that may overlap.
memmove_repeat_emit_encodeBlockAsm10B:
	LEAQ (AX)(R8*1), BP

	// genMemMoveShort
	CMPQ R8, $0x03
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2
	JE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeBlockAsm10B:
	MOVQ BP, AX
	JMP emit_literal_done_repeat_emit_encodeBlockAsm10B

// Long copy of R8 bytes (more than 64) from (R9) to (AX). The first and last
// 32 source bytes are loaded into X0-X3 up front and stored last with
// unaligned stores. The middle is copied in 32-byte steps using aligned
// stores: R12 starts at 64 - (AX & 31), so -32(AX)(R12*1) is 32-byte aligned.
// NOTE(review): DECQ leaves CF untouched, so the JA/JNA below also depend on
// CF from the preceding SUBQ/ADDQ - verify loop counts against the generator.
memmove_long_repeat_emit_encodeBlockAsm10B:
	LEAQ (AX)(R8*1), BP

	// genMemMoveLong
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(R9)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R8, R12
	JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ BP, AX

// Extend the repeat forwards. s += 5 skips s+1 plus the 4 bytes already
// compared. R9 = &src[s], BP = &src[s - repeat], R8 = bytes left in src.
emit_literal_done_repeat_emit_encodeBlockAsm10B:
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(BP*1), BP

	// matchLen
	// R11 = number of equal bytes. Compares 8 bytes at a time while at least
	// 8 remain, then byte by byte.
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend_encodeBlockAsm10B

matchlen_loopback_repeat_extend_encodeBlockAsm10B:
	MOVQ (R9)(R11*1), R10
	XORQ (BP)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend_encodeBlockAsm10B
	// Index of the lowest differing bit, divided by 8 = count of equal bytes.
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeBlockAsm10B

matchlen_loop_repeat_extend_encodeBlockAsm10B:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend_encodeBlockAsm10B

matchlen_single_repeat_extend_encodeBlockAsm10B:
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeBlockAsm10B

matchlen_single_loopback_repeat_extend_encodeBlockAsm10B:
	MOVB (R9)(R11*1), R10
	CMPB (BP)(R11*1), R10
	JNE repeat_extend_forward_end_encodeBlockAsm10B
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm10B

// s += matched bytes; BP = length = s - base; SI = repeat offset.
// DI still holds nextEmit as it was before the literal run was emitted:
// when it was 0 the match is written as a copy instead of a repeat.
repeat_extend_forward_end_encodeBlockAsm10B:
	ADDL R11, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	TESTL DI, DI
	JZ repeat_as_copy_encodeBlockAsm10B

	// emitRepeat
	// DI = length, BP = length - 4. Encodings chosen below:
	//   length <= 8                      -> 2 bytes
	//   length < 12 and offset < 2048    -> 2 bytes carrying the offset
	//   length - 4 < 260                 -> 3 bytes
	//   otherwise                        -> 4 bytes
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm10B
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_repeat_encodeBlockAsm10B

cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
	CMPL BP, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm10B
	// 4 bytes: 0x19, 0x00, then (length - 4 - 256) as a 16-bit value.
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm10B

// 3 bytes: 0x15, 0x00, then (length - 8) as one byte.
repeat_three_match_repeat_encodeBlockAsm10B:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm10B

// 2 bytes: 16-bit store of ((length - 4) << 2) | 1.
repeat_two_match_repeat_encodeBlockAsm10B:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm10B

// 2 bytes: byte0 = 1 | (length - 4) << 2 | (offset >> 8) << 5,
// byte1 = low byte of offset.
repeat_two_offset_match_repeat_encodeBlockAsm10B:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm10B

repeat_as_copy_encodeBlockAsm10B:
	// emitCopy
	// BP = length, SI = offset.
two_byte_offset_repeat_as_copy_encodeBlockAsm10B:
	CMPL BP, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
	// length > 64: write 0xee plus the 16-bit offset, subtract 60 from length
	// and encode the rest with the emitRepeat sequence below.
	// NOTE(review): 0xee presumably encodes a 60-byte copy - confirm.
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX

	// emitRepeat
	// Same encoding choices as the emitRepeat sequence earlier in this function.
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	CMPL BP, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm10B

repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm10B

repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm10B

repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm10B
	// Unreachable: follows an unconditional JMP and carries no label.
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B

// length <= 64. A 2-byte copy is used when length < 12 and offset < 2048,
// otherwise a 3-byte copy.
two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
	CMPL BP, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
	CMPL SI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
	// 2 bytes: byte0 = 1 + 4*(length - 4) | (offset >> 8) << 5, byte1 = low
	// offset byte. Only BL is set; the upper bits of BX do not matter because
	// just the low byte of the result is stored.
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm10B

// 3 bytes: byte0 = 2 + 4*(length - 1), then the 16-bit offset.
emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

// nextEmit = s, then continue searching.
repeat_end_emit_encodeBlockAsm10B:
	MOVL CX, 12(SP)
	JMP search_loop_encodeBlockAsm10B

// No repeat at s+1. Try the table candidates for s, s+1 and s+2 in turn by
// comparing 4 bytes. BP = candidate for s, DI = candidate for s+1,
// R9 = hash of the bytes at s+2, SI = the 8 bytes loaded from src[s:].
no_repeat_found_encodeBlockAsm10B:
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeBlockAsm10B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ candidate2_match_encodeBlockAsm10B
	// table[hash(s+2)] = s+2
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ candidate3_match_encodeBlockAsm10B
	// Nothing matched: s = nextS.
	MOVL 20(SP), CX
	JMP search_loop_encodeBlockAsm10B

// Match at s+2.
candidate3_match_encodeBlockAsm10B:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm10B

// Match at s+1: still record s+2 in the table, then use DI as the candidate.
candidate2_match_encodeBlockAsm10B:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

// CX = s (match position), BP = candidate position. Extend the match
// backwards while s > nextEmit, candidate > 0 and the preceding bytes agree.
candidate_match_encodeBlockAsm10B:
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeBlockAsm10B

match_extend_back_loop_encodeBlockAsm10B:
	CMPL CX, SI
	JLE match_extend_back_end_encodeBlockAsm10B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeBlockAsm10B
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeBlockAsm10B
	JMP match_extend_back_loop_encodeBlockAsm10B

// Output bound check: return 0 unless dst + 3 + (s - nextEmit) < dstLimit.
match_extend_back_end_encodeBlockAsm10B:
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 3(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

// Emit src[nextEmit:s] as a literal run (skipped when empty).
// R8 = run length n, SI = source pointer, DI = n-1; nextEmit is set to s.
// Header and copy logic are the same as for the literal run emitted on the
// repeat path earlier in this function.
match_dst_size_check_encodeBlockAsm10B:
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ emit_literal_done_match_emit_encodeBlockAsm10B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm10B
	CMPL DI, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBlockAsm10B

two_bytes_match_emit_encodeBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL memmove_match_emit_encodeBlockAsm10B
	JMP memmove_long_match_emit_encodeBlockAsm10B

one_byte_match_emit_encodeBlockAsm10B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

// Short copy of R8 bytes (1..64) from (SI) to (AX); DI = new dst cursor.
memmove_match_emit_encodeBlockAsm10B:
	LEAQ (AX)(R8*1), DI

	// genMemMoveShort
	CMPQ R8, $0x03
	JB emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeBlockAsm10B:
	MOVQ DI, AX
	JMP emit_literal_done_match_emit_encodeBlockAsm10B

// Long copy of R8 bytes (more than 64) from (SI) to (AX); DI = new dst cursor.
// Same scheme as the long copy on the repeat path: head and tail held in
// X0-X3, middle copied in 32-byte steps with aligned stores.
memmove_long_match_emit_encodeBlockAsm10B:
	LEAQ (AX)(R8*1), DI

	// genMemMoveLong
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ R8, R10
	SHRQ $0x05, R10
	MOVQ AX, R9
	ANDL $0x0000001f, R9
	MOVQ $0x00000040, R11
	SUBQ R9, R11
	DECQ R10
	JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(SI)(R11*1), R9
	LEAQ -32(AX)(R11*1), R12

emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOA X4, (R12)
	MOVOA X5, 16(R12)
	ADDQ $0x20, R12
	ADDQ $0x20, R9
	ADDQ $0x20, R11
	DECQ R10
	JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ $0x20, R11
	CMPQ R8, R11
	JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ DI, AX

// Match with no pending literals. On entry CX = s and BP = candidate, with the
// first 4 bytes already known to be equal.
emit_literal_done_match_emit_encodeBlockAsm10B:
match_nolit_loop_encodeBlockAsm10B:
	// repeat = s - candidate
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	// Skip the 4 matched bytes, then measure how much further the match runs.
	// SI = bytes left in src, DI = &src[s], BP = &src[candidate].
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP

	// matchLen
	// R9 = number of equal bytes (8 at a time, then byte by byte).
	XORL R9, R9
	CMPL SI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm10B

matchlen_loopback_match_nolit_encodeBlockAsm10B:
	MOVQ (DI)(R9*1), R8
	XORQ (BP)(R9*1), R8
	TESTQ R8, R8
	JZ matchlen_loop_match_nolit_encodeBlockAsm10B
	BSFQ R8, R8
	SARQ $0x03, R8
	LEAL (R9)(R8*1), R9
	JMP match_nolit_end_encodeBlockAsm10B

matchlen_loop_match_nolit_encodeBlockAsm10B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm10B

matchlen_single_match_nolit_encodeBlockAsm10B:
	TESTL SI, SI
	JZ match_nolit_end_encodeBlockAsm10B

matchlen_single_loopback_match_nolit_encodeBlockAsm10B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE match_nolit_end_encodeBlockAsm10B
	LEAL 1(R9), R9
	DECL SI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm10B

// s += extra matched bytes; BP = offset (repeat); R9 = total match length
// (extra bytes + the initial 4); nextEmit = s.
match_nolit_end_encodeBlockAsm10B:
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	MOVL CX, 12(SP)

	// emitCopy
	// R9 = length, BP = offset. Same encoding choices as the emitCopy
	// sequence on the repeat path.
two_byte_offset_match_nolit_encodeBlockAsm10B:
	CMPL R9, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm10B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX

	// emitRepeat
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
	CMPL R9, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
	SHLL $0x02, R9
	ORL $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm10B
	// Unreachable: follows an unconditional JMP and carries no label.
	JMP two_byte_offset_match_nolit_encodeBlockAsm10B

two_byte_offset_short_match_nolit_encodeBlockAsm10B:
	CMPL R9, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm10B
	CMPL BP, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBlockAsm10B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm10B

emit_copy_three_match_nolit_encodeBlockAsm10B:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

// After a copy: finish if s >= sLimit, return 0 if dst has reached dstLimit,
// otherwise index s-2 and s and test for an immediate match at s.
match_nolit_emitcopy_end_encodeBlockAsm10B:
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm10B
	// SI = 8 bytes loaded from src[s-2:]
	MOVQ -2(DX)(CX*1), SI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm10B:
	// DI = hash of the bytes at s-2, BP = hash of the bytes at s
	// (SI is shifted right by 16 bits so it now starts at src[s]).
	MOVQ $0x9e3779b1, R8
	MOVQ SI, DI
	SHRQ $0x10, SI
	MOVQ SI, BP
	SHLQ $0x20, DI
	IMULQ R8, DI
	SHRQ $0x36, DI
	SHLQ $0x20, BP
	IMULQ R8, BP
	SHRQ $0x36, BP
	LEAL -2(CX), R8
	// BP = candidate = table[hash(s)] (read before the stores below), then
	// table[hash(s-2)] = s-2 and table[hash(s)] = s.
	LEAQ 24(SP)(BP*4), R9
	MOVL (R9), BP
	MOVL R8, 24(SP)(DI*4)
	MOVL CX, (R9)
	// 4 equal bytes at the candidate: handle another match without literals.
	CMPL (DX)(BP*1), SI
	JEQ match_nolit_loop_encodeBlockAsm10B
	INCL CX
	JMP search_loop_encodeBlockAsm10B

// Tail: emit src[nextEmit:] as a final literal run.
// Return 0 unless dst + 3 + (len(src) - nextEmit) < dstLimit.
emit_remainder_encodeBlockAsm10B:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

// BP = run length n, CX = source pointer, DX = n-1 (DX no longer holds the
// src base from here on). Header and copy logic match the earlier literal
// emitters in this function.
emit_remainder_ok_encodeBlockAsm10B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm10B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm10B

two_bytes_emit_remainder_encodeBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBlockAsm10B
	JMP memmove_long_emit_remainder_encodeBlockAsm10B

one_byte_emit_remainder_encodeBlockAsm10B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

// Short copy of BX bytes (1..64) from (CX) to (AX); DX = new dst cursor.
memmove_emit_remainder_encodeBlockAsm10B:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveShort
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm10B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm10B

// Long copy of BX bytes (more than 64) from (CX) to (AX); DX = new dst cursor.
// Same scheme as the other long copies in this function.
memmove_long_emit_remainder_encodeBlockAsm10B:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, SI
	SHRQ $0x05, SI
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
	SUBQ BP, DI
	DECQ SI
	JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(CX)(DI*1), BP
	LEAQ -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	ADDQ $0x20, R8
	ADDQ $0x20, BP
	ADDQ $0x20, DI
	DECQ SI
	JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ BX, DI
	JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

// Return the number of bytes written: dst cursor - dst_base.
emit_literal_done_emit_remainder_encodeBlockAsm10B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func 
encodeBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written, or 0 when
// the output would not stay below the limit kept at 0(SP).
//
// Stack frame as used below (1048 bytes):
//   0(SP)   8 bytes     limit for the dst write pointer:
//                       dst_base + (len(src) - 5 - len(src)>>5)
//   8(SP)   4 bytes     len(src) - 8; searching stops once the next
//                       position reaches this value
//   12(SP)  4 bytes     src offset of the first byte not yet emitted
//   16(SP)  4 bytes     current repeat offset (initialised to 1)
//   20(SP)  4 bytes     src offset to continue at when nothing matches
//   24(SP)  1024 bytes  hash table of 256 32-bit src offsets
//
// Register roles in the search loop:
//   AX = dst write pointer, DX = src base, CX = current src offset,
//   SI = 8 bytes loaded from src at CX.
TEXT ·encodeBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000008, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table: 8 iterations of 128 bytes = 1024 bytes.
zero_loop_encodeBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm8B

	// Initialise the frame slots described in the header; start at offset 1.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm8B:
	MOVQ (DX)(CX*1), SI

	// BP = CX + 4 + (CX - 12(SP))>>4: the step grows with the amount of
	// not-yet-emitted input. Leave the loop when it reaches 8(SP).
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x04, BP
	LEAL 4(CX)(BP*1), BP
	CMPL BP, 8(SP)
	JGE emit_remainder_encodeBlockAsm8B
	MOVL BP, 20(SP)

	// R9 / R10 = 8-bit hashes of the 4 bytes at CX and at CX+1:
	// ((v << 32) * 0x9e3779b1) >> 56.
	MOVQ $0x9e3779b1, R8
	MOVQ SI, R9
	MOVQ SI, R10
	SHRQ $0x08, R10
	SHLQ $0x20, R9
	IMULQ R8, R9
	SHRQ $0x38, R9
	SHLQ $0x20, R10
	IMULQ R8, R10
	SHRQ $0x38, R10

	// BP, DI = previous table entries (candidates); store CX and CX+1.
	MOVL 24(SP)(R9*4), BP
	MOVL 24(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	LEAL 1(CX), R9
	MOVL R9, 24(SP)(R10*4)

	// R9 = hash of the 4 bytes at CX+2; used in no_repeat_found below.
	MOVQ SI, R9
	SHRQ $0x10, R9
	SHLQ $0x20, R9
	IMULQ R8, R9
	SHRQ $0x38, R9

	// Compare the 4 bytes at CX+1 with the 4 bytes one repeat offset
	// (16(SP)) before them.
	MOVL CX, R8
	SUBL 16(SP), R8
	MOVL 1(DX)(R8*1), R10
	MOVQ SI, R8
	SHRQ $0x08, R8
	CMPL R8, R10
	JNE no_repeat_found_encodeBlockAsm8B

	// Repeat found. SI = start of match (CX+1), DI = 12(SP),
	// BP = SI - repeat offset. Extend backwards while the preceding bytes
	// are equal, SI stays above DI and BP stays above 0.
	LEAL 1(CX), SI
	MOVL 12(SP), DI
	MOVL SI, BP
	SUBL 16(SP), BP
	JZ repeat_extend_back_end_encodeBlockAsm8B

repeat_extend_back_loop_encodeBlockAsm8B:
	CMPL SI, DI
	JLE repeat_extend_back_end_encodeBlockAsm8B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE repeat_extend_back_end_encodeBlockAsm8B
	LEAL -1(SI), SI
	DECL BP
	JNZ repeat_extend_back_loop_encodeBlockAsm8B

repeat_extend_back_end_encodeBlockAsm8B:
	// Emit the pending bytes src[12(SP):SI] as a literal, if there are any.
	// R8 = length, R9 = source pointer, BP = length-1 selects the header:
	//   < 60  -> 1 byte  ((length-1) << 2)
	//   < 256 -> 2 bytes (0xf0, length-1)
	//   else  -> 3 bytes (0xf4, 16-bit length-1)
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	LEAL -1(R8), BP
	CMPL BP, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm8B
	CMPL BP, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm8B

two_bytes_repeat_emit_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL memmove_repeat_emit_encodeBlockAsm8B
	JMP memmove_long_repeat_emit_encodeBlockAsm8B

one_byte_repeat_emit_encodeBlockAsm8B:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeBlockAsm8B:
	// BP = dst pointer after the literal bytes.
	LEAQ (AX)(R8*1), BP

	// genMemMoveShort
	// Copies R8 bytes (at most 64 on this path) from (R9) to (AX) with a
	// size-class dispatch; each class loads head and tail (possibly
	// overlapping) before storing.
	CMPQ R8, $0x03
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeBlockAsm8B:
	MOVQ BP, AX
	JMP emit_literal_done_repeat_emit_encodeBlockAsm8B

memmove_long_repeat_emit_encodeBlockAsm8B:
	LEAQ (AX)(R8*1), BP

	// genMemMoveLong
	// Copies R8 bytes from (R9) to (AX). The first and last 32 bytes are
	// loaded into X0-X3 up front and stored last; the middle is copied in
	// 32-byte steps. R12 = 64 - (AX & 31), so the MOVOA stores at
	// -32(AX)(R12*1) are 32-byte aligned.
	// NOTE(review): DECQ leaves CF unchanged, so the JA below sees CF from
	// the SUBQ and ZF from the DECQ - confirm against gen.go that this
	// branch condition is the intended one.
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R9)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R8, R12
	JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ BP, AX

emit_literal_done_repeat_emit_encodeBlockAsm8B:
	// Skip the 4 bytes already compared (they start at CX+1), then extend
	// the repeat forwards. R8 = bytes left in src, R9 = pointer at CX,
	// BP = pointer one repeat offset before it.
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(BP*1), BP

	// matchLen
	// R11 = number of equal leading bytes of (R9) and (BP), at most R8:
	// 8 bytes per step (XOR + BSFQ locates the first differing byte),
	// then byte by byte for the final fewer-than-8 bytes.
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend_encodeBlockAsm8B

matchlen_loopback_repeat_extend_encodeBlockAsm8B:
	MOVQ (R9)(R11*1), R10
	XORQ (BP)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend_encodeBlockAsm8B
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeBlockAsm8B

matchlen_loop_repeat_extend_encodeBlockAsm8B:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend_encodeBlockAsm8B

matchlen_single_repeat_extend_encodeBlockAsm8B:
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeBlockAsm8B

matchlen_single_loopback_repeat_extend_encodeBlockAsm8B:
	MOVB (R9)(R11*1), R10
	CMPB (BP)(R11*1), R10
	JNE repeat_extend_forward_end_encodeBlockAsm8B
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm8B

repeat_extend_forward_end_encodeBlockAsm8B:
	// CX = end of match, BP = match length (CX - SI), SI = repeat offset.
	// DI still holds the value 12(SP) had before the literal was emitted;
	// when it is 0 the match is written as a copy instead of a repeat.
	ADDL R11, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	TESTL DI, DI
	JZ repeat_as_copy_encodeBlockAsm8B

	// emitRepeat
	// SI = length, BP = length-4.
	//   length <= 8    -> 2 bytes: 16-bit (BP<<2 | 1)
	//   BP < 0x104     -> 3 bytes: 16-bit 0x0015, byte BP-4
	//   otherwise      -> 4 bytes: 16-bit 0x0019, 16-bit BP-256
	// NOTE(review): the JGE below targets the label that immediately
	// follows it, so the CMPL/JGE pair has no effect, and the
	// instructions after the unconditional JMP in repeat_two_* have no
	// label and cannot be reached. Both look like generator leftovers.
	MOVL BP, SI
	LEAL -4(BP), BP
	CMPL SI, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm8B
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B

cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
	CMPL BP, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm8B
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_three_match_repeat_encodeBlockAsm8B:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_match_repeat_encodeBlockAsm8B:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_as_copy_encodeBlockAsm8B:
	// emitCopy
	// BP = length, SI = offset.
	//   length > 64  -> 3 bytes (0xee, 16-bit offset), length -= 60, and
	//                   the rest is written by the emitRepeat code below
	//   length >= 12 -> 3 bytes: (length-1)<<2 | 2, 16-bit offset
	//   otherwise    -> 2 bytes: (offset>>8)<<5 | (length-4)<<2 | 1,
	//                   low offset byte
	// Only the low byte of BP is stored as the tag, so the upper bits of
	// BX (only BL is written) do not reach the output.
two_byte_offset_repeat_as_copy_encodeBlockAsm8B:
	CMPL BP, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX

	// emitRepeat
	// Same encoding choices as the emitRepeat sequence earlier in this
	// function, applied to the remaining length.
	MOVL BP, SI
	LEAL -4(BP), BP
	CMPL SI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	CMPL BP, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	SHLL $0x02, BP
	ORL $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8B

two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
	CMPL BP, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeBlockAsm8B:
	// Everything up to CX has been emitted; resume searching there.
	MOVL CX, 12(SP)
	JMP search_loop_encodeBlockAsm8B

no_repeat_found_encodeBlockAsm8B:
	// Try the table candidates in turn, comparing 4 bytes each:
	// BP against the bytes at CX, DI against CX+1, then the entry for the
	// hash in R9 against CX+2 (the table slot is set to CX+2 either way).
	// If none match, continue at the position saved in 20(SP).
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeBlockAsm8B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ candidate2_match_encodeBlockAsm8B
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ candidate3_match_encodeBlockAsm8B
	MOVL 20(SP), CX
	JMP search_loop_encodeBlockAsm8B

candidate3_match_encodeBlockAsm8B:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm8B

candidate2_match_encodeBlockAsm8B:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

candidate_match_encodeBlockAsm8B:
	// CX = match position, BP = candidate position. Extend backwards while
	// the preceding bytes are equal, CX stays above 12(SP) and BP above 0.
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeBlockAsm8B

match_extend_back_loop_encodeBlockAsm8B:
	CMPL CX, SI
	JLE match_extend_back_end_encodeBlockAsm8B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeBlockAsm8B
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeBlockAsm8B
	JMP match_extend_back_loop_encodeBlockAsm8B

match_extend_back_end_encodeBlockAsm8B:
	// Return 0 unless AX + 3 + (CX - 12(SP)) is below the limit at 0(SP).
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 3(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBlockAsm8B:
	// Emit the pending bytes src[12(SP):CX] as a literal, if there are any.
	// R8 = length, SI = source pointer, DI = length-1 selects the header
	// (same three header sizes as in the repeat path).
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ emit_literal_done_match_emit_encodeBlockAsm8B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm8B
	CMPL DI, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBlockAsm8B

two_bytes_match_emit_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL memmove_match_emit_encodeBlockAsm8B
	JMP memmove_long_match_emit_encodeBlockAsm8B

one_byte_match_emit_encodeBlockAsm8B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBlockAsm8B:
	// DI = dst pointer after the literal bytes.
	LEAQ (AX)(R8*1), DI

	// genMemMoveShort
	// Copies R8 bytes (at most 64 on this path) from (SI) to (AX).
	CMPQ R8, $0x03
	JB emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeBlockAsm8B:
	MOVQ DI, AX
	JMP emit_literal_done_match_emit_encodeBlockAsm8B

memmove_long_match_emit_encodeBlockAsm8B:
	LEAQ (AX)(R8*1), DI

	// genMemMoveLong
	// Copies R8 bytes from (SI) to (AX): head and tail 32 bytes are held
	// in X0-X3 and stored last, the middle goes in 32-byte steps.
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ R8, R10
	SHRQ $0x05, R10
	MOVQ AX, R9
	ANDL $0x0000001f, R9
	MOVQ $0x00000040, R11
	SUBQ R9, R11
	DECQ R10
	JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(SI)(R11*1), R9
	LEAQ -32(AX)(R11*1), R12

emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOA X4, (R12)
	MOVOA X5, 16(R12)
	ADDQ $0x20, R12
	ADDQ $0x20, R9
	ADDQ $0x20, R11
	DECQ R10
	JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ $0x20, R11
	CMPQ R8, R11
	JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ DI, AX

emit_literal_done_match_emit_encodeBlockAsm8B:
match_nolit_loop_encodeBlockAsm8B:
	// 16(SP) = offset (CX - BP). Skip the 4 bytes known to match and
	// measure how far the match extends. SI = bytes left in src,
	// DI = pointer at CX, BP = pointer at the candidate.
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP

	// matchLen
	// R9 = number of equal leading bytes of (DI) and (BP), at most SI.
	XORL R9, R9
	CMPL SI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm8B

matchlen_loopback_match_nolit_encodeBlockAsm8B:
	MOVQ (DI)(R9*1), R8
	XORQ (BP)(R9*1), R8
	TESTQ R8, R8
	JZ matchlen_loop_match_nolit_encodeBlockAsm8B
	BSFQ R8, R8
	SARQ $0x03, R8
	LEAL (R9)(R8*1), R9
	JMP match_nolit_end_encodeBlockAsm8B

matchlen_loop_match_nolit_encodeBlockAsm8B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm8B

matchlen_single_match_nolit_encodeBlockAsm8B:
	TESTL SI, SI
	JZ match_nolit_end_encodeBlockAsm8B

matchlen_single_loopback_match_nolit_encodeBlockAsm8B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE match_nolit_end_encodeBlockAsm8B
	LEAL 1(R9), R9
	DECL SI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm8B

match_nolit_end_encodeBlockAsm8B:
	// CX = end of match, BP = offset, R9 = total match length (the 4
	// skipped bytes included); 12(SP) = CX marks the input as emitted.
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	MOVL CX, 12(SP)

	// emitCopy
	// R9 = length, BP = offset; same encoding choices as the emitCopy
	// sequence in the repeat path.
two_byte_offset_match_nolit_encodeBlockAsm8B:
	CMPL R9, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm8B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX

	// emitRepeat
	// BP = remaining length, R9 = remaining length - 4.
	MOVL R9, BP
	LEAL -4(R9), R9
	CMPL BP, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
	CMPL R9, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
	SHLL $0x02, R9
	ORL $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B
	JMP two_byte_offset_match_nolit_encodeBlockAsm8B

two_byte_offset_short_match_nolit_encodeBlockAsm8B:
	CMPL R9, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

emit_copy_three_match_nolit_encodeBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeBlockAsm8B:
	// Stop searching once CX reaches 8(SP); return 0 if AX has reached
	// the limit at 0(SP). SI = 8 bytes starting at CX-2.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm8B
	MOVQ -2(DX)(CX*1), SI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm8B:
	// DI = hash of the 4 bytes at CX-2, BP = hash of the 4 bytes at CX
	// (SI is shifted right by 16 so its low 32 bits are the bytes at CX).
	// Store CX-2 and CX in the table; if the previous entry for CX's hash
	// holds the same 4 bytes, emit another match without a literal.
	MOVQ $0x9e3779b1, R8
	MOVQ SI, DI
	SHRQ $0x10, SI
	MOVQ SI, BP
	SHLQ $0x20, DI
	IMULQ R8, DI
	SHRQ $0x38, DI
	SHLQ $0x20, BP
	IMULQ R8, BP
	SHRQ $0x38, BP
	LEAL -2(CX), R8
	LEAQ 24(SP)(BP*4), R9
	MOVL (R9), BP
	MOVL R8, 24(SP)(DI*4)
	MOVL CX, (R9)
	CMPL (DX)(BP*1), SI
	JEQ match_nolit_loop_encodeBlockAsm8B
	INCL CX
	JMP search_loop_encodeBlockAsm8B

emit_remainder_encodeBlockAsm8B:
	// Return 0 unless AX + 3 + (len(src) - 12(SP)) is below the limit at
	// 0(SP).
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBlockAsm8B:
	// Emit src[12(SP):len(src)] as a final literal, if non-empty.
	// BP = length, CX = source pointer, DX = length-1 selects the header.
	// DX no longer holds the src base from here on.
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm8B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm8B

two_bytes_emit_remainder_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBlockAsm8B
	JMP memmove_long_emit_remainder_encodeBlockAsm8B

one_byte_emit_remainder_encodeBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBlockAsm8B:
	// DX = dst pointer after the literal bytes, BX = length.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveShort
	// Copies BX bytes (at most 64 on this path) from (CX) to (AX).
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm8B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm8B

memmove_long_emit_remainder_encodeBlockAsm8B:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveLong
	// Copies BX bytes from (CX) to (AX): head and tail 32 bytes are held
	// in X0-X3, the middle goes in 32-byte steps.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, SI
	SHRQ $0x05, SI
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
SUBQ BP, DI DECQ SI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(CX)(DI*1), BP LEAQ -32(AX)(DI*1), R8 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: MOVOU (BP), X4 MOVOU 16(BP), X5 MOVOA X4, (R8) MOVOA X5, 16(R8) ADDQ $0x20, R8 ADDQ $0x20, BP ADDQ $0x20, DI DECQ SI JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(CX)(DI*1), X4 MOVOU -16(CX)(DI*1), X5 MOVOA X4, -32(AX)(DI*1) MOVOA X5, -16(AX)(DI*1) ADDQ $0x20, DI CMPQ BX, DI JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsm8B: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeBetterBlockAsm(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBetterBlockAsm(SB), $327704-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000a00, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeBetterBlockAsm: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBetterBlockAsm MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -6(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL $0x00000000, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBetterBlockAsm: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x07, BP LEAL 1(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeBetterBlockAsm MOVL BP, 20(SP) MOVQ $0x00cf1bbcdcbfa563, R8 MOVQ $0x9e3779b1, BP MOVQ SI, R9 MOVQ SI, R10 SHLQ $0x08, R9 IMULQ R8, R9 SHRQ $0x30, R9 SHLQ $0x20, R10 IMULQ BP, R10 SHRQ $0x32, R10 MOVL 24(SP)(R9*4), BP MOVL 262168(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) MOVL CX, 262168(SP)(R10*4) 
CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBetterBlockAsm CMPL (DX)(DI*1), SI JEQ candidateS_match_encodeBetterBlockAsm MOVL 20(SP), CX JMP search_loop_encodeBetterBlockAsm candidateS_match_encodeBetterBlockAsm: SHRQ $0x08, SI MOVQ SI, R9 SHLQ $0x08, R9 IMULQ R8, R9 SHRQ $0x30, R9 MOVL 24(SP)(R9*4), BP INCL CX MOVL CX, 24(SP)(R9*4) CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBetterBlockAsm DECL CX MOVL DI, BP candidate_match_encodeBetterBlockAsm: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBetterBlockAsm match_extend_back_loop_encodeBetterBlockAsm: CMPL CX, SI JLE match_extend_back_end_encodeBetterBlockAsm MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBetterBlockAsm LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBetterBlockAsm JMP match_extend_back_loop_encodeBetterBlockAsm match_extend_back_end_encodeBetterBlockAsm: MOVL CX, SI SUBL 12(SP), SI LEAQ 5(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBetterBlockAsm: MOVL CX, SI ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), R9 // matchLen XORL R11, R11 CMPL DI, $0x08 JL matchlen_single_match_nolit_encodeBetterBlockAsm matchlen_loopback_match_nolit_encodeBetterBlockAsm: MOVQ (R8)(R11*1), R10 XORQ (R9)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm matchlen_loop_match_nolit_encodeBetterBlockAsm: LEAL -8(DI), DI LEAL 8(R11), R11 CMPL DI, $0x08 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm matchlen_single_match_nolit_encodeBetterBlockAsm: TESTL DI, DI JZ match_nolit_end_encodeBetterBlockAsm matchlen_single_loopback_match_nolit_encodeBetterBlockAsm: MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm LEAL 1(R11), R11 DECL DI JNZ 
matchlen_single_loopback_match_nolit_encodeBetterBlockAsm match_nolit_end_encodeBetterBlockAsm: MOVL CX, DI SUBL BP, DI // Check if repeat CMPL 16(SP), DI JEQ match_is_repeat_encodeBetterBlockAsm CMPL R11, $0x01 JG match_length_ok_encodeBetterBlockAsm CMPL DI, $0x0000ffff JLE match_length_ok_encodeBetterBlockAsm MOVL 20(SP), CX INCL CX JMP search_loop_encodeBetterBlockAsm match_length_ok_encodeBetterBlockAsm: MOVL DI, 16(SP) MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 LEAL -1(R8), BP CMPL BP, $0x3c JLT one_byte_match_emit_encodeBetterBlockAsm CMPL BP, $0x00000100 JLT two_bytes_match_emit_encodeBetterBlockAsm CMPL BP, $0x00010000 JLT three_bytes_match_emit_encodeBetterBlockAsm CMPL BP, $0x01000000 JLT four_bytes_match_emit_encodeBetterBlockAsm MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_long_match_emit_encodeBetterBlockAsm four_bytes_match_emit_encodeBetterBlockAsm: MOVL BP, R10 SHRL $0x10, R10 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_encodeBetterBlockAsm three_bytes_match_emit_encodeBetterBlockAsm: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBetterBlockAsm two_bytes_match_emit_encodeBetterBlockAsm: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_match_emit_encodeBetterBlockAsm JMP memmove_long_match_emit_encodeBetterBlockAsm one_byte_match_emit_encodeBetterBlockAsm: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBetterBlockAsm: LEAQ (AX)(R8*1), BP // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16 CMPQ R8, $0x20 JBE 
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R9 MOVW R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBetterBlockAsm: MOVQ BP, AX JMP emit_literal_done_match_emit_encodeBetterBlockAsm memmove_long_match_emit_encodeBetterBlockAsm: LEAQ (AX)(R8*1), BP // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R12 SHRQ $0x05, R12 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R13 SUBQ R10, R13 DECQ R12 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 LEAQ -32(R9)(R13*1), R10 LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 
MOVOA X4, (R14) MOVOA X5, 16(R14) ADDQ $0x20, R14 ADDQ $0x20, R10 ADDQ $0x20, R13 DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32: MOVOU -32(R9)(R13*1), X4 MOVOU -16(R9)(R13*1), X5 MOVOA X4, -32(AX)(R13*1) MOVOA X5, -16(AX)(R13*1) ADDQ $0x20, R13 CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ BP, AX emit_literal_done_match_emit_encodeBetterBlockAsm: ADDL R11, CX ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy CMPL DI, $0x00010000 JL two_byte_offset_match_nolit_encodeBetterBlockAsm four_bytes_loop_back_match_nolit_encodeBetterBlockAsm: CMPL R11, $0x40 JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm MOVB $0xff, (AX) MOVL DI, 1(AX) LEAL -64(R11), R11 ADDQ $0x05, AX CMPL R11, $0x04 JL four_bytes_remain_match_nolit_encodeBetterBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy: MOVL R11, BP LEAL -4(R11), R11 CMPL BP, $0x08 JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy CMPL BP, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy CMPL DI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: CMPL R11, $0x00000104 JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy CMPL R11, $0x00010100 JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy CMPL R11, $0x0100ffff JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy LEAL -16842747(R11), R11 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy: LEAL -65536(R11), R11 MOVL R11, DI MOVW $0x001d, (AX) MOVW R11, 2(AX) SARL $0x10, DI MOVB DI, 4(AX) ADDQ $0x05, AX JMP 
match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy: LEAL -256(R11), R11 MOVW $0x0019, (AX) MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy: LEAL -4(R11), R11 MOVW $0x0015, (AX) MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy: SHLL $0x02, R11 ORL $0x01, R11 MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: XORQ BP, BP LEAL 1(BP)(R11*4), R11 MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, R11 MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm four_bytes_remain_match_nolit_encodeBetterBlockAsm: TESTL R11, R11 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm MOVB $0x03, BL LEAL -4(BX)(R11*4), R11 MOVB R11, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_match_nolit_encodeBetterBlockAsm: CMPL R11, $0x40 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm MOVB $0xee, (AX) MOVW DI, 1(AX) LEAL -60(R11), R11 ADDQ $0x03, AX // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short: MOVL R11, BP LEAL -4(R11), R11 CMPL BP, $0x08 JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short CMPL BP, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short CMPL DI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: CMPL R11, $0x00000104 JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short CMPL R11, $0x00010100 JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short CMPL R11, $0x0100ffff JLT 
repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short LEAL -16842747(R11), R11 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short: LEAL -65536(R11), R11 MOVL R11, DI MOVW $0x001d, (AX) MOVW R11, 2(AX) SARL $0x10, DI MOVB DI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short: LEAL -256(R11), R11 MOVW $0x0019, (AX) MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short: LEAL -4(R11), R11 MOVW $0x0015, (AX) MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short: SHLL $0x02, R11 ORL $0x01, R11 MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: XORQ BP, BP LEAL 1(BP)(R11*4), R11 MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, R11 MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm JMP two_byte_offset_match_nolit_encodeBetterBlockAsm two_byte_offset_short_match_nolit_encodeBetterBlockAsm: CMPL R11, $0x0c JGE emit_copy_three_match_nolit_encodeBetterBlockAsm CMPL DI, $0x00000800 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm MOVB $0x01, BL LEAL -16(BX)(R11*4), R11 MOVB DI, 1(AX) SHRL $0x08, DI SHLL $0x05, DI ORL DI, R11 MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm emit_copy_three_match_nolit_encodeBetterBlockAsm: MOVB $0x02, BL LEAL -4(BX)(R11*4), R11 MOVB R11, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm match_is_repeat_encodeBetterBlockAsm: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm MOVL SI, R8 MOVL SI, 12(SP) 
LEAQ (DX)(BP*1), R9 SUBL BP, R8 LEAL -1(R8), BP CMPL BP, $0x3c JLT one_byte_match_emit_repeat_encodeBetterBlockAsm CMPL BP, $0x00000100 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm CMPL BP, $0x00010000 JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm CMPL BP, $0x01000000 JLT four_bytes_match_emit_repeat_encodeBetterBlockAsm MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm four_bytes_match_emit_repeat_encodeBetterBlockAsm: MOVL BP, R10 SHRL $0x10, R10 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm three_bytes_match_emit_repeat_encodeBetterBlockAsm: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm two_bytes_match_emit_repeat_encodeBetterBlockAsm: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_match_emit_repeat_encodeBetterBlockAsm JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm one_byte_match_emit_repeat_encodeBetterBlockAsm: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_match_emit_repeat_encodeBetterBlockAsm: LEAQ (AX)(R8*1), BP // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_1or2 JE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm 
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R9 MOVW R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm: MOVQ BP, AX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm memmove_long_match_emit_repeat_encodeBetterBlockAsm: LEAQ (AX)(R8*1), BP // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R12 SHRQ $0x05, R12 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R13 SUBQ R10, R13 DECQ R12 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 LEAQ -32(R9)(R13*1), R10 LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R14) MOVOA X5, 16(R14) ADDQ $0x20, R14 ADDQ $0x20, R10 ADDQ $0x20, R13 DECQ R12 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back 
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32: MOVOU -32(R9)(R13*1), X4 MOVOU -16(R9)(R13*1), X5 MOVOA X4, -32(AX)(R13*1) MOVOA X5, -16(AX)(R13*1) ADDQ $0x20, R13 CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ BP, AX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm: ADDL R11, CX ADDL $0x04, R11 MOVL CX, 12(SP) // emitRepeat emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm: MOVL R11, BP LEAL -4(R11), R11 CMPL BP, $0x08 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm CMPL BP, $0x0c JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm CMPL DI, $0x00000800 JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: CMPL R11, $0x00000104 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm CMPL R11, $0x00010100 JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm CMPL R11, $0x0100ffff JLT repeat_five_match_nolit_repeat_encodeBetterBlockAsm LEAL -16842747(R11), R11 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm repeat_five_match_nolit_repeat_encodeBetterBlockAsm: LEAL -65536(R11), R11 MOVL R11, DI MOVW $0x001d, (AX) MOVW R11, 2(AX) SARL $0x10, DI MOVB DI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_repeat_encodeBetterBlockAsm: LEAL -256(R11), R11 MOVW $0x0019, (AX) MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_repeat_encodeBetterBlockAsm: LEAL -4(R11), R11 MOVW $0x0015, (AX) MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_repeat_encodeBetterBlockAsm: SHLL $0x02, R11 ORL $0x01, R11 MOVW R11, (AX) ADDQ $0x02, AX JMP 
match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: XORQ BP, BP LEAL 1(BP)(R11*4), R11 MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, R11 MOVB R11, (AX) ADDQ $0x02, AX match_nolit_emitcopy_end_encodeBetterBlockAsm: CMPL CX, 8(SP) JGE emit_remainder_encodeBetterBlockAsm CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm: MOVQ $0x00cf1bbcdcbfa563, BP MOVQ $0x9e3779b1, DI INCL SI MOVQ (DX)(SI*1), R8 MOVQ R8, R9 MOVQ R8, R10 SHRQ $0x08, R10 LEAL 1(SI), R11 MOVQ -2(DX)(CX*1), R8 SHLQ $0x08, R9 IMULQ BP, R9 SHRQ $0x30, R9 SHLQ $0x20, R10 IMULQ DI, R10 SHRQ $0x32, R10 MOVL SI, 24(SP)(R9*4) MOVL R11, 262168(SP)(R10*4) MOVQ R8, R9 MOVQ R8, R10 SHRQ $0x08, R10 LEAL -2(CX), R8 LEAL -1(CX), SI SHLQ $0x08, R9 IMULQ BP, R9 SHRQ $0x30, R9 SHLQ $0x20, R10 IMULQ DI, R10 SHRQ $0x32, R10 MOVL R8, 24(SP)(R9*4) MOVL SI, 262168(SP)(R10*4) JMP search_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 5(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBetterBlockAsm: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBetterBlockAsm MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_long_emit_remainder_encodeBetterBlockAsm four_bytes_emit_remainder_encodeBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_long_emit_remainder_encodeBetterBlockAsm 
three_bytes_emit_remainder_encodeBetterBlockAsm: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBetterBlockAsm two_bytes_emit_remainder_encodeBetterBlockAsm: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBetterBlockAsm JMP memmove_long_emit_remainder_encodeBetterBlockAsm one_byte_emit_remainder_encodeBetterBlockAsm: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBetterBlockAsm: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveShort CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, 
(AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm memmove_long_emit_remainder_encodeBetterBlockAsm: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, SI SHRQ $0x05, SI MOVQ AX, BP ANDL $0x0000001f, BP MOVQ $0x00000040, DI SUBQ BP, DI DECQ SI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 LEAQ -32(CX)(DI*1), BP LEAQ -32(AX)(DI*1), R8 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: MOVOU (BP), X4 MOVOU 16(BP), X5 MOVOA X4, (R8) MOVOA X5, 16(R8) ADDQ $0x20, R8 ADDQ $0x20, BP ADDQ $0x20, DI DECQ SI JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32: MOVOU -32(CX)(DI*1), X4 MOVOU -16(CX)(DI*1), X5 MOVOA X4, -32(AX)(DI*1) MOVOA X5, -16(AX)(DI*1) ADDQ $0x20, DI CMPQ BX, DI JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeBetterBlockAsm: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET

// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written, or 0 when
// the output would reach the destination limit computed below.
//
// Stack frame layout (as used by the code in this function):
//   0(SP)       8 bytes  dst limit pointer: dst_base + src_len - 6 - (src_len >> 5)
//   8(SP)       4 bytes  search limit: src_len - 8
//   12(SP)      4 bytes  src index of the first byte not yet emitted ("nextEmit" below)
//   16(SP)      4 bytes  offset of the previous match ("repeat offset" below)
//   20(SP)      4 bytes  next search position ("nextS" below)
//   24(SP)      65536 x 4-byte entries, indexed by the 16-bit hash of 7 bytes
//   262168(SP)  16384 x 4-byte entries, indexed by the 14-bit hash of 4 bytes
//
// Register roles in the main loop:
//   AX = dst write pointer, DX = src base pointer, CX = current src index (s).
TEXT ·encodeBetterBlockAsm4MB(SB), $327704-56
	MOVQ dst_base+0(FP), AX
	// 0xa00 iterations x 128 bytes = 327680 bytes: clears both tables at 24(SP).
	MOVQ $0x00000a00, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBetterBlockAsm4MB:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBetterBlockAsm4MB
	// nextEmit = 0
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -6(CX), DX
	LEAQ -8(CX), BP
	// search limit = src_len - 8
	MOVL BP, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	// dst limit = dst_base + (src_len - 6 - src_len>>5)
	MOVQ DX, (SP)
	// s = 1, repeat offset = 0
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm4MB:
	// SI = 8 bytes at src[s]
	MOVQ (DX)(CX*1), SI
	// nextS = s + 1 + ((s - nextEmit) >> 7); stop searching once nextS >= search limit.
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x07, BP
	LEAL 1(CX)(BP*1), BP
	CMPL BP, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm4MB
	MOVL BP, 20(SP)
	MOVQ $0x00cf1bbcdcbfa563, R8
	MOVQ $0x9e3779b1, BP
	MOVQ SI, R9
	MOVQ SI, R10
	// R9 = long hash: drop the top byte (<< 8), multiply, keep the top 16 bits.
	SHLQ $0x08, R9
	IMULQ R8, R9
	SHRQ $0x30, R9
	// R10 = short hash: keep the low 4 bytes (<< 32), multiply, keep the top 14 bits.
	SHLQ $0x20, R10
	IMULQ BP, R10
	SHRQ $0x32, R10
	// BP = long-table candidate, DI = short-table candidate; both slots are then set to s.
	MOVL 24(SP)(R9*4), BP
	MOVL 262168(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	MOVL CX, 262168(SP)(R10*4)
	// Compare 4 bytes at each candidate against the 4 low bytes of SI.
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeBetterBlockAsm4MB
	CMPL (DX)(DI*1), SI
	JEQ candidateS_match_encodeBetterBlockAsm4MB
	// No match: continue at nextS.
	MOVL 20(SP), CX
	JMP search_loop_encodeBetterBlockAsm4MB

candidateS_match_encodeBetterBlockAsm4MB:
	// The short candidate matched. Before using it, probe the long table for s+1:
	// SI >>= 8 gives the bytes starting at src[s+1].
	SHRQ $0x08, SI
	MOVQ SI, R9
	SHLQ $0x08, R9
	IMULQ R8, R9
	SHRQ $0x30, R9
	MOVL 24(SP)(R9*4), BP
	INCL CX
	MOVL CX, 24(SP)(R9*4)
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeBetterBlockAsm4MB
	// Long probe at s+1 failed: go back to s and use the short candidate.
	DECL CX
	MOVL DI, BP

candidate_match_encodeBetterBlockAsm4MB:
	// Extend the match backwards while s > nextEmit, candidate > 0 and the
	// preceding bytes are equal.
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeBetterBlockAsm4MB

match_extend_back_loop_encodeBetterBlockAsm4MB:
	CMPL CX, SI
	JLE match_extend_back_end_encodeBetterBlockAsm4MB
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeBetterBlockAsm4MB
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeBetterBlockAsm4MB
	JMP match_extend_back_loop_encodeBetterBlockAsm4MB

match_extend_back_end_encodeBetterBlockAsm4MB:
	// Return 0 unless dst + (s - nextEmit) + 4 is below the dst limit.
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeBetterBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBetterBlockAsm4MB:
	// SI = match start (s). Skip the 4 bytes already compared on both sides.
	MOVL CX, SI
	ADDL $0x04, CX
	ADDL $0x04, BP
	// DI = src bytes remaining after s+4; R8/R9 = pointers to src[s+4] and src[candidate+4].
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(BP*1), R9

	// matchLen
	// R11 = number of additional equal bytes (beyond the first 4).
	XORL R11, R11
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeBetterBlockAsm4MB

matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB:
	// Compare 8 bytes at a time; on a difference, the lowest set bit of the
	// XOR divided by 8 is the count of equal leading bytes.
	MOVQ (R8)(R11*1), R10
	XORQ (R9)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_match_nolit_encodeBetterBlockAsm4MB
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP match_nolit_end_encodeBetterBlockAsm4MB

matchlen_loop_match_nolit_encodeBetterBlockAsm4MB:
	LEAL -8(DI), DI
	LEAL 8(R11), R11
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB

matchlen_single_match_nolit_encodeBetterBlockAsm4MB:
	// Fewer than 8 bytes left: compare byte by byte.
	TESTL DI, DI
	JZ match_nolit_end_encodeBetterBlockAsm4MB

matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB:
	MOVB (R8)(R11*1), R10
	CMPB (R9)(R11*1), R10
	JNE match_nolit_end_encodeBetterBlockAsm4MB
	LEAL 1(R11), R11
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB

match_nolit_end_encodeBetterBlockAsm4MB:
	// DI = (s+4) - (candidate+4) = match offset.
	MOVL CX, DI
	SUBL BP, DI

	// Check if repeat
	CMPL 16(SP), DI
	JEQ match_is_repeat_encodeBetterBlockAsm4MB
	// Accept when more than 1 extra byte matched or the offset is <= 0xffff;
	// otherwise discard the match and resume searching at nextS + 1.
	CMPL R11, $0x01
	JG match_length_ok_encodeBetterBlockAsm4MB
	CMPL DI, $0x0000ffff
	JLE match_length_ok_encodeBetterBlockAsm4MB
	MOVL 20(SP), CX
	INCL CX
	JMP search_loop_encodeBetterBlockAsm4MB

match_length_ok_encodeBetterBlockAsm4MB:
	// Remember the offset, then emit src[nextEmit:SI] as a literal run (if non-empty).
	MOVL DI, 16(SP)
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB
	// R8 = literal length, R9 = literal source pointer, BP = length - 1; nextEmit = SI.
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	LEAL -1(R8), BP
	// Header size depends on length-1: <60 one byte, <256 two, <65536 three, else four.
	CMPL BP, $0x3c
	JLT one_byte_match_emit_encodeBetterBlockAsm4MB
	CMPL BP, $0x00000100
	JLT two_bytes_match_emit_encodeBetterBlockAsm4MB
	CMPL BP, $0x00010000
	JLT three_bytes_match_emit_encodeBetterBlockAsm4MB
	// Four-byte header: 0xf8 followed by the low 24 bits of length-1.
	MOVL BP, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_encodeBetterBlockAsm4MB

three_bytes_match_emit_encodeBetterBlockAsm4MB:
	// Three-byte header: 0xf4 followed by 16 bits of length-1.
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBetterBlockAsm4MB

two_bytes_match_emit_encodeBetterBlockAsm4MB:
	// Two-byte header: 0xf0 followed by 8 bits of length-1.
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	// length-1 < 64 still fits the short copy routine.
	CMPL BP, $0x40
	JL memmove_match_emit_encodeBetterBlockAsm4MB
	JMP memmove_long_match_emit_encodeBetterBlockAsm4MB

one_byte_match_emit_encodeBetterBlockAsm4MB:
	// One-byte header: (length-1) << 2.
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBetterBlockAsm4MB:
	// BP = dst pointer after the literal bytes.
	LEAQ (AX)(R8*1), BP

	// genMemMoveShort
	// Copies R8 bytes from (R9) to (AX), dispatching on size; each case uses
	// a load from the start and a (possibly overlapping) load from the end.
	CMPQ R8, $0x03
	JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
	MOVQ BP, AX
	JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB

memmove_long_match_emit_encodeBetterBlockAsm4MB:
	// BP = dst pointer after the literal bytes.
	LEAQ (AX)(R8*1), BP

	// genMemMoveLong
	// Copies R8 bytes from (R9) to (AX). The first and last 32 bytes are
	// loaded into X0-X3 up front and stored last; the loops in between store
	// with MOVOA at AX + R13 - 32, where R13 starts at 64 - (AX & 31).
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R12
	SHRQ $0x05, R12
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R13
	SUBQ R10, R13
	// NOTE(review): DECQ does not write CF, so JA/JNA below combine ZF from
	// DECQ with CF left by the preceding SUBQ/ADDQ.
	DECQ R12
	JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(R9)(R13*1), R10
	LEAQ -32(AX)(R13*1), R14

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R14)
	MOVOA X5, 16(R14)
	ADDQ $0x20, R14
	ADDQ $0x20, R10
	ADDQ $0x20, R13
	DECQ R12
	JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(R9)(R13*1), X4
	MOVOU -16(R9)(R13*1), X5
	MOVOA X4, -32(AX)(R13*1)
	MOVOA X5, -16(AX)(R13*1)
	ADDQ $0x20, R13
	CMPQ R8, R13
	JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	// Store the head and tail blocks loaded at the start.
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ BP, AX

emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
	// s = end of match; R11 = total match length (extra + 4); nextEmit = s.
	ADDL R11, CX
	ADDL $0x04, R11
	MOVL CX, 12(SP)

	// emitCopy
	// DI = offset, R11 = length. Offsets below 65536 use the 2-byte-offset forms.
	CMPL DI, $0x00010000
	JL two_byte_offset_match_nolit_encodeBetterBlockAsm4MB

four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB:
	CMPL R11, $0x40
	JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
	// Length > 64: write tag 0xff plus the 32-bit offset and account for 64 bytes.
	MOVB $0xff, (AX)
	MOVL DI, 1(AX)
	LEAL -64(R11), R11
	ADDQ $0x05, AX
	CMPL R11, $0x04
	JL four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB

	// emitRepeat
	// BP = remaining length, R11 = remaining length - 4.
	MOVL R11, BP
	LEAL -4(R11), R11
	CMPL BP, $0x08
	JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
	CMPL DI, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy

cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	CMPL R11, $0x00000104
	JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
	CMPL R11, $0x00010100
	JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy
	// Five bytes: 0x1d, 0x00, then 24 bits of (R11 - 65536).
	LEAL -65536(R11), R11
	MOVL R11, DI
	MOVW $0x001d, (AX)
	MOVW R11, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// Four bytes: 0x19, 0x00, then 16 bits of (R11 - 256).
	LEAL -256(R11), R11
	MOVW $0x0019, (AX)
	MOVW R11, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// Three bytes: 0x15, 0x00, then 8 bits of (R11 - 4).
	LEAL -4(R11), R11
	MOVW $0x0015, (AX)
	MOVB R11, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// Two bytes: 16-bit store of (R11 << 2) | 1.
	SHLL $0x02, R11
	ORL $0x01, R11
	MOVW R11, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// Two bytes: tag = 1 | R11<<2 | (offset>>8)<<5, followed by the low offset byte.
	XORQ BP, BP
	LEAL 1(BP)(R11*4), R11
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, R11
	MOVB R11, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
	// Unreachable: follows an unconditional JMP and has no label.
	JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB

four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
	TESTL R11, R11
	JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
	// Tag low byte = 3 + 4*(length-1) (only the low byte of R11 is stored),
	// followed by the 32-bit offset.
	MOVB $0x03, BL
	LEAL -4(BX)(R11*4), R11
	MOVB R11, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
	CMPL R11, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
	// Length > 64: write tag 0xee plus the 16-bit offset and account for 60 bytes.
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(R11), R11
	ADDQ $0x03, AX

	// emitRepeat
	// Same encoding ladder as the emitRepeat above, for the remaining length.
	MOVL R11, BP
	LEAL -4(R11), R11
	CMPL BP, $0x08
	JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
	CMPL DI, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	CMPL R11, $0x00000104
	JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
	CMPL R11, $0x00010100
	JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
	LEAL -65536(R11), R11
	MOVL R11, DI
	MOVW $0x001d, (AX)
	MOVW R11, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	LEAL -256(R11), R11
	MOVW $0x0019, (AX)
	MOVW R11, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	LEAL -4(R11), R11
	MOVW $0x0015, (AX)
	MOVB R11, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	SHLL $0x02, R11
	ORL $0x01, R11
	MOVW R11, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	XORQ BP, BP
	LEAL 1(BP)(R11*4), R11
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, R11
	MOVB R11, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
	// Unreachable: follows an unconditional JMP and has no label.
	JMP two_byte_offset_match_nolit_encodeBetterBlockAsm4MB

two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
	// Length <= 64. Use the 2-byte form only when length < 12 and offset < 2048.
	CMPL R11, $0x0c
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
	CMPL DI, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
	// Tag low byte = 1 + 4*(length-4) | (offset>>8)<<5, then the low offset byte.
	MOVB $0x01, BL
	LEAL -16(BX)(R11*4), R11
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, R11
	MOVB R11, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
	// Tag low byte = 2 + 4*(length-1), then the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R11*4), R11
	MOVB R11, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

match_is_repeat_encodeBetterBlockAsm4MB:
	// Offset equals the stored repeat offset: emit pending literals
	// src[nextEmit:SI] (same header/copy logic as above), then a repeat.
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	LEAL -1(R8), BP
	CMPL BP, $0x3c
	JLT one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
	CMPL BP, $0x00000100
	JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
	CMPL BP, $0x00010000
	JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
	MOVL BP, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB

three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB

two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL memmove_match_emit_repeat_encodeBetterBlockAsm4MB
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB

one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
	// BP = dst pointer after the literal bytes.
	LEAQ (AX)(R8*1), BP

	// genMemMoveShort
	// Copies R8 bytes from (R9) to (AX), dispatching on size.
	CMPQ R8, $0x03
	JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_1or2
	JE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
	MOVQ BP, AX
	JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB

memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
	// BP = dst pointer after the literal bytes.
	LEAQ (AX)(R8*1), BP

	// genMemMoveLong
	// Copies R8 bytes from (R9) to (AX); head and tail 32-byte blocks are
	// loaded first (X0-X3) and stored after the 32-byte loops.
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R12
	SHRQ $0x05, R12
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R13
	SUBQ R10, R13
	DECQ R12
	JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(R9)(R13*1), R10
	LEAQ -32(AX)(R13*1), R14

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R14)
	MOVOA X5, 16(R14)
	ADDQ $0x20, R14
	ADDQ $0x20, R10
	ADDQ $0x20, R13
	DECQ R12
	JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(R9)(R13*1), X4
	MOVOU -16(R9)(R13*1), X5
	MOVOA X4, -32(AX)(R13*1)
	MOVOA X5, -16(AX)(R13*1)
	ADDQ $0x20, R13
	CMPQ R8, R13
	JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ BP, AX

emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
	// s = end of match; R11 = total match length; nextEmit = s.
	ADDL R11, CX
	ADDL $0x04, R11
	MOVL CX, 12(SP)

	// emitRepeat
	// BP = length, R11 = length - 4; DI = offset. Same encoding ladder as in emitCopy.
	MOVL R11, BP
	LEAL -4(R11), R11
	CMPL BP, $0x08
	JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
	CMPL DI, $0x00000800
	JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB

cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
	CMPL R11, $0x00000104
	JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
	CMPL R11, $0x00010100
	JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB
	LEAL -65536(R11), R11
	MOVL R11, DI
	MOVW $0x001d, (AX)
	MOVW R11, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
	LEAL -256(R11), R11
	MOVW $0x0019, (AX)
	MOVW R11, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
	LEAL -4(R11), R11
	MOVW $0x0015, (AX)
	MOVB R11, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
	SHLL $0x02, R11
	ORL $0x01, R11
	MOVW R11, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
	XORQ BP, BP
	LEAL 1(BP)(R11*4), R11
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, R11
	MOVB R11, (AX)
	ADDQ $0x02, AX

match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
	// Past the search limit: emit the tail. Otherwise return 0 if dst reached its limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm4MB
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBetterBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBetterBlockAsm4MB:
	// Index four positions around the match before searching again:
	// long table <- start+1 and s-2, short table <- start+2 and s-1
	// (SI = match start on entry, CX = s = match end).
	MOVQ $0x00cf1bbcdcbfa563, BP
	MOVQ $0x9e3779b1, DI
	INCL SI
	MOVQ (DX)(SI*1), R8
	MOVQ R8, R9
	MOVQ R8, R10
	SHRQ $0x08, R10
	LEAL 1(SI), R11
	MOVQ -2(DX)(CX*1), R8
	SHLQ $0x08, R9
	IMULQ BP, R9
	SHRQ $0x30, R9
	SHLQ $0x20, R10
	IMULQ DI, R10
	SHRQ $0x32, R10
	MOVL SI, 24(SP)(R9*4)
	MOVL R11, 262168(SP)(R10*4)
	MOVQ R8, R9
	MOVQ R8, R10
	SHRQ $0x08, R10
	LEAL -2(CX), R8
	LEAL -1(CX), SI
	SHLQ $0x08, R9
	IMULQ BP, R9
	SHRQ $0x30, R9
	SHLQ $0x20, R10
	IMULQ DI, R10
	SHRQ $0x32, R10
	MOVL R8, 24(SP)(R9*4)
	MOVL SI, 262168(SP)(R10*4)
	JMP search_loop_encodeBetterBlockAsm4MB

emit_remainder_encodeBetterBlockAsm4MB:
	// Return 0 unless dst + (src_len - nextEmit) + 4 is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBetterBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBetterBlockAsm4MB:
	// Emit src[nextEmit:src_len] as a final literal run (skipped when empty).
	// CX = literal source pointer, BP = length, DX = length - 1 (DX no longer holds src base).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBetterBlockAsm4MB
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBetterBlockAsm4MB
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBetterBlockAsm4MB
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB

three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB

two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBetterBlockAsm4MB
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB

one_byte_emit_remainder_encodeBetterBlockAsm4MB:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBetterBlockAsm4MB:
	// DX = dst pointer after the literal bytes; BX = length.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveShort
	// Copies BX bytes from (CX) to (AX), dispatching on size.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB

memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
	// DX = dst pointer after the literal bytes; BX = length.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveLong
	// Copies BX bytes from (CX) to (AX); head and tail 32-byte blocks are
	// loaded first (X0-X3) and stored after the 32-byte loops.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, SI
	SHRQ $0x05, SI
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
	SUBQ BP, DI
	DECQ SI
	JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(CX)(DI*1), BP
	LEAQ -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	ADDQ $0x20, R8
	ADDQ $0x20, BP
	ADDQ $0x20, DI
	DECQ SI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ BX, DI
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
	// Return the number of bytes written: AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBetterBlockAsm12B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBetterBlockAsm12B(SB), $81944-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000280, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeBetterBlockAsm12B: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBetterBlockAsm12B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -6(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL $0x00000000, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBetterBlockAsm12B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x06, BP LEAL 1(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeBetterBlockAsm12B MOVL
BP, 20(SP) MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ $0x9e3779b1, BP MOVQ SI, R9 MOVQ SI, R10 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 SHLQ $0x20, R10 IMULQ BP, R10 SHRQ $0x34, R10 MOVL 24(SP)(R9*4), BP MOVL 65560(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) MOVL CX, 65560(SP)(R10*4) CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBetterBlockAsm12B CMPL (DX)(DI*1), SI JEQ candidateS_match_encodeBetterBlockAsm12B MOVL 20(SP), CX JMP search_loop_encodeBetterBlockAsm12B candidateS_match_encodeBetterBlockAsm12B: SHRQ $0x08, SI MOVQ SI, R9 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 MOVL 24(SP)(R9*4), BP INCL CX MOVL CX, 24(SP)(R9*4) CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBetterBlockAsm12B DECL CX MOVL DI, BP candidate_match_encodeBetterBlockAsm12B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBetterBlockAsm12B match_extend_back_loop_encodeBetterBlockAsm12B: CMPL CX, SI JLE match_extend_back_end_encodeBetterBlockAsm12B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBetterBlockAsm12B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBetterBlockAsm12B JMP match_extend_back_loop_encodeBetterBlockAsm12B match_extend_back_end_encodeBetterBlockAsm12B: MOVL CX, SI SUBL 12(SP), SI LEAQ 3(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBetterBlockAsm12B: MOVL CX, SI ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), R9 // matchLen XORL R11, R11 CMPL DI, $0x08 JL matchlen_single_match_nolit_encodeBetterBlockAsm12B matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: MOVQ (R8)(R11*1), R10 XORQ (R9)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm12B BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_loop_match_nolit_encodeBetterBlockAsm12B: LEAL -8(DI), DI LEAL 8(R11), R11 CMPL DI, $0x08 JGE 
matchlen_loopback_match_nolit_encodeBetterBlockAsm12B matchlen_single_match_nolit_encodeBetterBlockAsm12B: TESTL DI, DI JZ match_nolit_end_encodeBetterBlockAsm12B matchlen_single_loopback_match_nolit_encodeBetterBlockAsm12B: MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm12B LEAL 1(R11), R11 DECL DI JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm12B match_nolit_end_encodeBetterBlockAsm12B: MOVL CX, DI SUBL BP, DI // Check if repeat CMPL 16(SP), DI JEQ match_is_repeat_encodeBetterBlockAsm12B MOVL DI, 16(SP) MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 LEAL -1(R8), BP CMPL BP, $0x3c JLT one_byte_match_emit_encodeBetterBlockAsm12B CMPL BP, $0x00000100 JLT two_bytes_match_emit_encodeBetterBlockAsm12B MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBetterBlockAsm12B two_bytes_match_emit_encodeBetterBlockAsm12B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_match_emit_encodeBetterBlockAsm12B JMP memmove_long_match_emit_encodeBetterBlockAsm12B one_byte_match_emit_encodeBetterBlockAsm12B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBetterBlockAsm12B: LEAQ (AX)(R8*1), BP // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, 
-1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R9 MOVW R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBetterBlockAsm12B: MOVQ BP, AX JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B memmove_long_match_emit_encodeBetterBlockAsm12B: LEAQ (AX)(R8*1), BP // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R12 SHRQ $0x05, R12 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R13 SUBQ R10, R13 DECQ R12 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(R9)(R13*1), R10 LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R14) MOVOA X5, 16(R14) ADDQ $0x20, R14 ADDQ $0x20, R10 ADDQ $0x20, R13 DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back 
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(R9)(R13*1), X4 MOVOU -16(R9)(R13*1), X5 MOVOA X4, -32(AX)(R13*1) MOVOA X5, -16(AX)(R13*1) ADDQ $0x20, R13 CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ BP, AX emit_literal_done_match_emit_encodeBetterBlockAsm12B: ADDL R11, CX ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeBetterBlockAsm12B: CMPL R11, $0x40 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B MOVB $0xee, (AX) MOVW DI, 1(AX) LEAL -60(R11), R11 ADDQ $0x03, AX // emitRepeat MOVL R11, BP LEAL -4(R11), R11 CMPL BP, $0x08 JLE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short CMPL BP, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short CMPL DI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: CMPL R11, $0x00000104 JLT repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short LEAL -256(R11), R11 MOVW $0x0019, (AX) MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: LEAL -4(R11), R11 MOVW $0x0015, (AX) MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: SHLL $0x02, R11 ORL $0x01, R11 MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: XORQ BP, BP LEAL 1(BP)(R11*4), R11 MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, R11 MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B JMP two_byte_offset_match_nolit_encodeBetterBlockAsm12B 
two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B: CMPL R11, $0x0c JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B CMPL DI, $0x00000800 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(R11*4), R11 MOVB DI, 1(AX) SHRL $0x08, DI SHLL $0x05, DI ORL DI, R11 MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B emit_copy_three_match_nolit_encodeBetterBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(R11*4), R11 MOVB R11, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B match_is_repeat_encodeBetterBlockAsm12B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 LEAL -1(R8), BP CMPL BP, $0x3c JLT one_byte_match_emit_repeat_encodeBetterBlockAsm12B CMPL BP, $0x00000100 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm12B MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B two_bytes_match_emit_repeat_encodeBetterBlockAsm12B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_match_emit_repeat_encodeBetterBlockAsm12B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B one_byte_match_emit_repeat_encodeBetterBlockAsm12B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_match_emit_repeat_encodeBetterBlockAsm12B: LEAQ (AX)(R8*1), BP // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_1or2 JE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32 JMP 
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R9 MOVW R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B: MOVQ BP, AX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B memmove_long_match_emit_repeat_encodeBetterBlockAsm12B: LEAQ (AX)(R8*1), BP // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R12 SHRQ $0x05, R12 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R13 SUBQ R10, R13 DECQ R12 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(R9)(R13*1), R10 LEAQ -32(AX)(R13*1), R14 
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R14) MOVOA X5, 16(R14) ADDQ $0x20, R14 ADDQ $0x20, R10 ADDQ $0x20, R13 DECQ R12 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(R9)(R13*1), X4 MOVOU -16(R9)(R13*1), X5 MOVOA X4, -32(AX)(R13*1) MOVOA X5, -16(AX)(R13*1) ADDQ $0x20, R13 CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ BP, AX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B: ADDL R11, CX ADDL $0x04, R11 MOVL CX, 12(SP) // emitRepeat MOVL R11, BP LEAL -4(R11), R11 CMPL BP, $0x08 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B CMPL BP, $0x0c JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B CMPL DI, $0x00000800 JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: CMPL R11, $0x00000104 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B LEAL -256(R11), R11 MOVW $0x0019, (AX) MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B: LEAL -4(R11), R11 MOVW $0x0015, (AX) MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B: SHLL $0x02, R11 ORL $0x01, R11 MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: XORQ BP, BP LEAL 1(BP)(R11*4), R11 MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, R11 MOVB R11, (AX) ADDQ $0x02, AX match_nolit_emitcopy_end_encodeBetterBlockAsm12B: CMPL CX, 8(SP) JGE 
emit_remainder_encodeBetterBlockAsm12B CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm12B: MOVQ $0x0000cf1bbcdcbf9b, BP MOVQ $0x9e3779b1, DI INCL SI MOVQ (DX)(SI*1), R8 MOVQ R8, R9 MOVQ R8, R10 SHRQ $0x08, R10 LEAL 1(SI), R11 MOVQ -2(DX)(CX*1), R8 SHLQ $0x10, R9 IMULQ BP, R9 SHRQ $0x32, R9 SHLQ $0x20, R10 IMULQ DI, R10 SHRQ $0x34, R10 MOVL SI, 24(SP)(R9*4) MOVL R11, 65560(SP)(R10*4) MOVQ R8, R9 MOVQ R8, R10 SHRQ $0x08, R10 LEAL -2(CX), R8 LEAL -1(CX), SI SHLQ $0x10, R9 IMULQ BP, R9 SHRQ $0x32, R9 SHLQ $0x20, R10 IMULQ DI, R10 SHRQ $0x34, R10 MOVL R8, 24(SP)(R9*4) MOVL SI, 65560(SP)(R10*4) JMP search_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBetterBlockAsm12B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBetterBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBetterBlockAsm12B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B two_bytes_emit_remainder_encodeBetterBlockAsm12B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B one_byte_emit_remainder_encodeBetterBlockAsm12B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBetterBlockAsm12B: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveShort CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3 CMPQ BX, $0x08 JB 
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B memmove_long_emit_remainder_encodeBetterBlockAsm12B: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, SI SHRQ $0x05, SI MOVQ AX, BP 
ANDL $0x0000001f, BP MOVQ $0x00000040, DI SUBQ BP, DI DECQ SI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(CX)(DI*1), BP LEAQ -32(AX)(DI*1), R8 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: MOVOU (BP), X4 MOVOU 16(BP), X5 MOVOA X4, (R8) MOVOA X5, 16(R8) ADDQ $0x20, R8 ADDQ $0x20, BP ADDQ $0x20, DI DECQ SI JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(CX)(DI*1), X4 MOVOU -16(CX)(DI*1), X5 MOVOA X4, -32(AX)(DI*1) MOVOA X5, -16(AX)(DI*1) ADDQ $0x20, DI CMPQ BX, DI JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeBetterBlockAsm12B: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeBetterBlockAsm10B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBetterBlockAsm10B(SB), $20504-56 MOVQ dst_base+0(FP), AX MOVQ $0x000000a0, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeBetterBlockAsm10B: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBetterBlockAsm10B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -6(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL $0x00000000, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBetterBlockAsm10B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x05, BP LEAL 1(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeBetterBlockAsm10B MOVL BP, 20(SP) MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ $0x9e3779b1, BP MOVQ SI, R9 MOVQ SI, R10 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x34, R9 SHLQ $0x20, R10 IMULQ BP, R10 SHRQ $0x36, 
R10 MOVL 24(SP)(R9*4), BP MOVL 16408(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) MOVL CX, 16408(SP)(R10*4) CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBetterBlockAsm10B CMPL (DX)(DI*1), SI JEQ candidateS_match_encodeBetterBlockAsm10B MOVL 20(SP), CX JMP search_loop_encodeBetterBlockAsm10B candidateS_match_encodeBetterBlockAsm10B: SHRQ $0x08, SI MOVQ SI, R9 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x34, R9 MOVL 24(SP)(R9*4), BP INCL CX MOVL CX, 24(SP)(R9*4) CMPL (DX)(BP*1), SI JEQ candidate_match_encodeBetterBlockAsm10B DECL CX MOVL DI, BP candidate_match_encodeBetterBlockAsm10B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeBetterBlockAsm10B match_extend_back_loop_encodeBetterBlockAsm10B: CMPL CX, SI JLE match_extend_back_end_encodeBetterBlockAsm10B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeBetterBlockAsm10B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeBetterBlockAsm10B JMP match_extend_back_loop_encodeBetterBlockAsm10B match_extend_back_end_encodeBetterBlockAsm10B: MOVL CX, SI SUBL 12(SP), SI LEAQ 3(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeBetterBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBetterBlockAsm10B: MOVL CX, SI ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), R9 // matchLen XORL R11, R11 CMPL DI, $0x08 JL matchlen_single_match_nolit_encodeBetterBlockAsm10B matchlen_loopback_match_nolit_encodeBetterBlockAsm10B: MOVQ (R8)(R11*1), R10 XORQ (R9)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm10B BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_loop_match_nolit_encodeBetterBlockAsm10B: LEAL -8(DI), DI LEAL 8(R11), R11 CMPL DI, $0x08 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B matchlen_single_match_nolit_encodeBetterBlockAsm10B: TESTL DI, DI JZ match_nolit_end_encodeBetterBlockAsm10B 
matchlen_single_loopback_match_nolit_encodeBetterBlockAsm10B: MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm10B LEAL 1(R11), R11 DECL DI JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm10B match_nolit_end_encodeBetterBlockAsm10B: MOVL CX, DI SUBL BP, DI // Check if repeat CMPL 16(SP), DI JEQ match_is_repeat_encodeBetterBlockAsm10B MOVL DI, 16(SP) MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 LEAL -1(R8), BP CMPL BP, $0x3c JLT one_byte_match_emit_encodeBetterBlockAsm10B CMPL BP, $0x00000100 JLT two_bytes_match_emit_encodeBetterBlockAsm10B MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBetterBlockAsm10B two_bytes_match_emit_encodeBetterBlockAsm10B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_match_emit_encodeBetterBlockAsm10B JMP memmove_long_match_emit_encodeBetterBlockAsm10B one_byte_match_emit_encodeBetterBlockAsm10B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBetterBlockAsm10B: LEAQ (AX)(R8*1), BP // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R9 MOVW 
R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeBetterBlockAsm10B: MOVQ BP, AX JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B memmove_long_match_emit_encodeBetterBlockAsm10B: LEAQ (AX)(R8*1), BP // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R12 SHRQ $0x05, R12 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R13 SUBQ R10, R13 DECQ R12 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 LEAQ -32(R9)(R13*1), R10 LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R14) MOVOA X5, 16(R14) ADDQ $0x20, R14 ADDQ $0x20, R10 ADDQ $0x20, R13 DECQ R12 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: MOVOU -32(R9)(R13*1), X4 MOVOU -16(R9)(R13*1), X5 MOVOA X4, -32(AX)(R13*1) MOVOA X5, -16(AX)(R13*1) ADDQ $0x20, R13 CMPQ R8, R13 JAE 
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ BP, AX emit_literal_done_match_emit_encodeBetterBlockAsm10B: ADDL R11, CX ADDL $0x04, R11 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeBetterBlockAsm10B: CMPL R11, $0x40 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B MOVB $0xee, (AX) MOVW DI, 1(AX) LEAL -60(R11), R11 ADDQ $0x03, AX // emitRepeat MOVL R11, BP LEAL -4(R11), R11 CMPL BP, $0x08 JLE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short CMPL BP, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short CMPL DI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: CMPL R11, $0x00000104 JLT repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short LEAL -256(R11), R11 MOVW $0x0019, (AX) MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: LEAL -4(R11), R11 MOVW $0x0015, (AX) MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: SHLL $0x02, R11 ORL $0x01, R11 MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: XORQ BP, BP LEAL 1(BP)(R11*4), R11 MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, R11 MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B JMP two_byte_offset_match_nolit_encodeBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B: CMPL R11, $0x0c JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B CMPL DI, $0x00000800 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B MOVB $0x01, BL LEAL -16(BX)(R11*4), 
R11 MOVB DI, 1(AX) SHRL $0x08, DI SHLL $0x05, DI ORL DI, R11 MOVB R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B emit_copy_three_match_nolit_encodeBetterBlockAsm10B: MOVB $0x02, BL LEAL -4(BX)(R11*4), R11 MOVB R11, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B match_is_repeat_encodeBetterBlockAsm10B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(BP*1), R9 SUBL BP, R8 LEAL -1(R8), BP CMPL BP, $0x3c JLT one_byte_match_emit_repeat_encodeBetterBlockAsm10B CMPL BP, $0x00000100 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm10B MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B two_bytes_match_emit_repeat_encodeBetterBlockAsm10B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_match_emit_repeat_encodeBetterBlockAsm10B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B one_byte_match_emit_repeat_encodeBetterBlockAsm10B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_match_emit_repeat_encodeBetterBlockAsm10B: LEAQ (AX)(R8*1), BP // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_1or2 JE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_1or2: MOVB (R9), R10 MOVB -1(R9)(R8*1), R9 MOVB R10, (AX) MOVB R9, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B 
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_3: MOVW (R9), R10 MOVB 2(R9), R9 MOVW R10, (AX) MOVB R9, 2(AX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B: MOVQ BP, AX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B memmove_long_match_emit_repeat_encodeBetterBlockAsm10B: LEAQ (AX)(R8*1), BP // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R12 SHRQ $0x05, R12 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R13 SUBQ R10, R13 DECQ R12 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 LEAQ -32(R9)(R13*1), R10 LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R14) MOVOA X5, 16(R14) ADDQ $0x20, R14 ADDQ $0x20, R10 ADDQ $0x20, R13 DECQ R12 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back 
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: MOVOU -32(R9)(R13*1), X4 MOVOU -16(R9)(R13*1), X5 MOVOA X4, -32(AX)(R13*1) MOVOA X5, -16(AX)(R13*1) ADDQ $0x20, R13 CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ BP, AX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B: ADDL R11, CX ADDL $0x04, R11 MOVL CX, 12(SP) // emitRepeat MOVL R11, BP LEAL -4(R11), R11 CMPL BP, $0x08 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B CMPL BP, $0x0c JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B CMPL DI, $0x00000800 JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: CMPL R11, $0x00000104 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B LEAL -256(R11), R11 MOVW $0x0019, (AX) MOVW R11, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B: LEAL -4(R11), R11 MOVW $0x0015, (AX) MOVB R11, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B: SHLL $0x02, R11 ORL $0x01, R11 MOVW R11, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: XORQ BP, BP LEAL 1(BP)(R11*4), R11 MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, R11 MOVB R11, (AX) ADDQ $0x02, AX match_nolit_emitcopy_end_encodeBetterBlockAsm10B: CMPL CX, 8(SP) JGE emit_remainder_encodeBetterBlockAsm10B CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBetterBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm10B: MOVQ $0x0000cf1bbcdcbf9b, BP MOVQ $0x9e3779b1, DI INCL SI MOVQ (DX)(SI*1), R8 MOVQ R8, R9 MOVQ R8, R10 SHRQ $0x08, R10 LEAL 1(SI), R11 MOVQ 
-2(DX)(CX*1), R8 SHLQ $0x10, R9 IMULQ BP, R9 SHRQ $0x34, R9 SHLQ $0x20, R10 IMULQ DI, R10 SHRQ $0x36, R10 MOVL SI, 24(SP)(R9*4) MOVL R11, 16408(SP)(R10*4) MOVQ R8, R9 MOVQ R8, R10 SHRQ $0x08, R10 LEAL -2(CX), R8 LEAL -1(CX), SI SHLQ $0x10, R9 IMULQ BP, R9 SHRQ $0x34, R9 SHLQ $0x20, R10 IMULQ DI, R10 SHRQ $0x36, R10 MOVL R8, 24(SP)(R9*4) MOVL SI, 16408(SP)(R10*4) JMP search_loop_encodeBetterBlockAsm10B emit_remainder_encodeBetterBlockAsm10B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBetterBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBetterBlockAsm10B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBetterBlockAsm10B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBetterBlockAsm10B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B two_bytes_emit_remainder_encodeBetterBlockAsm10B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B one_byte_emit_remainder_encodeBetterBlockAsm10B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBetterBlockAsm10B: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveShort CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32 JMP 
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B memmove_long_emit_remainder_encodeBetterBlockAsm10B: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, SI SHRQ $0x05, SI MOVQ AX, BP ANDL $0x0000001f, BP MOVQ $0x00000040, DI SUBQ BP, DI DECQ SI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 LEAQ -32(CX)(DI*1), BP LEAQ -32(AX)(DI*1), R8 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: 
MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	ADDQ $0x20, R8
	ADDQ $0x20, BP
	ADDQ $0x20, DI
	DECQ SI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ BX, DI
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBetterBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written to dst, or 0
// when one of the output-size checks against the limit stored at (SP) fails.
//
// Register roles in the main loop:
//   AX = current write position in dst
//   DX = src base pointer
//   CX = current read position in src (s)
//   SI = the 8 bytes loaded from src at s
//
// Stack frame (5144 bytes):
//   0(SP)     dst limit: dst_base + src_len - 6 - (src_len >> 5)
//   8(SP)     search limit: src_len - 8
//   12(SP)    start of the input that has not been emitted yet
//   16(SP)    offset of the previous match (starts at 0)
//   20(SP)    position the search resumes at after a miss
//   24(SP)    1024 x 32-bit positions, indexed by a 10-bit hash of 6 bytes
//   4120(SP)  256 x 32-bit positions, indexed by an 8-bit hash of 4 bytes
//
// NOTE(review): the first 8-byte load at src+1 happens before any length
// check, so the caller presumably guarantees a minimum src length - confirm
// against the Go wrapper.
TEXT ·encodeBetterBlockAsm8B(SB), $5144-56
	MOVQ dst_base+0(FP), AX
	// 0x28 iterations of 128 bytes clear both tables (4096 + 1024 bytes).
	MOVQ $0x00000028, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBetterBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBetterBlockAsm8B
	// Nothing emitted yet.
	MOVL $0x00000000, 12(SP)
	// Derive the search limit (8(SP)) and the dst limit ((SP)) from src_len.
	MOVQ src_len+32(FP), CX
	LEAQ -6(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	// Start searching at s = 1 with no previous match offset.
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm8B:
	MOVQ (DX)(CX*1), SI
	// Next position = s + 1 + ((s - start of unemitted input) >> 4), so the
	// step grows the longer no match is found.
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x04, BP
	LEAL 1(CX)(BP*1), BP
	CMPL BP, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm8B
	MOVL BP, 20(SP)
	// R9  = 10-bit hash of the low 6 bytes of SI (multiplier in R8).
	// R10 = 8-bit hash of the low 4 bytes of SI (multiplier in BP).
	MOVQ $0x0000cf1bbcdcbf9b, R8
	MOVQ $0x9e3779b1, BP
	MOVQ SI, R9
	MOVQ SI, R10
	SHLQ $0x10, R9
	IMULQ R8, R9
	SHRQ $0x36, R9
	SHLQ $0x20, R10
	IMULQ BP, R10
	SHRQ $0x38, R10
	// Fetch the previous position from each table (BP from the first, DI from
	// the second), then record s in both.
	MOVL 24(SP)(R9*4), BP
	MOVL 4120(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	MOVL CX, 4120(SP)(R10*4)
	// A candidate is accepted when its 4 bytes equal the 4 bytes at s.
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeBetterBlockAsm8B
	CMPL (DX)(DI*1), SI
	JEQ candidateS_match_encodeBetterBlockAsm8B
	// Neither candidate matched: continue at the saved next position.
	MOVL 20(SP), CX
	JMP search_loop_encodeBetterBlockAsm8B

// The candidate from the second table matched. Before using it, look up the
// first table for the bytes at s+1 (recording s+1 there); if that entry
// matches, use it with s advanced by one, otherwise fall back to DI at s.
candidateS_match_encodeBetterBlockAsm8B:
	SHRQ $0x08, SI
	MOVQ SI, R9
	SHLQ $0x10, R9
	IMULQ R8, R9
	SHRQ $0x36, R9
	MOVL 24(SP)(R9*4), BP
	INCL CX
	MOVL CX, 24(SP)(R9*4)
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeBetterBlockAsm8B
	DECL CX
	MOVL DI, BP

// Here CX = match position, BP = candidate position.
candidate_match_encodeBetterBlockAsm8B:
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeBetterBlockAsm8B

// Move both positions backwards while the preceding bytes are equal, stopping
// at the start of the unemitted input (SI) or when the candidate reaches 0.
match_extend_back_loop_encodeBetterBlockAsm8B:
	CMPL CX, SI
	JLE match_extend_back_end_encodeBetterBlockAsm8B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeBetterBlockAsm8B
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeBetterBlockAsm8B
	JMP match_extend_back_loop_encodeBetterBlockAsm8B

// Return 0 unless AX + pending input length + 3 is below the dst limit.
match_extend_back_end_encodeBetterBlockAsm8B:
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 3(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBetterBlockAsm8B:
	// SI keeps the match start; CX and BP step over the 4 bytes already
	// compared. DI = bytes left in src from CX, R8/R9 = the two byte streams.
	MOVL CX, SI
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(BP*1), R9

	// matchLen
	// R11 counts how many further bytes at R8 and R9 are equal, limited by DI.
	XORL R11, R11
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeBetterBlockAsm8B

// Compare 8 bytes per step; on a difference the lowest set bit of the XOR,
// divided by 8, is the number of equal bytes in this word.
matchlen_loopback_match_nolit_encodeBetterBlockAsm8B:
	MOVQ (R8)(R11*1), R10
	XORQ (R9)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_match_nolit_encodeBetterBlockAsm8B
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP match_nolit_end_encodeBetterBlockAsm8B

matchlen_loop_match_nolit_encodeBetterBlockAsm8B:
	LEAL -8(DI), DI
	LEAL 8(R11), R11
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B

// Fewer than 8 bytes left: finish one byte at a time.
matchlen_single_match_nolit_encodeBetterBlockAsm8B:
	TESTL DI, DI
	JZ match_nolit_end_encodeBetterBlockAsm8B

matchlen_single_loopback_match_nolit_encodeBetterBlockAsm8B:
	MOVB (R8)(R11*1), R10
	CMPB (R9)(R11*1), R10
	JNE match_nolit_end_encodeBetterBlockAsm8B
	LEAL 1(R11), R11
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm8B

match_nolit_end_encodeBetterBlockAsm8B:
	// DI = distance between the match and its candidate (the offset).
	MOVL CX, DI
	SUBL BP, DI

	// Check if repeat
	// Same offset as the previous match takes the repeat path; otherwise the
	// new offset is remembered in 16(SP).
	CMPL 16(SP), DI
	JEQ match_is_repeat_encodeBetterBlockAsm8B
	MOVL DI, 16(SP)
	// Emit the pending input [12(SP), SI) as a literal, if there is any.
	// R9 = source pointer, R8 = length, BP = length - 1.
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	LEAL -1(R8), BP
	// Header size depends on length - 1:
	//   < 0x3c  : one byte, (length-1) << 2
	//   < 0x100 : 0xf0 followed by one byte
	//   else    : 0xf4 followed by 16 bits
	CMPL BP, $0x3c
	JLT one_byte_match_emit_encodeBetterBlockAsm8B
	CMPL BP, $0x00000100
	JLT two_bytes_match_emit_encodeBetterBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBetterBlockAsm8B

two_bytes_match_emit_encodeBetterBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	// Up to 64 bytes still fit the short copy.
	CMPL BP, $0x40
	JL memmove_match_emit_encodeBetterBlockAsm8B
	JMP memmove_long_match_emit_encodeBetterBlockAsm8B

one_byte_match_emit_encodeBetterBlockAsm8B:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBetterBlockAsm8B:
	// BP = dst position after the copy.
	LEAQ (AX)(R8*1), BP

	// genMemMoveShort
	// Copy R8 bytes (at most 64) from R9 to AX. Each size class loads the
	// head and the tail of the range, which may overlap, and stores both.
	CMPQ R8, $0x03
	JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeBetterBlockAsm8B:
	MOVQ BP, AX
	JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B

memmove_long_match_emit_encodeBetterBlockAsm8B:
	// BP = dst position after the copy.
	LEAQ (AX)(R8*1), BP

	// genMemMoveLong
	// Copy R8 bytes from R9 to AX. The first and last 32 bytes are loaded
	// into X0-X3 up front and stored last with unaligned moves; the middle is
	// copied 32 bytes per step using aligned stores. R13 = 64 - (AX & 31) is
	// the running offset, which makes AX + R13 - 32 a 32-byte aligned address.
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R12
	SHRQ $0x05, R12
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R13
	SUBQ R10, R13
	// NOTE(review): DECQ does not write CF, so JA combines the carry left by
	// SUBQ with the zero flag from DECQ; kept exactly as generated.
	DECQ R12
	JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R9)(R13*1), R10
	LEAQ -32(AX)(R13*1), R14

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R14)
	MOVOA X5, 16(R14)
	ADDQ $0x20, R14
	ADDQ $0x20, R10
	ADDQ $0x20, R13
	DECQ R12
	JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R9)(R13*1), X4
	MOVOU -16(R9)(R13*1), X5
	MOVOA X4, -32(AX)(R13*1)
	MOVOA X5, -16(AX)(R13*1)
	ADDQ $0x20, R13
	CMPQ R8, R13
	JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	// Store the saved head and tail, then advance AX past the copy.
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ BP, AX

emit_literal_done_match_emit_encodeBetterBlockAsm8B:
	// CX = end of the match, R11 = full match length (the 4 bytes compared
	// first plus the extension); input up to CX now counts as emitted.
	ADDL R11, CX
	ADDL $0x04, R11
	MOVL CX, 12(SP)

	// emitCopy
	// Encode a copy of R11 bytes at offset DI.
two_byte_offset_match_nolit_encodeBetterBlockAsm8B:
	CMPL R11, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B
	// Longer than 64: write 0xee plus the 16-bit offset, take 60 off the
	// length, and encode what is left with the repeat forms below.
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(R11), R11
	ADDQ $0x03, AX

	// emitRepeat
	// BP = remaining length, R11 = remaining length - 4.
	MOVL R11, BP
	LEAL -4(R11), R11
	CMPL BP, $0x08
	JLE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
	// Both outcomes of this branch reach the label directly below.
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
	CMPL R11, $0x00000104
	JLT repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
	// 4-byte form: word 0x0019, then the 16-bit value R11 - 256.
	LEAL -256(R11), R11
	MOVW $0x0019, (AX)
	MOVW R11, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

// 3-byte form: word 0x0015, then the byte R11 - 4.
repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
	LEAL -4(R11), R11
	MOVW $0x0015, (AX)
	MOVB R11, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

// 2-byte form: the 16-bit value (R11 << 2) | 1.
repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
	SHLL $0x02, R11
	ORL $0x01, R11
	MOVW R11, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
	// The instructions from here to the next label follow an unconditional
	// JMP and carry no label, so nothing in this function branches to them.
	XORQ BP, BP
	LEAL 1(BP)(R11*4), R11
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, R11
	MOVB R11, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
	JMP two_byte_offset_match_nolit_encodeBetterBlockAsm8B

two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B:
	CMPL R11, $0x0c
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm8B
	// Length below 12: two bytes. The first is 1 + (length-4)*4 combined with
	// (offset >> 8) << 5, the second is the low byte of the offset.
	MOVB $0x01, BL
	LEAL -16(BX)(R11*4), R11
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, R11
	MOVB R11, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

// Length 12..64: three bytes, 2 + (length-1)*4 followed by the 16-bit offset.
emit_copy_three_match_nolit_encodeBetterBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(R11*4), R11
	MOVB R11, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

// The match uses the same offset as the previous one. Emit the pending input
// [12(SP), SI) as a literal exactly as above (R8 = source pointer,
// DI = length, BP = length - 1), then encode the match with the repeat forms.
match_is_repeat_encodeBetterBlockAsm8B:
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
	MOVL SI, DI
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R8
	SUBL BP, DI
	LEAL -1(DI), BP
	CMPL BP, $0x3c
	JLT one_byte_match_emit_repeat_encodeBetterBlockAsm8B
	CMPL BP, $0x00000100
	JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B

two_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL memmove_match_emit_repeat_encodeBetterBlockAsm8B
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B

one_byte_match_emit_repeat_encodeBetterBlockAsm8B:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_match_emit_repeat_encodeBetterBlockAsm8B:
	LEAQ (AX)(DI*1), BP

	// genMemMoveShort
	// Copy DI bytes (at most 64) from R8 to AX, by size class.
	CMPQ DI, $0x03
	JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_3
	CMPQ DI, $0x08
	JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7
	CMPQ DI, $0x10
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16
	CMPQ DI, $0x20
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_1or2:
	MOVB (R8), R9
	MOVB -1(R8)(DI*1), R8
	MOVB R9, (AX)
	MOVB R8, -1(AX)(DI*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_3:
	MOVW (R8), R9
	MOVB 2(R8), R8
	MOVW R9, (AX)
	MOVB R8, 2(AX)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7:
	MOVL (R8), R9
	MOVL -4(R8)(DI*1), R8
	MOVL R9, (AX)
	MOVL R8, -4(AX)(DI*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16:
	MOVQ (R8), R9
	MOVQ -8(R8)(DI*1), R8
	MOVQ R9, (AX)
	MOVQ R8, -8(AX)(DI*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32:
	MOVOU (R8), X0
	MOVOU -16(R8)(DI*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DI*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)

memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B:
	MOVQ BP, AX
	JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B

memmove_long_match_emit_repeat_encodeBetterBlockAsm8B:
	LEAQ (AX)(DI*1), BP

	// genMemMoveLong
	// Copy DI bytes from R8 to AX: head and tail via X0-X3, middle in 32-byte
	// steps with aligned stores (R12 = running offset).
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVQ DI, R10
	SHRQ $0x05, R10
	MOVQ AX, R9
	ANDL $0x0000001f, R9
	MOVQ $0x00000040, R12
	SUBQ R9, R12
	DECQ R10
	JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R8)(R12*1), R9
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R9
	ADDQ $0x20, R12
	DECQ R10
	JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R8)(R12*1), X4
	MOVOU -16(R8)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ DI, R12
	JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)
	MOVQ BP, AX

emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B:
	// CX = end of the match, R11 = full match length; input up to CX now
	// counts as emitted.
	ADDL R11, CX
	ADDL $0x04, R11
	MOVL CX, 12(SP)

	// emitRepeat
	// BP = length, R11 = length - 4; the three forms mirror the ones above.
	MOVL R11, BP
	LEAL -4(R11), R11
	CMPL BP, $0x08
	JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B
	// Both outcomes of this branch reach the label directly below.
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B

cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B:
	CMPL R11, $0x00000104
	JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B
	LEAL -256(R11), R11
	MOVW $0x0019, (AX)
	MOVW R11, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B:
	LEAL -4(R11), R11
	MOVW $0x0015, (AX)
	MOVB R11, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B:
	SHLL $0x02, R11
	ORL $0x01, R11
	MOVW R11, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
	// The instructions from here to the next label follow an unconditional
	// JMP and carry no label, so nothing in this function branches to them.
	XORQ BP, BP
	LEAL 1(BP)(R11*4), R11
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, R11
	MOVB R11, (AX)
	ADDQ $0x02, AX

// After a match: stop searching once CX reaches the search limit, and return
// 0 if AX has reached the dst limit.
match_nolit_emitcopy_end_encodeBetterBlockAsm8B:
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm8B
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

// Record four positions around the match before resuming the search
// (SI still holds the match start, CX its end):
//   first table:  start+1 and end-2
//   second table: start+2 and end-1
// Each entry is keyed by the hash of the bytes at the recorded position.
match_nolit_dst_ok_encodeBetterBlockAsm8B:
	MOVQ $0x0000cf1bbcdcbf9b, BP
	MOVQ $0x9e3779b1, DI
	INCL SI
	MOVQ (DX)(SI*1), R8
	MOVQ R8, R9
	MOVQ R8, R10
	SHRQ $0x08, R10
	LEAL 1(SI), R11
	MOVQ -2(DX)(CX*1), R8
	SHLQ $0x10, R9
	IMULQ BP, R9
	SHRQ $0x36, R9
	SHLQ $0x20, R10
	IMULQ DI, R10
	SHRQ $0x38, R10
	MOVL SI, 24(SP)(R9*4)
	MOVL R11, 4120(SP)(R10*4)
	MOVQ R8, R9
	MOVQ R8, R10
	SHRQ $0x08, R10
	LEAL -2(CX), R8
	LEAL -1(CX), SI
	SHLQ $0x10, R9
	IMULQ BP, R9
	SHRQ $0x36, R9
	SHLQ $0x20, R10
	IMULQ DI, R10
	SHRQ $0x38, R10
	MOVL R8, 24(SP)(R9*4)
	MOVL SI, 4120(SP)(R10*4)
	JMP search_loop_encodeBetterBlockAsm8B

// Search finished. Return 0 unless AX + remaining input length + 3 is below
// the dst limit; otherwise emit everything from 12(SP) to src_len as one
// literal.
emit_remainder_encodeBetterBlockAsm8B:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBetterBlockAsm8B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
	// CX = source pointer, BP = length, DX = length - 1. DX no longer holds
	// the src base from here on.
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	// Same three header sizes as for the literals emitted before a match.
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBetterBlockAsm8B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBetterBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B

two_bytes_emit_remainder_encodeBetterBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBetterBlockAsm8B
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B

one_byte_emit_remainder_encodeBetterBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBetterBlockAsm8B:
	// DX = dst position after the copy, BX = length.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveShort
	// Copy BX bytes (at most 64) from CX to AX, by size class.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B

memmove_long_emit_remainder_encodeBetterBlockAsm8B:
	// DX = dst position after the copy, BX = length.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveLong
	// Copy BX bytes from CX to AX: head and tail via X0-X3, middle in 32-byte
	// steps with aligned stores (DI = running offset).
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, SI
	SHRQ $0x05, SI
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
	SUBQ BP, DI
	DECQ SI
	JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(CX)(DI*1), BP
	LEAQ -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	ADDQ $0x20, R8
	ADDQ $0x20, BP
	ADDQ $0x20, DI
	DECQ SI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ BX, DI
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

// Return value = bytes written = AX - dst_base.
emit_literal_done_emit_remainder_encodeBetterBlockAsm8B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBlockAsm(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ
$0x00000200, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsm MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x06, BP LEAL 4(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm MOVL BP, 20(SP) MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 SHLQ $0x10, R10 IMULQ R8, R10 SHRQ $0x32, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x10, R9 IMULQ R8, R9 SHRQ $0x32, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm repeat_extend_back_loop_encodeSnappyBlockAsm: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm repeat_extend_back_end_encodeSnappyBlockAsm: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI LEAL -1(DI), BP CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm CMPL BP, $0x00010000 JLT three_bytes_repeat_emit_encodeSnappyBlockAsm CMPL BP, $0x01000000 JLT 
four_bytes_repeat_emit_encodeSnappyBlockAsm MOVB $0xfc, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm four_bytes_repeat_emit_encodeSnappyBlockAsm: MOVL BP, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW BP, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm three_bytes_repeat_emit_encodeSnappyBlockAsm: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm two_bytes_repeat_emit_encodeSnappyBlockAsm: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_repeat_emit_encodeSnappyBlockAsm JMP memmove_long_repeat_emit_encodeSnappyBlockAsm one_byte_repeat_emit_encodeSnappyBlockAsm: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm: LEAQ (AX)(DI*1), BP // genMemMoveShort CMPQ DI, $0x03 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4through7 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R8 MOVB R9, (AX) MOVB R8, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R8 MOVW R9, (AX) MOVB R8, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R8 MOVL R9, (AX) MOVL R8, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16: MOVQ 
(R8), R9 MOVQ -8(R8)(DI*1), R8 MOVQ R9, (AX) MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: MOVQ BP, AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm memmove_long_repeat_emit_encodeSnappyBlockAsm: LEAQ (AX)(DI*1), BP // genMemMoveLong MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVQ DI, R10 SHRQ $0x05, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 LEAQ -32(R8)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOA X4, (R12) MOVOA X5, 16(R12) ADDQ $0x20, R12 ADDQ $0x20, R9 ADDQ $0x20, R11 DECQ R10 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: MOVOU -32(R8)(R11*1), X4 MOVOU -16(R8)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ DI, R11 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP // matchLen XORL R10, R10 CMPL DI, $0x08 JL 
matchlen_single_repeat_extend_encodeSnappyBlockAsm matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_loop_repeat_extend_encodeSnappyBlockAsm: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm matchlen_single_repeat_extend_encodeSnappyBlockAsm: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm repeat_extend_forward_end_encodeSnappyBlockAsm: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI // emitCopy CMPL SI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: CMPL BP, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(BP), BP ADDQ $0x05, AX CMPL BP, $0x04 JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: TESTL BP, BP JZ repeat_end_emit_encodeSnappyBlockAsm MOVB $0x03, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeSnappyBlockAsm two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm CMPL SI, $0x00000800 JGE 
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm: MOVL CX, 12(SP) JMP search_loop_encodeSnappyBlockAsm no_repeat_found_encodeSnappyBlockAsm: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm candidate3_match_encodeSnappyBlockAsm: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm candidate2_match_encodeSnappyBlockAsm: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsm: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm match_extend_back_loop_encodeSnappyBlockAsm: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsm MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm JMP match_extend_back_loop_encodeSnappyBlockAsm match_extend_back_end_encodeSnappyBlockAsm: MOVL CX, SI SUBL 12(SP), SI LEAQ 5(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 LEAL -1(R8), DI CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm CMPL DI, $0x00010000 JLT 
three_bytes_match_emit_encodeSnappyBlockAsm CMPL DI, $0x01000000 JLT four_bytes_match_emit_encodeSnappyBlockAsm MOVB $0xfc, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm four_bytes_match_emit_encodeSnappyBlockAsm: MOVL DI, R9 SHRL $0x10, R9 MOVB $0xf8, (AX) MOVW DI, 1(AX) MOVB R9, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm three_bytes_match_emit_encodeSnappyBlockAsm: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm two_bytes_match_emit_encodeSnappyBlockAsm: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX CMPL DI, $0x40 JL memmove_match_emit_encodeSnappyBlockAsm JMP memmove_long_match_emit_encodeSnappyBlockAsm one_byte_match_emit_encodeSnappyBlockAsm: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm: LEAQ (AX)(R8*1), DI // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm 
emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm: MOVQ DI, AX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm memmove_long_match_emit_encodeSnappyBlockAsm: LEAQ (AX)(R8*1), DI // genMemMoveLong MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVQ R8, R10 SHRQ $0x05, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 LEAQ -32(SI)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOA X4, (R12) MOVOA X5, 16(R12) ADDQ $0x20, R12 ADDQ $0x20, R9 ADDQ $0x20, R11 DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: MOVOU -32(SI)(R11*1), X4 MOVOU -16(SI)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsm: match_nolit_loop_encodeSnappyBlockAsm: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI 
SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP // matchLen XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm matchlen_loopback_match_nolit_encodeSnappyBlockAsm: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_loop_match_nolit_encodeSnappyBlockAsm: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm matchlen_single_match_nolit_encodeSnappyBlockAsm: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm match_nolit_end_encodeSnappyBlockAsm: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeSnappyBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: CMPL R9, $0x40 JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm MOVB $0xff, (AX) MOVL BP, 1(AX) LEAL -64(R9), R9 ADDQ $0x05, AX CMPL R9, $0x04 JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm four_bytes_remain_match_nolit_encodeSnappyBlockAsm: TESTL R9, R9 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm MOVB $0x03, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVL BP, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm two_byte_offset_match_nolit_encodeSnappyBlockAsm: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm CMPL BP, $0x00000800 JGE 
emit_copy_three_match_nolit_encodeSnappyBlockAsm MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm emit_copy_three_match_nolit_encodeSnappyBlockAsm: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm: CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm: MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, BP SHLQ $0x10, DI IMULQ R8, DI SHRQ $0x32, DI SHLQ $0x10, BP IMULQ R8, BP SHRQ $0x32, BP LEAL -2(CX), R8 LEAQ 24(SP)(BP*4), R9 MOVL (R9), BP MOVL R8, 24(SP)(DI*4) MOVL CX, (R9) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm INCL CX JMP search_loop_encodeSnappyBlockAsm emit_remainder_encodeSnappyBlockAsm: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 5(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeSnappyBlockAsm MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm four_bytes_emit_remainder_encodeSnappyBlockAsm: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm three_bytes_emit_remainder_encodeSnappyBlockAsm: MOVB 
$0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm two_bytes_emit_remainder_encodeSnappyBlockAsm: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeSnappyBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBlockAsm one_byte_emit_remainder_encodeSnappyBlockAsm: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveShort CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP 
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm

memmove_long_emit_remainder_encodeSnappyBlockAsm:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, SI
	SHRQ $0x05, SI
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
	SUBQ BP, DI
	DECQ SI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(CX)(DI*1), BP
	LEAQ -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	ADDQ $0x20, R8
	ADDQ $0x20, BP
	ADDQ $0x20, DI
	DECQ SI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ BX, DI
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and stores the number of bytes written in ret
// (final output pointer minus dst_base). Stores 0 in ret instead when one of
// the checks against the output limit kept at 0(SP) fails.
//
// Registers while searching:
//   AX  output pointer, advanced after every group of stores to dst
//   DX  src_base
//   CX  current position s in src (32-bit index)
//
// Frame, as set up and used below (16408 = 24 bytes of locals + 16384):
//   0(SP)   output limit: dst_base + src_len - 5 - (src_len >> 5)
//   8(SP)   src_len - 8; searching stops once a position reaches it
//   12(SP)  index of the first src byte not yet written to dst
//   16(SP)  offset tried by the repeat check (starts at 1, afterwards the
//           offset of the most recent match)
//   20(SP)  position to resume at when no candidate matches
//   24(SP)  16384-byte table of 32-bit src positions, indexed by a 12-bit hash
//
// NOTE(review): the limit checks compare pointers with the signed jump JL;
// presumably harmless for the addresses this runs with — confirm.
TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56
	MOVQ dst_base+0(FP), AX
	// Zero the table: 0x80 iterations of 8 x 16 bytes = 16384 bytes from 24(SP).
	MOVQ $0x00000080, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBlockAsm12B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBlockAsm12B
	// 12(SP) = 0: nothing has been emitted yet.
	MOVL $0x00000000, 12(SP)
	// 8(SP) = src_len - 8; 0(SP) = dst_base + src_len - 5 - (src_len >> 5).
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	// Start at s = 1 with a repeat offset of 1; DX becomes src_base.
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm12B:
	// SI = 8 bytes at src[s].
	MOVQ (DX)(CX*1), SI
	// BP = s + 4 + ((s - 12(SP)) >> 5): the position to resume at if nothing
	// matches here. Leave the loop once it reaches 8(SP).
	MOVL CX, BP
	SUBL 12(SP), BP
	SHRL $0x05, BP
	LEAL 4(CX)(BP*1), BP
	CMPL BP, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm12B
	MOVL BP, 20(SP)
	// hash(x) = ((x << 24) * 0xcf1bbcdcbb) >> 52. The left shift drops the top
	// three of the eight loaded bytes; the right shift keeps 12 bits.
	// R9 = hash of the bytes at s, R10 = hash of the bytes at s+1.
	MOVQ $0x000000cf1bbcdcbb, R8
	MOVQ SI, R9
	MOVQ SI, R10
	SHRQ $0x08, R10
	SHLQ $0x18, R9
	IMULQ R8, R9
	SHRQ $0x34, R9
	SHLQ $0x18, R10
	IMULQ R8, R10
	SHRQ $0x34, R10
	// BP, DI = positions previously stored for those two hashes; the slots are
	// then overwritten with s and s+1.
	MOVL 24(SP)(R9*4), BP
	MOVL 24(SP)(R10*4), DI
	MOVL CX, 24(SP)(R9*4)
	LEAL 1(CX), R9
	MOVL R9, 24(SP)(R10*4)
	// R9 = hash of the bytes at s+2 (consumed in no_repeat_found).
	MOVQ SI, R9
	SHRQ $0x10, R9
	SHLQ $0x18, R9
	IMULQ R8, R9
	SHRQ $0x34, R9
	// Repeat check: compare the 4 bytes at src[s+1] (SI >> 8) with the 4 bytes
	// at src[s+1 - 16(SP)].
	MOVL CX, R8
	SUBL 16(SP), R8
	MOVL 1(DX)(R8*1), R10
	MOVQ SI, R8
	SHRQ $0x08, R8
	CMPL R8, R10
	JNE no_repeat_found_encodeSnappyBlockAsm12B
	// Repeat found. SI = s+1 (start of the match), BP = 12(SP),
	// DI = s+1 - 16(SP) (the earlier position). Skip the backward extension
	// when that earlier position is 0.
	LEAL 1(CX), SI
	MOVL 12(SP), BP
	MOVL SI, DI
	SUBL 16(SP), DI
	JZ repeat_extend_back_end_encodeSnappyBlockAsm12B

repeat_extend_back_loop_encodeSnappyBlockAsm12B:
	// Move SI and DI back one byte at a time while SI > 12(SP), the preceding
	// bytes are equal, and DI has not reached 0.
	CMPL SI, BP
	JLE repeat_extend_back_end_encodeSnappyBlockAsm12B
	MOVB -1(DX)(DI*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE repeat_extend_back_end_encodeSnappyBlockAsm12B
	LEAL -1(SI), SI
	DECL DI
	JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B

repeat_extend_back_end_encodeSnappyBlockAsm12B:
	// Emit src[12(SP):SI] as a literal unless it is empty.
	// DI = length, R8 = source pointer, BP = length - 1; 12(SP) is set to SI.
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
	MOVL SI, DI
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R8
	SUBL BP, DI
	LEAL -1(DI), BP
	// Header size depends on length-1: < 60 one byte, < 256 two bytes,
	// otherwise three bytes (0xf4 followed by the 16-bit length-1).
	CMPL BP, $0x3c
	JLT one_byte_repeat_emit_encodeSnappyBlockAsm12B
	CMPL BP, $0x00000100
	JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12B
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B

two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
	// Two-byte header: 0xf0, then length-1. The short copy is used when
	// length-1 < 64, the long copy otherwise.
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL memmove_repeat_emit_encodeSnappyBlockAsm12B
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B

one_byte_repeat_emit_encodeSnappyBlockAsm12B:
	// One-byte header: (length-1) << 2.
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeSnappyBlockAsm12B:
	// BP = output pointer after the copy (AX + length).
	LEAQ (AX)(DI*1), BP

	// genMemMoveShort
	// Copy DI bytes from (R8) to (AX). Dispatch on length: <3, ==3, <8, <=16,
	// <=32, else up to 64. Every case loads the first and last chunk before
	// storing them, so the two stores may overlap.
	CMPQ DI, $0x03
	JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2
	JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3
	CMPQ DI, $0x08
	JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4through7
	CMPQ DI, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
	CMPQ DI, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2:
	MOVB (R8), R9
	MOVB -1(R8)(DI*1), R8
	MOVB R9, (AX)
	MOVB R8, -1(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3:
	MOVW (R8), R9
	MOVB 2(R8), R8
	MOVW R9, (AX)
	MOVB R8, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4through7:
	MOVL (R8), R9
	MOVL -4(R8)(DI*1), R8
	MOVL R9, (AX)
	MOVL R8, -4(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
	MOVQ (R8), R9
	MOVQ -8(R8)(DI*1), R8
	MOVQ R9, (AX)
	MOVQ R8, -8(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
	MOVOU (R8), X0
	MOVOU -16(R8)(DI*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DI*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
	MOVQ BP, AX
	JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B

memmove_long_repeat_emit_encodeSnappyBlockAsm12B:
	// BP = output pointer after the copy (AX + length).
	LEAQ (AX)(DI*1), BP

	// genMemMoveLong
	// Copy DI bytes from (R8) to (AX). The first and last 32 source bytes are
	// loaded into X0-X3 up front and stored last with unaligned moves; the
	// middle is copied in 32-byte steps using aligned stores (MOVOA).
	// R10 = length >> 5, R11 = 64 - (AX & 31), so AX + R11 - 32 is 32-byte
	// aligned.
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVQ DI, R10
	SHRQ $0x05, R10
	MOVQ AX, R9
	ANDL $0x0000001f, R9
	MOVQ $0x00000040, R11
	SUBQ R9, R11
	// NOTE(review): DECQ leaves CF untouched, so JA/JNA below test ZF from
	// DECQ together with the CF left by the preceding SUBQ/ADDQ — confirm this
	// is the intended loop condition.
	DECQ R10
	JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	LEAQ -32(R8)(R11*1), R9
	LEAQ -32(AX)(R11*1), R12

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOA X4, (R12)
	MOVOA X5, 16(R12)
	ADDQ $0x20, R12
	ADDQ $0x20, R9
	ADDQ $0x20, R11
	DECQ R10
	JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
	// Copy 32 bytes ending at offset R11, then advance R11 by 32 while
	// length >= R11.
	MOVOU -32(R8)(R11*1), X4
	MOVOU -16(R8)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ $0x20, R11
	CMPQ DI, R11
	JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	// Store the saved head and tail.
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)
	MOVQ BP, AX

emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
	// s += 5 (s+1 plus the 4 bytes already compared). BP = s - 16(SP).
	// DI = src_len - s bytes remain. R8 and BP become pointers into src.
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(BP*1), BP

	// matchLen
	// R10 = number of equal bytes at (R8) and (BP), limited to DI bytes.
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_repeat_extend_encodeSnappyBlockAsm12B

matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B:
	// Compare 8 bytes at once. On a difference, the index of the lowest set
	// bit of the XOR divided by 8 is the count of equal leading bytes.
	MOVQ (R8)(R10*1), R9
	XORQ (BP)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B

matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B

matchlen_single_repeat_extend_encodeSnappyBlockAsm12B:
	// Fewer than 8 bytes left: compare one byte at a time.
	TESTL DI, DI
	JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B

matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B:
	MOVB (R8)(R10*1), R9
	CMPB (BP)(R10*1), R9
	JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B

repeat_extend_forward_end_encodeSnappyBlockAsm12B:
	// s += matched bytes. BP = s - SI (total length since the match start),
	// SI = offset from 16(SP).
	ADDL R10, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI

	// emitCopy
	// Offset in SI, length in BP.

two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
	// While length > 64: write 0xee followed by the 16-bit offset (3 bytes)
	// and subtract 60 from the length.
	CMPL BP, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX
	JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
	// Use the 3-byte form when length >= 12 or offset >= 2048.
	CMPL BP, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
	CMPL SI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
	// 2-byte form: byte 0 = 1 + 4*(length-4) | ((offset >> 8) << 5),
	// byte 1 = low byte of offset. Only BL is set before the LEAL; only the
	// low byte of the result is stored.
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeSnappyBlockAsm12B

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
	// 3-byte form: byte 0 = 2 + 4*(length-1), bytes 1-2 = 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm12B:
	// Everything up to s has been emitted; continue searching.
	MOVL CX, 12(SP)
	JMP search_loop_encodeSnappyBlockAsm12B

no_repeat_found_encodeSnappyBlockAsm12B:
	// On entry: BP = table candidate for s, DI = table candidate for s+1,
	// R9 = hash for s+2, SI = 8 bytes at src[s].
	// Candidate 1: 4 bytes at src[BP] against the 4 bytes at s.
	CMPL (DX)(BP*1), SI
	JEQ candidate_match_encodeSnappyBlockAsm12B
	// Candidate 2: 4 bytes at src[DI] against the 4 bytes at s+1.
	// BP = table candidate for s+2, R8 = s+2.
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ candidate2_match_encodeSnappyBlockAsm12B
	// Candidate 3: store s+2 in the table, then compare 4 bytes at src[BP]
	// against the 4 bytes at s+2.
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ candidate3_match_encodeSnappyBlockAsm12B
	// Nothing matched: resume at the position saved in 20(SP).
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBlockAsm12B

candidate3_match_encodeSnappyBlockAsm12B:
	ADDL $0x02, CX
	JMP candidate_match_encodeSnappyBlockAsm12B

candidate2_match_encodeSnappyBlockAsm12B:
	// Still record s+2 in the table, then continue with s+1 and candidate DI.
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

candidate_match_encodeSnappyBlockAsm12B:
	// CX = match position s, BP = candidate position, SI = 12(SP).
	// Skip the backward extension when the candidate is position 0.
	MOVL 12(SP), SI
	TESTL BP, BP
	JZ match_extend_back_end_encodeSnappyBlockAsm12B

match_extend_back_loop_encodeSnappyBlockAsm12B:
	// Move s and the candidate back one byte at a time while s > 12(SP), the
	// preceding bytes are equal, and the candidate has not reached 0.
	CMPL CX, SI
	JLE match_extend_back_end_encodeSnappyBlockAsm12B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE match_extend_back_end_encodeSnappyBlockAsm12B
	LEAL -1(CX), CX
	DECL BP
	JZ match_extend_back_end_encodeSnappyBlockAsm12B
	JMP match_extend_back_loop_encodeSnappyBlockAsm12B

match_extend_back_end_encodeSnappyBlockAsm12B:
	// Return 0 unless AX + (s - 12(SP)) + 3 is below the output limit.
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 3(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL match_dst_size_check_encodeSnappyBlockAsm12B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBlockAsm12B:
	// Emit src[12(SP):s] as a literal unless it is empty.
	// R8 = length, SI = source pointer, DI = length - 1; 12(SP) is set to s.
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	// Header size depends on length-1: < 60 one byte, < 256 two bytes,
	// otherwise three bytes (0xf4 followed by the 16-bit length-1).
	CMPL DI, $0x3c
	JLT one_byte_match_emit_encodeSnappyBlockAsm12B
	CMPL DI, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBlockAsm12B
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBlockAsm12B

two_bytes_match_emit_encodeSnappyBlockAsm12B:
	// Two-byte header: 0xf0, then length-1. The short copy is used when
	// length-1 < 64, the long copy otherwise.
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL memmove_match_emit_encodeSnappyBlockAsm12B
	JMP memmove_long_match_emit_encodeSnappyBlockAsm12B

one_byte_match_emit_encodeSnappyBlockAsm12B:
	// One-byte header: (length-1) << 2.
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBlockAsm12B:
	// DI = output pointer after the copy (AX + length).
	LEAQ (AX)(R8*1), DI

	// genMemMoveShort
	// Copy R8 bytes from (SI) to (AX); same length dispatch and overlapping
	// first/last chunk scheme as the short copy in the repeat path.
	CMPQ R8, $0x03
	JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
	MOVQ DI, AX
	JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B

memmove_long_match_emit_encodeSnappyBlockAsm12B:
	// DI = output pointer after the copy (AX + length).
	LEAQ (AX)(R8*1), DI

	// genMemMoveLong
	// Copy R8 bytes from (SI) to (AX): head and tail saved in X0-X3 and
	// stored last, middle copied in 32-byte steps with aligned stores.
	// NOTE(review): same DECQ/JA/JNA flag caveat as the long copy in the
	// repeat path of this function.
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ R8, R10
	SHRQ $0x05, R10
	MOVQ AX, R9
	ANDL $0x0000001f, R9
	MOVQ $0x00000040, R11
	SUBQ R9, R11
	DECQ R10
	JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	LEAQ -32(SI)(R11*1), R9
	LEAQ -32(AX)(R11*1), R12

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOA X4, (R12)
	MOVOA X5, 16(R12)
	ADDQ $0x20, R12
	ADDQ $0x20, R9
	ADDQ $0x20, R11
	DECQ R10
	JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ $0x20, R11
	CMPQ R8, R11
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ DI, AX

emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
match_nolit_loop_encodeSnappyBlockAsm12B:
	// 16(SP) = s - candidate (the match offset). Step both positions past the
	// 4 bytes already compared; SI = src_len - s bytes remain.
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP

	// matchLen
	// R9 = number of equal bytes at (DI) and (BP), limited to SI bytes.
	XORL R9, R9
	CMPL SI, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBlockAsm12B

matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B:
	// 8 bytes at a time; on a difference the lowest set bit of the XOR
	// divided by 8 gives the count of equal leading bytes.
	MOVQ (DI)(R9*1), R8
	XORQ (BP)(R9*1), R8
	TESTQ R8, R8
	JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B
	BSFQ R8, R8
	SARQ $0x03, R8
	LEAL (R9)(R8*1), R9
	JMP match_nolit_end_encodeSnappyBlockAsm12B

matchlen_loop_match_nolit_encodeSnappyBlockAsm12B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B

matchlen_single_match_nolit_encodeSnappyBlockAsm12B:
	// Fewer than 8 bytes left: compare one byte at a time.
	TESTL SI, SI
	JZ match_nolit_end_encodeSnappyBlockAsm12B

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE match_nolit_end_encodeSnappyBlockAsm12B
	LEAL 1(R9), R9
	DECL SI
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B

match_nolit_end_encodeSnappyBlockAsm12B:
	// s += matched bytes; BP = offset; R9 = total length (matched + 4);
	// 12(SP) = s.
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	MOVL CX, 12(SP)

	// emitCopy
	// Offset in BP, length in R9.

two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
	// While length > 64: write 0xee followed by the 16-bit offset (3 bytes)
	// and subtract 60 from the length.
	CMPL R9, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
	// Use the 3-byte form when length >= 12 or offset >= 2048.
	CMPL R9, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
	CMPL BP, $0x00000800
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
	// 2-byte form: byte 0 = 1 + 4*(length-4) | ((offset >> 8) << 5),
	// byte 1 = low byte of offset.
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B

emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
	// 3-byte form: byte 0 = 2 + 4*(length-1), bytes 1-2 = 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
	// Stop searching once s reaches 8(SP). Otherwise load 8 bytes at
	// src[s-2] and return 0 if the output pointer has reached the limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm12B
	MOVQ -2(DX)(CX*1), SI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBlockAsm12B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBlockAsm12B:
	// DI = hash of the bytes at s-2; SI is shifted to the bytes at s and
	// BP = hash of those. Same hash as in the search loop.
	MOVQ $0x000000cf1bbcdcbb, R8
	MOVQ SI, DI
	SHRQ $0x10, SI
	MOVQ SI, BP
	SHLQ $0x18, DI
	IMULQ R8, DI
	SHRQ $0x34, DI
	SHLQ $0x18, BP
	IMULQ R8, BP
	SHRQ $0x34, BP
	// BP = previous table entry for hash(s); store s-2 and s in the table.
	LEAL -2(CX), R8
	LEAQ 24(SP)(BP*4), R9
	MOVL (R9), BP
	MOVL R8, 24(SP)(DI*4)
	MOVL CX, (R9)
	// If the 4 bytes at that entry equal the 4 bytes at s, emit another copy
	// right away with no literal in between; otherwise search from s+1.
	CMPL (DX)(BP*1), SI
	JEQ match_nolit_loop_encodeSnappyBlockAsm12B
	INCL CX
	JMP search_loop_encodeSnappyBlockAsm12B

emit_remainder_encodeSnappyBlockAsm12B:
	// Return 0 unless AX + (src_len - 12(SP)) + 3 is below the output limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBlockAsm12B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBlockAsm12B:
	// Emit src[12(SP):src_len] as the final literal unless it is empty.
	// BP = length, CX = source pointer, DX = length - 1 (src_base in DX is
	// overwritten here).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	// Header size depends on length-1: < 60 one byte, < 256 two bytes,
	// otherwise three bytes (0xf4 followed by the 16-bit length-1).
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBlockAsm12B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B

two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
	// Two-byte header: 0xf0, then length-1. The short copy is used when
	// length-1 < 64, the long copy otherwise.
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBlockAsm12B
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B

one_byte_emit_remainder_encodeSnappyBlockAsm12B:
	// One-byte header: (length-1) << 2.
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBlockAsm12B:
	// DX = output pointer after the copy (AX + length); BX = length.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveShort
	// Copy BX bytes from (CX) to (AX); same length dispatch and overlapping
	// first/last chunk scheme as the other short copies in this function.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B

memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
	// DX = output pointer after the copy (AX + length); BX = length.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX

	// genMemMoveLong
	// Copy BX bytes from (CX) to (AX): head and tail saved in X0-X3 and
	// stored last, middle copied in 32-byte steps with aligned stores.
	// NOTE(review): same DECQ/JA/JNA flag caveat as the long copy in the
	// repeat path of this function.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, SI
	SHRQ $0x05, SI
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
	SUBQ BP, DI
	DECQ SI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	LEAQ -32(CX)(DI*1), BP
	LEAQ -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	ADDQ $0x20, R8
	ADDQ $0x20, BP
	ADDQ $0x20, DI
	DECQ SI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ BX, DI
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
	// ret = output pointer - dst_base, the number of bytes written.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000020, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBlockAsm10B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBlockAsm10B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), BP
	MOVL BP, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm10B:
	MOVQ (DX)(CX*1), SI
	MOVL CX, BP
	SUBL 12(SP),
BP SHRL $0x05, BP LEAL 4(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm10B MOVL BP, 20(SP) MOVQ $0x9e3779b1, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x36, R9 SHLQ $0x20, R10 IMULQ R8, R10 SHRQ $0x36, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x36, R9 MOVL CX, R8 SUBL 16(SP), R8 MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm10B LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm10B repeat_extend_back_loop_encodeSnappyBlockAsm10B: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm10B MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm10B LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B repeat_extend_back_end_encodeSnappyBlockAsm10B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI LEAL -1(DI), BP CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm10B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10B MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B two_bytes_repeat_emit_encodeSnappyBlockAsm10B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_repeat_emit_encodeSnappyBlockAsm10B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B one_byte_repeat_emit_encodeSnappyBlockAsm10B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm10B: LEAQ (AX)(DI*1), BP // genMemMoveShort CMPQ DI, $0x03 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3 CMPQ DI, $0x08 JB 
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4through7 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R8 MOVB R9, (AX) MOVB R8, -1(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R8 MOVW R9, (AX) MOVB R8, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R8 MOVL R9, (AX) MOVL R8, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R8 MOVQ R9, (AX) MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B: MOVQ BP, AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B memmove_long_repeat_emit_encodeSnappyBlockAsm10B: LEAQ (AX)(DI*1), BP // genMemMoveLong MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVQ DI, R10 SHRQ $0x05, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 
JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 LEAQ -32(R8)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOA X4, (R12) MOVOA X5, 16(R12) ADDQ $0x20, R12 ADDQ $0x20, R9 ADDQ $0x20, R11 DECQ R10 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: MOVOU -32(R8)(R11*1), X4 MOVOU -16(R8)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ DI, R11 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP // matchLen XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm10B matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B matchlen_single_repeat_extend_encodeSnappyBlockAsm10B: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B repeat_extend_forward_end_encodeSnappyBlockAsm10B: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI // emitCopy 
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B CMPL SI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm10B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm10B: MOVL CX, 12(SP) JMP search_loop_encodeSnappyBlockAsm10B no_repeat_found_encodeSnappyBlockAsm10B: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm10B SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm10B MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm10B MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm10B candidate3_match_encodeSnappyBlockAsm10B: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm10B candidate2_match_encodeSnappyBlockAsm10B: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsm10B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm10B match_extend_back_loop_encodeSnappyBlockAsm10B: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsm10B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm10B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm10B JMP match_extend_back_loop_encodeSnappyBlockAsm10B match_extend_back_end_encodeSnappyBlockAsm10B: MOVL CX, SI SUBL 12(SP), SI LEAQ 3(AX)(SI*1), SI 
CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm10B: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 LEAL -1(R8), DI CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm10B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm10B MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm10B two_bytes_match_emit_encodeSnappyBlockAsm10B: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ $0x02, AX CMPL DI, $0x40 JL memmove_match_emit_encodeSnappyBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBlockAsm10B one_byte_match_emit_encodeSnappyBlockAsm10B: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm10B: LEAQ (AX)(R8*1), DI // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP 
memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: MOVQ DI, AX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B memmove_long_match_emit_encodeSnappyBlockAsm10B: LEAQ (AX)(R8*1), DI // genMemMoveLong MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVQ R8, R10 SHRQ $0x05, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 LEAQ -32(SI)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOA X4, (R12) MOVOA X5, 16(R12) ADDQ $0x20, R12 ADDQ $0x20, R9 ADDQ $0x20, R11 DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: MOVOU -32(SI)(R11*1), X4 MOVOU -16(SI)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsm10B: 
match_nolit_loop_encodeSnappyBlockAsm10B: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP // matchLen XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm10B matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_loop_match_nolit_encodeSnappyBlockAsm10B: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B matchlen_single_match_nolit_encodeSnappyBlockAsm10B: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm10B matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm10B LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B match_nolit_end_encodeSnappyBlockAsm10B: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBlockAsm10B: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm10B: CMPL CX, 8(SP) JGE 
emit_remainder_encodeSnappyBlockAsm10B MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm10B: MOVQ $0x9e3779b1, R8 MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, BP SHLQ $0x20, DI IMULQ R8, DI SHRQ $0x36, DI SHLQ $0x20, BP IMULQ R8, BP SHRQ $0x36, BP LEAL -2(CX), R8 LEAQ 24(SP)(BP*4), R9 MOVL (R9), BP MOVL R8, 24(SP)(DI*4) MOVL CX, (R9) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm10B INCL CX JMP search_loop_encodeSnappyBlockAsm10B emit_remainder_encodeSnappyBlockAsm10B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm10B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm10B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm10B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B two_bytes_emit_remainder_encodeSnappyBlockAsm10B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeSnappyBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B one_byte_emit_remainder_encodeSnappyBlockAsm10B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm10B: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveShort CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16 CMPQ BX, 
$0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B memmove_long_emit_remainder_encodeSnappyBlockAsm10B: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, SI SHRQ $0x05, SI MOVQ AX, BP ANDL $0x0000001f, BP MOVQ $0x00000040, DI SUBQ BP, DI DECQ SI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 LEAQ -32(CX)(DI*1), BP LEAQ 
-32(AX)(DI*1), R8 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: MOVOU (BP), X4 MOVOU 16(BP), X5 MOVOA X4, (R8) MOVOA X5, 16(R8) ADDQ $0x20, R8 ADDQ $0x20, BP ADDQ $0x20, DI DECQ SI JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: MOVOU -32(CX)(DI*1), X4 MOVOU -16(CX)(DI*1), X5 MOVOA X4, -32(AX)(DI*1) MOVOA X5, -16(AX)(DI*1) ADDQ $0x20, DI CMPQ BX, DI JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeSnappyBlockAsm8B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000008, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm8B: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsm8B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -5(CX), DX LEAQ -8(CX), BP MOVL BP, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm8B: MOVQ (DX)(CX*1), SI MOVL CX, BP SUBL 12(SP), BP SHRL $0x04, BP LEAL 4(CX)(BP*1), BP CMPL BP, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm8B MOVL BP, 20(SP) MOVQ $0x9e3779b1, R8 MOVQ SI, R9 MOVQ SI, R10 SHRQ $0x08, R10 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x38, R9 SHLQ $0x20, R10 IMULQ R8, R10 SHRQ $0x38, R10 MOVL 24(SP)(R9*4), BP MOVL 24(SP)(R10*4), DI MOVL CX, 24(SP)(R9*4) LEAL 1(CX), R9 MOVL R9, 24(SP)(R10*4) MOVQ SI, R9 SHRQ $0x10, R9 SHLQ $0x20, R9 IMULQ R8, R9 SHRQ $0x38, R9 MOVL CX, R8 SUBL 16(SP), R8 
MOVL 1(DX)(R8*1), R10 MOVQ SI, R8 SHRQ $0x08, R8 CMPL R8, R10 JNE no_repeat_found_encodeSnappyBlockAsm8B LEAL 1(CX), SI MOVL 12(SP), BP MOVL SI, DI SUBL 16(SP), DI JZ repeat_extend_back_end_encodeSnappyBlockAsm8B repeat_extend_back_loop_encodeSnappyBlockAsm8B: CMPL SI, BP JLE repeat_extend_back_end_encodeSnappyBlockAsm8B MOVB -1(DX)(DI*1), BL MOVB -1(DX)(SI*1), R8 CMPB BL, R8 JNE repeat_extend_back_end_encodeSnappyBlockAsm8B LEAL -1(SI), SI DECL DI JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B repeat_extend_back_end_encodeSnappyBlockAsm8B: MOVL 12(SP), BP CMPL BP, SI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B MOVL SI, DI MOVL SI, 12(SP) LEAQ (DX)(BP*1), R8 SUBL BP, DI LEAL -1(DI), BP CMPL BP, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm8B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8B MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B two_bytes_repeat_emit_encodeSnappyBlockAsm8B: MOVB $0xf0, (AX) MOVB BP, 1(AX) ADDQ $0x02, AX CMPL BP, $0x40 JL memmove_repeat_emit_encodeSnappyBlockAsm8B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B one_byte_repeat_emit_encodeSnappyBlockAsm8B: SHLB $0x02, BP MOVB BP, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm8B: LEAQ (AX)(DI*1), BP // genMemMoveShort CMPQ DI, $0x03 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3 CMPQ DI, $0x08 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4through7 CMPQ DI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 CMPQ DI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2: MOVB (R8), R9 MOVB -1(R8)(DI*1), R8 MOVB R9, (AX) MOVB R8, -1(AX)(DI*1) JMP 
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3: MOVW (R8), R9 MOVB 2(R8), R8 MOVW R9, (AX) MOVB R8, 2(AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4through7: MOVL (R8), R9 MOVL -4(R8)(DI*1), R8 MOVL R9, (AX) MOVL R8, -4(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: MOVQ (R8), R9 MOVQ -8(R8)(DI*1), R8 MOVQ R9, (AX) MOVQ R8, -8(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: MOVOU (R8), X0 MOVOU -16(R8)(DI*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(DI*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B: MOVQ BP, AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B memmove_long_repeat_emit_encodeSnappyBlockAsm8B: LEAQ (AX)(DI*1), BP // genMemMoveLong MOVOU (R8), X0 MOVOU 16(R8), X1 MOVOU -32(R8)(DI*1), X2 MOVOU -16(R8)(DI*1), X3 MOVQ DI, R10 SHRQ $0x05, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(R8)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOA X4, (R12) MOVOA X5, 16(R12) ADDQ $0x20, R12 ADDQ $0x20, R9 ADDQ $0x20, R11 DECQ R10 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: MOVOU 
-32(R8)(R11*1), X4 MOVOU -16(R8)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ DI, R11 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(DI*1) MOVOU X3, -16(AX)(DI*1) MOVQ BP, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B: ADDL $0x05, CX MOVL CX, BP SUBL 16(SP), BP MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(BP*1), BP // matchLen XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm8B matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B: MOVQ (R8)(R10*1), R9 XORQ (BP)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B matchlen_single_repeat_extend_encodeSnappyBlockAsm8B: TESTL DI, DI JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B: MOVB (R8)(R10*1), R9 CMPB (BP)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B repeat_extend_forward_end_encodeSnappyBlockAsm8B: ADDL R10, CX MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, BP MOVB BP, (AX) ADDQ $0x02, AX JMP 
repeat_end_emit_encodeSnappyBlockAsm8B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B: MOVB $0x02, BL LEAL -4(BX)(BP*4), BP MOVB BP, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm8B: MOVL CX, 12(SP) JMP search_loop_encodeSnappyBlockAsm8B no_repeat_found_encodeSnappyBlockAsm8B: CMPL (DX)(BP*1), SI JEQ candidate_match_encodeSnappyBlockAsm8B SHRQ $0x08, SI MOVL 24(SP)(R9*4), BP LEAL 2(CX), R8 CMPL (DX)(DI*1), SI JEQ candidate2_match_encodeSnappyBlockAsm8B MOVL R8, 24(SP)(R9*4) SHRQ $0x08, SI CMPL (DX)(BP*1), SI JEQ candidate3_match_encodeSnappyBlockAsm8B MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm8B candidate3_match_encodeSnappyBlockAsm8B: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm8B candidate2_match_encodeSnappyBlockAsm8B: MOVL R8, 24(SP)(R9*4) INCL CX MOVL DI, BP candidate_match_encodeSnappyBlockAsm8B: MOVL 12(SP), SI TESTL BP, BP JZ match_extend_back_end_encodeSnappyBlockAsm8B match_extend_back_loop_encodeSnappyBlockAsm8B: CMPL CX, SI JLE match_extend_back_end_encodeSnappyBlockAsm8B MOVB -1(DX)(BP*1), BL MOVB -1(DX)(CX*1), DI CMPB BL, DI JNE match_extend_back_end_encodeSnappyBlockAsm8B LEAL -1(CX), CX DECL BP JZ match_extend_back_end_encodeSnappyBlockAsm8B JMP match_extend_back_loop_encodeSnappyBlockAsm8B match_extend_back_end_encodeSnappyBlockAsm8B: MOVL CX, SI SUBL 12(SP), SI LEAQ 3(AX)(SI*1), SI CMPQ SI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm8B: MOVL CX, SI MOVL 12(SP), DI CMPL DI, SI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B MOVL SI, R8 MOVL SI, 12(SP) LEAQ (DX)(DI*1), SI SUBL DI, R8 LEAL -1(R8), DI CMPL DI, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm8B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm8B MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm8B two_bytes_match_emit_encodeSnappyBlockAsm8B: MOVB $0xf0, (AX) MOVB DI, 1(AX) ADDQ 
$0x02, AX CMPL DI, $0x40 JL memmove_match_emit_encodeSnappyBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBlockAsm8B one_byte_match_emit_encodeSnappyBlockAsm8B: SHLB $0x02, DI MOVB DI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm8B: LEAQ (AX)(R8*1), DI // genMemMoveShort CMPQ R8, $0x03 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2: MOVB (SI), R9 MOVB -1(SI)(R8*1), SI MOVB R9, (AX) MOVB SI, -1(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3: MOVW (SI), R9 MOVB 2(SI), SI MOVW R9, (AX) MOVB SI, 2(AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4through7: MOVL (SI), R9 MOVL -4(SI)(R8*1), SI MOVL R9, (AX) MOVL SI, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: MOVQ (SI), R9 MOVQ -8(SI)(R8*1), SI MOVQ R9, (AX) MOVQ SI, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: MOVOU (SI), X0 MOVOU -16(SI)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 
-32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm8B: MOVQ DI, AX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B memmove_long_match_emit_encodeSnappyBlockAsm8B: LEAQ (AX)(R8*1), DI // genMemMoveLong MOVOU (SI), X0 MOVOU 16(SI), X1 MOVOU -32(SI)(R8*1), X2 MOVOU -16(SI)(R8*1), X3 MOVQ R8, R10 SHRQ $0x05, R10 MOVQ AX, R9 ANDL $0x0000001f, R9 MOVQ $0x00000040, R11 SUBQ R9, R11 DECQ R10 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(SI)(R11*1), R9 LEAQ -32(AX)(R11*1), R12 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (R9), X4 MOVOU 16(R9), X5 MOVOA X4, (R12) MOVOA X5, 16(R12) ADDQ $0x20, R12 ADDQ $0x20, R9 ADDQ $0x20, R11 DECQ R10 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(SI)(R11*1), X4 MOVOU -16(SI)(R11*1), X5 MOVOA X4, -32(AX)(R11*1) MOVOA X5, -16(AX)(R11*1) ADDQ $0x20, R11 CMPQ R8, R11 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ DI, AX emit_literal_done_match_emit_encodeSnappyBlockAsm8B: match_nolit_loop_encodeSnappyBlockAsm8B: MOVL CX, SI SUBL BP, SI MOVL SI, 16(SP) ADDL $0x04, CX ADDL $0x04, BP MOVQ src_len+32(FP), SI SUBL CX, SI LEAQ (DX)(CX*1), DI LEAQ (DX)(BP*1), BP // matchLen XORL R9, R9 CMPL SI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm8B matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: MOVQ (DI)(R9*1), R8 XORQ (BP)(R9*1), R8 TESTQ R8, R8 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B BSFQ R8, R8 SARQ $0x03, R8 LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_loop_match_nolit_encodeSnappyBlockAsm8B: LEAL -8(SI), SI LEAL 8(R9), R9 CMPL SI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B 
matchlen_single_match_nolit_encodeSnappyBlockAsm8B: TESTL SI, SI JZ match_nolit_end_encodeSnappyBlockAsm8B matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B: MOVB (DI)(R9*1), R8 CMPB (BP)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm8B LEAL 1(R9), R9 DECL SI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B match_nolit_end_encodeSnappyBlockAsm8B: ADDL R9, CX MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: CMPL R9, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B MOVB $0xee, (AX) MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) SHRL $0x08, BP SHLL $0x05, BP ORL BP, R9 MOVB R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBlockAsm8B: MOVB $0x02, BL LEAL -4(BX)(R9*4), R9 MOVB R9, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm8B: CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm8B MOVQ -2(DX)(CX*1), SI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm8B: MOVQ $0x9e3779b1, R8 MOVQ SI, DI SHRQ $0x10, SI MOVQ SI, BP SHLQ $0x20, DI IMULQ R8, DI SHRQ $0x38, DI SHLQ $0x20, BP IMULQ R8, BP SHRQ $0x38, BP LEAL -2(CX), R8 LEAQ 24(SP)(BP*4), R9 MOVL (R9), BP MOVL R8, 24(SP)(DI*4) MOVL CX, (R9) CMPL (DX)(BP*1), SI JEQ match_nolit_loop_encodeSnappyBlockAsm8B INCL CX JMP search_loop_encodeSnappyBlockAsm8B emit_remainder_encodeSnappyBlockAsm8B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm8B: MOVQ src_len+32(FP), CX 
MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B MOVL CX, BP MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, BP LEAL -1(BP), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm8B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B two_bytes_emit_remainder_encodeSnappyBlockAsm8B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeSnappyBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B one_byte_emit_remainder_encodeSnappyBlockAsm8B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm8B: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveShort CMPQ BX, $0x03 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2: MOVB (CX), BP MOVB -1(CX)(BX*1), CL MOVB BP, (AX) MOVB CL, -1(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3: MOVW (CX), BP MOVB 2(CX), CL MOVW BP, (AX) MOVB CL, 2(AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7: MOVL (CX), BP MOVL -4(CX)(BX*1), CX MOVL BP, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B 
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16: MOVQ (CX), BP MOVQ -8(CX)(BX*1), CX MOVQ BP, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B memmove_long_emit_remainder_encodeSnappyBlockAsm8B: LEAQ (AX)(BP*1), DX MOVL BP, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, SI SHRQ $0x05, SI MOVQ AX, BP ANDL $0x0000001f, BP MOVQ $0x00000040, DI SUBQ BP, DI DECQ SI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(CX)(DI*1), BP LEAQ -32(AX)(DI*1), R8 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (BP), X4 MOVOU 16(BP), X5 MOVOA X4, (R8) MOVOA X5, 16(R8) ADDQ $0x20, R8 ADDQ $0x20, BP ADDQ $0x20, DI DECQ SI JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(CX)(DI*1), X4 MOVOU -16(CX)(DI*1), X5 MOVOA X4, -32(AX)(DI*1) MOVOA X5, -16(AX)(DI*1) ADDQ $0x20, DI CMPQ BX, DI JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) 
// Final RET of the function whose body precedes this block.
	RET

// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// emitLiteral writes a literal header followed by the bytes of lit to dst and
// returns the number of bytes written (header size + len(lit)). When lit is
// empty nothing is written and 0 is returned.
//
// Header layout, with n = len(lit)-1 (compared as a signed 32-bit value):
//   n < 60      1 byte : n<<2
//   n < 1<<8    2 bytes: 0xf0, n
//   n < 1<<16   3 bytes: 0xf4, n as 16 bits
//   n < 1<<24   4 bytes: 0xf8, n as 24 bits
//   otherwise   5 bytes: 0xfc, n as 32 bits
//
// dst_len is never read, so nothing here checks that dst is large enough.
//
// Registers: AX = write pointer into dst, CX = read pointer into lit,
// DX = len(lit), BX = return value, BP = n (later reused as scratch).
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
	MOVQ lit_len+32(FP), DX
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	// Empty literal: skip everything and return 0.
	TESTQ DX, DX
	JZ emit_literal_end_standalone_skip
	// BX starts as len(lit); each header path adds its own size to it.
	MOVL DX, BX
	LEAL -1(DX), BP
	CMPL BP, $0x3c
	JLT one_byte_standalone
	CMPL BP, $0x00000100
	JLT two_bytes_standalone
	CMPL BP, $0x00010000
	JLT three_bytes_standalone
	CMPL BP, $0x01000000
	JLT four_bytes_standalone
	// 5-byte header: tag 0xfc followed by n as 32 bits.
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP memmove_long_standalone

four_bytes_standalone:
	// 4-byte header: tag 0xf8, low 16 bits of n, then bits 16..23 of n.
	MOVL BP, SI
	SHRL $0x10, SI
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB SI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP memmove_long_standalone

three_bytes_standalone:
	// 3-byte header: tag 0xf4 followed by n as 16 bits.
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP memmove_long_standalone

two_bytes_standalone:
	// 2-byte header: tag 0xf0 followed by n as one byte.
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	// n < 64 means len(lit) <= 64, which the short copy below can handle.
	CMPL BP, $0x40
	JL memmove_standalone
	JMP memmove_long_standalone

one_byte_standalone:
	// 1-byte header: n<<2.
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

memmove_standalone:
	// genMemMoveShort
	// Dispatch on DX (1..64 on the paths that reach here). Every size class
	// loads from the start and from the end of lit before storing, so the two
	// pieces may overlap in the middle. CX/CL is reused as a data register by
	// the second load in the small classes; the source pointer is dead by then.
	CMPQ DX, $0x03
	JB emit_lit_memmove_standalone_memmove_move_1or2
	JE emit_lit_memmove_standalone_memmove_move_3
	CMPQ DX, $0x08
	JB emit_lit_memmove_standalone_memmove_move_4through7
	CMPQ DX, $0x10
	JBE emit_lit_memmove_standalone_memmove_move_8through16
	CMPQ DX, $0x20
	JBE emit_lit_memmove_standalone_memmove_move_17through32
	JMP emit_lit_memmove_standalone_memmove_move_33through64

emit_lit_memmove_standalone_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(DX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(DX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(DX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP emit_literal_end_standalone
	// NOTE(review): unreachable duplicate; it follows an unconditional JMP and
	// carries no label. Harmless generator output.
	JMP emit_literal_end_standalone

memmove_long_standalone:
	// genMemMoveLong
	// The first and last 32 bytes of lit are loaded up front and stored last,
	// after the 32-byte loop has filled the middle of the destination.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	// SI = len/32. DI = 64 - (dst & 31), so AX+DI-32 is 32-byte aligned, which
	// is what allows the aligned MOVOA stores below.
	MOVQ DX, SI
	SHRQ $0x05, SI
	MOVQ AX, BP
	ANDL $0x0000001f, BP
	MOVQ $0x00000040, DI
	SUBQ BP, DI
	// DECQ does not modify CF (clear after the SUBQ above because BP <= 31),
	// so JA is taken whenever the decremented SI is non-zero.
	DECQ SI
	JA emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
	LEAQ -32(CX)(DI*1), BP
	LEAQ -32(AX)(DI*1), R8

emit_lit_memmove_long_standalonelarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	ADDQ $0x20, R8
	ADDQ $0x20, BP
	ADDQ $0x20, DI
	DECQ SI
	JNA emit_lit_memmove_long_standalonelarge_big_loop_back

emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
	// Copy 32 bytes ending at offset DI, then advance; repeat while DX >= DI
	// (unsigned), so the loop never writes past dst+len.
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ $0x20, DI
	CMPQ DX, DI
	JAE emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
	// Store the saved head and tail; these cover whatever the loop skipped.
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP emit_literal_end_standalone
	// NOTE(review): unreachable duplicate of the JMP above.
	JMP emit_literal_end_standalone

emit_literal_end_standalone_skip:
	XORQ BX, BX

emit_literal_end_standalone:
	MOVQ BX, ret+48(FP)
	RET

// func emitRepeat(dst []byte, offset int, length int) int
//
// emitRepeat writes one or more repeat codes describing length to dst and
// returns the number of bytes written. offset is only read to decide whether
// the 2-byte form that embeds offset bits can be used (length 9..11 and
// offset < 2048). dst_len is never read.
//
// Registers: AX = write pointer, BX = bytes written, CX = offset (reused as
// scratch in the 5-byte form), DX = length, then length-4, BP = scratch.
TEXT ·emitRepeat(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

	// emitRepeat
emit_repeat_again_standalone:
	// BP keeps the incoming length; DX becomes length-4 for the encodings.
	MOVL DX, BP
	LEAL -4(DX), DX
	CMPL BP, $0x08
	JLE repeat_two_standalone
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_standalone
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone

cant_repeat_two_offset_standalone:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone
	CMPL DX, $0x00010100
	JLT repeat_four_standalone
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone
	// Too long for a single code: emit the bytes 1d 00 fb ff ff, subtract
	// 16842747 (0x0100fffb) from DX and start over with DX as the new length.
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone

repeat_five_standalone:
	// 5 bytes: 16-bit 0x001d, then (length-4-65536) as 24 bits.
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_repeat_end

repeat_four_standalone:
	// 4 bytes: 16-bit 0x0019, then (length-4-256) as 16 bits.
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_repeat_end

repeat_three_standalone:
	// 3 bytes: 16-bit 0x0015, then (length-4-4) as one byte.
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_repeat_end

repeat_two_standalone:
	// 2 bytes: the 16-bit value ((length-4)<<2)|1.
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_repeat_end

repeat_two_offset_standalone:
	// 2 bytes: byte 0 = ((length-4)<<2) | 1 | ((offset>>8)<<5),
	// byte 1 = low byte of offset.
	XORQ BP, BP
	LEAL 1(BP)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX

gen_emit_repeat_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopy(dst []byte, offset int, length int) int
//
// emitCopy writes copy codes for (offset, length) to dst and returns the
// number of bytes written. Offsets >= 65536 use the 5-byte form (tag + 32-bit
// offset); smaller offsets use the 2- or 3-byte forms. When length exceeds 64,
// one maximal copy is emitted first and the remainder is written with repeat
// codes, using an inlined copy of the emitRepeat logic. dst_len is never read.
//
// Registers: AX = write pointer, BX = bytes written, CX = offset,
// DX = remaining length, BP = scratch.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

	// emitCopy
	CMPL CX, $0x00010000
	JL two_byte_offset_standalone

four_bytes_loop_back_standalone:
	CMPL DX, $0x40
	JLE four_bytes_remain_standalone
	// Length > 64: emit a 64-byte copy (0xff = ((64-1)<<2)|3) + 32-bit offset.
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAL -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	// Fewer than 4 bytes left: finish with another 5-byte copy instead of a
	// repeat code.
	CMPL DX, $0x04
	JL four_bytes_remain_standalone

	// emitRepeat
	// Inlined repeat encoder for the remainder; same structure as emitRepeat.
emit_repeat_again_standalone_emit_copy:
	MOVL DX, BP
	LEAL -4(DX), DX
	CMPL BP, $0x08
	JLE repeat_two_standalone_emit_copy
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_standalone_emit_copy
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone_emit_copy
	CMPL DX, $0x00010100
	JLT repeat_four_standalone_emit_copy
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone_emit_copy
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

repeat_four_standalone_emit_copy:
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_copy_end

repeat_three_standalone_emit_copy:
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_copy_end

repeat_two_standalone_emit_copy:
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	XORQ BP, BP
	LEAL 1(BP)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end
	// NOTE(review): unreachable; follows an unconditional JMP and has no label.
	JMP four_bytes_loop_back_standalone

four_bytes_remain_standalone:
	// Nothing left: done. Otherwise emit tag ((length-1)<<2)|3 + 32-bit offset.
	TESTL DX, DX
	JZ gen_emit_copy_end
	// MOVB sets only the low byte of BP. That is sufficient because only DL,
	// the low byte of the LEAL result, is stored.
	MOVB $0x03, BP
	LEAL -4(BP)(DX*4), DX
	MOVB DL, (AX)
	MOVL CX, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

two_byte_offset_standalone:
	CMPL DX, $0x40
	JLE two_byte_offset_short_standalone
	// Length > 64: emit a 60-byte copy (0xee = ((60-1)<<2)|2) + 16-bit offset,
	// then encode the remaining length-60 with repeat codes.
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAL -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX

	// emitRepeat
	// Second inlined repeat encoder; same structure as emitRepeat.
emit_repeat_again_standalone_emit_copy_short:
	MOVL DX, BP
	LEAL -4(DX), DX
	CMPL BP, $0x08
	JLE repeat_two_standalone_emit_copy_short
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_standalone_emit_copy_short
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone_emit_copy_short
	CMPL DX, $0x00010100
	JLT repeat_four_standalone_emit_copy_short
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone_emit_copy_short
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	XORQ BP, BP
	LEAL 1(BP)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end
	// NOTE(review): unreachable; follows an unconditional JMP and has no label.
	JMP two_byte_offset_standalone

two_byte_offset_short_standalone:
	// The 2-byte form is used only when length < 12 and offset < 2048.
	CMPL DX, $0x0c
	JGE emit_copy_three_standalone
	CMPL CX, $0x00000800
	JGE emit_copy_three_standalone
	// byte 0 = ((length-4)<<2) | 1 | ((offset>>8)<<5), byte 1 = offset low byte.
	// Only DL is stored, so the upper bits of BP left by MOVB do not matter.
	MOVB $0x01, BP
	LEAL -16(BP)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

emit_copy_three_standalone:
	// 3 bytes: tag ((length-1)<<2)|2, then offset as 16 bits.
	MOVB $0x02, BP
	LEAL -4(BP)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopyNoRepeat(dst []byte, offset int, length int) int
//
// emitCopyNoRepeat writes copy codes for (offset, length) to dst and returns
// the number of bytes written. It uses the same copy encodings as emitCopy but
// never emits repeat codes: lengths above 64 are split into several copies by
// looping. dst_len is never read.
//
// Registers: AX = write pointer, BX = bytes written, CX = offset,
// DX = remaining length, BP = scratch.
TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

	// emitCopy
	CMPL CX, $0x00010000
	JL two_byte_offset_standalone_snappy

four_bytes_loop_back_standalone_snappy:
	CMPL DX, $0x40
	JLE four_bytes_remain_standalone_snappy
	// Emit a 64-byte copy (tag 0xff + 32-bit offset) and loop on the rest.
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAL -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL four_bytes_remain_standalone_snappy
	JMP four_bytes_loop_back_standalone_snappy

four_bytes_remain_standalone_snappy:
	// Nothing left: done. Otherwise emit tag ((length-1)<<2)|3 + 32-bit offset.
	TESTL DX, DX
	JZ gen_emit_copy_end_snappy
	// Only DL is stored below, so the upper bits of BP left by MOVB are
	// irrelevant.
	MOVB $0x03, BP
	LEAL -4(BP)(DX*4), DX
	MOVB DL, (AX)
	MOVL CX, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end_snappy

two_byte_offset_standalone_snappy:
	CMPL DX, $0x40
	JLE two_byte_offset_short_standalone_snappy
	// Length > 64: emit a 60-byte copy (tag 0xee + 16-bit offset), loop again.
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAL -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX
	JMP two_byte_offset_standalone_snappy

two_byte_offset_short_standalone_snappy:
	// The 2-byte form is used only when length < 12 and offset < 2048.
	CMPL DX, $0x0c
	JGE emit_copy_three_standalone_snappy
	CMPL CX, $0x00000800
	JGE emit_copy_three_standalone_snappy
	// byte 0 = ((length-4)<<2) | 1 | ((offset>>8)<<5), byte 1 = offset low byte.
	MOVB $0x01, BP
	LEAL -16(BP)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end_snappy

emit_copy_three_standalone_snappy:
	// 3 bytes: tag ((length-1)<<2)|2, then offset as 16 bits.
	MOVB $0x02, BP
	LEAL -4(BP)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end_snappy:
	MOVQ BX, ret+40(FP)
	RET

// func matchLen(a []byte, b []byte) int
//
// matchLen returns the number of leading bytes that are equal in a and b,
// examining at most len(a) bytes. Only a_len is read; b's length is not.
// NOTE(review): presumably callers guarantee len(b) >= len(a) - confirm.
//
// Registers: AX = a, CX = b, DX = bytes left to compare (32-bit),
// BP = match length so far, BX = scratch.
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ a_base+0(FP), AX
	MOVQ b_base+24(FP), CX
	MOVQ a_len+8(FP), DX

	// matchLen
	XORL BP, BP
	CMPL DX, $0x08
	JL matchlen_single_standalone

matchlen_loopback_standalone:
	// Compare 8 bytes at a time; a non-zero XOR means a mismatch in this word.
	MOVQ (AX)(BP*1), BX
	XORQ (CX)(BP*1), BX
	TESTQ BX, BX
	JZ matchlen_loop_standalone
	// Lowest set bit index / 8 = number of matching low-order bytes.
	BSFQ BX, BX
	SARQ $0x03, BX
	LEAL (BP)(BX*1), BP
	JMP gen_match_len_end

matchlen_loop_standalone:
	LEAL -8(DX), DX
	LEAL 8(BP), BP
	CMPL DX, $0x08
	JGE matchlen_loopback_standalone

matchlen_single_standalone:
	// Fewer than 8 bytes left: compare one byte at a time.
	TESTL DX, DX
	JZ gen_match_len_end

matchlen_single_loopback_standalone:
	MOVB (AX)(BP*1), BL
	CMPB (CX)(BP*1), BL
	JNE gen_match_len_end
	LEAL 1(BP), BP
	DECL DX
	JNZ matchlen_single_loopback_standalone

gen_match_len_end:
	MOVQ BP, ret+48(FP)
	RET