// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.

// +build !appengine
// +build !noasm
// +build gc

#include "textflag.h"

// func encodeBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written
// (final AX minus dst_base), or 0 when one of the output-limit checks
// against (SP) fails. dst_len is never read by this routine.
//
// Stack frame (65560 bytes):
//   0(SP)  8 bytes  output limit: dst_base + src_len - 9 - (src_len shifted right by 5)
//   8(SP)  4 bytes  src_len - 8, upper bound for the search position
//   12(SP) 4 bytes  src position where the next pending literal run starts
//   16(SP) 4 bytes  current repeat offset (starts at 1)
//   20(SP) 4 bytes  search position to use if the current one finds nothing
//   24(SP) 65536 bytes, table of 32-bit src positions indexed by a 14-bit hash
//
// Long-lived registers:
//   AX = dst write pointer, DX = src base, CX = current src position (32-bit)
//
// NOTE(review): this file is generated; the emit_remainder short-copy fix
// below should also be applied in gen.go, otherwise regeneration reverts it.
TEXT ·encodeBlockAsm(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Zero the table: 512 iterations of 128 bytes = 65536 bytes at 24(SP).
zero_loop_encodeBlockAsm:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm

	// Initialise the stack slots described in the header comment.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)

	// The search position and the repeat offset both start at 1.
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm:
	// SI = CX + 4 + ((CX - 12(SP)) shifted right by 6): the step grows with
	// the distance since the last emit. Stop searching once SI reaches 8(SP).
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x06, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBlockAsm
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)

	// Hash the 8 loaded bytes at byte offsets 0 and 1: shift left 16 to drop
	// the top two bytes, multiply, keep the top 14 bits.
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ DI, R10
	MOVQ DI, R11
	SHRQ $0x08, R11
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10
	SHLQ $0x10, R11
	IMULQ R9, R11
	SHRQ $0x32, R11

	// Fetch both candidates (SI, R8), then record CX and CX+1 in the table.
	MOVL 24(SP)(R10*4), SI
	MOVL 24(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	LEAL 1(CX), R10
	MOVL R10, 24(SP)(R11*4)

	// R10 = hash at byte offset 2, used by no_repeat_found.
	MOVQ DI, R10
	SHRQ $0x10, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10

	// Repeat check: compare 4 bytes at CX+1 with 4 bytes at CX+1-16(SP).
	MOVL CX, R9
	SUBL 16(SP), R9
	MOVL 1(DX)(R9*1), R11
	MOVQ DI, R9
	SHRQ $0x08, R9
	CMPL R9, R11
	JNE no_repeat_found_encodeBlockAsm

	// Repeat found at DI = CX+1. R8 keeps the value of 12(SP) and is tested
	// again at repeat_extend_forward_end.
	LEAL 1(CX), DI
	MOVL 12(SP), R8
	MOVL DI, SI
	SUBL 16(SP), SI
	JZ repeat_extend_back_end_encodeBlockAsm

	// Extend the repeat backwards while DI is above R8, the preceding bytes
	// are equal and SI has not reached 0.
repeat_extend_back_loop_encodeBlockAsm:
	CMPL DI, R8
	JLE repeat_extend_back_end_encodeBlockAsm
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(DI*1), R9
	CMPB BL, R9
	JNE repeat_extend_back_end_encodeBlockAsm
	LEAL -1(DI), DI
	DECL SI
	JNZ repeat_extend_back_loop_encodeBlockAsm

repeat_extend_back_end_encodeBlockAsm:
	// Emit src[12(SP):DI] as a literal run unless it is empty.
	// R10 = source pointer, R9 = length, SI = length - 1 (selects the tag).
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm
	CMPL SI, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm
	CMPL SI, $0x00010000
	JLT three_bytes_repeat_emit_encodeBlockAsm
	CMPL SI, $0x01000000
	JLT four_bytes_repeat_emit_encodeBlockAsm

	// Tag 0xfc followed by a 4-byte length-1.
	MOVB $0xfc, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm

four_bytes_repeat_emit_encodeBlockAsm:
	// Tag 0xf8 followed by a 3-byte length-1.
	MOVL SI, R11
	SHRL $0x10, R11
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB R11, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm

three_bytes_repeat_emit_encodeBlockAsm:
	// Tag 0xf4 followed by a 2-byte length-1.
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm

two_bytes_repeat_emit_encodeBlockAsm:
	// Tag 0xf0 followed by a 1-byte length-1. Lengths up to 64 still use the
	// short copy.
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_repeat_emit_encodeBlockAsm
	JMP memmove_long_repeat_emit_encodeBlockAsm

one_byte_repeat_emit_encodeBlockAsm:
	// Single tag byte: (length-1) shifted left by 2.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeBlockAsm:
	// SI = dst pointer after the copy.
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	CMPQ R9, $0x08
	JLE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
	// Always moves 8 bytes, even when R9 is smaller.
	MOVQ (R10), R11
	MOVQ R11, (AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
	// First 8 and last 8 bytes; the two moves may overlap.
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_repeat_emit_encodeBlockAsm:
	MOVQ SI, AX
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

memmove_long_repeat_emit_encodeBlockAsm:
	// SI = dst pointer after the copy.
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// X0-X3 hold the first and last 32 bytes and are stored last with
	// unaligned moves. R13 = 64 - (AX AND 31) is the running offset of the
	// aligned (MOVOA) 32-byte stores in between.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R12
	SHRQ $0x05, R12
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R13
	SUBQ R11, R13
	DECQ R12
	JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(R10)(R13*1), R11
	LEAQ -32(AX)(R13*1), R14

emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R14)
	MOVOA X5, 16(R14)
	ADDQ $0x20, R14
	ADDQ $0x20, R11
	ADDQ $0x20, R13
	DECQ R12
	JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(R10)(R13*1), X4
	MOVOU -16(R10)(R13*1), X5
	MOVOA X4, -32(AX)(R13*1)
	MOVOA X5, -16(AX)(R13*1)
	ADDQ $0x20, R13
	CMPQ R9, R13
	JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_repeat_emit_encodeBlockAsm:
	// Skip the 4 compared bytes (which started at CX+1) and extend forward.
	// R10 = src+CX, SI = src+CX-16(SP), R9 = bytes left in src.
	ADDL $0x05, CX
	MOVL CX, SI
	SUBL 16(SP), SI
	MOVQ src_len+32(FP), R9
	SUBL CX, R9
	LEAQ (DX)(CX*1), R10
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R12 = count of equal bytes; compares 8 bytes at a time, then single bytes.
	XORL R12, R12
	CMPL R9, $0x08
	JL matchlen_single_repeat_extend_encodeBlockAsm

matchlen_loopback_repeat_extend_encodeBlockAsm:
	MOVQ (R10)(R12*1), R11
	XORQ (SI)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_repeat_extend_encodeBlockAsm

	// Index of the lowest differing bit divided by 8 = equal bytes in this word.
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP repeat_extend_forward_end_encodeBlockAsm

matchlen_loop_repeat_extend_encodeBlockAsm:
	LEAL -8(R9), R9
	LEAL 8(R12), R12
	CMPL R9, $0x08
	JGE matchlen_loopback_repeat_extend_encodeBlockAsm

matchlen_single_repeat_extend_encodeBlockAsm:
	TESTL R9, R9
	JZ repeat_extend_forward_end_encodeBlockAsm

matchlen_single_loopback_repeat_extend_encodeBlockAsm:
	MOVB (R10)(R12*1), R11
	CMPB (SI)(R12*1), R11
	JNE repeat_extend_forward_end_encodeBlockAsm
	LEAL 1(R12), R12
	DECL R9
	JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm

repeat_extend_forward_end_encodeBlockAsm:
	// SI = total length (CX - DI), DI = offset from 16(SP).
	// If 12(SP) was 0 when the repeat was detected (R8), emit a copy instead
	// of a repeat.
	ADDL R12, CX
	MOVL CX, SI
	SUBL DI, SI
	MOVL 16(SP), DI
	TESTL R8, R8
	JZ repeat_as_copy_encodeBlockAsm

	// emitRepeat
emit_repeat_again_match_repeat_encodeBlockAsm:
	MOVL SI, R8
	LEAL -4(SI), SI
	CMPL R8, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm
	CMPL DI, $0x00000800
	JLT repeat_two_offset_match_repeat_encodeBlockAsm

cant_repeat_two_offset_match_repeat_encodeBlockAsm:
	CMPL SI, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm
	CMPL SI, $0x00010100
	JLT repeat_four_match_repeat_encodeBlockAsm
	CMPL SI, $0x0100ffff
	JLT repeat_five_match_repeat_encodeBlockAsm

	// Too long for one 5-byte repeat: write a fixed 5-byte chunk and loop
	// with the reduced length.
	LEAL -16842747(SI), SI
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_repeat_encodeBlockAsm

repeat_five_match_repeat_encodeBlockAsm:
	LEAL -65536(SI), SI
	MOVL SI, DI
	MOVW $0x001d, (AX)
	MOVW SI, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_match_repeat_encodeBlockAsm:
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_match_repeat_encodeBlockAsm:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_match_repeat_encodeBlockAsm:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_match_repeat_encodeBlockAsm:
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_as_copy_encodeBlockAsm:
	// emitCopy
	// DI = offset, SI = length. Offsets below 65536 take the 2-byte-offset path.
	CMPL DI, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeBlockAsm

four_bytes_loop_back_repeat_as_copy_encodeBlockAsm:
	CMPL SI, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm

	// Tag 0xff with a 4-byte offset accounts for 64 bytes; the rest is
	// emitted as a repeat when at least 4 bytes remain.
	MOVB $0xff, (AX)
	MOVL DI, 1(AX)
	LEAL -64(SI), SI
	ADDQ $0x05, AX
	CMPL SI, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeBlockAsm

	// emitRepeat
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
	MOVL SI, R8
	LEAL -4(SI), SI
	CMPL R8, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL DI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	CMPL SI, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL SI, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL SI, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
	LEAL -16842747(SI), SI
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -65536(SI), SI
	MOVL SI, DI
	MOVW $0x001d, (AX)
	MOVW SI, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

	// Unreachable: follows an unconditional JMP and carries no label.
	JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm

four_bytes_remain_repeat_as_copy_encodeBlockAsm:
	TESTL SI, SI
	JZ repeat_end_emit_encodeBlockAsm

	// Tag byte = 3 + (length-1)*4, then the 4-byte offset. Only BL is set
	// and only the low byte of SI is stored.
	MOVB $0x03, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

two_byte_offset_repeat_as_copy_encodeBlockAsm:
	CMPL SI, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm

	// Tag 0xee with a 2-byte offset accounts for 60 bytes; the rest is
	// emitted as a repeat.
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(SI), SI
	ADDQ $0x03, AX

	// emitRepeat
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	MOVL SI, R8
	LEAL -4(SI), SI
	CMPL R8, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL DI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	CMPL SI, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL SI, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL SI, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
	LEAL -16842747(SI), SI
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -65536(SI), SI
	MOVL SI, DI
	MOVW $0x001d, (AX)
	MOVW SI, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

	// Unreachable: follows an unconditional JMP and carries no label.
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm

two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
	// The 2-byte form needs length below 12 and offset below 2048;
	// otherwise use the 3-byte form.
	CMPL SI, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
	CMPL DI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(SI*4), SI
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm

emit_copy_three_repeat_as_copy_encodeBlockAsm:
	MOVB $0x02, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeBlockAsm:
	// Everything up to CX has been emitted.
	MOVL CX, 12(SP)
	JMP search_loop_encodeBlockAsm

no_repeat_found_encodeBlockAsm:
	// Check the candidates for positions CX, CX+1 and CX+2 in turn; DI is
	// shifted right by one byte between checks.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBlockAsm
	SHRQ $0x08, DI
	MOVL 24(SP)(R10*4), SI
	LEAL 2(CX), R9
	CMPL (DX)(R8*1), DI
	JEQ candidate2_match_encodeBlockAsm
	MOVL R9, 24(SP)(R10*4)
	SHRQ $0x08, DI
	CMPL (DX)(SI*1), DI
	JEQ candidate3_match_encodeBlockAsm

	// Nothing matched: continue at the position saved in 20(SP).
	MOVL 20(SP), CX
	JMP search_loop_encodeBlockAsm

candidate3_match_encodeBlockAsm:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm

candidate2_match_encodeBlockAsm:
	MOVL R9, 24(SP)(R10*4)
	INCL CX
	MOVL R8, SI

candidate_match_encodeBlockAsm:
	// CX = match position, SI = candidate position. Extend backwards while
	// CX is above 12(SP), the preceding bytes are equal and SI is not 0.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeBlockAsm

match_extend_back_loop_encodeBlockAsm:
	CMPL CX, DI
	JLE match_extend_back_end_encodeBlockAsm
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeBlockAsm
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeBlockAsm
	JMP match_extend_back_loop_encodeBlockAsm

match_extend_back_end_encodeBlockAsm:
	// Return 0 unless AX + pending literal length + 5 is below the limit in (SP).
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 5(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBlockAsm:
	// Emit src[12(SP):CX] as a literal run unless it is empty.
	// DI = source pointer, R9 = length, R8 = length - 1. SI (candidate
	// position) is not written by this literal emit and is used afterwards.
	MOVL CX, DI
	MOVL 12(SP), R8
	CMPL R8, DI
	JEQ emit_literal_done_match_emit_encodeBlockAsm
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(R8*1), DI
	SUBL R8, R9
	LEAL -1(R9), R8
	CMPL R8, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm
	CMPL R8, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm
	CMPL R8, $0x00010000
	JLT three_bytes_match_emit_encodeBlockAsm
	CMPL R8, $0x01000000
	JLT four_bytes_match_emit_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL R8, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_match_emit_encodeBlockAsm

four_bytes_match_emit_encodeBlockAsm:
	MOVL R8, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW R8, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_encodeBlockAsm

three_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBlockAsm

two_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB R8, 1(AX)
	ADDQ $0x02, AX
	CMPL R8, $0x40
	JL memmove_match_emit_encodeBlockAsm
	JMP memmove_long_match_emit_encodeBlockAsm

one_byte_match_emit_encodeBlockAsm:
	SHLB $0x02, R8
	MOVB R8, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBlockAsm:
	// R8 = dst pointer after the copy.
	LEAQ (AX)(R9*1), R8

	// genMemMoveShort
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
	// Always moves 8 bytes, even when R9 is smaller.
	MOVQ (DI), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
	MOVQ (DI), R10
	MOVQ -8(DI)(R9*1), DI
	MOVQ R10, (AX)
	MOVQ DI, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (DI), X0
	MOVOU -16(DI)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeBlockAsm:
	MOVQ R8, AX
	JMP emit_literal_done_match_emit_encodeBlockAsm

memmove_long_match_emit_encodeBlockAsm:
	// R8 = dst pointer after the copy.
	LEAQ (AX)(R9*1), R8

	// genMemMoveLong
	// Same scheme as the repeat path: unaligned head and tail in X0-X3,
	// aligned 32-byte stores in between with R12 as the running offset.
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVQ R9, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(DI)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(DI)(R12*1), X4
	MOVOU -16(DI)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R9, R12
	JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ R8, AX

emit_literal_done_match_emit_encodeBlockAsm:
match_nolit_loop_encodeBlockAsm:
	// New repeat offset = CX - SI. Skip the 4 bytes already compared and
	// extend forward: R8 = src+CX, SI = src+candidate, DI = bytes left in src.
	MOVL CX, DI
	SUBL SI, DI
	MOVL DI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R10 = count of equal bytes; compares 8 bytes at a time, then single bytes.
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm

matchlen_loopback_match_nolit_encodeBlockAsm:
	MOVQ (R8)(R10*1), R9
	XORQ (SI)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_match_nolit_encodeBlockAsm
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP match_nolit_end_encodeBlockAsm

matchlen_loop_match_nolit_encodeBlockAsm:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm

matchlen_single_match_nolit_encodeBlockAsm:
	TESTL DI, DI
	JZ match_nolit_end_encodeBlockAsm

matchlen_single_loopback_match_nolit_encodeBlockAsm:
	MOVB (R8)(R10*1), R9
	CMPB (SI)(R10*1), R9
	JNE match_nolit_end_encodeBlockAsm
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm

match_nolit_end_encodeBlockAsm:
	// CX moves past the match; SI = offset, R10 = length including the first
	// 4 bytes; 12(SP) is set to the new CX.
	ADDL R10, CX
	MOVL 16(SP), SI
	ADDL $0x04, R10
	MOVL CX, 12(SP)

	// emitCopy
	CMPL SI, $0x00010000
	JL two_byte_offset_match_nolit_encodeBlockAsm

four_bytes_loop_back_match_nolit_encodeBlockAsm:
	CMPL R10, $0x40
	JLE four_bytes_remain_match_nolit_encodeBlockAsm
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(R10), R10
	ADDQ $0x05, AX
	CMPL R10, $0x04
	JL four_bytes_remain_match_nolit_encodeBlockAsm

	// emitRepeat
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
	MOVL R10, DI
	LEAL -4(R10), R10
	CMPL DI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	CMPL R10, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy
	CMPL R10, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy
	CMPL R10, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy
	LEAL -16842747(R10), R10
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy

repeat_five_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -65536(R10), R10
	MOVL R10, SI
	MOVW $0x001d, (AX)
	MOVW R10, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -256(R10), R10
	MOVW $0x0019, (AX)
	MOVW R10, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -4(R10), R10
	MOVW $0x0015, (AX)
	MOVB R10, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy:
	SHLL $0x02, R10
	ORL $0x01, R10
	MOVW R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	XORQ DI, DI
	LEAL 1(DI)(R10*4), R10
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

	// Unreachable: follows an unconditional JMP and carries no label.
	JMP four_bytes_loop_back_match_nolit_encodeBlockAsm

four_bytes_remain_match_nolit_encodeBlockAsm:
	TESTL R10, R10
	JZ match_nolit_emitcopy_end_encodeBlockAsm
	MOVB $0x03, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

two_byte_offset_match_nolit_encodeBlockAsm:
	CMPL R10, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(R10), R10
	ADDQ $0x03, AX

	// emitRepeat
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
	MOVL R10, DI
	LEAL -4(R10), R10
	CMPL DI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	CMPL R10, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL R10, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL R10, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
	LEAL -16842747(R10), R10
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short

repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -65536(R10), R10
	MOVL R10, SI
	MOVW $0x001d, (AX)
	MOVW R10, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -256(R10), R10
	MOVW $0x0019, (AX)
	MOVW R10, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -4(R10), R10
	MOVW $0x0015, (AX)
	MOVB R10, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, R10
	ORL $0x01, R10
	MOVW R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(R10*4), R10
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

	// Unreachable: follows an unconditional JMP and carries no label.
	JMP two_byte_offset_match_nolit_encodeBlockAsm

two_byte_offset_short_match_nolit_encodeBlockAsm:
	CMPL R10, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm
	CMPL SI, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(R10*4), R10
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm

emit_copy_three_match_nolit_encodeBlockAsm:
	MOVB $0x02, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeBlockAsm:
	// Stop searching once CX reaches 8(SP); return 0 once AX reaches the
	// limit in (SP). DI = 8 bytes starting at CX-2.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm
	MOVQ -2(DX)(CX*1), DI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm:
	// R8 = hash of the bytes at CX-2, SI = hash of the bytes at CX.
	// Record both positions; the previous entry for CX becomes the candidate.
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ DI, R8
	SHRQ $0x10, DI
	MOVQ DI, SI
	SHLQ $0x10, R8
	IMULQ R9, R8
	SHRQ $0x32, R8
	SHLQ $0x10, SI
	IMULQ R9, SI
	SHRQ $0x32, SI
	LEAL -2(CX), R9
	LEAQ 24(SP)(SI*4), R10
	MOVL (R10), SI
	MOVL R9, 24(SP)(R8*4)
	MOVL CX, (R10)

	// If the candidate's 4 bytes equal those at CX, go straight to another
	// match with no literals in between.
	CMPL (DX)(SI*1), DI
	JEQ match_nolit_loop_encodeBlockAsm
	INCL CX
	JMP search_loop_encodeBlockAsm

emit_remainder_encodeBlockAsm:
	// Return 0 unless AX + remaining length + 5 is below the limit in (SP).
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 5(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBlockAsm:
	// Emit src[12(SP):src_len] as the final literal run unless it is empty.
	// CX = source pointer, SI = length, DX = length - 1 (DX no longer holds
	// the src base from here on).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x01000000
	JLT four_bytes_emit_remainder_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm

four_bytes_emit_remainder_encodeBlockAsm:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm

three_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm

two_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBlockAsm
	JMP memmove_long_emit_remainder_encodeBlockAsm

one_byte_emit_remainder_encodeBlockAsm:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBlockAsm:
	// DX = dst pointer after the copy, BX = length.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	CMPQ BX, $0x08
	JLE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8:
	// Fix: this run ends exactly at src_len, so an unconditional 8-byte move
	// would read up to 7 bytes past the end of src (and write as many extra
	// bytes to dst) when BX is below 8. Use the 8-byte move only for BX == 8.
	CMPQ BX, $0x08
	JNE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1through7
	MOVQ (CX), SI
	MOVQ SI, (AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1through7:
	// BX is 1..7 here (the empty run was skipped above): copy byte by byte.
	// AX is restored from DX at memmove_end_copy, CX and SI are dead after this.
	MOVB (CX), SI
	MOVB SI, (AX)
	INCQ CX
	INCQ AX
	DECQ BX
	JNZ emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1through7
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

memmove_long_emit_remainder_encodeBlockAsm:
	// DX = dst pointer after the copy, BX = length.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBlockAsm:
	// Return the number of bytes written: AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm4MB(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBlockAsm4MB(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBlockAsm4MB:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm4MB
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ
$0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm4MB: MOVL CX, SI SUBL 12(SP), SI SHRL $0x06, SI LEAL 4(CX)(SI*1), SI CMPL SI, 8(SP) JGE emit_remainder_encodeBlockAsm4MB MOVQ (DX)(CX*1), DI MOVL SI, 20(SP) MOVQ $0x0000cf1bbcdcbf9b, R9 MOVQ DI, R10 MOVQ DI, R11 SHRQ $0x08, R11 SHLQ $0x10, R10 IMULQ R9, R10 SHRQ $0x32, R10 SHLQ $0x10, R11 IMULQ R9, R11 SHRQ $0x32, R11 MOVL 24(SP)(R10*4), SI MOVL 24(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) LEAL 1(CX), R10 MOVL R10, 24(SP)(R11*4) MOVQ DI, R10 SHRQ $0x10, R10 SHLQ $0x10, R10 IMULQ R9, R10 SHRQ $0x32, R10 MOVL CX, R9 SUBL 16(SP), R9 MOVL 1(DX)(R9*1), R11 MOVQ DI, R9 SHRQ $0x08, R9 CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm4MB LEAL 1(CX), DI MOVL 12(SP), R8 MOVL DI, SI SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm4MB repeat_extend_back_loop_encodeBlockAsm4MB: CMPL DI, R8 JLE repeat_extend_back_end_encodeBlockAsm4MB MOVB -1(DX)(SI*1), BL MOVB -1(DX)(DI*1), R9 CMPB BL, R9 JNE repeat_extend_back_end_encodeBlockAsm4MB LEAL -1(DI), DI DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm4MB repeat_extend_back_end_encodeBlockAsm4MB: MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(SI*1), R10 SUBL SI, R9 LEAL -1(R9), SI CMPL SI, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm4MB CMPL SI, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm4MB CMPL SI, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm4MB MOVL SI, R11 SHRL $0x10, R11 MOVB $0xf8, (AX) MOVW SI, 1(AX) MOVB R11, 3(AX) ADDQ $0x04, AX JMP memmove_long_repeat_emit_encodeBlockAsm4MB three_bytes_repeat_emit_encodeBlockAsm4MB: MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm4MB two_bytes_repeat_emit_encodeBlockAsm4MB: MOVB $0xf0, (AX) MOVB SI, 1(AX) ADDQ $0x02, AX CMPL SI, $0x40 JL memmove_repeat_emit_encodeBlockAsm4MB JMP 
memmove_long_repeat_emit_encodeBlockAsm4MB one_byte_repeat_emit_encodeBlockAsm4MB: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm4MB: LEAQ (AX)(R9*1), SI // genMemMoveShort CMPQ R9, $0x08 JLE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8 CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8: MOVQ (R10), R11 MOVQ R11, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16: MOVQ (R10), R11 MOVQ -8(R10)(R9*1), R10 MOVQ R11, (AX) MOVQ R10, -8(AX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32: MOVOU (R10), X0 MOVOU -16(R10)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64: MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm4MB: MOVQ SI, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB memmove_long_repeat_emit_encodeBlockAsm4MB: LEAQ (AX)(R9*1), SI // genMemMoveLong MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVQ R9, R12 SHRQ $0x05, R12 MOVQ AX, R11 ANDL $0x0000001f, R11 MOVQ $0x00000040, R13 SUBQ R11, R13 DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 LEAQ -32(R10)(R13*1), R11 LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (R11), X4 MOVOU 16(R11), X5 MOVOA X4, (R14) MOVOA X5, 16(R14) ADDQ $0x20, 
R14 ADDQ $0x20, R11 ADDQ $0x20, R13 DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: MOVOU -32(R10)(R13*1), X4 MOVOU -16(R10)(R13*1), X5 MOVOA X4, -32(AX)(R13*1) MOVOA X5, -16(AX)(R13*1) ADDQ $0x20, R13 CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ SI, AX emit_literal_done_repeat_emit_encodeBlockAsm4MB: ADDL $0x05, CX MOVL CX, SI SUBL 16(SP), SI MOVQ src_len+32(FP), R9 SUBL CX, R9 LEAQ (DX)(CX*1), R10 LEAQ (DX)(SI*1), SI // matchLen XORL R12, R12 CMPL R9, $0x08 JL matchlen_single_repeat_extend_encodeBlockAsm4MB matchlen_loopback_repeat_extend_encodeBlockAsm4MB: MOVQ (R10)(R12*1), R11 XORQ (SI)(R12*1), R11 TESTQ R11, R11 JZ matchlen_loop_repeat_extend_encodeBlockAsm4MB BSFQ R11, R11 SARQ $0x03, R11 LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_loop_repeat_extend_encodeBlockAsm4MB: LEAL -8(R9), R9 LEAL 8(R12), R12 CMPL R9, $0x08 JGE matchlen_loopback_repeat_extend_encodeBlockAsm4MB matchlen_single_repeat_extend_encodeBlockAsm4MB: TESTL R9, R9 JZ repeat_extend_forward_end_encodeBlockAsm4MB matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB: MOVB (R10)(R12*1), R11 CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm4MB LEAL 1(R12), R12 DECL R9 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB repeat_extend_forward_end_encodeBlockAsm4MB: ADDL R12, CX MOVL CX, SI SUBL DI, SI MOVL 16(SP), DI TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm4MB // emitRepeat MOVL SI, R8 LEAL -4(SI), SI CMPL R8, $0x08 JLE repeat_two_match_repeat_encodeBlockAsm4MB CMPL R8, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB CMPL DI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsm4MB cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB: CMPL SI, $0x00000104 JLT 
repeat_three_match_repeat_encodeBlockAsm4MB CMPL SI, $0x00010100 JLT repeat_four_match_repeat_encodeBlockAsm4MB LEAL -65536(SI), SI MOVL SI, DI MOVW $0x001d, (AX) MOVW SI, 2(AX) SARL $0x10, DI MOVB DI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_match_repeat_encodeBlockAsm4MB: LEAL -256(SI), SI MOVW $0x0019, (AX) MOVW SI, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_match_repeat_encodeBlockAsm4MB: LEAL -4(SI), SI MOVW $0x0015, (AX) MOVB SI, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_match_repeat_encodeBlockAsm4MB: SHLL $0x02, SI ORL $0x01, SI MOVW SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_match_repeat_encodeBlockAsm4MB: XORQ R8, R8 LEAL 1(R8)(SI*4), SI MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_as_copy_encodeBlockAsm4MB: // emitCopy CMPL DI, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm4MB four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB: CMPL SI, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB MOVB $0xff, (AX) MOVL DI, 1(AX) LEAL -64(SI), SI ADDQ $0x05, AX CMPL SI, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB // emitRepeat MOVL SI, R8 LEAL -4(SI), SI CMPL R8, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy CMPL R8, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy CMPL DI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: CMPL SI, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy CMPL SI, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy LEAL -65536(SI), SI MOVL SI, DI MOVW $0x001d, (AX) MOVW SI, 2(AX) SARL $0x10, DI MOVB DI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy: 
LEAL -256(SI), SI MOVW $0x0019, (AX) MOVW SI, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy: LEAL -4(SI), SI MOVW $0x0015, (AX) MOVB SI, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy: SHLL $0x02, SI ORL $0x01, SI MOVW SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: XORQ R8, R8 LEAL 1(R8)(SI*4), SI MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB: TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm4MB MOVB $0x03, BL LEAL -4(BX)(SI*4), SI MOVB SI, (AX) MOVL DI, 1(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB: CMPL SI, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB MOVB $0xee, (AX) MOVW DI, 1(AX) LEAL -60(SI), SI ADDQ $0x03, AX // emitRepeat MOVL SI, R8 LEAL -4(SI), SI CMPL R8, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short CMPL R8, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short CMPL DI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: CMPL SI, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short CMPL SI, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short LEAL -65536(SI), SI MOVL SI, DI MOVW $0x001d, (AX) MOVW SI, 2(AX) SARL $0x10, DI MOVB DI, 4(AX) ADDQ $0x05, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: LEAL -256(SI), SI MOVW $0x0019, (AX) MOVW SI, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm4MB 
repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: LEAL -4(SI), SI MOVW $0x0015, (AX) MOVB SI, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: SHLL $0x02, SI ORL $0x01, SI MOVW SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: XORQ R8, R8 LEAL 1(R8)(SI*4), SI MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB JMP two_byte_offset_repeat_as_copy_encodeBlockAsm4MB two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB: CMPL SI, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB CMPL DI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB MOVB $0x01, BL LEAL -16(BX)(SI*4), SI MOVB DI, 1(AX) SHRL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm4MB emit_copy_three_repeat_as_copy_encodeBlockAsm4MB: MOVB $0x02, BL LEAL -4(BX)(SI*4), SI MOVB SI, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm4MB: MOVL CX, 12(SP) JMP search_loop_encodeBlockAsm4MB no_repeat_found_encodeBlockAsm4MB: CMPL (DX)(SI*1), DI JEQ candidate_match_encodeBlockAsm4MB SHRQ $0x08, DI MOVL 24(SP)(R10*4), SI LEAL 2(CX), R9 CMPL (DX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm4MB MOVL R9, 24(SP)(R10*4) SHRQ $0x08, DI CMPL (DX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm4MB MOVL 20(SP), CX JMP search_loop_encodeBlockAsm4MB candidate3_match_encodeBlockAsm4MB: ADDL $0x02, CX JMP candidate_match_encodeBlockAsm4MB candidate2_match_encodeBlockAsm4MB: MOVL R9, 24(SP)(R10*4) INCL CX MOVL R8, SI candidate_match_encodeBlockAsm4MB: MOVL 12(SP), DI TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm4MB match_extend_back_loop_encodeBlockAsm4MB: CMPL CX, DI JLE match_extend_back_end_encodeBlockAsm4MB MOVB -1(DX)(SI*1), BL MOVB -1(DX)(CX*1), R8 CMPB BL, R8 JNE match_extend_back_end_encodeBlockAsm4MB 
LEAL -1(CX), CX DECL SI JZ match_extend_back_end_encodeBlockAsm4MB JMP match_extend_back_loop_encodeBlockAsm4MB match_extend_back_end_encodeBlockAsm4MB: MOVL CX, DI SUBL 12(SP), DI LEAQ 4(AX)(DI*1), DI CMPQ DI, (SP) JL match_dst_size_check_encodeBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm4MB: MOVL CX, DI MOVL 12(SP), R8 CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm4MB MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(R8*1), DI SUBL R8, R9 LEAL -1(R9), R8 CMPL R8, $0x3c JLT one_byte_match_emit_encodeBlockAsm4MB CMPL R8, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm4MB CMPL R8, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm4MB MOVL R8, R10 SHRL $0x10, R10 MOVB $0xf8, (AX) MOVW R8, 1(AX) MOVB R10, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_encodeBlockAsm4MB three_bytes_match_emit_encodeBlockAsm4MB: MOVB $0xf4, (AX) MOVW R8, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBlockAsm4MB two_bytes_match_emit_encodeBlockAsm4MB: MOVB $0xf0, (AX) MOVB R8, 1(AX) ADDQ $0x02, AX CMPL R8, $0x40 JL memmove_match_emit_encodeBlockAsm4MB JMP memmove_long_match_emit_encodeBlockAsm4MB one_byte_match_emit_encodeBlockAsm4MB: SHLB $0x02, R8 MOVB R8, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm4MB: LEAQ (AX)(R9*1), R8 // genMemMoveShort CMPQ R9, $0x08 JLE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8 CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8: MOVQ (DI), R10 MOVQ R10, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16: MOVQ (DI), R10 MOVQ -8(DI)(R9*1), DI MOVQ R10, (AX) MOVQ DI, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB 
emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32: MOVOU (DI), X0 MOVOU -16(DI)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm4MB: MOVQ R8, AX JMP emit_literal_done_match_emit_encodeBlockAsm4MB memmove_long_match_emit_encodeBlockAsm4MB: LEAQ (AX)(R9*1), R8 // genMemMoveLong MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVQ R9, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 LEAQ -32(DI)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R13) MOVOA X5, 16(R13) ADDQ $0x20, R13 ADDQ $0x20, R10 ADDQ $0x20, R12 DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: MOVOU -32(DI)(R12*1), X4 MOVOU -16(DI)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ R8, AX emit_literal_done_match_emit_encodeBlockAsm4MB: match_nolit_loop_encodeBlockAsm4MB: MOVL CX, DI SUBL SI, DI MOVL DI, 16(SP) ADDL $0x04, CX ADDL $0x04, SI MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(SI*1), SI // matchLen XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm4MB matchlen_loopback_match_nolit_encodeBlockAsm4MB: MOVQ (R8)(R10*1), R9 XORQ (SI)(R10*1), R9 
TESTQ R9, R9 JZ matchlen_loop_match_nolit_encodeBlockAsm4MB BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_loop_match_nolit_encodeBlockAsm4MB: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm4MB matchlen_single_match_nolit_encodeBlockAsm4MB: TESTL DI, DI JZ match_nolit_end_encodeBlockAsm4MB matchlen_single_loopback_match_nolit_encodeBlockAsm4MB: MOVB (R8)(R10*1), R9 CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm4MB LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm4MB match_nolit_end_encodeBlockAsm4MB: ADDL R10, CX MOVL 16(SP), SI ADDL $0x04, R10 MOVL CX, 12(SP) // emitCopy CMPL SI, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm4MB four_bytes_loop_back_match_nolit_encodeBlockAsm4MB: CMPL R10, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm4MB MOVB $0xff, (AX) MOVL SI, 1(AX) LEAL -64(R10), R10 ADDQ $0x05, AX CMPL R10, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm4MB // emitRepeat MOVL R10, DI LEAL -4(R10), R10 CMPL DI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: CMPL R10, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy CMPL R10, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy LEAL -65536(R10), R10 MOVL R10, SI MOVW $0x001d, (AX) MOVW R10, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy: LEAL -256(R10), R10 MOVW $0x0019, (AX) MOVW R10, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy: LEAL -4(R10), R10 MOVW $0x0015, (AX) MOVB R10, 2(AX) ADDQ $0x03, AX JMP 
match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy: SHLL $0x02, R10 ORL $0x01, R10 MOVW R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: XORQ DI, DI LEAL 1(DI)(R10*4), R10 MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, R10 MOVB R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB JMP four_bytes_loop_back_match_nolit_encodeBlockAsm4MB four_bytes_remain_match_nolit_encodeBlockAsm4MB: TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm4MB MOVB $0x03, BL LEAL -4(BX)(R10*4), R10 MOVB R10, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_match_nolit_encodeBlockAsm4MB: CMPL R10, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm4MB MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(R10), R10 ADDQ $0x03, AX // emitRepeat MOVL R10, DI LEAL -4(R10), R10 CMPL DI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: CMPL R10, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short CMPL R10, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short LEAL -65536(R10), R10 MOVL R10, SI MOVW $0x001d, (AX) MOVW R10, 2(AX) SARL $0x10, SI MOVB SI, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short: LEAL -256(R10), R10 MOVW $0x0019, (AX) MOVW R10, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short: LEAL -4(R10), R10 MOVW $0x0015, (AX) MOVB R10, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB 
repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short: SHLL $0x02, R10 ORL $0x01, R10 MOVW R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: XORQ DI, DI LEAL 1(DI)(R10*4), R10 MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, R10 MOVB R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB JMP two_byte_offset_match_nolit_encodeBlockAsm4MB two_byte_offset_short_match_nolit_encodeBlockAsm4MB: CMPL R10, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm4MB CMPL SI, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm4MB MOVB $0x01, BL LEAL -16(BX)(R10*4), R10 MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, R10 MOVB R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB emit_copy_three_match_nolit_encodeBlockAsm4MB: MOVB $0x02, BL LEAL -4(BX)(R10*4), R10 MOVB R10, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm4MB: CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm4MB MOVQ -2(DX)(CX*1), DI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm4MB: MOVQ $0x0000cf1bbcdcbf9b, R9 MOVQ DI, R8 SHRQ $0x10, DI MOVQ DI, SI SHLQ $0x10, R8 IMULQ R9, R8 SHRQ $0x32, R8 SHLQ $0x10, SI IMULQ R9, SI SHRQ $0x32, SI LEAL -2(CX), R9 LEAQ 24(SP)(SI*4), R10 MOVL (R10), SI MOVL R9, 24(SP)(R8*4) MOVL CX, (R10) CMPL (DX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm4MB INCL CX JMP search_loop_encodeBlockAsm4MB emit_remainder_encodeBlockAsm4MB: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 4(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsm4MB MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm4MB: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB MOVL CX, SI MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm4MB 
CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm4MB CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm4MB MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_long_emit_remainder_encodeBlockAsm4MB three_bytes_emit_remainder_encodeBlockAsm4MB: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBlockAsm4MB two_bytes_emit_remainder_encodeBlockAsm4MB: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBlockAsm4MB JMP memmove_long_emit_remainder_encodeBlockAsm4MB one_byte_emit_remainder_encodeBlockAsm4MB: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm4MB: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveShort CMPQ BX, $0x08 JLE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8: MOVQ (CX), SI MOVQ SI, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16: MOVQ (CX), SI MOVQ -8(CX)(BX*1), CX MOVQ SI, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm4MB: MOVQ 
DX, AX JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB memmove_long_emit_remainder_encodeBlockAsm4MB: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI MOVQ AX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 LEAQ -32(CX)(R8*1), SI LEAQ -32(AX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 MOVOU 16(SI), X5 MOVOA X4, (R9) MOVOA X5, 16(R9) ADDQ $0x20, R9 ADDQ $0x20, SI ADDQ $0x20, R8 DECQ DI JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32: MOVOU -32(CX)(R8*1), X4 MOVOU -16(CX)(R8*1), X5 MOVOA X4, -32(AX)(R8*1) MOVOA X5, -16(AX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsm4MB: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm12B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBlockAsm12B(SB), $16408-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000080, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeBlockAsm12B: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBlockAsm12B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX LEAQ -8(CX), SI MOVL SI, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm12B: MOVL CX, SI SUBL 12(SP), SI SHRL $0x05, SI LEAL 4(CX)(SI*1), SI CMPL SI, 8(SP) JGE emit_remainder_encodeBlockAsm12B 
MOVQ (DX)(CX*1), DI MOVL SI, 20(SP) MOVQ $0x000000cf1bbcdcbb, R9 MOVQ DI, R10 MOVQ DI, R11 SHRQ $0x08, R11 SHLQ $0x18, R10 IMULQ R9, R10 SHRQ $0x34, R10 SHLQ $0x18, R11 IMULQ R9, R11 SHRQ $0x34, R11 MOVL 24(SP)(R10*4), SI MOVL 24(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) LEAL 1(CX), R10 MOVL R10, 24(SP)(R11*4) MOVQ DI, R10 SHRQ $0x10, R10 SHLQ $0x18, R10 IMULQ R9, R10 SHRQ $0x34, R10 MOVL CX, R9 SUBL 16(SP), R9 MOVL 1(DX)(R9*1), R11 MOVQ DI, R9 SHRQ $0x08, R9 CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm12B LEAL 1(CX), DI MOVL 12(SP), R8 MOVL DI, SI SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: CMPL DI, R8 JLE repeat_extend_back_end_encodeBlockAsm12B MOVB -1(DX)(SI*1), BL MOVB -1(DX)(DI*1), R9 CMPB BL, R9 JNE repeat_extend_back_end_encodeBlockAsm12B LEAL -1(DI), DI DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(SI*1), R10 SUBL SI, R9 LEAL -1(R9), SI CMPL SI, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm12B CMPL SI, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm12B MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB SI, 1(AX) ADDQ $0x02, AX CMPL SI, $0x40 JL memmove_repeat_emit_encodeBlockAsm12B JMP memmove_long_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm12B: LEAQ (AX)(R9*1), SI // genMemMoveShort CMPQ R9, $0x08 JLE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: MOVQ (R10), R11 MOVQ R11, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: MOVQ (R10), R11 MOVQ -8(R10)(R9*1), R10 MOVQ R11, (AX) MOVQ R10, -8(AX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: MOVOU (R10), X0 MOVOU -16(R10)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm12B: MOVQ SI, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm12B memmove_long_repeat_emit_encodeBlockAsm12B: LEAQ (AX)(R9*1), SI // genMemMoveLong MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVQ R9, R12 SHRQ $0x05, R12 MOVQ AX, R11 ANDL $0x0000001f, R11 MOVQ $0x00000040, R13 SUBQ R11, R13 DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(R10)(R13*1), R11 LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: MOVOU (R11), X4 MOVOU 16(R11), X5 MOVOA X4, (R14) MOVOA X5, 16(R14) ADDQ $0x20, R14 ADDQ $0x20, R11 ADDQ $0x20, R13 DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(R10)(R13*1), X4 MOVOU -16(R10)(R13*1), X5 MOVOA X4, -32(AX)(R13*1) MOVOA X5, -16(AX)(R13*1) ADDQ $0x20, R13 CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ SI, AX 
emit_literal_done_repeat_emit_encodeBlockAsm12B: ADDL $0x05, CX MOVL CX, SI SUBL 16(SP), SI MOVQ src_len+32(FP), R9 SUBL CX, R9 LEAQ (DX)(CX*1), R10 LEAQ (DX)(SI*1), SI // matchLen XORL R12, R12 CMPL R9, $0x08 JL matchlen_single_repeat_extend_encodeBlockAsm12B matchlen_loopback_repeat_extend_encodeBlockAsm12B: MOVQ (R10)(R12*1), R11 XORQ (SI)(R12*1), R11 TESTQ R11, R11 JZ matchlen_loop_repeat_extend_encodeBlockAsm12B BSFQ R11, R11 SARQ $0x03, R11 LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_loop_repeat_extend_encodeBlockAsm12B: LEAL -8(R9), R9 LEAL 8(R12), R12 CMPL R9, $0x08 JGE matchlen_loopback_repeat_extend_encodeBlockAsm12B matchlen_single_repeat_extend_encodeBlockAsm12B: TESTL R9, R9 JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_single_loopback_repeat_extend_encodeBlockAsm12B: MOVB (R10)(R12*1), R11 CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm12B LEAL 1(R12), R12 DECL R9 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm12B repeat_extend_forward_end_encodeBlockAsm12B: ADDL R12, CX MOVL CX, SI SUBL DI, SI MOVL 16(SP), DI TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm12B // emitRepeat MOVL SI, R8 LEAL -4(SI), SI CMPL R8, $0x08 JLE repeat_two_match_repeat_encodeBlockAsm12B CMPL R8, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B CMPL DI, $0x00000800 JLT repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: CMPL SI, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm12B LEAL -256(SI), SI MOVW $0x0019, (AX) MOVW SI, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_encodeBlockAsm12B: LEAL -4(SI), SI MOVW $0x0015, (AX) MOVB SI, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_encodeBlockAsm12B: SHLL $0x02, SI ORL $0x01, SI MOVW SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_encodeBlockAsm12B: XORQ R8, R8 LEAL 
1(R8)(SI*4), SI MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: // emitCopy two_byte_offset_repeat_as_copy_encodeBlockAsm12B: CMPL SI, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B MOVB $0xee, (AX) MOVW DI, 1(AX) LEAL -60(SI), SI ADDQ $0x03, AX // emitRepeat MOVL SI, R8 LEAL -4(SI), SI CMPL R8, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL R8, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL DI, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: CMPL SI, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short LEAL -256(SI), SI MOVW $0x0019, (AX) MOVW SI, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: LEAL -4(SI), SI MOVW $0x0015, (AX) MOVB SI, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: SHLL $0x02, SI ORL $0x01, SI MOVW SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: XORQ R8, R8 LEAL 1(R8)(SI*4), SI MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: CMPL SI, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B CMPL DI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(SI*4), SI MOVB DI, 1(AX) SHRL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(SI*4), SI MOVB SI, 
(AX) MOVW DI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm12B: MOVL CX, 12(SP) JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: CMPL (DX)(SI*1), DI JEQ candidate_match_encodeBlockAsm12B SHRQ $0x08, DI MOVL 24(SP)(R10*4), SI LEAL 2(CX), R9 CMPL (DX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm12B MOVL R9, 24(SP)(R10*4) SHRQ $0x08, DI CMPL (DX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm12B MOVL 20(SP), CX JMP search_loop_encodeBlockAsm12B candidate3_match_encodeBlockAsm12B: ADDL $0x02, CX JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: MOVL R9, 24(SP)(R10*4) INCL CX MOVL R8, SI candidate_match_encodeBlockAsm12B: MOVL 12(SP), DI TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: CMPL CX, DI JLE match_extend_back_end_encodeBlockAsm12B MOVB -1(DX)(SI*1), BL MOVB -1(DX)(CX*1), R8 CMPB BL, R8 JNE match_extend_back_end_encodeBlockAsm12B LEAL -1(CX), CX DECL SI JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: MOVL CX, DI SUBL 12(SP), DI LEAQ 3(AX)(DI*1), DI CMPQ DI, (SP) JL match_dst_size_check_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm12B: MOVL CX, DI MOVL 12(SP), R8 CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm12B MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(R8*1), DI SUBL R8, R9 LEAL -1(R9), R8 CMPL R8, $0x3c JLT one_byte_match_emit_encodeBlockAsm12B CMPL R8, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm12B MOVB $0xf4, (AX) MOVW R8, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB R8, 1(AX) ADDQ $0x02, AX CMPL R8, $0x40 JL memmove_match_emit_encodeBlockAsm12B JMP memmove_long_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: SHLB $0x02, R8 MOVB R8, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm12B: LEAQ (AX)(R9*1), R8 // 
genMemMoveShort CMPQ R9, $0x08 JLE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8: MOVQ (DI), R10 MOVQ R10, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: MOVQ (DI), R10 MOVQ -8(DI)(R9*1), DI MOVQ R10, (AX) MOVQ DI, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: MOVOU (DI), X0 MOVOU -16(DI)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm12B: MOVQ R8, AX JMP emit_literal_done_match_emit_encodeBlockAsm12B memmove_long_match_emit_encodeBlockAsm12B: LEAQ (AX)(R9*1), R8 // genMemMoveLong MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVQ R9, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(DI)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R13) MOVOA X5, 16(R13) ADDQ $0x20, R13 ADDQ $0x20, R10 ADDQ $0x20, R12 DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(DI)(R12*1), X4 
MOVOU -16(DI)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ R8, AX emit_literal_done_match_emit_encodeBlockAsm12B: match_nolit_loop_encodeBlockAsm12B: MOVL CX, DI SUBL SI, DI MOVL DI, 16(SP) ADDL $0x04, CX ADDL $0x04, SI MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(SI*1), SI // matchLen XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm12B matchlen_loopback_match_nolit_encodeBlockAsm12B: MOVQ (R8)(R10*1), R9 XORQ (SI)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_match_nolit_encodeBlockAsm12B BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_loop_match_nolit_encodeBlockAsm12B: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm12B matchlen_single_match_nolit_encodeBlockAsm12B: TESTL DI, DI JZ match_nolit_end_encodeBlockAsm12B matchlen_single_loopback_match_nolit_encodeBlockAsm12B: MOVB (R8)(R10*1), R9 CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm12B LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B match_nolit_end_encodeBlockAsm12B: ADDL R10, CX MOVL 16(SP), SI ADDL $0x04, R10 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeBlockAsm12B: CMPL R10, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(R10), R10 ADDQ $0x03, AX // emitRepeat MOVL R10, DI LEAL -4(R10), R10 CMPL DI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL SI, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: CMPL R10, $0x00000104 JLT 
repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short LEAL -256(R10), R10 MOVW $0x0019, (AX) MOVW R10, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAL -4(R10), R10 MOVW $0x0015, (AX) MOVB R10, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: SHLL $0x02, R10 ORL $0x01, R10 MOVW R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: XORQ DI, DI LEAL 1(DI)(R10*4), R10 MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, R10 MOVB R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B JMP two_byte_offset_match_nolit_encodeBlockAsm12B two_byte_offset_short_match_nolit_encodeBlockAsm12B: CMPL R10, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm12B CMPL SI, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(R10*4), R10 MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, R10 MOVB R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(R10*4), R10 MOVB R10, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm12B: CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm12B MOVQ -2(DX)(CX*1), DI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: MOVQ $0x000000cf1bbcdcbb, R9 MOVQ DI, R8 SHRQ $0x10, DI MOVQ DI, SI SHLQ $0x18, R8 IMULQ R9, R8 SHRQ $0x34, R8 SHLQ $0x18, SI IMULQ R9, SI SHRQ $0x34, SI LEAL -2(CX), R9 LEAQ 24(SP)(SI*4), R10 MOVL (R10), SI MOVL R9, 24(SP)(R8*4) MOVL CX, (R10) CMPL (DX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm12B INCL CX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL 
emit_remainder_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm12B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B MOVL CX, SI MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm12B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBlockAsm12B JMP memmove_long_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm12B: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveShort CMPQ BX, $0x08 JLE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8: MOVQ (CX), SI MOVQ SI, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16: MOVQ (CX), SI MOVQ -8(CX)(BX*1), CX MOVQ SI, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, 
-32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

	// NOTE: the instructions down to the first RET below are the tail of
	// encodeBlockAsm12B (its beginning precedes this span); they are
	// reproduced unchanged.
memmove_end_copy_emit_remainder_encodeBlockAsm12B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm12B

memmove_long_emit_remainder_encodeBlockAsm12B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBlockAsm12B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst using a 1024-entry (10-bit) hash table kept on the
// stack. Returns the number of bytes written to dst, or 0 when the output
// would reach the destination limit computed below.
//
// Stack frame:
//    0(SP)  dst limit pointer: dst_base + len(src) - 9 - len(src)>>5
//    8(SP)  search limit: len(src) - 8
//   12(SP)  nextEmit: src index of the first byte not yet emitted
//   16(SP)  current repeat offset (starts at 1)
//   20(SP)  next search position (used when no candidate matches)
//   24(SP)  hash table, 1024 x uint32 src indexes (4096 bytes)
//
// Long-lived registers: AX = dst write pointer, CX = current src index,
// DX = src base pointer (reused as scratch once the remainder is emitted).
//
// NOTE: this file is generated (see the header: gen.go). The exact-size
// remainder copy below differs from the generated output and must also be
// applied in the generator, otherwise regeneration will drop it.
TEXT ·encodeBlockAsm10B(SB), $4120-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000020, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table: 0x20 iterations x 128 bytes = 4096 bytes.
zero_loop_encodeBlockAsm10B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm10B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

	// Main search loop. The skip distance grows with the amount of pending
	// literal data: next = s + 4 + (s - nextEmit) >> 5.
search_loop_encodeBlockAsm10B:
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x05, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBlockAsm10B
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)

	// Hash the 4 bytes at s and at s+1: ((x << 32) * 0x9e3779b1) >> 54.
	MOVQ $0x9e3779b1, R9
	MOVQ DI, R10
	MOVQ DI, R11
	SHRQ $0x08, R11
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x36, R10
	SHLQ $0x20, R11
	IMULQ R9, R11
	SHRQ $0x36, R11

	// Load both candidates, then store s and s+1 in their slots.
	MOVL 24(SP)(R10*4), SI
	MOVL 24(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	LEAL 1(CX), R10
	MOVL R10, 24(SP)(R11*4)

	// Hash of the 4 bytes at s+2 (kept in R10 for the third candidate).
	MOVQ DI, R10
	SHRQ $0x10, R10
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x36, R10

	// Repeat check: compare src[s+1-repeat:][:4] with src[s+1:][:4].
	MOVL CX, R9
	SUBL 16(SP), R9
	MOVL 1(DX)(R9*1), R11
	MOVQ DI, R9
	SHRQ $0x08, R9
	CMPL R9, R11
	JNE no_repeat_found_encodeBlockAsm10B
	LEAL 1(CX), DI
	MOVL 12(SP), R8
	MOVL DI, SI
	SUBL 16(SP), SI
	JZ repeat_extend_back_end_encodeBlockAsm10B

	// Extend the repeat backwards while bytes match and DI > nextEmit.
repeat_extend_back_loop_encodeBlockAsm10B:
	CMPL DI, R8
	JLE repeat_extend_back_end_encodeBlockAsm10B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(DI*1), R9
	CMPB BL, R9
	JNE repeat_extend_back_end_encodeBlockAsm10B
	LEAL -1(DI), DI
	DECL SI
	JNZ repeat_extend_back_loop_encodeBlockAsm10B

	// Emit pending literals src[nextEmit:DI] (skipped when empty).
repeat_extend_back_end_encodeBlockAsm10B:
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm10B
	CMPL SI, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm10B

	// Tag 0xf4 followed by a 2-byte (length-1).
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm10B

	// Tag 0xf0 followed by a 1-byte (length-1).
two_bytes_repeat_emit_encodeBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_repeat_emit_encodeBlockAsm10B
	JMP memmove_long_repeat_emit_encodeBlockAsm10B

	// Single tag byte: (length-1) << 2.
one_byte_repeat_emit_encodeBlockAsm10B:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeBlockAsm10B:
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Lengths <= 8 copy a full 8 bytes; the load stays inside src here
	// because this path is only reached while s+4 < len(src)-8.
	CMPQ R9, $0x08
	JLE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8:
	MOVQ (R10), R11
	MOVQ R11, (AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_repeat_emit_encodeBlockAsm10B:
	MOVQ SI, AX
	JMP emit_literal_done_repeat_emit_encodeBlockAsm10B

memmove_long_repeat_emit_encodeBlockAsm10B:
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// First and last 32 bytes are loaded up front and stored last; the
	// middle is copied in 32-byte steps with stores aligned to 32 bytes
	// (R13 starts at 64 - (AX & 31)).
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R12
	SHRQ $0x05, R12
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R13
	SUBQ R11, R13
	DECQ R12
	JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(R10)(R13*1), R11
	LEAQ -32(AX)(R13*1), R14

emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R14)
	MOVOA X5, 16(R14)
	ADDQ $0x20, R14
	ADDQ $0x20, R11
	ADDQ $0x20, R13
	DECQ R12
	JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(R10)(R13*1), X4
	MOVOU -16(R10)(R13*1), X5
	MOVOA X4, -32(AX)(R13*1)
	MOVOA X5, -16(AX)(R13*1)
	ADDQ $0x20, R13
	CMPQ R9, R13
	JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

	// Extend the repeat forwards starting at s+5.
emit_literal_done_repeat_emit_encodeBlockAsm10B:
	ADDL $0x05, CX
	MOVL CX, SI
	SUBL 16(SP), SI
	MOVQ src_len+32(FP), R9
	SUBL CX, R9
	LEAQ (DX)(CX*1), R10
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R12 = number of equal bytes; 8 at a time, then byte by byte.
	XORL R12, R12
	CMPL R9, $0x08
	JL matchlen_single_repeat_extend_encodeBlockAsm10B

matchlen_loopback_repeat_extend_encodeBlockAsm10B:
	MOVQ (R10)(R12*1), R11
	XORQ (SI)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_repeat_extend_encodeBlockAsm10B
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP repeat_extend_forward_end_encodeBlockAsm10B

matchlen_loop_repeat_extend_encodeBlockAsm10B:
	LEAL -8(R9), R9
	LEAL 8(R12), R12
	CMPL R9, $0x08
	JGE matchlen_loopback_repeat_extend_encodeBlockAsm10B

matchlen_single_repeat_extend_encodeBlockAsm10B:
	TESTL R9, R9
	JZ repeat_extend_forward_end_encodeBlockAsm10B

matchlen_single_loopback_repeat_extend_encodeBlockAsm10B:
	MOVB (R10)(R12*1), R11
	CMPB (SI)(R12*1), R11
	JNE repeat_extend_forward_end_encodeBlockAsm10B
	LEAL 1(R12), R12
	DECL R9
	JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm10B

	// SI = match length, DI = repeat offset. R8 still holds the nextEmit
	// value loaded before the literals were emitted; when it is zero the
	// match is written as a copy instead of a repeat.
repeat_extend_forward_end_encodeBlockAsm10B:
	ADDL R12, CX
	MOVL CX, SI
	SUBL DI, SI
	MOVL 16(SP), DI
	TESTL R8, R8
	JZ repeat_as_copy_encodeBlockAsm10B

	// emitRepeat
	MOVL SI, R8
	LEAL -4(SI), SI
	CMPL R8, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm10B
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
	CMPL DI, $0x00000800
	JLT repeat_two_offset_match_repeat_encodeBlockAsm10B

cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
	CMPL SI, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm10B
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm10B

repeat_three_match_repeat_encodeBlockAsm10B:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm10B

repeat_two_match_repeat_encodeBlockAsm10B:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm10B

repeat_two_offset_match_repeat_encodeBlockAsm10B:
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm10B

repeat_as_copy_encodeBlockAsm10B:
	// emitCopy
two_byte_offset_repeat_as_copy_encodeBlockAsm10B:
	CMPL SI, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B

	// Longer than 64: tag 0xee + 2-byte offset accounts for 60 bytes,
	// the rest is emitted as a repeat.
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(SI), SI
	ADDQ $0x03, AX

	// emitRepeat
	MOVL SI, R8
	LEAL -4(SI), SI
	CMPL R8, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
	CMPL DI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	CMPL SI, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm10B

repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm10B

repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm10B

repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm10B

	// Unreachable (follows an unconditional JMP); kept as generated.
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B

two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
	CMPL SI, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
	CMPL DI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B

	// 2-byte copy; only the low byte of the LEAL result is stored, so
	// the upper bits of BX do not matter.
	MOVB $0x01, BL
	LEAL -16(BX)(SI*4), SI
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm10B

	// 3-byte copy: tag byte, then the 2-byte offset.
emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
	MOVB $0x02, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeBlockAsm10B:
	MOVL CX, 12(SP)
	JMP search_loop_encodeBlockAsm10B

	// No repeat: test the candidates for s, s+1 and s+2 in turn.
no_repeat_found_encodeBlockAsm10B:
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBlockAsm10B
	SHRQ $0x08, DI
	MOVL 24(SP)(R10*4), SI
	LEAL 2(CX), R9
	CMPL (DX)(R8*1), DI
	JEQ candidate2_match_encodeBlockAsm10B
	MOVL R9, 24(SP)(R10*4)
	SHRQ $0x08, DI
	CMPL (DX)(SI*1), DI
	JEQ candidate3_match_encodeBlockAsm10B
	MOVL 20(SP), CX
	JMP search_loop_encodeBlockAsm10B

candidate3_match_encodeBlockAsm10B:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm10B

candidate2_match_encodeBlockAsm10B:
	MOVL R9, 24(SP)(R10*4)
	INCL CX
	MOVL R8, SI

	// CX = match position, SI = candidate position.
candidate_match_encodeBlockAsm10B:
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeBlockAsm10B

	// Extend the match backwards while bytes match and CX > nextEmit.
match_extend_back_loop_encodeBlockAsm10B:
	CMPL CX, DI
	JLE match_extend_back_end_encodeBlockAsm10B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeBlockAsm10B
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeBlockAsm10B
	JMP match_extend_back_loop_encodeBlockAsm10B

	// Return 0 unless dst + 3 + pending literal length is below the limit.
match_extend_back_end_encodeBlockAsm10B:
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Emit pending literals src[nextEmit:CX] (skipped when empty).
match_dst_size_check_encodeBlockAsm10B:
	MOVL CX, DI
	MOVL 12(SP), R8
	CMPL R8, DI
	JEQ emit_literal_done_match_emit_encodeBlockAsm10B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(R8*1), DI
	SUBL R8, R9
	LEAL -1(R9), R8
	CMPL R8, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm10B
	CMPL R8, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBlockAsm10B

two_bytes_match_emit_encodeBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB R8, 1(AX)
	ADDQ $0x02, AX
	CMPL R8, $0x40
	JL memmove_match_emit_encodeBlockAsm10B
	JMP memmove_long_match_emit_encodeBlockAsm10B

one_byte_match_emit_encodeBlockAsm10B:
	SHLB $0x02, R8
	MOVB R8, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBlockAsm10B:
	LEAQ (AX)(R9*1), R8

	// genMemMoveShort
	// Lengths <= 8 copy a full 8 bytes; the load stays inside src here
	// because this path is only reached while s+4 < len(src)-8.
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8:
	MOVQ (DI), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
	MOVQ (DI), R10
	MOVQ -8(DI)(R9*1), DI
	MOVQ R10, (AX)
	MOVQ DI, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
	MOVOU (DI), X0
	MOVOU -16(DI)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeBlockAsm10B:
	MOVQ R8, AX
	JMP emit_literal_done_match_emit_encodeBlockAsm10B

memmove_long_match_emit_encodeBlockAsm10B:
	LEAQ (AX)(R9*1), R8

	// genMemMoveLong
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVQ R9, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(DI)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(DI)(R12*1), X4
	MOVOU -16(DI)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R9, R12
	JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ R8, AX

	// Match with no pending literals: record the new repeat offset
	// (s - candidate), skip the 4 bytes already compared, then measure
	// the rest of the match.
emit_literal_done_match_emit_encodeBlockAsm10B:
match_nolit_loop_encodeBlockAsm10B:
	MOVL CX, DI
	SUBL SI, DI
	MOVL DI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(SI*1), SI

	// matchLen
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm10B

matchlen_loopback_match_nolit_encodeBlockAsm10B:
	MOVQ (R8)(R10*1), R9
	XORQ (SI)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_match_nolit_encodeBlockAsm10B
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP match_nolit_end_encodeBlockAsm10B

matchlen_loop_match_nolit_encodeBlockAsm10B:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm10B

matchlen_single_match_nolit_encodeBlockAsm10B:
	TESTL DI, DI
	JZ match_nolit_end_encodeBlockAsm10B

matchlen_single_loopback_match_nolit_encodeBlockAsm10B:
	MOVB (R8)(R10*1), R9
	CMPB (SI)(R10*1), R9
	JNE match_nolit_end_encodeBlockAsm10B
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm10B

	// R10 = total match length (including the first 4 bytes), SI = offset.
match_nolit_end_encodeBlockAsm10B:
	ADDL R10, CX
	MOVL 16(SP), SI
	ADDL $0x04, R10
	MOVL CX, 12(SP)

	// emitCopy
two_byte_offset_match_nolit_encodeBlockAsm10B:
	CMPL R10, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm10B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(R10), R10
	ADDQ $0x03, AX

	// emitRepeat
	MOVL R10, DI
	LEAL -4(R10), R10
	CMPL DI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
	CMPL R10, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
	LEAL -256(R10), R10
	MOVW $0x0019, (AX)
	MOVW R10, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
	LEAL -4(R10), R10
	MOVW $0x0015, (AX)
	MOVB R10, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
	SHLL $0x02, R10
	ORL $0x01, R10
	MOVW R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(R10*4), R10
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm10B

	// Unreachable (follows an unconditional JMP); kept as generated.
	JMP two_byte_offset_match_nolit_encodeBlockAsm10B

two_byte_offset_short_match_nolit_encodeBlockAsm10B:
	CMPL R10, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm10B
	CMPL SI, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBlockAsm10B
	MOVB $0x01, BL
	LEAL -16(BX)(R10*4), R10
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm10B

emit_copy_three_match_nolit_encodeBlockAsm10B:
	MOVB $0x02, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

	// After a copy: stop at the search limit, return 0 if dst reached its
	// limit, otherwise index s-2 and s and check for an immediate match.
match_nolit_emitcopy_end_encodeBlockAsm10B:
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm10B
	MOVQ -2(DX)(CX*1), DI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm10B:
	MOVQ $0x9e3779b1, R9
	MOVQ DI, R8
	SHRQ $0x10, DI
	MOVQ DI, SI
	SHLQ $0x20, R8
	IMULQ R9, R8
	SHRQ $0x36, R8
	SHLQ $0x20, SI
	IMULQ R9, SI
	SHRQ $0x36, SI
	LEAL -2(CX), R9
	LEAQ 24(SP)(SI*4), R10
	MOVL (R10), SI
	MOVL R9, 24(SP)(R8*4)
	MOVL CX, (R10)
	CMPL (DX)(SI*1), DI
	JEQ match_nolit_loop_encodeBlockAsm10B
	INCL CX
	JMP search_loop_encodeBlockAsm10B

	// Emit src[nextEmit:] as literals. Returns 0 unless
	// dst + 3 + remaining length is below the limit at (SP).
emit_remainder_encodeBlockAsm10B:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBlockAsm10B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm10B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm10B

two_bytes_emit_remainder_encodeBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBlockAsm10B
	JMP memmove_long_emit_remainder_encodeBlockAsm10B

one_byte_emit_remainder_encodeBlockAsm10B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

	// CX = source pointer, BX = length (1..64), DX = dst end pointer.
memmove_emit_remainder_encodeBlockAsm10B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	// The remainder ends exactly at the end of src, so an unconditional
	// 8-byte move for lengths below 8 would read past the end of src and
	// write stray bytes past the encoded block. Copy exact sizes instead.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2
	JEQ emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64

	// 1 or 2 bytes: first and last byte (the same byte when length is 1).
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B

	// Exactly 3 bytes: one word plus one byte.
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B

	// 4..7 bytes: two (possibly overlapping) 4-byte moves.
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7:
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B

	// 8..16 bytes: two (possibly overlapping) 8-byte moves.
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm10B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm10B

memmove_long_emit_remainder_encodeBlockAsm10B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

	// Return the number of bytes written: AX - dst_base.
emit_literal_done_emit_remainder_encodeBlockAsm10B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000008, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm8B
	MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX LEAQ -9(CX), DX LEAQ -8(CX), SI MOVL SI, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBlockAsm8B: MOVL CX, SI SUBL 12(SP), SI SHRL $0x04, SI LEAL 4(CX)(SI*1), SI CMPL SI, 8(SP) JGE emit_remainder_encodeBlockAsm8B MOVQ (DX)(CX*1), DI MOVL SI, 20(SP) MOVQ $0x9e3779b1, R9 MOVQ DI, R10 MOVQ DI, R11 SHRQ $0x08, R11 SHLQ $0x20, R10 IMULQ R9, R10 SHRQ $0x38, R10 SHLQ $0x20, R11 IMULQ R9, R11 SHRQ $0x38, R11 MOVL 24(SP)(R10*4), SI MOVL 24(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) LEAL 1(CX), R10 MOVL R10, 24(SP)(R11*4) MOVQ DI, R10 SHRQ $0x10, R10 SHLQ $0x20, R10 IMULQ R9, R10 SHRQ $0x38, R10 MOVL CX, R9 SUBL 16(SP), R9 MOVL 1(DX)(R9*1), R11 MOVQ DI, R9 SHRQ $0x08, R9 CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm8B LEAL 1(CX), DI MOVL 12(SP), R8 MOVL DI, SI SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm8B repeat_extend_back_loop_encodeBlockAsm8B: CMPL DI, R8 JLE repeat_extend_back_end_encodeBlockAsm8B MOVB -1(DX)(SI*1), BL MOVB -1(DX)(DI*1), R9 CMPB BL, R9 JNE repeat_extend_back_end_encodeBlockAsm8B LEAL -1(DI), DI DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm8B repeat_extend_back_end_encodeBlockAsm8B: MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(SI*1), R10 SUBL SI, R9 LEAL -1(R9), SI CMPL SI, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm8B CMPL SI, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm8B MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeBlockAsm8B two_bytes_repeat_emit_encodeBlockAsm8B: MOVB $0xf0, (AX) MOVB SI, 1(AX) ADDQ $0x02, AX CMPL SI, $0x40 JL memmove_repeat_emit_encodeBlockAsm8B JMP memmove_long_repeat_emit_encodeBlockAsm8B one_byte_repeat_emit_encodeBlockAsm8B: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeBlockAsm8B: LEAQ (AX)(R9*1), SI // genMemMoveShort CMPQ R9, $0x08 JLE 
emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8 CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8: MOVQ (R10), R11 MOVQ R11, (AX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16: MOVQ (R10), R11 MOVQ -8(R10)(R9*1), R10 MOVQ R11, (AX) MOVQ R10, -8(AX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32: MOVOU (R10), X0 MOVOU -16(R10)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64: MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm8B: MOVQ SI, AX JMP emit_literal_done_repeat_emit_encodeBlockAsm8B memmove_long_repeat_emit_encodeBlockAsm8B: LEAQ (AX)(R9*1), SI // genMemMoveLong MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVQ R9, R12 SHRQ $0x05, R12 MOVQ AX, R11 ANDL $0x0000001f, R11 MOVQ $0x00000040, R13 SUBQ R11, R13 DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(R10)(R13*1), R11 LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back: MOVOU (R11), X4 MOVOU 16(R11), X5 MOVOA X4, (R14) MOVOA X5, 16(R14) ADDQ $0x20, R14 ADDQ $0x20, R11 ADDQ $0x20, R13 DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(R10)(R13*1), X4 MOVOU 
-16(R10)(R13*1), X5 MOVOA X4, -32(AX)(R13*1) MOVOA X5, -16(AX)(R13*1) ADDQ $0x20, R13 CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ SI, AX emit_literal_done_repeat_emit_encodeBlockAsm8B: ADDL $0x05, CX MOVL CX, SI SUBL 16(SP), SI MOVQ src_len+32(FP), R9 SUBL CX, R9 LEAQ (DX)(CX*1), R10 LEAQ (DX)(SI*1), SI // matchLen XORL R12, R12 CMPL R9, $0x08 JL matchlen_single_repeat_extend_encodeBlockAsm8B matchlen_loopback_repeat_extend_encodeBlockAsm8B: MOVQ (R10)(R12*1), R11 XORQ (SI)(R12*1), R11 TESTQ R11, R11 JZ matchlen_loop_repeat_extend_encodeBlockAsm8B BSFQ R11, R11 SARQ $0x03, R11 LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_loop_repeat_extend_encodeBlockAsm8B: LEAL -8(R9), R9 LEAL 8(R12), R12 CMPL R9, $0x08 JGE matchlen_loopback_repeat_extend_encodeBlockAsm8B matchlen_single_repeat_extend_encodeBlockAsm8B: TESTL R9, R9 JZ repeat_extend_forward_end_encodeBlockAsm8B matchlen_single_loopback_repeat_extend_encodeBlockAsm8B: MOVB (R10)(R12*1), R11 CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm8B LEAL 1(R12), R12 DECL R9 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm8B repeat_extend_forward_end_encodeBlockAsm8B: ADDL R12, CX MOVL CX, SI SUBL DI, SI MOVL 16(SP), DI TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm8B // emitRepeat MOVL SI, DI LEAL -4(SI), SI CMPL DI, $0x08 JLE repeat_two_match_repeat_encodeBlockAsm8B CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B cant_repeat_two_offset_match_repeat_encodeBlockAsm8B: CMPL SI, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm8B LEAL -256(SI), SI MOVW $0x0019, (AX) MOVW SI, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_match_repeat_encodeBlockAsm8B: LEAL -4(SI), SI MOVW $0x0015, (AX) MOVB SI, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm8B 
repeat_two_match_repeat_encodeBlockAsm8B: SHLL $0x02, SI ORL $0x01, SI MOVW SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B XORQ R8, R8 LEAL 1(R8)(SI*4), SI MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B repeat_as_copy_encodeBlockAsm8B: // emitCopy two_byte_offset_repeat_as_copy_encodeBlockAsm8B: CMPL SI, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B MOVB $0xee, (AX) MOVW DI, 1(AX) LEAL -60(SI), SI ADDQ $0x03, AX // emitRepeat MOVL SI, DI LEAL -4(SI), SI CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: CMPL SI, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short LEAL -256(SI), SI MOVW $0x0019, (AX) MOVW SI, 2(AX) ADDQ $0x04, AX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: LEAL -4(SI), SI MOVW $0x0015, (AX) MOVB SI, 2(AX) ADDQ $0x03, AX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: SHLL $0x02, SI ORL $0x01, SI MOVW SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B XORQ R8, R8 LEAL 1(R8)(SI*4), SI MOVB DI, 1(AX) SARL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B: CMPL SI, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(SI*4), SI MOVB DI, 1(AX) SHRL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B emit_copy_three_repeat_as_copy_encodeBlockAsm8B: MOVB $0x02, BL LEAL -4(BX)(SI*4), SI MOVB SI, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeBlockAsm8B: MOVL CX, 12(SP) JMP 
search_loop_encodeBlockAsm8B no_repeat_found_encodeBlockAsm8B: CMPL (DX)(SI*1), DI JEQ candidate_match_encodeBlockAsm8B SHRQ $0x08, DI MOVL 24(SP)(R10*4), SI LEAL 2(CX), R9 CMPL (DX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm8B MOVL R9, 24(SP)(R10*4) SHRQ $0x08, DI CMPL (DX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm8B MOVL 20(SP), CX JMP search_loop_encodeBlockAsm8B candidate3_match_encodeBlockAsm8B: ADDL $0x02, CX JMP candidate_match_encodeBlockAsm8B candidate2_match_encodeBlockAsm8B: MOVL R9, 24(SP)(R10*4) INCL CX MOVL R8, SI candidate_match_encodeBlockAsm8B: MOVL 12(SP), DI TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm8B match_extend_back_loop_encodeBlockAsm8B: CMPL CX, DI JLE match_extend_back_end_encodeBlockAsm8B MOVB -1(DX)(SI*1), BL MOVB -1(DX)(CX*1), R8 CMPB BL, R8 JNE match_extend_back_end_encodeBlockAsm8B LEAL -1(CX), CX DECL SI JZ match_extend_back_end_encodeBlockAsm8B JMP match_extend_back_loop_encodeBlockAsm8B match_extend_back_end_encodeBlockAsm8B: MOVL CX, DI SUBL 12(SP), DI LEAQ 3(AX)(DI*1), DI CMPQ DI, (SP) JL match_dst_size_check_encodeBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm8B: MOVL CX, DI MOVL 12(SP), R8 CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm8B MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(R8*1), DI SUBL R8, R9 LEAL -1(R9), R8 CMPL R8, $0x3c JLT one_byte_match_emit_encodeBlockAsm8B CMPL R8, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm8B MOVB $0xf4, (AX) MOVW R8, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBlockAsm8B two_bytes_match_emit_encodeBlockAsm8B: MOVB $0xf0, (AX) MOVB R8, 1(AX) ADDQ $0x02, AX CMPL R8, $0x40 JL memmove_match_emit_encodeBlockAsm8B JMP memmove_long_match_emit_encodeBlockAsm8B one_byte_match_emit_encodeBlockAsm8B: SHLB $0x02, R8 MOVB R8, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBlockAsm8B: LEAQ (AX)(R9*1), R8 // genMemMoveShort CMPQ R9, $0x08 JLE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8 CMPQ R9, $0x10 JBE 
emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8: MOVQ (DI), R10 MOVQ R10, (AX) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16: MOVQ (DI), R10 MOVQ -8(DI)(R9*1), DI MOVQ R10, (AX) MOVQ DI, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32: MOVOU (DI), X0 MOVOU -16(DI)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm8B: MOVQ R8, AX JMP emit_literal_done_match_emit_encodeBlockAsm8B memmove_long_match_emit_encodeBlockAsm8B: LEAQ (AX)(R9*1), R8 // genMemMoveLong MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVQ R9, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(DI)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R13) MOVOA X5, 16(R13) ADDQ $0x20, R13 ADDQ $0x20, R10 ADDQ $0x20, R12 DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(DI)(R12*1), X4 MOVOU -16(DI)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 CMPQ R9, R12 JAE 
emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ R8, AX emit_literal_done_match_emit_encodeBlockAsm8B: match_nolit_loop_encodeBlockAsm8B: MOVL CX, DI SUBL SI, DI MOVL DI, 16(SP) ADDL $0x04, CX ADDL $0x04, SI MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(SI*1), SI // matchLen XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm8B matchlen_loopback_match_nolit_encodeBlockAsm8B: MOVQ (R8)(R10*1), R9 XORQ (SI)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_match_nolit_encodeBlockAsm8B BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_loop_match_nolit_encodeBlockAsm8B: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm8B matchlen_single_match_nolit_encodeBlockAsm8B: TESTL DI, DI JZ match_nolit_end_encodeBlockAsm8B matchlen_single_loopback_match_nolit_encodeBlockAsm8B: MOVB (R8)(R10*1), R9 CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm8B LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm8B match_nolit_end_encodeBlockAsm8B: ADDL R10, CX MOVL 16(SP), SI ADDL $0x04, R10 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeBlockAsm8B: CMPL R10, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm8B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(R10), R10 ADDQ $0x03, AX // emitRepeat MOVL R10, SI LEAL -4(R10), R10 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: CMPL R10, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short LEAL -256(R10), R10 MOVW $0x0019, (AX) MOVW R10, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short: 
LEAL -4(R10), R10 MOVW $0x0015, (AX) MOVB R10, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short: SHLL $0x02, R10 ORL $0x01, R10 MOVW R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B XORQ DI, DI LEAL 1(DI)(R10*4), R10 MOVB SI, 1(AX) SARL $0x08, SI SHLL $0x05, SI ORL SI, R10 MOVB R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B JMP two_byte_offset_match_nolit_encodeBlockAsm8B two_byte_offset_short_match_nolit_encodeBlockAsm8B: CMPL R10, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(R10*4), R10 MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, R10 MOVB R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B emit_copy_three_match_nolit_encodeBlockAsm8B: MOVB $0x02, BL LEAL -4(BX)(R10*4), R10 MOVB R10, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeBlockAsm8B: CMPL CX, 8(SP) JGE emit_remainder_encodeBlockAsm8B MOVQ -2(DX)(CX*1), DI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm8B: MOVQ $0x9e3779b1, R9 MOVQ DI, R8 SHRQ $0x10, DI MOVQ DI, SI SHLQ $0x20, R8 IMULQ R9, R8 SHRQ $0x38, R8 SHLQ $0x20, SI IMULQ R9, SI SHRQ $0x38, SI LEAL -2(CX), R9 LEAQ 24(SP)(SI*4), R10 MOVL (R10), SI MOVL R9, 24(SP)(R8*4) MOVL CX, (R10) CMPL (DX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm8B INCL CX JMP search_loop_encodeBlockAsm8B emit_remainder_encodeBlockAsm8B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm8B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B MOVL CX, SI MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm8B CMPL DX, $0x00000100 JLT 
two_bytes_emit_remainder_encodeBlockAsm8B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBlockAsm8B two_bytes_emit_remainder_encodeBlockAsm8B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBlockAsm8B JMP memmove_long_emit_remainder_encodeBlockAsm8B one_byte_emit_remainder_encodeBlockAsm8B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBlockAsm8B: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveShort CMPQ BX, $0x08 JLE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8: MOVQ (CX), SI MOVQ SI, (AX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16: MOVQ (CX), SI MOVQ -8(CX)(BX*1), CX MOVQ SI, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm8B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeBlockAsm8B memmove_long_emit_remainder_encodeBlockAsm8B: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI MOVQ AX, SI ANDL $0x0000001f, SI 
MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(CX)(R8*1), SI LEAQ -32(AX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 MOVOU 16(SI), X5 MOVOA X4, (R9) MOVOA X5, 16(R9) ADDQ $0x20, R9 ADDQ $0x20, SI ADDQ $0x20, R8 DECQ DI JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(CX)(R8*1), X4 MOVOU -16(CX)(R8*1), X5 MOVOA X4, -32(AX)(R8*1) MOVOA X5, -16(AX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeBlockAsm8B: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeBetterBlockAsm(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBetterBlockAsm(SB), $327704-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000a00, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeBetterBlockAsm: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeBetterBlockAsm MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -6(CX), DX LEAQ -8(CX), SI MOVL SI, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL $0x00000000, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeBetterBlockAsm: MOVL CX, SI SUBL 12(SP), SI SHRL $0x07, SI CMPL SI, $0x63 JLE check_maxskip_ok_encodeBetterBlockAsm LEAL 100(CX), SI JMP check_maxskip_cont_encodeBetterBlockAsm check_maxskip_ok_encodeBetterBlockAsm: LEAL 1(CX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm: CMPL SI, 8(SP) JGE emit_remainder_encodeBetterBlockAsm MOVQ (DX)(CX*1), DI MOVL SI, 20(SP) MOVQ $0x00cf1bbcdcbfa563, R9 MOVQ 
$0x9e3779b1, SI MOVQ DI, R10 MOVQ DI, R11 SHLQ $0x08, R10 IMULQ R9, R10 SHRQ $0x30, R10 SHLQ $0x20, R11 IMULQ SI, R11 SHRQ $0x32, R11 MOVL 24(SP)(R10*4), SI MOVL 262168(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 262168(SP)(R11*4) CMPL (DX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm CMPL (DX)(R8*1), DI JEQ candidateS_match_encodeBetterBlockAsm MOVL 20(SP), CX JMP search_loop_encodeBetterBlockAsm candidateS_match_encodeBetterBlockAsm: SHRQ $0x08, DI MOVQ DI, R10 SHLQ $0x08, R10 IMULQ R9, R10 SHRQ $0x30, R10 MOVL 24(SP)(R10*4), SI INCL CX MOVL CX, 24(SP)(R10*4) CMPL (DX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm DECL CX MOVL R8, SI candidate_match_encodeBetterBlockAsm: MOVL 12(SP), DI TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm match_extend_back_loop_encodeBetterBlockAsm: CMPL CX, DI JLE match_extend_back_end_encodeBetterBlockAsm MOVB -1(DX)(SI*1), BL MOVB -1(DX)(CX*1), R8 CMPB BL, R8 JNE match_extend_back_end_encodeBetterBlockAsm LEAL -1(CX), CX DECL SI JZ match_extend_back_end_encodeBetterBlockAsm JMP match_extend_back_loop_encodeBetterBlockAsm match_extend_back_end_encodeBetterBlockAsm: MOVL CX, DI SUBL 12(SP), DI LEAQ 5(AX)(DI*1), DI CMPQ DI, (SP) JL match_dst_size_check_encodeBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBetterBlockAsm: MOVL CX, DI ADDL $0x04, CX ADDL $0x04, SI MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(SI*1), R10 // matchLen XORL R12, R12 CMPL R8, $0x08 JL matchlen_single_match_nolit_encodeBetterBlockAsm matchlen_loopback_match_nolit_encodeBetterBlockAsm: MOVQ (R9)(R12*1), R11 XORQ (R10)(R12*1), R11 TESTQ R11, R11 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm BSFQ R11, R11 SARQ $0x03, R11 LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_loop_match_nolit_encodeBetterBlockAsm: LEAL -8(R8), R8 LEAL 8(R12), R12 CMPL R8, $0x08 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm matchlen_single_match_nolit_encodeBetterBlockAsm: TESTL 
R8, R8 JZ match_nolit_end_encodeBetterBlockAsm matchlen_single_loopback_match_nolit_encodeBetterBlockAsm: MOVB (R9)(R12*1), R11 CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm LEAL 1(R12), R12 DECL R8 JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm match_nolit_end_encodeBetterBlockAsm: MOVL CX, R8 SUBL SI, R8 // Check if repeat CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm CMPL R12, $0x01 JG match_length_ok_encodeBetterBlockAsm CMPL R8, $0x0000ffff JLE match_length_ok_encodeBetterBlockAsm MOVL 20(SP), CX INCL CX JMP search_loop_encodeBetterBlockAsm match_length_ok_encodeBetterBlockAsm: MOVL R8, 16(SP) MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(SI*1), R10 SUBL SI, R9 LEAL -1(R9), SI CMPL SI, $0x3c JLT one_byte_match_emit_encodeBetterBlockAsm CMPL SI, $0x00000100 JLT two_bytes_match_emit_encodeBetterBlockAsm CMPL SI, $0x00010000 JLT three_bytes_match_emit_encodeBetterBlockAsm CMPL SI, $0x01000000 JLT four_bytes_match_emit_encodeBetterBlockAsm MOVB $0xfc, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP memmove_long_match_emit_encodeBetterBlockAsm four_bytes_match_emit_encodeBetterBlockAsm: MOVL SI, R11 SHRL $0x10, R11 MOVB $0xf8, (AX) MOVW SI, 1(AX) MOVB R11, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_encodeBetterBlockAsm three_bytes_match_emit_encodeBetterBlockAsm: MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBetterBlockAsm two_bytes_match_emit_encodeBetterBlockAsm: MOVB $0xf0, (AX) MOVB SI, 1(AX) ADDQ $0x02, AX CMPL SI, $0x40 JL memmove_match_emit_encodeBetterBlockAsm JMP memmove_long_match_emit_encodeBetterBlockAsm one_byte_match_emit_encodeBetterBlockAsm: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBetterBlockAsm: LEAQ (AX)(R9*1), SI // genMemMoveShort CMPQ R9, $0x04 JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4 CMPQ R9, $0x08 JB 
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7 CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4: MOVL (R10), R11 MOVL R11, (AX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7: MOVL (R10), R11 MOVL -4(R10)(R9*1), R10 MOVL R11, (AX) MOVL R10, -4(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16: MOVQ (R10), R11 MOVQ -8(R10)(R9*1), R10 MOVQ R11, (AX) MOVQ R10, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32: MOVOU (R10), X0 MOVOU -16(R10)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64: MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm: MOVQ SI, AX JMP emit_literal_done_match_emit_encodeBetterBlockAsm memmove_long_match_emit_encodeBetterBlockAsm: LEAQ (AX)(R9*1), SI // genMemMoveLong MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVQ R9, R13 SHRQ $0x05, R13 MOVQ AX, R11 ANDL $0x0000001f, R11 MOVQ $0x00000040, R14 SUBQ R11, R14 DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 LEAQ -32(R10)(R14*1), R11 LEAQ -32(AX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back: MOVOU (R11), X4 MOVOU 16(R11), X5 MOVOA X4, (R15) MOVOA X5, 
16(R15) ADDQ $0x20, R15 ADDQ $0x20, R11 ADDQ $0x20, R14 DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32: MOVOU -32(R10)(R14*1), X4 MOVOU -16(R10)(R14*1), X5 MOVOA X4, -32(AX)(R14*1) MOVOA X5, -16(AX)(R14*1) ADDQ $0x20, R14 CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ SI, AX emit_literal_done_match_emit_encodeBetterBlockAsm: ADDL R12, CX ADDL $0x04, R12 MOVL CX, 12(SP) // emitCopy CMPL R8, $0x00010000 JL two_byte_offset_match_nolit_encodeBetterBlockAsm four_bytes_loop_back_match_nolit_encodeBetterBlockAsm: CMPL R12, $0x40 JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm MOVB $0xff, (AX) MOVL R8, 1(AX) LEAL -64(R12), R12 ADDQ $0x05, AX CMPL R12, $0x04 JL four_bytes_remain_match_nolit_encodeBetterBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy: MOVL R12, SI LEAL -4(R12), R12 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy CMPL R8, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: CMPL R12, $0x00000104 JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy CMPL R12, $0x00010100 JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy CMPL R12, $0x0100ffff JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy LEAL -16842747(R12), R12 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy: LEAL -65536(R12), R12 MOVL R12, R8 MOVW $0x001d, (AX) MOVW R12, 2(AX) SARL $0x10, R8 MOVB R8, 4(AX) ADDQ $0x05, AX JMP 
match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy: LEAL -256(R12), R12 MOVW $0x0019, (AX) MOVW R12, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy: LEAL -4(R12), R12 MOVW $0x0015, (AX) MOVB R12, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy: SHLL $0x02, R12 ORL $0x01, R12 MOVW R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: XORQ SI, SI LEAL 1(SI)(R12*4), R12 MOVB R8, 1(AX) SARL $0x08, R8 SHLL $0x05, R8 ORL R8, R12 MOVB R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm four_bytes_remain_match_nolit_encodeBetterBlockAsm: TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm MOVB $0x03, BL LEAL -4(BX)(R12*4), R12 MOVB R12, (AX) MOVL R8, 1(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_match_nolit_encodeBetterBlockAsm: CMPL R12, $0x40 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm MOVB $0xee, (AX) MOVW R8, 1(AX) LEAL -60(R12), R12 ADDQ $0x03, AX // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short: MOVL R12, SI LEAL -4(R12), R12 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short CMPL R8, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: CMPL R12, $0x00000104 JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short CMPL R12, $0x00010100 JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short CMPL R12, $0x0100ffff JLT 
repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short LEAL -16842747(R12), R12 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short: LEAL -65536(R12), R12 MOVL R12, R8 MOVW $0x001d, (AX) MOVW R12, 2(AX) SARL $0x10, R8 MOVB R8, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short: LEAL -256(R12), R12 MOVW $0x0019, (AX) MOVW R12, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short: LEAL -4(R12), R12 MOVW $0x0015, (AX) MOVB R12, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short: SHLL $0x02, R12 ORL $0x01, R12 MOVW R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R12*4), R12 MOVB R8, 1(AX) SARL $0x08, R8 SHLL $0x05, R8 ORL R8, R12 MOVB R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm JMP two_byte_offset_match_nolit_encodeBetterBlockAsm two_byte_offset_short_match_nolit_encodeBetterBlockAsm: CMPL R12, $0x0c JGE emit_copy_three_match_nolit_encodeBetterBlockAsm CMPL R8, $0x00000800 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm MOVB $0x01, BL LEAL -16(BX)(R12*4), R12 MOVB R8, 1(AX) SHRL $0x08, R8 SHLL $0x05, R8 ORL R8, R12 MOVB R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm emit_copy_three_match_nolit_encodeBetterBlockAsm: MOVB $0x02, BL LEAL -4(BX)(R12*4), R12 MOVB R12, (AX) MOVW R8, 1(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm match_is_repeat_encodeBetterBlockAsm: MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm MOVL DI, R9 MOVL DI, 12(SP) 
LEAQ (DX)(SI*1), R10 SUBL SI, R9 LEAL -1(R9), SI CMPL SI, $0x3c JLT one_byte_match_emit_repeat_encodeBetterBlockAsm CMPL SI, $0x00000100 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm CMPL SI, $0x00010000 JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm CMPL SI, $0x01000000 JLT four_bytes_match_emit_repeat_encodeBetterBlockAsm MOVB $0xfc, (AX) MOVL SI, 1(AX) ADDQ $0x05, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm four_bytes_match_emit_repeat_encodeBetterBlockAsm: MOVL SI, R11 SHRL $0x10, R11 MOVB $0xf8, (AX) MOVW SI, 1(AX) MOVB R11, 3(AX) ADDQ $0x04, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm three_bytes_match_emit_repeat_encodeBetterBlockAsm: MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm two_bytes_match_emit_repeat_encodeBetterBlockAsm: MOVB $0xf0, (AX) MOVB SI, 1(AX) ADDQ $0x02, AX CMPL SI, $0x40 JL memmove_match_emit_repeat_encodeBetterBlockAsm JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm one_byte_match_emit_repeat_encodeBetterBlockAsm: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_match_emit_repeat_encodeBetterBlockAsm: LEAQ (AX)(R9*1), SI // genMemMoveShort CMPQ R9, $0x04 JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4 CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7 CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4: MOVL (R10), R11 MOVL R11, (AX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7: MOVL (R10), R11 MOVL -4(R10)(R9*1), R10 MOVL R11, (AX) MOVL R10, -4(AX)(R9*1) JMP 
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16: MOVQ (R10), R11 MOVQ -8(R10)(R9*1), R10 MOVQ R11, (AX) MOVQ R10, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32: MOVOU (R10), X0 MOVOU -16(R10)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64: MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm: MOVQ SI, AX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm memmove_long_match_emit_repeat_encodeBetterBlockAsm: LEAQ (AX)(R9*1), SI // genMemMoveLong MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVQ R9, R13 SHRQ $0x05, R13 MOVQ AX, R11 ANDL $0x0000001f, R11 MOVQ $0x00000040, R14 SUBQ R11, R14 DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 LEAQ -32(R10)(R14*1), R11 LEAQ -32(AX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back: MOVOU (R11), X4 MOVOU 16(R11), X5 MOVOA X4, (R15) MOVOA X5, 16(R15) ADDQ $0x20, R15 ADDQ $0x20, R11 ADDQ $0x20, R14 DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32: MOVOU -32(R10)(R14*1), X4 MOVOU -16(R10)(R14*1), X5 MOVOA X4, -32(AX)(R14*1) MOVOA X5, -16(AX)(R14*1) ADDQ $0x20, R14 CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ SI, AX 
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm: ADDL R12, CX ADDL $0x04, R12 MOVL CX, 12(SP) // emitRepeat emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm: MOVL R12, SI LEAL -4(R12), R12 CMPL SI, $0x08 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm CMPL R8, $0x00000800 JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: CMPL R12, $0x00000104 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm CMPL R12, $0x00010100 JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm CMPL R12, $0x0100ffff JLT repeat_five_match_nolit_repeat_encodeBetterBlockAsm LEAL -16842747(R12), R12 MOVW $0x001d, (AX) MOVW $0xfffb, 2(AX) MOVB $0xff, 4(AX) ADDQ $0x05, AX JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm repeat_five_match_nolit_repeat_encodeBetterBlockAsm: LEAL -65536(R12), R12 MOVL R12, R8 MOVW $0x001d, (AX) MOVW R12, 2(AX) SARL $0x10, R8 MOVB R8, 4(AX) ADDQ $0x05, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_repeat_encodeBetterBlockAsm: LEAL -256(R12), R12 MOVW $0x0019, (AX) MOVW R12, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_repeat_encodeBetterBlockAsm: LEAL -4(R12), R12 MOVW $0x0015, (AX) MOVB R12, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_repeat_encodeBetterBlockAsm: SHLL $0x02, R12 ORL $0x01, R12 MOVW R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: XORQ SI, SI LEAL 1(SI)(R12*4), R12 MOVB R8, 1(AX) SARL $0x08, R8 SHLL $0x05, R8 ORL R8, R12 MOVB R12, (AX) ADDQ $0x02, AX match_nolit_emitcopy_end_encodeBetterBlockAsm: CMPL CX, 8(SP) JGE emit_remainder_encodeBetterBlockAsm CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET 
match_nolit_dst_ok_encodeBetterBlockAsm: MOVQ $0x00cf1bbcdcbfa563, SI MOVQ $0x9e3779b1, R8 INCL DI MOVQ (DX)(DI*1), R9 MOVQ R9, R10 MOVQ R9, R11 MOVQ R9, R12 SHRQ $0x08, R11 MOVQ R11, R13 SHRQ $0x10, R12 LEAL 1(DI), R14 LEAL 2(DI), R15 MOVQ -2(DX)(CX*1), R9 SHLQ $0x08, R10 IMULQ SI, R10 SHRQ $0x30, R10 SHLQ $0x08, R13 IMULQ SI, R13 SHRQ $0x30, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x32, R11 SHLQ $0x20, R12 IMULQ R8, R12 SHRQ $0x32, R12 MOVL DI, 24(SP)(R10*4) MOVL R14, 24(SP)(R13*4) MOVL R14, 262168(SP)(R11*4) MOVL R15, 262168(SP)(R12*4) MOVQ R9, R10 MOVQ R9, R11 SHRQ $0x08, R11 MOVQ R11, R13 LEAL -2(CX), R9 LEAL -1(CX), DI SHLQ $0x08, R10 IMULQ SI, R10 SHRQ $0x30, R10 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x32, R11 SHLQ $0x08, R13 IMULQ SI, R13 SHRQ $0x30, R13 MOVL R9, 24(SP)(R10*4) MOVL DI, 262168(SP)(R11*4) MOVL DI, 24(SP)(R13*4) JMP search_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 5(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBetterBlockAsm MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBetterBlockAsm: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm MOVL CX, SI MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBetterBlockAsm MOVB $0xfc, (AX) MOVL DX, 1(AX) ADDQ $0x05, AX JMP memmove_long_emit_remainder_encodeBetterBlockAsm four_bytes_emit_remainder_encodeBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX MOVB $0xf8, (AX) MOVW DX, 1(AX) MOVB BL, 3(AX) ADDQ $0x04, AX JMP memmove_long_emit_remainder_encodeBetterBlockAsm three_bytes_emit_remainder_encodeBetterBlockAsm: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP 
memmove_long_emit_remainder_encodeBetterBlockAsm two_bytes_emit_remainder_encodeBetterBlockAsm: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBetterBlockAsm JMP memmove_long_emit_remainder_encodeBetterBlockAsm one_byte_emit_remainder_encodeBetterBlockAsm: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBetterBlockAsm: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveShort CMPQ BX, $0x04 JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4: MOVL (CX), SI MOVL SI, (AX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7: MOVL (CX), SI MOVL -4(CX)(BX*1), CX MOVL SI, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16: MOVQ (CX), SI MOVQ -8(CX)(BX*1), CX MOVQ SI, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm: MOVQ DX, AX JMP 
emit_literal_done_emit_remainder_encodeBetterBlockAsm

// Continuation of encodeBetterBlockAsm, whose TEXT directive is above this span.
memmove_long_emit_remainder_encodeBetterBlockAsm:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm:
	// Return value = final write pointer minus dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst. Returns the number of bytes written (final write
// pointer minus dst_base), or 0 as soon as a projected write pointer is not
// below the output limit stored in 0(SP).
//
// Frame layout (327704 bytes; everything from 24(SP) upward is zeroed first):
//   0(SP)       output limit: dst_base + (src_len - 6 - (src_len >> 5))
//   8(SP)       search limit: src_len - 8
//   12(SP)      src index of the first byte not yet written to dst
//   16(SP)      offset of the previously emitted match (0 at start)
//   20(SP)      src index at which the search resumes after a miss
//   24(SP)      65536 x 4-byte position table, 16-bit hash of 7 input bytes
//   262168(SP)  16384 x 4-byte position table, 14-bit hash of 4 input bytes
//
// Register roles in the main loop:
//   AX = current write pointer into dst
//   DX = src_base
//   CX = current src index
TEXT ·encodeBetterBlockAsm4MB(SB), $327704-56
	MOVQ dst_base+0(FP), AX

	// Clear both tables: 0xa00 iterations x 128 bytes = 327680 bytes.
	MOVQ $0x00000a00, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBetterBlockAsm4MB:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBetterBlockAsm4MB

	// Initialise the scalar state slots described in the header.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -6(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)

	// Searching starts at src index 1 with no previous offset.
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm4MB:
	// Step size grows with the distance since the last emitted byte:
	// skip = (CX - 12(SP)) >> 7; next = CX + 1 + skip, or CX + 100 when
	// skip exceeds 99.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x07, SI
	CMPL SI, $0x63
	JLE check_maxskip_ok_encodeBetterBlockAsm4MB
	LEAL 100(CX), SI
	JMP check_maxskip_cont_encodeBetterBlockAsm4MB

check_maxskip_ok_encodeBetterBlockAsm4MB:
	LEAL 1(CX)(SI*1), SI

check_maxskip_cont_encodeBetterBlockAsm4MB:
	// Stop searching once the next position reaches the search limit.
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm4MB

	// DI = 8 input bytes at CX. R10 = 16-bit hash of the low 7 bytes,
	// R11 = 14-bit hash of the low 4 bytes.
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x00cf1bbcdcbfa563, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	SHLQ $0x08, R10
	IMULQ R9, R10
	SHRQ $0x30, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x32, R11

	// Read both candidates (SI from the 7-byte table, R8 from the 4-byte
	// table), then record the current position in both tables.
	MOVL 24(SP)(R10*4), SI
	MOVL 262168(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 262168(SP)(R11*4)

	// A candidate is accepted when its first 4 bytes equal the current ones.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm4MB
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeBetterBlockAsm4MB

	// No match: continue at the position saved in 20(SP).
	MOVL 20(SP), CX
	JMP search_loop_encodeBetterBlockAsm4MB

candidateS_match_encodeBetterBlockAsm4MB:
	// The 4-byte-table candidate matched. Before using it, look up CX+1 in
	// the 7-byte table; if that entry matches, use position CX+1 instead,
	// otherwise step back to CX and use the 4-byte-table candidate (R8).
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x08, R10
	IMULQ R9, R10
	SHRQ $0x30, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm4MB
	DECL CX
	MOVL R8, SI

candidate_match_encodeBetterBlockAsm4MB:
	// Extend the match backwards while the preceding bytes are equal, but
	// not before 12(SP) (DI) and not below candidate index 0.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeBetterBlockAsm4MB

match_extend_back_loop_encodeBetterBlockAsm4MB:
	CMPL CX, DI
	JLE match_extend_back_end_encodeBetterBlockAsm4MB
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeBetterBlockAsm4MB
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeBetterBlockAsm4MB
	JMP match_extend_back_loop_encodeBetterBlockAsm4MB

match_extend_back_end_encodeBetterBlockAsm4MB:
	// Output-space check: AX + 4 + (pending literal bytes) must stay below
	// the limit in (SP); otherwise return 0.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 4(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeBetterBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBetterBlockAsm4MB:
	// DI = match start. Skip the 4 bytes already known to match on both
	// sides; R8 = src bytes remaining after CX, R9/R10 = compare pointers.
	MOVL CX, DI
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), R10

	// matchLen
	// R12 counts further matching bytes; 8 bytes are compared per iteration
	// while at least 8 remain.
	XORL R12, R12
	CMPL R8, $0x08
	JL matchlen_single_match_nolit_encodeBetterBlockAsm4MB

matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB:
	MOVQ (R9)(R12*1), R11
	XORQ (R10)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_match_nolit_encodeBetterBlockAsm4MB

	// Index of the lowest differing bit, divided by 8 = count of equal bytes.
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP match_nolit_end_encodeBetterBlockAsm4MB

matchlen_loop_match_nolit_encodeBetterBlockAsm4MB:
	LEAL -8(R8), R8
	LEAL 8(R12), R12
	CMPL R8, $0x08
	JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB

matchlen_single_match_nolit_encodeBetterBlockAsm4MB:
	// Fewer than 8 bytes left: compare one byte at a time.
	TESTL R8, R8
	JZ match_nolit_end_encodeBetterBlockAsm4MB

matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB:
	MOVB (R9)(R12*1), R11
	CMPB (R10)(R12*1), R11
	JNE match_nolit_end_encodeBetterBlockAsm4MB
	LEAL 1(R12), R12
	DECL R8
	JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB

match_nolit_end_encodeBetterBlockAsm4MB:
	// R8 = match offset (current position minus candidate position).
	MOVL CX, R8
	SUBL SI, R8

	// Check if repeat
	CMPL 16(SP), R8
	JEQ match_is_repeat_encodeBetterBlockAsm4MB

	// Reject a match with at most 1 byte beyond the first 4 when its offset
	// is above 0xffff; searching then resumes at 20(SP) + 1.
	CMPL R12, $0x01
	JG match_length_ok_encodeBetterBlockAsm4MB
	CMPL R8, $0x0000ffff
	JLE match_length_ok_encodeBetterBlockAsm4MB
	MOVL 20(SP), CX
	INCL CX
	JMP search_loop_encodeBetterBlockAsm4MB

match_length_ok_encodeBetterBlockAsm4MB:
	// Remember the offset, then emit the bytes in [12(SP), DI) verbatim.
	// R10 = source pointer, R9 = byte count, SI = count - 1.
	MOVL R8, 16(SP)
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI

	// Header size depends on count-1: < 60 one byte, < 2^8 two bytes,
	// < 2^16 three bytes, otherwise four bytes.
	CMPL SI, $0x3c
	JLT one_byte_match_emit_encodeBetterBlockAsm4MB
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_encodeBetterBlockAsm4MB
	CMPL SI, $0x00010000
	JLT three_bytes_match_emit_encodeBetterBlockAsm4MB

	// 0xf8 followed by the low 24 bits of count-1.
	MOVL SI, R11
	SHRL $0x10, R11
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB R11, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_encodeBetterBlockAsm4MB

three_bytes_match_emit_encodeBetterBlockAsm4MB:
	// 0xf4 followed by 16-bit count-1.
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBetterBlockAsm4MB

two_bytes_match_emit_encodeBetterBlockAsm4MB:
	// 0xf0 followed by 8-bit count-1; the short copy is used when
	// count-1 < 64.
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_encodeBetterBlockAsm4MB
	JMP memmove_long_match_emit_encodeBetterBlockAsm4MB

one_byte_match_emit_encodeBetterBlockAsm4MB:
	// Single header byte: (count-1) << 2.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBetterBlockAsm4MB:
	// SI = write pointer after the copy.
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Dispatch on R9; each case copies with (possibly overlapping) loads
	// from the start and the end of the range.
	CMPQ R9, $0x04
	JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4
	CMPQ R9, $0x08
	JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4:
	// Always moves 4 bytes, also when R9 < 4.
	MOVL (R10), R11
	MOVL R11, (AX)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
	MOVL (R10), R11
	MOVL -4(R10)(R9*1), R10
	MOVL R11, (AX)
	MOVL R10, -4(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB

memmove_long_match_emit_encodeBetterBlockAsm4MB:
	// SI = write pointer after the copy.
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// The first and last 32 bytes are loaded into X0-X3 up front and stored
	// last with unaligned stores. The middle is copied 32 bytes at a time
	// with aligned stores: R14 = 64 - (AX & 31), so AX + R14 - 32 is
	// 32-byte aligned.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14

	// NOTE(review): DECQ leaves CF untouched, so JA below sees CF from the
	// SUBQ above (no borrow, since R11 <= 31) and is taken whenever the
	// decremented count is non-zero - confirm this is what the generator
	// intends.
	DECQ R13
	JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
	// CX = end of the match; it becomes the new 12(SP). R12 = full match
	// length (the 4 skipped bytes are added back).
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitCopy
	// R8 = offset, R12 = length. Offsets below 0x10000 use the 16-bit
	// offset forms; larger offsets use the 32-bit offset form below.
	CMPL R8, $0x00010000
	JL two_byte_offset_match_nolit_encodeBetterBlockAsm4MB

four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB:
	// Length > 64: write 0xff + 32-bit offset (5 bytes), subtract 64 from
	// the length, then encode what is left.
	CMPL R12, $0x40
	JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
	MOVB $0xff, (AX)
	MOVL R8, 1(AX)
	LEAL -64(R12), R12
	ADDQ $0x05, AX
	CMPL R12, $0x04
	JL four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB

	// emitRepeat
	// SI = remaining length, R12 = remaining length - 4. The branches pick
	// a 2-, 3-, 4- or 5-byte encoding by size.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
	CMPL R8, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy

cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
	CMPL R12, $0x00010100
	JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy

	// 5 bytes: word 0x001d, then the low 24 bits of (R12 - 65536).
	LEAL -65536(R12), R12
	MOVL R12, R8
	MOVW $0x001d, (AX)
	MOVW R12, 2(AX)
	SARL $0x10, R8
	MOVB R8, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// 4 bytes: word 0x0019, then 16-bit (R12 - 256).
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// 3 bytes: word 0x0015, then 8-bit (R12 - 4).
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// 2 bytes: 16-bit word (R12 << 2) | 1.
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// 2 bytes: tag = 1 + R12*4, OR-ed with (offset >> 8) << 5; the second
	// byte is the low offset byte.
	XORQ SI, SI
	LEAL 1(SI)(R12*4), R12
	MOVB R8, 1(AX)
	SARL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

	// Not reachable: it follows an unconditional JMP and carries no label.
	JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB

four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
	// Remaining length with 32-bit offset: tag = 3 + ((length - 1) << 2),
	// then the offset. Nothing is written when the length is 0.
	TESTL R12, R12
	JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
	MOVB $0x03, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVL R8, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
	// Length > 64: write 0xee (= 2 + ((60 - 1) << 2)) + 16-bit offset,
	// subtract 60 from the length, then encode what is left.
	CMPL R12, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
	MOVB $0xee, (AX)
	MOVW R8, 1(AX)
	LEAL -60(R12), R12
	ADDQ $0x03, AX

	// emitRepeat
	// Same encoding choices as the emitRepeat sequence above.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
	CMPL R8, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
	CMPL R12, $0x00010100
	JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
	LEAL -65536(R12), R12
	MOVL R12, R8
	MOVW $0x001d, (AX)
	MOVW R12, 2(AX)
	SARL $0x10, R8
	MOVB R8, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(R12*4), R12
	MOVB R8, 1(AX)
	SARL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

	// Not reachable: it follows an unconditional JMP and carries no label.
	JMP two_byte_offset_match_nolit_encodeBetterBlockAsm4MB

two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
	// Length <= 64. With length < 12 and offset < 2048 use the 2-byte form:
	// tag = 1 + ((length - 4) << 2) | ((offset >> 8) << 5), then the low
	// offset byte. Otherwise use the 3-byte form below.
	CMPL R12, $0x0c
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
	CMPL R8, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
	MOVB $0x01, BL
	LEAL -16(BX)(R12*4), R12
	MOVB R8, 1(AX)
	SHRL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
	// 3 bytes: tag = 2 + ((length - 1) << 2), then 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

match_is_repeat_encodeBetterBlockAsm4MB:
	// The offset equals the previous one (16(SP)). Emit the pending bytes
	// in [12(SP), DI) exactly as in the non-repeat path, then encode the
	// match with emitRepeat only.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
	CMPL SI, $0x00010000
	JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
	MOVL SI, R11
	SHRL $0x10, R11
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB R11, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB

three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB

two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_repeat_encodeBetterBlockAsm4MB
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB

one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	CMPQ R9, $0x04
	JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4
	CMPQ R9, $0x08
	JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4:
	MOVL (R10), R11
	MOVL R11, (AX)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
	MOVL (R10), R11
	MOVL -4(R10)(R9*1), R10
	MOVL R11, (AX)
	MOVL R10, -4(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB

memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	DECQ R13
	JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
	// CX = end of the match (new 12(SP)); R12 = full match length.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitRepeat
	// SI = length, R12 = length - 4; same encoding choices as in emitCopy.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
	CMPL R8, $0x00000800
	JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB

cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
	CMPL R12, $0x00010100
	JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB
	LEAL -65536(R12), R12
	MOVL R12, R8
	MOVW $0x001d, (AX)
	MOVW R12, 2(AX)
	SARL $0x10, R8
	MOVB R8, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
	XORQ SI, SI
	LEAL 1(SI)(R12*4), R12
	MOVB R8, 1(AX)
	SARL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX

match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
	// Past the search limit: flush the tail. Otherwise make sure the write
	// pointer is still below the output limit, returning 0 if it is not.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm4MB
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBetterBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBetterBlockAsm4MB:
	// Re-seed the tables around the match just emitted. DI was the match
	// start and CX is the match end. Stores made below:
	//   7-byte table: DI+1, DI+2, CX-2, CX-1
	//   4-byte table: DI+2, DI+3, CX-1
	// (DI is incremented first, so the operands read DI, R14 = DI+1 and
	// R15 = DI+2 relative to the incremented value.)
	MOVQ $0x00cf1bbcdcbfa563, SI
	MOVQ $0x9e3779b1, R8
	INCL DI
	MOVQ (DX)(DI*1), R9
	MOVQ R9, R10
	MOVQ R9, R11
	MOVQ R9, R12
	SHRQ $0x08, R11
	MOVQ R11, R13
	SHRQ $0x10, R12
	LEAL 1(DI), R14
	LEAL 2(DI), R15

	// R9 is reloaded with the 8 bytes at CX-2 for the second group below.
	MOVQ -2(DX)(CX*1), R9
	SHLQ $0x08, R10
	IMULQ SI, R10
	SHRQ $0x30, R10
	SHLQ $0x08, R13
	IMULQ SI, R13
	SHRQ $0x30, R13
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x32, R11
	SHLQ $0x20, R12
	IMULQ R8, R12
	SHRQ $0x32, R12
	MOVL DI, 24(SP)(R10*4)
	MOVL R14, 24(SP)(R13*4)
	MOVL R14, 262168(SP)(R11*4)
	MOVL R15, 262168(SP)(R12*4)
	MOVQ R9, R10
	MOVQ R9, R11
	SHRQ $0x08, R11
	MOVQ R11, R13
	LEAL -2(CX), R9
	LEAL -1(CX), DI
	SHLQ $0x08, R10
	IMULQ SI, R10
	SHRQ $0x30, R10
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x32, R11
	SHLQ $0x08, R13
	IMULQ SI, R13
	SHRQ $0x30, R13
	MOVL R9, 24(SP)(R10*4)
	MOVL DI, 262168(SP)(R11*4)
	MOVL DI, 24(SP)(R13*4)
	JMP search_loop_encodeBetterBlockAsm4MB

emit_remainder_encodeBetterBlockAsm4MB:
	// Output-space check for the tail: AX + 4 + (src_len - 12(SP)) must be
	// below the limit in (SP); otherwise return 0.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBetterBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBetterBlockAsm4MB:
	// Emit the bytes in [12(SP), src_len) verbatim. CX = source pointer,
	// SI = byte count, DX = count - 1 (src_base in DX is no longer needed).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBetterBlockAsm4MB
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBetterBlockAsm4MB
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBetterBlockAsm4MB
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB

three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB

two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBetterBlockAsm4MB
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB

one_byte_emit_remainder_encodeBetterBlockAsm4MB:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBetterBlockAsm4MB:
	// DX = write pointer after the copy, BX = byte count.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	CMPQ BX, $0x04
	JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4:
	MOVL (CX), SI
	MOVL SI, (AX)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB

memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
	// DX = write pointer after the copy, BX = byte count.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
	// Return value = final write pointer minus dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBetterBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBetterBlockAsm12B(SB), $81944-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000280, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBetterBlockAsm12B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBetterBlockAsm12B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -6(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm12B:
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x06, SI
	LEAL 1(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm12B
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x34, R11
	MOVL 24(SP)(R10*4), SI
	MOVL 65560(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 65560(SP)(R11*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm12B
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeBetterBlockAsm12B
	MOVL 20(SP), CX
	JMP search_loop_encodeBetterBlockAsm12B

candidateS_match_encodeBetterBlockAsm12B:
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm12B
	DECL CX
	MOVL R8, SI

candidate_match_encodeBetterBlockAsm12B:
	MOVL
12(SP), DI TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm12B match_extend_back_loop_encodeBetterBlockAsm12B: CMPL CX, DI JLE match_extend_back_end_encodeBetterBlockAsm12B MOVB -1(DX)(SI*1), BL MOVB -1(DX)(CX*1), R8 CMPB BL, R8 JNE match_extend_back_end_encodeBetterBlockAsm12B LEAL -1(CX), CX DECL SI JZ match_extend_back_end_encodeBetterBlockAsm12B JMP match_extend_back_loop_encodeBetterBlockAsm12B match_extend_back_end_encodeBetterBlockAsm12B: MOVL CX, DI SUBL 12(SP), DI LEAQ 3(AX)(DI*1), DI CMPQ DI, (SP) JL match_dst_size_check_encodeBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBetterBlockAsm12B: MOVL CX, DI ADDL $0x04, CX ADDL $0x04, SI MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(SI*1), R10 // matchLen XORL R12, R12 CMPL R8, $0x08 JL matchlen_single_match_nolit_encodeBetterBlockAsm12B matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: MOVQ (R9)(R12*1), R11 XORQ (R10)(R12*1), R11 TESTQ R11, R11 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm12B BSFQ R11, R11 SARQ $0x03, R11 LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_loop_match_nolit_encodeBetterBlockAsm12B: LEAL -8(R8), R8 LEAL 8(R12), R12 CMPL R8, $0x08 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm12B matchlen_single_match_nolit_encodeBetterBlockAsm12B: TESTL R8, R8 JZ match_nolit_end_encodeBetterBlockAsm12B matchlen_single_loopback_match_nolit_encodeBetterBlockAsm12B: MOVB (R9)(R12*1), R11 CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm12B LEAL 1(R12), R12 DECL R8 JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm12B match_nolit_end_encodeBetterBlockAsm12B: MOVL CX, R8 SUBL SI, R8 // Check if repeat CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm12B MOVL R8, 16(SP) MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(SI*1), R10 SUBL SI, R9 LEAL -1(R9), SI CMPL SI, $0x3c JLT 
one_byte_match_emit_encodeBetterBlockAsm12B CMPL SI, $0x00000100 JLT two_bytes_match_emit_encodeBetterBlockAsm12B MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBetterBlockAsm12B two_bytes_match_emit_encodeBetterBlockAsm12B: MOVB $0xf0, (AX) MOVB SI, 1(AX) ADDQ $0x02, AX CMPL SI, $0x40 JL memmove_match_emit_encodeBetterBlockAsm12B JMP memmove_long_match_emit_encodeBetterBlockAsm12B one_byte_match_emit_encodeBetterBlockAsm12B: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBetterBlockAsm12B: LEAQ (AX)(R9*1), SI // genMemMoveShort CMPQ R9, $0x04 JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4 CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7 CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4: MOVL (R10), R11 MOVL R11, (AX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7: MOVL (R10), R11 MOVL -4(R10)(R9*1), R10 MOVL R11, (AX) MOVL R10, -4(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16: MOVQ (R10), R11 MOVQ -8(R10)(R9*1), R10 MOVQ R11, (AX) MOVQ R10, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32: MOVOU (R10), X0 MOVOU -16(R10)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64: MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVOU 
X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm12B: MOVQ SI, AX JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B memmove_long_match_emit_encodeBetterBlockAsm12B: LEAQ (AX)(R9*1), SI // genMemMoveLong MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVQ R9, R13 SHRQ $0x05, R13 MOVQ AX, R11 ANDL $0x0000001f, R11 MOVQ $0x00000040, R14 SUBQ R11, R14 DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(R10)(R14*1), R11 LEAQ -32(AX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back: MOVOU (R11), X4 MOVOU 16(R11), X5 MOVOA X4, (R15) MOVOA X5, 16(R15) ADDQ $0x20, R15 ADDQ $0x20, R11 ADDQ $0x20, R14 DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(R10)(R14*1), X4 MOVOU -16(R10)(R14*1), X5 MOVOA X4, -32(AX)(R14*1) MOVOA X5, -16(AX)(R14*1) ADDQ $0x20, R14 CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ SI, AX emit_literal_done_match_emit_encodeBetterBlockAsm12B: ADDL R12, CX ADDL $0x04, R12 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeBetterBlockAsm12B: CMPL R12, $0x40 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B MOVB $0xee, (AX) MOVW R8, 1(AX) LEAL -60(R12), R12 ADDQ $0x03, AX // emitRepeat MOVL R12, SI LEAL -4(R12), R12 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short CMPL R8, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: CMPL R12, 
$0x00000104 JLT repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short LEAL -256(R12), R12 MOVW $0x0019, (AX) MOVW R12, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: LEAL -4(R12), R12 MOVW $0x0015, (AX) MOVB R12, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: SHLL $0x02, R12 ORL $0x01, R12 MOVW R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R12*4), R12 MOVB R8, 1(AX) SARL $0x08, R8 SHLL $0x05, R8 ORL R8, R12 MOVB R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B JMP two_byte_offset_match_nolit_encodeBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B: CMPL R12, $0x0c JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B CMPL R8, $0x00000800 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(R12*4), R12 MOVB R8, 1(AX) SHRL $0x08, R8 SHLL $0x05, R8 ORL R8, R12 MOVB R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B emit_copy_three_match_nolit_encodeBetterBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(R12*4), R12 MOVB R12, (AX) MOVW R8, 1(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B match_is_repeat_encodeBetterBlockAsm12B: MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(SI*1), R10 SUBL SI, R9 LEAL -1(R9), SI CMPL SI, $0x3c JLT one_byte_match_emit_repeat_encodeBetterBlockAsm12B CMPL SI, $0x00000100 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm12B MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B two_bytes_match_emit_repeat_encodeBetterBlockAsm12B: MOVB $0xf0, (AX) MOVB SI, 1(AX) 
ADDQ $0x02, AX CMPL SI, $0x40 JL memmove_match_emit_repeat_encodeBetterBlockAsm12B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B one_byte_match_emit_repeat_encodeBetterBlockAsm12B: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_match_emit_repeat_encodeBetterBlockAsm12B: LEAQ (AX)(R9*1), SI // genMemMoveShort CMPQ R9, $0x04 JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4 CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7 CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4: MOVL (R10), R11 MOVL R11, (AX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7: MOVL (R10), R11 MOVL -4(R10)(R9*1), R10 MOVL R11, (AX) MOVL R10, -4(AX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16: MOVQ (R10), R11 MOVQ -8(R10)(R9*1), R10 MOVQ R11, (AX) MOVQ R10, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32: MOVOU (R10), X0 MOVOU -16(R10)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64: MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B: MOVQ SI, AX JMP 
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B memmove_long_match_emit_repeat_encodeBetterBlockAsm12B: LEAQ (AX)(R9*1), SI // genMemMoveLong MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVQ R9, R13 SHRQ $0x05, R13 MOVQ AX, R11 ANDL $0x0000001f, R11 MOVQ $0x00000040, R14 SUBQ R11, R14 DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(R10)(R14*1), R11 LEAQ -32(AX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back: MOVOU (R11), X4 MOVOU 16(R11), X5 MOVOA X4, (R15) MOVOA X5, 16(R15) ADDQ $0x20, R15 ADDQ $0x20, R11 ADDQ $0x20, R14 DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(R10)(R14*1), X4 MOVOU -16(R10)(R14*1), X5 MOVOA X4, -32(AX)(R14*1) MOVOA X5, -16(AX)(R14*1) ADDQ $0x20, R14 CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ SI, AX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B: ADDL R12, CX ADDL $0x04, R12 MOVL CX, 12(SP) // emitRepeat MOVL R12, SI LEAL -4(R12), R12 CMPL SI, $0x08 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B CMPL R8, $0x00000800 JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: CMPL R12, $0x00000104 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B LEAL -256(R12), R12 MOVW $0x0019, (AX) MOVW R12, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B: LEAL -4(R12), R12 MOVW $0x0015, (AX) MOVB R12, 2(AX) ADDQ $0x03, AX JMP 
match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B: SHLL $0x02, R12 ORL $0x01, R12 MOVW R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: XORQ SI, SI LEAL 1(SI)(R12*4), R12 MOVB R8, 1(AX) SARL $0x08, R8 SHLL $0x05, R8 ORL R8, R12 MOVB R12, (AX) ADDQ $0x02, AX match_nolit_emitcopy_end_encodeBetterBlockAsm12B: CMPL CX, 8(SP) JGE emit_remainder_encodeBetterBlockAsm12B CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm12B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 INCL DI MOVQ (DX)(DI*1), R9 MOVQ R9, R10 MOVQ R9, R11 MOVQ R9, R12 SHRQ $0x08, R11 MOVQ R11, R13 SHRQ $0x10, R12 LEAL 1(DI), R14 LEAL 2(DI), R15 MOVQ -2(DX)(CX*1), R9 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x32, R10 SHLQ $0x10, R13 IMULQ SI, R13 SHRQ $0x32, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x34, R11 SHLQ $0x20, R12 IMULQ R8, R12 SHRQ $0x34, R12 MOVL DI, 24(SP)(R10*4) MOVL R14, 24(SP)(R13*4) MOVL R14, 65560(SP)(R11*4) MOVL R15, 65560(SP)(R12*4) MOVQ R9, R10 MOVQ R9, R11 SHRQ $0x08, R11 MOVQ R11, R13 LEAL -2(CX), R9 LEAL -1(CX), DI SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x32, R10 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x34, R11 SHLQ $0x10, R13 IMULQ SI, R13 SHRQ $0x32, R13 MOVL R9, 24(SP)(R10*4) MOVL DI, 65560(SP)(R11*4) MOVL DI, 24(SP)(R13*4) JMP search_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBetterBlockAsm12B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B MOVL CX, SI MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBetterBlockAsm12B CMPL DX, $0x00000100 
JLT two_bytes_emit_remainder_encodeBetterBlockAsm12B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B two_bytes_emit_remainder_encodeBetterBlockAsm12B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B one_byte_emit_remainder_encodeBetterBlockAsm12B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBetterBlockAsm12B: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveShort CMPQ BX, $0x04 JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4: MOVL (CX), SI MOVL SI, (AX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7: MOVL (CX), SI MOVL -4(CX)(BX*1), CX MOVL SI, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16: MOVQ (CX), SI MOVQ -8(CX)(BX*1), CX MOVQ SI, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU 
-16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B

memmove_long_emit_remainder_encodeBetterBlockAsm12B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm12B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBetterBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
//
// Scans src for matches using two hash tables kept on the stack and writes
// literal, copy and repeat records to dst. On success ret receives the number
// of bytes written to dst; ret is set to 0 whenever the output would reach the
// dst limit stored at 0(SP).
//
// Frame layout (20504 bytes):
//   0(SP)      dst limit pointer: dst_base + len(src) - 6 - len(src)/32
//   8(SP)      len(src) - 8; the search stops once the next position reaches it
//   12(SP)     src offset of the first byte not yet emitted to dst
//   16(SP)     offset of the previous match, compared in "Check if repeat"
//   20(SP)     next search position, saved while the tables are probed
//   24(SP)     table indexed by a 12-bit hash of 6 bytes (4096 x 4 bytes)
//   16408(SP)  table indexed by a 10-bit hash of 4 bytes (1024 x 4 bytes)
//
// Registers held across the search loop:
//   AX = dst write pointer, CX = current src offset, DX = src base pointer.
TEXT ·encodeBetterBlockAsm10B(SB), $20504-56
	MOVQ dst_base+0(FP), AX

	// Zero both tables: 0xa0 iterations x 0x80 bytes = 20480 bytes from 24(SP).
	MOVQ $0x000000a0, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBetterBlockAsm10B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBetterBlockAsm10B

	// Nothing emitted yet.
	MOVL $0x00000000, 12(SP)

	// 8(SP) = len(src) - 8; 0(SP) = dst + len(src) - 6 - len(src)>>5.
	MOVQ src_len+32(FP), CX
	LEAQ -6(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)

	// Start searching at src offset 1 with no previous match offset.
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm10B:
	// Next position = CX + 1 + (CX - 12(SP))>>5, so the step grows by one for
	// every 32 bytes scanned since the last emit.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x05, SI
	LEAL 1(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm10B

	// DI = 8 bytes of src at CX; remember the next position in 20(SP).
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)

	// R10 = 12-bit hash of the low 6 bytes (SHLQ 16 discards the top two),
	// R11 = 10-bit hash of the low 4 bytes.
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x34, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x36, R11

	// Load both candidates (SI from the 6-byte table, R8 from the 4-byte
	// table), then record the current position in both tables.
	MOVL 24(SP)(R10*4), SI
	MOVL 16408(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 16408(SP)(R11*4)

	// A candidate matches when its first 4 src bytes equal the low 4 bytes of DI.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm10B
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeBetterBlockAsm10B

	// No match: advance to the saved next position.
	MOVL 20(SP), CX
	JMP search_loop_encodeBetterBlockAsm10B

candidateS_match_encodeBetterBlockAsm10B:
	// Only the 4-byte table matched. Probe the 6-byte table for position CX+1
	// (DI shifted right by one byte) and use that candidate if it matches;
	// otherwise step CX back and fall back to the 4-byte candidate in R8.
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x34, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm10B
	DECL CX
	MOVL R8, SI

candidate_match_encodeBetterBlockAsm10B:
	// Extend the match backwards while the preceding bytes are equal, without
	// moving CX below 12(SP) or SI below 0.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeBetterBlockAsm10B

match_extend_back_loop_encodeBetterBlockAsm10B:
	CMPL CX, DI
	JLE match_extend_back_end_encodeBetterBlockAsm10B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeBetterBlockAsm10B
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeBetterBlockAsm10B
	JMP match_extend_back_loop_encodeBetterBlockAsm10B

match_extend_back_end_encodeBetterBlockAsm10B:
	// Return 0 unless AX + 3 + (CX - 12(SP)) is below the dst limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeBetterBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBetterBlockAsm10B:
	// DI = match start. Skip the 4 bytes already compared on both sides.
	// R8 = src bytes remaining after CX, R9 = &src[CX], R10 = &src[SI].
	MOVL CX, DI
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), R10

	// matchLen
	// R12 counts the equal bytes found beyond the first 4.
	XORL R12, R12
	CMPL R8, $0x08
	JL matchlen_single_match_nolit_encodeBetterBlockAsm10B

matchlen_loopback_match_nolit_encodeBetterBlockAsm10B:
	// Compare 8 bytes at a time; on a difference, the index of the lowest set
	// bit of the XOR divided by 8 is the number of equal leading bytes.
	MOVQ (R9)(R12*1), R11
	XORQ (R10)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_match_nolit_encodeBetterBlockAsm10B
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP match_nolit_end_encodeBetterBlockAsm10B

matchlen_loop_match_nolit_encodeBetterBlockAsm10B:
	LEAL -8(R8), R8
	LEAL 8(R12), R12
	CMPL R8, $0x08
	JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B

matchlen_single_match_nolit_encodeBetterBlockAsm10B:
	// Fewer than 8 bytes remain: compare one byte at a time.
	TESTL R8, R8
	JZ match_nolit_end_encodeBetterBlockAsm10B

matchlen_single_loopback_match_nolit_encodeBetterBlockAsm10B:
	MOVB (R9)(R12*1), R11
	CMPB (R10)(R12*1), R11
	JNE match_nolit_end_encodeBetterBlockAsm10B
	LEAL 1(R12), R12
	DECL R8
	JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm10B

match_nolit_end_encodeBetterBlockAsm10B:
	// R8 = match offset (CX and SI were both advanced by 4, so the difference
	// is unchanged).
	MOVL CX, R8
	SUBL SI, R8

	// Check if repeat
	CMPL 16(SP), R8
	JEQ match_is_repeat_encodeBetterBlockAsm10B
	MOVL R8, 16(SP)

	// Emit pending literals src[12(SP):DI]. R10 = source pointer,
	// R9 = length, SI = length - 1. Skipped when there are none.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI

	// Header size depends on length-1: below 60 one byte, below 256 two bytes
	// (0xf0, length-1), otherwise three bytes (0xf4, 16-bit length-1).
	CMPL SI, $0x3c
	JLT one_byte_match_emit_encodeBetterBlockAsm10B
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_encodeBetterBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBetterBlockAsm10B

two_bytes_match_emit_encodeBetterBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX

	// Lengths up to 64 use the short copy, longer ones the long copy.
	CMPL SI, $0x40
	JL memmove_match_emit_encodeBetterBlockAsm10B
	JMP memmove_long_match_emit_encodeBetterBlockAsm10B

one_byte_match_emit_encodeBetterBlockAsm10B:
	// Single header byte: (length-1) << 2.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBetterBlockAsm10B:
	// SI = dst pointer after the literals; it becomes AX once the copy is done.
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Copy R9 (at most 64) bytes from R10 to AX. Each size class uses a pair
	// of possibly overlapping loads/stores anchored at the start and the end.
	CMPQ R9, $0x04
	JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4
	CMPQ R9, $0x08
	JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4:
	// Always moves 4 bytes, even for shorter lengths; AX is still advanced by
	// the true length through SI.
	MOVL (R10), R11
	MOVL R11, (AX)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7:
	MOVL (R10), R11
	MOVL -4(R10)(R9*1), R10
	MOVL R11, (AX)
	MOVL R10, -4(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeBetterBlockAsm10B:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B

memmove_long_match_emit_encodeBetterBlockAsm10B:
	// SI = dst pointer after the literals.
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// Keep the first and last 32 source bytes in X0-X3; they are stored last
	// with unaligned moves. The middle is copied in 32-byte steps starting at
	// dst offset R14 = 64 - (AX & 31), so the MOVOA stores hit addresses that
	// are multiples of 32.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14

	// NOTE(review): DECQ does not modify CF, so JA/JNA here combine ZF from
	// DECQ with CF left by the preceding SUBQ/ADDQ - confirm this is the
	// intended loop condition in the generator.
	DECQ R13
	JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
	// Copy 32 bytes ending at offset R14 and repeat while R9 >= R14.
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32

	// Store the saved head and tail.
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_encodeBetterBlockAsm10B:
	// CX = end of the match, R12 = full match length (the 4 skipped bytes
	// included); everything up to CX now counts as emitted.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitCopy
	// R12 = length, R8 = offset.
two_byte_offset_match_nolit_encodeBetterBlockAsm10B:
	CMPL R12, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B

	// Length above 64: write tag 0xee (= 59<<2 | 2) followed by the 16-bit
	// offset, subtract 60 from the length and encode the rest as a repeat.
	MOVB $0xee, (AX)
	MOVW R8, 1(AX)
	LEAL -60(R12), R12
	ADDQ $0x03, AX

	// emitRepeat
	// SI = length, R12 = length - 4.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
	CMPL R8, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short

	// Four bytes: word 0x0019, then 16-bit (length - 260).
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
	// Three bytes: word 0x0015, then byte (length - 8).
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
	// Two bytes: word ((length - 4) << 2) | 1.
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
	// Two bytes: byte 0 = 1 + (length - 4)*4 | (offset >> 8) << 5,
	// byte 1 = low byte of the offset.
	XORQ SI, SI
	LEAL 1(SI)(R12*4), R12
	MOVB R8, 1(AX)
	SARL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

	// Not reachable: follows an unconditional JMP and carries no label.
	JMP two_byte_offset_match_nolit_encodeBetterBlockAsm10B

two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B:
	// Length at most 64. Use the two-byte form only when length < 12 and
	// offset < 2048; otherwise use the three-byte form.
	CMPL R12, $0x0c
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B
	CMPL R8, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B

	// Byte 0 = 1 + (length - 4)*4 | (offset >> 8) << 5, byte 1 = low offset
	// byte. Only the low byte of R12 is stored, so the upper bits of BX that
	// MOVB leaves untouched do not reach dst.
	MOVB $0x01, BL
	LEAL -16(BX)(R12*4), R12
	MOVB R8, 1(AX)
	SHRL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

emit_copy_three_match_nolit_encodeBetterBlockAsm10B:
	// Byte 0 = 2 + (length - 1)*4, followed by the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

match_is_repeat_encodeBetterBlockAsm10B:
	// The offset equals the previous one. Emit pending literals
	// src[12(SP):DI] exactly as in the non-repeat path, then encode the match
	// as a repeat.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_match_emit_repeat_encodeBetterBlockAsm10B
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B

two_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_repeat_encodeBetterBlockAsm10B
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B

one_byte_match_emit_repeat_encodeBetterBlockAsm10B:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_repeat_encodeBetterBlockAsm10B:
	// SI = dst pointer after the literals.
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Copy R9 (at most 64) bytes from R10 to AX, dispatching on size class.
	CMPQ R9, $0x04
	JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4
	CMPQ R9, $0x08
	JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4:
	MOVL (R10), R11
	MOVL R11, (AX)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7:
	MOVL (R10), R11
	MOVL -4(R10)(R9*1), R10
	MOVL R11, (AX)
	MOVL R10, -4(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B

memmove_long_match_emit_repeat_encodeBetterBlockAsm10B:
	// SI = dst pointer after the literals.
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// Same sequence as the long copy in the non-repeat path: head and tail
	// are held in X0-X3, the middle is copied in 32-byte steps.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	DECQ R13
	JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B:
	// CX = end of the match, R12 = full match length.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitRepeat
	// SI = length, R12 = length - 4, R8 = offset. Same four encodings as the
	// repeat emitted after a long copy above.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
	CMPL R8, $0x00000800
	JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B

cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B:
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B:
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
	XORQ SI, SI
	LEAL 1(SI)(R12*4), R12
	MOVB R8, 1(AX)
	SARL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX

match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
	// Stop searching once CX reaches 8(SP); return 0 if dst reached its limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm10B
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBetterBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBetterBlockAsm10B:
	// Index positions near both ends of the match before resuming the search.
	// SI / R8 = multipliers for the 6-byte / 4-byte hash.
	MOVQ $0x0000cf1bbcdcbf9b, SI
	MOVQ $0x9e3779b1, R8

	// DI = match start + 1; R9 = 8 bytes of src at DI.
	INCL DI
	MOVQ (DX)(DI*1), R9
	MOVQ R9, R10
	MOVQ R9, R11
	MOVQ R9, R12
	SHRQ $0x08, R11
	MOVQ R11, R13
	SHRQ $0x10, R12
	LEAL 1(DI), R14
	LEAL 2(DI), R15

	// R9 = 8 bytes of src at CX - 2, hashed further below.
	MOVQ -2(DX)(CX*1), R9

	// 6-byte hashes of positions DI and DI+1 (R10, R13); 4-byte hashes of
	// positions DI+1 and DI+2 (R11, R12).
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x34, R10
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x34, R13
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x36, R11
	SHLQ $0x20, R12
	IMULQ R8, R12
	SHRQ $0x36, R12
	MOVL DI, 24(SP)(R10*4)
	MOVL R14, 24(SP)(R13*4)
	MOVL R14, 16408(SP)(R11*4)
	MOVL R15, 16408(SP)(R12*4)

	// Same for the end of the match: 6-byte hashes of positions CX-2 and CX-1
	// (R10, R13) and the 4-byte hash of position CX-1 (R11).
	MOVQ R9, R10
	MOVQ R9, R11
	SHRQ $0x08, R11
	MOVQ R11, R13
	LEAL -2(CX), R9
	LEAL -1(CX), DI
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x34, R10
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x36, R11
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x34, R13
	MOVL R9, 24(SP)(R10*4)
	MOVL DI, 16408(SP)(R11*4)
	MOVL DI, 24(SP)(R13*4)
	JMP search_loop_encodeBetterBlockAsm10B

emit_remainder_encodeBetterBlockAsm10B:
	// Return 0 unless AX + 3 + (len(src) - 12(SP)) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBetterBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBetterBlockAsm10B:
	// Emit src[12(SP):len(src)] as literals. CX = source pointer,
	// SI = length, DX = length - 1 (the src base in DX is no longer needed).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX

	// Same header selection as for literals emitted before a match.
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBetterBlockAsm10B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBetterBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B

two_bytes_emit_remainder_encodeBetterBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBetterBlockAsm10B
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B

one_byte_emit_remainder_encodeBetterBlockAsm10B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBetterBlockAsm10B:
	// DX = dst pointer after the literals, BX = length.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	// Copy BX (at most 64) bytes from CX to AX, dispatching on size class.
	CMPQ BX, $0x04
	JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4:
	MOVL (CX), SI
	MOVL SI, (AX)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7:
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B

memmove_long_emit_remainder_encodeBetterBlockAsm10B:
	// DX = dst pointer after the literals, BX = length.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	// Head and tail are held in X0-X3; the middle is copied in 32-byte steps
	// starting at dst offset R8 = 64 - (AX & 31).
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
	// ret = bytes written = AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBetterBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBetterBlockAsm8B(SB), $5144-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000028, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBetterBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBetterBlockAsm8B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -6(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm8B:
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x04, SI
	LEAL 1(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm8B
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x36, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x38, R11
	MOVL 24(SP)(R10*4), SI
	MOVL 4120(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 4120(SP)(R11*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm8B
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeBetterBlockAsm8B
	MOVL 20(SP), CX
	JMP search_loop_encodeBetterBlockAsm8B

candidateS_match_encodeBetterBlockAsm8B:
	SHRQ $0x08, DI
MOVQ DI, R10 SHLQ $0x10, R10 IMULQ R9, R10 SHRQ $0x36, R10 MOVL 24(SP)(R10*4), SI INCL CX MOVL CX, 24(SP)(R10*4) CMPL (DX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm8B DECL CX MOVL R8, SI candidate_match_encodeBetterBlockAsm8B: MOVL 12(SP), DI TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm8B match_extend_back_loop_encodeBetterBlockAsm8B: CMPL CX, DI JLE match_extend_back_end_encodeBetterBlockAsm8B MOVB -1(DX)(SI*1), BL MOVB -1(DX)(CX*1), R8 CMPB BL, R8 JNE match_extend_back_end_encodeBetterBlockAsm8B LEAL -1(CX), CX DECL SI JZ match_extend_back_end_encodeBetterBlockAsm8B JMP match_extend_back_loop_encodeBetterBlockAsm8B match_extend_back_end_encodeBetterBlockAsm8B: MOVL CX, DI SUBL 12(SP), DI LEAQ 3(AX)(DI*1), DI CMPQ DI, (SP) JL match_dst_size_check_encodeBetterBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBetterBlockAsm8B: MOVL CX, DI ADDL $0x04, CX ADDL $0x04, SI MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(SI*1), R10 // matchLen XORL R12, R12 CMPL R8, $0x08 JL matchlen_single_match_nolit_encodeBetterBlockAsm8B matchlen_loopback_match_nolit_encodeBetterBlockAsm8B: MOVQ (R9)(R12*1), R11 XORQ (R10)(R12*1), R11 TESTQ R11, R11 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm8B BSFQ R11, R11 SARQ $0x03, R11 LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_loop_match_nolit_encodeBetterBlockAsm8B: LEAL -8(R8), R8 LEAL 8(R12), R12 CMPL R8, $0x08 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B matchlen_single_match_nolit_encodeBetterBlockAsm8B: TESTL R8, R8 JZ match_nolit_end_encodeBetterBlockAsm8B matchlen_single_loopback_match_nolit_encodeBetterBlockAsm8B: MOVB (R9)(R12*1), R11 CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm8B LEAL 1(R12), R12 DECL R8 JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm8B match_nolit_end_encodeBetterBlockAsm8B: MOVL CX, R8 SUBL SI, R8 // Check if repeat CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm8B 
MOVL R8, 16(SP) MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(SI*1), R10 SUBL SI, R9 LEAL -1(R9), SI CMPL SI, $0x3c JLT one_byte_match_emit_encodeBetterBlockAsm8B CMPL SI, $0x00000100 JLT two_bytes_match_emit_encodeBetterBlockAsm8B MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeBetterBlockAsm8B two_bytes_match_emit_encodeBetterBlockAsm8B: MOVB $0xf0, (AX) MOVB SI, 1(AX) ADDQ $0x02, AX CMPL SI, $0x40 JL memmove_match_emit_encodeBetterBlockAsm8B JMP memmove_long_match_emit_encodeBetterBlockAsm8B one_byte_match_emit_encodeBetterBlockAsm8B: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeBetterBlockAsm8B: LEAQ (AX)(R9*1), SI // genMemMoveShort CMPQ R9, $0x04 JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4 CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7 CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4: MOVL (R10), R11 MOVL R11, (AX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7: MOVL (R10), R11 MOVL -4(R10)(R9*1), R10 MOVL R11, (AX) MOVL R10, -4(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16: MOVQ (R10), R11 MOVQ -8(R10)(R9*1), R10 MOVQ R11, (AX) MOVQ R10, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32: MOVOU (R10), X0 MOVOU -16(R10)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP 
memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64: MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm8B: MOVQ SI, AX JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B memmove_long_match_emit_encodeBetterBlockAsm8B: LEAQ (AX)(R9*1), SI // genMemMoveLong MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVQ R9, R13 SHRQ $0x05, R13 MOVQ AX, R11 ANDL $0x0000001f, R11 MOVQ $0x00000040, R14 SUBQ R11, R14 DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(R10)(R14*1), R11 LEAQ -32(AX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back: MOVOU (R11), X4 MOVOU 16(R11), X5 MOVOA X4, (R15) MOVOA X5, 16(R15) ADDQ $0x20, R15 ADDQ $0x20, R11 ADDQ $0x20, R14 DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(R10)(R14*1), X4 MOVOU -16(R10)(R14*1), X5 MOVOA X4, -32(AX)(R14*1) MOVOA X5, -16(AX)(R14*1) ADDQ $0x20, R14 CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ SI, AX emit_literal_done_match_emit_encodeBetterBlockAsm8B: ADDL R12, CX ADDL $0x04, R12 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeBetterBlockAsm8B: CMPL R12, $0x40 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B MOVB $0xee, (AX) MOVW R8, 1(AX) LEAL -60(R12), R12 ADDQ $0x03, AX // emitRepeat MOVL R12, SI LEAL -4(R12), R12 CMPL SI, $0x08 JLE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short CMPL SI, $0x0c JGE 
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: CMPL R12, $0x00000104 JLT repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short LEAL -256(R12), R12 MOVW $0x0019, (AX) MOVW R12, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: LEAL -4(R12), R12 MOVW $0x0015, (AX) MOVB R12, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: SHLL $0x02, R12 ORL $0x01, R12 MOVW R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B XORQ SI, SI LEAL 1(SI)(R12*4), R12 MOVB R8, 1(AX) SARL $0x08, R8 SHLL $0x05, R8 ORL R8, R12 MOVB R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B JMP two_byte_offset_match_nolit_encodeBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: CMPL R12, $0x0c JGE emit_copy_three_match_nolit_encodeBetterBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(R12*4), R12 MOVB R8, 1(AX) SHRL $0x08, R8 SHLL $0x05, R8 ORL R8, R12 MOVB R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B emit_copy_three_match_nolit_encodeBetterBlockAsm8B: MOVB $0x02, BL LEAL -4(BX)(R12*4), R12 MOVB R12, (AX) MOVW R8, 1(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B match_is_repeat_encodeBetterBlockAsm8B: MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B MOVL DI, R8 MOVL DI, 12(SP) LEAQ (DX)(SI*1), R9 SUBL SI, R8 LEAL -1(R8), SI CMPL SI, $0x3c JLT one_byte_match_emit_repeat_encodeBetterBlockAsm8B CMPL SI, $0x00000100 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm8B MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: MOVB $0xf0, (AX) MOVB SI, 1(AX) ADDQ $0x02, 
AX CMPL SI, $0x40 JL memmove_match_emit_repeat_encodeBetterBlockAsm8B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B one_byte_match_emit_repeat_encodeBetterBlockAsm8B: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_match_emit_repeat_encodeBetterBlockAsm8B: LEAQ (AX)(R8*1), SI // genMemMoveShort CMPQ R8, $0x04 JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4 CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7 CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4: MOVL (R9), R10 MOVL R10, (AX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7: MOVL (R9), R10 MOVL -4(R9)(R8*1), R9 MOVL R10, (AX) MOVL R9, -4(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B: MOVQ SI, AX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B 
memmove_long_match_emit_repeat_encodeBetterBlockAsm8B: LEAQ (AX)(R8*1), SI // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R13 SUBQ R10, R13 DECQ R11 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(R9)(R13*1), R10 LEAQ -32(AX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R14) MOVOA X5, 16(R14) ADDQ $0x20, R14 ADDQ $0x20, R10 ADDQ $0x20, R13 DECQ R11 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(R9)(R13*1), X4 MOVOU -16(R9)(R13*1), X5 MOVOA X4, -32(AX)(R13*1) MOVOA X5, -16(AX)(R13*1) ADDQ $0x20, R13 CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ SI, AX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B: ADDL R12, CX ADDL $0x04, R12 MOVL CX, 12(SP) // emitRepeat MOVL R12, SI LEAL -4(R12), R12 CMPL SI, $0x08 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B CMPL SI, $0x0c JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B: CMPL R12, $0x00000104 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B LEAL -256(R12), R12 MOVW $0x0019, (AX) MOVW R12, 2(AX) ADDQ $0x04, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B: LEAL -4(R12), R12 MOVW $0x0015, (AX) MOVB R12, 2(AX) ADDQ $0x03, AX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B: SHLL $0x02, R12 ORL $0x01, R12 MOVW R12, (AX) ADDQ $0x02, AX JMP 
match_nolit_emitcopy_end_encodeBetterBlockAsm8B XORQ SI, SI LEAL 1(SI)(R12*4), R12 MOVB R8, 1(AX) SARL $0x08, R8 SHLL $0x05, R8 ORL R8, R12 MOVB R12, (AX) ADDQ $0x02, AX match_nolit_emitcopy_end_encodeBetterBlockAsm8B: CMPL CX, 8(SP) JGE emit_remainder_encodeBetterBlockAsm8B CMPQ AX, (SP) JL match_nolit_dst_ok_encodeBetterBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm8B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 INCL DI MOVQ (DX)(DI*1), R9 MOVQ R9, R10 MOVQ R9, R11 MOVQ R9, R12 SHRQ $0x08, R11 MOVQ R11, R13 SHRQ $0x10, R12 LEAL 1(DI), R14 LEAL 2(DI), R15 MOVQ -2(DX)(CX*1), R9 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x36, R10 SHLQ $0x10, R13 IMULQ SI, R13 SHRQ $0x36, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x38, R11 SHLQ $0x20, R12 IMULQ R8, R12 SHRQ $0x38, R12 MOVL DI, 24(SP)(R10*4) MOVL R14, 24(SP)(R13*4) MOVL R14, 4120(SP)(R11*4) MOVL R15, 4120(SP)(R12*4) MOVQ R9, R10 MOVQ R9, R11 SHRQ $0x08, R11 MOVQ R11, R13 LEAL -2(CX), R9 LEAL -1(CX), DI SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x36, R10 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x38, R11 SHLQ $0x10, R13 IMULQ SI, R13 SHRQ $0x36, R13 MOVL R9, 24(SP)(R10*4) MOVL DI, 4120(SP)(R11*4) MOVL DI, 24(SP)(R13*4) JMP search_loop_encodeBetterBlockAsm8B emit_remainder_encodeBetterBlockAsm8B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeBetterBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBetterBlockAsm8B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B MOVL CX, SI MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBetterBlockAsm8B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBetterBlockAsm8B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B two_bytes_emit_remainder_encodeBetterBlockAsm8B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ 
$0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B one_byte_emit_remainder_encodeBetterBlockAsm8B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeBetterBlockAsm8B: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveShort CMPQ BX, $0x04 JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4: MOVL (CX), SI MOVL SI, (AX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7: MOVL (CX), SI MOVL -4(CX)(BX*1), CX MOVL SI, (AX) MOVL CX, -4(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16: MOVQ (CX), SI MOVQ -8(CX)(BX*1), CX MOVQ SI, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B 
memmove_long_emit_remainder_encodeBetterBlockAsm8B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm8B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst as a sequence of literal runs and back-reference
// copies. Returns the number of bytes written to dst, or 0 when the output
// would reach the internal dst limit computed below.
//
// Frame layout (65560 bytes):
//   0(SP)   8 bytes  dst limit pointer: dst + (len(src) - 9 - len(src)>>5)
//   8(SP)   4 bytes  search limit: len(src) - 8
//   12(SP)  4 bytes  nextEmit: offset in src of the first byte not yet emitted
//   16(SP)  4 bytes  repeat: offset of the most recent match (starts at 1)
//   20(SP)  4 bytes  nextS: position to resume at when no candidate matches
//   24(SP)  65536 B  hash table, 16384 entries of 4 bytes (src positions)
//
// Registers kept for the whole function:
//   AX = dst write pointer, DX = src base pointer, CX = current position s.
// BX is only used through its low byte (BL) as a scratch value.
//
// NOTE(review): the search loop loads 8 bytes at src[s] and the limits are
// derived from len(src)-8 / len(src)-9, so callers presumably guarantee a
// minimum src length and a sufficiently large dst - confirm against callers.
TEXT ·encodeSnappyBlockAsm(SB), $65560-56
	MOVQ dst_base+0(FP), AX

	// Zero the hash table: 0x200 iterations x 128 bytes = 65536 bytes at 24(SP).
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBlockAsm:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBlockAsm

	// nextEmit = 0.
	MOVL $0x00000000, 12(SP)

	// Search limit = len(src)-8; dst limit = dst + len(src) - 9 - len(src)/32.
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)

	// s = 1, repeat = 1, DX = src base.
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm:
	// nextS = s + 4 + ((s - nextEmit) >> 6); leave the loop once it reaches
	// the search limit.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x06, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm

	// DI = 8 bytes at src[s]; remember nextS in 20(SP).
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)

	// Hash: ((x << 16) * 0xcf1bbcdcbf9b) >> 50, i.e. a 14-bit index built from
	// the low 6 bytes of x. R10 = hash(src[s:]), R11 = hash(src[s+1:]).
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ DI, R10
	MOVQ DI, R11
	SHRQ $0x08, R11
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10
	SHLQ $0x10, R11
	IMULQ R9, R11
	SHRQ $0x32, R11

	// SI = candidate for s, R8 = candidate for s+1; then record s and s+1.
	MOVL 24(SP)(R10*4), SI
	MOVL 24(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	LEAL 1(CX), R10
	MOVL R10, 24(SP)(R11*4)

	// R10 = hash(src[s+2:]), used by the no-repeat path.
	MOVQ DI, R10
	SHRQ $0x10, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10

	// Repeat check: do the 4 bytes at s+1 equal the 4 bytes at s+1-repeat?
	MOVL CX, R9
	SUBL 16(SP), R9
	MOVL 1(DX)(R9*1), R11
	MOVQ DI, R9
	SHRQ $0x08, R9
	CMPL R9, R11
	JNE no_repeat_found_encodeSnappyBlockAsm

	// Repeat found. DI = base = s+1, R8 = base - repeat. Extend the match
	// backwards while base > nextEmit, the bytes before both positions are
	// equal, and the candidate position has not reached 0.
	LEAL 1(CX), DI
	MOVL 12(SP), SI
	MOVL DI, R8
	SUBL 16(SP), R8
	JZ repeat_extend_back_end_encodeSnappyBlockAsm

repeat_extend_back_loop_encodeSnappyBlockAsm:
	CMPL DI, SI
	JLE repeat_extend_back_end_encodeSnappyBlockAsm
	MOVB -1(DX)(R8*1), BL
	MOVB -1(DX)(DI*1), R9
	CMPB BL, R9
	JNE repeat_extend_back_end_encodeSnappyBlockAsm
	LEAL -1(DI), DI
	DECL R8
	JNZ repeat_extend_back_loop_encodeSnappyBlockAsm

repeat_extend_back_end_encodeSnappyBlockAsm:
	// Emit src[nextEmit:base] as a literal run (skipped when empty).
	// R9 = literal source pointer, R8 = length, SI = length-1; nextEmit = base.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm
	MOVL DI, R8
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R9
	SUBL SI, R8
	LEAL -1(R8), SI

	// Literal header, selected by length-1:
	//   < 60       one byte  (length-1)<<2
	//   < 1<<8     0xf0 + 1 length byte
	//   < 1<<16    0xf4 + 2 length bytes
	//   < 1<<24    0xf8 + 3 length bytes
	//   otherwise  0xfc + 4 length bytes
	CMPL SI, $0x3c
	JLT one_byte_repeat_emit_encodeSnappyBlockAsm
	CMPL SI, $0x00000100
	JLT two_bytes_repeat_emit_encodeSnappyBlockAsm
	CMPL SI, $0x00010000
	JLT three_bytes_repeat_emit_encodeSnappyBlockAsm
	CMPL SI, $0x01000000
	JLT four_bytes_repeat_emit_encodeSnappyBlockAsm
	MOVB $0xfc, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm

four_bytes_repeat_emit_encodeSnappyBlockAsm:
	MOVL SI, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm

three_bytes_repeat_emit_encodeSnappyBlockAsm:
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm

two_bytes_repeat_emit_encodeSnappyBlockAsm:
	// Lengths of 64 or less still use the short copy below.
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_repeat_emit_encodeSnappyBlockAsm
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm

one_byte_repeat_emit_encodeSnappyBlockAsm:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeSnappyBlockAsm:
	// SI = dst pointer after the literal bytes.
	LEAQ (AX)(R8*1), SI

	// genMemMoveShort
	// Copies 1..64 bytes with overlapping head/tail loads. The <= 8 case
	// always moves a full 8 bytes; the search-loop guard (s+4 < len(src)-8)
	// leaves more than 8 source bytes after any literal start reached here.
	CMPQ R8, $0x08
	JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8:
	MOVQ (R9), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
	MOVQ SI, AX
	JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm

memmove_long_repeat_emit_encodeSnappyBlockAsm:
	// SI = dst pointer after the literal bytes.
	LEAQ (AX)(R8*1), SI

	// genMemMoveLong
	// The first and last 32 source bytes are loaded up front (X0..X3) and
	// stored unaligned at the end. In between, 32-byte chunks are stored with
	// MOVOA starting at the first 32-byte aligned dst address above AX:
	// R12 = 64 - (AX & 31) is the running offset of the chunk end.
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(R9)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R8, R12
	JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ SI, AX

emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
	// s += 5, i.e. just past the 4 bytes at s+1 already known to match.
	// R9 = &src[s], SI = &src[s-repeat], R8 = bytes left in src.
	ADDL $0x05, CX
	MOVL CX, SI
	SUBL 16(SP), SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R11 = number of equal bytes. Compares 8 bytes at a time; on a mismatch
	// the lowest set bit of the XOR, divided by 8, is the matching byte count.
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend_encodeSnappyBlockAsm

matchlen_loopback_repeat_extend_encodeSnappyBlockAsm:
	MOVQ (R9)(R11*1), R10
	XORQ (SI)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeSnappyBlockAsm

matchlen_loop_repeat_extend_encodeSnappyBlockAsm:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm

matchlen_single_repeat_extend_encodeSnappyBlockAsm:
	// Fewer than 8 bytes left: compare byte by byte.
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeSnappyBlockAsm

matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm:
	MOVB (R9)(R11*1), R10
	CMPB (SI)(R11*1), R10
	JNE repeat_extend_forward_end_encodeSnappyBlockAsm
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm

repeat_extend_forward_end_encodeSnappyBlockAsm:
	// s = end of match; SI = s - base = match length; DI = repeat offset.
	ADDL R11, CX
	MOVL CX, SI
	SUBL DI, SI
	MOVL 16(SP), DI

	// emitCopy
	// Offsets below 65536 use the 2-byte-offset forms; larger offsets use
	// 5-byte copies with a 4-byte offset.
	CMPL DI, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm

four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
	// length > 64: emit 0xff (63<<2 | 3) + 4-byte offset, a copy of 64 bytes.
	// Loop again only when at least 4 bytes are left.
	CMPL SI, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
	MOVB $0xff, (AX)
	MOVL DI, 1(AX)
	LEAL -64(SI), SI
	ADDQ $0x05, AX
	CMPL SI, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
	JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm

four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
	// Remaining length (if any): tag (length-1)<<2 | 3, then 4-byte offset.
	// Only the low byte of the LEAL result is stored, so BL alone matters.
	TESTL SI, SI
	JZ repeat_end_emit_encodeSnappyBlockAsm
	MOVB $0x03, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeSnappyBlockAsm

two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
	// length > 64: emit 0xee (59<<2 | 2) + 2-byte offset, a copy of 60 bytes,
	// and loop with the remaining length.
	CMPL SI, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(SI), SI
	ADDQ $0x03, AX
	JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
	// length < 12 and offset < 2048: 2-byte form
	//   byte0 = (length-4)<<2 | 1 | (offset>>8)<<5, byte1 = offset & 0xff.
	CMPL SI, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
	CMPL DI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(SI*4), SI
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeSnappyBlockAsm

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
	// 3-byte form: tag (length-1)<<2 | 2, then the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm:
	// nextEmit = s; continue searching.
	MOVL CX, 12(SP)
	JMP search_loop_encodeSnappyBlockAsm

no_repeat_found_encodeSnappyBlockAsm:
	// Candidate for s: compare 4 bytes at src[candidate] with src[s:].
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBlockAsm

	// Candidate for s+1 (R8) against the bytes at s+1; SI is reloaded with the
	// table entry for hash(src[s+2:]) and R9 = s+2.
	SHRQ $0x08, DI
	MOVL 24(SP)(R10*4), SI
	LEAL 2(CX), R9
	CMPL (DX)(R8*1), DI
	JEQ candidate2_match_encodeSnappyBlockAsm

	// Record s+2, then test its previous table entry against the bytes at s+2.
	MOVL R9, 24(SP)(R10*4)
	SHRQ $0x08, DI
	CMPL (DX)(SI*1), DI
	JEQ candidate3_match_encodeSnappyBlockAsm

	// Nothing matched: s = nextS.
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBlockAsm

candidate3_match_encodeSnappyBlockAsm:
	// Match is at s+2.
	ADDL $0x02, CX
	JMP candidate_match_encodeSnappyBlockAsm

candidate2_match_encodeSnappyBlockAsm:
	// Match is at s+1: still record s+2 in the table, then use candidate R8.
	MOVL R9, 24(SP)(R10*4)
	INCL CX
	MOVL R8, SI

candidate_match_encodeSnappyBlockAsm:
	// CX = s, SI = candidate. Extend backwards while s > nextEmit, the
	// preceding bytes match, and the candidate has not reached position 0.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeSnappyBlockAsm

match_extend_back_loop_encodeSnappyBlockAsm:
	CMPL CX, DI
	JLE match_extend_back_end_encodeSnappyBlockAsm
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeSnappyBlockAsm
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeSnappyBlockAsm
	JMP match_extend_back_loop_encodeSnappyBlockAsm

match_extend_back_end_encodeSnappyBlockAsm:
	// Output space check: dst + pending literal bytes + 5 must stay below the
	// dst limit, otherwise return 0.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 5(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeSnappyBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBlockAsm:
	// Emit src[nextEmit:s] as a literal run (skipped when empty).
	// DI = literal source pointer, R9 = length, R8 = length-1; nextEmit = s.
	MOVL CX, DI
	MOVL 12(SP), R8
	CMPL R8, DI
	JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(R8*1), DI
	SUBL R8, R9
	LEAL -1(R9), R8

	// Literal header selected by length-1, same thresholds and tag bytes
	// (one byte, 0xf0, 0xf4, 0xf8, 0xfc) as in the repeat path.
	CMPL R8, $0x3c
	JLT one_byte_match_emit_encodeSnappyBlockAsm
	CMPL R8, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBlockAsm
	CMPL R8, $0x00010000
	JLT three_bytes_match_emit_encodeSnappyBlockAsm
	CMPL R8, $0x01000000
	JLT four_bytes_match_emit_encodeSnappyBlockAsm
	MOVB $0xfc, (AX)
	MOVL R8, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_match_emit_encodeSnappyBlockAsm

four_bytes_match_emit_encodeSnappyBlockAsm:
	MOVL R8, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW R8, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_encodeSnappyBlockAsm

three_bytes_match_emit_encodeSnappyBlockAsm:
	MOVB $0xf4, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBlockAsm

two_bytes_match_emit_encodeSnappyBlockAsm:
	MOVB $0xf0, (AX)
	MOVB R8, 1(AX)
	ADDQ $0x02, AX
	CMPL R8, $0x40
	JL memmove_match_emit_encodeSnappyBlockAsm
	JMP memmove_long_match_emit_encodeSnappyBlockAsm

one_byte_match_emit_encodeSnappyBlockAsm:
	SHLB $0x02, R8
	MOVB R8, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBlockAsm:
	// R8 = dst pointer after the literal bytes.
	LEAQ (AX)(R9*1), R8

	// genMemMoveShort
	// Same scheme as the repeat path; the <= 8 case moves a full 8 bytes,
	// which the search-loop guard keeps inside src for literals emitted here.
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8:
	MOVQ (DI), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm

emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16:
	MOVQ (DI), R10
	MOVQ -8(DI)(R9*1), DI
	MOVQ R10, (AX)
	MOVQ DI, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm

emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
	MOVOU (DI), X0
	MOVOU -16(DI)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm

emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeSnappyBlockAsm:
	MOVQ R8, AX
	JMP emit_literal_done_match_emit_encodeSnappyBlockAsm

memmove_long_match_emit_encodeSnappyBlockAsm:
	// R8 = dst pointer after the literal bytes.
	LEAQ (AX)(R9*1), R8

	// genMemMoveLong
	// Head and tail saved in X0..X3, aligned 32-byte chunks in between,
	// R12 = 64 - (AX & 31) is the running offset of the chunk end.
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVQ R9, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(DI)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(DI)(R12*1), X4
	MOVOU -16(DI)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R9, R12
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ R8, AX

emit_literal_done_match_emit_encodeSnappyBlockAsm:
match_nolit_loop_encodeSnappyBlockAsm:
	// repeat = s - candidate. Skip the 4 bytes already compared, then
	// R8 = &src[s], SI = &src[candidate], DI = bytes left in src.
	MOVL CX, DI
	SUBL SI, DI
	MOVL DI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R10 = number of further equal bytes (8 at a time, then byte by byte).
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBlockAsm

matchlen_loopback_match_nolit_encodeSnappyBlockAsm:
	MOVQ (R8)(R10*1), R9
	XORQ (SI)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP match_nolit_end_encodeSnappyBlockAsm

matchlen_loop_match_nolit_encodeSnappyBlockAsm:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm

matchlen_single_match_nolit_encodeSnappyBlockAsm:
	TESTL DI, DI
	JZ match_nolit_end_encodeSnappyBlockAsm

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm:
	MOVB (R8)(R10*1), R9
	CMPB (SI)(R10*1), R9
	JNE match_nolit_end_encodeSnappyBlockAsm
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm

match_nolit_end_encodeSnappyBlockAsm:
	// s = end of match; SI = offset; R10 = total match length (+4 for the
	// bytes skipped above); nextEmit = s.
	ADDL R10, CX
	MOVL 16(SP), SI
	ADDL $0x04, R10
	MOVL CX, 12(SP)

	// emitCopy
	// Same encodings as the repeat path, with offset in SI and length in R10.
	CMPL SI, $0x00010000
	JL two_byte_offset_match_nolit_encodeSnappyBlockAsm

four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
	CMPL R10, $0x40
	JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(R10), R10
	ADDQ $0x05, AX
	CMPL R10, $0x04
	JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm
	JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm

four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
	TESTL R10, R10
	JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm
	MOVB $0x03, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm

two_byte_offset_match_nolit_encodeSnappyBlockAsm:
	CMPL R10, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(R10), R10
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
	CMPL R10, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm
	CMPL SI, $0x00000800
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(R10*4), R10
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm

emit_copy_three_match_nolit_encodeSnappyBlockAsm:
	MOVB $0x02, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBlockAsm:
	// Past the search limit: flush the remainder. Otherwise load 8 bytes at
	// src[s-2] and make sure dst is still below its limit (else return 0).
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm
	MOVQ -2(DX)(CX*1), DI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBlockAsm:
	// Record s-2 under hash(src[s-2:]). Look up hash(src[s:]) to get a new
	// candidate and record s there. If the candidate's 4 bytes equal the
	// bytes at s, start another match right away; else s++ and search again.
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ DI, R8
	SHRQ $0x10, DI
	MOVQ DI, SI
	SHLQ $0x10, R8
	IMULQ R9, R8
	SHRQ $0x32, R8
	SHLQ $0x10, SI
	IMULQ R9, SI
	SHRQ $0x32, SI
	LEAL -2(CX), R9
	LEAQ 24(SP)(SI*4), R10
	MOVL (R10), SI
	MOVL R9, 24(SP)(R8*4)
	MOVL CX, (R10)
	CMPL (DX)(SI*1), DI
	JEQ match_nolit_loop_encodeSnappyBlockAsm
	INCL CX
	JMP search_loop_encodeSnappyBlockAsm

emit_remainder_encodeSnappyBlockAsm:
	// Output space check for the trailing literals:
	// dst + (len(src) - nextEmit) + 5 must stay below the limit, else return 0.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 5(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBlockAsm:
	// Emit src[nextEmit:] as a final literal run (skipped when empty).
	// CX = literal source pointer, SI = length, DX = length-1. DX no longer
	// holds the src base from here on.
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBlockAsm
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBlockAsm
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeSnappyBlockAsm
	CMPL DX, $0x01000000
	JLT four_bytes_emit_remainder_encodeSnappyBlockAsm
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm

four_bytes_emit_remainder_encodeSnappyBlockAsm:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm

three_bytes_emit_remainder_encodeSnappyBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm

two_bytes_emit_remainder_encodeSnappyBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBlockAsm
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm

one_byte_emit_remainder_encodeSnappyBlockAsm:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBlockAsm:
	// DX = dst pointer after the literal bytes, BX = length (1..64).
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	// Unlike the in-loop literal copies, the trailing run ends exactly at the
	// end of src and may be shorter than 8 bytes (nextEmit can be anywhere up
	// to len(src) after a match). An unconditional 8-byte load would then read
	// up to 7 bytes past the end of src, so lengths 1..7 are copied with
	// exact-size moves. Length 8 is handled by the 8through16 case.
	// NOTE: the file header marks this file as generated by gen.go; the
	// generator needs the same change or this fix is lost on regeneration.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2:
	// First and last byte (the same byte when length is 1).
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7:
	// Two overlapping 4-byte moves cover lengths 4..7.
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm

memmove_long_emit_remainder_encodeSnappyBlockAsm:
	// DX = dst pointer after the literal bytes, BX = length.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	// Head and tail saved in X0..X3, aligned 32-byte chunks in between,
	// R8 = 64 - (AX & 31) is the running offset of the chunk end.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
	// Return the number of bytes written: AX - dst base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBlockAsm64K:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBlockAsm64K
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm64K:
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x06, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI,
8(SP) JGE emit_remainder_encodeSnappyBlockAsm64K MOVQ (DX)(CX*1), DI MOVL SI, 20(SP) MOVQ $0x0000cf1bbcdcbf9b, R9 MOVQ DI, R10 MOVQ DI, R11 SHRQ $0x08, R11 SHLQ $0x10, R10 IMULQ R9, R10 SHRQ $0x32, R10 SHLQ $0x10, R11 IMULQ R9, R11 SHRQ $0x32, R11 MOVL 24(SP)(R10*4), SI MOVL 24(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) LEAL 1(CX), R10 MOVL R10, 24(SP)(R11*4) MOVQ DI, R10 SHRQ $0x10, R10 SHLQ $0x10, R10 IMULQ R9, R10 SHRQ $0x32, R10 MOVL CX, R9 SUBL 16(SP), R9 MOVL 1(DX)(R9*1), R11 MOVQ DI, R9 SHRQ $0x08, R9 CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm64K LEAL 1(CX), DI MOVL 12(SP), SI MOVL DI, R8 SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm64K repeat_extend_back_loop_encodeSnappyBlockAsm64K: CMPL DI, SI JLE repeat_extend_back_end_encodeSnappyBlockAsm64K MOVB -1(DX)(R8*1), BL MOVB -1(DX)(DI*1), R9 CMPB BL, R9 JNE repeat_extend_back_end_encodeSnappyBlockAsm64K LEAL -1(DI), DI DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K repeat_extend_back_end_encodeSnappyBlockAsm64K: MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K MOVL DI, R8 MOVL DI, 12(SP) LEAQ (DX)(SI*1), R9 SUBL SI, R8 LEAL -1(R8), SI CMPL SI, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm64K CMPL SI, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm64K MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K two_bytes_repeat_emit_encodeSnappyBlockAsm64K: MOVB $0xf0, (AX) MOVB SI, 1(AX) ADDQ $0x02, AX CMPL SI, $0x40 JL memmove_repeat_emit_encodeSnappyBlockAsm64K JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K one_byte_repeat_emit_encodeSnappyBlockAsm64K: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm64K: LEAQ (AX)(R8*1), SI // genMemMoveShort CMPQ R8, $0x08 JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 CMPQ R8, $0x20 JBE 
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8: MOVQ (R9), R10 MOVQ R10, (AX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K: MOVQ SI, AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K memmove_long_repeat_emit_encodeSnappyBlockAsm64K: LEAQ (AX)(R8*1), SI // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 LEAQ -32(R9)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R13) MOVOA X5, 16(R13) ADDQ $0x20, R13 ADDQ $0x20, R10 ADDQ $0x20, R12 DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: MOVOU -32(R9)(R12*1), X4 MOVOU -16(R9)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 
CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ SI, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K: ADDL $0x05, CX MOVL CX, SI SUBL 16(SP), SI MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(SI*1), SI // matchLen XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm64K matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K: MOVQ (R9)(R11*1), R10 XORQ (SI)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K matchlen_single_repeat_extend_encodeSnappyBlockAsm64K: TESTL R8, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm64K: MOVB (R9)(R11*1), R10 CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K LEAL 1(R11), R11 DECL R8 JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm64K repeat_extend_forward_end_encodeSnappyBlockAsm64K: ADDL R11, CX MOVL CX, SI SUBL DI, SI MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K: CMPL SI, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K MOVB $0xee, (AX) MOVW DI, 1(AX) LEAL -60(SI), SI ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K: CMPL SI, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K CMPL DI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K MOVB $0x01, BL LEAL -16(BX)(SI*4), SI MOVB DI, 1(AX) SHRL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP 
repeat_end_emit_encodeSnappyBlockAsm64K emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K: MOVB $0x02, BL LEAL -4(BX)(SI*4), SI MOVB SI, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm64K: MOVL CX, 12(SP) JMP search_loop_encodeSnappyBlockAsm64K no_repeat_found_encodeSnappyBlockAsm64K: CMPL (DX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm64K SHRQ $0x08, DI MOVL 24(SP)(R10*4), SI LEAL 2(CX), R9 CMPL (DX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm64K MOVL R9, 24(SP)(R10*4) SHRQ $0x08, DI CMPL (DX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm64K MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm64K candidate3_match_encodeSnappyBlockAsm64K: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm64K candidate2_match_encodeSnappyBlockAsm64K: MOVL R9, 24(SP)(R10*4) INCL CX MOVL R8, SI candidate_match_encodeSnappyBlockAsm64K: MOVL 12(SP), DI TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm64K match_extend_back_loop_encodeSnappyBlockAsm64K: CMPL CX, DI JLE match_extend_back_end_encodeSnappyBlockAsm64K MOVB -1(DX)(SI*1), BL MOVB -1(DX)(CX*1), R8 CMPB BL, R8 JNE match_extend_back_end_encodeSnappyBlockAsm64K LEAL -1(CX), CX DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm64K JMP match_extend_back_loop_encodeSnappyBlockAsm64K match_extend_back_end_encodeSnappyBlockAsm64K: MOVL CX, DI SUBL 12(SP), DI LEAQ 3(AX)(DI*1), DI CMPQ DI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm64K MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm64K: MOVL CX, DI MOVL 12(SP), R8 CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(R8*1), DI SUBL R8, R9 LEAL -1(R9), R8 CMPL R8, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm64K CMPL R8, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm64K MOVB $0xf4, (AX) MOVW R8, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm64K two_bytes_match_emit_encodeSnappyBlockAsm64K: MOVB 
$0xf0, (AX) MOVB R8, 1(AX) ADDQ $0x02, AX CMPL R8, $0x40 JL memmove_match_emit_encodeSnappyBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBlockAsm64K one_byte_match_emit_encodeSnappyBlockAsm64K: SHLB $0x02, R8 MOVB R8, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm64K: LEAQ (AX)(R9*1), R8 // genMemMoveShort CMPQ R9, $0x08 JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8 CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8: MOVQ (DI), R10 MOVQ R10, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: MOVQ (DI), R10 MOVQ -8(DI)(R9*1), DI MOVQ R10, (AX) MOVQ DI, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: MOVOU (DI), X0 MOVOU -16(DI)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm64K: MOVQ R8, AX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K memmove_long_match_emit_encodeSnappyBlockAsm64K: LEAQ (AX)(R9*1), R8 // genMemMoveLong MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVQ R9, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 LEAQ -32(DI)(R12*1), 
R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R13) MOVOA X5, 16(R13) ADDQ $0x20, R13 ADDQ $0x20, R10 ADDQ $0x20, R12 DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: MOVOU -32(DI)(R12*1), X4 MOVOU -16(DI)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ R8, AX emit_literal_done_match_emit_encodeSnappyBlockAsm64K: match_nolit_loop_encodeSnappyBlockAsm64K: MOVL CX, DI SUBL SI, DI MOVL DI, 16(SP) ADDL $0x04, CX ADDL $0x04, SI MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(SI*1), SI // matchLen XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm64K matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K: MOVQ (R8)(R10*1), R9 XORQ (SI)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm64K BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_loop_match_nolit_encodeSnappyBlockAsm64K: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K matchlen_single_match_nolit_encodeSnappyBlockAsm64K: TESTL DI, DI JZ match_nolit_end_encodeSnappyBlockAsm64K matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm64K: MOVB (R8)(R10*1), R9 CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm64K LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm64K match_nolit_end_encodeSnappyBlockAsm64K: ADDL R10, CX MOVL 16(SP), SI ADDL $0x04, R10 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm64K: CMPL R10, $0x40 JLE 
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(R10), R10 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K: CMPL R10, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K CMPL SI, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K MOVB $0x01, BL LEAL -16(BX)(R10*4), R10 MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, R10 MOVB R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBlockAsm64K: MOVB $0x02, BL LEAL -4(BX)(R10*4), R10 MOVB R10, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm64K: CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm64K MOVQ -2(DX)(CX*1), DI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm64K MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm64K: MOVQ $0x0000cf1bbcdcbf9b, R9 MOVQ DI, R8 SHRQ $0x10, DI MOVQ DI, SI SHLQ $0x10, R8 IMULQ R9, R8 SHRQ $0x32, R8 SHLQ $0x10, SI IMULQ R9, SI SHRQ $0x32, SI LEAL -2(CX), R9 LEAQ 24(SP)(SI*4), R10 MOVL (R10), SI MOVL R9, 24(SP)(R8*4) MOVL CX, (R10) CMPL (DX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm64K INCL CX JMP search_loop_encodeSnappyBlockAsm64K emit_remainder_encodeSnappyBlockAsm64K: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm64K MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm64K: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K MOVL CX, SI MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm64K CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm64K MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K 
two_bytes_emit_remainder_encodeSnappyBlockAsm64K: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeSnappyBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K one_byte_emit_remainder_encodeSnappyBlockAsm64K: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm64K: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveShort CMPQ BX, $0x08 JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8: MOVQ (CX), SI MOVQ SI, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16: MOVQ (CX), SI MOVQ -8(CX)(BX*1), CX MOVQ SI, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K memmove_long_emit_remainder_encodeSnappyBlockAsm64K: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI MOVQ AX, SI ANDL $0x0000001f, SI MOVQ 
$0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 LEAQ -32(CX)(R8*1), SI LEAQ -32(AX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 MOVOU 16(SI), X5 MOVOA X4, (R9) MOVOA X5, 16(R9) ADDQ $0x20, R9 ADDQ $0x20, SI ADDQ $0x20, R8 DECQ DI JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: MOVOU -32(CX)(R8*1), X4 MOVOU -16(CX)(R8*1), X5 MOVOA X4, -32(AX)(R8*1) MOVOA X5, -16(AX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K: MOVQ dst_base+0(FP), CX SUBQ CX, AX MOVQ AX, ret+48(FP) RET // func encodeSnappyBlockAsm12B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56 MOVQ dst_base+0(FP), AX MOVQ $0x00000080, CX LEAQ 24(SP), DX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm12B: MOVOU X0, (DX) MOVOU X0, 16(DX) MOVOU X0, 32(DX) MOVOU X0, 48(DX) MOVOU X0, 64(DX) MOVOU X0, 80(DX) MOVOU X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBlockAsm12B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX LEAQ -8(CX), SI MOVL SI, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL CX, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBlockAsm12B: MOVL CX, SI SUBL 12(SP), SI SHRL $0x05, SI LEAL 4(CX)(SI*1), SI CMPL SI, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm12B MOVQ (DX)(CX*1), DI MOVL SI, 20(SP) MOVQ $0x000000cf1bbcdcbb, R9 MOVQ DI, R10 MOVQ DI, R11 SHRQ $0x08, R11 SHLQ $0x18, R10 IMULQ R9, R10 SHRQ $0x34, R10 SHLQ $0x18, R11 IMULQ R9, R11 SHRQ $0x34, R11 MOVL 24(SP)(R10*4), SI MOVL 
24(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) LEAL 1(CX), R10 MOVL R10, 24(SP)(R11*4) MOVQ DI, R10 SHRQ $0x10, R10 SHLQ $0x18, R10 IMULQ R9, R10 SHRQ $0x34, R10 MOVL CX, R9 SUBL 16(SP), R9 MOVL 1(DX)(R9*1), R11 MOVQ DI, R9 SHRQ $0x08, R9 CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm12B LEAL 1(CX), DI MOVL 12(SP), SI MOVL DI, R8 SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm12B repeat_extend_back_loop_encodeSnappyBlockAsm12B: CMPL DI, SI JLE repeat_extend_back_end_encodeSnappyBlockAsm12B MOVB -1(DX)(R8*1), BL MOVB -1(DX)(DI*1), R9 CMPB BL, R9 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B LEAL -1(DI), DI DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B repeat_extend_back_end_encodeSnappyBlockAsm12B: MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B MOVL DI, R8 MOVL DI, 12(SP) LEAQ (DX)(SI*1), R9 SUBL SI, R8 LEAL -1(R8), SI CMPL SI, $0x3c JLT one_byte_repeat_emit_encodeSnappyBlockAsm12B CMPL SI, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12B MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B two_bytes_repeat_emit_encodeSnappyBlockAsm12B: MOVB $0xf0, (AX) MOVB SI, 1(AX) ADDQ $0x02, AX CMPL SI, $0x40 JL memmove_repeat_emit_encodeSnappyBlockAsm12B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B one_byte_repeat_emit_encodeSnappyBlockAsm12B: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_repeat_emit_encodeSnappyBlockAsm12B: LEAQ (AX)(R8*1), SI // genMemMoveShort CMPQ R8, $0x08 JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8 CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8: MOVQ (R9), R10 MOVQ R10, (AX) JMP 
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: MOVQ (R9), R10 MOVQ -8(R9)(R8*1), R9 MOVQ R10, (AX) MOVQ R9, -8(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: MOVOU (R9), X0 MOVOU -16(R9)(R8*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: MOVQ SI, AX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B memmove_long_repeat_emit_encodeSnappyBlockAsm12B: LEAQ (AX)(R8*1), SI // genMemMoveLong MOVOU (R9), X0 MOVOU 16(R9), X1 MOVOU -32(R9)(R8*1), X2 MOVOU -16(R9)(R8*1), X3 MOVQ R8, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(R9)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R13) MOVOA X5, 16(R13) ADDQ $0x20, R13 ADDQ $0x20, R10 ADDQ $0x20, R12 DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(R9)(R12*1), X4 MOVOU -16(R9)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R8*1) MOVOU X3, -16(AX)(R8*1) MOVQ SI, AX emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: ADDL $0x05, CX 
MOVL CX, SI SUBL 16(SP), SI MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(SI*1), SI // matchLen XORL R11, R11 CMPL R8, $0x08 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm12B matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: MOVQ (R9)(R11*1), R10 XORQ (SI)(R11*1), R10 TESTQ R10, R10 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B BSFQ R10, R10 SARQ $0x03, R10 LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B: LEAL -8(R8), R8 LEAL 8(R11), R11 CMPL R8, $0x08 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B matchlen_single_repeat_extend_encodeSnappyBlockAsm12B: TESTL R8, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B: MOVB (R9)(R11*1), R10 CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B LEAL 1(R11), R11 DECL R8 JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B repeat_extend_forward_end_encodeSnappyBlockAsm12B: ADDL R11, CX MOVL CX, SI SUBL DI, SI MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: CMPL SI, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B MOVB $0xee, (AX) MOVW DI, 1(AX) LEAL -60(SI), SI ADDQ $0x03, AX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: CMPL SI, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B CMPL DI, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(SI*4), SI MOVB DI, 1(AX) SHRL $0x08, DI SHLL $0x05, DI ORL DI, SI MOVB SI, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeSnappyBlockAsm12B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(SI*4), SI MOVB SI, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX repeat_end_emit_encodeSnappyBlockAsm12B: MOVL CX, 12(SP) JMP 
search_loop_encodeSnappyBlockAsm12B no_repeat_found_encodeSnappyBlockAsm12B: CMPL (DX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm12B SHRQ $0x08, DI MOVL 24(SP)(R10*4), SI LEAL 2(CX), R9 CMPL (DX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm12B MOVL R9, 24(SP)(R10*4) SHRQ $0x08, DI CMPL (DX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm12B MOVL 20(SP), CX JMP search_loop_encodeSnappyBlockAsm12B candidate3_match_encodeSnappyBlockAsm12B: ADDL $0x02, CX JMP candidate_match_encodeSnappyBlockAsm12B candidate2_match_encodeSnappyBlockAsm12B: MOVL R9, 24(SP)(R10*4) INCL CX MOVL R8, SI candidate_match_encodeSnappyBlockAsm12B: MOVL 12(SP), DI TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm12B match_extend_back_loop_encodeSnappyBlockAsm12B: CMPL CX, DI JLE match_extend_back_end_encodeSnappyBlockAsm12B MOVB -1(DX)(SI*1), BL MOVB -1(DX)(CX*1), R8 CMPB BL, R8 JNE match_extend_back_end_encodeSnappyBlockAsm12B LEAL -1(CX), CX DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm12B JMP match_extend_back_loop_encodeSnappyBlockAsm12B match_extend_back_end_encodeSnappyBlockAsm12B: MOVL CX, DI SUBL 12(SP), DI LEAQ 3(AX)(DI*1), DI CMPQ DI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm12B: MOVL CX, DI MOVL 12(SP), R8 CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(R8*1), DI SUBL R8, R9 LEAL -1(R9), R8 CMPL R8, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm12B CMPL R8, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm12B MOVB $0xf4, (AX) MOVW R8, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm12B two_bytes_match_emit_encodeSnappyBlockAsm12B: MOVB $0xf0, (AX) MOVB R8, 1(AX) ADDQ $0x02, AX CMPL R8, $0x40 JL memmove_match_emit_encodeSnappyBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBlockAsm12B one_byte_match_emit_encodeSnappyBlockAsm12B: SHLB $0x02, R8 MOVB R8, (AX) ADDQ $0x01, AX 
memmove_match_emit_encodeSnappyBlockAsm12B: LEAQ (AX)(R9*1), R8 // genMemMoveShort CMPQ R9, $0x08 JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8 CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8: MOVQ (DI), R10 MOVQ R10, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: MOVQ (DI), R10 MOVQ -8(DI)(R9*1), DI MOVQ R10, (AX) MOVQ DI, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: MOVOU (DI), X0 MOVOU -16(DI)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: MOVQ R8, AX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B memmove_long_match_emit_encodeSnappyBlockAsm12B: LEAQ (AX)(R9*1), R8 // genMemMoveLong MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVQ R9, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(DI)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R13) MOVOA X5, 16(R13) ADDQ $0x20, R13 ADDQ $0x20, R10 ADDQ $0x20, R12 DECQ R11 JNA 
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(DI)(R12*1), X4 MOVOU -16(DI)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ R8, AX emit_literal_done_match_emit_encodeSnappyBlockAsm12B: match_nolit_loop_encodeSnappyBlockAsm12B: MOVL CX, DI SUBL SI, DI MOVL DI, 16(SP) ADDL $0x04, CX ADDL $0x04, SI MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(SI*1), SI // matchLen XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm12B matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: MOVQ (R8)(R10*1), R9 XORQ (SI)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_loop_match_nolit_encodeSnappyBlockAsm12B: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B matchlen_single_match_nolit_encodeSnappyBlockAsm12B: TESTL DI, DI JZ match_nolit_end_encodeSnappyBlockAsm12B matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B: MOVB (R8)(R10*1), R9 CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm12B LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B match_nolit_end_encodeSnappyBlockAsm12B: ADDL R10, CX MOVL 16(SP), SI ADDL $0x04, R10 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: CMPL R10, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(R10), R10 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: CMPL R10, 
$0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B CMPL SI, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(R10*4), R10 MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, R10 MOVB R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(R10*4), R10 MOVB R10, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm12B MOVQ -2(DX)(CX*1), DI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm12B: MOVQ $0x000000cf1bbcdcbb, R9 MOVQ DI, R8 SHRQ $0x10, DI MOVQ DI, SI SHLQ $0x18, R8 IMULQ R9, R8 SHRQ $0x34, R8 SHLQ $0x18, SI IMULQ R9, SI SHRQ $0x34, SI LEAL -2(CX), R9 LEAQ 24(SP)(SI*4), R10 MOVL (R10), SI MOVL R9, 24(SP)(R8*4) MOVL CX, (R10) CMPL (DX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm12B INCL CX JMP search_loop_encodeSnappyBlockAsm12B emit_remainder_encodeSnappyBlockAsm12B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm12B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B MOVL CX, SI MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B two_bytes_emit_remainder_encodeSnappyBlockAsm12B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeSnappyBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B 
one_byte_emit_remainder_encodeSnappyBlockAsm12B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm12B: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveShort CMPQ BX, $0x08 JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8: MOVQ (CX), SI MOVQ SI, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16: MOVQ (CX), SI MOVQ -8(CX)(BX*1), CX MOVQ SI, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B memmove_long_emit_remainder_encodeSnappyBlockAsm12B: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI MOVQ AX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(CX)(R8*1), SI LEAQ -32(AX)(R8*1), R9 
// (tail of encodeSnappyBlockAsm12B: 32-byte copy loops of its final literal
// copy, followed by the computation of its return value)
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
//
// Stores in ret+48(FP) the number of bytes written to dst (final write
// pointer minus dst_base), or 0 when the output would reach the destination
// limit kept in 0(SP).
//
// Frame layout (offsets from SP):
//   0(SP)   8 bytes     dst limit: dst_base + src_len - 9 - (src_len >> 5)
//   8(SP)   4 bytes     search limit: src_len - 8
//   12(SP)  4 bytes     src index of the first literal byte not yet emitted
//   16(SP)  4 bytes     offset of the most recent match (starts at 1)
//   20(SP)  4 bytes     saved next search position
//   24(SP)  4096 bytes  hash table: 1024 4-byte src indexes, zeroed on entry
//
// Main loop registers: AX = dst write pointer, DX = src base, CX = current
// src index.
TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000020, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table, 128 bytes per iteration.
zero_loop_encodeSnappyBlockAsm10B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBlockAsm10B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm10B:
	// Next search position = CX + 4 + (bytes since last emit >> 5).
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x05, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm10B
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)

	// Hash the 4 bytes at CX and at CX+1 into 10-bit table indexes.
	MOVQ $0x9e3779b1, R9
	MOVQ DI, R10
	MOVQ DI, R11
	SHRQ $0x08, R11
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x36, R10
	SHLQ $0x20, R11
	IMULQ R9, R11
	SHRQ $0x36, R11
	MOVL 24(SP)(R10*4), SI
	MOVL 24(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	LEAL 1(CX), R10
	MOVL R10, 24(SP)(R11*4)
	MOVQ DI, R10
	SHRQ $0x10, R10
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x36, R10

	// Compare the 4 bytes at CX+1 with the 4 bytes one match offset back.
	MOVL CX, R9
	SUBL 16(SP), R9
	MOVL 1(DX)(R9*1), R11
	MOVQ DI, R9
	SHRQ $0x08, R9
	CMPL R9, R11
	JNE no_repeat_found_encodeSnappyBlockAsm10B
	LEAL 1(CX), DI
	MOVL 12(SP), SI
	MOVL DI, R8
	SUBL 16(SP), R8
	JZ repeat_extend_back_end_encodeSnappyBlockAsm10B

repeat_extend_back_loop_encodeSnappyBlockAsm10B:
	CMPL DI, SI
	JLE repeat_extend_back_end_encodeSnappyBlockAsm10B
	MOVB -1(DX)(R8*1), BL
	MOVB -1(DX)(DI*1), R9
	CMPB BL, R9
	JNE repeat_extend_back_end_encodeSnappyBlockAsm10B
	LEAL -1(DI), DI
	DECL R8
	JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B

repeat_extend_back_end_encodeSnappyBlockAsm10B:
	// Emit src[12(SP):DI] as a literal before the repeat.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
	MOVL DI, R8
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R9
	SUBL SI, R8
	LEAL -1(R8), SI
	CMPL SI, $0x3c
	JLT one_byte_repeat_emit_encodeSnappyBlockAsm10B
	CMPL SI, $0x00000100
	JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B

two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_repeat_emit_encodeSnappyBlockAsm10B
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B

one_byte_repeat_emit_encodeSnappyBlockAsm10B:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeSnappyBlockAsm10B:
	LEAQ (AX)(R8*1), SI

	// genMemMoveShort
	CMPQ R8, $0x08
	JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8:
	MOVQ (R9), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
	MOVQ SI, AX
	JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B

memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
	LEAQ (AX)(R8*1), SI

	// genMemMoveLong
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(R9)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R8, R12
	JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ SI, AX

emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
	ADDL $0x05, CX
	MOVL CX, SI
	SUBL 16(SP), SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), SI

	// matchLen
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend_encodeSnappyBlockAsm10B

matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B:
	MOVQ (R9)(R11*1), R10
	XORQ (SI)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B

matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B

matchlen_single_repeat_extend_encodeSnappyBlockAsm10B:
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B

matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B:
	MOVB (R9)(R11*1), R10
	CMPB (SI)(R11*1), R10
	JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B

repeat_extend_forward_end_encodeSnappyBlockAsm10B:
	ADDL R11, CX
	MOVL CX, SI
	SUBL DI, SI
	MOVL 16(SP), DI

	// emitCopy
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
	CMPL SI, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(SI), SI
	ADDQ $0x03, AX
	JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
	CMPL SI, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
	CMPL DI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
	MOVB $0x01, BL
	LEAL -16(BX)(SI*4), SI
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeSnappyBlockAsm10B

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
	MOVB $0x02, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm10B:
	MOVL CX, 12(SP)
	JMP search_loop_encodeSnappyBlockAsm10B

no_repeat_found_encodeSnappyBlockAsm10B:
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBlockAsm10B
	SHRQ $0x08, DI
	MOVL 24(SP)(R10*4), SI
	LEAL 2(CX), R9
	CMPL (DX)(R8*1), DI
	JEQ candidate2_match_encodeSnappyBlockAsm10B
	MOVL R9, 24(SP)(R10*4)
	SHRQ $0x08, DI
	CMPL (DX)(SI*1), DI
	JEQ candidate3_match_encodeSnappyBlockAsm10B
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBlockAsm10B

candidate3_match_encodeSnappyBlockAsm10B:
	ADDL $0x02, CX
	JMP candidate_match_encodeSnappyBlockAsm10B

candidate2_match_encodeSnappyBlockAsm10B:
	MOVL R9, 24(SP)(R10*4)
	INCL CX
	MOVL R8, SI

candidate_match_encodeSnappyBlockAsm10B:
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeSnappyBlockAsm10B

match_extend_back_loop_encodeSnappyBlockAsm10B:
	CMPL CX, DI
	JLE match_extend_back_end_encodeSnappyBlockAsm10B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeSnappyBlockAsm10B
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeSnappyBlockAsm10B
	JMP match_extend_back_loop_encodeSnappyBlockAsm10B

match_extend_back_end_encodeSnappyBlockAsm10B:
	// Give up (return 0) if the pending literal plus 3 bytes would reach the
	// dst limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeSnappyBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBlockAsm10B:
	MOVL CX, DI
	MOVL 12(SP), R8
	CMPL R8, DI
	JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(R8*1), DI
	SUBL R8, R9
	LEAL -1(R9), R8
	CMPL R8, $0x3c
	JLT one_byte_match_emit_encodeSnappyBlockAsm10B
	CMPL R8, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBlockAsm10B

two_bytes_match_emit_encodeSnappyBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB R8, 1(AX)
	ADDQ $0x02, AX
	CMPL R8, $0x40
	JL memmove_match_emit_encodeSnappyBlockAsm10B
	JMP memmove_long_match_emit_encodeSnappyBlockAsm10B

one_byte_match_emit_encodeSnappyBlockAsm10B:
	SHLB $0x02, R8
	MOVB R8, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBlockAsm10B:
	LEAQ (AX)(R9*1), R8

	// genMemMoveShort
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8:
	MOVQ (DI), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
	MOVQ (DI), R10
	MOVQ -8(DI)(R9*1), DI
	MOVQ R10, (AX)
	MOVQ DI, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
	MOVOU (DI), X0
	MOVOU -16(DI)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
	MOVQ R8, AX
	JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B

memmove_long_match_emit_encodeSnappyBlockAsm10B:
	LEAQ (AX)(R9*1), R8

	// genMemMoveLong
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVQ R9, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(DI)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(DI)(R12*1), X4
	MOVOU -16(DI)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R9, R12
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ R8, AX

emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
match_nolit_loop_encodeSnappyBlockAsm10B:
	// Record the match offset (CX - SI), then extend the match past its
	// first 4 bytes.
	MOVL CX, DI
	SUBL SI, DI
	MOVL DI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(SI*1), SI

	// matchLen
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBlockAsm10B

matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B:
	MOVQ (R8)(R10*1), R9
	XORQ (SI)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP match_nolit_end_encodeSnappyBlockAsm10B

matchlen_loop_match_nolit_encodeSnappyBlockAsm10B:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B

matchlen_single_match_nolit_encodeSnappyBlockAsm10B:
	TESTL DI, DI
	JZ match_nolit_end_encodeSnappyBlockAsm10B

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B:
	MOVB (R8)(R10*1), R9
	CMPB (SI)(R10*1), R9
	JNE match_nolit_end_encodeSnappyBlockAsm10B
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B

match_nolit_end_encodeSnappyBlockAsm10B:
	ADDL R10, CX
	MOVL 16(SP), SI
	ADDL $0x04, R10
	MOVL CX, 12(SP)

	// emitCopy
two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
	CMPL R10, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(R10), R10
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
	CMPL R10, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
	CMPL SI, $0x00000800
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
	MOVB $0x01, BL
	LEAL -16(BX)(R10*4), R10
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B

emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
	MOVB $0x02, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm10B
	MOVQ -2(DX)(CX*1), DI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBlockAsm10B:
	// Index positions CX-2 and CX, then test CX for an immediate new match.
	MOVQ $0x9e3779b1, R9
	MOVQ DI, R8
	SHRQ $0x10, DI
	MOVQ DI, SI
	SHLQ $0x20, R8
	IMULQ R9, R8
	SHRQ $0x36, R8
	SHLQ $0x20, SI
	IMULQ R9, SI
	SHRQ $0x36, SI
	LEAL -2(CX), R9
	LEAQ 24(SP)(SI*4), R10
	MOVL (R10), SI
	MOVL R9, 24(SP)(R8*4)
	MOVL CX, (R10)
	CMPL (DX)(SI*1), DI
	JEQ match_nolit_loop_encodeSnappyBlockAsm10B
	INCL CX
	JMP search_loop_encodeSnappyBlockAsm10B

emit_remainder_encodeSnappyBlockAsm10B:
	// Give up (return 0) if the remaining literal plus 3 bytes would reach
	// the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBlockAsm10B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBlockAsm10B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B

two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBlockAsm10B
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B

one_byte_emit_remainder_encodeSnappyBlockAsm10B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBlockAsm10B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	// The BX (1..64) bytes at CX are the last bytes of src, so no load may
	// extend past CX+BX. Lengths below 8 therefore get exact-size cases
	// instead of a fixed 8-byte load; a length of 8 uses the 8through16 case.
	// NOTE: the file header says this file is generated by gen.go; the same
	// change belongs in the generator.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2:
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7:
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B

memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
	// Return the number of bytes written: AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// Stores in ret+48(FP) the number of bytes written to dst (final write
// pointer minus dst_base), or 0 when the output would reach the destination
// limit kept in 0(SP).
//
// Frame layout (offsets from SP):
//   0(SP)   8 bytes     dst limit: dst_base + src_len - 9 - (src_len >> 5)
//   8(SP)   4 bytes     search limit: src_len - 8
//   12(SP)  4 bytes     src index of the first literal byte not yet emitted
//   16(SP)  4 bytes     offset of the most recent match (starts at 1)
//   20(SP)  4 bytes     saved next search position
//   24(SP)  1024 bytes  hash table: 256 4-byte src indexes, zeroed on entry
//
// Main loop registers: AX = dst write pointer, DX = src base, CX = current
// src index.
TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000008, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table, 128 bytes per iteration.
zero_loop_encodeSnappyBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBlockAsm8B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm8B:
	// Next search position = CX + 4 + (bytes since last emit >> 4).
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x04, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm8B
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)

	// Hash the 4 bytes at CX and at CX+1 into 8-bit table indexes.
	MOVQ $0x9e3779b1, R9
	MOVQ DI, R10
	MOVQ DI, R11
	SHRQ $0x08, R11
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x38, R10
	SHLQ $0x20, R11
	IMULQ R9, R11
	SHRQ $0x38, R11
	MOVL 24(SP)(R10*4), SI
	MOVL 24(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	LEAL 1(CX), R10
	MOVL R10, 24(SP)(R11*4)
	MOVQ DI, R10
	SHRQ $0x10, R10
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x38, R10

	// Compare the 4 bytes at CX+1 with the 4 bytes one match offset back.
	MOVL CX, R9
	SUBL 16(SP), R9
	MOVL 1(DX)(R9*1), R11
	MOVQ DI, R9
	SHRQ $0x08, R9
	CMPL R9, R11
	JNE no_repeat_found_encodeSnappyBlockAsm8B
	LEAL 1(CX), DI
	MOVL 12(SP), SI
	MOVL DI, R8
	SUBL 16(SP), R8
	JZ repeat_extend_back_end_encodeSnappyBlockAsm8B

repeat_extend_back_loop_encodeSnappyBlockAsm8B:
	CMPL DI, SI
	JLE repeat_extend_back_end_encodeSnappyBlockAsm8B
	MOVB -1(DX)(R8*1), BL
	MOVB -1(DX)(DI*1), R9
	CMPB BL, R9
	JNE repeat_extend_back_end_encodeSnappyBlockAsm8B
	LEAL -1(DI), DI
	DECL R8
	JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B

repeat_extend_back_end_encodeSnappyBlockAsm8B:
	// Emit src[12(SP):DI] as a literal before the repeat.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
	MOVL DI, R8
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R9
	SUBL SI, R8
	LEAL -1(R8), SI
	CMPL SI, $0x3c
	JLT one_byte_repeat_emit_encodeSnappyBlockAsm8B
	CMPL SI, $0x00000100
	JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B

two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_repeat_emit_encodeSnappyBlockAsm8B
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B

one_byte_repeat_emit_encodeSnappyBlockAsm8B:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeSnappyBlockAsm8B:
	LEAQ (AX)(R8*1), SI

	// genMemMoveShort
	CMPQ R8, $0x08
	JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
	MOVQ (R9), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
	MOVQ SI, AX
	JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B

memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
	LEAQ (AX)(R8*1), SI

	// genMemMoveLong
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R9)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R8, R12
	JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ SI, AX

emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
	ADDL $0x05, CX
	MOVL CX, SI
	SUBL 16(SP), SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), SI

	// matchLen
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend_encodeSnappyBlockAsm8B

matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B:
	MOVQ (R9)(R11*1), R10
	XORQ (SI)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B

matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B

matchlen_single_repeat_extend_encodeSnappyBlockAsm8B:
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B

matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B:
	MOVB (R9)(R11*1), R10
	CMPB (SI)(R11*1), R10
	JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B

repeat_extend_forward_end_encodeSnappyBlockAsm8B:
	ADDL R11, CX
	MOVL CX, SI
	SUBL DI, SI
	MOVL 16(SP), DI

	// emitCopy
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
	CMPL SI, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(SI), SI
	ADDQ $0x03, AX
	JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
	CMPL SI, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(SI*4), SI
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeSnappyBlockAsm8B

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm8B:
	MOVL CX, 12(SP)
	JMP search_loop_encodeSnappyBlockAsm8B

no_repeat_found_encodeSnappyBlockAsm8B:
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBlockAsm8B
	SHRQ $0x08, DI
	MOVL 24(SP)(R10*4), SI
	LEAL 2(CX), R9
	CMPL (DX)(R8*1), DI
	JEQ candidate2_match_encodeSnappyBlockAsm8B
	MOVL R9, 24(SP)(R10*4)
	SHRQ $0x08, DI
	CMPL (DX)(SI*1), DI
	JEQ candidate3_match_encodeSnappyBlockAsm8B
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBlockAsm8B

candidate3_match_encodeSnappyBlockAsm8B:
	ADDL $0x02, CX
	JMP candidate_match_encodeSnappyBlockAsm8B
candidate2_match_encodeSnappyBlockAsm8B: MOVL R9, 24(SP)(R10*4) INCL CX MOVL R8, SI candidate_match_encodeSnappyBlockAsm8B: MOVL 12(SP), DI TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm8B match_extend_back_loop_encodeSnappyBlockAsm8B: CMPL CX, DI JLE match_extend_back_end_encodeSnappyBlockAsm8B MOVB -1(DX)(SI*1), BL MOVB -1(DX)(CX*1), R8 CMPB BL, R8 JNE match_extend_back_end_encodeSnappyBlockAsm8B LEAL -1(CX), CX DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm8B JMP match_extend_back_loop_encodeSnappyBlockAsm8B match_extend_back_end_encodeSnappyBlockAsm8B: MOVL CX, DI SUBL 12(SP), DI LEAQ 3(AX)(DI*1), DI CMPQ DI, (SP) JL match_dst_size_check_encodeSnappyBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeSnappyBlockAsm8B: MOVL CX, DI MOVL 12(SP), R8 CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(R8*1), DI SUBL R8, R9 LEAL -1(R9), R8 CMPL R8, $0x3c JLT one_byte_match_emit_encodeSnappyBlockAsm8B CMPL R8, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm8B MOVB $0xf4, (AX) MOVW R8, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBlockAsm8B two_bytes_match_emit_encodeSnappyBlockAsm8B: MOVB $0xf0, (AX) MOVB R8, 1(AX) ADDQ $0x02, AX CMPL R8, $0x40 JL memmove_match_emit_encodeSnappyBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBlockAsm8B one_byte_match_emit_encodeSnappyBlockAsm8B: SHLB $0x02, R8 MOVB R8, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBlockAsm8B: LEAQ (AX)(R9*1), R8 // genMemMoveShort CMPQ R9, $0x08 JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8 CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8: MOVQ (DI), R10 MOVQ R10, (AX) JMP 
memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: MOVQ (DI), R10 MOVQ -8(DI)(R9*1), DI MOVQ R10, (AX) MOVQ DI, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: MOVOU (DI), X0 MOVOU -16(DI)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm8B: MOVQ R8, AX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B memmove_long_match_emit_encodeSnappyBlockAsm8B: LEAQ (AX)(R9*1), R8 // genMemMoveLong MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(R9*1), X2 MOVOU -16(DI)(R9*1), X3 MOVQ R9, R11 SHRQ $0x05, R11 MOVQ AX, R10 ANDL $0x0000001f, R10 MOVQ $0x00000040, R12 SUBQ R10, R12 DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(DI)(R12*1), R10 LEAQ -32(AX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (R10), X4 MOVOU 16(R10), X5 MOVOA X4, (R13) MOVOA X5, 16(R13) ADDQ $0x20, R13 ADDQ $0x20, R10 ADDQ $0x20, R12 DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(DI)(R12*1), X4 MOVOU -16(DI)(R12*1), X5 MOVOA X4, -32(AX)(R12*1) MOVOA X5, -16(AX)(R12*1) ADDQ $0x20, R12 CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ R8, AX emit_literal_done_match_emit_encodeSnappyBlockAsm8B: match_nolit_loop_encodeSnappyBlockAsm8B: MOVL 
CX, DI SUBL SI, DI MOVL DI, 16(SP) ADDL $0x04, CX ADDL $0x04, SI MOVQ src_len+32(FP), DI SUBL CX, DI LEAQ (DX)(CX*1), R8 LEAQ (DX)(SI*1), SI // matchLen XORL R10, R10 CMPL DI, $0x08 JL matchlen_single_match_nolit_encodeSnappyBlockAsm8B matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: MOVQ (R8)(R10*1), R9 XORQ (SI)(R10*1), R9 TESTQ R9, R9 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B BSFQ R9, R9 SARQ $0x03, R9 LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_loop_match_nolit_encodeSnappyBlockAsm8B: LEAL -8(DI), DI LEAL 8(R10), R10 CMPL DI, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B matchlen_single_match_nolit_encodeSnappyBlockAsm8B: TESTL DI, DI JZ match_nolit_end_encodeSnappyBlockAsm8B matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B: MOVB (R8)(R10*1), R9 CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm8B LEAL 1(R10), R10 DECL DI JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B match_nolit_end_encodeSnappyBlockAsm8B: ADDL R10, CX MOVL 16(SP), SI ADDL $0x04, R10 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: CMPL R10, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B MOVB $0xee, (AX) MOVW SI, 1(AX) LEAL -60(R10), R10 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B: CMPL R10, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(R10*4), R10 MOVB SI, 1(AX) SHRL $0x08, SI SHLL $0x05, SI ORL SI, R10 MOVB R10, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBlockAsm8B: MOVB $0x02, BL LEAL -4(BX)(R10*4), R10 MOVB R10, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBlockAsm8B: CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBlockAsm8B MOVQ -2(DX)(CX*1), DI CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBlockAsm8B MOVQ $0x00000000, 
ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm8B: MOVQ $0x9e3779b1, R9 MOVQ DI, R8 SHRQ $0x10, DI MOVQ DI, SI SHLQ $0x20, R8 IMULQ R9, R8 SHRQ $0x38, R8 SHLQ $0x20, SI IMULQ R9, SI SHRQ $0x38, SI LEAL -2(CX), R9 LEAQ 24(SP)(SI*4), R10 MOVL (R10), SI MOVL R9, 24(SP)(R8*4) MOVL CX, (R10) CMPL (DX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm8B INCL CX JMP search_loop_encodeSnappyBlockAsm8B emit_remainder_encodeSnappyBlockAsm8B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBlockAsm8B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBlockAsm8B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B MOVL CX, SI MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBlockAsm8B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B two_bytes_emit_remainder_encodeSnappyBlockAsm8B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeSnappyBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B one_byte_emit_remainder_encodeSnappyBlockAsm8B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBlockAsm8B: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveShort CMPQ BX, $0x08 JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8: MOVQ (CX), SI MOVQ SI, (AX) JMP 
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16: MOVQ (CX), SI MOVQ -8(CX)(BX*1), CX MOVQ SI, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B memmove_long_emit_remainder_encodeSnappyBlockAsm8B: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI MOVQ AX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 LEAQ -32(CX)(R8*1), SI LEAQ -32(AX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 MOVOU 16(SI), X5 MOVOA X4, (R9) MOVOA X5, 16(R9) ADDQ $0x20, R9 ADDQ $0x20, SI ADDQ $0x20, R8 DECQ DI JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: MOVOU -32(CX)(R8*1), X4 MOVOU -16(CX)(R8*1), X5 MOVOA X4, -32(AX)(R8*1) MOVOA X5, -16(AX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) MOVQ DX, AX emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B: 
// Tail of the preceding function: ret = AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
//
// Returns the number of bytes written to dst (AX - dst_base), or 0 as soon as
// the output would reach the limit dst_base + len(src) - 9 - len(src)/32.
//
// Frame layout (SP-relative):
//   0(SP)       dst limit pointer
//   8(SP)       len(src) - 8; the search loop stops once the next position reaches it
//   12(SP)      src offset of the first byte that has not been emitted yet
//   16(SP)      offset of the last accepted match (written, never read in this function)
//   20(SP)      next search position, saved while a position is probed
//   24(SP)      table of 1<<16 uint32 src offsets, indexed by the 16-bit hash
//   262168(SP)  table of 1<<14 uint32 src offsets, indexed by the 14-bit hash
// Main-loop registers: AX = dst write pointer, DX = src base, CX = current src offset.
//
// NOTE(review): the file header says this file is generated by gen.go; the
// emit_remainder short-copy change below has to be mirrored in the generator.
TEXT ·encodeSnappyBetterBlockAsm(SB), $327704-56
	MOVQ dst_base+0(FP), AX
	// Zero both tables: 0xa00 iterations * 128 bytes = 327680 bytes from 24(SP).
	MOVQ $0x00000a00, CX
	LEAQ 24(SP), DX
	PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBetterBlockAsm
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	// (SP) = dst_base + len(src) - 9 - len(src)>>5
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	// Searching starts at src offset 1.
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX
search_loop_encodeSnappyBetterBlockAsm:
	// next = CX + 1 + ((CX - 12(SP)) >> 7), with the step clamped to 100.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x07, SI
	CMPL SI, $0x63
	JLE check_maxskip_ok_encodeSnappyBetterBlockAsm
	LEAL 100(CX), SI
	JMP check_maxskip_cont_encodeSnappyBetterBlockAsm
check_maxskip_ok_encodeSnappyBetterBlockAsm:
	LEAL 1(CX)(SI*1), SI
check_maxskip_cont_encodeSnappyBetterBlockAsm:
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeSnappyBetterBlockAsm
	// DI = 8 bytes at src[CX]. R10 = 16-bit hash of its low 7 bytes,
	// R11 = 14-bit hash of its low 4 bytes.
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x00cf1bbcdcbfa563, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	SHLQ $0x08, R10
	IMULQ R9, R10
	SHRQ $0x30, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x32, R11
	// Fetch both candidates (SI from the 16-bit table, R8 from the 14-bit
	// table), then record CX in both slots.
	MOVL 24(SP)(R10*4), SI
	MOVL 262168(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 262168(SP)(R11*4)
	// A candidate is accepted when its first 4 bytes equal the 4 bytes at CX.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBetterBlockAsm
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeSnappyBetterBlockAsm
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBetterBlockAsm
candidateS_match_encodeSnappyBetterBlockAsm:
	// Only the 14-bit-table candidate matched. Probe the 16-bit table at CX+1
	// first; if that fails, fall back to the candidate in R8 at the original CX.
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x08, R10
	IMULQ R9, R10
	SHRQ $0x30, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBetterBlockAsm
	DECL CX
	MOVL R8, SI
candidate_match_encodeSnappyBetterBlockAsm:
	// Extend the match backwards while the preceding bytes agree, without
	// moving CX below 12(SP) or SI below 0.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeSnappyBetterBlockAsm
match_extend_back_loop_encodeSnappyBetterBlockAsm:
	CMPL CX, DI
	JLE match_extend_back_end_encodeSnappyBetterBlockAsm
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeSnappyBetterBlockAsm
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeSnappyBetterBlockAsm
	JMP match_extend_back_loop_encodeSnappyBetterBlockAsm
match_extend_back_end_encodeSnappyBetterBlockAsm:
	// Return 0 unless AX + 5 + (pending literal length) is below the dst limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 5(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeSnappyBetterBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET
match_dst_size_check_encodeSnappyBetterBlockAsm:
	// DI = match start. Skip the 4 bytes already compared; R8 = bytes left in src.
	MOVL CX, DI
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), R10
	// matchLen
	// R12 = number of additional equal bytes: 8 at a time via XOR/BSF, then
	// byte by byte for the last <8 bytes.
	XORL R12, R12
	CMPL R8, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm
matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm:
	MOVQ (R9)(R12*1), R11
	XORQ (R10)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP match_nolit_end_encodeSnappyBetterBlockAsm
matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm:
	LEAL -8(R8), R8
	LEAL 8(R12), R12
	CMPL R8, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm
matchlen_single_match_nolit_encodeSnappyBetterBlockAsm:
	TESTL R8, R8
	JZ match_nolit_end_encodeSnappyBetterBlockAsm
matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm:
	MOVB (R9)(R12*1), R11
	CMPB (R10)(R12*1), R11
	JNE match_nolit_end_encodeSnappyBetterBlockAsm
	LEAL 1(R12), R12
	DECL R8
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm
match_nolit_end_encodeSnappyBetterBlockAsm:
	// R8 = match offset (CX - SI).
	MOVL CX, R8
	SUBL SI, R8
	// Check if repeat
	// A match with at most 1 extra byte and an offset above 0xffff is
	// dropped; searching resumes at 20(SP)+1.
	CMPL R12, $0x01
	JG match_length_ok_encodeSnappyBetterBlockAsm
	CMPL R8, $0x0000ffff
	JLE match_length_ok_encodeSnappyBetterBlockAsm
	MOVL 20(SP), CX
	INCL CX
	JMP search_loop_encodeSnappyBetterBlockAsm
match_length_ok_encodeSnappyBetterBlockAsm:
	MOVL R8, 16(SP)
	// Emit src[12(SP):DI] as a literal run (skipped when empty).
	// R9 = run length, SI = length-1, R10 = source pointer.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	// Header: (length-1)<<2 in one byte when length-1 < 60, otherwise tag
	// 0xf0/0xf4/0xf8/0xfc followed by length-1 in 1/2/3/4 bytes.
	CMPL SI, $0x3c
	JLT one_byte_match_emit_encodeSnappyBetterBlockAsm
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm
	CMPL SI, $0x00010000
	JLT three_bytes_match_emit_encodeSnappyBetterBlockAsm
	CMPL SI, $0x01000000
	JLT four_bytes_match_emit_encodeSnappyBetterBlockAsm
	MOVB $0xfc, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
four_bytes_match_emit_encodeSnappyBetterBlockAsm:
	MOVL SI, R11
	SHRL $0x10, R11
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB R11, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
three_bytes_match_emit_encodeSnappyBetterBlockAsm:
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
two_bytes_match_emit_encodeSnappyBetterBlockAsm:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_encodeSnappyBetterBlockAsm
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
one_byte_match_emit_encodeSnappyBetterBlockAsm:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX
memmove_match_emit_encodeSnappyBetterBlockAsm:
	// SI = dst pointer after the run.
	LEAQ (AX)(R9*1), SI
	// genMemMoveShort
	// Runs of up to 8 bytes are copied with one full 8-byte load/store; the
	// bytes read beyond the run belong to the match that follows in src.
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8:
	MOVQ (R10), R11
	MOVQ R11, (AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
memmove_long_match_emit_encodeSnappyBetterBlockAsm:
	LEAQ (AX)(R9*1), SI
	// genMemMoveLong
	// The first and last 32 bytes are loaded into X0-X3 up front and stored
	// last; the middle is copied in 32-byte chunks using aligned stores
	// (R14 = 64 - (AX & 31) makes AX+R14-32 a multiple of 32).
	// Loop control uses ZF from DECQ together with CF left by SUBQ/ADDQ.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	DECQ R13
	JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm:
	// CX = end of match, R12 = full match length, 12(SP) = CX.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)
	// emitCopy
	// Offsets >= 0x10000 use 5-byte elements (tag + 4-byte offset); smaller
	// offsets use 2- or 3-byte elements.
	CMPL R8, $0x00010000
	JL two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm:
	// While length > 64: emit tag 0xff + offset and subtract 64.
	CMPL R12, $0x40
	JLE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
	MOVB $0xff, (AX)
	MOVL R8, 1(AX)
	LEAL -64(R12), R12
	ADDQ $0x05, AX
	CMPL R12, $0x04
	JL four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
	JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm
four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm:
	TESTL R12, R12
	JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
	// tag = (length-1)<<2 | 3; only the low byte of the LEAL result is stored.
	MOVB $0x03, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVL R8, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm:
	// While length > 64: emit tag 0xee + 2-byte offset and subtract 60.
	CMPL R12, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm
	MOVB $0xee, (AX)
	MOVW R8, 1(AX)
	LEAL -60(R12), R12
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm:
	// The 2-byte form is used only when length < 12 and offset < 2048:
	// tag = (length-4)<<2 | 1 | (offset>>8)<<5, followed by the low offset byte.
	CMPL R12, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
	CMPL R8, $0x00000800
	JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(R12*4), R12
	MOVB R8, 1(AX)
	SHRL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm:
	// tag = (length-1)<<2 | 2, followed by the 2-byte offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBetterBlockAsm
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET
match_nolit_dst_ok_encodeSnappyBetterBlockAsm:
	// Index positions around the match (DI = match start on entry):
	//   16-bit table: start+1, start+2, end-2, end-1
	//   14-bit table: start+2, start+3, end-1
	MOVQ $0x00cf1bbcdcbfa563, SI
	MOVQ $0x9e3779b1, R8
	INCL DI
	MOVQ (DX)(DI*1), R9
	MOVQ R9, R10
	MOVQ R9, R11
	MOVQ R9, R12
	SHRQ $0x08, R11
	MOVQ R11, R13
	SHRQ $0x10, R12
	LEAL 1(DI), R14
	LEAL 2(DI), R15
	MOVQ -2(DX)(CX*1), R9
	SHLQ $0x08, R10
	IMULQ SI, R10
	SHRQ $0x30, R10
	SHLQ $0x08, R13
	IMULQ SI, R13
	SHRQ $0x30, R13
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x32, R11
	SHLQ $0x20, R12
	IMULQ R8, R12
	SHRQ $0x32, R12
	MOVL DI, 24(SP)(R10*4)
	MOVL R14, 24(SP)(R13*4)
	MOVL R14, 262168(SP)(R11*4)
	MOVL R15, 262168(SP)(R12*4)
	MOVQ R9, R10
	MOVQ R9, R11
	SHRQ $0x08, R11
	MOVQ R11, R13
	LEAL -2(CX), R9
	LEAL -1(CX), DI
	SHLQ $0x08, R10
	IMULQ SI, R10
	SHRQ $0x30, R10
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x32, R11
	SHLQ $0x08, R13
	IMULQ SI, R13
	SHRQ $0x30, R13
	MOVL R9, 24(SP)(R10*4)
	MOVL DI, 262168(SP)(R11*4)
	MOVL DI, 24(SP)(R13*4)
	JMP search_loop_encodeSnappyBetterBlockAsm
emit_remainder_encodeSnappyBetterBlockAsm:
	// Return 0 unless AX + 5 + (len(src) - 12(SP)) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 5(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBetterBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET
emit_remainder_ok_encodeSnappyBetterBlockAsm:
	// Emit src[12(SP):len(src)] as the final literal run (skipped when empty).
	// CX = source pointer, SI = run length, DX = length-1 (src base is no
	// longer needed).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeSnappyBetterBlockAsm
	CMPL DX, $0x01000000
	JLT four_bytes_emit_remainder_encodeSnappyBetterBlockAsm
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
four_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
three_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
two_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBetterBlockAsm
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
one_byte_emit_remainder_encodeSnappyBetterBlockAsm:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX
memmove_emit_remainder_encodeSnappyBetterBlockAsm:
	// DX = dst pointer after the run, BX = run length (1..64 on this path).
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX
	// genMemMoveShort
	// This run ends exactly at src_base+src_len, so lengths 1..7 must be
	// copied with exact-width accesses: an unconditional 8-byte MOVQ would
	// read up to 7 bytes past the end of src and store them past the end of
	// the emitted literal. Length 8 is handled by the 8through16 case.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2
	JEQ emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2:
	// First and last byte (the same byte when the length is 1).
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7:
	// Two possibly overlapping 4-byte moves: the first 4 and the last 4 bytes.
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX
	// genMemMoveLong
	// Same scheme as the long literal copy above: head and tail held in
	// X0-X3, middle copied in 32-byte chunks with aligned stores.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm:
	// ret = AX - dst_base
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
// Requires: SSE2
//
// The part of this function shown here zeroes the two tables at 24(SP) and
// 262168(SP), sets up 0(SP)/8(SP)/12(SP)/16(SP), and runs the candidate
// search, backward extension and the 8-byte stage of match-length counting.
TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56
	MOVQ dst_base+0(FP), AX
	// Zero 0xa00 * 128 = 327680 bytes of table space starting at 24(SP).
	MOVQ $0x00000a00, CX
	LEAQ 24(SP), DX
	PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm64K:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBetterBlockAsm64K
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	// (SP) = dst_base + len(src) - 9 - len(src)>>5
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX
search_loop_encodeSnappyBetterBlockAsm64K:
	// next = CX + 1 + ((CX - 12(SP)) >> 7); no clamp in this variant.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x07, SI
	LEAL 1(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeSnappyBetterBlockAsm64K
	// R10 = 16-bit hash of the low 7 bytes at src[CX], R11 = 14-bit hash of
	// the low 4 bytes.
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x00cf1bbcdcbfa563, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	SHLQ $0x08, R10
	IMULQ R9, R10
	SHRQ $0x30, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x32, R11
	MOVL 24(SP)(R10*4), SI
	MOVL 262168(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 262168(SP)(R11*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBetterBlockAsm64K
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeSnappyBetterBlockAsm64K
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBetterBlockAsm64K
candidateS_match_encodeSnappyBetterBlockAsm64K:
	// Probe the 16-bit table at CX+1 before falling back to candidate R8.
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x08, R10
	IMULQ R9, R10
	SHRQ $0x30, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBetterBlockAsm64K
	DECL CX
	MOVL R8, SI
candidate_match_encodeSnappyBetterBlockAsm64K:
	// Extend the match backwards, bounded by 12(SP) for CX and 0 for SI.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K
match_extend_back_loop_encodeSnappyBetterBlockAsm64K:
	CMPL CX, DI
	JLE match_extend_back_end_encodeSnappyBetterBlockAsm64K
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K
	JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K
match_extend_back_end_encodeSnappyBetterBlockAsm64K:
	// Return 0 unless AX + 3 + (pending literal length) is below the dst limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeSnappyBetterBlockAsm64K
	MOVQ $0x00000000, ret+48(FP)
	RET
match_dst_size_check_encodeSnappyBetterBlockAsm64K:
	MOVL CX, DI
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), R10
	// matchLen
	XORL R12, R12
	CMPL R8, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm64K
matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K:
	MOVQ (R9)(R12*1), R11
	XORQ (R10)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP match_nolit_end_encodeSnappyBetterBlockAsm64K
matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K:
	LEAL -8(R8), R8
	LEAL 8(R12), R12
	CMPL R8, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K
matchlen_single_match_nolit_encodeSnappyBetterBlockAsm64K:
	TESTL R8, R8
	JZ match_nolit_end_encodeSnappyBetterBlockAsm64K
matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm64K:
	// Byte-by-byte stage of the match-length count: R12 = equal bytes so far,
	// R8 = bytes left in src, R9/R10 = the two positions being compared.
	MOVB (R9)(R12*1), R11
	CMPB (R10)(R12*1), R11
	JNE match_nolit_end_encodeSnappyBetterBlockAsm64K
	LEAL 1(R12), R12
	DECL R8
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm64K
match_nolit_end_encodeSnappyBetterBlockAsm64K:
	// R8 = match offset (CX - SI).
	MOVL CX, R8
	SUBL SI, R8
	// Check if repeat
	// (no length/offset test follows here; the offset is only stored)
	MOVL R8, 16(SP)
	// Emit src[12(SP):DI] as a literal run (skipped when empty).
	// R9 = run length, SI = length-1, R10 = source pointer.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	// Header: (length-1)<<2 in one byte when length-1 < 60, else tag 0xf0 +
	// 1 byte when length-1 < 256, else tag 0xf4 + 2 bytes.
	CMPL SI, $0x3c
	JLT one_byte_match_emit_encodeSnappyBetterBlockAsm64K
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm64K
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
two_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_encodeSnappyBetterBlockAsm64K
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
one_byte_match_emit_encodeSnappyBetterBlockAsm64K:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX
memmove_match_emit_encodeSnappyBetterBlockAsm64K:
	// SI = dst pointer after the run.
	LEAQ (AX)(R9*1), SI
	// genMemMoveShort
	// Runs of up to 8 bytes are copied with one full 8-byte load/store.
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8:
	MOVQ (R10), R11
	MOVQ R11, (AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
memmove_long_match_emit_encodeSnappyBetterBlockAsm64K:
	LEAQ (AX)(R9*1), SI
	// genMemMoveLong
	// The first and last 32 bytes are loaded into X0-X3 up front and stored
	// last; the middle is copied in 32-byte chunks using aligned stores
	// (R14 = 64 - (AX & 31) makes AX+R14-32 a multiple of 32).
	// Loop control uses ZF from DECQ together with CF left by SUBQ/ADDQ.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	DECQ R13
	JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K:
	// CX = end of match, R12 = full match length, 12(SP) = CX.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)
	// emitCopy
	// Only 2- and 3-byte elements are produced in this variant.
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K:
	// While length > 64: emit tag 0xee + 2-byte offset and subtract 60.
	CMPL R12, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K
	MOVB $0xee, (AX)
	MOVW R8, 1(AX)
	LEAL -60(R12), R12
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K:
	// The 2-byte form is used only when length < 12 and offset < 2048:
	// tag = (length-4)<<2 | 1 | (offset>>8)<<5, followed by the low offset byte.
	CMPL R12, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
	CMPL R8, $0x00000800
	JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
	MOVB $0x01, BL
	LEAL -16(BX)(R12*4), R12
	MOVB R8, 1(AX)
	SHRL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K:
	// tag = (length-1)<<2 | 2, followed by the 2-byte offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBetterBlockAsm64K
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K
	MOVQ $0x00000000, ret+48(FP)
	RET
match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K:
	// Index positions around the match (DI = match start on entry):
	//   table at 24(SP):     start+1, start+2, end-2, end-1
	//   table at 262168(SP): start+2, start+3, end-1
	MOVQ $0x00cf1bbcdcbfa563, SI
	MOVQ $0x9e3779b1, R8
	INCL DI
	MOVQ (DX)(DI*1), R9
	MOVQ R9, R10
	MOVQ R9, R11
	MOVQ R9, R12
	SHRQ $0x08, R11
	MOVQ R11, R13
	SHRQ $0x10, R12
	LEAL 1(DI), R14
	LEAL 2(DI), R15
	MOVQ -2(DX)(CX*1), R9
	SHLQ $0x08, R10
	IMULQ SI, R10
	SHRQ $0x30, R10
	SHLQ $0x08, R13
	IMULQ SI, R13
	SHRQ $0x30, R13
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x32, R11
	SHLQ $0x20, R12
	IMULQ R8, R12
	SHRQ $0x32, R12
	MOVL DI, 24(SP)(R10*4)
	MOVL R14, 24(SP)(R13*4)
	MOVL R14, 262168(SP)(R11*4)
	MOVL R15, 262168(SP)(R12*4)
	MOVQ R9, R10
	MOVQ R9, R11
	SHRQ $0x08, R11
	MOVQ R11, R13
	LEAL -2(CX), R9
	LEAL -1(CX), DI
	SHLQ $0x08, R10
	IMULQ SI, R10
	SHRQ $0x30, R10
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x32, R11
	SHLQ $0x08, R13
	IMULQ SI, R13
	SHRQ $0x30, R13
	MOVL R9, 24(SP)(R10*4)
	MOVL DI, 262168(SP)(R11*4)
	MOVL DI, 24(SP)(R13*4)
	JMP search_loop_encodeSnappyBetterBlockAsm64K
emit_remainder_encodeSnappyBetterBlockAsm64K:
	// Return 0 unless AX + 3 + (len(src) - 12(SP)) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBetterBlockAsm64K
	MOVQ $0x00000000, ret+48(FP)
	RET
emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
	// Emit src[12(SP):len(src)] as the final literal run (skipped when empty).
	// CX = source pointer, SI = run length, DX = length-1.
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBetterBlockAsm64K
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX
memmove_emit_remainder_encodeSnappyBetterBlockAsm64K:
	// DX = dst pointer after the run, BX = run length (1..64 on this path).
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX
	// genMemMoveShort
	// This run ends exactly at src_base+src_len, so lengths 1..7 must be
	// copied with exact-width accesses: an unconditional 8-byte MOVQ would
	// read up to 7 bytes past the end of src and store them past the end of
	// the emitted literal. Length 8 is handled by the 8through16 case.
	// NOTE(review): generated file; mirror this change in the generator.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2
	JEQ emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2:
	// First and last byte (the same byte when the length is 1).
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7:
	// Two possibly overlapping 4-byte moves: the first 4 and the last 4 bytes.
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX
	// genMemMoveLong
	// Head and tail held in X0-X3, middle copied in 32-byte chunks with
	// aligned stores.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K:
	// ret = AX - dst_base
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000280, CX
	LEAQ 24(SP), DX
	PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm12B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU
X0, 96(DX) MOVOU X0, 112(DX) ADDQ $0x80, DX DECQ CX JNZ zero_loop_encodeSnappyBetterBlockAsm12B MOVL $0x00000000, 12(SP) MOVQ src_len+32(FP), CX LEAQ -9(CX), DX LEAQ -8(CX), SI MOVL SI, 8(SP) SHRQ $0x05, CX SUBL CX, DX LEAQ (AX)(DX*1), DX MOVQ DX, (SP) MOVL $0x00000001, CX MOVL $0x00000000, 16(SP) MOVQ src_base+24(FP), DX search_loop_encodeSnappyBetterBlockAsm12B: MOVL CX, SI SUBL 12(SP), SI SHRL $0x06, SI LEAL 1(CX)(SI*1), SI CMPL SI, 8(SP) JGE emit_remainder_encodeSnappyBetterBlockAsm12B MOVQ (DX)(CX*1), DI MOVL SI, 20(SP) MOVQ $0x0000cf1bbcdcbf9b, R9 MOVQ $0x9e3779b1, SI MOVQ DI, R10 MOVQ DI, R11 SHLQ $0x10, R10 IMULQ R9, R10 SHRQ $0x32, R10 SHLQ $0x20, R11 IMULQ SI, R11 SHRQ $0x34, R11 MOVL 24(SP)(R10*4), SI MOVL 65560(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 65560(SP)(R11*4) CMPL (DX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B CMPL (DX)(R8*1), DI JEQ candidateS_match_encodeSnappyBetterBlockAsm12B MOVL 20(SP), CX JMP search_loop_encodeSnappyBetterBlockAsm12B candidateS_match_encodeSnappyBetterBlockAsm12B: SHRQ $0x08, DI MOVQ DI, R10 SHLQ $0x10, R10 IMULQ R9, R10 SHRQ $0x32, R10 MOVL 24(SP)(R10*4), SI INCL CX MOVL CX, 24(SP)(R10*4) CMPL (DX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B DECL CX MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm12B: MOVL 12(SP), DI TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B match_extend_back_loop_encodeSnappyBetterBlockAsm12B: CMPL CX, DI JLE match_extend_back_end_encodeSnappyBetterBlockAsm12B MOVB -1(DX)(SI*1), BL MOVB -1(DX)(CX*1), R8 CMPB BL, R8 JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B LEAL -1(CX), CX DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B match_extend_back_end_encodeSnappyBetterBlockAsm12B: MOVL CX, DI SUBL 12(SP), DI LEAQ 3(AX)(DI*1), DI CMPQ DI, (SP) JL match_dst_size_check_encodeSnappyBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET 
match_dst_size_check_encodeSnappyBetterBlockAsm12B: MOVL CX, DI ADDL $0x04, CX ADDL $0x04, SI MOVQ src_len+32(FP), R8 SUBL CX, R8 LEAQ (DX)(CX*1), R9 LEAQ (DX)(SI*1), R10 // matchLen XORL R12, R12 CMPL R8, $0x08 JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: MOVQ (R9)(R12*1), R11 XORQ (R10)(R12*1), R11 TESTQ R11, R11 JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B BSFQ R11, R11 SARQ $0x03, R11 LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B: LEAL -8(R8), R8 LEAL 8(R12), R12 CMPL R8, $0x08 JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_single_match_nolit_encodeSnappyBetterBlockAsm12B: TESTL R8, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: MOVB (R9)(R12*1), R11 CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm12B LEAL 1(R12), R12 DECL R8 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm12B match_nolit_end_encodeSnappyBetterBlockAsm12B: MOVL CX, R8 SUBL SI, R8 // Check if repeat MOVL R8, 16(SP) MOVL 12(SP), SI CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B MOVL DI, R9 MOVL DI, 12(SP) LEAQ (DX)(SI*1), R10 SUBL SI, R9 LEAL -1(R9), SI CMPL SI, $0x3c JLT one_byte_match_emit_encodeSnappyBetterBlockAsm12B CMPL SI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm12B MOVB $0xf4, (AX) MOVW SI, 1(AX) ADDQ $0x03, AX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B two_bytes_match_emit_encodeSnappyBetterBlockAsm12B: MOVB $0xf0, (AX) MOVB SI, 1(AX) ADDQ $0x02, AX CMPL SI, $0x40 JL memmove_match_emit_encodeSnappyBetterBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B one_byte_match_emit_encodeSnappyBetterBlockAsm12B: SHLB $0x02, SI MOVB SI, (AX) ADDQ $0x01, AX memmove_match_emit_encodeSnappyBetterBlockAsm12B: 
LEAQ (AX)(R9*1), SI // genMemMoveShort CMPQ R9, $0x08 JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8 CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16 CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8: MOVQ (R10), R11 MOVQ R11, (AX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: MOVQ (R10), R11 MOVQ -8(R10)(R9*1), R10 MOVQ R11, (AX) MOVQ R10, -8(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: MOVOU (R10), X0 MOVOU -16(R10)(R9*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B: MOVQ SI, AX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B memmove_long_match_emit_encodeSnappyBetterBlockAsm12B: LEAQ (AX)(R9*1), SI // genMemMoveLong MOVOU (R10), X0 MOVOU 16(R10), X1 MOVOU -32(R10)(R9*1), X2 MOVOU -16(R10)(R9*1), X3 MOVQ R9, R13 SHRQ $0x05, R13 MOVQ AX, R11 ANDL $0x0000001f, R11 MOVQ $0x00000040, R14 SUBQ R11, R14 DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(R10)(R14*1), R11 LEAQ -32(AX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: MOVOU (R11), X4 MOVOU 16(R11), X5 MOVOA X4, (R15) MOVOA X5, 16(R15) 
ADDQ $0x20, R15 ADDQ $0x20, R11 ADDQ $0x20, R14 DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: MOVOU -32(R10)(R14*1), X4 MOVOU -16(R10)(R14*1), X5 MOVOA X4, -32(AX)(R14*1) MOVOA X5, -16(AX)(R14*1) ADDQ $0x20, R14 CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(R9*1) MOVOU X3, -16(AX)(R9*1) MOVQ SI, AX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B: ADDL R12, CX ADDL $0x04, R12 MOVL CX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B: CMPL R12, $0x40 JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B MOVB $0xee, (AX) MOVW R8, 1(AX) LEAL -60(R12), R12 ADDQ $0x03, AX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B: CMPL R12, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B CMPL R8, $0x00000800 JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B MOVB $0x01, BL LEAL -16(BX)(R12*4), R12 MOVB R8, 1(AX) SHRL $0x08, R8 SHLL $0x05, R8 ORL R8, R12 MOVB R12, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B: MOVB $0x02, BL LEAL -4(BX)(R12*4), R12 MOVB R12, (AX) MOVW R8, 1(AX) ADDQ $0x03, AX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B: CMPL CX, 8(SP) JGE emit_remainder_encodeSnappyBetterBlockAsm12B CMPQ AX, (SP) JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 INCL DI MOVQ (DX)(DI*1), R9 MOVQ R9, R10 MOVQ R9, R11 MOVQ R9, R12 SHRQ $0x08, R11 MOVQ R11, R13 SHRQ $0x10, R12 LEAL 1(DI), R14 LEAL 2(DI), R15 MOVQ -2(DX)(CX*1), R9 SHLQ $0x10, R10 IMULQ 
SI, R10 SHRQ $0x32, R10 SHLQ $0x10, R13 IMULQ SI, R13 SHRQ $0x32, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x34, R11 SHLQ $0x20, R12 IMULQ R8, R12 SHRQ $0x34, R12 MOVL DI, 24(SP)(R10*4) MOVL R14, 24(SP)(R13*4) MOVL R14, 65560(SP)(R11*4) MOVL R15, 65560(SP)(R12*4) MOVQ R9, R10 MOVQ R9, R11 SHRQ $0x08, R11 MOVQ R11, R13 LEAL -2(CX), R9 LEAL -1(CX), DI SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x32, R10 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x34, R11 SHLQ $0x10, R13 IMULQ SI, R13 SHRQ $0x32, R13 MOVL R9, 24(SP)(R10*4) MOVL DI, 65560(SP)(R11*4) MOVL DI, 24(SP)(R13*4) JMP search_loop_encodeSnappyBetterBlockAsm12B emit_remainder_encodeSnappyBetterBlockAsm12B: MOVQ src_len+32(FP), CX SUBL 12(SP), CX LEAQ 3(AX)(CX*1), CX CMPQ CX, (SP) JL emit_remainder_ok_encodeSnappyBetterBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm12B: MOVQ src_len+32(FP), CX MOVL 12(SP), BX CMPL BX, CX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B MOVL CX, SI MOVL CX, 12(SP) LEAQ (DX)(BX*1), CX SUBL BX, SI LEAL -1(SI), DX CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: MOVB $0xf0, (AX) MOVB DL, 1(AX) ADDQ $0x02, AX CMPL DX, $0x40 JL memmove_emit_remainder_encodeSnappyBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B: SHLB $0x02, DL MOVB DL, (AX) ADDQ $0x01, AX memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveShort CMPQ BX, $0x08 JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16 CMPQ BX, $0x20 JBE 
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8: MOVQ (CX), SI MOVQ SI, (AX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: MOVQ (CX), SI MOVQ -8(CX)(BX*1), CX MOVQ SI, (AX) MOVQ CX, -8(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: MOVOU (CX), X0 MOVOU -16(CX)(BX*1), X1 MOVOU X0, (AX) MOVOU X1, -16(AX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVOU X0, (AX) MOVOU X1, 16(AX) MOVOU X2, -32(AX)(BX*1) MOVOU X3, -16(AX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B: MOVQ DX, AX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B: LEAQ (AX)(SI*1), DX MOVL SI, BX // genMemMoveLong MOVOU (CX), X0 MOVOU 16(CX), X1 MOVOU -32(CX)(BX*1), X2 MOVOU -16(CX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI MOVQ AX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 LEAQ -32(CX)(R8*1), SI LEAQ -32(AX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 MOVOU 16(SI), X5 MOVOA X4, (R9) MOVOA X5, 16(R9) ADDQ $0x20, R9 ADDQ $0x20, SI ADDQ $0x20, R8 DECQ DI JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back 
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B:
	// Return AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written (final dst
// pointer minus dst_base), or 0 when the output would reach the dst limit
// kept in 0(SP).
//
// Frame slots used below:
//   0(SP)      dst limit: dst_base + src_len - 9 - src_len>>5
//   8(SP)      src_len - 8; searching stops once the position reaches it
//   12(SP)     src offset of the first byte that has not been emitted yet
//   16(SP)     offset of the latest match (written only in this function)
//   20(SP)     next search position, reloaded into CX after a failed probe
//   24(SP)     4096 x uint32 positions, indexed by a 12-bit hash of 6 bytes
//   16408(SP)  1024 x uint32 positions, indexed by a 10-bit hash of 4 bytes
//
// Registers in the main loop: AX = dst write pointer, DX = src base,
// CX = current src position, SI = candidate position.
TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56
	MOVQ dst_base+0(FP), AX
	// Zero both tables: 0xa0 iterations of 128 bytes = 20480 bytes from 24(SP).
	MOVQ $0x000000a0, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBetterBlockAsm10B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBetterBlockAsm10B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	// Searching starts at src position 1.
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBetterBlockAsm10B:
	// SI = CX + 1 + (CX - 12(SP))>>5: the step grows with the number of
	// bytes scanned since the last emit.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x05, SI
	LEAL 1(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeSnappyBetterBlockAsm10B
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	// R10 = 12-bit hash of the low 6 bytes, R11 = 10-bit hash of the low 4.
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x34, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x36, R11
	// Fetch the previous positions, then record CX in both tables.
	MOVL 24(SP)(R10*4), SI
	MOVL 16408(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 16408(SP)(R11*4)
	// Accept a candidate when its first 4 bytes equal the 4 bytes at CX.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBetterBlockAsm10B
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeSnappyBetterBlockAsm10B
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBetterBlockAsm10B

candidateS_match_encodeSnappyBetterBlockAsm10B:
	// The 4-byte-hash candidate (R8) matched. Probe the 6-byte table for
	// position CX+1 as well; use that entry if it matches, otherwise fall
	// back to R8 at the original CX.
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x34, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBetterBlockAsm10B
	DECL CX
	MOVL R8, SI

candidate_match_encodeSnappyBetterBlockAsm10B:
	// Extend the match backwards while CX > 12(SP), SI > 0 and the bytes
	// just before both positions are equal.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B

match_extend_back_loop_encodeSnappyBetterBlockAsm10B:
	CMPL CX, DI
	JLE match_extend_back_end_encodeSnappyBetterBlockAsm10B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B
	JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B

match_extend_back_end_encodeSnappyBetterBlockAsm10B:
	// Return 0 unless AX + 3 + (pending literal bytes) is below the limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeSnappyBetterBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBetterBlockAsm10B:
	// DI = match start. Skip the 4 bytes already compared and count further
	// equal bytes in R12; R8 = bytes left in src after CX.
	MOVL CX, DI
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), R10

	// matchLen
	XORL R12, R12
	CMPL R8, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm10B

matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B:
	// Compare 8 bytes at a time; on a difference the lowest set bit of the
	// XOR, divided by 8, is the number of equal leading bytes.
	MOVQ (R9)(R12*1), R11
	XORQ (R10)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP match_nolit_end_encodeSnappyBetterBlockAsm10B

matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B:
	LEAL -8(R8), R8
	LEAL 8(R12), R12
	CMPL R8, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B

matchlen_single_match_nolit_encodeSnappyBetterBlockAsm10B:
	TESTL R8, R8
	JZ match_nolit_end_encodeSnappyBetterBlockAsm10B

matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm10B:
	MOVB (R9)(R12*1), R11
	CMPB (R10)(R12*1), R11
	JNE match_nolit_end_encodeSnappyBetterBlockAsm10B
	LEAL 1(R12), R12
	DECL R8
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm10B

match_nolit_end_encodeSnappyBetterBlockAsm10B:
	// R8 = match offset (CX - SI).
	MOVL CX, R8
	SUBL SI, R8

	// Check if repeat
	MOVL R8, 16(SP)
	// Emit src[12(SP):DI] as a literal unless it is empty.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	// R9 = literal length, SI = length - 1, R10 = literal source.
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_match_emit_encodeSnappyBetterBlockAsm10B
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm10B
	// Header: 0xf4 followed by length-1 as 16 bits.
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B

two_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
	// Header: 0xf0 followed by length-1 as one byte.
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_encodeSnappyBetterBlockAsm10B
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B

one_byte_match_emit_encodeSnappyBetterBlockAsm10B:
	// Header: (length-1)<<2 in a single byte.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBetterBlockAsm10B:
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Lengths up to 8 are copied as one full 8-byte word here; AX is then
	// set to AX+R9 (SI), so only R9 of those bytes are kept.
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8:
	MOVQ (R10), R11
	MOVQ R11, (AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B

memmove_long_match_emit_encodeSnappyBetterBlockAsm10B:
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// The first and last 32 bytes are held in X0-X3 and stored last; the
	// loops in between copy 32-byte chunks using MOVOA stores.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	DECQ R13
	JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B:
	// CX = end of the match, R12 = full match length; 12(SP) moves to CX.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitCopy
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B:
	// While more than 64 bytes remain: write 0xee plus the 16-bit offset
	// and reduce the remaining length by 60.
	CMPL R12, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B
	MOVB $0xee, (AX)
	MOVW R8, 1(AX)
	LEAL -60(R12), R12
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B

two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B:
	// The 2-byte form is used only for length < 12 and offset < 2048.
	CMPL R12, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
	CMPL R8, $0x00000800
	JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
	// Byte 0 = 1 + (length-4)<<2 | (offset>>8)<<5, byte 1 = low offset byte.
	MOVB $0x01, BL
	LEAL -16(BX)(R12*4), R12
	MOVB R8, 1(AX)
	SHRL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B

emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B:
	// Byte 0 = 2 + (length-1)<<2, followed by the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B:
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBetterBlockAsm10B
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B:
	// Record extra positions before searching again. With DI = match
	// start + 1: DI and DI+1 go into the 6-byte table, DI+1 and DI+2 into
	// the 4-byte table; then CX-2 and CX-1 into the 6-byte table and CX-1
	// into the 4-byte table.
	MOVQ $0x0000cf1bbcdcbf9b, SI
	MOVQ $0x9e3779b1, R8
	INCL DI
	MOVQ (DX)(DI*1), R9
	MOVQ R9, R10
	MOVQ R9, R11
	MOVQ R9, R12
	SHRQ $0x08, R11
	MOVQ R11, R13
	SHRQ $0x10, R12
	LEAL 1(DI), R14
	LEAL 2(DI), R15
	MOVQ -2(DX)(CX*1), R9
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x34, R10
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x34, R13
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x36, R11
	SHLQ $0x20, R12
	IMULQ R8, R12
	SHRQ $0x36, R12
	MOVL DI, 24(SP)(R10*4)
	MOVL R14, 24(SP)(R13*4)
	MOVL R14, 16408(SP)(R11*4)
	MOVL R15, 16408(SP)(R12*4)
	MOVQ R9, R10
	MOVQ R9, R11
	SHRQ $0x08, R11
	MOVQ R11, R13
	LEAL -2(CX), R9
	LEAL -1(CX), DI
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x34, R10
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x36, R11
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x34, R13
	MOVL R9, 24(SP)(R10*4)
	MOVL DI, 16408(SP)(R11*4)
	MOVL DI, 24(SP)(R13*4)
	JMP search_loop_encodeSnappyBetterBlockAsm10B

emit_remainder_encodeSnappyBetterBlockAsm10B:
	// Return 0 unless AX + 3 + (src_len - 12(SP)) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBetterBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
	// Emit src[12(SP):src_len] as a final literal unless it is empty.
	// From here CX = literal source, SI/BX = length, DX = length - 1 (the
	// src base in DX is no longer needed).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B

two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBetterBlockAsm10B
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B

one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBetterBlockAsm10B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	// The remainder ends exactly at the end of src, so lengths below 8 must
	// be copied with exact-size moves: an 8-byte load here would read past
	// src and the matching store would write bytes beyond the returned
	// length. NOTE: this file is generated; the generator needs the same
	// change or it will be lost on regeneration.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2:
	// First and last byte (the same byte when BX == 1).
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7:
	// Two possibly overlapping 4-byte moves: first 4 and last 4 bytes.
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B

memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B:
	// Return the number of bytes written: AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000028, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBetterBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBetterBlockAsm8B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ
(AX)(DX*1), DX
	// The operand above completes the LEAQ that ends the previous line.
	//
	// Body of encodeSnappyBetterBlockAsm8B. Frame slots used below:
	//   0(SP)     dst limit (the LEAQ result); reaching it returns 0
	//   8(SP)     position at which searching stops (read-only here)
	//   12(SP)    src offset of the first byte that has not been emitted yet
	//   16(SP)    offset of the latest match (written only in this function)
	//   20(SP)    next search position, reloaded into CX after a failed probe
	//   24(SP)    1024 x uint32 positions, indexed by a 10-bit hash of 6 bytes
	//   4120(SP)  256 x uint32 positions, indexed by an 8-bit hash of 4 bytes
	//
	// Registers in the main loop: AX = dst write pointer, DX = src base,
	// CX = current src position, SI = candidate position.
	MOVQ DX, (SP)
	// Searching starts at src position 1.
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBetterBlockAsm8B:
	// SI = CX + 1 + (CX - 12(SP))>>4: the step grows with the number of
	// bytes scanned since the last emit.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x04, SI
	LEAL 1(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeSnappyBetterBlockAsm8B
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	// R10 = 10-bit hash of the low 6 bytes, R11 = 8-bit hash of the low 4.
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x36, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x38, R11
	// Fetch the previous positions, then record CX in both tables.
	MOVL 24(SP)(R10*4), SI
	MOVL 4120(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 4120(SP)(R11*4)
	// Accept a candidate when its first 4 bytes equal the 4 bytes at CX.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBetterBlockAsm8B
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeSnappyBetterBlockAsm8B
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBetterBlockAsm8B

candidateS_match_encodeSnappyBetterBlockAsm8B:
	// The 4-byte-hash candidate (R8) matched. Probe the 6-byte table for
	// position CX+1 as well; use that entry if it matches, otherwise fall
	// back to R8 at the original CX.
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x36, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBetterBlockAsm8B
	DECL CX
	MOVL R8, SI

candidate_match_encodeSnappyBetterBlockAsm8B:
	// Extend the match backwards while CX > 12(SP), SI > 0 and the bytes
	// just before both positions are equal.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B

match_extend_back_loop_encodeSnappyBetterBlockAsm8B:
	CMPL CX, DI
	JLE match_extend_back_end_encodeSnappyBetterBlockAsm8B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B
	JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B

match_extend_back_end_encodeSnappyBetterBlockAsm8B:
	// Return 0 unless AX + 3 + (pending literal bytes) is below the limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeSnappyBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBetterBlockAsm8B:
	// DI = match start. Skip the 4 bytes already compared and count further
	// equal bytes in R12; R8 = bytes left in src after CX.
	MOVL CX, DI
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), R10

	// matchLen
	XORL R12, R12
	CMPL R8, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm8B

matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B:
	// Compare 8 bytes at a time; on a difference the lowest set bit of the
	// XOR, divided by 8, is the number of equal leading bytes.
	MOVQ (R9)(R12*1), R11
	XORQ (R10)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP match_nolit_end_encodeSnappyBetterBlockAsm8B

matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B:
	LEAL -8(R8), R8
	LEAL 8(R12), R12
	CMPL R8, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B

matchlen_single_match_nolit_encodeSnappyBetterBlockAsm8B:
	TESTL R8, R8
	JZ match_nolit_end_encodeSnappyBetterBlockAsm8B

matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm8B:
	MOVB (R9)(R12*1), R11
	CMPB (R10)(R12*1), R11
	JNE match_nolit_end_encodeSnappyBetterBlockAsm8B
	LEAL 1(R12), R12
	DECL R8
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm8B

match_nolit_end_encodeSnappyBetterBlockAsm8B:
	// R8 = match offset (CX - SI).
	MOVL CX, R8
	SUBL SI, R8

	// Check if repeat
	MOVL R8, 16(SP)
	// Emit src[12(SP):DI] as a literal unless it is empty.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	// R9 = literal length, SI = length - 1, R10 = literal source.
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_match_emit_encodeSnappyBetterBlockAsm8B
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm8B
	// Header: 0xf4 followed by length-1 as 16 bits.
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B

two_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
	// Header: 0xf0 followed by length-1 as one byte.
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_encodeSnappyBetterBlockAsm8B
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B

one_byte_match_emit_encodeSnappyBetterBlockAsm8B:
	// Header: (length-1)<<2 in a single byte.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBetterBlockAsm8B:
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Lengths up to 8 are copied as one full 8-byte word here; AX is then
	// set to AX+R9 (SI), so only R9 of those bytes are kept.
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8:
	MOVQ (R10), R11
	MOVQ R11, (AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B

memmove_long_match_emit_encodeSnappyBetterBlockAsm8B:
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// The first and last 32 bytes are held in X0-X3 and stored last; the
	// loops in between copy 32-byte chunks using MOVOA stores.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	DECQ R13
	JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B:
	// CX = end of the match, R12 = full match length; 12(SP) moves to CX.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitCopy
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B:
	// While more than 64 bytes remain: write 0xee plus the 16-bit offset
	// and reduce the remaining length by 60.
	CMPL R12, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B
	MOVB $0xee, (AX)
	MOVW R8, 1(AX)
	LEAL -60(R12), R12
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B

two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B:
	// Lengths below 12 use the 2-byte form; this variant has no offset check.
	CMPL R12, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B
	// Byte 0 = 1 + (length-4)<<2 | (offset>>8)<<5, byte 1 = low offset byte.
	MOVB $0x01, BL
	LEAL -16(BX)(R12*4), R12
	MOVB R8, 1(AX)
	SHRL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B

emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B:
	// Byte 0 = 2 + (length-1)<<2, followed by the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBetterBlockAsm8B
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
	// Record extra positions before searching again. With DI = match
	// start + 1: DI and DI+1 go into the 6-byte table, DI+1 and DI+2 into
	// the 4-byte table; then CX-2 and CX-1 into the 6-byte table and CX-1
	// into the 4-byte table.
	MOVQ $0x0000cf1bbcdcbf9b, SI
	MOVQ $0x9e3779b1, R8
	INCL DI
	MOVQ (DX)(DI*1), R9
	MOVQ R9, R10
	MOVQ R9, R11
	MOVQ R9, R12
	SHRQ $0x08, R11
	MOVQ R11, R13
	SHRQ $0x10, R12
	LEAL 1(DI), R14
	LEAL 2(DI), R15
	MOVQ -2(DX)(CX*1), R9
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x36, R10
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x36, R13
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x38, R11
	SHLQ $0x20, R12
	IMULQ R8, R12
	SHRQ $0x38, R12
	MOVL DI, 24(SP)(R10*4)
	MOVL R14, 24(SP)(R13*4)
	MOVL R14, 4120(SP)(R11*4)
	MOVL R15, 4120(SP)(R12*4)
	MOVQ R9, R10
	MOVQ R9, R11
	SHRQ $0x08, R11
	MOVQ R11, R13
	LEAL -2(CX), R9
	LEAL -1(CX), DI
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x36, R10
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x38, R11
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x36, R13
	MOVL R9, 24(SP)(R10*4)
	MOVL DI, 4120(SP)(R11*4)
	MOVL DI, 24(SP)(R13*4)
	JMP search_loop_encodeSnappyBetterBlockAsm8B

emit_remainder_encodeSnappyBetterBlockAsm8B:
	// Return 0 unless AX + 3 + (src_len - 12(SP)) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
	// Emit src[12(SP):src_len] as a final literal unless it is empty.
	// From here CX = literal source, SI/BX = length, DX = length - 1 (the
	// src base in DX is no longer needed).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B

two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBetterBlockAsm8B
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B

one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	// The remainder ends exactly at the end of src, so lengths below 8 must
	// be copied with exact-size moves: an 8-byte load here would read past
	// src and the matching store would write bytes beyond the returned
	// length. NOTE: this file is generated; the generator needs the same
	// change or it will be lost on regeneration.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2:
	// First and last byte (the same byte when BX == 1).
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7:
	// Two possibly overlapping 4-byte moves: first 4 and last 4 bytes.
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B

memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B:
	// Exit path of the encoder whose body precedes this point: the return
	// value is the distance between the write cursor (AX) and dst_base.
	MOVQ  dst_base+0(FP), CX
	SUBQ  CX, AX
	MOVQ  AX, ret+48(FP)
	RET

// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// Writes a literal header describing len(lit), followed by the bytes of lit,
// to dst. Returns header size + len(lit), or 0 when lit is empty.
//
// Header selected from n = len(lit)-1:
//   n <  60       1 byte : n<<2
//   n <  1<<8     2 bytes: 0xf0, n (8 bit)
//   n <  1<<16    3 bytes: 0xf4, n (16 bit, little endian)
//   n <  1<<24    4 bytes: 0xf8, n (24 bit, little endian)
//   otherwise     5 bytes: 0xfc, n (32 bit, little endian)
//
// Registers: AX = write cursor in dst, CX = read pointer in lit,
// DX = len(lit), BX = running return value, SI = n.
//
// NOTE(review): dst_len is never read, so no bounds checking happens here;
// presumably the caller guarantees dst is large enough - confirm at call sites.
// NOTE(review): the header selection uses 32-bit signed compares, so lengths
// of 2^31 and above are not handled - presumably never passed by callers.
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
	MOVQ  lit_len+32(FP), DX
	MOVQ  dst_base+0(FP), AX
	MOVQ  lit_base+24(FP), CX
	TESTQ DX, DX
	JZ    emit_literal_end_standalone_skip
	MOVL  DX, BX                          // return value starts at len(lit)
	LEAL  -1(DX), SI                      // SI = n = len(lit)-1
	CMPL  SI, $0x3c
	JLT   one_byte_standalone
	CMPL  SI, $0x00000100
	JLT   two_bytes_standalone
	CMPL  SI, $0x00010000
	JLT   three_bytes_standalone
	CMPL  SI, $0x01000000
	JLT   four_bytes_standalone

	// 5-byte header: 0xfc + 32-bit n.
	MOVB  $0xfc, (AX)
	MOVL  SI, 1(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   memmove_long_standalone

four_bytes_standalone:
	// 4-byte header: 0xf8 + 24-bit n (low word, then bits 16..23 via DI).
	MOVL  SI, DI
	SHRL  $0x10, DI
	MOVB  $0xf8, (AX)
	MOVW  SI, 1(AX)
	MOVB  DI, 3(AX)
	ADDQ  $0x04, BX
	ADDQ  $0x04, AX
	JMP   memmove_long_standalone

three_bytes_standalone:
	// 3-byte header: 0xf4 + 16-bit n.
	MOVB  $0xf4, (AX)
	MOVW  SI, 1(AX)
	ADDQ  $0x03, BX
	ADDQ  $0x03, AX
	JMP   memmove_long_standalone

two_bytes_standalone:
	// 2-byte header: 0xf0 + 8-bit n.
	MOVB  $0xf0, (AX)
	MOVB  SI, 1(AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX

	// n < 64 (at most 64 bytes) still fits the short copy below.
	CMPL  SI, $0x40
	JL    memmove_standalone
	JMP   memmove_long_standalone

one_byte_standalone:
	// 1-byte header: n<<2.
	SHLB  $0x02, SI
	MOVB  SI, (AX)
	ADDQ  $0x01, BX
	ADDQ  $0x01, AX

memmove_standalone:
	// genMemMoveShort
	// Copies DX (1..64) bytes from CX to AX. Every size class loads the head
	// and the tail of the source (the two may overlap) before storing, so
	// no byte loop is needed.
	CMPQ  DX, $0x03
	JB    emit_lit_memmove_standalone_memmove_move_1or2
	JE    emit_lit_memmove_standalone_memmove_move_3
	CMPQ  DX, $0x08
	JB    emit_lit_memmove_standalone_memmove_move_4through7
	CMPQ  DX, $0x10
	JBE   emit_lit_memmove_standalone_memmove_move_8through16
	CMPQ  DX, $0x20
	JBE   emit_lit_memmove_standalone_memmove_move_17through32
	JMP   emit_lit_memmove_standalone_memmove_move_33through64

emit_lit_memmove_standalone_memmove_move_1or2:
	// First byte and last byte (the same byte when DX == 1).
	MOVB  (CX), SI
	MOVB  -1(CX)(DX*1), CL
	MOVB  SI, (AX)
	MOVB  CL, -1(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW  (CX), SI
	MOVB  2(CX), CL
	MOVW  SI, (AX)
	MOVB  CL, 2(AX)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_4through7:
	MOVL  (CX), SI
	MOVL  -4(CX)(DX*1), CX
	MOVL  SI, (AX)
	MOVL  CX, -4(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_8through16:
	MOVQ  (CX), SI
	MOVQ  -8(CX)(DX*1), CX
	MOVQ  SI, (AX)
	MOVQ  CX, -8(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

memmove_long_standalone:
	// genMemMoveLong
	// Copies DX bytes from CX to AX. Within this function the path is only
	// entered with n >= 64, i.e. at least 65 bytes.
	// X0..X3 keep the first 32 and the last 32 source bytes; they are stored
	// last and cover whatever the aligned middle loop does not.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVQ  DX, DI
	SHRQ  $0x05, DI                       // DI = number of whole 32-byte chunks
	MOVQ  AX, SI
	ANDL  $0x0000001f, SI                 // SI = dst address modulo 32
	MOVQ  $0x00000040, R8
	SUBQ  SI, R8                          // R8 = 64 - (dst mod 32): AX+R8-32 is 32-byte aligned
	DECQ  DI

	// DECQ leaves CF untouched and the SUBQ above cannot borrow (SI <= 31),
	// so JA is taken whenever the decremented chunk count is non-zero.
	JA    emit_lit_memmove_long_standalonelarge_forward_sse_loop_32

	// NOTE(review): only reached when DX>>5 == 1 (32..63 bytes); the branches
	// into memmove_long_standalone above never pass such a length.
	LEAQ  -32(CX)(R8*1), SI
	LEAQ  -32(AX)(R8*1), R9

emit_lit_memmove_long_standalonelarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ  $0x20, R9
	ADDQ  $0x20, SI
	ADDQ  $0x20, R8
	DECQ  DI
	JNA   emit_lit_memmove_long_standalonelarge_big_loop_back

emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
	// Unaligned 32-byte load, aligned 32-byte store, advancing R8 by 32
	// for as long as DX >= R8.
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ  $0x20, R8
	CMPQ  DX, R8
	JAE   emit_lit_memmove_long_standalonelarge_forward_sse_loop_32

	// Store the saved head and tail.
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_literal_end_standalone_skip:
	// Empty literal: nothing written, return 0.
	XORQ  BX, BX

emit_literal_end_standalone:
	MOVQ  BX, ret+48(FP)
	RET

// func emitRepeat(dst []byte, offset int, length int) int
//
// Writes a repeat element for `length` bytes to dst and returns the number of
// bytes written. With L = length-4 the encodings are:
//   length <= 8                    2 bytes: 16-bit word (L<<2)|1
//   length < 12 && offset < 2048   2 bytes: ((offset>>8)<<5)|(L<<2)|1, offset&0xff
//   L < 260                        3 bytes: 0x15, 0x00, L-4
//   L < 0x10100                    4 bytes: 0x19, 0x00, 16-bit L-256
//   L < 0x0100ffff                 5 bytes: 0x1d, 0x00, 24-bit L-65536
//   otherwise                      5 bytes: 0x1d, 0x00, 0xfb, 0xff, 0xff, then
//                                  L is reduced by 16842747 and the selection
//                                  runs again with that value as the length.
//
// Registers: AX = write cursor in dst, BX = bytes written, CX = offset,
// DX = length / L, SI = length as seen at the top of the loop.
// NOTE(review): dst_len is never read; the caller must provide enough room.
TEXT ·emitRepeat(SB), NOSPLIT, $0-48
	XORQ  BX, BX
	MOVQ  dst_base+0(FP), AX
	MOVQ  offset+24(FP), CX
	MOVQ  length+32(FP), DX

	// emitRepeat
emit_repeat_again_standalone:
	MOVL  DX, SI                          // SI = length
	LEAL  -4(DX), DX                      // DX = L = length-4
	CMPL  SI, $0x08
	JLE   repeat_two_standalone
	CMPL  SI, $0x0c
	JGE   cant_repeat_two_offset_standalone
	CMPL  CX, $0x00000800
	JLT   repeat_two_offset_standalone

cant_repeat_two_offset_standalone:
	CMPL  DX, $0x00000104
	JLT   repeat_three_standalone
	CMPL  DX, $0x00010100
	JLT   repeat_four_standalone
	CMPL  DX, $0x0100ffff
	JLT   repeat_five_standalone

	// Too long for one element: emit a 5-byte element carrying 0xfffffb and
	// go around again with the remainder.
	LEAL  -16842747(DX), DX
	MOVW  $0x001d, (AX)
	MOVW  $0xfffb, 2(AX)
	MOVB  $0xff, 4(AX)
	ADDQ  $0x05, AX
	ADDQ  $0x05, BX
	JMP   emit_repeat_again_standalone

repeat_five_standalone:
	LEAL  -65536(DX), DX
	MOVL  DX, CX                          // offset is no longer needed; CX carries bits 16..23
	MOVW  $0x001d, (AX)
	MOVW  DX, 2(AX)
	SARL  $0x10, CX
	MOVB  CL, 4(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_repeat_end

repeat_four_standalone:
	LEAL  -256(DX), DX
	MOVW  $0x0019, (AX)
	MOVW  DX, 2(AX)
	ADDQ  $0x04, BX
	ADDQ  $0x04, AX
	JMP   gen_emit_repeat_end

repeat_three_standalone:
	LEAL  -4(DX), DX
	MOVW  $0x0015, (AX)
	MOVB  DL, 2(AX)
	ADDQ  $0x03, BX
	ADDQ  $0x03, AX
	JMP   gen_emit_repeat_end

repeat_two_standalone:
	SHLL  $0x02, DX
	ORL   $0x01, DX
	MOVW  DX, (AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX
	JMP   gen_emit_repeat_end

repeat_two_offset_standalone:
	XORQ  SI, SI
	LEAL  1(SI)(DX*4), DX                 // DX = (L<<2)|1
	MOVB  CL, 1(AX)                       // low 8 bits of offset
	SARL  $0x08, CX
	SHLL  $0x05, CX
	ORL   CX, DX                          // offset bits 8.. go into bits 5.. of the tag
	MOVB  DL, (AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX

gen_emit_repeat_end:
	MOVQ  BX, ret+40(FP)
	RET

// func emitCopy(dst []byte, offset int, length int) int
//
// Writes one copy element for (offset, length) to dst, followed by repeat
// elements (same encodings as emitRepeat) when the length does not fit a
// single copy. Returns the number of bytes written.
//
//   offset >= 65536: if length > 64, emit 0xff + 32-bit offset (accounts for 64
//     bytes); a remainder of 4 or more is emitted as repeats, a remainder of
//     1..3 (or an initial length <= 64) as ((len-1)<<2)|3 + 32-bit offset.
//   offset <  65536: if length > 64, emit 0xee + 16-bit offset (accounts for 60
//     bytes) and the rest as repeats; otherwise a 2-byte element when
//     length < 12 and offset < 2048, else ((len-1)<<2)|2 + 16-bit offset.
//
// Registers: AX = write cursor in dst, BX = bytes written, CX = offset,
// DX = remaining length.
// NOTE(review): dst_len is never read; the caller must provide enough room.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ  BX, BX
	MOVQ  dst_base+0(FP), AX
	MOVQ  offset+24(FP), CX
	MOVQ  length+32(FP), DX

	// emitCopy
	CMPL  CX, $0x00010000
	JL    two_byte_offset_standalone

	// Offset needs 32 bits.
	CMPL  DX, $0x40
	JLE   four_bytes_remain_standalone
	MOVB  $0xff, (AX)
	MOVL  CX, 1(AX)
	LEAL  -64(DX), DX
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	CMPL  DX, $0x04
	JL    four_bytes_remain_standalone

	// emitRepeat
emit_repeat_again_standalone_emit_copy:
	MOVL  DX, SI
	LEAL  -4(DX), DX
	CMPL  SI, $0x08
	JLE   repeat_two_standalone_emit_copy
	CMPL  SI, $0x0c
	JGE   cant_repeat_two_offset_standalone_emit_copy
	CMPL  CX, $0x00000800
	JLT   repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL  DX, $0x00000104
	JLT   repeat_three_standalone_emit_copy
	CMPL  DX, $0x00010100
	JLT   repeat_four_standalone_emit_copy
	CMPL  DX, $0x0100ffff
	JLT   repeat_five_standalone_emit_copy
	LEAL  -16842747(DX), DX
	MOVW  $0x001d, (AX)
	MOVW  $0xfffb, 2(AX)
	MOVB  $0xff, 4(AX)
	ADDQ  $0x05, AX
	ADDQ  $0x05, BX
	JMP   emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	LEAL  -65536(DX), DX
	MOVL  DX, CX
	MOVW  $0x001d, (AX)
	MOVW  DX, 2(AX)
	SARL  $0x10, CX
	MOVB  CL, 4(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_copy_end

repeat_four_standalone_emit_copy:
	LEAL  -256(DX), DX
	MOVW  $0x0019, (AX)
	MOVW  DX, 2(AX)
	ADDQ  $0x04, BX
	ADDQ  $0x04, AX
	JMP   gen_emit_copy_end

repeat_three_standalone_emit_copy:
	LEAL  -4(DX), DX
	MOVW  $0x0015, (AX)
	MOVB  DL, 2(AX)
	ADDQ  $0x03, BX
	ADDQ  $0x03, AX
	JMP   gen_emit_copy_end

repeat_two_standalone_emit_copy:
	SHLL  $0x02, DX
	ORL   $0x01, DX
	MOVW  DX, (AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX
	JMP   gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	XORQ  SI, SI
	LEAL  1(SI)(DX*4), DX
	MOVB  CL, 1(AX)
	SARL  $0x08, CX
	SHLL  $0x05, CX
	ORL   CX, DX
	MOVB  DL, (AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX
	JMP   gen_emit_copy_end

four_bytes_remain_standalone:
	TESTL DX, DX
	JZ    gen_emit_copy_end

	// Tag = ((length-1)<<2)|3. Only the low byte of SI is initialised, which
	// is sufficient because only DL is stored.
	MOVB  $0x03, SI
	LEAL  -4(SI)(DX*4), DX
	MOVB  DL, (AX)
	MOVL  CX, 1(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_copy_end

two_byte_offset_standalone:
	// Offset fits in 16 bits.
	CMPL  DX, $0x40
	JLE   two_byte_offset_short_standalone
	MOVB  $0xee, (AX)
	MOVW  CX, 1(AX)
	LEAL  -60(DX), DX
	ADDQ  $0x03, AX
	ADDQ  $0x03, BX

	// emitRepeat
emit_repeat_again_standalone_emit_copy_short:
	MOVL  DX, SI
	LEAL  -4(DX), DX
	CMPL  SI, $0x08
	JLE   repeat_two_standalone_emit_copy_short
	CMPL  SI, $0x0c
	JGE   cant_repeat_two_offset_standalone_emit_copy_short
	CMPL  CX, $0x00000800
	JLT   repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	CMPL  DX, $0x00000104
	JLT   repeat_three_standalone_emit_copy_short
	CMPL  DX, $0x00010100
	JLT   repeat_four_standalone_emit_copy_short
	CMPL  DX, $0x0100ffff
	JLT   repeat_five_standalone_emit_copy_short
	LEAL  -16842747(DX), DX
	MOVW  $0x001d, (AX)
	MOVW  $0xfffb, 2(AX)
	MOVB  $0xff, 4(AX)
	ADDQ  $0x05, AX
	ADDQ  $0x05, BX
	JMP   emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	LEAL  -65536(DX), DX
	MOVL  DX, CX
	MOVW  $0x001d, (AX)
	MOVW  DX, 2(AX)
	SARL  $0x10, CX
	MOVB  CL, 4(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	LEAL  -256(DX), DX
	MOVW  $0x0019, (AX)
	MOVW  DX, 2(AX)
	ADDQ  $0x04, BX
	ADDQ  $0x04, AX
	JMP   gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	LEAL  -4(DX), DX
	MOVW  $0x0015, (AX)
	MOVB  DL, 2(AX)
	ADDQ  $0x03, BX
	ADDQ  $0x03, AX
	JMP   gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	SHLL  $0x02, DX
	ORL   $0x01, DX
	MOVW  DX, (AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX
	JMP   gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	XORQ  SI, SI
	LEAL  1(SI)(DX*4), DX
	MOVB  CL, 1(AX)
	SARL  $0x08, CX
	SHLL  $0x05, CX
	ORL   CX, DX
	MOVB  DL, (AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX
	JMP   gen_emit_copy_end

two_byte_offset_short_standalone:
	CMPL  DX, $0x0c
	JGE   emit_copy_three_standalone
	CMPL  CX, $0x00000800
	JGE   emit_copy_three_standalone

	// 2-byte element: ((offset>>8)<<5)|((length-4)<<2)|1, then offset&0xff.
	// Only the low byte of the LEAL result is stored.
	MOVB  $0x01, SI
	LEAL  -16(SI)(DX*4), DX
	MOVB  CL, 1(AX)
	SHRL  $0x08, CX
	SHLL  $0x05, CX
	ORL   CX, DX
	MOVB  DL, (AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX
	JMP   gen_emit_copy_end

emit_copy_three_standalone:
	// 3-byte element: ((length-1)<<2)|2, then the 16-bit offset.
	MOVB  $0x02, SI
	LEAL  -4(SI)(DX*4), DX
	MOVB  DL, (AX)
	MOVW  CX, 1(AX)
	ADDQ  $0x03, BX
	ADDQ  $0x03, AX

gen_emit_copy_end:
	MOVQ  BX, ret+40(FP)
	RET

// func emitCopyNoRepeat(dst []byte, offset int, length int) int
//
// Same copy encodings as emitCopy, but a length that does not fit a single
// element is split into further copy elements instead of repeat elements.
// Returns the number of bytes written.
//
// Registers: AX = write cursor in dst, BX = bytes written, CX = offset,
// DX = remaining length.
// NOTE(review): dst_len is never read; the caller must provide enough room.
TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
	XORQ  BX, BX
	MOVQ  dst_base+0(FP), AX
	MOVQ  offset+24(FP), CX
	MOVQ  length+32(FP), DX

	// emitCopy
	CMPL  CX, $0x00010000
	JL    two_byte_offset_standalone_snappy

four_bytes_loop_back_standalone_snappy:
	// While more than 64 bytes remain: 0xff + 32-bit offset, 64 bytes each.
	CMPL  DX, $0x40
	JLE   four_bytes_remain_standalone_snappy
	MOVB  $0xff, (AX)
	MOVL  CX, 1(AX)
	LEAL  -64(DX), DX
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	CMPL  DX, $0x04
	JL    four_bytes_remain_standalone_snappy
	JMP   four_bytes_loop_back_standalone_snappy

four_bytes_remain_standalone_snappy:
	TESTL DX, DX
	JZ    gen_emit_copy_end_snappy

	// Tag = ((length-1)<<2)|3; only the low byte of the LEAL result is stored.
	MOVB  $0x03, SI
	LEAL  -4(SI)(DX*4), DX
	MOVB  DL, (AX)
	MOVL  CX, 1(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_copy_end_snappy

two_byte_offset_standalone_snappy:
	// While more than 64 bytes remain: 0xee + 16-bit offset, 60 bytes each.
	CMPL  DX, $0x40
	JLE   two_byte_offset_short_standalone_snappy
	MOVB  $0xee, (AX)
	MOVW  CX, 1(AX)
	LEAL  -60(DX), DX
	ADDQ  $0x03, AX
	ADDQ  $0x03, BX
	JMP   two_byte_offset_standalone_snappy

two_byte_offset_short_standalone_snappy:
	CMPL  DX, $0x0c
	JGE   emit_copy_three_standalone_snappy
	CMPL  CX, $0x00000800
	JGE   emit_copy_three_standalone_snappy

	// 2-byte element: ((offset>>8)<<5)|((length-4)<<2)|1, then offset&0xff.
	MOVB  $0x01, SI
	LEAL  -16(SI)(DX*4), DX
	MOVB  CL, 1(AX)
	SHRL  $0x08, CX
	SHLL  $0x05, CX
	ORL   CX, DX
	MOVB  DL, (AX)
	ADDQ  $0x02, BX
	ADDQ  $0x02, AX
	JMP   gen_emit_copy_end_snappy

emit_copy_three_standalone_snappy:
	// 3-byte element: ((length-1)<<2)|2, then the 16-bit offset.
	MOVB  $0x02, SI
	LEAL  -4(SI)(DX*4), DX
	MOVB  DL, (AX)
	MOVW  CX, 1(AX)
	ADDQ  $0x03, BX
	ADDQ  $0x03, AX

gen_emit_copy_end_snappy:
	MOVQ  BX, ret+40(FP)
	RET

// func matchLen(a []byte, b []byte) int
//
// Returns the number of leading bytes that are equal in a and b, looking at
// no more than len(a) bytes.
//
// Registers: AX = a, CX = b, DX = bytes left to compare, SI = match length.
// NOTE(review): b_len is never read, so b is presumably required to be at
// least as long as a - confirm at call sites.
// NOTE(review): the count is handled with 32-bit instructions (CMPL, LEAL,
// DECL), so only the low 32 bits of len(a) take part.
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ  a_base+0(FP), AX
	MOVQ  b_base+24(FP), CX
	MOVQ  a_len+8(FP), DX

	// matchLen
	XORL  SI, SI
	CMPL  DX, $0x08
	JL    matchlen_single_standalone

matchlen_loopback_standalone:
	// Compare 8 bytes at a time. XORQ sets ZF from its result, so a zero
	// difference is detected without a separate TESTQ.
	MOVQ  (AX)(SI*1), BX
	XORQ  (CX)(SI*1), BX
	JZ    matchlen_loop_standalone

	// Lowest set bit of the difference / 8 = index of the first differing
	// byte within this little-endian 8-byte word.
	BSFQ  BX, BX
	SARQ  $0x03, BX
	LEAL  (SI)(BX*1), SI
	JMP   gen_match_len_end

matchlen_loop_standalone:
	LEAL  -8(DX), DX
	LEAL  8(SI), SI
	CMPL  DX, $0x08
	JGE   matchlen_loopback_standalone

matchlen_single_standalone:
	TESTL DX, DX
	JZ    gen_match_len_end

matchlen_single_loopback_standalone:
	// Fewer than 8 bytes left: compare one byte at a time.
	MOVB  (AX)(SI*1), BL
	CMPB  (CX)(SI*1), BL
	JNE   gen_match_len_end
	LEAL  1(SI), SI
	DECL  DX
	JNZ   matchlen_single_loopback_standalone

gen_match_len_end:
	MOVQ  SI, ret+48(FP)
	RET