;==========================================================
; Pitch-comparison kernels, MASM x64 syntax.
; Register arguments arrive in rcx, rdx, r8, r9; further arguments are read
; from the caller's stack through the Insp* equates below.
;
; History:
;   2011.02.14  CheckPitchAsm modified: pitch information reworked to be
;               pitch16-based.
;   2011.02.14  CheckPitchPixelAsm added; work stopped midway - finish it
;               only if the extra speed is really needed.
;   2011.02.16  CheckPitchAsmVert added.
;==========================================================

XMM_SAVE_BYTES  equ     90h             ; 9 registers (xmm6..xmm14) * 16 bytes

;----------------------------------------------------------
; EnterPitchFrame - shared entry sequence of the CheckPitch* procedures.
; Pushes rbp and the work registers, then rebases rbp by +20h; the Insp*
; equates ([rbp+30h]...) and the LOCAL variables are addressed from that
; rebased rbp.
; NOTE(review): the +20h rebase appears to rely on the assembler-generated
; LOCAL prologue (push rbp / mov rbp, rsp / 24 bytes of locals) having run
; first, which would make [rbp+30h] the fifth argument - confirm against a
; listing before changing the number or size of LOCALs.
; xmm6-xmm14 are written by the kernels and are nonvolatile in the Microsoft
; x64 calling convention, so they are spilled below the pushed registers.
; rsp is only adjusted after rbp has been fixed, so no rbp-relative offset
; moves.
;----------------------------------------------------------
EnterPitchFrame MACRO
        push    rbp
        mov     rbp, rsp
        push    r10
        push    r11
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        add     rbp, 020h
        sub     rsp, XMM_SAVE_BYTES
        movdqu  xmmword ptr [rsp + 00h], xmm6
        movdqu  xmmword ptr [rsp + 10h], xmm7
        movdqu  xmmword ptr [rsp + 20h], xmm8
        movdqu  xmmword ptr [rsp + 30h], xmm9
        movdqu  xmmword ptr [rsp + 40h], xmm10
        movdqu  xmmword ptr [rsp + 50h], xmm11
        movdqu  xmmword ptr [rsp + 60h], xmm12
        movdqu  xmmword ptr [rsp + 70h], xmm13
        movdqu  xmmword ptr [rsp + 80h], xmm14
ENDM

;----------------------------------------------------------
; LeavePitchFrame - exact mirror of EnterPitchFrame. rax is left untouched so
; it can already hold the return value.
;----------------------------------------------------------
LeavePitchFrame MACRO
        movdqu  xmm6,  xmmword ptr [rsp + 00h]
        movdqu  xmm7,  xmmword ptr [rsp + 10h]
        movdqu  xmm8,  xmmword ptr [rsp + 20h]
        movdqu  xmm9,  xmmword ptr [rsp + 30h]
        movdqu  xmm10, xmmword ptr [rsp + 40h]
        movdqu  xmm11, xmmword ptr [rsp + 50h]
        movdqu  xmm12, xmmword ptr [rsp + 60h]
        movdqu  xmm13, xmmword ptr [rsp + 70h]
        movdqu  xmm14, xmmword ptr [rsp + 80h]
        add     rsp, XMM_SAVE_BYTES
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     r11
        pop     r10
        pop     rbp
ENDM

;----------------------------------------------------------
; BroadcastWord xdst, gpr
; Copies the low 16 bits of the 64-bit register gpr into all eight word lanes
; of xdst. Clobbers xmm0.
;----------------------------------------------------------
BroadcastWord MACRO xdst, gpr
        movd    xdst, gpr
        movdqa  xmm0, xdst
        punpcklwd xmm0, xdst            ; word0 duplicated into words 0 and 1
        pshufd  xdst, xmm0, 0           ; dword0 replicated to all four dwords
ENDM

.data

.code

ImgAddr         equ     r14
ByteAddr        equ     rcx
ShortAddr       equ     rdx
nByte           equ     r8
iByte           equ     r9

;----------------------------------------------------------
; Byte_To_Short
; Zero-extends nByte bytes at ByteAddr into 16-bit words at ShortAddr.
; In:    rcx = ByteAddr   source bytes
;        rdx = ShortAddr  destination words; the 16-byte path stores with
;                         movntdq, which requires a 16-byte aligned address
;        r8  = nByte      element count, compared as a signed value
; Out:   rax = last byte converted by the scalar tail, or 0 if the tail did
;              not run (incidental, same as before)
; Clobb: rcx, rdx, r9, xmm0-xmm2, flags
;----------------------------------------------------------
Byte_To_Short PROC
        pxor    xmm0, xmm0              ; zero source for the unpacks
        xor     iByte, iByte            ; FIX: counter was left uninitialised when nByte < 16
        cmp     nByte, 10h
        jl      START_EXTRA
        mov     iByte, 10h              ; iByte = element count once the current chunk is done

LOOP_SIMD:
        movdqu  xmm1, [ByteAddr]        ; 16 source bytes
        movdqa  xmm2, xmm1
        punpcklbw xmm1, xmm0            ; bytes 0..7  -> words
        punpckhbw xmm2, xmm0            ; bytes 8..15 -> words
        movntdq [ShortAddr], xmm1
        movntdq [ShortAddr + 10h], xmm2
        add     ByteAddr, 10h
        add     ShortAddr, 20h
        add     iByte, 10h
        cmp     iByte, nByte
        jle     LOOP_SIMD               ; another full chunk still fits
        sub     iByte, 10h              ; back to the number of elements actually converted

START_EXTRA:
        xor     eax, eax
        cmp     iByte, nByte
        jge     LOOP_END

LOOP_EXTRA:
        movzx   eax, byte ptr [ByteAddr]
        mov     word ptr [ShortAddr], ax
        inc     ByteAddr
        add     ShortAddr, 2
        inc     iByte
        cmp     iByte, nByte
        jl      LOOP_EXTRA

LOOP_END:
        ret
Byte_To_Short ENDP

;==========================================================
; Register, constant and argument names shared by the CheckPitch* procedures.
iPitch16        equ     rcx
rrImgAddr       equ     r8
rrBW            equ     r9
rrPitch         equ     r10
rrPitch1        equ     r11
rrPitch2        equ     r12
rrPitch3        equ     r13
rrInspBot       equ     r15
ix              equ     rsi
iy              equ     rdi

ConvShift       equ     1
SprShift        equ     4
ConvNSprShift   equ     5

; Stack arguments, addressed from the rebased rbp set up by EnterPitchFrame.
InspLeft        equ     qword ptr[rbp+ 30h]
InspTop         equ     qword ptr[rbp+ 38h]
InspWidth       equ     qword ptr[rbp+ 40h]
InspHeight      equ     qword ptr[rbp+ 48h]
DPC_SUPP        EQU     QWORD PTR[RBP+ 50H]

xxSupp2         equ     xmm3
xxSrc           equ     xmm4
xxSrc2          equ     xmm5
xxCmp           equ     xmm6
xxCmp2          equ     xmm7
xxSpr           equ     xmm8
xxOne           equ     xmm9
xxfSpr          equ     xmm10
xxsSpr          equ     xmm11
xxMinus         equ     xmm12
xxMinus2        equ     xmm13
xxRslt          equ     xmm14

;----------------------------------------------------------
; CheckPitchAsm
; Horizontal pitch comparison. For every pixel p[x] of the inspection region,
; with pitch = iPitch16 >> 4 and sSpr = iPitch16 & 15, it forms
;     d = 16*(p[x] + p[x+1])
;         - (16 - sSpr)*p[x+pitch] - 16*p[x+pitch+1] - sSpr*p[x+pitch+2]
; and accumulates max(|d| - threshold, 0), where threshold is the low 16 bits
; of (rdx << 5).
; In:    rcx = iPitch16, rdx = suppression value, r8 = image base address,
;        r9 = buffer width (bytes added per row),
;        stack: InspLeft, InspTop, InspWidth, InspHeight
; Out:   rax = sum of the two 64-bit lanes of the accumulator; 0 when
;              InspWidth or InspHeight is not positive
; Notes: rows are scanned 16 pixels at a time while the address is below
;        row start + InspWidth, so the last chunk may read past the region.
; Clobb: rax, rcx, rdx, r8, xmm0, xmm3-xmm5, flags
;----------------------------------------------------------
CheckPitchAsm PROC
        local   ImgAddrLine     :qword
        local   ImgAddrLineEnd  :qword
        local   InspBottom      :qword

        EnterPitchFrame

        mov     rrPitch, iPitch16
        sar     rrPitch, SprShift               ; whole-pixel part of the pitch

        mov     rax, rdx                        ; suppression threshold = rdx * 32
        shl     rax, ConvNSprShift
        BroadcastWord xxSupp2, rax

        mov     rax, 10h                        ; spr: full weight (16)
        BroadcastWord xxSpr, rax

        mov     rax, iPitch16                   ; sSpr: fractional part of the pitch
        and     rax, 0fh
        BroadcastWord xxsSpr, rax

        movdqa  xxfSpr, xxSpr                   ; fSpr = 16 - sSpr
        psubw   xxfSpr, xxsSpr

        mov     rax, InspTop
        add     rax, InspHeight
        mov     InspBottom, rax                 ; first row index past the region

        lea     rrPitch2, [rrPitch + 1]
        lea     rrPitch3, [rrPitch + 2]

        mov     rax, 1                          ; fill every word lane with 1
        BroadcastWord xxOne, rax

        ; Build the image address:
        ; ImgAddr = ImgAddr + BuffWidth*InspTop + InspLeft
        mov     rax, rrBW
        mul     InspTop
        add     rax, InspLeft
        add     rrImgAddr, rax
        mov     ImgAddrLine, rrImgAddr
        mov     rax, rrImgAddr
        add     rax, InspWidth
        mov     ImgAddrLineEnd, rax

        mov     iy, InspTop
        pxor    xxRslt, xxRslt
        pxor    xmm0, xmm0                      ; zero source for the unpacks

        ; FIX: the scan loop is do-while shaped; skip it for an empty region
        ; instead of reading one chunk anyway.
        cmp     InspHeight, 0
        jle     End_Buff
        cmp     InspWidth, 0
        jle     End_Buff

Cmp_16:
        ; 1. Source: 16 * (p[x] + p[x+1])
        movdqu  xxSrc, [rrImgAddr]
        movdqa  xxSrc2, xxSrc
        punpcklbw xxSrc, xmm0
        punpckhbw xxSrc2, xmm0

        movdqu  xxMinus, [rrImgAddr+ 1]
        movdqa  xxMinus2, xxMinus
        punpcklbw xxMinus, xmm0
        punpckhbw xxMinus2, xmm0

        paddw   xxSrc, xxMinus
        paddw   xxSrc2, xxMinus2
        pmullw  xxSrc, xxSpr
        pmullw  xxSrc2, xxSpr

        ; 2. Comparison pixels one pitch away
        movdqu  xxCmp, [rrImgAddr+ rrPitch]
        movdqa  xxCmp2, xxCmp
        punpcklbw xxCmp, xmm0
        punpckhbw xxCmp2, xmm0
        pmullw  xxCmp, xxfSpr                   ; weight fSpr
        pmullw  xxCmp2, xxfSpr
        psubw   xxSrc, xxCmp
        psubw   xxSrc2, xxCmp2

        movdqu  xxCmp, [rrImgAddr+ rrPitch2]
        movdqa  xxCmp2, xxCmp
        punpcklbw xxCmp, xmm0
        punpckhbw xxCmp2, xmm0
        pmullw  xxCmp, xxSpr                    ; weight 16
        pmullw  xxCmp2, xxSpr
        psubw   xxSrc, xxCmp
        psubw   xxSrc2, xxCmp2

        movdqu  xxCmp, [rrImgAddr+ rrPitch3]
        movdqa  xxCmp2, xxCmp
        punpcklbw xxCmp, xmm0
        punpckhbw xxCmp2, xmm0
        pmullw  xxCmp, xxsSpr                   ; weight sSpr
        pmullw  xxCmp2, xxsSpr
        psubw   xxSrc, xxCmp
        psubw   xxSrc2, xxCmp2

        ; 3. |d|, saturating subtraction of the threshold, pairwise word sums
        pabsw   xxSrc, xxSrc                    ; absolute value
        psubusw xxSrc, xxSupp2
        pmaddwd xxSrc, xxOne                    ; 16 -> 32 bit
        pabsw   xxSrc2, xxSrc2                  ; absolute value
        psubusw xxSrc2, xxSupp2
        pmaddwd xxSrc2, xxOne                   ; 16 -> 32 bit

        ; 4. Widen 32 -> 64 bit and add to the result
        movdqa  xxCmp, xxSrc
        punpckldq xxCmp, xmm0
        punpckhdq xxSrc, xmm0
        paddq   xxRslt, xxSrc
        paddq   xxRslt, xxCmp

        movdqa  xxCmp, xxSrc2
        punpckldq xxCmp, xmm0
        punpckhdq xxSrc2, xmm0
        paddq   xxRslt, xxSrc2
        paddq   xxRslt, xxCmp

        add     rrImgAddr, 10h
        cmp     rrImgAddr, ImgAddrLineEnd
        jb      Cmp_16                          ; addresses: unsigned compare

        inc     iy
        cmp     iy, InspBottom
        jge     End_Buff

        mov     rax, ImgAddrLine                ; advance to the next row
        add     rax, rrBW
        mov     ImgAddrLine, rax
        mov     rrImgAddr, rax
        add     rax, InspWidth
        mov     ImgAddrLineEnd, rax
        jmp     Cmp_16

End_Buff:
        pextrq  rax, xxRslt, 0
        pextrq  rcx, xxRslt, 1
        add     rax, rcx

        LeavePitchFrame
        ret
CheckPitchAsm ENDP

;----------------------------------------------------------
; CheckPitchPixelAsm
; Whole-pixel variant: accumulates max(|p[x] - p[x+rrPitch]| - xxSupp2, 0)
; over the inspection region, 16 pixels per step.
; NOTE(review): this procedure was left unfinished. rrPitch (r10) and
; xxSupp2 (xmm3) are read by the loop but never written here, so they hold
; whatever the caller left in them. Their intended derivation is not visible
; in this file; set them before relying on the result.
; In:    r8 = image base address, r9 = buffer width (bytes added per row),
;        stack: InspLeft, InspTop, InspWidth, InspHeight
; Out:   rax = sum of the two 64-bit lanes of the accumulator; 0 when
;              InspWidth or InspHeight is not positive
; Clobb: rax, rcx, rdx, r8, xmm0, xmm4, xmm5, flags
;----------------------------------------------------------
CheckPitchPixelAsm PROC
        local   ImgAddrLine     :qword
        local   ImgAddrLineEnd  :qword
        local   InspBottom      :qword

        EnterPitchFrame

        mov     rax, InspTop
        add     rax, InspHeight
        mov     InspBottom, rax                 ; first row index past the region

        ; FIX: xxOne is the pmaddwd multiplier below but was never filled here.
        mov     rax, 1
        BroadcastWord xxOne, rax

        ; ImgAddr = ImgAddr + BuffWidth*InspTop + InspLeft
        mov     rax, rrBW
        mul     InspTop
        add     rax, InspLeft
        add     rrImgAddr, rax
        mov     ImgAddrLine, rrImgAddr

        ; FIX: the end of the first row was computed as offset + address;
        ; it is row start + InspWidth, as in the row-advance code below.
        mov     rax, rrImgAddr
        add     rax, InspWidth
        mov     ImgAddrLineEnd, rax

        mov     iy, InspTop
        pxor    xxRslt, xxRslt
        pxor    xmm0, xmm0                      ; zero source for the unpacks

        ; FIX: skip the do-while scan for an empty region.
        cmp     InspHeight, 0
        jle     End_Buff
        cmp     InspWidth, 0
        jle     End_Buff

Cmp_16:
        movdqu  xxSrc, [rrImgAddr]
        movdqa  xxSrc2, xxSrc
        punpcklbw xxSrc, xmm0
        punpckhbw xxSrc2, xmm0

        movdqu  xxCmp, [rrImgAddr+ rrPitch]
        movdqa  xxCmp2, xxCmp
        punpcklbw xxCmp, xmm0
        punpckhbw xxCmp2, xmm0

        ; low eight pixels
        psubw   xxSrc, xxCmp
        pabsw   xxSrc, xxSrc
        psubusw xxSrc, xxSupp2
        pmaddwd xxSrc, xxOne                    ; 16 -> 32 bit
        movdqa  xxCmp, xxSrc
        punpckldq xxSrc, xmm0                   ; 32 -> 64 bit
        punpckhdq xxCmp, xmm0
        paddq   xxRslt, xxSrc
        paddq   xxRslt, xxCmp

        ; high eight pixels
        psubw   xxSrc2, xxCmp2
        pabsw   xxSrc2, xxSrc2
        psubusw xxSrc2, xxSupp2
        pmaddwd xxSrc2, xxOne
        movdqa  xxCmp, xxSrc2
        punpckldq xxSrc2, xmm0
        punpckhdq xxCmp, xmm0
        paddq   xxRslt, xxSrc2
        paddq   xxRslt, xxCmp

        add     rrImgAddr, 10h
        cmp     rrImgAddr, ImgAddrLineEnd
        jb      Cmp_16                          ; addresses: unsigned compare

        inc     iy
        cmp     iy, InspBottom
        jge     End_Buff

        mov     rax, ImgAddrLine                ; advance to the next row
        add     rax, rrBW
        mov     ImgAddrLine, rax
        mov     rrImgAddr, rax
        add     rax, InspWidth
        mov     ImgAddrLineEnd, rax
        jmp     Cmp_16

End_Buff:
        pextrq  rax, xxRslt, 0
        pextrq  rcx, xxRslt, 1
        add     rax, rcx

        LeavePitchFrame
        ret
CheckPitchPixelAsm ENDP

;----------------------------------------------------------
; CheckPitchAsmVert
; Vertical pitch comparison. With BW = buffer width, pitch = iPitch16 >> 4
; and sSpr = iPitch16 & 15, every pixel p[y] contributes
;     d = 16*(p[y] + p[y+1])
;         - (16 - sSpr)*p[y+pitch] - 16*p[y+pitch+1] - sSpr*p[y+pitch+2]
; (row indices, i.e. multiples of BW bytes) and max(|d| - threshold, 0) is
; accumulated, where threshold is the low 16 bits of (rdx << 5).
; Columns are walked in strips of 16 pixels; InspWidth is rounded down to a
; multiple of 16, so a narrower remainder is not scanned.
; In:    rcx = iPitch16, rdx = suppression value, r8 = image base address,
;        r9 = buffer width (bytes added per row),
;        stack: InspLeft, InspTop, InspWidth, InspHeight
; Out:   rax = sum of the two 64-bit lanes of the accumulator
; Clobb: rax, rcx, rdx, r8, xmm0, xmm3-xmm5, flags
;----------------------------------------------------------
CheckPitchAsmVert PROC
        local   ImgAddrLine     :qword
        local   InspBottom      :qword
        local   InspRight       :qword

        EnterPitchFrame

        mov     rrPitch, iPitch16
        sar     rrPitch, SprShift               ; whole-row part of the pitch

        mov     rax, rdx                        ; suppression threshold = rdx * 32
        shl     rax, ConvNSprShift
        BroadcastWord xxSupp2, rax

        mov     rax, 10h                        ; spr: full weight (16)
        BroadcastWord xxSpr, rax

        mov     rax, iPitch16                   ; sSpr: fractional part of the pitch
        and     rax, 0fh
        BroadcastWord xxsSpr, rax

        movdqa  xxfSpr, xxSpr                   ; fSpr = 16 - sSpr
        psubw   xxfSpr, xxsSpr

        mov     rax, InspTop
        add     rax, InspHeight
        mov     InspBottom, rax                 ; first row index past the region

        mov     rax, InspWidth                  ; InspRight = InspLeft + (InspWidth rounded down to 16)
        sar     rax, SprShift
        sal     rax, SprShift
        add     rax, InspLeft
        mov     InspRight, rax

        mov     rax, 1                          ; fill every word lane with 1
        BroadcastWord xxOne, rax

        mov     rax, rrBW                       ; byte offsets of rows pitch, pitch+1, pitch+2
        mul     rrPitch
        mov     rrPitch1, rax
        add     rax, rrBW
        mov     rrPitch2, rax
        add     rax, rrBW
        mov     rrPitch3, rax

        ; ImgAddr = ImgAddr + BuffWidth*InspTop + InspLeft
        mov     rax, rrBW
        mul     InspTop
        add     rax, InspLeft
        add     rrImgAddr, rax
        mov     ImgAddrLine, rrImgAddr          ; top of the current 16-pixel strip

        pxor    xxRslt, xxRslt
        pxor    xmm0, xmm0                      ; zero source for the unpacks

        mov     ix, InspLeft
        mov     iy, InspTop
        jmp     LOOP_X_CHECK

LOOP_X_TAIL:                                    ; next strip: 16 bytes right, back to the top row
        add     ix, 10h
        mov     rrImgAddr, ImgAddrLine
        add     rrImgAddr, 10h
        mov     ImgAddrLine, rrImgAddr
        mov     iy, InspTop

LOOP_X_CHECK:
        cmp     ix, InspRight
        jge     End_Buff
        jmp     LOOP_Y_CHECK

LOOP_Y_TAIL:                                    ; next row within the strip
        inc     iy
        add     rrImgAddr, rrBW

LOOP_Y_CHECK:
        cmp     iy, InspBottom
        jge     LOOP_X_TAIL

        ; 1. Source: 16 * (p[y] + p[y+1])
        movdqu  xxSrc, [rrImgAddr]
        movdqa  xxSrc2, xxSrc
        punpcklbw xxSrc, xmm0
        punpckhbw xxSrc2, xmm0

        movdqu  xxCmp, [rrImgAddr+ rrBW]
        movdqa  xxCmp2, xxCmp
        punpcklbw xxCmp, xmm0
        punpckhbw xxCmp2, xmm0

        paddw   xxSrc, xxCmp
        paddw   xxSrc2, xxCmp2
        psllw   xxSrc, SprShift
        psllw   xxSrc2, SprShift

        ; 2. Comparison rows one pitch away
        movdqu  xxCmp, [rrImgAddr+ rrPitch1]
        movdqa  xxCmp2, xxCmp
        punpcklbw xxCmp, xmm0
        punpckhbw xxCmp2, xmm0
        pmullw  xxCmp, xxfSpr                   ; weight fSpr
        pmullw  xxCmp2, xxfSpr
        psubw   xxSrc, xxCmp
        psubw   xxSrc2, xxCmp2

        movdqu  xxCmp, [rrImgAddr+ rrPitch2]
        movdqa  xxCmp2, xxCmp
        punpcklbw xxCmp, xmm0
        punpckhbw xxCmp2, xmm0
        psllw   xxCmp, SprShift                 ; weight 16
        psllw   xxCmp2, SprShift
        psubw   xxSrc, xxCmp
        psubw   xxSrc2, xxCmp2

        movdqu  xxCmp, [rrImgAddr+ rrPitch3]
        movdqa  xxCmp2, xxCmp
        punpcklbw xxCmp, xmm0
        punpckhbw xxCmp2, xmm0
        pmullw  xxCmp, xxsSpr                   ; weight sSpr
        pmullw  xxCmp2, xxsSpr
        psubw   xxSrc, xxCmp
        psubw   xxSrc2, xxCmp2

        ; 3. |d|, saturating subtraction of the threshold, pairwise word sums
        pabsw   xxSrc, xxSrc                    ; absolute value
        pabsw   xxSrc2, xxSrc2                  ; absolute value
        psubusw xxSrc, xxSupp2
        psubusw xxSrc2, xxSupp2
        pmaddwd xxSrc, xxOne                    ; 16 -> 32 bit
        pmaddwd xxSrc2, xxOne                   ; 16 -> 32 bit

        ; 4. Widen 32 -> 64 bit and add to the result
        movdqa  xxCmp, xxSrc
        punpckldq xxCmp, xmm0
        punpckhdq xxSrc, xmm0
        paddq   xxRslt, xxSrc
        paddq   xxRslt, xxCmp

        movdqa  xxCmp, xxSrc2
        punpckldq xxCmp, xmm0
        punpckhdq xxSrc2, xmm0
        paddq   xxRslt, xxSrc2
        paddq   xxRslt, xxCmp

        jmp     LOOP_Y_TAIL

End_Buff:
        pextrq  rax, xxRslt, 0
        pextrq  rcx, xxRslt, 1
        add     rax, rcx

        LeavePitchFrame
        ret
CheckPitchAsmVert ENDP

end