; Copyright (c) Microsoft Corporation.  All rights reserved.
;
; Custom Build Step, including a listing file placed in intermediate directory
; but without Source Browser information
; debug:
;   ml -c -Zi "-Fl$(IntDir)\$(InputName).lst" "-Fo$(IntDir)\$(InputName).obj" "$(InputPath)"
; release:
;   ml -c "-Fl$(IntDir)\$(InputName).lst" "-Fo$(IntDir)\$(InputName).obj" "$(InputPath)"
; outputs:
;   $(IntDir)\$(InputName).obj
;
; Custom Build Step, including a listing file placed in intermediate directory
; and Source Browser information also placed in intermediate directory
; debug:
;   ml -c -Zi "-Fl$(IntDir)\$(InputName).lst" "-FR$(IntDir)\$(InputName).sbr" "-Fo$(IntDir)\$(InputName).obj" "$(InputPath)"
; release:
;   ml -c "-Fl$(IntDir)\$(InputName).lst" "-FR$(IntDir)\$(InputName).sbr" "-Fo$(IntDir)\$(InputName).obj" "$(InputPath)"
; outputs:
;   $(IntDir)\$(InputName).obj
;   $(IntDir)\$(InputName).sbr
;
; Syntax : MASM (Intel operand order).
; Target : x86-64.  The code below uses 64-bit registers (rax, r8, ...), so
;          the "ml" command lines above have to be run with the 64-bit
;          assembler (ml64).
; ABI    : Microsoft x64 (arguments in rcx, rdx, r8, r9, then on the stack
;          above the 32-byte shadow area).

;.386
;.MODEL FLAT, C

PBYTE   TYPEDEF PTR BYTE
PWORD   TYPEDEF PTR WORD
PDWORD  TYPEDEF PTR DWORD

.data

.code

; NOTE(review): the comment below looks like a leftover from the sample this
; file was started from; nothing in this file computes a GCD.
;   Euclid's algorithm finds the greatest common divisor by repeatedly
;   subtracting the smaller number from the larger number until zero
;   is reached. The number remaining is the greatest common divisor.

;==========================================================
; Byte_To_Short
;
; Widens nByte unsigned bytes to zero-extended 16-bit words.
;
; The C prototype is not visible in this file; presumably
;   void Byte_To_Short(const BYTE *src, WORD *dst, __int64 nByte)
; -- TODO confirm against the caller.
;
; In:    rcx = source bytes
;        rdx = destination words.  The 16-byte chunks are written with
;              movntdq, which requires a 16-byte aligned address, so the
;              destination must be 16-byte aligned whenever nByte >= 16.
;        r8  = element count, read as a signed 64-bit value; a count <= 0
;              converts nothing.
; Out:   nothing meaningful (rax is scratch).
; Clobb: rax, rcx, rdx, r8, xmm0, xmm1, xmm2, flags (all volatile).
;
; NOTE(review): the non-temporal stores are not followed by an sfence here;
; confirm whether any consumer on another thread depends on store ordering.
;==========================================================
ByteAddr    equ rcx                     ; full 64-bit pointers (were ecx/edx,
ShortAddr   equ rdx                     ; which truncated the addresses)
nByte       equ r8                      ; counts down to zero as we convert

Byte_To_Short PROC
        pxor    xmm0, xmm0              ; xmm0 = 0, the high byte of every word
        cmp     nByte, 16
        jl      START_EXTRA             ; not even one full chunk

LOOP_SIMD:                              ; invariant: nByte >= 16 here
        movdqu  xmm1, xmmword ptr [ByteAddr]
        movdqa  xmm2, xmm1
        punpcklbw xmm1, xmm0            ; bytes 0..7  -> 8 words
        punpckhbw xmm2, xmm0            ; bytes 8..15 -> 8 words
        movntdq xmmword ptr [ShortAddr], xmm1
        movntdq xmmword ptr [ShortAddr + 16], xmm2
        add     ByteAddr, 16
        add     ShortAddr, 32
        sub     nByte, 16
        cmp     nByte, 16
        jge     LOOP_SIMD

START_EXTRA:                            ; 0..15 elements left (or count <= 0)
        test    nByte, nByte
        jle     DONE_EXTRA              ; nothing left: never touch [ByteAddr]

LOOP_EXTRA:                             ; invariant: nByte > 0 here
        movzx   eax, byte ptr [ByteAddr]
        mov     word ptr [ShortAddr], ax
        inc     ByteAddr
        add     ShortAddr, 2
        dec     nByte
        jnz     LOOP_EXTRA

DONE_EXTRA:
        ret
Byte_To_Short ENDP

;==========================================================
; CheckPitchAsm
;
; Accumulates a mismatch score between an image region and the same region
; displaced by iPitch bytes, using integer weights.  For every byte address
; p that is visited:
;
;   d     = SPR*(p[0] + p[1])
;           - fSPR*p[iPitch] - SPR*p[iPitch+1] - (SPR - fSPR)*p[iPitch+2]
;   score += max(|d| - 1, 0)
;
; d is computed in 16-bit lanes (pmullw/paddw/psubw wrap modulo 2^16); only
; the low 16 bits of SPR and fSPR are used.
; NOTE(review): presumably the weights are small enough that d never wraps
; -- confirm the value range with the caller.
;
; The arguments are read as 64-bit values.  The C prototype is not visible
; in this file; presumably every scalar is declared as a 64-bit integer
; -- TODO confirm.
;
; In:    rcx       = iPitch   byte offset to the data being compared against
;        rdx       = SPR      weight
;        r8        = fSPR     weight
;        r9        = sSPR     not read; the third weight is SPR - fSPR
;        [rbp+30h] = InspLeft   \
;        [rbp+38h] = InspRight   | region, right/bottom exclusive
;        [rbp+40h] = InspTop     |
;        [rbp+48h] = InspBottom /
;        [rbp+50h] = pData      base address of the image
;        [rbp+58h] = BuffWidth  bytes per image line
;        The stack offsets rely on the "push rbp / mov rbp, rsp" prologue
;        that the assembler emits because of the LOCAL directives.
; Out:   rax = accumulated score; 0 when the region is empty.
; Clobb: rax, rcx, rdx, r8, r10, r11, xmm0-xmm5, flags.
;        rdi, r15 and xmm6-xmm14 are used but saved and restored.
;
; Each row is processed in 16-byte chunks, so when InspRight - InspLeft is
; not a multiple of 16 the last chunk extends past InspRight, and every
; chunk reads 16 bytes starting at p + iPitch + 2.  The buffer has to be
; readable that far.
;
; Requires SSSE3 (pabsw).
;
; NOTE(review): the procedure changes rsp and non-volatile registers but
; declares no unwind data (PROC FRAME), so an exception raised inside it
; cannot be unwound.  Left as in the original.
;==========================================================
iPitch      equ rcx
SPR         equ rdx
fSPR        equ r8
sSPR        equ r9
InspLeft    equ qword ptr [rbp + 30h]
InspRight   equ qword ptr [rbp + 38h]
InspTop     equ qword ptr [rbp + 40h]
InspBottom  equ qword ptr [rbp + 48h]
pData       equ qword ptr [rbp + 50h]
BuffWidth   equ qword ptr [rbp + 58h]

iy          equ rdi                     ; current row (callee-saved: saved below)

xxSrc       equ xmm4
xxSrc2      equ xmm5
xxCmp       equ xmm6
xxCmp2      equ xmm7
xxSpr       equ xmm8
xxOne       equ xmm9
xxfSpr      equ xmm10
xxsSpr      equ xmm11
xxMinus     equ xmm12
xxMinus2    equ xmm13
xxRslt      equ xmm14

rrImgAddr   equ r15                     ; running pointer (callee-saved: saved below)
rrPitch     equ r10                     ; iPitch
rrPitch2    equ r11                     ; iPitch + 1

CheckPitchAsm PROC
        local   ImgAddrLine   : qword   ; address of the first byte of the current row
        local   ImgLineEnd    : qword   ; address one past the current row's last byte
        local   ImgLineLength : qword   ; InspRight - InspLeft
        local   SavedRdi      : qword
        local   SavedR15      : qword
        local   SavedXmm[18]  : qword   ; 9 x 16 bytes for xmm6..xmm14

        ; An empty region contributes nothing.  Checked before any
        ; non-volatile register has been touched, so a plain return is enough.
        mov     rax, InspRight
        cmp     rax, InspLeft
        jle     Empty_Region
        mov     rax, InspBottom
        cmp     rax, InspTop
        jle     Empty_Region

        ; The Microsoft x64 ABI makes rdi, r15 and xmm6-xmm15 callee-saved.
        ; movdqu is used because the frame is not known to be 16-byte aligned.
        mov     SavedRdi, rdi
        mov     SavedR15, r15
        movdqu  xmmword ptr SavedXmm[0],   xmm6
        movdqu  xmmword ptr SavedXmm[16],  xmm7
        movdqu  xmmword ptr SavedXmm[32],  xmm8
        movdqu  xmmword ptr SavedXmm[48],  xmm9
        movdqu  xmmword ptr SavedXmm[64],  xmm10
        movdqu  xmmword ptr SavedXmm[80],  xmm11
        movdqu  xmmword ptr SavedXmm[96],  xmm12
        movdqu  xmmword ptr SavedXmm[112], xmm13
        movdqu  xmmword ptr SavedXmm[128], xmm14

        mov     rrPitch, iPitch
        mov     rrPitch2, rrPitch
        add     rrPitch2, 1h

        ; xxOne = 1 in every word lane
        mov     rax, 1
        movd    xxOne, rax
        movdqa  xmm0, xxOne
        punpcklwd xmm0, xxOne
        pshufd  xxOne, xmm0, 0

        ; xxSpr = low word of SPR in every word lane
        mov     rax, SPR
        movd    xxSpr, rax
        movdqa  xmm0, xxSpr
        punpcklwd xmm0, xxSpr
        pshufd  xxSpr, xmm0, 0

        ; xxfSpr = low word of fSPR in every word lane
        mov     rax, fSPR
        movd    xxfSpr, rax
        movdqa  xmm0, xxfSpr
        punpcklwd xmm0, xxfSpr
        pshufd  xxfSpr, xmm0, 0

        ; xxsSpr = SPR - fSPR in every word lane
        movdqa  xxsSpr, xxSpr
        psubw   xxsSpr, xxfSpr

        ; Build the image address:
        ;   rrImgAddr = pData + BuffWidth*InspTop + InspLeft
        ; mul overwrites rdx; SPR (rdx) has already been consumed above.
        mov     rrImgAddr, pData
        mov     rax, BuffWidth
        mul     InspTop
        add     rax, InspLeft
        add     rrImgAddr, rax
        mov     ImgAddrLine, rrImgAddr

        mov     rax, InspRight
        sub     rax, InspLeft
        mov     ImgLineLength, rax
        add     rax, rrImgAddr
        mov     ImgLineEnd, rax

        mov     iy, InspTop
        pxor    xxRslt, xxRslt          ; two 64-bit partial sums
        pxor    xmm0, xmm0              ; constant zero used by the unpacks

Cmp_16:
        ; 1. Source: SPR * (p[0] + p[1])
        movdqu  xxSrc, xmmword ptr [rrImgAddr]
        movdqa  xxSrc2, xxSrc
        punpcklbw xxSrc, xmm0           ; bytes -> words, low half
        punpckhbw xxSrc2, xmm0          ; bytes -> words, high half

        movdqu  xxMinus, xmmword ptr [rrImgAddr + 1]
        movdqa  xxMinus2, xxMinus
        punpcklbw xxMinus, xmm0
        punpckhbw xxMinus2, xmm0

        paddw   xxSrc, xxMinus
        paddw   xxSrc2, xxMinus2

        pmullw  xxSrc, xxSpr
        pmullw  xxSrc2, xxSpr

        ; 2. Compare data: subtract fSPR * p[iPitch]
        movdqu  xxCmp, xmmword ptr [rrImgAddr + rrPitch]
        movdqa  xxCmp2, xxCmp
        punpcklbw xxCmp, xmm0
        punpckhbw xxCmp2, xmm0
        pmullw  xxCmp, xxfSpr
        pmullw  xxCmp2, xxfSpr
        psubw   xxSrc, xxCmp
        psubw   xxSrc2, xxCmp2

        ; subtract SPR * p[iPitch + 1]
        movdqu  xxCmp, xmmword ptr [rrImgAddr + rrPitch2]
        movdqa  xxCmp2, xxCmp
        punpcklbw xxCmp, xmm0
        punpckhbw xxCmp2, xmm0
        pmullw  xxCmp, xxSpr
        pmullw  xxCmp2, xxSpr
        psubw   xxSrc, xxCmp
        psubw   xxSrc2, xxCmp2

        ; subtract (SPR - fSPR) * p[iPitch + 2]
        movdqu  xxCmp, xmmword ptr [rrImgAddr + rrPitch2 + 1]
        movdqa  xxCmp2, xxCmp
        punpcklbw xxCmp, xmm0
        punpckhbw xxCmp2, xmm0
        pmullw  xxCmp, xxsSpr
        pmullw  xxCmp2, xxsSpr
        psubw   xxSrc, xxCmp
        psubw   xxSrc2, xxCmp2

        ; |d| - 1 with unsigned saturation, so |d| <= 1 counts as 0 and the
        ; value handed to the signed pmaddwd never exceeds 7FFFh.
        pabsw   xxSrc, xxSrc            ; absolute value
        psubusw xxSrc, xxOne
        pmaddwd xxSrc, xxOne            ; add adjacent words (16 -> 32 bit)

        pabsw   xxSrc2, xxSrc2          ; absolute value
        psubusw xxSrc2, xxOne
        pmaddwd xxSrc2, xxOne           ; add adjacent words (16 -> 32 bit)

        ; Widen the 32-bit sums to 64 bits and accumulate.
        movdqa  xxCmp, xxSrc
        punpckldq xxCmp, xmm0           ; low two dwords  -> qwords
        punpckhdq xxSrc, xmm0           ; high two dwords -> qwords
        paddq   xxRslt, xxSrc
        paddq   xxRslt, xxCmp

        movdqa  xxCmp, xxSrc2
        punpckldq xxCmp, xmm0
        punpckhdq xxSrc2, xmm0
        paddq   xxRslt, xxSrc2
        paddq   xxRslt, xxCmp

        add     rrImgAddr, 10h
        cmp     rrImgAddr, ImgLineEnd
        jb      Cmp_16                  ; addresses compare unsigned

        ; Next row.
        inc     iy
        cmp     iy, InspBottom
        jge     End_Buff

        mov     rax, ImgAddrLine
        add     rax, BuffWidth
        mov     ImgAddrLine, rax
        mov     rrImgAddr, rax
        add     rax, ImgLineLength
        mov     ImgLineEnd, rax
        jmp     Cmp_16

End_Buff:
        ; rax = low qword + high qword of xxRslt.  Done in registers: the
        ; former spill through "lea esi, ..." / "[esi]" truncated the stack
        ; address to 32 bits and also clobbered callee-saved rsi.
        movdqa  xmm0, xxRslt
        psrldq  xmm0, 8
        paddq   xmm0, xxRslt
        movd    rax, xmm0               ; 64-bit move (same form as the movd uses above)

        movdqu  xmm6,  xmmword ptr SavedXmm[0]
        movdqu  xmm7,  xmmword ptr SavedXmm[16]
        movdqu  xmm8,  xmmword ptr SavedXmm[32]
        movdqu  xmm9,  xmmword ptr SavedXmm[48]
        movdqu  xmm10, xmmword ptr SavedXmm[64]
        movdqu  xmm11, xmmword ptr SavedXmm[80]
        movdqu  xmm12, xmmword ptr SavedXmm[96]
        movdqu  xmm13, xmmword ptr SavedXmm[112]
        movdqu  xmm14, xmmword ptr SavedXmm[128]
        mov     r15, SavedR15
        mov     rdi, SavedRdi
        ret

Empty_Region:
        xor     eax, eax                ; score 0; nothing was saved, nothing to restore
        ret
CheckPitchAsm ENDP

end