// ConvInt1.cpp: implementation of the CConvInt1 class. // ////////////////////////////////////////////////////////////////////// #include "stdafx.h" #include "ConvInt1.h" #ifdef _DEBUG #undef THIS_FILE static char THIS_FILE[]=__FILE__; #define new DEBUG_NEW #endif int CConvInt1::m_nDefectNum = 0; int CConvInt1::m_nDefectPointX[MAX_DEFECTPIX_NUM] = {0, }; int CConvInt1::m_nDefectPointY[MAX_DEFECTPIX_NUM] = {0, }; short CConvInt1::m_sDefectType[MAX_DEFECTPIX_NUM] = {0, }; int CConvInt1::m_nDefectValue[MAX_DEFECTPIX_NUM] = {0, }; ////////////////////////////////////////////////////////////////////// // Construction/Destruction ////////////////////////////////////////////////////////////////////// CConvInt1::CConvInt1() { m_nLastDefectNum =0; } CConvInt1::~CConvInt1() { } void CConvInt1::ReadyToConvolution(ConvParam* pParam) { m_pParam = pParam; m_nDefectNum = 0; //ÇÁ·¹ÀÓ¸¶´ÙÀÇ °áÇÔPOINT¼ö m_nDefectPairedNum = 0; // Pairing ÇÑ °áÇÔ ÀúÀåÇÒ ±¸Á¶Ã¼ ÃʱâÈ­. for (int i = 0; i < MAX_DEFECTPIX_NUM; i++) { m_nDefectPointX[i] = 0; m_nDefectPointY[i] = 0; m_sDefectType[i] = 0; m_nDefectValue[i] = 0; m_DefectPaired[i].Reset(); } } ////////////////////////////////////////////////////////////////////////// DIT_RESULT CConvInt1::PairingPlus(double dPitchsize) { PairingPlus_1(dPitchsize); return DIT_CONV_SUCCESS; ////////////////////////////////////////////////////////////////////////// //MMX¿¡¼­ thresholding±îÁöÇÏ°í ³ª¿À´Â °áÇÔÀÇ pointÁ¤º¸´Â pair·Î ³ª¿À¹Ç·Î //½ÇÁ¦ ÇѰ³ÀÇ pointÁ¤º¸¸¦ °Å¸£´Â °úÁ¤ÀÌ ÇÊ¿äÇÏ´Ù. int i, j; BOOL ignore = FALSE; for(i = 0; i < m_nDefectNum; i++) { m_nDefectPointX[i]++; m_nDefectPointY[i]++; } for(i = 0; i < m_nDefectNum; i++) { if (m_nDefectPairedNum >= m_pParam->s_nFrameDefPixLimit) break; if(m_sDefectType[i] == DEFTYPE_DELETE) continue; if (m_sDefectType[i] == DEFTYPE_NODEFECT) continue; // ¿ÞÂÊ °Ë»ç ¿µ¿ª ¿¹¿Üó¸®. ignore = FALSE; if(m_nDefectPointX[i] < dPitchsize * 2) { // °æ°è¿¡¼­ÀÇ Paring °Ë»ç. for(j = i + 1; j < m_nDefectNum; j++) { if (m_sDefectType[i] == m_sDefectType[j]) continue; // ÇÑ ÇÇÄ¡ ¿À¸¥ÂÊ¿¡ °áÇÔÀÌ ÀÖ´Â °æ¿ì DEFPOS_LEFT_PAIR·Î ÆÇÁ¤ if (abs(m_nDefectPointX[j] - (m_nDefectPointX[i] + dPitchsize)) < 2 && m_nDefectPointY[j] == m_nDefectPointY[i]) { m_DefectPaired[m_nDefectPairedNum].s_DefectPos = DEFPOS_LEFT; m_DefectPaired[m_nDefectPairedNum].s_DefectPair = DEFPAIR_PPAIR; m_DefectPaired[m_nDefectPairedNum].s_nDefectX = m_nDefectPointX[i] + m_pParam->s_RectConv.left - 1; // Add Margin m_DefectPaired[m_nDefectPairedNum].s_nDefectY = m_nDefectPointY[i] + m_pParam->s_RectConv.top; m_DefectPaired[m_nDefectPairedNum].s_DefectType = static_cast(m_sDefectType[i]); //½ÇÁ¦¹é°áÇÔÀº1,Èæ°áÇÔÀº0 m_DefectPaired[m_nDefectPairedNum].SetPeak(m_nDefectValue[i], m_pParam->s_nThreshold, m_pParam->s_nConvWidth * m_pParam->s_nConvHeight); m_sDefectType[j] = DEFTYPE_DELETE; // ÇÇÄ¡ ¿À¸¥ÂÊ °áÇÔÀº ´õÀÌ»ó »ý°¢ÇÒ Çʿ䵵 ¾ø´Ù. m_nDefectPairedNum++; ignore = TRUE; break; } if (m_nDefectPointY[j] > m_nDefectPointY[i]) // ´õÀÌ»ó ºñ±³ÇÒ ÇÊ¿ä ¾ø´Ù. break; } if(ignore == FALSE) // Pair °¡ ¾Æ´Ñ °æ¿ì DEFPOS_LEFT_UNPAIR·Î ÆÇÁ¤. ÀÌÈÄ Classify¿¡¼­ ÀçÁ¶Á¤. { m_DefectPaired[m_nDefectPairedNum].s_DefectPos = DEFPOS_LEFT; m_DefectPaired[m_nDefectPairedNum].s_DefectPair = DEFPAIR_UNPAIR; m_DefectPaired[m_nDefectPairedNum].s_nDefectX = m_nDefectPointX[i] + m_pParam->s_RectConv.left - 1; // Add Margin m_DefectPaired[m_nDefectPairedNum].s_nDefectY = m_nDefectPointY[i] + m_pParam->s_RectConv.top; m_DefectPaired[m_nDefectPairedNum].s_DefectType = static_cast(m_sDefectType[i]); //½ÇÁ¦¹é°áÇÔÀº1,Èæ°áÇÔÀº0 m_DefectPaired[m_nDefectPairedNum].SetPeak(m_nDefectValue[i], m_pParam->s_nThreshold, m_pParam->s_nConvWidth * m_pParam->s_nConvHeight); m_nDefectPairedNum++; } } // °¡¿îµ¥ °Ë»ç¿µ¿ªÀÏ ¶§ ó¸®. else if((m_nDefectPointX[i] < m_pParam->s_RectConv.right-m_pParam->s_RectConv.left-dPitchsize) && (m_nDefectPointX[i] >= dPitchsize * 2)) { for(j = i + 1; j < m_nDefectNum; j++) { if (m_sDefectType[i] == m_sDefectType[j]) continue; // ÇÑ ÇÇÄ¡ ÀÌÈÄ¿¡ µðÆåÀÌ ÀÖÀ¸¸é Pairing. if (abs(m_nDefectPointX[j] - (m_nDefectPointX[i] + dPitchsize)) < 2 && m_nDefectPointY[j] == m_nDefectPointY[i]) { m_DefectPaired[m_nDefectPairedNum].s_DefectPos = DEFPOS_CENTER; m_DefectPaired[m_nDefectPairedNum].s_DefectPair = DEFPAIR_PPAIR; m_DefectPaired[m_nDefectPairedNum].s_nDefectX = m_nDefectPointX[i] + m_pParam->s_RectConv.left - 1; // Add Margin m_DefectPaired[m_nDefectPairedNum].s_nDefectY = m_nDefectPointY[i] + m_pParam->s_RectConv.top; m_DefectPaired[m_nDefectPairedNum].s_DefectType = static_cast(m_sDefectType[i]); //½ÇÁ¦¹é°áÇÔÀº1,Èæ°áÇÔÀº0 m_DefectPaired[m_nDefectPairedNum].SetPeak(m_nDefectValue[i], m_pParam->s_nThreshold, m_pParam->s_nConvWidth * m_pParam->s_nConvHeight); m_sDefectType[j] = DEFTYPE_DELETE; // ÇÇÄ¡ ¿À¸¥ÂÊ °áÇÔÀº ´õÀÌ»ó »ý°¢ÇÒ Çʿ䵵 ¾ø´Ù. m_nDefectPairedNum++; ignore = TRUE; break; } if (m_nDefectPointY[j] > m_nDefectPointY[i]) // ´õÀÌ»ó ºñ±³ÇÒ ÇÊ¿ä ¾ø´Ù. break; } if(ignore == FALSE) // Pair °¡ ¾Æ´Ñ °æ¿ì. { m_DefectPaired[m_nDefectPairedNum].s_DefectPos = DEFPOS_CENTER; m_DefectPaired[m_nDefectPairedNum].s_DefectPair = DEFPAIR_UNPAIR; m_DefectPaired[m_nDefectPairedNum].s_nDefectX = m_nDefectPointX[i] + m_pParam->s_RectConv.left - 1; // Add Margin m_DefectPaired[m_nDefectPairedNum].s_nDefectY = m_nDefectPointY[i] + m_pParam->s_RectConv.top; m_DefectPaired[m_nDefectPairedNum].s_DefectType = (DefectType)m_sDefectType[i]; //½ÇÁ¦¹é°áÇÔÀº1,Èæ°áÇÔÀº0 m_DefectPaired[m_nDefectPairedNum].SetPeak(m_nDefectValue[i], m_pParam->s_nThreshold, m_pParam->s_nConvWidth * m_pParam->s_nConvHeight); m_nDefectPairedNum++; } } // ¿À¸¥ÂÊ ¿µ¿ª¿¡¼­ ¿ÞÂÊ¿¡ DefectÀÌ ÀÖÀ¸¸é PairingÀÌ µÈ °áÇÔÀÌ´Ù. else if(m_nDefectPointX[i] >= m_pParam->s_RectConv.right-m_pParam->s_RectConv.left-dPitchsize) { m_DefectPaired[m_nDefectPairedNum].s_DefectPos = DEFPOS_RIGHT; m_DefectPaired[m_nDefectPairedNum].s_DefectPair = DEFPAIR_UNPAIR; m_DefectPaired[m_nDefectPairedNum].s_nDefectX = m_nDefectPointX[i] + m_pParam->s_RectConv.left - 1; // Add Margin m_DefectPaired[m_nDefectPairedNum].s_nDefectY = m_nDefectPointY[i] + m_pParam->s_RectConv.top; m_DefectPaired[m_nDefectPairedNum].s_DefectType = static_cast(m_sDefectType[i]); //½ÇÁ¦¹é°áÇÔÀº1,Èæ°áÇÔÀº0 m_DefectPaired[m_nDefectPairedNum].SetPeak(m_nDefectValue[i], m_pParam->s_nThreshold, m_pParam->s_nConvWidth * m_pParam->s_nConvHeight); m_nDefectPairedNum++; } } return DIT_CONV_SUCCESS; } DIT_RESULT CConvInt1::PairingMinus(double dPitchsize) { // -2Pitch Pairing // °áÇÔÀÌ Pair·Î ÀÖÀ» °æ¿ì¸¦ ó¸®. 1Pitchº¸´Ù Å©°í 2Pitchº¸´Ù ÀÛÀº °áÇÔ °ËÃâ. int i, j, nLenX, nLenY; for(i = 0; i < m_nDefectPairedNum; i++) { ////////////////////////////////////////////////////////////////////////// // CENTER if (m_DefectPaired[i].s_DefectPos != DEFPOS_CENTER && m_DefectPaired[i].s_DefectPos != DEFPOS_RIGHT) continue; if (m_DefectPaired[i].s_DefectPair != DEFPAIR_UNPAIR) continue; // +Pitch µ¿ÀÏÇÑ °áÇÔÀÌ ÀÖ´Â °æ¿ì, °áÇÔÀÌ ÇÇÄ¡º¸´Ù Ä¿¼­ -2Pitch¿¡ UNPAIR°¡ ÀÖ´Â °æ¿ì, ¿ø·¡ UNPAIR·Î °ËÃâµÈ °æ¿ì. // -nPitch ºÎÅÍ °è¼Ó °áÇÔÀÎ °æ¿ì. // -2Pitch ¶û ºñ±³. for(j = i - 1; j >= 0; j--) { nLenX = m_DefectPaired[j].s_nDefectX - (m_DefectPaired[i].s_nDefectX - static_cast(dPitchsize * 2)); nLenY = m_DefectPaired[j].s_nDefectY - m_DefectPaired[i].s_nDefectY; if (nLenY < 0) break; if (abs(nLenX) > 2 || nLenY > 0) continue; // -2Pitch°¡ Origin PairÀÎ °æ¿ì´Â -1Pitch¿Í -2Pitch °¡ °áÇÔ TypeÀÌ ´Ù¸£´Ù. if (m_DefectPaired[j].s_DefectPair == DEFPAIR_PPAIR) { if (m_DefectPaired[i].s_DefectType == m_DefectPaired[j].s_DefectType) { m_DefectPaired[i].s_DefectType = static_cast((m_DefectPaired[j].s_DefectType + 1) % 2); m_DefectPaired[i].s_nDefectX -= static_cast(dPitchsize); m_DefectPaired[i].s_DefectPair = DEFPAIR_MPAIR; } } else if (m_DefectPaired[j].s_DefectPair == DEFPAIR_MPAIR) { if (m_DefectPaired[i].s_DefectType != m_DefectPaired[j].s_DefectType) { m_DefectPaired[i].s_DefectType = m_DefectPaired[j].s_DefectType; m_DefectPaired[i].s_nDefectX -= static_cast(dPitchsize); m_DefectPaired[i].s_DefectPair = DEFPAIR_MPAIR; } } } } return DIT_CONV_SUCCESS; } int CConvInt1::HConvolution_SSE3() { //VConvolution_SEE3_6(); m_nLastDefectNum = HConvolution_SSE3_6(); //m_nLastDefectNum = HVConvolution_SEE3_6(); return m_nLastDefectNum; #define RPA_SIZE 128.0 #define SHIFT_COUNT 6 char* img = (char*)(m_pParam->s_lpBuffer + m_pParam->s_RectConv.top * m_pParam->s_nFrameWidth + m_pParam->s_RectConv.left); int Honrizontal_Len =2; int Vertical_Len = 2; int MAX_WORD_PROCESS = 16; int SEARCH_WORD_PROCESS = 8 ; if(m_pParam->s_RectConv.Width() <=0) return; if(Honrizontal_Len > SEARCH_WORD_PROCESS) return; //lks short mulFactorOrg[8] = {8,8,8,8,8,8,8,8}; // integer ¿¬»êÀ¸·Î floating¿¬»êÇÏ´Â È¿°ú¸¦ º¸±âÀ§ÇÏ¿© int Cmp_Pitch = (int)(m_pParam->s_dPitchConvReal * m_pParam->s_nPitchCycleConv); // ½Ç¼öpitch¿¡¼­ Á¤¼ö¼ººÐ double rPitch = (m_pParam->s_dPitchConvReal * m_pParam->s_nPitchCycleConv) - Cmp_Pitch; // ½Ç¼öpitch¿¡¼­ Á¤¼ö¼ººÐÀ» »« ³ª¸ÓÁö (0ºÎÅÍ 1»çÀÌÀÇ ¼ö) int rpRatio = (int)(rPitch*8.0 + 0.5); // ³ª¸ÓÁö ¼ººÐÀÇ 8ºÐÀ§ µî±Þ int rpRatioC = 8 - rpRatio; // 8ºÐÀ§µî±ÞÀÇ 8's complement short mulFactorR[8], mulFactorC[8]; // int i; int vertical_count=Vertical_Len; int horiz_count = m_pParam->s_nFrameWidth; int limit_vert_count = m_pParam->s_RectConv.Height()-Vertical_Len ; int limit_horiz_count = (m_pParam->s_RectConv.Width()-1)/MAX_WORD_PROCESS; int backup_limit_horiz_count=limit_horiz_count; int Vwidth =m_pParam->s_nFrameWidth * vertical_count; int Hwidth =Cmp_Pitch; int Short_Cmp_Pitch =Cmp_Pitch *2 ; int Num_Defect_Search =6; //2*2 int Backup_Num_Defect_Search=Num_Defect_Search; int Jmp_Char_Count = 6; // 6 pixel processing int Jmp_Short_Count =Jmp_Char_Count*2; int short_limit_horiz_count = (m_pParam->s_RectConv.Width()-Cmp_Pitch-MAX_WORD_PROCESS)/Jmp_Char_Count ; int backup_short_limit_horiz_count = short_limit_horiz_count; UCHAR *src_add = (UCHAR*)img ; UCHAR *cmp_add = src_add +Cmp_Pitch; short *Temp_Result_Add; short *Temp_Dynamic_thres_Result_Add; Temp_Result_Add = new short[m_pParam->s_RectConv.Width()]; memset(Temp_Result_Add,0,sizeof(short) * m_pParam->s_RectConv.Width()); short Dynamic_thres[8]; UCHAR thres_suppress[16]; short check_tmp1[8]; short check_tmp2[8]; short check_tmp3[8]; short check_tmp4[8]; short check_tmp5[8]; short *check_tmp; check_tmp=check_tmp1; for(int suppress_i=0;suppress_i<16;suppress_i++) thres_suppress[suppress_i] = m_pParam->s_nThresholdSupress; int Start_X = 0; int Start_Y = 0; int start_x = Start_X+Cmp_Pitch; int start_x1=Start_X+Cmp_Pitch; for(int thre_i=0;thre_i<8;thre_i++) { Dynamic_thres[thre_i] = m_pParam->s_nThreshold; mulFactorR[thre_i] = rpRatio; mulFactorC[thre_i] = rpRatioC; } Dynamic_thres[6]=1000; //2 word pass next ins Dynamic_thres[7]=1000; int nMaxPixelNum = m_pParam->s_nFrameDefPixLimit; __asm { pushad mov esi, src_add movdqu xmm6, Dynamic_thres UNPACK_LOOP: mov edi, Temp_Result_Add mov ebx, limit_horiz_count pxor xmm0, xmm0 mov ecx , esi HORIZONTAL_IMG_SUM : mov eax , ecx pxor xmm3, xmm3 pxor xmm4, xmm4 pxor xmm7, xmm7 movdqu xmm1, [eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 movdqu xmm1, xmm2 punpcklbw xmm1, xmm0 // unpack lower-order bytes to words punpckhbw xmm2, xmm0 // unpack higher-order bytes to words paddw xmm3, xmm1 paddw xmm4, xmm2 add eax , horiz_count movdqu xmm1, [eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 movdqu xmm1, xmm2 punpcklbw xmm1, xmm0 // unpack lower-order bytes to words punpckhbw xmm2, xmm0 // unpack higher-order bytes to words paddw xmm3, xmm1 paddw xmm4, xmm2 movdqu [edi],xmm3 movdqu [edi+16],xmm4 add ecx ,16 add edi ,32 dec ebx jnz HORIZONTAL_IMG_SUM INIT_BLANK_SPACE: pxor xmm0, xmm0 mov ebx, start_x mov start_x1,ebx mov ebx,backup_short_limit_horiz_count mov short_limit_horiz_count,ebx mov edx,backup_limit_horiz_count mov limit_horiz_count,edx mov ecx, esi mov edi, Temp_Result_Add mov edx,backup_limit_horiz_count mov limit_horiz_count,edx HORIZONTAL_INS : pxor xmm0, xmm0 mov eax , Short_Cmp_Pitch pxor xmm3,xmm3 Pxor xmm4,xmm4 movdqu xmm1, [edi] paddw xmm3, xmm1 psrldq xmm1, 2 paddw xmm3, xmm1 // movdqu check_tmp2,xmm3 movdqu xmm5, [edi+eax] movdqu xmm4, xmm5 movdqu xmm0, mulFactorC pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, mulFactorR pmullw xmm4, xmm0 paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 // movdqu check_tmp1,xmm5 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 // movdqu check_tmp3,xmm0 pcmpgtw xmm0, xmm6 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH1 jmp THRES_PASS1 THRES_PASS1 : mov eax ,Jmp_Short_Count add edi, eax // add ecx, eax mov eax ,Jmp_Char_Count mov edx, start_x1 add edx, eax mov start_x1 ,edx dec short_limit_horiz_count jnz HORIZONTAL_INS add esi,horiz_count inc Start_Y dec limit_vert_count jnz UNPACK_LOOP jmp PROCESSING_END DEFECT_SEARCH1 : // 1¹øÂ° ¿öµå mov eax, start_x1 pextrw edx, xmm0, 0x00 cmp edx, 0 je NODEFECT1 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x00 pextrw edx, xmm1, 0x00 cmp ebx, edx je POSITIVE_POINT mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 1 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT1 POSITIVE_POINT : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 0 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT1: // 2¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x01 cmp edx, 0 je NODEFECT2 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x01 pextrw edx, xmm1, 0x01 cmp ebx, edx je POSITIVE_POINT1 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 1 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT2 POSITIVE_POINT1 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 0 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT2: // 3¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x02 cmp edx, 0 je NODEFECT3 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x02 pextrw edx, xmm1, 0x02 cmp ebx, edx je POSITIVE_POINT2 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 1 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT3 POSITIVE_POINT2 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 0 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT3: // 4¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x03 cmp edx, 0 je NODEFECT4 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x03 pextrw edx, xmm1, 0x03 cmp ebx, edx je POSITIVE_POINT3 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 1 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT4 POSITIVE_POINT3 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 0 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT4: // 5¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x04 cmp edx, 0 je NODEFECT5 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x04 pextrw edx, xmm1, 0x04 cmp ebx, edx je POSITIVE_POINT4 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 1 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT5 POSITIVE_POINT4 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 0 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT5: // 6¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x05 cmp edx, 0 je THRES_PASS1 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x05 pextrw edx, xmm1, 0x05 cmp ebx, edx je POSITIVE_POINT5 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 1 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp THRES_PASS1 POSITIVE_POINT5 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 0 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum jmp THRES_PASS1 PROCESSING_END : popad emms } delete[] Temp_Result_Add; } void CConvInt1::HConvolution_SSE3_1() { if(m_pParam->s_RectConv.Width() <=(int)(m_pParam->s_dPitchConvReal)*3) return; #define PROCESSING_COUNT 6 char* img = (char*)(m_pParam->s_lpBuffer + m_pParam->s_RectConv.top * m_pParam->s_nFrameWidth + m_pParam->s_RectConv.left); int Honrizontal_Len =2; int Vertical_Len = 2; int MAX_WORD_PROCESS = 16; int SEARCH_WORD_PROCESS = 8 ; if(m_pParam->s_RectConv.Width() <=0) return; if(Honrizontal_Len > SEARCH_WORD_PROCESS) return; //lks short mulFactorOrg[8] = {8,8,8,8,8,8,8,8}; // integer ¿¬»êÀ¸·Î floating¿¬»êÇÏ´Â È¿°ú¸¦ º¸±âÀ§ÇÏ¿© int Cmp_Pitch = (int)(m_pParam->s_dPitchConvReal); // ½Ç¼öpitch¿¡¼­ Á¤¼ö¼ººÐ double rPitch = (m_pParam->s_dPitchConvReal) - Cmp_Pitch; // ½Ç¼öpitch¿¡¼­ Á¤¼ö¼ººÐÀ» »« ³ª¸ÓÁö (0ºÎÅÍ 1»çÀÌÀÇ ¼ö) int rpRatio = (int)(rPitch*8.0 + 0.5); // ³ª¸ÓÁö ¼ººÐÀÇ 8ºÐÀ§ µî±Þ int rpRatioC = 8 - rpRatio; // 8ºÐÀ§µî±ÞÀÇ 8's complement short mulFactorR[8], mulFactorC[8]; // 2PITCH int Cmp_Pitch2 = (int)(m_pParam->s_dPitchConvReal * 2); // ½Ç¼öpitch¿¡¼­ Á¤¼ö¼ººÐ double rPitch2 = (m_pParam->s_dPitchConvReal * 2) - Cmp_Pitch2; // ½Ç¼öpitch¿¡¼­ Á¤¼ö¼ººÐÀ» »« ³ª¸ÓÁö (0ºÎÅÍ 1»çÀÌÀÇ ¼ö) int rpRatio2 = (int)(rPitch2*8.0 + 0.5); // ³ª¸ÓÁö ¼ººÐÀÇ 8ºÐÀ§ µî±Þ int rpRatioC2 = 8 - rpRatio2; // 8ºÐÀ§µî±ÞÀÇ 8's complement short mulFactorR2[8], mulFactorC2[8]; int vertical_count=Vertical_Len; int horiz_count = m_pParam->s_nFrameWidth; int limit_vert_count = m_pParam->s_RectConv.Height()-1 ; int limit_horiz_count = m_pParam->s_RectConv.Width()/MAX_WORD_PROCESS+1; int backup_limit_horiz_count=limit_horiz_count; int Vwidth =m_pParam->s_nFrameWidth * vertical_count; int Hwidth =Cmp_Pitch; int Short_Cmp_Pitch =Cmp_Pitch *2 ; int Short_Cmp_Pitch2 =Cmp_Pitch2 *2 ; int Jmp_Char_Count = PROCESSING_COUNT; // 6 pixel processing int Jmp_Short_Count =Jmp_Char_Count*2; int Jmp_Char_Count2 = Jmp_Char_Count*2; // 6 pixel processing int Jmp_Short_Count2 =Jmp_Short_Count*2; int short_limit_horiz_count = (m_pParam->s_RectConv.Width()-Cmp_Pitch2-Jmp_Char_Count)/Jmp_Char_Count ; int temp=0; int short_limit_horiz_right_count=0; if(m_pParam->s_RectConv.Width() <=(int)(m_pParam->s_dPitchConvReal)*3) { temp=Jmp_Char_Count-Cmp_Pitch%Jmp_Char_Count; short_limit_horiz_right_count =(temp+(Cmp_Pitch))/Jmp_Char_Count; } else { temp=Jmp_Char_Count-Cmp_Pitch2%Jmp_Char_Count; short_limit_horiz_right_count =(temp+(Cmp_Pitch2))/Jmp_Char_Count; } int backup_short_limit_horiz_count = short_limit_horiz_count; int backup_short_limit_horiz_right_count = short_limit_horiz_right_count; UCHAR *src_add = (UCHAR*)img ; short *Temp_Result_Add; Temp_Result_Add = new short[m_pParam->s_RectConv.Width()+Short_Cmp_Pitch]; memset(Temp_Result_Add,0,sizeof(short) * (m_pParam->s_RectConv.Width()+Short_Cmp_Pitch)); short Dynamic_thres[8]; UCHAR thres_suppress[16]; int Start_X = 0; int Start_Y = 0; int start_x = Start_X; int start_x1=Start_X; int p=m_pParam->s_RectConv.Width(); int start_right_x = m_pParam->s_RectConv.Width()-short_limit_horiz_right_count*Jmp_Char_Count; int start_right_x1=Start_X; int is_right_ins_mode =0; int nMaxPixelNum = m_pParam->s_nFrameDefPixLimit; short check_tmp1[8]; short check_tmp2[8]; short check_tmp3[8]; short check_tmp4[8]; short check_tmp5[8]; // yong debug for(int suppress_i=0;suppress_i<16;suppress_i++) thres_suppress[suppress_i] = m_pParam->s_nThresholdSupress; for(int thre_i=0;thre_i<8;thre_i++) { Dynamic_thres[thre_i] = m_pParam->s_nThreshold; mulFactorR[thre_i] = rpRatio; mulFactorC[thre_i] = rpRatioC; mulFactorR2[thre_i] = rpRatio2; mulFactorC2[thre_i] = rpRatioC2; } Dynamic_thres[6]=1000; //2 word pass next ins max :: 255*4(2cheng2) Dynamic_thres[7]=1000; __asm { pushad mov esi, src_add movdqu xmm6, Dynamic_thres UNPACK_LOOP: mov edi, Temp_Result_Add mov ebx, limit_horiz_count pxor xmm0, xmm0 mov ecx , esi HORIZONTAL_IMG_SUM : mov eax , ecx pxor xmm3, xmm3 pxor xmm4, xmm4 pxor xmm7, xmm7 movdqu xmm1, [eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 movdqu xmm1, xmm2 punpcklbw xmm1, xmm0 // unpack lower-order bytes to words punpckhbw xmm2, xmm0 // unpack higher-order bytes to words paddw xmm3, xmm1 paddw xmm4, xmm2 add eax , horiz_count movdqu xmm1, [eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 movdqu xmm1, xmm2 punpcklbw xmm1, xmm0 // unpack lower-order bytes to words punpckhbw xmm2, xmm0 // unpack higher-order bytes to words paddw xmm3, xmm1 paddw xmm4, xmm2 movdqu [edi],xmm3 movdqu [edi+16],xmm4 add ecx ,16 add edi ,32 dec ebx jnz HORIZONTAL_IMG_SUM INIT_BLANK_SPACE: pxor xmm0, xmm0 mov ebx, start_x mov start_x1,ebx mov ebx,backup_short_limit_horiz_count mov short_limit_horiz_count,ebx mov edx,backup_limit_horiz_count mov limit_horiz_count,edx mov ecx, esi mov edi, Temp_Result_Add mov edx,backup_limit_horiz_count mov limit_horiz_count,edx mov is_right_ins_mode,0 HORIZONTAL_INS_LEFT : pxor xmm0, xmm0 mov eax , Short_Cmp_Pitch pxor xmm3,xmm3 Pxor xmm4,xmm4 movdqu xmm1, [edi] paddw xmm3, xmm1 psrldq xmm1, 2 paddw xmm3, xmm1 movdqu xmm5, [edi+eax] movdqu xmm4, xmm5 movdqu xmm0, mulFactorC pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, mulFactorR pmullw xmm4, xmm0 paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 // movdqu check_tmp1, xmm3 // movdqu check_tmp2, xmm5 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 movdqu xmm7, xmm0 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH1 jmp THRES_PASS1 DEFECT_SEARCH1 : mov eax , Short_Cmp_Pitch2 movdqu xmm5, [edi+eax] movdqu xmm4, xmm5 movdqu xmm0, mulFactorC2 pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, mulFactorR2 pmullw xmm4, xmm0 paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 // movdqu check_tmp1, xmm3 // movdqu check_tmp2, xmm5 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 // movdqu check_tmp3, xmm0 // movdqu check_tmp4, xmm7 pand xmm0, xmm7 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH2 jmp THRES_PASS1 THRES_PASS1 : mov eax ,Jmp_Short_Count add edi, eax mov eax ,Jmp_Char_Count mov edx, start_x1 add edx, eax mov start_x1 ,edx dec short_limit_horiz_count jnz HORIZONTAL_INS_LEFT jmp HORIZONTAL_INS_RIGHT_START THRES_PASS2 : mov eax ,Jmp_Short_Count2 add edi, eax mov eax ,Jmp_Char_Count2 mov edx, start_x1 add edx, eax mov start_x1 ,edx dec short_limit_horiz_count jz HORIZONTAL_INS_RIGHT_START dec short_limit_horiz_count jnz HORIZONTAL_INS_LEFT HORIZONTAL_INS_RIGHT_START : mov edi, Temp_Result_Add mov ebx, start_right_x mov start_x1,ebx mov eax ,start_x1 add edi , eax add edi, eax mov eax,backup_short_limit_horiz_right_count mov short_limit_horiz_right_count,eax mov is_right_ins_mode,1 //Right HORIZONTAL_INS_RIGHT : pxor xmm0, xmm0 mov eax , Short_Cmp_Pitch pxor xmm3,xmm3 Pxor xmm4,xmm4 sub edi, eax movdqu xmm1, [edi] //[edi -eax]bu neng directly sub paddw xmm3, xmm1 psrldq xmm1, 2 paddw xmm3, xmm1 add edi, eax movdqu xmm5, [edi] movdqu xmm4, xmm5 movdqu xmm0, mulFactorC pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, mulFactorR pmullw xmm4, xmm0 paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 movdqu xmm7, xmm0 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH1_RIGHT jmp THRES_PASS1_RIGHT DEFECT_SEARCH1_RIGHT : pxor xmm0, xmm0 mov eax , Short_Cmp_Pitch2 pxor xmm3,xmm3 Pxor xmm4,xmm4 sub edi, eax movdqu xmm1, [edi] paddw xmm3, xmm1 psrldq xmm1, 2 paddw xmm3, xmm1 add edi, eax movdqu xmm5, [edi] movdqu xmm4, xmm5 movdqu xmm0, mulFactorC2 pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, mulFactorR2 pmullw xmm4, xmm0 paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 movdqu xmm1, xmm5 psubw xmm1, xmm3 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 pand xmm0, xmm7 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH2 THRES_PASS1_RIGHT : mov eax ,Jmp_Short_Count add edi, eax mov eax ,Jmp_Char_Count mov edx, start_x1 add edx, eax mov start_x1 ,edx dec short_limit_horiz_right_count jnz HORIZONTAL_INS_RIGHT NEXT_VERTICAL : add esi,horiz_count inc Start_Y dec limit_vert_count jnz UNPACK_LOOP jmp PROCESSING_END DEFECT_SEARCH2 : // 1¹øÂ° ¿öµå mov eax, start_x1 pextrw edx, xmm0, 0x00 cmp edx, 0 je NODEFECT1 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x00 pextrw edx, xmm1, 0x00 cmp ebx, edx je POSITIVE_POINT mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT1 POSITIVE_POINT : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT1: // 2¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x01 cmp edx, 0 je NODEFECT2 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x01 pextrw edx, xmm1, 0x01 cmp ebx, edx je POSITIVE_POINT1 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT2 POSITIVE_POINT1 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT2: // 3¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x02 cmp edx, 0 je NODEFECT3 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x02 pextrw edx, xmm1, 0x02 cmp ebx, edx je POSITIVE_POINT2 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT3 POSITIVE_POINT2 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT3: // 4¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x03 cmp edx, 0 je NODEFECT4 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x03 pextrw edx, xmm1, 0x03 cmp ebx, edx je POSITIVE_POINT3 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT4 POSITIVE_POINT3 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT4: // 5¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x04 cmp edx, 0 je NODEFECT5 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x04 pextrw edx, xmm1, 0x04 cmp ebx, edx je POSITIVE_POINT4 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT5 POSITIVE_POINT4 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT5: // 6¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x05 cmp edx, 0 je THRES_PASS mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x05 pextrw edx, xmm1, 0x05 cmp ebx, edx je POSITIVE_POINT5 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum THRES_PASS : mov edx,is_right_ins_mode cmp edx,1 je THRES_PASS1_RIGHT jmp THRES_PASS1 POSITIVE_POINT5 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum mov edx,is_right_ins_mode cmp edx,1 je THRES_PASS1_RIGHT jmp THRES_PASS1 PROCESSING_END : popad emms } delete[] Temp_Result_Add; } void CConvInt1::PairingPlus_1(double dPitchsize) { ////////////////////////////////////////////////////////////////////////// //MMX¿¡¼­ thresholding±îÁöÇÏ°í ³ª¿À´Â °áÇÔÀÇ pointÁ¤º¸´Â pair·Î ³ª¿À¹Ç·Î //½ÇÁ¦ ÇѰ³ÀÇ pointÁ¤º¸¸¦ °Å¸£´Â °úÁ¤ÀÌ ÇÊ¿äÇÏ´Ù. int i, j; BOOL ignore = FALSE; for(i = 0; i < m_nDefectNum; i++) { if (m_nDefectPairedNum >= m_pParam->s_nFrameDefPixLimit) break; if(m_sDefectType[i] == DEFTYPE_DELETE) continue; if (m_sDefectType[i] == DEFTYPE_NODEFECT) continue; m_DefectPaired[m_nDefectPairedNum].s_DefectPos = DEFPOS_CENTER; m_DefectPaired[m_nDefectPairedNum].s_DefectPair = DEFPAIR_PPAIR; m_DefectPaired[m_nDefectPairedNum].s_nDefectX = m_nDefectPointX[i] + m_pParam->s_RectConv.left - 1; // Add Margin m_DefectPaired[m_nDefectPairedNum].s_nDefectY = m_nDefectPointY[i] + m_pParam->s_RectConv.top; m_DefectPaired[m_nDefectPairedNum].s_DefectType = static_cast(m_sDefectType[i]); //½ÇÁ¦¹é°áÇÔÀº1,Èæ°áÇÔÀº0 m_DefectPaired[m_nDefectPairedNum].SetPeak(m_nDefectValue[i], m_pParam->s_nThreshold, m_pParam->s_nConvWidth * m_pParam->s_nConvHeight); m_nDefectPairedNum++; } // return DIT_CONV_SUCCESS; } void CConvInt1::HConvolution_SSE3_PITCH3() { #define PROCESSING_COUNT 6 char* img = (char*)(m_pParam->s_lpBuffer + m_pParam->s_RectConv.top * m_pParam->s_nFrameWidth + m_pParam->s_RectConv.left); int Honrizontal_Len =2; int Vertical_Len = 2; int MAX_WORD_PROCESS = 16; int SEARCH_WORD_PROCESS = 8 ; if(m_pParam->s_RectConv.Width() <=0) return; if(Honrizontal_Len > SEARCH_WORD_PROCESS) return; //lks short mulFactorOrg[8] = {8,8,8,8,8,8,8,8}; // integer ¿¬»êÀ¸·Î floating¿¬»êÇÏ´Â È¿°ú¸¦ º¸±âÀ§ÇÏ¿© int Cmp_Pitch = (int)(m_pParam->s_dPitchConvReal); // ½Ç¼öpitch¿¡¼­ Á¤¼ö¼ººÐ double rPitch = (m_pParam->s_dPitchConvReal) - Cmp_Pitch; // ½Ç¼öpitch¿¡¼­ Á¤¼ö¼ººÐÀ» »« ³ª¸ÓÁö (0ºÎÅÍ 1»çÀÌÀÇ ¼ö) int rpRatio = (int)(rPitch*8.0 + 0.5); // ³ª¸ÓÁö ¼ººÐÀÇ 8ºÐÀ§ µî±Þ int rpRatioC = 8 - rpRatio; // 8ºÐÀ§µî±ÞÀÇ 8's complement short mulFactorR[8], mulFactorC[8]; // 2PITCH int Cmp_Pitch2 = (int)(m_pParam->s_dPitchConvReal * 2); // ½Ç¼öpitch¿¡¼­ Á¤¼ö¼ººÐ double rPitch2 = (m_pParam->s_dPitchConvReal * 2) - Cmp_Pitch2; // ½Ç¼öpitch¿¡¼­ Á¤¼ö¼ººÐÀ» »« ³ª¸ÓÁö (0ºÎÅÍ 1»çÀÌÀÇ ¼ö) int rpRatio2 = (int)(rPitch2*8.0 + 0.5); // ³ª¸ÓÁö ¼ººÐÀÇ 8ºÐÀ§ µî±Þ int rpRatioC2 = 8 - rpRatio2; // 8ºÐÀ§µî±ÞÀÇ 8's complement short mulFactorR2[8], mulFactorC2[8]; int vertical_count=Vertical_Len; int horiz_count = m_pParam->s_nFrameWidth; int limit_vert_count = m_pParam->s_RectConv.Height()-1 ; int limit_horiz_count = m_pParam->s_RectConv.Width()/MAX_WORD_PROCESS+1; int backup_limit_horiz_count=limit_horiz_count; int Vwidth =m_pParam->s_nFrameWidth * vertical_count; int Hwidth =Cmp_Pitch; int Short_Cmp_Pitch =Cmp_Pitch *2 ; int Short_Cmp_Pitch2 =Cmp_Pitch2 *2 ; int Jmp_Char_Count = PROCESSING_COUNT; // 6 pixel processing int Jmp_Short_Count =Jmp_Char_Count*2; int Jmp_Char_Count2 = Jmp_Char_Count*2; // 6 pixel processing int Jmp_Short_Count2 =Jmp_Short_Count*2; int short_limit_horiz_count = (m_pParam->s_RectConv.Width()-Cmp_Pitch2-Jmp_Char_Count)/Jmp_Char_Count ; int temp=Jmp_Char_Count-Cmp_Pitch2%Jmp_Char_Count; int short_limit_horiz_right_count =(temp+(Cmp_Pitch*2))/Jmp_Char_Count; int backup_short_limit_horiz_count = short_limit_horiz_count; int backup_short_limit_horiz_right_count = short_limit_horiz_right_count; UCHAR *src_add = (UCHAR*)img ; short *Temp_Result_Add; Temp_Result_Add = new short[m_pParam->s_RectConv.Width()+Short_Cmp_Pitch]; memset(Temp_Result_Add,0,sizeof(short) * (m_pParam->s_RectConv.Width()+Short_Cmp_Pitch)); short Dynamic_thres[8]; UCHAR thres_suppress[16]; int Start_X = 0; int Start_Y = 0; int start_x = Start_X; int start_x1=Start_X; int start_right_x = m_pParam->s_RectConv.Width()-short_limit_horiz_right_count*Jmp_Char_Count; int start_right_x1=Start_X; int is_right_ins_mode =0; Dynamic_thres[6]=1000; //2 word pass next ins max :: 255*4(2cheng2) Dynamic_thres[7]=1000; int nMaxPixelNum = m_pParam->s_nFrameDefPixLimit; /* short check_tmp1[8]; short check_tmp2[8]; short check_tmp3[8]; short check_tmp4[8]; short check_tmp5[8]; */ // yong debug for(int suppress_i=0;suppress_i<16;suppress_i++) thres_suppress[suppress_i] = m_pParam->s_nThresholdSupress; for(int thre_i=0;thre_i<8;thre_i++) { Dynamic_thres[thre_i] = m_pParam->s_nThreshold; mulFactorR[thre_i] = rpRatio; mulFactorC[thre_i] = rpRatioC; mulFactorR2[thre_i] = rpRatio2; mulFactorC2[thre_i] = rpRatioC2; } __asm { pushad mov esi, src_add movdqu xmm6, Dynamic_thres UNPACK_LOOP: mov edi, Temp_Result_Add mov ebx, limit_horiz_count pxor xmm0, xmm0 mov ecx , esi HORIZONTAL_IMG_SUM : mov eax , ecx pxor xmm3, xmm3 pxor xmm4, xmm4 pxor xmm7, xmm7 movdqu xmm1, [eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 movdqu xmm1, xmm2 punpcklbw xmm1, xmm0 // unpack lower-order bytes to words punpckhbw xmm2, xmm0 // unpack higher-order bytes to words paddw xmm3, xmm1 paddw xmm4, xmm2 add eax , horiz_count movdqu xmm1, [eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 movdqu xmm1, xmm2 punpcklbw xmm1, xmm0 // unpack lower-order bytes to words punpckhbw xmm2, xmm0 // unpack higher-order bytes to words paddw xmm3, xmm1 paddw xmm4, xmm2 movdqu [edi],xmm3 movdqu [edi+16],xmm4 add ecx ,16 add edi ,32 dec ebx jnz HORIZONTAL_IMG_SUM INIT_BLANK_SPACE: pxor xmm0, xmm0 mov ebx, start_x mov start_x1,ebx mov ebx,backup_short_limit_horiz_count mov short_limit_horiz_count,ebx mov edx,backup_limit_horiz_count mov limit_horiz_count,edx mov ecx, esi mov edi, Temp_Result_Add mov edx,backup_limit_horiz_count mov limit_horiz_count,edx mov is_right_ins_mode,0 HORIZONTAL_INS_LEFT : pxor xmm0, xmm0 mov eax , Short_Cmp_Pitch pxor xmm3,xmm3 Pxor xmm4,xmm4 movdqu xmm1, [edi] paddw xmm3, xmm1 psrldq xmm1, 2 paddw xmm3, xmm1 movdqu xmm5, [edi+eax] movdqu xmm4, xmm5 movdqu xmm0, mulFactorC pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, mulFactorR pmullw xmm4, xmm0 paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 movdqu xmm7, xmm0 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH1 jmp THRES_PASS1 DEFECT_SEARCH1 : mov eax , Short_Cmp_Pitch2 movdqu xmm5, [edi+eax] movdqu xmm4, xmm5 movdqu xmm0, mulFactorC2 pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, mulFactorR2 pmullw xmm4, xmm0 paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 pand xmm0, xmm7 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH2 jmp THRES_PASS1 THRES_PASS1 : mov eax ,Jmp_Short_Count add edi, eax mov eax ,Jmp_Char_Count mov edx, start_x1 add edx, eax mov start_x1 ,edx dec short_limit_horiz_count jnz HORIZONTAL_INS_LEFT jmp HORIZONTAL_INS_RIGHT_START THRES_PASS2 : mov eax ,Jmp_Short_Count2 add edi, eax mov eax ,Jmp_Char_Count2 mov edx, start_x1 add edx, eax mov start_x1 ,edx dec short_limit_horiz_count jz HORIZONTAL_INS_RIGHT_START dec short_limit_horiz_count jnz HORIZONTAL_INS_LEFT HORIZONTAL_INS_RIGHT_START : mov edi, Temp_Result_Add mov ebx, start_right_x mov start_x1,ebx mov eax ,start_x1 add edi , eax add edi, eax mov eax,backup_short_limit_horiz_right_count mov short_limit_horiz_right_count,eax mov is_right_ins_mode,1 //Right HORIZONTAL_INS_RIGHT : pxor xmm0, xmm0 mov eax , Short_Cmp_Pitch pxor xmm3,xmm3 Pxor xmm4,xmm4 sub edi, eax movdqu xmm1, [edi] //[edi -eax]bu neng directly sub paddw xmm3, xmm1 psrldq xmm1, 2 paddw xmm3, xmm1 add edi, eax movdqu xmm5, [edi] movdqu xmm4, xmm5 movdqu xmm0, mulFactorC pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, mulFactorR pmullw xmm4, xmm0 paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 movdqu xmm7, xmm0 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH1_RIGHT jmp THRES_PASS1_RIGHT DEFECT_SEARCH1_RIGHT : pxor xmm0, xmm0 mov eax , Short_Cmp_Pitch2 pxor xmm3,xmm3 Pxor xmm4,xmm4 sub edi, eax movdqu xmm1, [edi] paddw xmm3, xmm1 psrldq xmm1, 2 paddw xmm3, xmm1 add edi, eax movdqu xmm5, [edi] movdqu xmm4, xmm5 movdqu xmm0, mulFactorC2 pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, mulFactorR2 pmullw xmm4, xmm0 paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 movdqu xmm1, xmm5 psubw xmm1, xmm3 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 pand xmm0, xmm7 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH2 THRES_PASS1_RIGHT : mov eax ,Jmp_Short_Count add edi, eax mov eax ,Jmp_Char_Count mov edx, start_x1 add edx, eax mov start_x1 ,edx dec short_limit_horiz_right_count jnz HORIZONTAL_INS_RIGHT NEXT_VERTICAL : add esi,horiz_count inc Start_Y dec limit_vert_count jnz UNPACK_LOOP jmp PROCESSING_END DEFECT_SEARCH2 : // 1¹øÂ° ¿öµå mov eax, start_x1 pextrw edx, xmm0, 0x00 cmp edx, 0 je NODEFECT1 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x00 pextrw edx, xmm1, 0x00 cmp ebx, edx je POSITIVE_POINT mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT1 POSITIVE_POINT : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT1: // 2¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x01 cmp edx, 0 je NODEFECT2 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x01 pextrw edx, xmm1, 0x01 cmp ebx, edx je POSITIVE_POINT1 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT2 POSITIVE_POINT1 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT2: // 3¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x02 cmp edx, 0 je NODEFECT3 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x02 pextrw edx, xmm1, 0x02 cmp ebx, edx je POSITIVE_POINT2 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT3 POSITIVE_POINT2 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT3: // 4¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x03 cmp edx, 0 je NODEFECT4 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x03 pextrw edx, xmm1, 0x03 cmp ebx, edx je POSITIVE_POINT3 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT4 POSITIVE_POINT3 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT4: // 5¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x04 cmp edx, 0 je NODEFECT5 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x04 pextrw edx, xmm1, 0x04 cmp ebx, edx je POSITIVE_POINT4 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT5 POSITIVE_POINT4 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT5: // 6¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x05 cmp edx, 0 je THRES_PASS mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x05 pextrw edx, xmm1, 0x05 cmp ebx, edx je POSITIVE_POINT5 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum THRES_PASS : mov edx,is_right_ins_mode cmp edx,1 je THRES_PASS1_RIGHT jmp THRES_PASS1 POSITIVE_POINT5 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum mov edx,is_right_ins_mode cmp edx,1 je THRES_PASS1_RIGHT jmp THRES_PASS1 PROCESSING_END : popad emms } delete[] Temp_Result_Add; } void CConvInt1::VConvolution_SEE3() { /* reverse de hua huan add jizhu. */ if(m_pParam->s_RectConv.Height() <=(int)(m_pParam->s_dPitchConvReal)*4) return; char* img = (char*)(m_pParam->s_lpBuffer + m_pParam->s_RectConv.top * m_pParam->s_nFrameWidth + m_pParam->s_RectConv.left); if(m_pParam->s_RectConv.Width() <=0) return; //lks #define PROCESSING_COUNT 7 #define MAX_THRESHOLD 1000 short mulFactorOrg[8] = {8,8,8,8,8,8,8,8}; int Cmp_Pitch = (int)(m_pParam->s_dPitchConvReal); double rPitch = (m_pParam->s_dPitchConvReal) - Cmp_Pitch; int rpRatio = (int)(rPitch*8.0 + 0.5); int rpRatioC = 8 - rpRatio; short mulFactorR[8], mulFactorC[8]; // 2PITCH int Cmp_Pitch2 = (int)(m_pParam->s_dPitchConvReal * 2); double rPitch2 = (m_pParam->s_dPitchConvReal * 2) - Cmp_Pitch2; int rpRatio2 = (int)(rPitch2*8.0 + 0.5); int rpRatioC2 = 8 - rpRatio2; short mulFactorR2[8], mulFactorC2[8]; int horiz_count = m_pParam->s_nFrameWidth; int limit_vert_count = m_pParam->s_RectConv.Height()-1 ; int limit_vert_count_forward =limit_vert_count-Cmp_Pitch2 ; int limit_vert_count_backward = Cmp_Pitch2 ; //2*2 end int Jmp_Char_Count = PROCESSING_COUNT; // 7 pixel processing int char_limit_horiz_count = m_pParam->s_RectConv.Width()/Jmp_Char_Count ; int Remainder_count = m_pParam->s_RectConv.Width()%Jmp_Char_Count ; int temp=0; int Inspection_End=1; int backup_char_limit_horiz_count = char_limit_horiz_count; int backup_Remainder_count= Remainder_count; UCHAR *src_add = (UCHAR*)img ; UCHAR *src_add_backward= (UCHAR*)img +limit_vert_count_forward*horiz_count- Cmp_Pitch*horiz_count; UCHAR *dest_add =(UCHAR*)img + Cmp_Pitch*horiz_count; UCHAR *dest_add2 =(UCHAR*)img + Cmp_Pitch2*horiz_count; UCHAR *dest_add_minus =(UCHAR*)src_add_backward + Cmp_Pitch*horiz_count; //yuanlai src UCHAR *dest_add2_minus =(UCHAR*)dest_add_minus - Cmp_Pitch2*horiz_count; UCHAR *dest_add_backup =dest_add; UCHAR *dest_add2_backup =dest_add2; UCHAR *src_add_backup=src_add; short Dynamic_thres[8]; short Remainder_Dynamic_thres[8]; UCHAR thres_suppress[16]; int Start_X = 0; int Start_Y = 0; int start_x = Start_X; int start_x1=Start_X; int nMaxPixelNum = m_pParam->s_nFrameDefPixLimit; int is_reverse=0; UCHAR check_tmp11[16]; short check_tmp1[8]; short check_tmp2[8]; short check_tmp3[8]; short check_tmp4[8]; short check_tmp5[8]; // yong debug for(int suppress_i=0;suppress_i<16;suppress_i++) thres_suppress[suppress_i] = m_pParam->s_nThresholdSupress; for(int thre_i=0;thre_i<8;thre_i++) { Dynamic_thres[thre_i] = m_pParam->s_nThreshold; Remainder_Dynamic_thres[thre_i] = m_pParam->s_nThreshold; mulFactorR[thre_i] = rpRatio; mulFactorC[thre_i] = rpRatioC; mulFactorR2[thre_i] = rpRatio2; mulFactorC2[thre_i] = rpRatioC2; } Dynamic_thres[7]=MAX_THRESHOLD; //1 word pass next ins max :: 255*4(2cheng2) for(thre_i=0;thre_i<8-Remainder_count;thre_i++) { Remainder_Dynamic_thres[PROCESSING_COUNT-thre_i]=MAX_THRESHOLD; } __asm { pushad UNPACK_LOOP: mov ebx, start_x mov start_x1,ebx mov ebx,backup_char_limit_horiz_count mov char_limit_horiz_count,ebx movdqu xmm6,Dynamic_thres mov edx, backup_Remainder_count mov Remainder_count,edx HORIZONTAL_INS_LEFT : mov edi, src_add pxor xmm0, xmm0 pxor xmm3, xmm3 Pxor xmm4, xmm4 movdqu xmm1, [edi] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words paddw xmm3, xmm2 mov eax , horiz_count add edi , eax movdqu xmm1, [edi] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words paddw xmm3, xmm2 movdqu xmm4, xmm3 psrldq xmm4, 2 paddw xmm3, xmm4 mov edi , dest_add pxor xmm4, xmm4 movdqu xmm1, [edi] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm0, mulFactorC pmullw xmm2, xmm0 paddw xmm4, xmm2 mov eax , horiz_count add edi , eax movdqu xmm1, [edi] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm7, xmm2 movdqu xmm0, mulFactorR pmullw xmm2, xmm0 paddw xmm4, xmm2 movdqu xmm0, mulFactorC pmullw xmm7, xmm0 paddw xmm4, xmm7 add edi, eax movdqu xmm1, [edi] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm7, xmm2 movdqu xmm0, mulFactorR pmullw xmm2, xmm0 paddw xmm4, xmm2 psraw xmm4, 3 movdqu xmm5, xmm4 psrldq xmm4, 2 //1 num rpa paddw xmm5, xmm4 // movdqu check_tmp2,xmm5 // movdqu check_tmp1,xmm3 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 movdqu xmm7, xmm0 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH1 jmp THRES_PASS1 THRES_PASS1 : mov eax ,Jmp_Char_Count mov edx, start_x1 add edx, eax mov start_x1 ,edx mov edi, src_add add edi, eax mov src_add,edi mov edi, dest_add add edi, eax mov dest_add,edi mov edi, dest_add2 add edi, eax mov dest_add2,edi dec char_limit_horiz_count jnz HORIZONTAL_INS_LEFT mov eax, Remainder_count cmp eax, 0 je NEXT_VERTICAL mov Remainder_count,0 mov char_limit_horiz_count,1 movdqu xmm6,Remainder_Dynamic_thres jmp HORIZONTAL_INS_LEFT NEXT_VERTICAL : mov eax,horiz_count add src_add_backup,eax mov edi,src_add_backup mov src_add,edi add dest_add_backup,eax mov edi,dest_add_backup mov dest_add,edi add dest_add2_backup,eax mov edi,dest_add2_backup mov dest_add2,edi inc Start_Y dec limit_vert_count_forward jnz UNPACK_LOOP cmp Inspection_End,0 je PROCESSING_END mov eax,limit_vert_count_backward mov limit_vert_count_forward,eax mov edi,src_add_backward mov src_add,edi mov src_add_backup,edi mov edi,dest_add_minus mov dest_add,edi mov dest_add_backup,edi mov edi,dest_add2_minus mov dest_add2,edi mov dest_add2_backup,edi mov Inspection_End,0 mov is_reverse,1 jmp UNPACK_LOOP DEFECT_SEARCH1 : mov edx, is_reverse cmp edx, 1 je DEFECT_SEARCH1_REVERSE mov edi , dest_add2 pxor xmm4, xmm4 movdqu xmm1, [edi] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm0, mulFactorC2 pmullw xmm2, xmm0 paddw xmm4, xmm2 mov eax , horiz_count add edi , eax movdqu xmm1, [edi] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm1, xmm2 movdqu xmm0, mulFactorR2 pmullw xmm2, xmm0 paddw xmm4, xmm2 movdqu xmm0, mulFactorC2 pmullw xmm1, xmm0 paddw xmm4, xmm1 add edi, eax movdqu xmm1, [edi] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm0, mulFactorR2 pmullw xmm2, xmm0 paddw xmm4, xmm2 psraw xmm4, 3 movdqu xmm5,xmm4 psrldq xmm4, 2 //1 num rpa paddw xmm5, xmm4 // movdqu check_tmp3,xmm5 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 pand xmm0, xmm7 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH2 jmp THRES_PASS1 DEFECT_SEARCH1_REVERSE : pxor xmm0, xmm0 pxor xmm3, xmm3 Pxor xmm4, xmm4 mov edi, dest_add2 movdqu xmm1, [edi] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words paddw xmm3, xmm2 mov eax , horiz_count add edi , eax movdqu xmm1, [edi] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words paddw xmm3, xmm2 movdqu xmm4, xmm3 psrldq xmm4, 2 paddw xmm3, xmm4 mov edi , dest_add //reverse ~ dest_add -> src pxor xmm4, xmm4 movdqu xmm1, [edi] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm0, mulFactorC2 pmullw xmm2, xmm0 paddw xmm4, xmm2 mov eax , horiz_count add edi , eax movdqu xmm1, [edi] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm1, xmm2 movdqu xmm0, mulFactorR2 pmullw xmm2, xmm0 paddw xmm4, xmm2 movdqu xmm0, mulFactorC2 pmullw xmm1, xmm0 paddw xmm4, xmm1 add edi, eax movdqu xmm1, [edi] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 // movdqu xmm1, xmm2 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm0, mulFactorR2 pmullw xmm2, xmm0 paddw xmm4, xmm2 psraw xmm4, 3 movdqu xmm5, xmm4 psrldq xmm4, 2 //1 num rpa paddw xmm5, xmm4 movdqu xmm2, xmm5 movdqu xmm5, xmm3 movdqu xmm3, xmm2 movdqu xmm1, xmm3 // movdqu check_tmp2,xmm5 // movdqu check_tmp1,xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 pand xmm0, xmm7 pmovmskb edx, xmm0 cmp edx, 0 jne DEFECT_SEARCH2 jmp THRES_PASS1 DEFECT_SEARCH2 : // 1¹øÂ° ¿öµå mov eax, start_x1 pextrw edx, xmm0, 0x00 cmp edx, 0 je NODEFECT1 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x00 pextrw edx, xmm1, 0x00 cmp ebx, edx je POSITIVE_POINT mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT1 POSITIVE_POINT : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT1: // 2¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x01 cmp edx, 0 je NODEFECT2 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x01 pextrw edx, xmm1, 0x01 cmp ebx, edx je POSITIVE_POINT1 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT2 POSITIVE_POINT1 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT2: // 3¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x02 cmp edx, 0 je NODEFECT3 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x02 pextrw edx, xmm1, 0x02 cmp ebx, edx je POSITIVE_POINT2 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT3 POSITIVE_POINT2 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT3: // 4¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x03 cmp edx, 0 je NODEFECT4 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x03 pextrw edx, xmm1, 0x03 cmp ebx, edx je POSITIVE_POINT3 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT4 POSITIVE_POINT3 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT4: // 5¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x04 cmp edx, 0 je NODEFECT5 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x04 pextrw edx, xmm1, 0x04 cmp ebx, edx je POSITIVE_POINT4 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT5 POSITIVE_POINT4 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT5: // 6¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x05 cmp edx, 0 je NODEFECT6 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x05 pextrw edx, xmm1, 0x05 cmp ebx, edx je POSITIVE_POINT5 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT6 POSITIVE_POINT5 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT6: // 7¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x06 cmp edx, 0 je THRES_PASS1 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x06 pextrw edx, xmm1, 0x06 cmp ebx, edx je POSITIVE_POINT6 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp THRES_PASS1 POSITIVE_POINT6 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum jmp THRES_PASS1 PROCESSING_END : popad emms } } int CConvInt1::HConvolution_SSE3_6() { #define PROCESSING_COUNT 6 if(m_pParam->s_RectConv.Width() <=(int)(m_pParam->s_dPitchConvReal)*3) return; char* img = (char*)(m_pParam->s_lpBuffer + m_pParam->s_RectConv.top * m_pParam->s_nFrameWidth + m_pParam->s_RectConv.left); int Honrizontal_Len =2; int Vertical_Len = 2; int MAX_WORD_PROCESS = 16; int SEARCH_WORD_PROCESS = 8 ; if(m_pParam->s_RectConv.Width() <=0) return; if(Honrizontal_Len > SEARCH_WORD_PROCESS) return; //lks short Real_Cmp_Pitch[24], Real_Cmp_Pitch2[24]; short mulFactorR[24], mulFactorC[24]; short mulFactorR2[24], mulFactorC2[24]; short Result_bit[8]; double rPitch=0,rPitch2=0; int rpRatio=0,rpRatioC=0; int Cmp_Pitch=(int)(m_pParam->s_dPitchConvReal); int Cmp_Pitch2=Cmp_Pitch*2; int rpRatio2=0,rpRatioC2=0; int thre_i=0,i=0; int vertical_count=Vertical_Len; int horiz_count = m_pParam->s_nFrameWidth; int limit_vert_count = m_pParam->s_RectConv.Height()-1 ; int limit_horiz_count = m_pParam->s_RectConv.Width()/MAX_WORD_PROCESS+1; int backup_limit_horiz_count=limit_horiz_count; int Vwidth =m_pParam->s_nFrameWidth * vertical_count; int Short_Cmp_Pitch =Cmp_Pitch *2 ; int Short_Cmp_Pitch2 =Cmp_Pitch2 *2 ; int Jmp_Char_Count = PROCESSING_COUNT; // 6 pixel processing int Jmp_Short_Count =Jmp_Char_Count*2; int Jmp_Char_Count2 = Jmp_Char_Count*2; // 6 pixel processing int Jmp_Short_Count2 =Jmp_Short_Count*2; int short_limit_horiz_count = (m_pParam->s_RectConv.Width()-Cmp_Pitch2-Jmp_Char_Count)/Jmp_Char_Count ; int temp=0; int short_limit_horiz_right_count=0; if(m_pParam->s_RectConv.Width() <=(int)(m_pParam->s_dPitchConvReal)*4) { temp=Jmp_Char_Count-Cmp_Pitch%Jmp_Char_Count; short_limit_horiz_right_count =(temp+(Cmp_Pitch))/Jmp_Char_Count; } else { temp=Jmp_Char_Count-Cmp_Pitch2%Jmp_Char_Count; short_limit_horiz_right_count =(temp+(Cmp_Pitch2))/Jmp_Char_Count; } int backup_short_limit_horiz_count = short_limit_horiz_count; int backup_short_limit_horiz_right_count = short_limit_horiz_right_count; UCHAR *src_add = (UCHAR*)img ; short *Temp_Result_Add; Temp_Result_Add = new short[m_pParam->s_RectConv.Width()+Short_Cmp_Pitch2]; memset(Temp_Result_Add,0,sizeof(short) * (m_pParam->s_RectConv.Width()+Short_Cmp_Pitch2)); short Dynamic_thres[8]; UCHAR thres_suppress[16]; int Start_X = 0; int Start_Y = 0; int start_x = Start_X; int start_x1=Start_X; int p=m_pParam->s_RectConv.Width(); int start_right_x = m_pParam->s_RectConv.Width()-short_limit_horiz_right_count*Jmp_Char_Count; int start_right_x1=Start_X; int is_right_ins_mode =0; int nMaxPixelNum = m_pParam->s_nFrameDefPixLimit; short check_tmp1[8]; short check_tmp2[8]; short check_tmp3[8]; short check_tmp4[8]; short check_tmp5[8]; // yong debug for(int suppress_i=0;suppress_i<16;suppress_i++) thres_suppress[suppress_i] = m_pParam->s_nThresholdSupress; for( thre_i=0;thre_i<8;thre_i++) { Dynamic_thres[thre_i] = m_pParam->s_nThreshold; Result_bit[thre_i]=-1; } for(i=0;i<3;i++) { Cmp_Pitch = (int)(m_pParam->s_dPitchConvReal+(i*0.2-0.2)); rPitch = (m_pParam->s_dPitchConvReal+(i*0.2-0.2)) - Cmp_Pitch; rpRatio = (int)(rPitch*8.0 + 0.5); rpRatioC = 8 - rpRatio; Cmp_Pitch2 = (int)(m_pParam->s_dPitchConvReal*2+(i*0.2-0.2)); rPitch2 = (m_pParam->s_dPitchConvReal*2+(i*0.2-0.2)) - Cmp_Pitch2; rpRatio2 = (int)(rPitch2*8.0 + 0.5); rpRatioC2 = 8 - rpRatio2; for(thre_i=0;thre_i<8;thre_i++) { mulFactorR[thre_i+i*8] = rpRatio; mulFactorC[thre_i+i*8] = rpRatioC; mulFactorR2[thre_i+i*8] = rpRatio2; mulFactorC2[thre_i+i*8] = rpRatioC2; Real_Cmp_Pitch[thre_i+i*8]=Cmp_Pitch*2; Real_Cmp_Pitch2[thre_i+i*8]=Cmp_Pitch2*2; } } Dynamic_thres[6]=1000; //2 word pass next ins max :: 255*4(2cheng2) Dynamic_thres[7]=1000; __asm { pushad mov esi, src_add movdqu xmm6, Dynamic_thres UNPACK_LOOP: mov edi, Temp_Result_Add mov ebx, limit_horiz_count pxor xmm0, xmm0 mov ecx , esi HORIZONTAL_IMG_SUM : mov eax , ecx pxor xmm3, xmm3 pxor xmm4, xmm4 pxor xmm7, xmm7 movdqu xmm1, [eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 movdqu xmm1, xmm2 punpcklbw xmm1, xmm0 // unpack lower-order bytes to words punpckhbw xmm2, xmm0 // unpack higher-order bytes to words paddw xmm3, xmm1 paddw xmm4, xmm2 add eax , horiz_count movdqu xmm1, [eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 movdqu xmm1, xmm2 punpcklbw xmm1, xmm0 // unpack lower-order bytes to words punpckhbw xmm2, xmm0 // unpack higher-order bytes to words paddw xmm3, xmm1 paddw xmm4, xmm2 movdqu [edi],xmm3 movdqu [edi+16],xmm4 add ecx ,16 add edi ,32 dec ebx jnz HORIZONTAL_IMG_SUM INIT_BLANK_SPACE: pxor xmm0, xmm0 mov ebx, start_x mov start_x1,ebx mov ebx,backup_short_limit_horiz_count mov short_limit_horiz_count,ebx mov edx,backup_limit_horiz_count mov limit_horiz_count,edx mov ecx, esi mov edi, Temp_Result_Add mov edx,backup_limit_horiz_count mov limit_horiz_count,edx mov is_right_ins_mode,0 HORIZONTAL_INS_LEFT : pxor xmm0, xmm0 pxor xmm3,xmm3 Pxor xmm4,xmm4 movdqu xmm1, [edi] paddw xmm3, xmm1 psrldq xmm1, 2 paddw xmm3, xmm1 movdqu xmm7, Result_bit mov ebx,0 LEFT_RIGHT_SHAKE : cmp ebx,48 je DEFECT_SEARCH1 movdqu xmm0, [Real_Cmp_Pitch+ebx] pextrw eax, xmm0, 0x00 movdqu xmm5, [edi+eax] movdqu xmm4, xmm5 movdqu xmm0, [mulFactorC +ebx] pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, [mulFactorR +ebx] pmullw xmm4, xmm0 paddw xmm5,xmm4 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 // movdqu check_tmp1, xmm0 // pmullw xmm0, xmm0 // movdqu check_tmp2, xmm0 pand xmm0, xmm7 // movdqu check_tmp3, xmm0 movdqu xmm7, xmm0 pmovmskb edx, xmm0 add ebx, 16 cmp edx, 0 jne LEFT_RIGHT_SHAKE jmp THRES_PASS1 DEFECT_SEARCH1 : // jmp DEFECT_SEARCH2 // test mov ebx,0 LEFT_RIGHT_SHAKE2 : cmp ebx,48 je DEFECT_SEARCH2 movdqu xmm0, [Real_Cmp_Pitch2+ebx] pextrw eax, xmm0, 0x00 movdqu xmm5, [edi+eax] movdqu xmm4, xmm5 movdqu xmm0, [mulFactorC2+ebx] pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, [mulFactorR2+ebx] pmullw xmm4, xmm0 paddw xmm5,xmm4 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 psraw xmm5, 3 // movdqu check_tmp1, xmm3 // movdqu check_tmp2, xmm5 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 pand xmm0, xmm7 movdqu xmm7, xmm0 pmovmskb edx, xmm0 add ebx, 16 cmp edx, 0 jne LEFT_RIGHT_SHAKE2 jmp THRES_PASS1 THRES_PASS1 : mov eax ,Jmp_Short_Count add edi, eax mov eax ,Jmp_Char_Count mov edx, start_x1 add edx, eax mov start_x1 ,edx dec short_limit_horiz_count jnz HORIZONTAL_INS_LEFT jmp HORIZONTAL_INS_RIGHT_START HORIZONTAL_INS_RIGHT_START : mov edi, Temp_Result_Add mov ebx, start_right_x mov start_x1,ebx mov eax ,start_x1 add edi , eax add edi, eax mov eax,backup_short_limit_horiz_right_count mov short_limit_horiz_right_count,eax mov is_right_ins_mode,1 //Right HORIZONTAL_INS_RIGHT : // jmp THRES_PASS1_RIGHT // test pxor xmm0, xmm0 mov eax , Short_Cmp_Pitch pxor xmm3,xmm3 Pxor xmm4,xmm4 sub edi, eax movdqu xmm1, [edi] //[edi -eax]bu neng directly sub paddw xmm3, xmm1 psrldq xmm1, 2 paddw xmm3, xmm1 mov ebx,0 movdqu xmm7, Result_bit RIGHT_INS_LEFT_RIGHT_SHAKE : cmp ebx,48 je DEFECT_SEARCH1_RIGHT movdqu xmm0, [Real_Cmp_Pitch+ebx] pextrw eax, xmm0, 0x00 movdqu xmm5, [edi+eax] movdqu xmm4, xmm5 movdqu xmm0, [mulFactorC+ebx] pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, [mulFactorR+ebx] pmullw xmm4, xmm0 paddw xmm5,xmm4 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 pand xmm0, xmm7 movdqu xmm7, xmm0 pmovmskb edx, xmm0 add ebx, 16 cmp edx, 0 jne RIGHT_INS_LEFT_RIGHT_SHAKE mov eax , Short_Cmp_Pitch sub edi, eax jmp THRES_PASS1_RIGHT DEFECT_SEARCH1_RIGHT : pxor xmm0, xmm0 mov eax , Short_Cmp_Pitch sub edi, eax pxor xmm3,xmm3 Pxor xmm4,xmm4 movdqu xmm1, [edi] //[edi -eax]bu neng directly sub paddw xmm3, xmm1 psrldq xmm1, 2 paddw xmm3, xmm1 mov ebx,0 RIGHT_INS_LEFT_RIGHT_SHAKE2 : cmp ebx,48 je DEFECT_SEARCH2 movdqu xmm0, [Real_Cmp_Pitch2+ebx] pextrw eax, xmm0, 0x00 movdqu xmm5, [edi+eax] movdqu xmm4, xmm5 movdqu xmm0, [mulFactorC2+ebx] pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, [mulFactorR2+ebx] pmullw xmm4, xmm0 paddw xmm5,xmm4 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm1, xmm5 psubw xmm1, xmm3 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 ; pmullw xmm0, xmm0 pand xmm0, xmm7 movdqu xmm7, xmm0 pmovmskb edx, xmm0 add ebx, 16 cmp edx, 0 jne RIGHT_INS_LEFT_RIGHT_SHAKE2 THRES_PASS1_RIGHT : mov eax , Short_Cmp_Pitch2 add edi, eax mov eax ,Jmp_Short_Count add edi, eax mov eax ,Jmp_Char_Count mov edx, start_x1 add edx, eax mov start_x1 ,edx dec short_limit_horiz_right_count jnz HORIZONTAL_INS_RIGHT NEXT_VERTICAL : add esi,horiz_count inc Start_Y dec limit_vert_count jnz UNPACK_LOOP jmp PROCESSING_END DEFECT_SEARCH2 : // 1¹øÂ° ¿öµå mov eax, start_x1 pextrw edx, xmm0, 0x00 cmp edx, 0 je NODEFECT1 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x00 pextrw edx, xmm1, 0x00 cmp ebx, edx je POSITIVE_POINT mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT1 POSITIVE_POINT : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT1: // 2¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x01 cmp edx, 0 je NODEFECT2 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x01 pextrw edx, xmm1, 0x01 cmp ebx, edx je POSITIVE_POINT1 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT2 POSITIVE_POINT1 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT2: // 3¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x02 cmp edx, 0 je NODEFECT3 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x02 pextrw edx, xmm1, 0x02 cmp ebx, edx je POSITIVE_POINT2 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT3 POSITIVE_POINT2 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT3: // 4¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x03 cmp edx, 0 je NODEFECT4 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x03 pextrw edx, xmm1, 0x03 cmp ebx, edx je POSITIVE_POINT3 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT4 POSITIVE_POINT3 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT4: // 5¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x04 cmp edx, 0 je NODEFECT5 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x04 pextrw edx, xmm1, 0x04 cmp ebx, edx je POSITIVE_POINT4 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT5 POSITIVE_POINT4 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT5: // 6¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x05 cmp edx, 0 je THRES_PASS mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x05 pextrw edx, xmm1, 0x05 cmp ebx, edx je POSITIVE_POINT5 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum THRES_PASS : mov edx,is_right_ins_mode cmp edx,1 je THRES_PASS1_RIGHT jmp THRES_PASS1 POSITIVE_POINT5 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum mov edx,is_right_ins_mode cmp edx,1 je THRES_PASS1_RIGHT jmp THRES_PASS1 PROCESSING_END : popad emms } delete[] Temp_Result_Add; return m_nDefectNum; } void CConvInt1::VConvolution_SEE3_6() { /* reverse de hua huan add jizhu. */ if(m_pParam->s_RectConv.Height() <=(int)(m_pParam->s_dPitchConvReal)*4) return; char* img = (char*)(m_pParam->s_lpBuffer + m_pParam->s_RectConv.top * m_pParam->s_nFrameWidth + m_pParam->s_RectConv.left); if(m_pParam->s_RectConv.Width() <=0) return; //lks #define PROCESSING_COUNT 7 #define MAX_THRESHOLD 1000 short Real_Cmp_Pitch[24], Real_Cmp_Pitch2[24]; short mulFactorR[24], mulFactorC[24]; short mulFactorR2[24], mulFactorC2[24]; double rPitch=0,rPitch2=0; int rpRatio=0,rpRatioC=0; int Cmp_Pitch=(int)(m_pParam->s_dPitchConvReal-0.2); int Cmp_Pitch2=(int)(m_pParam->s_dPitchConvReal*2-0.2); int rpRatio2=0,rpRatioC2=0; int thre_i=0,i=0; int horiz_count = m_pParam->s_nFrameWidth; int limit_vert_count = m_pParam->s_RectConv.Height()-1 ; int limit_vert_count_forward =limit_vert_count-Cmp_Pitch2 ; int limit_vert_count_backward = Cmp_Pitch2 ; //2*2 end int Jmp_Char_Count = PROCESSING_COUNT; // 7 pixel processing int char_limit_horiz_count = m_pParam->s_RectConv.Width()/Jmp_Char_Count ; int Remainder_count = m_pParam->s_RectConv.Width()%Jmp_Char_Count ; int temp=0; int Inspection_End=1; int backup_char_limit_horiz_count = char_limit_horiz_count; int backup_Remainder_count= Remainder_count; UCHAR *src_add = (UCHAR*)img ; UCHAR *dest_add =(UCHAR*)img + Cmp_Pitch*horiz_count; UCHAR *dest_add2 =(UCHAR*)img + Cmp_Pitch2*horiz_count; UCHAR *src_add_backward= (UCHAR*)img +limit_vert_count_forward*horiz_count- Cmp_Pitch*horiz_count; UCHAR *dest_add_minus =(UCHAR*)src_add_backward + Cmp_Pitch*horiz_count; //yuanlai src UCHAR *dest_add2_minus =(UCHAR*)dest_add_minus - Cmp_Pitch2*horiz_count; UCHAR *dest_add_backup =dest_add; UCHAR *dest_add2_backup =dest_add2; UCHAR *src_add_backup=src_add; short Dynamic_thres[8]; short Remainder_Dynamic_thres[8]; short Result_bit[8],backup_Result_bit[8]; UCHAR thres_suppress[16]; int Start_X = 0; int Start_Y = 0; int start_x = Start_X; int start_x1=Start_X; int nMaxPixelNum = m_pParam->s_nFrameDefPixLimit; int is_reverse=0; int temp_pitch=0,temp_pitch2=0; UCHAR check_tmp11[16]; short check_tmp1[8]; short check_tmp2[8]; short check_tmp3[8]; short check_tmp4[8]; short check_tmp5[8]; // yong debug for(int suppress_i=0;suppress_i<16;suppress_i++) thres_suppress[suppress_i] = m_pParam->s_nThresholdSupress; for(i=0;i<3;i++) { temp_pitch = (int)(m_pParam->s_dPitchConvReal+(i*0.2-0.2)); rPitch = (m_pParam->s_dPitchConvReal+(i*0.2-0.2)) - temp_pitch; rpRatio = (int)(rPitch*8.0 + 0.5); rpRatioC = 8 - rpRatio; temp_pitch2 = (int)(m_pParam->s_dPitchConvReal*2+(i*0.2-0.2)); rPitch2 = (m_pParam->s_dPitchConvReal*2+(i*0.2-0.2)) - temp_pitch2; rpRatio2 = (int)(rPitch2*8.0 + 0.5); rpRatioC2 = 8 - rpRatio2; for(thre_i=0;thre_i<8;thre_i++) { mulFactorR[thre_i+i*8] = rpRatio; mulFactorC[thre_i+i*8] = rpRatioC; mulFactorR2[thre_i+i*8] = rpRatio2; mulFactorC2[thre_i+i*8] = rpRatioC2; Real_Cmp_Pitch[thre_i+i*8]=(temp_pitch-Cmp_Pitch); Real_Cmp_Pitch2[thre_i+i*8]=(temp_pitch2-Cmp_Pitch2); } } for(thre_i=0;thre_i<8;thre_i++) { Dynamic_thres[thre_i] = m_pParam->s_nThreshold; Remainder_Dynamic_thres[thre_i] = m_pParam->s_nThreshold; Result_bit[thre_i]=-1; backup_Result_bit[thre_i]=-1; } Dynamic_thres[7]=MAX_THRESHOLD; //1 word pass next ins max :: 255*4(2cheng2) for(thre_i=0;thre_i<8-Remainder_count;thre_i++) { Remainder_Dynamic_thres[PROCESSING_COUNT-thre_i]=MAX_THRESHOLD; } __asm { pushad UNPACK_LOOP: mov ebx, start_x mov start_x1,ebx mov ebx,backup_char_limit_horiz_count mov char_limit_horiz_count,ebx movdqu xmm6,Dynamic_thres mov edx, backup_Remainder_count mov Remainder_count,edx HORIZONTAL_INS_LEFT : mov edi, src_add pxor xmm0, xmm0 pxor xmm3, xmm3 Pxor xmm4, xmm4 movdqu xmm1, [edi] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words paddw xmm3, xmm2 mov eax , horiz_count // add edi , eax movdqu xmm1, [edi+eax] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words paddw xmm3, xmm2 movdqu xmm4, xmm3 psrldq xmm4, 2 paddw xmm3, xmm4 mov ebx,0 mov edi , dest_add LEFT_RIGHT_SHAKE : cmp ebx,48 je DEFECT_SEARCH1 movdqu xmm0, [Real_Cmp_Pitch+ebx] pextrw eax, xmm0, 0x00 pxor xmm4, xmm4 imul eax, horiz_count movdqu xmm1, [edi+eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words // movdqu check_tmp1,xmm2 movdqu xmm0, [mulFactorC+ebx] pmullw xmm2, xmm0 paddw xmm4, xmm2 mov edx , horiz_count add eax , edx movdqu xmm1, [edi+eax] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm7, xmm2 // movdqu check_tmp1,xmm2 movdqu xmm0, [mulFactorR+ebx] pmullw xmm2, xmm0 paddw xmm4, xmm2 movdqu xmm0, [mulFactorC +ebx] pmullw xmm7, xmm0 paddw xmm4, xmm7 add eax, edx movdqu xmm1, [edi+eax] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words // movdqu check_tmp1,xmm2 movdqu xmm7, xmm2 movdqu xmm0, [mulFactorR+ebx] // movdqu check_tmp3,xmm0 pmullw xmm2, xmm0 paddw xmm4, xmm2 movdqu xmm5, xmm4 psrldq xmm4, 2 //1 num rpa paddw xmm5, xmm4 psraw xmm5, 3 // movdqu check_tmp3,xmm5 // movdqu check_tmp4,xmm3 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 movdqu xmm7, Result_bit // movdqu check_tmp1,xmm0 pand xmm0, xmm7 movdqu Result_bit,xmm0 pmovmskb edx, xmm0 add ebx, 16 cmp edx, 0 jne LEFT_RIGHT_SHAKE jmp THRES_PASS1 THRES_PASS1 : mov eax ,Jmp_Char_Count mov edx, start_x1 add edx, eax mov start_x1 ,edx movdqu xmm0, backup_Result_bit movdqu Result_bit,xmm0 mov edi, src_add add edi, eax mov src_add,edi mov edi, dest_add add edi, eax mov dest_add,edi mov edi, dest_add2 add edi, eax mov dest_add2,edi dec char_limit_horiz_count jnz HORIZONTAL_INS_LEFT mov eax, Remainder_count cmp eax, 0 je NEXT_VERTICAL mov Remainder_count,0 mov char_limit_horiz_count,1 movdqu xmm6,Remainder_Dynamic_thres jmp HORIZONTAL_INS_LEFT NEXT_VERTICAL : movdqu xmm0, backup_Result_bit movdqu Result_bit,xmm0 mov eax,horiz_count add src_add_backup,eax mov edi,src_add_backup mov src_add,edi add dest_add_backup,eax mov edi,dest_add_backup mov dest_add,edi add dest_add2_backup,eax mov edi,dest_add2_backup mov dest_add2,edi inc Start_Y dec limit_vert_count_forward jnz UNPACK_LOOP cmp Inspection_End,0 je PROCESSING_END mov eax,limit_vert_count_backward mov limit_vert_count_forward,eax mov edi,src_add_backward mov src_add,edi mov src_add_backup,edi mov edi,dest_add_minus mov dest_add,edi mov dest_add_backup,edi mov edi,dest_add2_minus mov dest_add2,edi mov dest_add2_backup,edi mov Inspection_End,0 mov is_reverse,1 jmp UNPACK_LOOP DEFECT_SEARCH1 : // jmp DEFECT_SEARCH2 //test mov edx, is_reverse cmp edx, 1 je DEFECT_SEARCH1_REVERSE mov ebx,0 LEFT_RIGHT_SHAKE2 : cmp ebx,48 je DEFECT_SEARCH2 movdqu xmm0, [Real_Cmp_Pitch2+ebx] pextrw eax, xmm0, 0x00 mov edi , dest_add2 pxor xmm4, xmm4 imul eax , horiz_count movdqu xmm1, [edi+eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm0, [mulFactorC2+ebx] pmullw xmm2, xmm0 paddw xmm4, xmm2 mov edx , horiz_count add eax , edx movdqu xmm1, [edi+eax] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm1, xmm2 movdqu xmm0, [mulFactorR2+ebx] pmullw xmm2, xmm0 paddw xmm4, xmm2 movdqu xmm0, [mulFactorC2+ebx] pmullw xmm1, xmm0 paddw xmm4, xmm1 add eax, edx movdqu xmm1, [edi+eax] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm0, [mulFactorR2+ebx] pmullw xmm2, xmm0 paddw xmm4, xmm2 movdqu xmm5,xmm4 psrldq xmm4, 2 //1 num rpa paddw xmm5, xmm4 psraw xmm5, 3 // movdqu check_tmp5,xmm5 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 movdqu xmm7, Result_bit pand xmm0, xmm7 movdqu Result_bit,xmm0 pmovmskb edx, xmm0 add ebx, 16 cmp edx, 0 jne LEFT_RIGHT_SHAKE2 jmp THRES_PASS1 DEFECT_SEARCH1_REVERSE : pxor xmm0, xmm0 pxor xmm3, xmm3 Pxor xmm4, xmm4 mov ebx,0 LEFT_RIGHT_SHAKE_REVERSE : cmp ebx,48 je DEFECT_SEARCH2 mov edi, dest_add2 pxor xmm3, xmm3 movdqu xmm1, [edi] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words // movdqu check_tmp1,xmm2 paddw xmm3, xmm2 mov eax , horiz_count movdqu xmm1, [edi+eax] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words // movdqu check_tmp1,xmm2 paddw xmm3, xmm2 movdqu xmm4, xmm3 psrldq xmm4, 2 paddw xmm3, xmm4 // movdqu check_tmp3,xmm3 movdqu xmm0, [Real_Cmp_Pitch2+ebx] pextrw eax, xmm0, 0x00 mov edi , dest_add //reverse ~ dest_add -> src pxor xmm4, xmm4 movdqu xmm1, [edi+eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words // movdqu check_tmp1,xmm2 movdqu xmm0, [mulFactorC2+ebx] pmullw xmm2, xmm0 paddw xmm4, xmm2 mov edx , horiz_count add eax , edx movdqu xmm1, [edi+eax] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words // movdqu check_tmp1,xmm2 movdqu xmm1, xmm2 movdqu xmm0, [mulFactorR2+ebx] pmullw xmm2, xmm0 paddw xmm4, xmm2 movdqu xmm0, [mulFactorC2+ebx] pmullw xmm1, xmm0 paddw xmm4, xmm1 add eax, edx movdqu xmm1, [edi+eax] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words // movdqu check_tmp1,xmm2 movdqu xmm0, [mulFactorR2+ebx] pmullw xmm2, xmm0 paddw xmm4, xmm2 movdqu xmm5, xmm4 psrldq xmm4, 2 //1 num rpa paddw xmm5, xmm4 psraw xmm5, 3 // movdqu check_tmp2, xmm5 movdqu xmm2, xmm5 movdqu xmm5, xmm3 movdqu xmm3, xmm2 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 movdqu xmm7, Result_bit pand xmm0, xmm7 movdqu Result_bit,xmm0 pmovmskb edx, xmm0 add ebx, 16 cmp edx, 0 jne LEFT_RIGHT_SHAKE_REVERSE jmp THRES_PASS1 DEFECT_SEARCH2 : // 1¹øÂ° ¿öµå mov eax, start_x1 pextrw edx, xmm0, 0x00 cmp edx, 0 je NODEFECT1 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x00 pextrw edx, xmm1, 0x00 cmp ebx, edx je POSITIVE_POINT mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT1 POSITIVE_POINT : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT1: // 2¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x01 cmp edx, 0 je NODEFECT2 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x01 pextrw edx, xmm1, 0x01 cmp ebx, edx je POSITIVE_POINT1 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT2 POSITIVE_POINT1 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT2: // 3¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x02 cmp edx, 0 je NODEFECT3 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x02 pextrw edx, xmm1, 0x02 cmp ebx, edx je POSITIVE_POINT2 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT3 POSITIVE_POINT2 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT3: // 4¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x03 cmp edx, 0 je NODEFECT4 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x03 pextrw edx, xmm1, 0x03 cmp ebx, edx je POSITIVE_POINT3 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT4 POSITIVE_POINT3 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT4: // 5¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x04 cmp edx, 0 je NODEFECT5 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x04 pextrw edx, xmm1, 0x04 cmp ebx, edx je POSITIVE_POINT4 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT5 POSITIVE_POINT4 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT5: // 6¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x05 cmp edx, 0 je NODEFECT6 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x05 pextrw edx, xmm1, 0x05 cmp ebx, edx je POSITIVE_POINT5 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT6 POSITIVE_POINT5 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT6: // 7¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x06 cmp edx, 0 je THRES_PASS1 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x06 pextrw edx, xmm1, 0x06 cmp ebx, edx je POSITIVE_POINT6 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp THRES_PASS1 POSITIVE_POINT6 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum jmp THRES_PASS1 PROCESSING_END : popad emms } } int CConvInt1::HVConvolution_SEE3_6() { #define PROCESSING_COUNT 6 int MAX_WORD_PROCESS = 16; if(m_pParam->s_RectConv.Width() <=(int)(m_pParam->s_dPitchConvReal)+MAX_WORD_PROCESS) return; if(m_pParam->s_RectConv.Height() <=(int)(m_pParam->s_dPitchScanReal)+1) return; char* img = (char*)(m_pParam->s_lpBuffer + m_pParam->s_RectConv.top * m_pParam->s_nFrameWidth + m_pParam->s_RectConv.left); int Honrizontal_Len =2; int Vertical_Len = 2; int SEARCH_WORD_PROCESS = 8 ; //lks short Real_Cmp_Pitch[24], VReal_Cmp_Pitch[24]; short mulFactorR[24], mulFactorC[24]; short VmulFactorR[24], VmulFactorC[24]; short Result_bit[8],Backup_Result_bit[8]; double rPitch=0,VrPitch=0; int rpRatio=0,rpRatioC=0; int VrpRatio=0,VrpRatioC=0; int Cmp_Pitch=(int)(m_pParam->s_dPitchConvReal); int V_Cmp_Pitch=(int)(m_pParam->s_dPitchScanReal); int thre_i=0,i=0; int vertical_count=Vertical_Len; int horiz_count = m_pParam->s_nFrameWidth; int limit_vert_count = m_pParam->s_RectConv.Height()-V_Cmp_Pitch ; int limit_horiz_count = m_pParam->s_RectConv.Width()/MAX_WORD_PROCESS+1; int backup_limit_horiz_count=limit_horiz_count; int Vwidth =m_pParam->s_nFrameWidth * vertical_count; int Short_Cmp_Pitch =Cmp_Pitch *2 ; int Jmp_Char_Count = PROCESSING_COUNT; // 6 pixel processing int Jmp_Short_Count =Jmp_Char_Count*2; int short_limit_horiz_count = (m_pParam->s_RectConv.Width()-Cmp_Pitch-Jmp_Char_Count)/Jmp_Char_Count ; int limit_bottom_vertical_count =V_Cmp_Pitch; if(m_pParam->s_RectConv.Height() <=(int)(m_pParam->s_dPitchScanReal)*2+1) limit_bottom_vertical_count=0; int temp=0; int short_limit_horiz_right_count=0; int Is_Vertical_Backward=0; int Is_Horizontal_Backward=0; temp=Jmp_Char_Count-Cmp_Pitch%Jmp_Char_Count; if(m_pParam->s_RectConv.Width() <=(int)(m_pParam->s_dPitchConvReal)*2+MAX_WORD_PROCESS) short_limit_horiz_right_count=0; else short_limit_horiz_right_count =(temp+(Cmp_Pitch))/Jmp_Char_Count; int backup_short_limit_horiz_count = short_limit_horiz_count; int backup_short_limit_horiz_right_count = short_limit_horiz_right_count; UCHAR *src_add = (UCHAR*)img ; short *Temp_Result_Add; Temp_Result_Add = new short[m_pParam->s_RectConv.Width()+Short_Cmp_Pitch]; memset(Temp_Result_Add,0,sizeof(short) * (m_pParam->s_RectConv.Width()+Short_Cmp_Pitch)); short Dynamic_thres[8]; UCHAR thres_suppress[16]; int Start_X = 0; int Start_Y = 0; int start_x = Start_X; int start_x1=Start_X; int p=m_pParam->s_RectConv.Width(); int nMaxPixelNum = m_pParam->s_nFrameDefPixLimit; /* short check_tmp1[8]; short check_tmp2[8]; short check_tmp3[8]; short check_tmp4[8]; short check_tmp5[8]; */ // yong debug for(int suppress_i=0;suppress_i<16;suppress_i++) thres_suppress[suppress_i] = m_pParam->s_nThresholdSupress; for( thre_i=0;thre_i<8;thre_i++) { Dynamic_thres[thre_i] = m_pParam->s_nThreshold; Result_bit[thre_i]=-1; Backup_Result_bit[thre_i]=-1; } for(i=0;i<3;i++) { Cmp_Pitch = (int)(m_pParam->s_dPitchConvReal+(i*0.2-0.2)); rPitch = (m_pParam->s_dPitchConvReal+(i*0.2-0.2)) - Cmp_Pitch; rpRatio = (int)(rPitch*8.0 + 0.5); rpRatioC = 8 - rpRatio; V_Cmp_Pitch = (int)(m_pParam->s_dPitchScanReal+(i*0.2-0.2)); VrPitch = (m_pParam->s_dPitchScanReal+(i*0.2-0.2)) - V_Cmp_Pitch; VrpRatio = (int)(VrPitch*8.0 + 0.5); VrpRatioC = 8 - VrpRatio; for(thre_i=0;thre_i<8;thre_i++) { mulFactorR[thre_i+i*8] = rpRatio; mulFactorC[thre_i+i*8] = rpRatioC; VmulFactorR[thre_i+i*8] = VrpRatio; VmulFactorC[thre_i+i*8] = VrpRatioC; Real_Cmp_Pitch[thre_i+i*8]=Cmp_Pitch*2; //short type VReal_Cmp_Pitch[thre_i+i*8]=V_Cmp_Pitch; } } Dynamic_thres[6]=1000; //2 word pass next ins max :: 255*4(2cheng2) Dynamic_thres[7]=1000; __asm { pushad mov esi, src_add movdqu xmm6, Dynamic_thres UNPACK_LOOP: mov edi, Temp_Result_Add mov ebx, limit_horiz_count pxor xmm0, xmm0 mov ecx , esi HORIZONTAL_IMG_SUM : mov eax , ecx pxor xmm3, xmm3 pxor xmm4, xmm4 pxor xmm7, xmm7 movdqu xmm1, [eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 movdqu xmm1, xmm2 punpcklbw xmm1, xmm0 // unpack lower-order bytes to words punpckhbw xmm2, xmm0 // unpack higher-order bytes to words paddw xmm3, xmm1 paddw xmm4, xmm2 add eax , horiz_count movdqu xmm1, [eax] movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 movdqu xmm1, xmm2 punpcklbw xmm1, xmm0 // unpack lower-order bytes to words punpckhbw xmm2, xmm0 // unpack higher-order bytes to words paddw xmm3, xmm1 paddw xmm4, xmm2 movdqu [edi],xmm3 movdqu [edi+16],xmm4 add ecx ,16 add edi ,32 dec ebx jnz HORIZONTAL_IMG_SUM INIT_BLANK_SPACE: pxor xmm0, xmm0 mov ebx, start_x mov start_x1,ebx mov ebx,backup_short_limit_horiz_count mov short_limit_horiz_count,ebx mov edx,backup_limit_horiz_count mov limit_horiz_count,edx mov ecx, esi mov edi, Temp_Result_Add mov edx,backup_limit_horiz_count mov limit_horiz_count,edx HORIZONTAL_INS_LEFT : pxor xmm0, xmm0 pxor xmm3,xmm3 Pxor xmm4,xmm4 movdqu xmm1, [edi] paddw xmm3, xmm1 psrldq xmm1, 2 paddw xmm3, xmm1 movdqu xmm7, Result_bit mov ebx,0 LEFT_RIGHT_SHAKE : cmp ebx,48 je DEFECT_SEARCH1 movdqu xmm0, [Real_Cmp_Pitch+ebx] pextrw eax, xmm0, 0x00 cmp Is_Horizontal_Backward,1 jne HORIZONTAL_FORWARD HORIZONTAL_BACKWARD : sub edi,eax movdqu xmm5,[edi] add edi,eax jmp HORIZONTAL_CALC HORIZONTAL_FORWARD : movdqu xmm5, [edi+eax] HORIZONTAL_CALC : movdqu xmm4, xmm5 movdqu xmm0, [mulFactorC +ebx] pmullw xmm5, xmm0 psrldq xmm4, 2 //1 num rpa movdqu xmm0, [mulFactorR +ebx] pmullw xmm4, xmm0 paddw xmm5,xmm4 movdqu xmm4,xmm5 psrldq xmm4, 2 //1 num rpa paddw xmm5,xmm4 psraw xmm5, 3 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 pand xmm0, xmm7 movdqu xmm7, xmm0 pmovmskb edx, xmm0 add ebx, 16 cmp edx, 0 jne LEFT_RIGHT_SHAKE jmp THRES_PASS1 DEFECT_SEARCH1 : movdqu Result_bit,xmm7 mov ebx,0 LEFT_RIGHT_SHAKE2 : cmp ebx,48 je DEFECT_SEARCH2 mov ecx , esi add ecx , start_x1 movdqu xmm0, [VReal_Cmp_Pitch+ebx] pextrw eax, xmm0, 0x00 pxor xmm4, xmm4 cmp Is_Vertical_Backward,1 jne VERTICAL_FORWARD VERTICAL_BACKWARD : imul eax,horiz_count sub ecx,eax movdqu xmm1,[ecx] jmp VERTICAL_CALC VERTICAL_FORWARD : imul eax ,horiz_count add ecx ,eax movdqu xmm1, [ecx] VERTICAL_CALC : movdqu xmm5, [thres_suppress] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm0, [VmulFactorC+ebx] pmullw xmm2, xmm0 paddw xmm4, xmm2 add ecx , horiz_count movdqu xmm1, [ecx] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm7, xmm2 movdqu xmm0, [VmulFactorR+ebx] pmullw xmm2, xmm0 paddw xmm4, xmm2 movdqu xmm0, [VmulFactorC +ebx] pmullw xmm7, xmm0 paddw xmm4, xmm7 add ecx, horiz_count movdqu xmm1, [ecx] movdqu xmm2, xmm1 psubusb xmm1, xmm5 psubusb xmm2, xmm1 pxor xmm0, xmm0 punpcklbw xmm2, xmm0 // unpack lower-order bytes to words movdqu xmm7, xmm2 movdqu xmm0, [VmulFactorR+ebx] pmullw xmm2, xmm0 paddw xmm4, xmm2 movdqu xmm5, xmm4 psrldq xmm4, 2 //1 num rpa paddw xmm5, xmm4 psraw xmm5, 3 movdqu xmm1, xmm3 psubw xmm1, xmm5 pxor xmm2, xmm2 psubw xmm2, xmm1 pmaxsw xmm2, xmm1 movdqu xmm0, xmm2 pcmpgtw xmm0, xmm6 movdqu xmm7, Result_bit pand xmm0, xmm7 movdqu xmm7, xmm0 movdqu Result_bit,xmm7 pmovmskb edx, xmm0 add ebx, 16 cmp edx, 0 jne LEFT_RIGHT_SHAKE2 jmp THRES_PASS1 THRES_PASS1 : movdqu xmm0,Backup_Result_bit movdqu Result_bit,xmm0 mov eax ,Jmp_Short_Count add edi, eax mov eax ,Jmp_Char_Count mov edx, start_x1 add edx, eax mov start_x1 ,edx dec short_limit_horiz_count jnz HORIZONTAL_INS_LEFT cmp short_limit_horiz_right_count,0 je NEXT_VERTICAL cmp Is_Horizontal_Backward,1 je NEXT_VERTICAL mov eax,short_limit_horiz_right_count mov short_limit_horiz_count,eax mov Is_Horizontal_Backward,1 jmp HORIZONTAL_INS_LEFT NEXT_VERTICAL : mov Is_Horizontal_Backward,0 add esi,horiz_count inc Start_Y dec limit_vert_count jnz UNPACK_LOOP cmp limit_bottom_vertical_count,0 je PROCESSING_END cmp Is_Vertical_Backward,1 je PROCESSING_END mov eax,limit_bottom_vertical_count mov limit_vert_count,eax mov Is_Vertical_Backward,1 jmp UNPACK_LOOP DEFECT_SEARCH2 : // 1¹øÂ° ¿öµå mov eax, start_x1 pextrw edx, xmm0, 0x00 cmp edx, 0 je NODEFECT1 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x00 pextrw edx, xmm1, 0x00 cmp ebx, edx je POSITIVE_POINT mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT1 POSITIVE_POINT : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT1: // 2¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x01 cmp edx, 0 je NODEFECT2 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x01 pextrw edx, xmm1, 0x01 cmp ebx, edx je POSITIVE_POINT1 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT2 POSITIVE_POINT1 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT2: // 3¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x02 cmp edx, 0 je NODEFECT3 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x02 pextrw edx, xmm1, 0x02 cmp ebx, edx je POSITIVE_POINT2 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT3 POSITIVE_POINT2 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT3: // 4¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x03 cmp edx, 0 je NODEFECT4 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x03 pextrw edx, xmm1, 0x03 cmp ebx, edx je POSITIVE_POINT3 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT4 POSITIVE_POINT3 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT4: // 5¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x04 cmp edx, 0 je NODEFECT5 mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x04 pextrw edx, xmm1, 0x04 cmp ebx, edx je POSITIVE_POINT4 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum jmp NODEFECT5 POSITIVE_POINT4 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum NODEFECT5: // 6¹øÂ° ¿öµå inc eax pextrw edx, xmm0, 0x05 cmp edx, 0 je THRES_PASS mov edx, nMaxPixelNum cmp m_nDefectNum, edx je PROCESSING_END mov edx, m_nDefectNum mov ebx,Start_Y mov m_nDefectPointX[4*edx], eax mov m_nDefectPointY[4*edx], ebx pextrw ebx, xmm2, 0x05 pextrw edx, xmm1, 0x05 cmp ebx, edx je POSITIVE_POINT5 mov ebx, m_nDefectNum mov m_sDefectType[2*ebx], 0 mov m_nDefectValue[4*ebx], edx inc m_nDefectNum THRES_PASS : jmp THRES_PASS1 POSITIVE_POINT5 : mov edx, m_nDefectNum mov m_sDefectType[2*edx], 1 mov m_nDefectValue[4*edx], ebx inc m_nDefectNum jmp THRES_PASS1 PROCESSING_END : popad emms } delete[] Temp_Result_Add; return m_nDefectNum; }