1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
|
| ; Syntax: MASM (ml64), x86-64. The register roles below follow the
| ; Microsoft x64 calling convention (integer args in rcx, rdx, r8, r9).
| .data
|
|
| .code
|
| ; Text alias for jmp. Not referenced by the code visible in this file.
| _return equ jmp
|
|
| ; SSE working registers. All of them are volatile (xmm0-xmm5) under the
| ; Microsoft x64 convention, so they need no save/restore. xxZero used to be
| ; xmm7, which is callee-saved there and was being overwritten without a save.
| xxZero equ xmm4                     ; all-zero register used to widen bytes to words
| xxSrc equ xmm0                      ; 16 bytes loaded from the first buffer
| xxTemp equ xmm1                     ; copy of xxSrc for the high-half unpack
| xxSrc2 equ xmm2                     ; 16 bytes loaded from the second buffer
| xxTemp2 equ xmm3                    ; copy of xxSrc2 for the high-half unpack
| xxRslt equ xmm5                     ; four dword partial sums
|
|
|
| ; Parameters
| rrSrc equ rsi                       ; first buffer cursor (copied from rcx by the routine)
| rrSrc2 equ rdi                      ; second buffer cursor (copied from rdx by the routine)
| rrLen equ r8                        ; 3rd integer argument register
| rrFlag equ r9                       ; 4th integer argument register
|
|
|
| ; Local variables
|
|
| ;-----------------------------------------------------------------------
| ; _asm_GetCC_8u
| ;   Sums products of unsigned bytes:
| ;     second buffer != 0 : sum(src[i] * src2[i])
| ;     second buffer == 0 : sum(src[i] * src[i])
| ;   Whole 16-byte groups are handled with SSE2, the remainder byte by byte.
| ;
| ; In (registers as read by this routine):
| ;   rcx = src    first byte buffer
| ;   rdx = src2   second byte buffer, or 0 for the single-buffer form
| ;   r8  = len    byte count (used as a full 64-bit value)
| ;   r9  = flag   load selector, compared as a signed 64-bit value:
| ;                  two buffers : <1 -> movdqa on both
| ;                                 1 -> movdqa on src, movdqu on src2
| ;                                >1 -> movdqu on both
| ;                  one buffer  : <=1 -> movdqa, >1 -> movdqu
| ; Out:
| ;   rax = accumulated sum
| ; Clobbers: rcx, rdx, r8, flags and the xmm registers behind the xx* aliases.
| ;           rdi, rsi and rbx are saved and restored.
| ; Notes:
| ;   - movdqa faults on an address that is not 16-byte aligned, so the flag
| ;     must match the real alignment of the buffers.
| ;   - Each dword lane of xxRslt grows by at most 4 * 255 * 255 per iteration
| ;     and wraps modulo 2^32, i.e. after roughly 16.5K iterations (~258 KB)
| ;     of worst-case data.
| ;   - NOTE(review): the frame for Rslt and its release before RET are
| ;     presumably emitted by the assembler for PROC + LOCAL - confirm.
| ;-----------------------------------------------------------------------
| _asm_GetCC_8u PROC
|     local Rslt[2] : qword           ; 16-byte scratch for the horizontal add
|
|     push    rdi
|     push    rsi
|     push    rbx
|
|     pxor    xxZero, xxZero
|     pxor    xxRslt, xxRslt
|
|     mov     rrSrc, rcx
|     mov     rrSrc2, rdx
|     add     rrLen, rrSrc            ; rrLen = src + len (one past the end)
|     sub     rrLen, 10h              ; rrLen = last address a 16-byte load may start at
|
|     test    rrSrc2, rrSrc2
|     jz      ONE_BUFF
|
|     ; Two buffers: choose the loop by flag.
|     cmp     rrFlag, 1
|     je      LOOP_AU2
|     jg      LOOP_UU2
|     jmp     LOOP_AA2
|
| ONE_BUFF:
|     cmp     rrFlag, 1
|     jg      LOOP_UU
|                                     ; otherwise fall through into LOOP_AA
|
| ; ---- one buffer, movdqa loads: sum of squares ----
| LOOP_AA:
|     cmp     rrSrc, rrLen
|     ja      SUM_RSLT                ; addresses are unsigned
|     movdqa  xxSrc, xmmword ptr [rrSrc]
|     movdqa  xxTemp, xxSrc
|
|     punpckhbw xxTemp, xxZero        ; bytes 8..15 -> words
|     pmaddwd xxTemp, xxTemp          ; squares, pairwise summed into dwords
|     punpcklbw xxSrc, xxZero         ; bytes 0..7 -> words
|     pmaddwd xxSrc, xxSrc
|     paddd   xxTemp, xxSrc
|     paddd   xxRslt, xxTemp
|     add     rrSrc, 10h
|     jmp     LOOP_AA
|
| ; ---- two buffers, movdqa on both ----
| LOOP_AA2:
|     cmp     rrSrc, rrLen
|     ja      SUM_RSLT
|     movdqa  xxSrc, xmmword ptr [rrSrc]
|     movdqa  xxSrc2, xmmword ptr [rrSrc2]
|     movdqa  xxTemp2, xxSrc2
|     punpckhbw xxTemp2, xxZero
|     movdqa  xxTemp, xxSrc
|     punpckhbw xxTemp, xxZero
|     pmaddwd xxTemp, xxTemp2         ; high halves: src * src2
|     paddd   xxRslt, xxTemp
|
|     punpcklbw xxSrc, xxZero
|     punpcklbw xxSrc2, xxZero
|     pmaddwd xxSrc, xxSrc2           ; low halves: src * src2
|     paddd   xxRslt, xxSrc
|     add     rrSrc, 10h
|     add     rrSrc2, 10h
|     jmp     LOOP_AA2
|
| ; ---- two buffers, movdqa on src, movdqu on src2 ----
| LOOP_AU2:
|     cmp     rrSrc, rrLen
|     ja      SUM_RSLT
|     movdqu  xxSrc2, xmmword ptr [rrSrc2]
|     movdqa  xxSrc, xmmword ptr [rrSrc]
|     movdqa  xxTemp, xxSrc
|     movdqa  xxTemp2, xxSrc2
|     punpckhbw xxTemp, xxZero
|     punpckhbw xxTemp2, xxZero
|     pmaddwd xxTemp, xxTemp2
|     paddd   xxRslt, xxTemp
|     punpcklbw xxSrc, xxZero
|     punpcklbw xxSrc2, xxZero
|     pmaddwd xxSrc, xxSrc2
|     paddd   xxRslt, xxSrc
|     add     rrSrc, 10h
|     add     rrSrc2, 10h
|     jmp     LOOP_AU2
|
| ; ---- two buffers, movdqu on both ----
| LOOP_UU2:
|     cmp     rrSrc, rrLen
|     ja      SUM_RSLT
|     movdqu  xxSrc, xmmword ptr [rrSrc]
|     movdqu  xxSrc2, xmmword ptr [rrSrc2]
|
|     movdqa  xxTemp2, xxSrc2
|     movdqa  xxTemp, xxSrc
|     punpckhbw xxTemp, xxZero
|     punpckhbw xxTemp2, xxZero
|     pmaddwd xxTemp, xxTemp2
|     paddd   xxRslt, xxTemp
|
|     punpcklbw xxSrc, xxZero
|     punpcklbw xxSrc2, xxZero
|     pmaddwd xxSrc, xxSrc2
|     paddd   xxRslt, xxSrc
|     add     rrSrc, 10h
|     add     rrSrc2, 10h
|     jmp     LOOP_UU2
|
| ; ---- one buffer, movdqu loads: sum of squares ----
| LOOP_UU:
|     cmp     rrSrc, rrLen
|     ja      SUM_RSLT
|     movdqu  xxSrc, xmmword ptr [rrSrc]
|     movdqa  xxTemp, xxSrc
|
|     punpckhbw xxTemp, xxZero
|     pmaddwd xxTemp, xxTemp
|     paddd   xxRslt, xxTemp
|     punpcklbw xxSrc, xxZero
|     pmaddwd xxSrc, xxSrc
|     paddd   xxRslt, xxSrc
|     add     rrSrc, 10h
|     jmp     LOOP_UU
|
| ; ---- fold the four dword lanes into rax ----
| SUM_RSLT:
|     lea     rcx, Rslt
|     movdqu  xmmword ptr [rcx], xxRslt
|     mov     eax, dword ptr [rcx]        ; 32-bit write zero-extends into rax
|     mov     edx, dword ptr [rcx + 4]
|     add     rax, rdx                    ; 64-bit adds keep the carry between lanes
|     mov     edx, dword ptr [rcx + 8]
|     add     rax, rdx
|     mov     edx, dword ptr [rcx + 12]
|     add     rax, rdx
|     add     rrLen, 10h                  ; rrLen = src + len again (one past the end)
|
|     test    rrSrc2, rrSrc2
|     jz      LOOP_CC
|
| ; ---- scalar tail, two buffers ----
| LOOP_CC2:
|     cmp     rrSrc, rrLen
|     jae     END_FUNC
|     movzx   ebx, byte ptr [rrSrc]
|     movzx   ecx, byte ptr [rrSrc2]
|     imul    ebx, ecx                    ; ebx = src[i] * src2[i]
|     add     rax, rbx                    ; add the product (was adding rcx, the raw src2 byte)
|     inc     rrSrc
|     inc     rrSrc2
|     jmp     LOOP_CC2
|
| ; ---- scalar tail, one buffer ----
| LOOP_CC:
|     cmp     rrSrc, rrLen
|     jae     END_FUNC
|     movzx   ecx, byte ptr [rrSrc]
|     imul    ecx, ecx                    ; ecx = src[i]^2
|     add     rax, rcx
|     inc     rrSrc
|     jmp     LOOP_CC
|
| END_FUNC:
|     pop     rbx
|     pop     rsi
|     pop     rdi
|
|     RET
| _asm_GetCC_8u ENDP
|
| end
|
|