/* Annotated source listing (commit 80a47a2c); table formatting removed. */
1 | /** |
2 | * \file bn_mul.h | |
3 | * | |
4 | * Based on XySSL: Copyright (C) 2006-2008 Christophe Devine | |
5 | * | |
6 | * Copyright (C) 2009 Paul Bakker <polarssl_maintainer at polarssl dot org> | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License as published by | |
10 | * the Free Software Foundation; either version 2 of the License, or | |
11 | * (at your option) any later version. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License along | |
19 | * with this program; if not, write to the Free Software Foundation, Inc., | |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
21 | */ | |
22 | /* | |
23 | * Multiply source vector [s] with b, add result | |
24 | * to destination vector [d] and set carry c. | |
25 | * | |
26 | * Currently supports: | |
27 | * | |
28 | * . IA-32 (386+) . AMD64 / EM64T | |
29 | * . IA-32 (SSE2) . Motorola 68000 | |
30 | * . PowerPC, 32-bit . MicroBlaze | |
31 | * . PowerPC, 64-bit . TriCore | |
32 | * . SPARC v8 . ARM v3+ | |
33 | * . Alpha . MIPS32 | |
34 | * . C, longlong . C, generic | |
35 | */ | |
36 | ||
37 | /* $Cambridge: exim/src/src/pdkim/bn_mul.h,v 1.2 2009/06/10 07:34:05 tom Exp $ */ | |
38 | ||
39 | #ifndef POLARSSL_BN_MUL_H | |
40 | #define POLARSSL_BN_MUL_H | |
41 | ||
42 | #if defined(POLARSSL_HAVE_ASM) | |
43 | ||
44 | #if defined(__GNUC__) | |
#if defined(__i386__)

/*
 * IA-32 (386+), GCC inline assembly.
 *
 * The MULADDC_* macros implement one step of multiply-and-accumulate
 * over the source vector: *d += *s * b + carry, advancing s and d and
 * leaving the new carry in c (see the file header comment).  They
 * expand to sequences of separate asm() statements and require the
 * variables s, d, c, b (and t, here) to be in scope in the caller.
 *
 * INIT saves %ebx into the local t before loading the multiplier b
 * into it (presumably because %ebx is the PIC GOT register on this
 * ABI — TODO confirm), then loads s/d/c into %esi/%edi/%ecx.
 *
 * NOTE(review): register contents must survive between the separate
 * asm() statements of INIT/CORE/STOP; an optimizing compiler gives no
 * such guarantee — verify against the toolchains this is built with.
 */
#define MULADDC_INIT \
    asm( "movl %%ebx, %0 " : "=m" (t)); \
    asm( "movl %0, %%esi " :: "m" (s)); \
    asm( "movl %0, %%edi " :: "m" (d)); \
    asm( "movl %0, %%ecx " :: "m" (c)); \
    asm( "movl %0, %%ebx " :: "m" (b));

/* One limb: lodsl fetches *s++ into %eax; mull produces the 64-bit
 * product in %edx:%eax; add the carry (%ecx) and *d with carry
 * propagation into %edx; stosl stores the low word to *d++ and the
 * high word becomes the new carry in %ecx. */
#define MULADDC_CORE \
    asm( "lodsl " ); \
    asm( "mull %ebx " ); \
    asm( "addl %ecx, %eax " ); \
    asm( "adcl $0, %edx " ); \
    asm( "addl (%edi), %eax " ); \
    asm( "adcl $0, %edx " ); \
    asm( "movl %edx, %ecx " ); \
    asm( "stosl " );

#if defined(POLARSSL_HAVE_SSE2)

/* Eight limbs unrolled using SSE2 integer ops on the MMX registers
 * (pmuludq = 32x32->64 multiply, paddq = 64-bit add).  The running
 * carry is carried through %mm1 and shifted down 32 bits (psrlq)
 * after each stored limb; at the end the pointers have advanced by
 * 32 bytes (8 limbs of 4) and the final carry returns to %ecx. */
#define MULADDC_HUIT \
    asm( "movd %ecx, %mm1 " ); \
    asm( "movd %ebx, %mm0 " ); \
    asm( "movd (%edi), %mm3 " ); \
    asm( "paddq %mm3, %mm1 " ); \
    asm( "movd (%esi), %mm2 " ); \
    asm( "pmuludq %mm0, %mm2 " ); \
    asm( "movd 4(%esi), %mm4 " ); \
    asm( "pmuludq %mm0, %mm4 " ); \
    asm( "movd 8(%esi), %mm6 " ); \
    asm( "pmuludq %mm0, %mm6 " ); \
    asm( "movd 12(%esi), %mm7 " ); \
    asm( "pmuludq %mm0, %mm7 " ); \
    asm( "paddq %mm2, %mm1 " ); \
    asm( "movd 4(%edi), %mm3 " ); \
    asm( "paddq %mm4, %mm3 " ); \
    asm( "movd 8(%edi), %mm5 " ); \
    asm( "paddq %mm6, %mm5 " ); \
    asm( "movd 12(%edi), %mm4 " ); \
    asm( "paddq %mm4, %mm7 " ); \
    asm( "movd %mm1, (%edi) " ); \
    asm( "movd 16(%esi), %mm2 " ); \
    asm( "pmuludq %mm0, %mm2 " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "movd 20(%esi), %mm4 " ); \
    asm( "pmuludq %mm0, %mm4 " ); \
    asm( "paddq %mm3, %mm1 " ); \
    asm( "movd 24(%esi), %mm6 " ); \
    asm( "pmuludq %mm0, %mm6 " ); \
    asm( "movd %mm1, 4(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "movd 28(%esi), %mm3 " ); \
    asm( "pmuludq %mm0, %mm3 " ); \
    asm( "paddq %mm5, %mm1 " ); \
    asm( "movd 16(%edi), %mm5 " ); \
    asm( "paddq %mm5, %mm2 " ); \
    asm( "movd %mm1, 8(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "paddq %mm7, %mm1 " ); \
    asm( "movd 20(%edi), %mm5 " ); \
    asm( "paddq %mm5, %mm4 " ); \
    asm( "movd %mm1, 12(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "paddq %mm2, %mm1 " ); \
    asm( "movd 24(%edi), %mm5 " ); \
    asm( "paddq %mm5, %mm6 " ); \
    asm( "movd %mm1, 16(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "paddq %mm4, %mm1 " ); \
    asm( "movd 28(%edi), %mm5 " ); \
    asm( "paddq %mm5, %mm3 " ); \
    asm( "movd %mm1, 20(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "paddq %mm6, %mm1 " ); \
    asm( "movd %mm1, 24(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "paddq %mm3, %mm1 " ); \
    asm( "movd %mm1, 28(%edi) " ); \
    asm( "addl $32, %edi " ); \
    asm( "addl $32, %esi " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "movd %mm1, %ecx " );

/* SSE2 variant of STOP: clear MMX state (emms), restore %ebx from t,
 * write the carry/pointers back to c/d/s and declare the clobbers. */
#define MULADDC_STOP \
    asm( "emms " ); \
    asm( "movl %0, %%ebx " :: "m" (t)); \
    asm( "movl %%ecx, %0 " : "=m" (c)); \
    asm( "movl %%edi, %0 " : "=m" (d)); \
    asm( "movl %%esi, %0 " : "=m" (s) :: \
    "eax", "ecx", "edx", "esi", "edi" );

#else

/* Non-SSE2 STOP: same write-back as above, minus the emms. */
#define MULADDC_STOP \
    asm( "movl %0, %%ebx " :: "m" (t)); \
    asm( "movl %%ecx, %0 " : "=m" (c)); \
    asm( "movl %%edi, %0 " : "=m" (d)); \
    asm( "movl %%esi, %0 " : "=m" (s) :: \
    "eax", "ecx", "edx", "esi", "edi" );

#endif /* SSE2 */
#endif /* i386 */
148 | ||
#if defined(__amd64__) || defined (__x86_64__)

/*
 * AMD64 / EM64T: 64-bit limbs.  Register map: %rsi = s, %rdi = d,
 * %rcx = carry, %rbx = b, %r8 kept at zero as a convenient source of
 * 0 for carry propagation.  Same MULADDC contract as the other
 * ports: one CORE step computes *d++ += *s++ * b + carry.
 */
#define MULADDC_INIT \
    asm( "movq %0, %%rsi " :: "m" (s)); \
    asm( "movq %0, %%rdi " :: "m" (d)); \
    asm( "movq %0, %%rcx " :: "m" (c)); \
    asm( "movq %0, %%rbx " :: "m" (b)); \
    asm( "xorq %r8, %r8 " );

/* mulq leaves the 128-bit product in %rdx:%rax; the old carry is
 * folded into %rax, *d is updated in memory (addq to (%rdi)), and the
 * high half plus both carry-outs become the new carry in %rcx. */
#define MULADDC_CORE \
    asm( "movq (%rsi),%rax " ); \
    asm( "mulq %rbx " ); \
    asm( "addq $8, %rsi " ); \
    asm( "addq %rcx, %rax " ); \
    asm( "movq %r8, %rcx " ); \
    asm( "adcq $0, %rdx " ); \
    asm( "nop " ); \
    asm( "addq %rax, (%rdi) " ); \
    asm( "adcq %rdx, %rcx " ); \
    asm( "addq $8, %rdi " );

/* Write the final carry and advanced pointers back; clobber list
 * covers every register touched by INIT/CORE. */
#define MULADDC_STOP \
    asm( "movq %%rcx, %0 " : "=m" (c)); \
    asm( "movq %%rdi, %0 " : "=m" (d)); \
    asm( "movq %%rsi, %0 " : "=m" (s) :: \
    "rax", "rcx", "rdx", "rbx", "rsi", "rdi", "r8" );

#endif /* AMD64 */
177 | ||
#if defined(__mc68020__) || defined(__mcpu32__)

/*
 * Motorola 68020+/CPU32.  Register map: %a2 = s, %a3 = d, %d3 = carry,
 * %d2 = b; %d0 is kept at zero so addxl %d0,... propagates only the
 * X (extend) bit.  mulul produces the 64-bit product in %d4:%d1.
 */
#define MULADDC_INIT \
    asm( "movl %0, %%a2 " :: "m" (s)); \
    asm( "movl %0, %%a3 " :: "m" (d)); \
    asm( "movl %0, %%d3 " :: "m" (c)); \
    asm( "movl %0, %%d2 " :: "m" (b)); \
    asm( "moveq #0, %d0 " );

/* One limb: product in %d4:%d1, add old carry, add into *d++ via the
 * memory-destination addl, and rebuild the carry in %d3 from the high
 * word plus the extend bit. */
#define MULADDC_CORE \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d4:%d1 " ); \
    asm( "addl %d3, %d1 " ); \
    asm( "addxl %d0, %d4 " ); \
    asm( "moveq #0, %d3 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "addxl %d4, %d3 " );

#define MULADDC_STOP \
    asm( "movl %%d3, %0 " : "=m" (c)); \
    asm( "movl %%a3, %0 " : "=m" (d)); \
    asm( "movl %%a2, %0 " : "=m" (s) :: \
    "d0", "d1", "d2", "d3", "d4", "a2", "a3" );

/* Eight limbs unrolled; the carry ping-pongs between %d3 and %d4 so
 * the high word of one step becomes the carry input of the next via
 * the X bit (addxl).  NOTE(review): unlike CORE, the first add here is
 * addxl — correctness depends on the X bit's state on entry; confirm
 * against the callers before touching this sequence. */
#define MULADDC_HUIT \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d4:%d1 " ); \
    asm( "addxl %d3, %d1 " ); \
    asm( "addxl %d0, %d4 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d3:%d1 " ); \
    asm( "addxl %d4, %d1 " ); \
    asm( "addxl %d0, %d3 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d4:%d1 " ); \
    asm( "addxl %d3, %d1 " ); \
    asm( "addxl %d0, %d4 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d3:%d1 " ); \
    asm( "addxl %d4, %d1 " ); \
    asm( "addxl %d0, %d3 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d4:%d1 " ); \
    asm( "addxl %d3, %d1 " ); \
    asm( "addxl %d0, %d4 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d3:%d1 " ); \
    asm( "addxl %d4, %d1 " ); \
    asm( "addxl %d0, %d3 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d4:%d1 " ); \
    asm( "addxl %d3, %d1 " ); \
    asm( "addxl %d0, %d4 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d3:%d1 " ); \
    asm( "addxl %d4, %d1 " ); \
    asm( "addxl %d0, %d3 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "addxl %d0, %d3 " );

#endif /* MC68000 */
246 | ||
#if defined(__powerpc__) || defined(__ppc__)
#if defined(__powerpc64__) || defined(__ppc64__)

#if defined(__MACH__) && defined(__APPLE__)

/*
 * PowerPC 64-bit, Apple assembler syntax (bare register names).
 * Register map: r3 = s, r4 = d, r5 = carry, r6 = b.  Pointers are
 * pre-decremented by one limb (8 bytes) so the load/store-with-update
 * forms (ldu/stdu) advance them; addic r5,r5,0 clears the CA bit so
 * the adde/addze chain in CORE starts with no stale carry.
 */
#define MULADDC_INIT \
    asm( "ld r3, %0 " :: "m" (s)); \
    asm( "ld r4, %0 " :: "m" (d)); \
    asm( "ld r5, %0 " :: "m" (c)); \
    asm( "ld r6, %0 " :: "m" (b)); \
    asm( "addi r3, r3, -8 " ); \
    asm( "addi r4, r4, -8 " ); \
    asm( "addic r5, r5, 0 " );

/* One limb: low/high product via mulld/mulhdu, fold in the carry with
 * adde, fold in *d with addc, store with update; addze captures the
 * carry-out into r5 for the next step. */
#define MULADDC_CORE \
    asm( "ldu r7, 8(r3) " ); \
    asm( "mulld r8, r7, r6 " ); \
    asm( "mulhdu r9, r7, r6 " ); \
    asm( "adde r8, r8, r5 " ); \
    asm( "ld r7, 8(r4) " ); \
    asm( "addze r5, r9 " ); \
    asm( "addc r8, r8, r7 " ); \
    asm( "stdu r8, 8(r4) " );

/* Absorb the final CA bit, undo the pre-decrement, write back. */
#define MULADDC_STOP \
    asm( "addze r5, r5 " ); \
    asm( "addi r4, r4, 8 " ); \
    asm( "addi r3, r3, 8 " ); \
    asm( "std r5, %0 " : "=m" (c)); \
    asm( "std r4, %0 " : "=m" (d)); \
    asm( "std r3, %0 " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#else

/* PowerPC 64-bit, GNU assembler syntax (%r-prefixed registers);
 * otherwise identical to the Apple variant above. */
#define MULADDC_INIT \
    asm( "ld %%r3, %0 " :: "m" (s)); \
    asm( "ld %%r4, %0 " :: "m" (d)); \
    asm( "ld %%r5, %0 " :: "m" (c)); \
    asm( "ld %%r6, %0 " :: "m" (b)); \
    asm( "addi %r3, %r3, -8 " ); \
    asm( "addi %r4, %r4, -8 " ); \
    asm( "addic %r5, %r5, 0 " );

#define MULADDC_CORE \
    asm( "ldu %r7, 8(%r3) " ); \
    asm( "mulld %r8, %r7, %r6 " ); \
    asm( "mulhdu %r9, %r7, %r6 " ); \
    asm( "adde %r8, %r8, %r5 " ); \
    asm( "ld %r7, 8(%r4) " ); \
    asm( "addze %r5, %r9 " ); \
    asm( "addc %r8, %r8, %r7 " ); \
    asm( "stdu %r8, 8(%r4) " );

#define MULADDC_STOP \
    asm( "addze %r5, %r5 " ); \
    asm( "addi %r4, %r4, 8 " ); \
    asm( "addi %r3, %r3, 8 " ); \
    asm( "std %%r5, %0 " : "=m" (c)); \
    asm( "std %%r4, %0 " : "=m" (d)); \
    asm( "std %%r3, %0 " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#endif

#else /* PPC32 */

#if defined(__MACH__) && defined(__APPLE__)

/* PowerPC 32-bit, Apple syntax: same structure as the 64-bit port but
 * with 4-byte limbs (lwz/lwzu/stwu, mullw/mulhwu). */
#define MULADDC_INIT \
    asm( "lwz r3, %0 " :: "m" (s)); \
    asm( "lwz r4, %0 " :: "m" (d)); \
    asm( "lwz r5, %0 " :: "m" (c)); \
    asm( "lwz r6, %0 " :: "m" (b)); \
    asm( "addi r3, r3, -4 " ); \
    asm( "addi r4, r4, -4 " ); \
    asm( "addic r5, r5, 0 " );

#define MULADDC_CORE \
    asm( "lwzu r7, 4(r3) " ); \
    asm( "mullw r8, r7, r6 " ); \
    asm( "mulhwu r9, r7, r6 " ); \
    asm( "adde r8, r8, r5 " ); \
    asm( "lwz r7, 4(r4) " ); \
    asm( "addze r5, r9 " ); \
    asm( "addc r8, r8, r7 " ); \
    asm( "stwu r8, 4(r4) " );

#define MULADDC_STOP \
    asm( "addze r5, r5 " ); \
    asm( "addi r4, r4, 4 " ); \
    asm( "addi r3, r3, 4 " ); \
    asm( "stw r5, %0 " : "=m" (c)); \
    asm( "stw r4, %0 " : "=m" (d)); \
    asm( "stw r3, %0 " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#else

/* PowerPC 32-bit, GNU assembler syntax. */
#define MULADDC_INIT \
    asm( "lwz %%r3, %0 " :: "m" (s)); \
    asm( "lwz %%r4, %0 " :: "m" (d)); \
    asm( "lwz %%r5, %0 " :: "m" (c)); \
    asm( "lwz %%r6, %0 " :: "m" (b)); \
    asm( "addi %r3, %r3, -4 " ); \
    asm( "addi %r4, %r4, -4 " ); \
    asm( "addic %r5, %r5, 0 " );

#define MULADDC_CORE \
    asm( "lwzu %r7, 4(%r3) " ); \
    asm( "mullw %r8, %r7, %r6 " ); \
    asm( "mulhwu %r9, %r7, %r6 " ); \
    asm( "adde %r8, %r8, %r5 " ); \
    asm( "lwz %r7, 4(%r4) " ); \
    asm( "addze %r5, %r9 " ); \
    asm( "addc %r8, %r8, %r7 " ); \
    asm( "stwu %r8, 4(%r4) " );

#define MULADDC_STOP \
    asm( "addze %r5, %r5 " ); \
    asm( "addi %r4, %r4, 4 " ); \
    asm( "addi %r3, %r3, 4 " ); \
    asm( "stw %%r5, %0 " : "=m" (c)); \
    asm( "stw %%r4, %0 " : "=m" (d)); \
    asm( "stw %%r3, %0 " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#endif

#endif /* PPC32 */
#endif /* PPC64 */
378 | ||
#if defined(__sparc__)

/*
 * SPARC v8.  Register map: %o0 = s, %o1 = d, %o2 = carry, %o3 = b.
 * umul leaves the low 32 bits of the product in %o4 and the high 32
 * bits in the %y register (read back with rd %y).
 */
#define MULADDC_INIT \
    asm( "ld %0, %%o0 " :: "m" (s)); \
    asm( "ld %0, %%o1 " :: "m" (d)); \
    asm( "ld %0, %%o2 " :: "m" (c)); \
    asm( "ld %0, %%o3 " :: "m" (b));

/* One limb: addcc/addx pairs propagate the carry flag into %g1, which
 * accumulates the high half; the final addx deposits the new carry in
 * %o2 while both pointers advance by 4 (inc). */
#define MULADDC_CORE \
    asm( "ld [%o0], %o4 " ); \
    asm( "inc 4, %o0 " ); \
    asm( "ld [%o1], %o5 " ); \
    asm( "umul %o3, %o4, %o4 " ); \
    asm( "addcc %o4, %o2, %o4 " ); \
    asm( "rd %y, %g1 " ); \
    asm( "addx %g1, 0, %g1 " ); \
    asm( "addcc %o4, %o5, %o4 " ); \
    asm( "st %o4, [%o1] " ); \
    asm( "addx %g1, 0, %o2 " ); \
    asm( "inc 4, %o1 " );

#define MULADDC_STOP \
    asm( "st %%o2, %0 " : "=m" (c)); \
    asm( "st %%o1, %0 " : "=m" (d)); \
    asm( "st %%o0, %0 " : "=m" (s) :: \
    "g1", "o0", "o1", "o2", "o3", "o4", "o5" );

#endif /* SPARCv8 */
407 | ||
#if defined(__microblaze__) || defined(microblaze)

/*
 * MicroBlaze.  The core multiply (mul) is used on 16-bit halves, so
 * INIT pre-splits the multiplier: r7 = low 16 bits of b, r6 = high 16
 * bits.  Register map: r3 = s, r4 = d, r5 = carry.
 *
 * NOTE(review): CORE loads *s as two halfword (lhui) loads, low half
 * first — this assumes little-endian halfword order within a limb;
 * confirm for big-endian MicroBlaze configurations.
 */
#define MULADDC_INIT \
    asm( "lwi r3, %0 " :: "m" (s)); \
    asm( "lwi r4, %0 " :: "m" (d)); \
    asm( "lwi r5, %0 " :: "m" (c)); \
    asm( "lwi r6, %0 " :: "m" (b)); \
    asm( "andi r7, r6, 0xffff" ); \
    asm( "bsrli r6, r6, 16 " );

/* Schoolbook 16x16 partial products: r12 accumulates the low 32 bits,
 * r13 the high 32 bits; addc r13,r13,r0 folds each carry flag into the
 * high word (r0 is the hardwired zero register).  Finally *d and the
 * running carry r5 are added in, the result stored, and d advanced. */
#define MULADDC_CORE \
    asm( "lhui r8, r3, 0 " ); \
    asm( "addi r3, r3, 2 " ); \
    asm( "lhui r9, r3, 0 " ); \
    asm( "addi r3, r3, 2 " ); \
    asm( "mul r10, r9, r6 " ); \
    asm( "mul r11, r8, r7 " ); \
    asm( "mul r12, r9, r7 " ); \
    asm( "mul r13, r8, r6 " ); \
    asm( "bsrli r8, r10, 16 " ); \
    asm( "bsrli r9, r11, 16 " ); \
    asm( "add r13, r13, r8 " ); \
    asm( "add r13, r13, r9 " ); \
    asm( "bslli r10, r10, 16 " ); \
    asm( "bslli r11, r11, 16 " ); \
    asm( "add r12, r12, r10 " ); \
    asm( "addc r13, r13, r0 " ); \
    asm( "add r12, r12, r11 " ); \
    asm( "addc r13, r13, r0 " ); \
    asm( "lwi r10, r4, 0 " ); \
    asm( "add r12, r12, r10 " ); \
    asm( "addc r13, r13, r0 " ); \
    asm( "add r12, r12, r5 " ); \
    asm( "addc r5, r13, r0 " ); \
    asm( "swi r12, r4, 0 " ); \
    asm( "addi r4, r4, 4 " );

#define MULADDC_STOP \
    asm( "swi r5, %0 " : "=m" (c)); \
    asm( "swi r4, %0 " : "=m" (d)); \
    asm( "swi r3, %0 " : "=m" (s) :: \
    "r3", "r4" , "r5" , "r6" , "r7" , "r8" , \
    "r9", "r10", "r11", "r12", "r13" );

#endif /* MicroBlaze */
453 | ||
#if defined(__tricore__)

/*
 * Infineon TriCore.  Address registers %a2 = s, %a3 = d; data
 * registers %d4 = carry, %d1 = b, %d5 zeroed so the register pair
 * %e4 (= %d4:%d5) holds the carry zero-extended to 64 bits.
 */
#define MULADDC_INIT \
    asm( "ld.a %%a2, %0 " :: "m" (s)); \
    asm( "ld.a %%a3, %0 " :: "m" (d)); \
    asm( "ld.w %%d4, %0 " :: "m" (c)); \
    asm( "ld.w %%d1, %0 " :: "m" (b)); \
    asm( "xor %d5, %d5 " );

/* One limb: madd.u computes %e2 = %e4 + *s * b as a 64-bit result,
 * then *d is added with carry into the pair (%d2 low, %d3 high); the
 * high word becomes the next carry in %d4 and the low word is stored
 * through the post-incrementing [%a3+]. */
#define MULADDC_CORE \
    asm( "ld.w %d0, [%a2+] " ); \
    asm( "madd.u %e2, %e4, %d0, %d1 " ); \
    asm( "ld.w %d0, [%a3] " ); \
    asm( "addx %d2, %d2, %d0 " ); \
    asm( "addc %d3, %d3, 0 " ); \
    asm( "mov %d4, %d3 " ); \
    asm( "st.w [%a3+], %d2 " );

#define MULADDC_STOP \
    asm( "st.w %0, %%d4 " : "=m" (c)); \
    asm( "st.a %0, %%a3 " : "=m" (d)); \
    asm( "st.a %0, %%a2 " : "=m" (s) :: \
    "d0", "d1", "e2", "d4", "a2", "a3" );

#endif /* TriCore */
479 | ||
#if defined(__arm__)

/*
 * ARM (v3+, per the file header).  Register map: r0 = s, r1 = d,
 * r2 = carry, r3 = b.
 */
#define MULADDC_INIT \
    asm( "ldr r0, %0 " :: "m" (s)); \
    asm( "ldr r1, %0 " :: "m" (d)); \
    asm( "ldr r2, %0 " :: "m" (c)); \
    asm( "ldr r3, %0 " :: "m" (b));

/* One limb: umlal accumulates r5:r2 += r3 * r4, i.e. the old carry in
 * r2 is folded directly into the 64-bit multiply-accumulate; then *d
 * is added (adds/adc) and the new carry lands back in r2.  Loads and
 * stores use post-indexed addressing to advance s and d by 4. */
#define MULADDC_CORE \
    asm( "ldr r4, [r0], #4 " ); \
    asm( "mov r5, #0 " ); \
    asm( "ldr r6, [r1] " ); \
    asm( "umlal r2, r5, r3, r4 " ); \
    asm( "adds r7, r6, r2 " ); \
    asm( "adc r2, r5, #0 " ); \
    asm( "str r7, [r1], #4 " );

#define MULADDC_STOP \
    asm( "str r2, %0 " : "=m" (c)); \
    asm( "str r1, %0 " : "=m" (d)); \
    asm( "str r0, %0 " : "=m" (s) :: \
    "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7" );

#endif /* ARMv3 */
504 | ||
#if defined(__alpha__)

/*
 * Alpha: 64-bit limbs, no carry flag.  Register map: $1 = s, $2 = d,
 * $3 = carry, $4 = b.  Carries are synthesized with cmpult (unsigned
 * compare: 1 if the sum wrapped, else 0).
 */
#define MULADDC_INIT \
    asm( "ldq $1, %0 " :: "m" (s)); \
    asm( "ldq $2, %0 " :: "m" (d)); \
    asm( "ldq $3, %0 " :: "m" (c)); \
    asm( "ldq $4, %0 " :: "m" (b));

/* One limb: mulq/umulh give the low/high product; each addq is
 * followed by a cmpult to detect wraparound, and the final two addq
 * lines sum high-word + both carry bits into the new carry $3. */
#define MULADDC_CORE \
    asm( "ldq $6, 0($1) " ); \
    asm( "addq $1, 8, $1 " ); \
    asm( "mulq $6, $4, $7 " ); \
    asm( "umulh $6, $4, $6 " ); \
    asm( "addq $7, $3, $7 " ); \
    asm( "cmpult $7, $3, $3 " ); \
    asm( "ldq $5, 0($2) " ); \
    asm( "addq $7, $5, $7 " ); \
    asm( "cmpult $7, $5, $5 " ); \
    asm( "stq $7, 0($2) " ); \
    asm( "addq $2, 8, $2 " ); \
    asm( "addq $6, $3, $3 " ); \
    asm( "addq $5, $3, $3 " );

#define MULADDC_STOP \
    asm( "stq $3, %0 " : "=m" (c)); \
    asm( "stq $2, %0 " : "=m" (d)); \
    asm( "stq $1, %0 " : "=m" (s) :: \
    "$1", "$2", "$3", "$4", "$5", "$6", "$7" );

#endif /* Alpha */
535 | ||
#if defined(__mips__)

/*
 * MIPS32.  Register map: $10 = s, $11 = d, $12 = carry, $13 = b.
 * multu places the 64-bit product in HI/LO (read with mfhi/mflo);
 * like Alpha, carries are synthesized with sltu (set-if-less-than,
 * unsigned) since MIPS has no carry flag.
 */
#define MULADDC_INIT \
    asm( "lw $10, %0 " :: "m" (s)); \
    asm( "lw $11, %0 " :: "m" (d)); \
    asm( "lw $12, %0 " :: "m" (c)); \
    asm( "lw $13, %0 " :: "m" (b));

/* One limb: LO + carry + *d with sltu wraparound checks; the new
 * carry in $12 is HI plus the two carry-out bits. */
#define MULADDC_CORE \
    asm( "lw $14, 0($10) " ); \
    asm( "multu $13, $14 " ); \
    asm( "addi $10, $10, 4 " ); \
    asm( "mflo $14 " ); \
    asm( "mfhi $9 " ); \
    asm( "addu $14, $12, $14 " ); \
    asm( "lw $15, 0($11) " ); \
    asm( "sltu $12, $14, $12 " ); \
    asm( "addu $15, $14, $15 " ); \
    asm( "sltu $14, $15, $14 " ); \
    asm( "addu $12, $12, $9 " ); \
    asm( "sw $15, 0($11) " ); \
    asm( "addu $12, $12, $14 " ); \
    asm( "addi $11, $11, 4 " );

#define MULADDC_STOP \
    asm( "sw $12, %0 " : "=m" (c)); \
    asm( "sw $11, %0 " : "=m" (d)); \
    asm( "sw $10, %0 " : "=m" (s) :: \
    "$9", "$10", "$11", "$12", "$13", "$14", "$15" );

#endif /* MIPS */
567 | #endif /* GNUC */ | |
568 | ||
#if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)

/*
 * MSVC / Watcom inline assembly, IA-32 only.  Mirrors the GCC i386
 * port: esi = s, edi = d, ecx = carry, ebx = b.  (No ebx save/restore
 * here, unlike the GCC path — the MSVC inline assembler manages
 * register preservation itself.)
 */
#define MULADDC_INIT \
    __asm mov esi, s \
    __asm mov edi, d \
    __asm mov ecx, c \
    __asm mov ebx, b

/* Same instruction sequence as the GCC i386 MULADDC_CORE. */
#define MULADDC_CORE \
    __asm lodsd \
    __asm mul ebx \
    __asm add eax, ecx \
    __asm adc edx, 0 \
    __asm add eax, [edi] \
    __asm adc edx, 0 \
    __asm mov ecx, edx \
    __asm stosd

#if defined(POLARSSL_HAVE_SSE2)

/* Older MSVC inline assemblers lack SSE2 mnemonics, so the SSE2 path
 * is hand-encoded byte by byte with _emit. */
#define EMIT __asm _emit

/* Hand-assembled opcode bytes — presumably the same movd/pmuludq/
 * paddq/psrlq sequence as the GCC SSE2 MULADDC_HUIT (0x0F 0x6E = movd,
 * 0x0F 0xF4 = pmuludq, 0x0F 0xD4 = paddq, 0x0F 0x73 /2 = psrlq,
 * 0x83 0xC7/0xC6 0x20 = add edi/esi, 32); verify the encodings against
 * the GCC variant before modifying. */
#define MULADDC_HUIT \
    EMIT 0x0F EMIT 0x6E EMIT 0xC9 \
    EMIT 0x0F EMIT 0x6E EMIT 0xC3 \
    EMIT 0x0F EMIT 0x6E EMIT 0x1F \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x6E EMIT 0x16 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF8 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
    EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xDC \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xEE \
    EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C \
    EMIT 0x0F EMIT 0xD4 EMIT 0xFC \
    EMIT 0x0F EMIT 0x7E EMIT 0x0F \
    EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x18 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD8 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCD \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xD5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCF \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xE5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xF5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCC \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C \
    EMIT 0x0F EMIT 0xD4 EMIT 0xDD \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCE \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C \
    EMIT 0x83 EMIT 0xC7 EMIT 0x20 \
    EMIT 0x83 EMIT 0xC6 EMIT 0x20 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x7E EMIT 0xC9

/* SSE2 STOP: 0x0F 0x77 is emms; then write back c/d/s. */
#define MULADDC_STOP \
    EMIT 0x0F EMIT 0x77 \
    __asm mov c, ecx \
    __asm mov d, edi \
    __asm mov s, esi \

#else

/* Non-SSE2 STOP: plain write-back. */
#define MULADDC_STOP \
    __asm mov c, ecx \
    __asm mov d, edi \
    __asm mov s, esi \

#endif /* SSE2 */
#endif /* MSVC */
669 | ||
670 | #endif /* POLARSSL_HAVE_ASM */ | |
671 | ||
#if !defined(MULADDC_CORE)
#if defined(POLARSSL_HAVE_LONGLONG)

/*
 * Portable C fallback using a double-width integer type (t_dbl).
 * INIT opens a block scope (closed by STOP's brace) so the temporaries
 * r/r0/r1 live across every CORE expansion in between.  Carry-out of
 * each addition is detected by unsigned wraparound: after r0 += x,
 * (r0 < x) is 1 exactly when the add overflowed.
 * biL is the limb size in bits; t_int/t_dbl/biL come from the
 * including translation unit.
 */
#define MULADDC_INIT \
{ \
    t_dbl r; \
    t_int r0, r1;

/* One limb: r = *s++ * b in double width; split into low (r0) and
 * high (r1) limbs, add carry and *d with wraparound carry detection,
 * store, and keep the high part as the new carry. */
#define MULADDC_CORE \
    r = *(s++) * (t_dbl) b; \
    r0 = r; \
    r1 = r >> biL; \
    r0 += c; r1 += (r0 < c); \
    r0 += *d; r1 += (r0 < *d); \
    c = r1; *(d++) = r0;

#define MULADDC_STOP \
}

#else
/*
 * Fully generic C fallback: no double-width type available, so each
 * limb is split into halves of biH bits and the four 16(half)-bit
 * partial products are combined by hand (schoolbook multiplication),
 * again using unsigned-wraparound comparisons for carries.
 */
#define MULADDC_INIT \
{ \
    t_int s0, s1, b0, b1; \
    t_int r0, r1, rx, ry; \
    b0 = ( b << biH ) >> biH; \
    b1 = ( b >> biH );

/* One limb: split *s into s0/s1, form the partial products, fold the
 * cross terms (rx, ry) into the low/high accumulators r0/r1, then add
 * carry and *d as in the longlong variant. */
#define MULADDC_CORE \
    s0 = ( *s << biH ) >> biH; \
    s1 = ( *s >> biH ); s++; \
    rx = s0 * b1; r0 = s0 * b0; \
    ry = s1 * b0; r1 = s1 * b1; \
    r1 += ( rx >> biH ); \
    r1 += ( ry >> biH ); \
    rx <<= biH; ry <<= biH; \
    r0 += rx; r1 += (r0 < rx); \
    r0 += ry; r1 += (r0 < ry); \
    r0 += c; r1 += (r0 < c); \
    r0 += *d; r1 += (r0 < *d); \
    c = r1; *(d++) = r0;

#define MULADDC_STOP \
}

#endif /* C (generic) */
#endif /* C (longlong) */
718 | ||
719 | #endif /* bn_mul.h */ |