Commit | Line | Data |
---|---|---|
80a47a2c TK |
1 | /** |
2 | * \file bn_mul.h | |
3 | * | |
62d3e98d TK |
4 | * Copyright (C) 2006-2010, Brainspark B.V. |
5 | * | |
6 | * This file is part of PolarSSL (http://www.polarssl.org) | |
7 | * Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org> | |
80a47a2c | 8 | * |
62d3e98d | 9 | * All rights reserved. |
80a47a2c TK |
10 | * |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License along | |
22 | * with this program; if not, write to the Free Software Foundation, Inc., | |
23 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
24 | */ | |
25 | /* | |
26 | * Multiply source vector [s] with b, add result | |
27 | * to destination vector [d] and set carry c. | |
28 | * | |
29 | * Currently supports: | |
30 | * | |
31 | * . IA-32 (386+) . AMD64 / EM64T | |
32 | * . IA-32 (SSE2) . Motorola 68000 | |
33 | * . PowerPC, 32-bit . MicroBlaze | |
34 | * . PowerPC, 64-bit . TriCore | |
35 | * . SPARC v8 . ARM v3+ | |
36 | * . Alpha . MIPS32 | |
37 | * . C, longlong . C, generic | |
38 | */ | |
39 | ||
67932e54 | 40 | /* $Cambridge: exim/src/src/pdkim/bn_mul.h,v 1.3 2009/12/07 13:05:07 tom Exp $ */ |
80a47a2c TK |
41 | |
42 | #ifndef POLARSSL_BN_MUL_H | |
43 | #define POLARSSL_BN_MUL_H | |
44 | ||
45 | #if defined(POLARSSL_HAVE_ASM) | |
46 | ||
#if defined(__GNUC__)
#if defined(__i386__)

/*
 * IA-32 (386+) GNU C inline assembly.
 * Register usage: esi = s (source), edi = d (destination),
 * ecx = c (carry), ebx = b (multiplier).
 * INIT saves the caller's ebx into %0 (t) first, since ebx may be
 * the PIC base register; MULADDC_STOP restores it from %4.
 */
#define MULADDC_INIT            \
    asm( "                      \
        movl   %%ebx, %0;       \
        movl   %5, %%esi;       \
        movl   %6, %%edi;       \
        movl   %7, %%ecx;       \
        movl   %8, %%ebx;       \
        "

/* One step: *d++ += (*s++ * b) + c; ecx receives the new carry
 * (high word of the product plus any carries from the adds). */
#define MULADDC_CORE            \
        "                       \
        lodsl;                  \
        mull   %%ebx;           \
        addl   %%ecx,   %%eax;  \
        adcl   $0,      %%edx;  \
        addl   (%%edi), %%eax;  \
        adcl   $0,      %%edx;  \
        movl   %%edx,   %%ecx;  \
        stosl;                  \
        "

#if defined(POLARSSL_HAVE_SSE2)

/* SSE2 variant: eight limbs per call using 32x32->64 pmuludq
 * multiplies, with the running carry propagated through mm1. */
#define MULADDC_HUIT                    \
        "                               \
        movd     %%ecx,     %%mm1;      \
        movd     %%ebx,     %%mm0;      \
        movd     (%%edi),   %%mm3;      \
        paddq    %%mm3,     %%mm1;      \
        movd     (%%esi),   %%mm2;      \
        pmuludq  %%mm0,     %%mm2;      \
        movd     4(%%esi),  %%mm4;      \
        pmuludq  %%mm0,     %%mm4;      \
        movd     8(%%esi),  %%mm6;      \
        pmuludq  %%mm0,     %%mm6;      \
        movd     12(%%esi), %%mm7;      \
        pmuludq  %%mm0,     %%mm7;      \
        paddq    %%mm2,     %%mm1;      \
        movd     4(%%edi),  %%mm3;      \
        paddq    %%mm4,     %%mm3;      \
        movd     8(%%edi),  %%mm5;      \
        paddq    %%mm6,     %%mm5;      \
        movd     12(%%edi), %%mm4;      \
        paddq    %%mm4,     %%mm7;      \
        movd     %%mm1,     (%%edi);    \
        movd     16(%%esi), %%mm2;      \
        pmuludq  %%mm0,     %%mm2;      \
        psrlq    $32,       %%mm1;      \
        movd     20(%%esi), %%mm4;      \
        pmuludq  %%mm0,     %%mm4;      \
        paddq    %%mm3,     %%mm1;      \
        movd     24(%%esi), %%mm6;      \
        pmuludq  %%mm0,     %%mm6;      \
        movd     %%mm1,     4(%%edi);   \
        psrlq    $32,       %%mm1;      \
        movd     28(%%esi), %%mm3;      \
        pmuludq  %%mm0,     %%mm3;      \
        paddq    %%mm5,     %%mm1;      \
        movd     16(%%edi), %%mm5;      \
        paddq    %%mm5,     %%mm2;      \
        movd     %%mm1,     8(%%edi);   \
        psrlq    $32,       %%mm1;      \
        paddq    %%mm7,     %%mm1;      \
        movd     20(%%edi), %%mm5;      \
        paddq    %%mm5,     %%mm4;      \
        movd     %%mm1,     12(%%edi);  \
        psrlq    $32,       %%mm1;      \
        paddq    %%mm2,     %%mm1;      \
        movd     24(%%edi), %%mm5;      \
        paddq    %%mm5,     %%mm6;      \
        movd     %%mm1,     16(%%edi);  \
        psrlq    $32,       %%mm1;      \
        paddq    %%mm4,     %%mm1;      \
        movd     28(%%edi), %%mm5;      \
        paddq    %%mm5,     %%mm3;      \
        movd     %%mm1,     20(%%edi);  \
        psrlq    $32,       %%mm1;      \
        paddq    %%mm6,     %%mm1;      \
        movd     %%mm1,     24(%%edi);  \
        psrlq    $32,       %%mm1;      \
        paddq    %%mm3,     %%mm1;      \
        movd     %%mm1,     28(%%edi);  \
        addl     $32,       %%edi;      \
        addl     $32,       %%esi;      \
        psrlq    $32,       %%mm1;      \
        movd     %%mm1,     %%ecx;      \
        "

/* SSE2 epilogue: emms clears MMX state for subsequent FPU code,
 * ebx is restored from t, and c/d/s are written back to memory. */
#define MULADDC_STOP                                    \
        "                                               \
        emms;                                           \
        movl   %4, %%ebx;                               \
        movl   %%ecx, %1;                               \
        movl   %%edi, %2;                               \
        movl   %%esi, %3;                               \
        "                                               \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
        : "eax", "ecx", "edx", "esi", "edi"             \
    );

#else

/* Non-SSE2 epilogue: restore ebx and write back c, d, s. */
#define MULADDC_STOP                                    \
        "                                               \
        movl   %4, %%ebx;                               \
        movl   %%ecx, %1;                               \
        movl   %%edi, %2;                               \
        movl   %%esi, %3;                               \
        "                                               \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
        : "eax", "ecx", "edx", "esi", "edi"             \
    );

#endif /* SSE2 */
#endif /* i386 */
166 | ||
#if defined(__amd64__) || defined (__x86_64__)

/*
 * AMD64 / EM64T: rsi = s, rdi = d, rcx = c, rbx = b.
 * r8 is kept at zero so it can serve as an add-with-carry source.
 * NOTE(review): each line is a separate asm() with no constraints on
 * the registers it uses, so this relies on the compiler not touching
 * them between statements — kept byte-identical here.
 */
#define MULADDC_INIT                            \
    asm( "movq   %0, %%rsi      " :: "m" (s));  \
    asm( "movq   %0, %%rdi      " :: "m" (d));  \
    asm( "movq   %0, %%rcx      " :: "m" (c));  \
    asm( "movq   %0, %%rbx      " :: "m" (b));  \
    asm( "xorq   %r8, %r8       " );

/* One step: *d++ += (*s++ * b) + c; rcx receives the new carry. */
#define MULADDC_CORE                            \
    asm( "movq  (%rsi), %rax    " );            \
    asm( "mulq   %rbx           " );            \
    asm( "addq   $8,   %rsi     " );            \
    asm( "addq   %rcx, %rax     " );            \
    asm( "movq   %r8,  %rcx     " );            \
    asm( "adcq   $0,   %rdx     " );            \
    asm( "nop                   " );            \
    asm( "addq   %rax, (%rdi)   " );            \
    asm( "adcq   %rdx, %rcx     " );            \
    asm( "addq   $8,   %rdi     " );

/* Write back c, d, s; the clobber list covers every register used. */
#define MULADDC_STOP                            \
    asm( "movq   %%rcx, %0      " : "=m" (c));  \
    asm( "movq   %%rdi, %0      " : "=m" (d));  \
    asm( "movq   %%rsi, %0      " : "=m" (s) :: \
    "rax", "rcx", "rdx", "rbx", "rsi", "rdi", "r8" );

#endif /* AMD64 */
195 | ||
#if defined(__mc68020__) || defined(__mcpu32__)

/*
 * Motorola 68020+ / CPU32: a2 = s, a3 = d, d3 = c, d2 = b,
 * d0 = constant zero (used with addxl to pick up the extend bit).
 */
#define MULADDC_INIT                            \
    asm( "movl   %0, %%a2       " :: "m" (s));  \
    asm( "movl   %0, %%a3       " :: "m" (d));  \
    asm( "movl   %0, %%d3       " :: "m" (c));  \
    asm( "movl   %0, %%d2       " :: "m" (b));  \
    asm( "moveq  #0, %d0        " );

/* One step: mulul produces the 64-bit product in d4:d1;
 * carry out of the adds is collected into d3 via the X bit. */
#define MULADDC_CORE                            \
    asm( "movel  %a2@+, %d1     " );            \
    asm( "mulul  %d2, %d4:%d1   " );            \
    asm( "addl   %d3, %d1       " );            \
    asm( "addxl  %d0, %d4       " );            \
    asm( "moveq  #0,  %d3       " );            \
    asm( "addl   %d1, %a3@+     " );            \
    asm( "addxl  %d4, %d3       " );

#define MULADDC_STOP                            \
    asm( "movl   %%d3, %0       " : "=m" (c));  \
    asm( "movl   %%a3, %0       " : "=m" (d));  \
    asm( "movl   %%a2, %0       " : "=m" (s) :: \
    "d0", "d1", "d2", "d3", "d4", "a2", "a3" );

/* Eight limbs unrolled: the high half of the product alternates
 * between d4 and d3 so the carry chains through the X bit without
 * extra moves; the final addxl folds the last extend bit into d3. */
#define MULADDC_HUIT                            \
    asm( "movel  %a2@+, %d1     " );            \
    asm( "mulul  %d2, %d4:%d1   " );            \
    asm( "addxl  %d3, %d1       " );            \
    asm( "addxl  %d0, %d4       " );            \
    asm( "addl   %d1, %a3@+     " );            \
    asm( "movel  %a2@+, %d1     " );            \
    asm( "mulul  %d2, %d3:%d1   " );            \
    asm( "addxl  %d4, %d1       " );            \
    asm( "addxl  %d0, %d3       " );            \
    asm( "addl   %d1, %a3@+     " );            \
    asm( "movel  %a2@+, %d1     " );            \
    asm( "mulul  %d2, %d4:%d1   " );            \
    asm( "addxl  %d3, %d1       " );            \
    asm( "addxl  %d0, %d4       " );            \
    asm( "addl   %d1, %a3@+     " );            \
    asm( "movel  %a2@+, %d1     " );            \
    asm( "mulul  %d2, %d3:%d1   " );            \
    asm( "addxl  %d4, %d1       " );            \
    asm( "addxl  %d0, %d3       " );            \
    asm( "addl   %d1, %a3@+     " );            \
    asm( "movel  %a2@+, %d1     " );            \
    asm( "mulul  %d2, %d4:%d1   " );            \
    asm( "addxl  %d3, %d1       " );            \
    asm( "addxl  %d0, %d4       " );            \
    asm( "addl   %d1, %a3@+     " );            \
    asm( "movel  %a2@+, %d1     " );            \
    asm( "mulul  %d2, %d3:%d1   " );            \
    asm( "addxl  %d4, %d1       " );            \
    asm( "addxl  %d0, %d3       " );            \
    asm( "addl   %d1, %a3@+     " );            \
    asm( "movel  %a2@+, %d1     " );            \
    asm( "mulul  %d2, %d4:%d1   " );            \
    asm( "addxl  %d3, %d1       " );            \
    asm( "addxl  %d0, %d4       " );            \
    asm( "addl   %d1, %a3@+     " );            \
    asm( "movel  %a2@+, %d1     " );            \
    asm( "mulul  %d2, %d3:%d1   " );            \
    asm( "addxl  %d4, %d1       " );            \
    asm( "addxl  %d0, %d3       " );            \
    asm( "addl   %d1, %a3@+     " );            \
    asm( "addxl  %d0, %d3       " );

#endif /* MC68000 */
264 | ||
#if defined(__powerpc__) || defined(__ppc__)
#if defined(__powerpc64__) || defined(__ppc64__)

#if defined(__MACH__) && defined(__APPLE__)

/*
 * PowerPC 64-bit, Apple assembler syntax (no '%' register prefix).
 * r3 = s, r4 = d, r5 = c, r6 = b.  Pointers are pre-decremented by
 * one limb so the update-form loads/stores (ldu/stdu) can advance
 * them; addic ... 0 clears the XER carry bit before the loop.
 */
#define MULADDC_INIT                            \
    asm( "ld     r3, %0         " :: "m" (s));  \
    asm( "ld     r4, %0         " :: "m" (d));  \
    asm( "ld     r5, %0         " :: "m" (c));  \
    asm( "ld     r6, %0         " :: "m" (b));  \
    asm( "addi   r3, r3, -8     " );            \
    asm( "addi   r4, r4, -8     " );            \
    asm( "addic  r5, r5,  0     " );

/* One step: r9:r8 = *++s * b, then add carry and *++d with
 * adde/addze/addc keeping the carry chain in XER[CA]. */
#define MULADDC_CORE                            \
    asm( "ldu    r7, 8(r3)      " );            \
    asm( "mulld  r8, r7, r6     " );            \
    asm( "mulhdu r9, r7, r6     " );            \
    asm( "adde   r8, r8, r5     " );            \
    asm( "ld     r7, 8(r4)      " );            \
    asm( "addze  r5, r9         " );            \
    asm( "addc   r8, r8, r7     " );            \
    asm( "stdu   r8, 8(r4)      " );

/* Fold the final carry bit into c, undo the pre-decrement,
 * and write back c, d, s. */
#define MULADDC_STOP                            \
    asm( "addze  r5, r5         " );            \
    asm( "addi   r4, r4, 8      " );            \
    asm( "addi   r3, r3, 8      " );            \
    asm( "std    r5, %0         " : "=m" (c));  \
    asm( "std    r4, %0         " : "=m" (d));  \
    asm( "std    r3, %0         " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#else

/* PowerPC 64-bit, ELF assembler syntax ('%rN' register names);
 * otherwise identical to the Apple variant above. */
#define MULADDC_INIT                            \
    asm( "ld     %%r3, %0       " :: "m" (s));  \
    asm( "ld     %%r4, %0       " :: "m" (d));  \
    asm( "ld     %%r5, %0       " :: "m" (c));  \
    asm( "ld     %%r6, %0       " :: "m" (b));  \
    asm( "addi   %r3, %r3, -8   " );            \
    asm( "addi   %r4, %r4, -8   " );            \
    asm( "addic  %r5, %r5,  0   " );

#define MULADDC_CORE                            \
    asm( "ldu    %r7, 8(%r3)    " );            \
    asm( "mulld  %r8, %r7, %r6  " );            \
    asm( "mulhdu %r9, %r7, %r6  " );            \
    asm( "adde   %r8, %r8, %r5  " );            \
    asm( "ld     %r7, 8(%r4)    " );            \
    asm( "addze  %r5, %r9       " );            \
    asm( "addc   %r8, %r8, %r7  " );            \
    asm( "stdu   %r8, 8(%r4)    " );

#define MULADDC_STOP                            \
    asm( "addze  %r5, %r5       " );            \
    asm( "addi   %r4, %r4, 8    " );            \
    asm( "addi   %r3, %r3, 8    " );            \
    asm( "std    %%r5, %0       " : "=m" (c));  \
    asm( "std    %%r4, %0       " : "=m" (d));  \
    asm( "std    %%r3, %0       " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#endif

#else /* PPC32 */

#if defined(__MACH__) && defined(__APPLE__)

/* PowerPC 32-bit, Apple assembler syntax: same scheme as PPC64
 * but with 4-byte limbs (lwzu/stwu, mullw/mulhwu). */
#define MULADDC_INIT                            \
    asm( "lwz    r3, %0         " :: "m" (s));  \
    asm( "lwz    r4, %0         " :: "m" (d));  \
    asm( "lwz    r5, %0         " :: "m" (c));  \
    asm( "lwz    r6, %0         " :: "m" (b));  \
    asm( "addi   r3, r3, -4     " );            \
    asm( "addi   r4, r4, -4     " );            \
    asm( "addic  r5, r5,  0     " );

#define MULADDC_CORE                            \
    asm( "lwzu   r7, 4(r3)      " );            \
    asm( "mullw  r8, r7, r6     " );            \
    asm( "mulhwu r9, r7, r6     " );            \
    asm( "adde   r8, r8, r5     " );            \
    asm( "lwz    r7, 4(r4)      " );            \
    asm( "addze  r5, r9         " );            \
    asm( "addc   r8, r8, r7     " );            \
    asm( "stwu   r8, 4(r4)      " );

#define MULADDC_STOP                            \
    asm( "addze  r5, r5         " );            \
    asm( "addi   r4, r4, 4      " );            \
    asm( "addi   r3, r3, 4      " );            \
    asm( "stw    r5, %0         " : "=m" (c));  \
    asm( "stw    r4, %0         " : "=m" (d));  \
    asm( "stw    r3, %0         " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#else

/* PowerPC 32-bit, ELF assembler syntax. */
#define MULADDC_INIT                            \
    asm( "lwz    %%r3, %0       " :: "m" (s));  \
    asm( "lwz    %%r4, %0       " :: "m" (d));  \
    asm( "lwz    %%r5, %0       " :: "m" (c));  \
    asm( "lwz    %%r6, %0       " :: "m" (b));  \
    asm( "addi   %r3, %r3, -4   " );            \
    asm( "addi   %r4, %r4, -4   " );            \
    asm( "addic  %r5, %r5,  0   " );

#define MULADDC_CORE                            \
    asm( "lwzu   %r7, 4(%r3)    " );            \
    asm( "mullw  %r8, %r7, %r6  " );            \
    asm( "mulhwu %r9, %r7, %r6  " );            \
    asm( "adde   %r8, %r8, %r5  " );            \
    asm( "lwz    %r7, 4(%r4)    " );            \
    asm( "addze  %r5, %r9       " );            \
    asm( "addc   %r8, %r8, %r7  " );            \
    asm( "stwu   %r8, 4(%r4)    " );

#define MULADDC_STOP                            \
    asm( "addze  %r5, %r5       " );            \
    asm( "addi   %r4, %r4, 4    " );            \
    asm( "addi   %r3, %r3, 4    " );            \
    asm( "stw    %%r5, %0       " : "=m" (c));  \
    asm( "stw    %%r4, %0       " : "=m" (d));  \
    asm( "stw    %%r3, %0       " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#endif

#endif /* PPC32 */
#endif /* PPC64 */
396 | ||
#if defined(__sparc__)

/*
 * SPARC v8: o0 = s, o1 = d, o2 = c, o3 = b.
 * umul leaves the high 32 bits of the product in the %y register,
 * read back with rd; carries are accumulated into g1 via addx.
 */
#define MULADDC_INIT                            \
    asm( "ld     %0, %%o0       " :: "m" (s));  \
    asm( "ld     %0, %%o1       " :: "m" (d));  \
    asm( "ld     %0, %%o2       " :: "m" (c));  \
    asm( "ld     %0, %%o3       " :: "m" (b));

/* One step: *d++ += (*s++ * b) + c; o2 receives the new carry. */
#define MULADDC_CORE                            \
    asm( "ld    [%o0], %o4      " );            \
    asm( "inc      4,  %o0      " );            \
    asm( "ld    [%o1], %o5      " );            \
    asm( "umul   %o3,  %o4, %o4 " );            \
    asm( "addcc  %o4,  %o2, %o4 " );            \
    asm( "rd      %y,  %g1      " );            \
    asm( "addx   %g1,    0, %g1 " );            \
    asm( "addcc  %o4,  %o5, %o4 " );            \
    asm( "st     %o4, [%o1]     " );            \
    asm( "addx   %g1,    0, %o2 " );            \
    asm( "inc      4,  %o1      " );

#define MULADDC_STOP                            \
    asm( "st     %%o2, %0       " : "=m" (c));  \
    asm( "st     %%o1, %0       " : "=m" (d));  \
    asm( "st     %%o0, %0       " : "=m" (s) :: \
    "g1", "o0", "o1", "o2", "o3", "o4", "o5" );

#endif /* SPARCv8 */
425 | ||
#if defined(__microblaze__) || defined(microblaze)

/*
 * MicroBlaze: r3 = s, r4 = d, r5 = c.
 * The hardware mul returns only the low 32 bits, so b is split
 * into 16-bit halves up front (r7 = low half, r6 = high half) and
 * each limb product is built from four 16x16 partial products.
 */
#define MULADDC_INIT                            \
    asm( "lwi   r3,   %0        " :: "m" (s));  \
    asm( "lwi   r4,   %0        " :: "m" (d));  \
    asm( "lwi   r5,   %0        " :: "m" (c));  \
    asm( "lwi   r6,   %0        " :: "m" (b));  \
    asm( "andi  r7,   r6, 0xffff" );            \
    asm( "bsrli r6,   r6, 16    " );

/* One step: load *s as two halfwords (r8 low, r9 high), form the
 * four partials, recombine into r13:r12, then add *d and c with
 * addc picking up carries (r0 is the hardwired zero register). */
#define MULADDC_CORE                            \
    asm( "lhui  r8,   r3,   0   " );            \
    asm( "addi  r3,   r3,   2   " );            \
    asm( "lhui  r9,   r3,   0   " );            \
    asm( "addi  r3,   r3,   2   " );            \
    asm( "mul   r10,  r9,  r6   " );            \
    asm( "mul   r11,  r8,  r7   " );            \
    asm( "mul   r12,  r9,  r7   " );            \
    asm( "mul   r13,  r8,  r6   " );            \
    asm( "bsrli  r8, r10,  16   " );            \
    asm( "bsrli  r9, r11,  16   " );            \
    asm( "add   r13, r13,  r8   " );            \
    asm( "add   r13, r13,  r9   " );            \
    asm( "bslli r10, r10,  16   " );            \
    asm( "bslli r11, r11,  16   " );            \
    asm( "add   r12, r12, r10   " );            \
    asm( "addc  r13, r13,  r0   " );            \
    asm( "add   r12, r12, r11   " );            \
    asm( "addc  r13, r13,  r0   " );            \
    asm( "lwi   r10,  r4,   0   " );            \
    asm( "add   r12, r12, r10   " );            \
    asm( "addc  r13, r13,  r0   " );            \
    asm( "add   r12, r12,  r5   " );            \
    asm( "addc   r5, r13,  r0   " );            \
    asm( "swi   r12,  r4,   0   " );            \
    asm( "addi   r4,  r4,   4   " );

#define MULADDC_STOP                            \
    asm( "swi   r5,   %0        " : "=m" (c));  \
    asm( "swi   r4,   %0        " : "=m" (d));  \
    asm( "swi   r3,   %0        " : "=m" (s) :: \
    "r3", "r4" , "r5" , "r6" , "r7" , "r8" ,    \
    "r9", "r10", "r11", "r12", "r13" );

#endif /* MicroBlaze */
471 | ||
#if defined(__tricore__)

/*
 * Infineon TriCore: a2 = s, a3 = d, d4 = c, d1 = b.
 * madd.u computes the 64-bit multiply-accumulate into the register
 * pair e2 (d2:d3) from the pair e4 (d4:d5); d5 is zeroed so e4
 * holds just the incoming carry.
 */
#define MULADDC_INIT                                \
    asm( "ld.a   %%a2, %0       " :: "m" (s));      \
    asm( "ld.a   %%a3, %0       " :: "m" (d));      \
    asm( "ld.w   %%d4, %0       " :: "m" (c));      \
    asm( "ld.w   %%d1, %0       " :: "m" (b));      \
    asm( "xor    %d5, %d5       " );

/* One step: e2 = *s++ * b + c, add *d with carry, store low word
 * to *d++ and keep the high word as the new carry in d4. */
#define MULADDC_CORE                                \
    asm( "ld.w   %d0,   [%a2+]      " );            \
    asm( "madd.u %e2, %e4, %d0, %d1 " );            \
    asm( "ld.w   %d0,   [%a3]       " );            \
    asm( "addx   %d2,    %d2,  %d0  " );            \
    asm( "addc   %d3,    %d3,    0  " );            \
    asm( "mov    %d4,    %d3        " );            \
    asm( "st.w  [%a3+],  %d2        " );

#define MULADDC_STOP                                \
    asm( "st.w   %0, %%d4       " : "=m" (c));      \
    asm( "st.a   %0, %%a3       " : "=m" (d));      \
    asm( "st.a   %0, %%a2       " : "=m" (s) ::     \
    "d0", "d1", "e2", "d4", "a2", "a3" );

#endif /* TriCore */
497 | ||
#if defined(__arm__)

/*
 * ARM (v3+): r0 = s, r1 = d, r2 = c, r3 = b.
 * umlal performs the 32x32 multiply and accumulates into the
 * 64-bit pair r5:r2 (r5 zeroed each step as the high word).
 */
#define MULADDC_INIT                            \
    asm( "ldr    r0, %0         " :: "m" (s));  \
    asm( "ldr    r1, %0         " :: "m" (d));  \
    asm( "ldr    r2, %0         " :: "m" (c));  \
    asm( "ldr    r3, %0         " :: "m" (b));

/* One step: *d++ += (*s++ * b) + c; r2 receives the new carry. */
#define MULADDC_CORE                            \
    asm( "ldr    r4, [r0], #4   " );            \
    asm( "mov    r5, #0         " );            \
    asm( "ldr    r6, [r1]       " );            \
    asm( "umlal  r2, r5, r3, r4 " );            \
    asm( "adds   r7, r6, r2     " );            \
    asm( "adc    r2, r5, #0     " );            \
    asm( "str    r7, [r1], #4   " );

#define MULADDC_STOP                            \
    asm( "str    r2, %0         " : "=m" (c));  \
    asm( "str    r1, %0         " : "=m" (d));  \
    asm( "str    r0, %0         " : "=m" (s) :: \
    "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7" );

#endif /* ARMv3 */
522 | ||
#if defined(__alpha__)

/*
 * Alpha: $1 = s, $2 = d, $3 = c, $4 = b.
 * mulq/umulh give the low/high 64 bits of the product; Alpha has
 * no carry flag, so carries are recovered with cmpult (unsigned
 * "result < addend" after an add means it wrapped).
 */
#define MULADDC_INIT                            \
    asm( "ldq    $1, %0         " :: "m" (s));  \
    asm( "ldq    $2, %0         " :: "m" (d));  \
    asm( "ldq    $3, %0         " :: "m" (c));  \
    asm( "ldq    $4, %0         " :: "m" (b));

/* One step: *d++ += (*s++ * b) + c; $3 accumulates the new carry
 * from the high product word and both cmpult results. */
#define MULADDC_CORE                            \
    asm( "ldq    $6,  0($1)     " );            \
    asm( "addq   $1,  8, $1     " );            \
    asm( "mulq   $6, $4, $7     " );            \
    asm( "umulh  $6, $4, $6     " );            \
    asm( "addq   $7, $3, $7     " );            \
    asm( "cmpult $7, $3, $3     " );            \
    asm( "ldq    $5,  0($2)     " );            \
    asm( "addq   $7, $5, $7     " );            \
    asm( "cmpult $7, $5, $5     " );            \
    asm( "stq    $7,  0($2)     " );            \
    asm( "addq   $2,  8, $2     " );            \
    asm( "addq   $6, $3, $3     " );            \
    asm( "addq   $5, $3, $3     " );

#define MULADDC_STOP                            \
    asm( "stq    $3, %0         " : "=m" (c));  \
    asm( "stq    $2, %0         " : "=m" (d));  \
    asm( "stq    $1, %0         " : "=m" (s) :: \
    "$1", "$2", "$3", "$4", "$5", "$6", "$7" );

#endif /* Alpha */
553 | ||
#if defined(__mips__)

/*
 * MIPS32: $10 = s, $11 = d, $12 = c, $13 = b.
 * multu puts the 64-bit product in HI/LO (read via mfhi/mflo);
 * like Alpha there is no carry flag, so carries come from sltu.
 */
#define MULADDC_INIT                            \
    asm( "lw     $10, %0        " :: "m" (s));  \
    asm( "lw     $11, %0        " :: "m" (d));  \
    asm( "lw     $12, %0        " :: "m" (c));  \
    asm( "lw     $13, %0        " :: "m" (b));

/* One step: *d++ += (*s++ * b) + c; $12 collects the new carry from
 * HI plus the two sltu wrap-around tests. */
#define MULADDC_CORE                            \
    asm( "lw     $14, 0($10)    " );            \
    asm( "multu  $13, $14       " );            \
    asm( "addi   $10, $10, 4    " );            \
    asm( "mflo   $14            " );            \
    asm( "mfhi   $9             " );            \
    asm( "addu   $14, $12, $14  " );            \
    asm( "lw     $15, 0($11)    " );            \
    asm( "sltu   $12, $14, $12  " );            \
    asm( "addu   $15, $14, $15  " );            \
    asm( "sltu   $14, $15, $14  " );            \
    asm( "addu   $12, $12, $9   " );            \
    asm( "sw     $15, 0($11)    " );            \
    asm( "addu   $12, $12, $14  " );            \
    asm( "addi   $11, $11, 4    " );

#define MULADDC_STOP                            \
    asm( "sw     $12, %0        " : "=m" (c));  \
    asm( "sw     $11, %0        " : "=m" (d));  \
    asm( "sw     $10, %0        " : "=m" (s) :: \
    "$9", "$10", "$11", "$12", "$13", "$14", "$15" );

#endif /* MIPS */
#endif /* GNUC */
586 | ||
#if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)

/*
 * MSVC / Watcom IA-32 inline assembly:
 * esi = s, edi = d, ecx = c, ebx = b.
 */
#define MULADDC_INIT                            \
    __asm   mov     esi, s                      \
    __asm   mov     edi, d                      \
    __asm   mov     ecx, c                      \
    __asm   mov     ebx, b

/* One step: *d++ += (*s++ * b) + c; ecx receives the new carry. */
#define MULADDC_CORE                            \
    __asm   lodsd                               \
    __asm   mul     ebx                         \
    __asm   add     eax, ecx                    \
    __asm   adc     edx, 0                      \
    __asm   add     eax, [edi]                  \
    __asm   adc     edx, 0                      \
    __asm   mov     ecx, edx                    \
    __asm   stosd

#if defined(POLARSSL_HAVE_SSE2)

/* The MSVC inline assembler predates SSE2, so the eight-limb variant
 * is hand-encoded as raw opcode bytes via _emit — the same
 * instruction sequence as the GNU C SSE2 MULADDC_HUIT. */
#define EMIT __asm _emit

#define MULADDC_HUIT                            \
    EMIT 0x0F EMIT 0x6E EMIT 0xC9               \
    EMIT 0x0F EMIT 0x6E EMIT 0xC3               \
    EMIT 0x0F EMIT 0x6E EMIT 0x1F               \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB               \
    EMIT 0x0F EMIT 0x6E EMIT 0x16               \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD0               \
    EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04     \
    EMIT 0x0F EMIT 0xF4 EMIT 0xE0               \
    EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08     \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF0               \
    EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C     \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF8               \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCA               \
    EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xDC               \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xEE               \
    EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xFC               \
    EMIT 0x0F EMIT 0x7E EMIT 0x0F               \
    EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10     \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD0               \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20     \
    EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14     \
    EMIT 0x0F EMIT 0xF4 EMIT 0xE0               \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB               \
    EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x18     \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF0               \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04     \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20     \
    EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C     \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD8               \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCD               \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xD5               \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08     \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCF               \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xE5               \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C     \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCA               \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xF5               \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10     \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCC               \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xDD               \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14     \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCE               \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18     \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20     \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB               \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C     \
    EMIT 0x83 EMIT 0xC7 EMIT 0x20               \
    EMIT 0x83 EMIT 0xC6 EMIT 0x20               \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20     \
    EMIT 0x0F EMIT 0x7E EMIT 0xC9

/* 0F 77 is emms (clear MMX state); then write back c, d, s. */
#define MULADDC_STOP                            \
    EMIT 0x0F EMIT 0x77                         \
    __asm   mov     c, ecx                      \
    __asm   mov     d, edi                      \
    __asm   mov     s, esi                      \

#else

/* Non-SSE2 epilogue: write back c, d, s. */
#define MULADDC_STOP                            \
    __asm   mov     c, ecx                      \
    __asm   mov     d, edi                      \
    __asm   mov     s, esi                      \

#endif /* SSE2 */
#endif /* MSVC */
687 | ||
688 | #endif /* POLARSSL_HAVE_ASM */ | |
689 | ||
#if !defined(MULADDC_CORE)
#if defined(POLARSSL_HAVE_LONGLONG)

/*
 * Portable C fallback with a double-width type (t_dbl): one
 * t_int x t_int -> t_dbl multiply per limb.  MULADDC_INIT opens a
 * block scope that MULADDC_STOP closes, so r/r0/r1 stay live
 * across repeated MULADDC_CORE expansions.
 */
#define MULADDC_INIT            \
{                               \
    t_dbl r;                    \
    t_int r0, r1;

/* One step: r1:r0 = *s++ * b, then add c and *d.  After each
 * unsigned add, (r0 < addend) is 1 exactly when the add wrapped,
 * which propagates the carry into r1. */
#define MULADDC_CORE            \
    r   = *(s++) * (t_dbl) b;   \
    r0  = r;                    \
    r1  = r >> biL;             \
    r0 += c;  r1 += (r0 <  c);  \
    r0 += *d; r1 += (r0 < *d);  \
    c = r1; *(d++) = r0;

#define MULADDC_STOP            \
}

#else

/*
 * Fully generic C fallback (no double-width type): schoolbook
 * multiplication on half-limb (biH-bit) pieces of b, split once
 * here; (x << biH) >> biH masks off the high half.
 */
#define MULADDC_INIT                    \
{                                       \
    t_int s0, s1, b0, b1;               \
    t_int r0, r1, rx, ry;               \
    b0 = ( b << biH ) >> biH;           \
    b1 = ( b >> biH );

/* One step: split *s++, form the four partial products, fold the
 * cross terms into r1:r0, then add c and *d using the same
 * unsigned wrap-around carry test as the longlong variant. */
#define MULADDC_CORE                    \
    s0 = ( *s << biH ) >> biH;          \
    s1 = ( *s >> biH ); s++;            \
    rx = s0 * b1; r0 = s0 * b0;         \
    ry = s1 * b0; r1 = s1 * b1;         \
    r1 += ( rx >> biH );                \
    r1 += ( ry >> biH );                \
    rx <<= biH; ry <<= biH;             \
    r0 += rx; r1 += (r0 < rx);          \
    r0 += ry; r1 += (r0 < ry);          \
    r0 += c;  r1 += (r0 < c);           \
    r0 += *d; r1 += (r0 < *d);          \
    c = r1; *(d++) = r0;

#define MULADDC_STOP                    \
}

#endif /* C (generic) */
#endif /* C (longlong) */
736 | ||
737 | #endif /* bn_mul.h */ |