/**
 * \file bn_mul.h
 *
 * Copyright (C) 2006-2010, Brainspark B.V.
 *
 * This file is part of PolarSSL (http://www.polarssl.org)
 * Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
/*
 * Multiply source vector [s] with b, add result
 * to destination vector [d] and set carry c.
 *
 * Currently supports:
 *
 *  . IA-32 (386+)         . AMD64 / EM64T
 *  . IA-32 (SSE2)         . Motorola 68000
 *  . PowerPC, 32-bit      . MicroBlaze
 *  . PowerPC, 64-bit      . TriCore
 *  . SPARC v8             . ARM v3+
 *  . Alpha                . MIPS32
 *  . C, longlong          . C, generic
 */

#ifndef POLARSSL_BN_MUL_H
#define POLARSSL_BN_MUL_H

/* Hand-written assembly versions are only compiled in when the build
 * explicitly enables them; otherwise the portable C fallbacks at the
 * bottom of this file are used. */
#if defined(POLARSSL_HAVE_ASM)

#if defined(__GNUC__)
#if defined(__i386__)

/*
 * IA-32 (386+), GNU C inline assembly (AT&T syntax).
 *
 * INIT/CORE/STOP form one single asm() statement built up from string
 * pieces; the operand lists appear only in MULADDC_STOP.  Register use:
 *   esi = s (source), edi = d (destination), ecx = c (carry), ebx = b.
 * ebx is saved into t on entry and restored in STOP because it may be
 * the PIC register under GCC.
 */
#define MULADDC_INIT            \
    asm( "                      \
        movl %%ebx, %0;         \
        movl %5, %%esi;         \
        movl %6, %%edi;         \
        movl %7, %%ecx;         \
        movl %8, %%ebx;         \
        "

/* One limb: eax = *s++ * b; propagate carry through edx; *d++ = result. */
#define MULADDC_CORE            \
        "                       \
        lodsl;                  \
        mull %%ebx;             \
        addl %%ecx, %%eax;      \
        adcl $0, %%edx;         \
        addl (%%edi), %%eax;    \
        adcl $0, %%edx;         \
        movl %%edx, %%ecx;      \
        stosl;                  \
        "

#if defined(POLARSSL_HAVE_SSE2)

/*
 * SSE2 variant: processes eight limbs per invocation using 64-bit MMX
 * register arithmetic (pmuludq/paddq), carrying through mm1.  The running
 * carry is moved back to ecx at the end so STOP can store it.
 */
#define MULADDC_HUIT            \
        "                       \
        movd %%ecx, %%mm1;      \
        movd %%ebx, %%mm0;      \
        movd (%%edi), %%mm3;    \
        paddq %%mm3, %%mm1;     \
        movd (%%esi), %%mm2;    \
        pmuludq %%mm0, %%mm2;   \
        movd 4(%%esi), %%mm4;   \
        pmuludq %%mm0, %%mm4;   \
        movd 8(%%esi), %%mm6;   \
        pmuludq %%mm0, %%mm6;   \
        movd 12(%%esi), %%mm7;  \
        pmuludq %%mm0, %%mm7;   \
        paddq %%mm2, %%mm1;     \
        movd 4(%%edi), %%mm3;   \
        paddq %%mm4, %%mm3;     \
        movd 8(%%edi), %%mm5;   \
        paddq %%mm6, %%mm5;     \
        movd 12(%%edi), %%mm4;  \
        paddq %%mm4, %%mm7;     \
        movd %%mm1, (%%edi);    \
        movd 16(%%esi), %%mm2;  \
        pmuludq %%mm0, %%mm2;   \
        psrlq $32, %%mm1;       \
        movd 20(%%esi), %%mm4;  \
        pmuludq %%mm0, %%mm4;   \
        paddq %%mm3, %%mm1;     \
        movd 24(%%esi), %%mm6;  \
        pmuludq %%mm0, %%mm6;   \
        movd %%mm1, 4(%%edi);   \
        psrlq $32, %%mm1;       \
        movd 28(%%esi), %%mm3;  \
        pmuludq %%mm0, %%mm3;   \
        paddq %%mm5, %%mm1;     \
        movd 16(%%edi), %%mm5;  \
        paddq %%mm5, %%mm2;     \
        movd %%mm1, 8(%%edi);   \
        psrlq $32, %%mm1;       \
        paddq %%mm7, %%mm1;     \
        movd 20(%%edi), %%mm5;  \
        paddq %%mm5, %%mm4;     \
        movd %%mm1, 12(%%edi);  \
        psrlq $32, %%mm1;       \
        paddq %%mm2, %%mm1;     \
        movd 24(%%edi), %%mm5;  \
        paddq %%mm5, %%mm6;     \
        movd %%mm1, 16(%%edi);  \
        psrlq $32, %%mm1;       \
        paddq %%mm4, %%mm1;     \
        movd 28(%%edi), %%mm5;  \
        paddq %%mm5, %%mm3;     \
        movd %%mm1, 20(%%edi);  \
        psrlq $32, %%mm1;       \
        paddq %%mm6, %%mm1;     \
        movd %%mm1, 24(%%edi);  \
        psrlq $32, %%mm1;       \
        paddq %%mm3, %%mm1;     \
        movd %%mm1, 28(%%edi);  \
        addl $32, %%edi;        \
        addl $32, %%esi;        \
        psrlq $32, %%mm1;       \
        movd %%mm1, %%ecx;      \
        "

/* SSE2 epilogue: emms clears MMX state before restoring ebx and writing
 * the updated c, d, s pointers back to memory. */
#define MULADDC_STOP            \
        "                       \
        emms;                   \
        movl %4, %%ebx;         \
        movl %%ecx, %1;         \
        movl %%edi, %2;         \
        movl %%esi, %3;         \
        "                       \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
        : "eax", "ecx", "edx", "esi", "edi"             \
    );

#else

/* Non-SSE2 epilogue: same as above, minus emms. */
#define MULADDC_STOP            \
        "                       \
        movl %4, %%ebx;         \
        movl %%ecx, %1;         \
        movl %%edi, %2;         \
        movl %%esi, %3;         \
        "                       \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
        : "eax", "ecx", "edx", "esi", "edi"             \
    );

#endif /* SSE2 */
#endif /* i386 */

#if defined(__amd64__) || defined (__x86_64__)

/*
 * AMD64 / EM64T: one asm() statement per instruction.
 * Register use: rsi = s, rdi = d, rcx = c, rbx = b, r8 = constant zero.
 * NOTE(review): split asm statements rely on the compiler not touching
 * these registers between statements — fragile but the style used
 * throughout this file.
 */
#define MULADDC_INIT                            \
    asm( "movq %0, %%rsi " :: "m" (s));         \
    asm( "movq %0, %%rdi " :: "m" (d));         \
    asm( "movq %0, %%rcx " :: "m" (c));         \
    asm( "movq %0, %%rbx " :: "m" (b));         \
    asm( "xorq %r8, %r8 " );

/* One limb: rdx:rax = *s * b; add carry and *d; *d++ = low word,
 * new carry = high word. */
#define MULADDC_CORE                            \
    asm( "movq (%rsi),%rax " );                 \
    asm( "mulq %rbx " );                        \
    asm( "addq $8, %rsi " );                    \
    asm( "addq %rcx, %rax " );                  \
    asm( "movq %r8, %rcx " );                   \
    asm( "adcq $0, %rdx " );                    \
    asm( "nop " );                              \
    asm( "addq %rax, (%rdi) " );                \
    asm( "adcq %rdx, %rcx " );                  \
    asm( "addq $8, %rdi " );

/* Store the updated carry and pointers back; clobber list on the last
 * statement covers all registers used by the sequence. */
#define MULADDC_STOP                            \
    asm( "movq %%rcx, %0 " : "=m" (c));         \
    asm( "movq %%rdi, %0 " : "=m" (d));         \
    asm( "movq %%rsi, %0 " : "=m" (s) ::        \
    "rax", "rcx", "rdx", "rbx", "rsi", "rdi", "r8" );

#endif /* AMD64 */

#if defined(__mc68020__) || defined(__mcpu32__)

/*
 * Motorola 68020+ / CPU32.
 * Register use: a2 = s, a3 = d, d3 = c, d2 = b, d0 = constant zero
 * (also used as the addx zero operand to fold the X flag in).
 */
#define MULADDC_INIT                            \
    asm( "movl %0, %%a2 " :: "m" (s));          \
    asm( "movl %0, %%a3 " :: "m" (d));          \
    asm( "movl %0, %%d3 " :: "m" (c));          \
    asm( "movl %0, %%d2 " :: "m" (b));          \
    asm( "moveq #0, %d0 " );

/* One limb via 32x32->64 mulul into the d4:d1 pair. */
#define MULADDC_CORE                            \
    asm( "movel %a2@+, %d1 " );                 \
    asm( "mulul %d2, %d4:%d1 " );               \
    asm( "addl %d3, %d1 " );                    \
    asm( "addxl %d0, %d4 " );                   \
    asm( "moveq #0, %d3 " );                    \
    asm( "addl %d1, %a3@+ " );                  \
    asm( "addxl %d4, %d3 " );

#define MULADDC_STOP                            \
    asm( "movl %%d3, %0 " : "=m" (c));          \
    asm( "movl %%a3, %0 " : "=m" (d));          \
    asm( "movl %%a2, %0 " : "=m" (s) ::         \
    "d0", "d1", "d2", "d3", "d4", "a2", "a3" ); 

/*
 * Eight limbs unrolled; the 64-bit product destination alternates
 * between d4:d1 and d3:d1 so the carry can ripple through addxl
 * without extra moves.  Final addxl folds the last X flag into d3.
 */
#define MULADDC_HUIT                            \
    asm( "movel %a2@+, %d1 " );                 \
    asm( "mulul %d2, %d4:%d1 " );               \
    asm( "addxl %d3, %d1 " );                   \
    asm( "addxl %d0, %d4 " );                   \
    asm( "addl %d1, %a3@+ " );                  \
    asm( "movel %a2@+, %d1 " );                 \
    asm( "mulul %d2, %d3:%d1 " );               \
    asm( "addxl %d4, %d1 " );                   \
    asm( "addxl %d0, %d3 " );                   \
    asm( "addl %d1, %a3@+ " );                  \
    asm( "movel %a2@+, %d1 " );                 \
    asm( "mulul %d2, %d4:%d1 " );               \
    asm( "addxl %d3, %d1 " );                   \
    asm( "addxl %d0, %d4 " );                   \
    asm( "addl %d1, %a3@+ " );                  \
    asm( "movel %a2@+, %d1 " );                 \
    asm( "mulul %d2, %d3:%d1 " );               \
    asm( "addxl %d4, %d1 " );                   \
    asm( "addxl %d0, %d3 " );                   \
    asm( "addl %d1, %a3@+ " );                  \
    asm( "movel %a2@+, %d1 " );                 \
    asm( "mulul %d2, %d4:%d1 " );               \
    asm( "addxl %d3, %d1 " );                   \
    asm( "addxl %d0, %d4 " );                   \
    asm( "addl %d1, %a3@+ " );                  \
    asm( "movel %a2@+, %d1 " );                 \
    asm( "mulul %d2, %d3:%d1 " );               \
    asm( "addxl %d4, %d1 " );                   \
    asm( "addxl %d0, %d3 " );                   \
    asm( "addl %d1, %a3@+ " );                  \
    asm( "movel %a2@+, %d1 " );                 \
    asm( "mulul %d2, %d4:%d1 " );               \
    asm( "addxl %d3, %d1 " );                   \
    asm( "addxl %d0, %d4 " );                   \
    asm( "addl %d1, %a3@+ " );                  \
    asm( "movel %a2@+, %d1 " );                 \
    asm( "mulul %d2, %d3:%d1 " );               \
    asm( "addxl %d4, %d1 " );                   \
    asm( "addxl %d0, %d3 " );                   \
    asm( "addl %d1, %a3@+ " );                  \
    asm( "addxl %d0, %d3 " );

#endif /* MC68000 */

#if defined(__powerpc__) || defined(__ppc__)
#if defined(__powerpc64__) || defined(__ppc64__)

/*
 * PowerPC, 64-bit.  Two syntax variants: Apple's assembler takes bare
 * register names (r3), ELF assemblers need the %r prefix.
 * Register use: r3 = s, r4 = d, r5 = c, r6 = b; r7-r9 are scratch.
 * Pointers are pre-decremented so ldu/stdu (load/store with update)
 * can advance them as a side effect; "addic r5, r5, 0" clears the
 * carry flag before the loop.
 */
#if defined(__MACH__) && defined(__APPLE__)

#define MULADDC_INIT                            \
    asm( "ld r3, %0 " :: "m" (s));              \
    asm( "ld r4, %0 " :: "m" (d));              \
    asm( "ld r5, %0 " :: "m" (c));              \
    asm( "ld r6, %0 " :: "m" (b));              \
    asm( "addi r3, r3, -8 " );                  \
    asm( "addi r4, r4, -8 " );                  \
    asm( "addic r5, r5, 0 " );

#define MULADDC_CORE                            \
    asm( "ldu r7, 8(r3) " );                    \
    asm( "mulld r8, r7, r6 " );                 \
    asm( "mulhdu r9, r7, r6 " );                \
    asm( "adde r8, r8, r5 " );                  \
    asm( "ld r7, 8(r4) " );                     \
    asm( "addze r5, r9 " );                     \
    asm( "addc r8, r8, r7 " );                  \
    asm( "stdu r8, 8(r4) " );

#define MULADDC_STOP                            \
    asm( "addze r5, r5 " );                     \
    asm( "addi r4, r4, 8 " );                   \
    asm( "addi r3, r3, 8 " );                   \
    asm( "std r5, %0 " : "=m" (c));             \
    asm( "std r4, %0 " : "=m" (d));             \
    asm( "std r3, %0 " : "=m" (s) ::            \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#else

/* Same 64-bit sequence, ELF (%r-prefixed) register syntax. */
#define MULADDC_INIT                            \
    asm( "ld %%r3, %0 " :: "m" (s));            \
    asm( "ld %%r4, %0 " :: "m" (d));            \
    asm( "ld %%r5, %0 " :: "m" (c));            \
    asm( "ld %%r6, %0 " :: "m" (b));            \
    asm( "addi %r3, %r3, -8 " );                \
    asm( "addi %r4, %r4, -8 " );                \
    asm( "addic %r5, %r5, 0 " );

#define MULADDC_CORE                            \
    asm( "ldu %r7, 8(%r3) " );                  \
    asm( "mulld %r8, %r7, %r6 " );              \
    asm( "mulhdu %r9, %r7, %r6 " );             \
    asm( "adde %r8, %r8, %r5 " );               \
    asm( "ld %r7, 8(%r4) " );                   \
    asm( "addze %r5, %r9 " );                   \
    asm( "addc %r8, %r8, %r7 " );               \
    asm( "stdu %r8, 8(%r4) " );

#define MULADDC_STOP                            \
    asm( "addze %r5, %r5 " );                   \
    asm( "addi %r4, %r4, 8 " );                 \
    asm( "addi %r3, %r3, 8 " );                 \
    asm( "std %%r5, %0 " : "=m" (c));           \
    asm( "std %%r4, %0 " : "=m" (d));           \
    asm( "std %%r3, %0 " : "=m" (s) ::          \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#endif

#else /* PPC32 */

/*
 * PowerPC, 32-bit: identical structure with word-sized loads/stores
 * (lwzu/stwu) and 32-bit multiplies (mullw/mulhwu), stride 4.
 */
#if defined(__MACH__) && defined(__APPLE__)

#define MULADDC_INIT                            \
    asm( "lwz r3, %0 " :: "m" (s));             \
    asm( "lwz r4, %0 " :: "m" (d));             \
    asm( "lwz r5, %0 " :: "m" (c));             \
    asm( "lwz r6, %0 " :: "m" (b));             \
    asm( "addi r3, r3, -4 " );                  \
    asm( "addi r4, r4, -4 " );                  \
    asm( "addic r5, r5, 0 " );

#define MULADDC_CORE                            \
    asm( "lwzu r7, 4(r3) " );                   \
    asm( "mullw r8, r7, r6 " );                 \
    asm( "mulhwu r9, r7, r6 " );                \
    asm( "adde r8, r8, r5 " );                  \
    asm( "lwz r7, 4(r4) " );                    \
    asm( "addze r5, r9 " );                     \
    asm( "addc r8, r8, r7 " );                  \
    asm( "stwu r8, 4(r4) " );

#define MULADDC_STOP                            \
    asm( "addze r5, r5 " );                     \
    asm( "addi r4, r4, 4 " );                   \
    asm( "addi r3, r3, 4 " );                   \
    asm( "stw r5, %0 " : "=m" (c));             \
    asm( "stw r4, %0 " : "=m" (d));             \
    asm( "stw r3, %0 " : "=m" (s) ::            \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#else

/* Same 32-bit sequence, ELF (%r-prefixed) register syntax. */
#define MULADDC_INIT                            \
    asm( "lwz %%r3, %0 " :: "m" (s));           \
    asm( "lwz %%r4, %0 " :: "m" (d));           \
    asm( "lwz %%r5, %0 " :: "m" (c));           \
    asm( "lwz %%r6, %0 " :: "m" (b));           \
    asm( "addi %r3, %r3, -4 " );                \
    asm( "addi %r4, %r4, -4 " );                \
    asm( "addic %r5, %r5, 0 " );

#define MULADDC_CORE                            \
    asm( "lwzu %r7, 4(%r3) " );                 \
    asm( "mullw %r8, %r7, %r6 " );              \
    asm( "mulhwu %r9, %r7, %r6 " );             \
    asm( "adde %r8, %r8, %r5 " );               \
    asm( "lwz %r7, 4(%r4) " );                  \
    asm( "addze %r5, %r9 " );                   \
    asm( "addc %r8, %r8, %r7 " );               \
    asm( "stwu %r8, 4(%r4) " );

#define MULADDC_STOP                            \
    asm( "addze %r5, %r5 " );                   \
    asm( "addi %r4, %r4, 4 " );                 \
    asm( "addi %r3, %r3, 4 " );                 \
    asm( "stw %%r5, %0 " : "=m" (c));           \
    asm( "stw %%r4, %0 " : "=m" (d));           \
    asm( "stw %%r3, %0 " : "=m" (s) ::          \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#endif

#endif /* PPC32 */
#endif /* PPC64 */

#if defined(__sparc__)

/*
 * SPARC v8.  Register use: o0 = s, o1 = d, o2 = c, o3 = b.
 * umul leaves the high 32 bits of the product in the %y register,
 * read back with "rd %y"; carries propagate via addcc/addx.
 */
#define MULADDC_INIT                            \
    asm( "ld %0, %%o0 " :: "m" (s));            \
    asm( "ld %0, %%o1 " :: "m" (d));            \
    asm( "ld %0, %%o2 " :: "m" (c));            \
    asm( "ld %0, %%o3 " :: "m" (b));

#define MULADDC_CORE                            \
    asm( "ld [%o0], %o4 " );                    \
    asm( "inc 4, %o0 " );                       \
    asm( "ld [%o1], %o5 " );                    \
    asm( "umul %o3, %o4, %o4 " );               \
    asm( "addcc %o4, %o2, %o4 " );              \
    asm( "rd %y, %g1 " );                       \
    asm( "addx %g1, 0, %g1 " );                 \
    asm( "addcc %o4, %o5, %o4 " );              \
    asm( "st %o4, [%o1] " );                    \
    asm( "addx %g1, 0, %o2 " );                 \
    asm( "inc 4, %o1 " );

#define MULADDC_STOP                            \
    asm( "st %%o2, %0 " : "=m" (c));            \
    asm( "st %%o1, %0 " : "=m" (d));            \
    asm( "st %%o0, %0 " : "=m" (s) ::           \
    "g1", "o0", "o1", "o2", "o3", "o4", "o5" );

#endif /* SPARCv8 */

#if defined(__microblaze__) || defined(microblaze)

/*
 * MicroBlaze has no widening multiply, so b is split into 16-bit
 * halves up front (r7 = low 16 bits, r6 = high 16 bits) and each limb
 * is handled as four 16x16 partial products.
 * Register use: r3 = s, r4 = d, r5 = c; r0 is the architectural zero
 * register, used with addc to accumulate carry flags.
 */
#define MULADDC_INIT                            \
    asm( "lwi r3, %0 " :: "m" (s));             \
    asm( "lwi r4, %0 " :: "m" (d));             \
    asm( "lwi r5, %0 " :: "m" (c));             \
    asm( "lwi r6, %0 " :: "m" (b));             \
    asm( "andi r7, r6, 0xffff" );               \
    asm( "bsrli r6, r6, 16 " );

/* One limb: load *s as two halfwords (r8 low, r9 high), form the four
 * cross products, recombine into r12 (low) / r13 (high), then add the
 * old *d and the carry, storing the result and the new carry in r5. */
#define MULADDC_CORE                            \
    asm( "lhui r8, r3, 0 " );                   \
    asm( "addi r3, r3, 2 " );                   \
    asm( "lhui r9, r3, 0 " );                   \
    asm( "addi r3, r3, 2 " );                   \
    asm( "mul r10, r9, r6 " );                  \
    asm( "mul r11, r8, r7 " );                  \
    asm( "mul r12, r9, r7 " );                  \
    asm( "mul r13, r8, r6 " );                  \
    asm( "bsrli r8, r10, 16 " );                \
    asm( "bsrli r9, r11, 16 " );                \
    asm( "add r13, r13, r8 " );                 \
    asm( "add r13, r13, r9 " );                 \
    asm( "bslli r10, r10, 16 " );               \
    asm( "bslli r11, r11, 16 " );               \
    asm( "add r12, r12, r10 " );                \
    asm( "addc r13, r13, r0 " );                \
    asm( "add r12, r12, r11 " );                \
    asm( "addc r13, r13, r0 " );                \
    asm( "lwi r10, r4, 0 " );                   \
    asm( "add r12, r12, r10 " );                \
    asm( "addc r13, r13, r0 " );                \
    asm( "add r12, r12, r5 " );                 \
    asm( "addc r5, r13, r0 " );                 \
    asm( "swi r12, r4, 0 " );                   \
    asm( "addi r4, r4, 4 " );

/* NOTE(review): this variant assumes little-endian halfword order when
 * loading the limb in two lhui steps — confirm for the target config. */
#define MULADDC_STOP                            \
    asm( "swi r5, %0 " : "=m" (c));             \
    asm( "swi r4, %0 " : "=m" (d));             \
    asm( "swi r3, %0 " : "=m" (s) ::            \
    "r3", "r4" , "r5" , "r6" , "r7" , "r8" ,    \
    "r9", "r10", "r11", "r12", "r13" );

#endif /* MicroBlaze */

469 | ||
470 | #if defined(__tricore__) | |
471 | ||
472 | #define MULADDC_INIT \ | |
473 | asm( "ld.a %%a2, %0 " :: "m" (s)); \ | |
474 | asm( "ld.a %%a3, %0 " :: "m" (d)); \ | |
475 | asm( "ld.w %%d4, %0 " :: "m" (c)); \ | |
476 | asm( "ld.w %%d1, %0 " :: "m" (b)); \ | |
477 | asm( "xor %d5, %d5 " ); | |
478 | ||
479 | #define MULADDC_CORE \ | |
480 | asm( "ld.w %d0, [%a2+] " ); \ | |
481 | asm( "madd.u %e2, %e4, %d0, %d1 " ); \ | |
482 | asm( "ld.w %d0, [%a3] " ); \ | |
483 | asm( "addx %d2, %d2, %d0 " ); \ | |
484 | asm( "addc %d3, %d3, 0 " ); \ | |
485 | asm( "mov %d4, %d3 " ); \ | |
486 | asm( "st.w [%a3+], %d2 " ); | |
487 | ||
488 | #define MULADDC_STOP \ | |
489 | asm( "st.w %0, %%d4 " : "=m" (c)); \ | |
490 | asm( "st.a %0, %%a3 " : "=m" (d)); \ | |
491 | asm( "st.a %0, %%a2 " : "=m" (s) :: \ | |
492 | "d0", "d1", "e2", "d4", "a2", "a3" ); | |
493 | ||
494 | #endif /* TriCore */ | |
#if defined(__arm__)

/*
 * ARM (v3+).  Register use: r0 = s, r1 = d, r2 = c, r3 = b.
 * umlal computes r5:r2 += r3 * r4, i.e. the carry in r2 is folded
 * into the 64-bit multiply-accumulate directly.
 */
#define MULADDC_INIT                            \
    asm( "ldr r0, %0 " :: "m" (s));             \
    asm( "ldr r1, %0 " :: "m" (d));             \
    asm( "ldr r2, %0 " :: "m" (c));             \
    asm( "ldr r3, %0 " :: "m" (b));

#define MULADDC_CORE                            \
    asm( "ldr r4, [r0], #4 " );                 \
    asm( "mov r5, #0 " );                       \
    asm( "ldr r6, [r1] " );                     \
    asm( "umlal r2, r5, r3, r4 " );             \
    asm( "adds r7, r6, r2 " );                  \
    asm( "adc r2, r5, #0 " );                   \
    asm( "str r7, [r1], #4 " );

#define MULADDC_STOP                            \
    asm( "str r2, %0 " : "=m" (c));             \
    asm( "str r1, %0 " : "=m" (d));             \
    asm( "str r0, %0 " : "=m" (s) ::            \
    "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7" );

#endif /* ARMv3 */

520 | ||
521 | #if defined(__alpha__) | |
522 | ||
523 | #define MULADDC_INIT \ | |
524 | asm( "ldq $1, %0 " :: "m" (s)); \ | |
525 | asm( "ldq $2, %0 " :: "m" (d)); \ | |
526 | asm( "ldq $3, %0 " :: "m" (c)); \ | |
527 | asm( "ldq $4, %0 " :: "m" (b)); | |
528 | ||
529 | #define MULADDC_CORE \ | |
530 | asm( "ldq $6, 0($1) " ); \ | |
531 | asm( "addq $1, 8, $1 " ); \ | |
532 | asm( "mulq $6, $4, $7 " ); \ | |
533 | asm( "umulh $6, $4, $6 " ); \ | |
534 | asm( "addq $7, $3, $7 " ); \ | |
535 | asm( "cmpult $7, $3, $3 " ); \ | |
536 | asm( "ldq $5, 0($2) " ); \ | |
537 | asm( "addq $7, $5, $7 " ); \ | |
538 | asm( "cmpult $7, $5, $5 " ); \ | |
539 | asm( "stq $7, 0($2) " ); \ | |
540 | asm( "addq $2, 8, $2 " ); \ | |
541 | asm( "addq $6, $3, $3 " ); \ | |
542 | asm( "addq $5, $3, $3 " ); | |
543 | ||
544 | #define MULADDC_STOP \ | |
545 | asm( "stq $3, %0 " : "=m" (c)); \ | |
546 | asm( "stq $2, %0 " : "=m" (d)); \ | |
547 | asm( "stq $1, %0 " : "=m" (s) :: \ | |
548 | "$1", "$2", "$3", "$4", "$5", "$6", "$7" ); | |
549 | ||
550 | #endif /* Alpha */ | |
#if defined(__mips__)

/*
 * MIPS32.  Register use: $10 = s, $11 = d, $12 = c, $13 = b.
 * multu writes the 64-bit product into HI/LO ($9 gets HI, $14 LO);
 * carries are reconstructed with sltu since MIPS has no carry flag.
 */
#define MULADDC_INIT                            \
    asm( "lw $10, %0 " :: "m" (s));             \
    asm( "lw $11, %0 " :: "m" (d));             \
    asm( "lw $12, %0 " :: "m" (c));             \
    asm( "lw $13, %0 " :: "m" (b));

#define MULADDC_CORE                            \
    asm( "lw $14, 0($10) " );                   \
    asm( "multu $13, $14 " );                   \
    asm( "addi $10, $10, 4 " );                 \
    asm( "mflo $14 " );                         \
    asm( "mfhi $9 " );                          \
    asm( "addu $14, $12, $14 " );               \
    asm( "lw $15, 0($11) " );                   \
    asm( "sltu $12, $14, $12 " );               \
    asm( "addu $15, $14, $15 " );               \
    asm( "sltu $14, $15, $14 " );               \
    asm( "addu $12, $12, $9 " );                \
    asm( "sw $15, 0($11) " );                   \
    asm( "addu $12, $12, $14 " );               \
    asm( "addi $11, $11, 4 " );

#define MULADDC_STOP                            \
    asm( "sw $12, %0 " : "=m" (c));             \
    asm( "sw $11, %0 " : "=m" (d));             \
    asm( "sw $10, %0 " : "=m" (s) ::            \
    "$9", "$10", "$11", "$12", "$13", "$14", "$15" );

#endif /* MIPS */
#endif /* GNUC */

#if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)

/*
 * MSVC / Watcom, Intel-syntax inline assembly for IA-32.
 * Same register layout as the GNU i386 version:
 * esi = s, edi = d, ecx = c, ebx = b.
 */
#define MULADDC_INIT                            \
    __asm   mov     esi, s                      \
    __asm   mov     edi, d                      \
    __asm   mov     ecx, c                      \
    __asm   mov     ebx, b

#define MULADDC_CORE                            \
    __asm   lodsd                               \
    __asm   mul     ebx                         \
    __asm   add     eax, ecx                    \
    __asm   adc     edx, 0                      \
    __asm   add     eax, [edi]                  \
    __asm   adc     edx, 0                      \
    __asm   mov     ecx, edx                    \
    __asm   stosd

#if defined(POLARSSL_HAVE_SSE2)

/* The inline assembler predates SSE2 mnemonics, so the eight-limb SSE2
 * loop is emitted as raw opcode bytes; it mirrors the GNU MULADDC_HUIT
 * above instruction for instruction. */
#define EMIT __asm _emit

#define MULADDC_HUIT                            \
    EMIT 0x0F  EMIT 0x6E  EMIT 0xC9             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0xC3             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x1F             \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x16             \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xD0             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x66  EMIT 0x04  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xE0             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x76  EMIT 0x08  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xF0             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x7E  EMIT 0x0C  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xF8             \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCA             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x5F  EMIT 0x04  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xDC             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x08  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xEE             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x67  EMIT 0x0C  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xFC             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x0F             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x56  EMIT 0x10  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xD0             \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x66  EMIT 0x14  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xE0             \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x76  EMIT 0x18  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xF0             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x04  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x5E  EMIT 0x1C  \
    EMIT 0x0F  EMIT 0xF4  EMIT 0xD8             \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCD             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x10  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xD5             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x08  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCF             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x14  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xE5             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x0C  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCA             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x18  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xF5             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x10  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCC             \
    EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x1C  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xDD             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x14  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCE             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x18  \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
    EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x1C  \
    EMIT 0x83  EMIT 0xC7  EMIT 0x20             \
    EMIT 0x83  EMIT 0xC6  EMIT 0x20             \
    EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
    EMIT 0x0F  EMIT 0x7E  EMIT 0xC9

/* 0x0F 0x77 is emms: clear MMX state before writing results back. */
#define MULADDC_STOP                            \
    EMIT 0x0F  EMIT 0x77                        \
    __asm   mov     c, ecx                      \
    __asm   mov     d, edi                      \
    __asm   mov     s, esi                      \

#else

#define MULADDC_STOP                            \
    __asm   mov     c, ecx                      \
    __asm   mov     d, edi                      \
    __asm   mov     s, esi                      \

#endif /* SSE2 */
#endif /* MSVC */

#endif /* POLARSSL_HAVE_ASM */

#if !defined(MULADDC_CORE)
#if defined(POLARSSL_HAVE_LONGLONG)

/*
 * Portable C fallback using a double-width integer type.
 * t_dbl, t_int and biL (bits per limb) are defined elsewhere in the
 * library.  INIT opens a scope for the temporaries; STOP closes it,
 * so CORE may only appear between the two.
 */
#define MULADDC_INIT                    \
{                                       \
    t_dbl r;                            \
    t_int r0, r1;

/* r = *s * b (double width); split into low/high limbs, then add the
 * carry and *d, detecting overflow with the (x < y) unsigned-wrap test. */
#define MULADDC_CORE                    \
    r   = *(s++) * (t_dbl) b;           \
    r0  = r;                            \
    r1  = r >> biL;                     \
    r0 += c;  r1 += (r0 <  c);          \
    r0 += *d; r1 += (r0 < *d);          \
    c = r1; *(d++) = r0;

#define MULADDC_STOP                    \
}

#else

/*
 * Portable C fallback with no double-width type: each limb multiply is
 * done as four half-limb (biH = half the limb bits) partial products,
 * recombined with the same unsigned-wrap carry test as above.
 */
#define MULADDC_INIT                    \
{                                       \
    t_int s0, s1, b0, b1;               \
    t_int r0, r1, rx, ry;               \
    b0 = ( b << biH ) >> biH;           \
    b1 = ( b >> biH );

#define MULADDC_CORE                    \
    s0 = ( *s << biH ) >> biH;          \
    s1 = ( *s >> biH ); s++;            \
    rx = s0 * b1; r0 = s0 * b0;         \
    ry = s1 * b0; r1 = s1 * b1;         \
    r1 += ( rx >> biH );                \
    r1 += ( ry >> biH );                \
    rx <<= biH; ry <<= biH;             \
    r0 += rx; r1 += (r0 < rx);          \
    r0 += ry; r1 += (r0 < ry);          \
    r0 += c;  r1 += (r0 <  c);          \
    r0 += *d; r1 += (r0 < *d);          \
    c = r1; *(d++) = r0;

#define MULADDC_STOP                    \
}

#endif /* C (generic) */
#endif /* C (longlong) */

#endif /* bn_mul.h */