Your message dated Fri, 24 Dec 2021 18:05:24 +0100
with message-id <[email protected]>
and subject line close: upstream bugs or too old
has caused the Debian Bug report #703790,
regarding segfaults when building curve25519-donna-c64.c on i386
to be marked as done.

This means that you claim that the problem has been dealt with.
If this is not the case it is now your responsibility to reopen the
Bug report if necessary, and/or fix the problem forthwith.

(NB: If you are a system administrator and have no idea what this
message is talking about, this may indicate a serious mail system
misconfiguration somewhere. Please contact [email protected]
immediately.)


-- 
703790: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=703790
Debian Bug Tracking System
Contact [email protected] with problems
--- Begin Message ---
Package: clang
Version: 1:3.0-6.1
Severity: normal

Trying to build curve25519-donna-c64.c[0] on i386 makes clang segfault.
It builds fine on amd64.

} weasel@dixie:~/tmp$ schroot -r -c $chroot -- clang -v -c 
curve25519-donna-c64.c
} Debian clang version 3.0-6.1 (tags/RELEASE_30/final) (based on LLVM 3.0)
} Target: i386-pc-linux-gnu
} Thread model: posix
}  "/usr/bin/clang" -cc1 -triple i386-pc-linux-gnu -emit-obj -mrelax-all 
-disable-free -disable-llvm-verifier -main-file-name curve25519-donna-c64.c 
-mrelocation-model static -mdisable-fp-elim -masm-verbose -mconstructor-aliases 
-target-cpu pentium4 -target-linker-version 2.22 -momit-leaf-frame-pointer -v 
-coverage-file curve25519-donna-c64.o -resource-dir /usr/bin/../lib/clang/3.0 
-fmodule-cache-path /var/tmp/clang-module-cache -internal-isystem 
/usr/local/include -internal-isystem /usr/bin/../lib/clang/3.0/include 
-internal-externc-isystem /usr/include/i486-linux-gnu -internal-externc-isystem 
/usr/include -ferror-limit 19 -fmessage-length 211 -fgnu-runtime 
-fobjc-runtime-has-arc -fobjc-runtime-has-weak -fobjc-fragile-abi 
-fdiagnostics-show-option -fcolor-diagnostics -o curve25519-donna-c64.o -x c 
curve25519-donna-c64.c
} clang -cc1 version 3.0 based upon llvm 3.0 hosted on i386-pc-linux-gnu
} ignoring nonexistent directory "/usr/bin/../lib/clang/3.0/include"
} ignoring nonexistent directory "/usr/include/i486-linux-gnu"
} ignoring nonexistent directory "/usr/include/i486-linux-gnu/"
} ignoring nonexistent directory "/usr/bin/../lib/clang/3.0/include"
} ignoring nonexistent directory "/usr/include/i486-linux-gnu"
} ignoring duplicate directory "/usr/local/include"
} ignoring duplicate directory "/usr/include"
} #include "..." search starts here:
} #include <...> search starts here:
}  /usr/local/include
}  /usr/include
}  /usr/include/i386-linux-gnu/
}  /usr/include/clang/3.0/include/
}  /usr/lib/gcc/i486-linux-gnu/4.6/include/
}  /usr/lib/gcc/i486-linux-gnu/4.6/include-fixed/
} End of search list.
} 0  libLLVM-3.0.so.1 0xf7078fc8
} 1  libLLVM-3.0.so.1 0xf70794e4
} 2                   0xf771e400 __kernel_sigreturn + 0
} 3  libLLVM-3.0.so.1 0xf6fa9451 llvm::SelectionDAG::getNode(unsigned int, 
llvm::DebugLoc, llvm::EVT, llvm::SDValue) + 81
} 4  libLLVM-3.0.so.1 0xf6fbd25d
} 5  libLLVM-3.0.so.1 0xf6fbc5ca
} 6  libLLVM-3.0.so.1 0xf6fc473a 
llvm::TargetLowering::LowerCallTo(llvm::SDValue, llvm::Type*, bool, bool, bool, 
bool, unsigned int, llvm::CallingConv::ID, bool, bool, llvm::SDValue, 
std::vector<llvm::TargetLowering::ArgListEntry, 
std::allocator<llvm::TargetLowering::ArgListEntry> >&, llvm::SelectionDAG&, 
llvm::DebugLoc) const + 4986
} 7  libLLVM-3.0.so.1 0xf6f559dc
} 8  libLLVM-3.0.so.1 0xf6f41f1d
} 9  libLLVM-3.0.so.1 0xf6f52092
} 10 libLLVM-3.0.so.1 0xf6f5a3b1
} 11 libLLVM-3.0.so.1 0xf6f5b51a llvm::SelectionDAG::LegalizeTypes() + 490
} 12 libLLVM-3.0.so.1 0xf6ffb783 llvm::SelectionDAGISel::CodeGenAndEmitDAG() + 
211
} 13 libLLVM-3.0.so.1 0xf6ffcca8 
llvm::SelectionDAGISel::SelectBasicBlock(llvm::ilist_iterator<llvm::Instruction 
const>, llvm::ilist_iterator<llvm::Instruction const>, bool&) + 168
} 14 libLLVM-3.0.so.1 0xf6ffd3f6 
llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) + 1846
} 15 libLLVM-3.0.so.1 0xf6ffeda9 
llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) + 409
} 16 libLLVM-3.0.so.1 0xf69d277e 
llvm::MachineFunctionPass::runOnFunction(llvm::Function&) + 126
} 17 libLLVM-3.0.so.1 0xf6b72dcc 
llvm::FPPassManager::runOnFunction(llvm::Function&) + 652
} 18 libLLVM-3.0.so.1 0xf6b72e2c 
llvm::FPPassManager::runOnModule(llvm::Module&) + 76
} 19 libLLVM-3.0.so.1 0xf6b729f4 
llvm::MPPassManager::runOnModule(llvm::Module&) + 500
} 20 libLLVM-3.0.so.1 0xf6b72ae0 llvm::PassManagerImpl::run(llvm::Module&) + 128
} 21 libLLVM-3.0.so.1 0xf6b72b36 llvm::PassManager::run(llvm::Module&) + 38
} 22 clang            0x0831e725 
clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::CodeGenOptions 
const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::Module*, 
clang::BackendAction, llvm::raw_ostream*) + 2133
} 23 clang            0x0831c8ab 
clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) + 267
} 24 clang            0x08443141 clang::ParseAST(clang::Sema&, bool) + 465
} 25 clang            0x08221c17 clang::ASTFrontendAction::ExecuteAction() + 103
} 26 clang            0x0831b772 clang::CodeGenAction::ExecuteAction() + 66
} 27 clang            0x08222200 clang::FrontendAction::Execute() + 240
} 28 clang            0x082094f7 
clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) + 327
} 29 clang            0x081f32b0 
clang::ExecuteCompilerInvocation(clang::CompilerInstance*) + 1072
} 30 clang            0x081ec5da cc1_main(char const**, char const**, char 
const*, void*) + 874
} 31 clang            0x081eb4d0 main + 6880
} 32 libc.so.6        0xf6079e16 __libc_start_main + 230
} 33 clang            0x081ec0c9
} Stack dump:
} 0.      Program arguments: /usr/bin/clang -cc1 -triple i386-pc-linux-gnu 
-emit-obj -mrelax-all -disable-free -disable-llvm-verifier -main-file-name 
curve25519-donna-c64.c -mrelocation-model static -mdisable-fp-elim 
-masm-verbose -mconstructor-aliases -target-cpu pentium4 -target-linker-version 
2.22 -momit-leaf-frame-pointer -v -coverage-file curve25519-donna-c64.o 
-resource-dir /usr/bin/../lib/clang/3.0 -fmodule-cache-path 
/var/tmp/clang-module-cache -internal-isystem /usr/local/include 
-internal-isystem /usr/bin/../lib/clang/3.0/include -internal-externc-isystem 
/usr/include/i486-linux-gnu -internal-externc-isystem /usr/include 
-ferror-limit 19 -fmessage-length 211 -fgnu-runtime -fobjc-runtime-has-arc 
-fobjc-runtime-has-weak -fobjc-fragile-abi -fdiagnostics-show-option 
-fcolor-diagnostics -o curve25519-donna-c64.o -x c curve25519-donna-c64.c 
} 1.      <eof> parser at end of file
} 2.      Code generation
} 3.      Running pass 'Function Pass Manager' on module 
'curve25519-donna-c64.c'.
} 4.      Running pass 'X86 DAG->DAG Instruction Selection' on function 
'@curve25519_donna'
} clang: error: unable to execute command: Segmentation fault
} clang: error: clang frontend command failed due to signal 2 (use -v to see 
invocation)
} clang: note: diagnostic msg: Please submit a bug report to 
http://llvm.org/bugs/ and include command line arguments and all diagnostic 
information.
} clang: note: diagnostic msg: Preprocessed source(s) are located at:
} clang: note: diagnostic msg: /tmp/curve25519-donna-c64-svRExf.i

It probably shouldn't segfault.

0. 
https://raw.github.com/agl/curve25519-donna/d05f13ac8758ce17fdf20fe9f3d842c91992152e/curve25519-donna-c64.c
   
https://github.com/agl/curve25519-donna/blob/d05f13ac8758ce17fdf20fe9f3d842c91992152e/curve25519-donna-c64.c
/* Copyright 2008, Google Inc.
 * All rights reserved.
 *
 * Code released into the public domain.
 *
 * curve25519-donna: Curve25519 elliptic curve, public key function
 *
 * http://code.google.com/p/curve25519-donna/
 *
 * Adam Langley <[email protected]>
 *
 * Derived from public domain C code by Daniel J. Bernstein <[email protected]>
 *
 * More information about curve25519 can be found here
 *   http://cr.yp.to/ecdh.html
 *
 * djb's sample implementation of curve25519 is written in a special assembly
 * language called qhasm and uses the floating point registers.
 *
 * This is, almost, a clean room reimplementation from the curve25519 paper. It
 * uses many of the tricks described therein. Only the crecip function is taken
 * from the sample implementation.
 */

#include <string.h>
#include <stdint.h>

typedef uint8_t u8;
typedef uint64_t limb;
typedef limb felem[5];
// This is a special gcc mode for 128-bit integers. It's implemented on 64-bit
// platforms only as far as I know.
typedef unsigned uint128_t __attribute__((mode(TI)));

#undef force_inline
#define force_inline __attribute__((always_inline))

/* Sum two numbers: output += in */
static inline void force_inline
fsum(limb *output, const limb *in) {
  output[0] += in[0];
  output[1] += in[1];
  output[2] += in[2];
  output[3] += in[3];
  output[4] += in[4];
}

/* Find the difference of two numbers: output = in - output
 * (note the order of the arguments!)
 *
 * Assumes that out[i] < 2**52
 * On return, out[i] < 2**55
 */
static inline void force_inline
fdifference_backwards(felem out, const felem in) {
  /* 152 is 19 << 3 */
  static const limb two54m152 = (((limb)1) << 54) - 152;
  static const limb two54m8 = (((limb)1) << 54) - 8;

  out[0] = in[0] + two54m152 - out[0];
  out[1] = in[1] + two54m8 - out[1];
  out[2] = in[2] + two54m8 - out[2];
  out[3] = in[3] + two54m8 - out[3];
  out[4] = in[4] + two54m8 - out[4];
}

/* Multiply a number by a scalar: output = in * scalar */
static inline void force_inline
fscalar_product(felem output, const felem in, const limb scalar) {
  uint128_t a;

  a = ((uint128_t) in[0]) * scalar;
  output[0] = ((limb)a) & 0x7ffffffffffff;

  a = ((uint128_t) in[1]) * scalar + ((limb) (a >> 51));
  output[1] = ((limb)a) & 0x7ffffffffffff;

  a = ((uint128_t) in[2]) * scalar + ((limb) (a >> 51));
  output[2] = ((limb)a) & 0x7ffffffffffff;

  a = ((uint128_t) in[3]) * scalar + ((limb) (a >> 51));
  output[3] = ((limb)a) & 0x7ffffffffffff;

  a = ((uint128_t) in[4]) * scalar + ((limb) (a >> 51));
  output[4] = ((limb)a) & 0x7ffffffffffff;

  output[0] += (a >> 51) * 19;
}

/* Multiply two numbers: output = in2 * in
 *
 * output must be distinct to both inputs. The inputs are reduced coefficient
 * form, the output is not.
 *
 * Assumes that in[i] < 2**55 and likewise for in2.
 * On return, output[i] < 2**52
 */
static inline void force_inline
fmul(felem output, const felem in2, const felem in) {
  uint128_t t[5];
  limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;

  r0 = in[0];
  r1 = in[1];
  r2 = in[2];
  r3 = in[3];
  r4 = in[4];

  s0 = in2[0];
  s1 = in2[1];
  s2 = in2[2];
  s3 = in2[3];
  s4 = in2[4];

  t[0]  =  ((uint128_t) r0) * s0;
  t[1]  =  ((uint128_t) r0) * s1 + ((uint128_t) r1) * s0;
  t[2]  =  ((uint128_t) r0) * s2 + ((uint128_t) r2) * s0 + ((uint128_t) r1) * s1;
  t[3]  =  ((uint128_t) r0) * s3 + ((uint128_t) r3) * s0 + ((uint128_t) r1) * s2 + ((uint128_t) r2) * s1;
  t[4]  =  ((uint128_t) r0) * s4 + ((uint128_t) r4) * s0 + ((uint128_t) r3) * s1 + ((uint128_t) r1) * s3 + ((uint128_t) r2) * s2;

  r4 *= 19;
  r1 *= 19;
  r2 *= 19;
  r3 *= 19;

  t[0] += ((uint128_t) r4) * s1 + ((uint128_t) r1) * s4 + ((uint128_t) r2) * s3 + ((uint128_t) r3) * s2;
  t[1] += ((uint128_t) r4) * s2 + ((uint128_t) r2) * s4 + ((uint128_t) r3) * s3;
  t[2] += ((uint128_t) r4) * s3 + ((uint128_t) r3) * s4;
  t[3] += ((uint128_t) r4) * s4;

                  r0 = (limb)t[0] & 0x7ffffffffffff; c = (limb)(t[0] >> 51);
  t[1] += c;      r1 = (limb)t[1] & 0x7ffffffffffff; c = (limb)(t[1] >> 51);
  t[2] += c;      r2 = (limb)t[2] & 0x7ffffffffffff; c = (limb)(t[2] >> 51);
  t[3] += c;      r3 = (limb)t[3] & 0x7ffffffffffff; c = (limb)(t[3] >> 51);
  t[4] += c;      r4 = (limb)t[4] & 0x7ffffffffffff; c = (limb)(t[4] >> 51);
  r0 +=   c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff;
  r1 +=   c;      c = r1 >> 51; r1 = r1 & 0x7ffffffffffff;
  r2 +=   c;

  output[0] = r0;
  output[1] = r1;
  output[2] = r2;
  output[3] = r3;
  output[4] = r4;
}

static inline void force_inline
fsquare_times(felem output, const felem in, limb count) {
  uint128_t t[5];
  limb r0,r1,r2,r3,r4,c;
  limb d0,d1,d2,d4,d419;

  r0 = in[0];
  r1 = in[1];
  r2 = in[2];
  r3 = in[3];
  r4 = in[4];

  do {
    d0 = r0 * 2;
    d1 = r1 * 2;
    d2 = r2 * 2 * 19;
    d419 = r4 * 19;
    d4 = d419 * 2;

    t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3     ));
    t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19));
    t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3     ));
    t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419   ));
    t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2     ));

                    r0 = (limb)t[0] & 0x7ffffffffffff; c = (limb)(t[0] >> 51);
    t[1] += c;      r1 = (limb)t[1] & 0x7ffffffffffff; c = (limb)(t[1] >> 51);
    t[2] += c;      r2 = (limb)t[2] & 0x7ffffffffffff; c = (limb)(t[2] >> 51);
    t[3] += c;      r3 = (limb)t[3] & 0x7ffffffffffff; c = (limb)(t[3] >> 51);
    t[4] += c;      r4 = (limb)t[4] & 0x7ffffffffffff; c = (limb)(t[4] >> 51);
    r0 +=   c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff;
    r1 +=   c;      c = r1 >> 51; r1 = r1 & 0x7ffffffffffff;
    r2 +=   c;
  } while(--count);

  output[0] = r0;
  output[1] = r1;
  output[2] = r2;
  output[3] = r3;
  output[4] = r4;
}

/* Load a little-endian 64-bit number  */
static limb
load_limb(const u8 *in) {
  return
    ((limb)in[0]) |
    (((limb)in[1]) << 8) |
    (((limb)in[2]) << 16) |
    (((limb)in[3]) << 24) |
    (((limb)in[4]) << 32) |
    (((limb)in[5]) << 40) |
    (((limb)in[6]) << 48) |
    (((limb)in[7]) << 56);
}

static void
store_limb(u8 *out, limb in) {
  out[0] = in & 0xff;
  out[1] = (in >> 8) & 0xff;
  out[2] = (in >> 16) & 0xff;
  out[3] = (in >> 24) & 0xff;
  out[4] = (in >> 32) & 0xff;
  out[5] = (in >> 40) & 0xff;
  out[6] = (in >> 48) & 0xff;
  out[7] = (in >> 56) & 0xff;
}

/* Take a little-endian, 32-byte number and expand it into polynomial form */
static void
fexpand(limb *output, const u8 *in) {
  output[0] = load_limb(in) & 0x7ffffffffffff;
  output[1] = (load_limb(in+6) >> 3) & 0x7ffffffffffff;
  output[2] = (load_limb(in+12) >> 6) & 0x7ffffffffffff;
  output[3] = (load_limb(in+19) >> 1) & 0x7ffffffffffff;
  output[4] = (load_limb(in+24) >> 12) & 0x7ffffffffffff;
}

/* Take a fully reduced polynomial form number and contract it into a
 * little-endian, 32-byte array
 */
static void
fcontract(u8 *output, const felem input) {
  uint128_t t[5];

  t[0] = input[0];
  t[1] = input[1];
  t[2] = input[2];
  t[3] = input[3];
  t[4] = input[4];

  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;

  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;

  /* now t is between 0 and 2^255-1, properly carried. */
  /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */

  t[0] += 19;

  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;

  /* now between 19 and 2^255-1 in both cases, and offset by 19. */

  t[0] += 0x8000000000000 - 19;
  t[1] += 0x8000000000000 - 1;
  t[2] += 0x8000000000000 - 1;
  t[3] += 0x8000000000000 - 1;
  t[4] += 0x8000000000000 - 1;

  /* now between 2^255 and 2^256-20, and offset by 2^255. */

  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
  t[4] &= 0x7ffffffffffff;

  store_limb(output,    t[0] | (t[1] << 51));
  store_limb(output+8,  (t[1] >> 13) | (t[2] << 38));
  store_limb(output+16, (t[2] >> 26) | (t[3] << 25));
  store_limb(output+24, (t[3] >> 39) | (t[4] << 12));
}

/* Input: Q, Q', Q-Q'
 * Output: 2Q, Q+Q'
 *
 *   x2 z3: long form
 *   x3 z3: long form
 *   x z: short form, destroyed
 *   xprime zprime: short form, destroyed
 *   qmqp: short form, preserved
 */
static void
fmonty(limb *x2, limb *z2, /* output 2Q */
       limb *x3, limb *z3, /* output Q + Q' */
       limb *x, limb *z,   /* input Q */
       limb *xprime, limb *zprime, /* input Q' */
       const limb *qmqp /* input Q - Q' */) {
  limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5],
        zzprime[5], zzzprime[5];

  memcpy(origx, x, 5 * sizeof(limb));
  fsum(x, z);
  fdifference_backwards(z, origx);  // does x - z

  memcpy(origxprime, xprime, sizeof(limb) * 5);
  fsum(xprime, zprime);
  fdifference_backwards(zprime, origxprime);
  fmul(xxprime, xprime, z);
  fmul(zzprime, x, zprime);
  memcpy(origxprime, xxprime, sizeof(limb) * 5);
  fsum(xxprime, zzprime);
  fdifference_backwards(zzprime, origxprime);
  fsquare_times(x3, xxprime, 1);
  fsquare_times(zzzprime, zzprime, 1);
  fmul(z3, zzzprime, qmqp);

  fsquare_times(xx, x, 1);
  fsquare_times(zz, z, 1);
  fmul(x2, xx, zz);
  fdifference_backwards(zz, xx);  // does zz = xx - zz
  fscalar_product(zzz, zz, 121665);
  fsum(zzz, xx);
  fmul(z2, zz, zzz);
}

// -----------------------------------------------------------------------------
// Maybe swap the contents of two limb arrays (@a and @b), each @len elements
// long. Perform the swap iff @swap is non-zero.
//
// This function performs the swap without leaking any side-channel
// information.
// -----------------------------------------------------------------------------
static void
swap_conditional(limb a[5], limb b[5], limb iswap) {
  unsigned i;
  const limb swap = -iswap;

  for (i = 0; i < 5; ++i) {
    const limb x = swap & (a[i] ^ b[i]);
    a[i] ^= x;
    b[i] ^= x;
  }
}

/* Calculates nQ where Q is the x-coordinate of a point on the curve
 *
 *   resultx/resultz: the x coordinate of the resulting curve point (short form)
 *   n: a little endian, 32-byte number
 *   q: a point of the curve (short form)
 */
static void
cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q) {
  limb a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0};
  limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
  limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
  limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;

  unsigned i, j;

  memcpy(nqpqx, q, sizeof(limb) * 5);

  for (i = 0; i < 32; ++i) {
    u8 byte = n[31 - i];
    for (j = 0; j < 8; ++j) {
      const limb bit = byte >> 7;

      swap_conditional(nqx, nqpqx, bit);
      swap_conditional(nqz, nqpqz, bit);
      fmonty(nqx2, nqz2,
             nqpqx2, nqpqz2,
             nqx, nqz,
             nqpqx, nqpqz,
             q);
      swap_conditional(nqx2, nqpqx2, bit);
      swap_conditional(nqz2, nqpqz2, bit);

      t = nqx;
      nqx = nqx2;
      nqx2 = t;
      t = nqz;
      nqz = nqz2;
      nqz2 = t;
      t = nqpqx;
      nqpqx = nqpqx2;
      nqpqx2 = t;
      t = nqpqz;
      nqpqz = nqpqz2;
      nqpqz2 = t;

      byte <<= 1;
    }
  }

  memcpy(resultx, nqx, sizeof(limb) * 5);
  memcpy(resultz, nqz, sizeof(limb) * 5);
}


// -----------------------------------------------------------------------------
// Shamelessly copied from djb's code, tightened a little
// -----------------------------------------------------------------------------
static void
crecip(felem out, const felem z) {
  felem a,t0,b,c;

  /* 2 */ fsquare_times(a, z, 1); // a = 2
  /* 8 */ fsquare_times(t0, a, 2);
  /* 9 */ fmul(b, t0, z); // b = 9
  /* 11 */ fmul(a, b, a); // a = 11
  /* 22 */ fsquare_times(t0, a, 1);
  /* 2^5 - 2^0 = 31 */ fmul(b, t0, b);
  /* 2^10 - 2^5 */ fsquare_times(t0, b, 5);
  /* 2^10 - 2^0 */ fmul(b, t0, b);
  /* 2^20 - 2^10 */ fsquare_times(t0, b, 10);
  /* 2^20 - 2^0 */ fmul(c, t0, b);
  /* 2^40 - 2^20 */ fsquare_times(t0, c, 20);
  /* 2^40 - 2^0 */ fmul(t0, t0, c);
  /* 2^50 - 2^10 */ fsquare_times(t0, t0, 10);
  /* 2^50 - 2^0 */ fmul(b, t0, b);
  /* 2^100 - 2^50 */ fsquare_times(t0, b, 50);
  /* 2^100 - 2^0 */ fmul(c, t0, b);
  /* 2^200 - 2^100 */ fsquare_times(t0, c, 100);
  /* 2^200 - 2^0 */ fmul(t0, t0, c);
  /* 2^250 - 2^50 */ fsquare_times(t0, t0, 50);
  /* 2^250 - 2^0 */ fmul(t0, t0, b);
  /* 2^255 - 2^5 */ fsquare_times(t0, t0, 5);
  /* 2^255 - 21 */ fmul(out, t0, a);
}

int curve25519_donna(u8 *, const u8 *, const u8 *);

int
curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint) {
  limb bp[5], x[5], z[5], zmone[5];
  uint8_t e[32];
  int i;

  for (i = 0;i < 32;++i) e[i] = secret[i];
  e[0] &= 248;
  e[31] &= 127;
  e[31] |= 64;

  fexpand(bp, basepoint);
  cmult(x, z, e, bp);
  crecip(zmone, z);
  fmul(z, x, zmone);
  fcontract(mypublic, z);
  return 0;
}
# 1 "curve25519-donna-c64.c"
# 1 "curve25519-donna-c64.c" 1
# 1 "<built-in>" 1
# 1 "<built-in>" 3
# 129 "<built-in>" 3
# 1 "<command line>" 1
# 1 "<built-in>" 2
# 1 "curve25519-donna-c64.c" 2
# 25 "curve25519-donna-c64.c"
# 1 "/usr/include/string.h" 1 3 4
# 27 "/usr/include/string.h" 3 4
# 1 "/usr/include/features.h" 1 3 4
# 323 "/usr/include/features.h" 3 4
# 1 "/usr/include/i386-linux-gnu/bits/predefs.h" 1 3 4
# 324 "/usr/include/features.h" 2 3 4
# 356 "/usr/include/features.h" 3 4
# 1 "/usr/include/i386-linux-gnu/sys/cdefs.h" 1 3 4
# 359 "/usr/include/i386-linux-gnu/sys/cdefs.h" 3 4
# 1 "/usr/include/i386-linux-gnu/bits/wordsize.h" 1 3 4
# 360 "/usr/include/i386-linux-gnu/sys/cdefs.h" 2 3 4
# 357 "/usr/include/features.h" 2 3 4
# 388 "/usr/include/features.h" 3 4
# 1 "/usr/include/i386-linux-gnu/gnu/stubs.h" 1 3 4



# 1 "/usr/include/i386-linux-gnu/bits/wordsize.h" 1 3 4
# 5 "/usr/include/i386-linux-gnu/gnu/stubs.h" 2 3 4


# 1 "/usr/include/i386-linux-gnu/gnu/stubs-32.h" 1 3 4
# 8 "/usr/include/i386-linux-gnu/gnu/stubs.h" 2 3 4
# 389 "/usr/include/features.h" 2 3 4
# 28 "/usr/include/string.h" 2 3 4






# 1 "/usr/include/clang/3.0/include/stddef.h" 1 3 4
# 31 "/usr/include/clang/3.0/include/stddef.h" 3 4
typedef __typeof__(((int*)0)-((int*)0)) ptrdiff_t;



typedef __typeof__(sizeof(int)) size_t;




typedef int wchar_t;
# 35 "/usr/include/string.h" 2 3 4
# 44 "/usr/include/string.h" 3 4
extern void *memcpy (void *__restrict __dest,
       __const void *__restrict __src, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));


extern void *memmove (void *__dest, __const void *__src, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));






extern void *memccpy (void *__restrict __dest, __const void *__restrict __src,
        int __c, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));





extern void *memset (void *__s, int __c, size_t __n) __attribute__ 
((__nothrow__)) __attribute__ ((__nonnull__ (1)));


extern int memcmp (__const void *__s1, __const void *__s2, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2)));
# 95 "/usr/include/string.h" 3 4
extern void *memchr (__const void *__s, int __c, size_t __n)
      __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1)));
# 128 "/usr/include/string.h" 3 4
extern char *strcpy (char *__restrict __dest, __const char *__restrict __src)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));

extern char *strncpy (char *__restrict __dest,
        __const char *__restrict __src, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));


extern char *strcat (char *__restrict __dest, __const char *__restrict __src)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));

extern char *strncat (char *__restrict __dest, __const char *__restrict __src,
        size_t __n) __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ 
(1, 2)));


extern int strcmp (__const char *__s1, __const char *__s2)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2)));

extern int strncmp (__const char *__s1, __const char *__s2, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2)));


extern int strcoll (__const char *__s1, __const char *__s2)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2)));

extern size_t strxfrm (char *__restrict __dest,
         __const char *__restrict __src, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (2)));







# 1 "/usr/include/xlocale.h" 1 3 4
# 28 "/usr/include/xlocale.h" 3 4
typedef struct __locale_struct
{

  struct __locale_data *__locales[13];


  const unsigned short int *__ctype_b;
  const int *__ctype_tolower;
  const int *__ctype_toupper;


  const char *__names[13];
} *__locale_t;


typedef __locale_t locale_t;
# 163 "/usr/include/string.h" 2 3 4


extern int strcoll_l (__const char *__s1, __const char *__s2, __locale_t __l)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2, 3)));

extern size_t strxfrm_l (char *__dest, __const char *__src, size_t __n,
    __locale_t __l) __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ 
(2, 4)));





extern char *strdup (__const char *__s)
     __attribute__ ((__nothrow__)) __attribute__ ((__malloc__)) __attribute__ 
((__nonnull__ (1)));






extern char *strndup (__const char *__string, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__malloc__)) __attribute__ 
((__nonnull__ (1)));
# 235 "/usr/include/string.h" 3 4
extern char *strchr (__const char *__s, int __c)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1)));
# 262 "/usr/include/string.h" 3 4
extern char *strrchr (__const char *__s, int __c)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1)));
# 284 "/usr/include/string.h" 3 4
extern size_t strcspn (__const char *__s, __const char *__reject)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2)));


extern size_t strspn (__const char *__s, __const char *__accept)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2)));
# 314 "/usr/include/string.h" 3 4
extern char *strpbrk (__const char *__s, __const char *__accept)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2)));
# 342 "/usr/include/string.h" 3 4
extern char *strstr (__const char *__haystack, __const char *__needle)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2)));




extern char *strtok (char *__restrict __s, __const char *__restrict __delim)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (2)));




extern char *__strtok_r (char *__restrict __s,
    __const char *__restrict __delim,
    char **__restrict __save_ptr)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (2, 3)));

extern char *strtok_r (char *__restrict __s, __const char *__restrict __delim,
         char **__restrict __save_ptr)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (2, 3)));
# 399 "/usr/include/string.h" 3 4
extern size_t strlen (__const char *__s)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1)));





extern size_t strnlen (__const char *__string, size_t __maxlen)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1)));





extern char *strerror (int __errnum) __attribute__ ((__nothrow__));
# 427 "/usr/include/string.h" 3 4
extern int strerror_r (int __errnum, char *__buf, size_t __buflen) __asm__ ("" 
"__xpg_strerror_r") __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ 
(2)));
# 445 "/usr/include/string.h" 3 4
extern char *strerror_l (int __errnum, __locale_t __l) __attribute__ 
((__nothrow__));





extern void __bzero (void *__s, size_t __n) __attribute__ ((__nothrow__)) 
__attribute__ ((__nonnull__ (1)));



extern void bcopy (__const void *__src, void *__dest, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));


extern void bzero (void *__s, size_t __n) __attribute__ ((__nothrow__)) 
__attribute__ ((__nonnull__ (1)));


extern int bcmp (__const void *__s1, __const void *__s2, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2)));
# 489 "/usr/include/string.h" 3 4
extern char *index (__const char *__s, int __c)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1)));
# 517 "/usr/include/string.h" 3 4
extern char *rindex (__const char *__s, int __c)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1)));




extern int ffs (int __i) __attribute__ ((__nothrow__)) __attribute__ 
((__const__));
# 536 "/usr/include/string.h" 3 4
extern int strcasecmp (__const char *__s1, __const char *__s2)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2)));


extern int strncasecmp (__const char *__s1, __const char *__s2, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__pure__)) __attribute__ 
((__nonnull__ (1, 2)));
# 559 "/usr/include/string.h" 3 4
extern char *strsep (char **__restrict __stringp,
       __const char *__restrict __delim)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));




extern char *strsignal (int __sig) __attribute__ ((__nothrow__));


extern char *__stpcpy (char *__restrict __dest, __const char *__restrict __src)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));
extern char *stpcpy (char *__restrict __dest, __const char *__restrict __src)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));



extern char *__stpncpy (char *__restrict __dest,
   __const char *__restrict __src, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));
extern char *stpncpy (char *__restrict __dest,
        __const char *__restrict __src, size_t __n)
     __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));
# 26 "curve25519-donna-c64.c" 2
# 1 "/usr/include/stdint.h" 1 3 4
# 27 "/usr/include/stdint.h" 3 4
# 1 "/usr/include/i386-linux-gnu/bits/wchar.h" 1 3 4
# 28 "/usr/include/stdint.h" 2 3 4
# 1 "/usr/include/i386-linux-gnu/bits/wordsize.h" 1 3 4
# 29 "/usr/include/stdint.h" 2 3 4








typedef signed char int8_t;
typedef short int int16_t;
typedef int int32_t;



__extension__
typedef long long int int64_t;




typedef unsigned char uint8_t;
typedef unsigned short int uint16_t;

typedef unsigned int uint32_t;





__extension__
typedef unsigned long long int uint64_t;






typedef signed char int_least8_t;
typedef short int int_least16_t;
typedef int int_least32_t;



__extension__
typedef long long int int_least64_t;



typedef unsigned char uint_least8_t;
typedef unsigned short int uint_least16_t;
typedef unsigned int uint_least32_t;



__extension__
typedef unsigned long long int uint_least64_t;






typedef signed char int_fast8_t;





typedef int int_fast16_t;
typedef int int_fast32_t;
__extension__
typedef long long int int_fast64_t;



typedef unsigned char uint_fast8_t;





typedef unsigned int uint_fast16_t;
typedef unsigned int uint_fast32_t;
__extension__
typedef unsigned long long int uint_fast64_t;
# 126 "/usr/include/stdint.h" 3 4
typedef int intptr_t;


typedef unsigned int uintptr_t;
# 138 "/usr/include/stdint.h" 3 4
__extension__
typedef long long int intmax_t;
__extension__
typedef unsigned long long int uintmax_t;
# 27 "curve25519-donna-c64.c" 2

typedef uint8_t u8;
typedef uint64_t limb;
typedef limb felem[5];


typedef unsigned uint128_t __attribute__((mode(TI)));





static inline void __attribute__((always_inline))
fsum(limb *output, const limb *in) {
  output[0] += in[0];
  output[1] += in[1];
  output[2] += in[2];
  output[3] += in[3];
  output[4] += in[4];
}







static inline void __attribute__((always_inline))
fdifference_backwards(felem out, const felem in) {

  static const limb two54m152 = (((limb)1) << 54) - 152;
  static const limb two54m8 = (((limb)1) << 54) - 8;

  out[0] = in[0] + two54m152 - out[0];
  out[1] = in[1] + two54m8 - out[1];
  out[2] = in[2] + two54m8 - out[2];
  out[3] = in[3] + two54m8 - out[3];
  out[4] = in[4] + two54m8 - out[4];
}


static inline void __attribute__((always_inline))
fscalar_product(felem output, const felem in, const limb scalar) {
  uint128_t a;

  a = ((uint128_t) in[0]) * scalar;
  output[0] = ((limb)a) & 0x7ffffffffffff;

  a = ((uint128_t) in[1]) * scalar + ((limb) (a >> 51));
  output[1] = ((limb)a) & 0x7ffffffffffff;

  a = ((uint128_t) in[2]) * scalar + ((limb) (a >> 51));
  output[2] = ((limb)a) & 0x7ffffffffffff;

  a = ((uint128_t) in[3]) * scalar + ((limb) (a >> 51));
  output[3] = ((limb)a) & 0x7ffffffffffff;

  a = ((uint128_t) in[4]) * scalar + ((limb) (a >> 51));
  output[4] = ((limb)a) & 0x7ffffffffffff;

  output[0] += (a >> 51) * 19;
}
# 98 "curve25519-donna-c64.c"
static inline void __attribute__((always_inline))
fmul(felem output, const felem in2, const felem in) {
  uint128_t t[5];
  limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;

  r0 = in[0];
  r1 = in[1];
  r2 = in[2];
  r3 = in[3];
  r4 = in[4];

  s0 = in2[0];
  s1 = in2[1];
  s2 = in2[2];
  s3 = in2[3];
  s4 = in2[4];

  t[0] = ((uint128_t) r0) * s0;
  t[1] = ((uint128_t) r0) * s1 + ((uint128_t) r1) * s0;
  t[2] = ((uint128_t) r0) * s2 + ((uint128_t) r2) * s0 + ((uint128_t) r1) * s1;
  t[3] = ((uint128_t) r0) * s3 + ((uint128_t) r3) * s0 + ((uint128_t) r1) * s2 
+ ((uint128_t) r2) * s1;
  t[4] = ((uint128_t) r0) * s4 + ((uint128_t) r4) * s0 + ((uint128_t) r3) * s1 
+ ((uint128_t) r1) * s3 + ((uint128_t) r2) * s2;

  r4 *= 19;
  r1 *= 19;
  r2 *= 19;
  r3 *= 19;

  t[0] += ((uint128_t) r4) * s1 + ((uint128_t) r1) * s4 + ((uint128_t) r2) * s3 
+ ((uint128_t) r3) * s2;
  t[1] += ((uint128_t) r4) * s2 + ((uint128_t) r2) * s4 + ((uint128_t) r3) * s3;
  t[2] += ((uint128_t) r4) * s3 + ((uint128_t) r3) * s4;
  t[3] += ((uint128_t) r4) * s4;

                  r0 = (limb)t[0] & 0x7ffffffffffff; c = (limb)(t[0] >> 51);
  t[1] += c; r1 = (limb)t[1] & 0x7ffffffffffff; c = (limb)(t[1] >> 51);
  t[2] += c; r2 = (limb)t[2] & 0x7ffffffffffff; c = (limb)(t[2] >> 51);
  t[3] += c; r3 = (limb)t[3] & 0x7ffffffffffff; c = (limb)(t[3] >> 51);
  t[4] += c; r4 = (limb)t[4] & 0x7ffffffffffff; c = (limb)(t[4] >> 51);
  r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff;
  r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffff;
  r2 += c;

  output[0] = r0;
  output[1] = r1;
  output[2] = r2;
  output[3] = r3;
  output[4] = r4;
}

static inline void __attribute__((always_inline))
fsquare_times(felem output, const felem in, limb count) {
  uint128_t t[5];
  limb r0,r1,r2,r3,r4,c;
  limb d0,d1,d2,d4,d419;

  r0 = in[0];
  r1 = in[1];
  r2 = in[2];
  r3 = in[3];
  r4 = in[4];

  do {
    d0 = r0 * 2;
    d1 = r1 * 2;
    d2 = r2 * 2 * 19;
    d419 = r4 * 19;
    d4 = d419 * 2;

    t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * 
(r3 ));
    t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * 
(r3 * 19));
    t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * 
(r3 ));
    t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * 
(d419 ));
    t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * 
(r2 ));

                    r0 = (limb)t[0] & 0x7ffffffffffff; c = (limb)(t[0] >> 51);
    t[1] += c; r1 = (limb)t[1] & 0x7ffffffffffff; c = (limb)(t[1] >> 51);
    t[2] += c; r2 = (limb)t[2] & 0x7ffffffffffff; c = (limb)(t[2] >> 51);
    t[3] += c; r3 = (limb)t[3] & 0x7ffffffffffff; c = (limb)(t[3] >> 51);
    t[4] += c; r4 = (limb)t[4] & 0x7ffffffffffff; c = (limb)(t[4] >> 51);
    r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff;
    r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffff;
    r2 += c;
  } while(--count);

  output[0] = r0;
  output[1] = r1;
  output[2] = r2;
  output[3] = r3;
  output[4] = r4;
}


static limb
load_limb(const u8 *in) {
  return
    ((limb)in[0]) |
    (((limb)in[1]) << 8) |
    (((limb)in[2]) << 16) |
    (((limb)in[3]) << 24) |
    (((limb)in[4]) << 32) |
    (((limb)in[5]) << 40) |
    (((limb)in[6]) << 48) |
    (((limb)in[7]) << 56);
}

static void
store_limb(u8 *out, limb in) {
  out[0] = in & 0xff;
  out[1] = (in >> 8) & 0xff;
  out[2] = (in >> 16) & 0xff;
  out[3] = (in >> 24) & 0xff;
  out[4] = (in >> 32) & 0xff;
  out[5] = (in >> 40) & 0xff;
  out[6] = (in >> 48) & 0xff;
  out[7] = (in >> 56) & 0xff;
}


static void
fexpand(limb *output, const u8 *in) {
  output[0] = load_limb(in) & 0x7ffffffffffff;
  output[1] = (load_limb(in+6) >> 3) & 0x7ffffffffffff;
  output[2] = (load_limb(in+12) >> 6) & 0x7ffffffffffff;
  output[3] = (load_limb(in+19) >> 1) & 0x7ffffffffffff;
  output[4] = (load_limb(in+24) >> 12) & 0x7ffffffffffff;
}




static void
fcontract(u8 *output, const felem input) {
  uint128_t t[5];

  t[0] = input[0];
  t[1] = input[1];
  t[2] = input[2];
  t[3] = input[3];
  t[4] = input[4];

  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;

  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;




  t[0] += 19;

  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;



  t[0] += 0x8000000000000 - 19;
  t[1] += 0x8000000000000 - 1;
  t[2] += 0x8000000000000 - 1;
  t[3] += 0x8000000000000 - 1;
  t[4] += 0x8000000000000 - 1;



  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
  t[4] &= 0x7ffffffffffff;

  store_limb(output, t[0] | (t[1] << 51));
  store_limb(output+8, (t[1] >> 13) | (t[2] << 38));
  store_limb(output+16, (t[2] >> 26) | (t[3] << 25));
  store_limb(output+24, (t[3] >> 39) | (t[4] << 12));
}
# 292 "curve25519-donna-c64.c"
static void
fmonty(limb *x2, limb *z2,
       limb *x3, limb *z3,
       limb *x, limb *z,
       limb *xprime, limb *zprime,
       const limb *qmqp ) {
  limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5],
        zzprime[5], zzzprime[5];

  memcpy(origx, x, 5 * sizeof(limb));
  fsum(x, z);
  fdifference_backwards(z, origx);

  memcpy(origxprime, xprime, sizeof(limb) * 5);
  fsum(xprime, zprime);
  fdifference_backwards(zprime, origxprime);
  fmul(xxprime, xprime, z);
  fmul(zzprime, x, zprime);
  memcpy(origxprime, xxprime, sizeof(limb) * 5);
  fsum(xxprime, zzprime);
  fdifference_backwards(zzprime, origxprime);
  fsquare_times(x3, xxprime, 1);
  fsquare_times(zzzprime, zzprime, 1);
  fmul(z3, zzzprime, qmqp);

  fsquare_times(xx, x, 1);
  fsquare_times(zz, z, 1);
  fmul(x2, xx, zz);
  fdifference_backwards(zz, xx);
  fscalar_product(zzz, zz, 121665);
  fsum(zzz, xx);
  fmul(z2, zz, zzz);
}
# 333 "curve25519-donna-c64.c"
static void
swap_conditional(limb a[5], limb b[5], limb iswap) {
  unsigned i;
  const limb swap = -iswap;

  for (i = 0; i < 5; ++i) {
    const limb x = swap & (a[i] ^ b[i]);
    a[i] ^= x;
    b[i] ^= x;
  }
}







static void
cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q) {
  limb a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0};
  limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
  limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
  limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;

  unsigned i, j;

  memcpy(nqpqx, q, sizeof(limb) * 5);

  for (i = 0; i < 32; ++i) {
    u8 byte = n[31 - i];
    for (j = 0; j < 8; ++j) {
      const limb bit = byte >> 7;

      swap_conditional(nqx, nqpqx, bit);
      swap_conditional(nqz, nqpqz, bit);
      fmonty(nqx2, nqz2,
             nqpqx2, nqpqz2,
             nqx, nqz,
             nqpqx, nqpqz,
             q);
      swap_conditional(nqx2, nqpqx2, bit);
      swap_conditional(nqz2, nqpqz2, bit);

      t = nqx;
      nqx = nqx2;
      nqx2 = t;
      t = nqz;
      nqz = nqz2;
      nqz2 = t;
      t = nqpqx;
      nqpqx = nqpqx2;
      nqpqx2 = t;
      t = nqpqz;
      nqpqz = nqpqz2;
      nqpqz2 = t;

      byte <<= 1;
    }
  }

  memcpy(resultx, nqx, sizeof(limb) * 5);
  memcpy(resultz, nqz, sizeof(limb) * 5);
}





static void
crecip(felem out, const felem z) {
  felem a,t0,b,c;

          fsquare_times(a, z, 1);
          fsquare_times(t0, a, 2);
          fmul(b, t0, z);
           fmul(a, b, a);
           fsquare_times(t0, a, 1);
                       fmul(b, t0, b);
                   fsquare_times(t0, b, 5);
                   fmul(b, t0, b);
                    fsquare_times(t0, b, 10);
                   fmul(c, t0, b);
                    fsquare_times(t0, c, 20);
                   fmul(t0, t0, c);
                    fsquare_times(t0, t0, 10);
                   fmul(b, t0, b);
                     fsquare_times(t0, b, 50);
                    fmul(c, t0, b);
                      fsquare_times(t0, c, 100);
                    fmul(t0, t0, c);
                     fsquare_times(t0, t0, 50);
                    fmul(t0, t0, b);
                    fsquare_times(t0, t0, 5);
                   fmul(out, t0, a);
}

int curve25519_donna(u8 *, const u8 *, const u8 *);

int
curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint) {
  limb bp[5], x[5], z[5], zmone[5];
  uint8_t e[32];
  int i;

  for (i = 0;i < 32;++i) e[i] = secret[i];
  e[0] &= 248;
  e[31] &= 127;
  e[31] |= 64;

  fexpand(bp, basepoint);
  cmult(x, z, e, bp);
  crecip(zmone, z);
  fmul(z, x, zmone);
  fcontract(mypublic, z);
  return 0;
}

--- End Message ---
--- Begin Message ---
X-CrossAssassin-Score: 31959

--- End Message ---

Reply via email to