#include <stdio.h>
#include <stdlib.h>
#include <setjmp.h>

#include <windows.h>

// Build-time switch.  While USE_THROW is defined, _eh_handler redirects
// the faulting context to _call_raise_asm and main expects to land in its
// catch block.  Undefine it to make the handler recover with
// setjmp/longjmp instead; that is useful for sorting out whether a problem
// is caused by the C++ exception machinery.
#define USE_THROW

#ifndef USE_THROW
// Jump buffer armed by test_eh and taken by _eh_handler (longjmp variant only).
jmp_buf jbuf;
#endif

// Only referenced through a pointer (last parameter of _eh_handler).
struct _DISPATCHER_CONTEXT;

extern "C"
{

  /* The frame setup could be simplified considerably, but copying it
     verbatim from cegcc.dll is much easier.  (Looking at it again, I
     don't think it is 100% correct either.)  */

// Implemented in assembly, outside this file.  _eh_handler stores its
// address into the Pc of the faulting context.
extern void _call_raise_asm(int sig);

/* Defined outside this file.  _eh_handler passes it the modified context
   record and treats the call as not returning.  It is called on the
   exception stack, so it must switch back to the old stack...  */
void __eh_continue(void * a);

/* Announce the throw on stdout, flush, then raise the C++ exception `1`.
   SIG is accepted but not used.  Presumably this is the C-level target
   reached from _call_raise_asm -- the caller is not visible in this file.  */
void
call_raise_c (int sig)
{
  (void) sig;

  /* Flush before throwing so the message is visible even if unwinding
     goes wrong.  */
  fputs ("throwing 1\n", stdout);
  fflush (stdout);

  throw 1;
}

/* Exception handler installed for test_eh through test_eh_data.

   ExceptionRecord    describes the fault (code, flags, address).
   EstablisherFrame   only printed for diagnostics.
   ContextRecord      register state at the fault; modified in place in
                      the USE_THROW variant.
   DispatcherContext  unused.

   With USE_THROW: dumps the exception, pushes the faulting register set
   onto the faulting stack, points Pc at _call_raise_asm and resumes via
   __eh_continue.  That call is not expected to return; if it does, the
   process exits with status 1.

   Without USE_THROW: longjmps back to the setjmp in test_eh.

   The trailing return only keeps the compiler happy; neither variant is
   expected to reach it.  */
long
_eh_handler (struct _EXCEPTION_RECORD *ExceptionRecord,
	     void *EstablisherFrame,
	     struct _CONTEXT *ContextRecord,
	     struct _DISPATCHER_CONTEXT *DispatcherContext)
{
  printf ("in handler\n");
  fflush (stdout);

#if 0
  /* Throwing here does seem to work, but I don't think it will be
     safe all the time.  WinCE should be expecting us to return.  It
     could store some global state recording that it is crawling for a
     handler, or something.  */
  throw 1;
#endif

#ifdef USE_THROW

  /* Set while the handler is running so a fault raised from inside the
     handler itself gets reported.  Processing continues either way.  */
  static int NestedException = 0;
  if (NestedException)
    printf ("nested exception\n");
  NestedException = 1;

  /* The record fields are DWORDs (unsigned long) or pointers, neither of
     which matches %x.  Cast explicitly: on the 32-bit target this prints
     the same text without relying on a format/argument mismatch.  */
  printf("Exception: Code:%x Flags:%x Addr:%x "
    "SP:%x LR:%x R0:%x R1:%x R2:%x R3:%x R4:%x R5:%x R12:%x FP:%x\n", 
    (unsigned int) ExceptionRecord->ExceptionCode,
    (unsigned int) ExceptionRecord->ExceptionFlags,
    (unsigned int) (DWORD) ExceptionRecord->ExceptionAddress,
    (unsigned int) ContextRecord->Sp,
    (unsigned int) ContextRecord->Lr,
    (unsigned int) ContextRecord->R0,
    (unsigned int) ContextRecord->R1,
    (unsigned int) ContextRecord->R2,
    (unsigned int) ContextRecord->R3,
    (unsigned int) ContextRecord->R4,
    (unsigned int) ContextRecord->R5,
    (unsigned int) ContextRecord->R12,
    (unsigned int) (DWORD) EstablisherFrame
    );

  /* Save the faulting register set on the faulting stack, growing
     downwards: Pc, Lr, Sp, R12..R0, Psr (17 words, Psr at the lowest
     address).  The order matters -- presumably _call_raise_asm expects
     exactly this frame; it is not visible in this file.  The Sp pushed
     here is still the original one, it is only replaced below.  */
  DWORD* sp = (DWORD*)ContextRecord->Sp;
  *--sp = ContextRecord->Pc;
  *--sp = ContextRecord->Lr;
  *--sp = ContextRecord->Sp;
  *--sp = ContextRecord->R12;
  *--sp = ContextRecord->R11;
  *--sp = ContextRecord->R10;
  *--sp = ContextRecord->R9;
  *--sp = ContextRecord->R8;
  *--sp = ContextRecord->R7;
  *--sp = ContextRecord->R6;
  *--sp = ContextRecord->R5;
  *--sp = ContextRecord->R4;
  *--sp = ContextRecord->R3;
  *--sp = ContextRecord->R2;
  *--sp = ContextRecord->R1;
  *--sp = ContextRecord->R0;
  *--sp = ContextRecord->Psr;

  /* Resume on top of the saved frame, at _call_raise_asm.  */
  ContextRecord->Sp = (DWORD) sp;
  ContextRecord->Pc = (DWORD) _call_raise_asm;

  /* Done touching the context; a later fault is no longer "nested".  */
  NestedException = 0;

  /* Presumably received by _call_raise_asm as its `sig' argument
     (first argument register) -- TODO confirm against the asm.  */
  ContextRecord->R0 = 1234;

  __eh_continue (ContextRecord);

  /* NOT REACHED */
  printf("handler returned!\n");
  exit(1);

#else
  longjmp (jbuf, 1);
#endif

  return EXCEPTION_CONTINUE_EXECUTION;
}

/* Two-word record.  test_eh uses it to read test_eh_data and also to
   view a function-table entry as two raw DWORDs.  */
struct eh_data
{
  DWORD handler;  /* first word; for test_eh_data, the handler address */
  DWORD data;     /* second word; for test_eh_data, emitted as 0 */
};

/* Defined by a top-level asm block later in this file.  */
extern struct eh_data test_eh_data;

/*
  Function-table (.pdata) entry for test_eh, emitted by hand.

  See _IMAGE_CE_RUNTIME_FUNCTION_ENTRY (for ARM):
  http://msdn2.microsoft.com/en-us/library/ms253987(VS.80).aspx

  First word:  start address of test_eh.
  Second word, as packed below:
    bits  0-7   PrologLen      (in 4-byte instructions)
    bits  8-29  FuncLen        (in 4-byte instructions)
    bit   30    ThirtyTwoBit
    bit   31    ExceptionFlag

  FuncLen is 22 bits wide, so it is masked with 0x3fffff; a wider mask
  would let an over-long length spill into the two flag bits.

  NOTE(review): this block leaves the assembler in .pdata; the asm block
  that defines test_eh_data switches back to .text.
*/

__asm__(
"\t.section .pdata\n"
"pdata_start:\n"
"\t.word test_eh\n"
"\t.word "
"  1 << 31 " /* ExceptionFlag */
"| 1 << 30 " /* ThirtyTwoBit */
"| (((test_eh_end - test_eh) / 4) & 0x3fffff) << 8" /* FuncLen */
"| (((test_eh_prolog_end - test_eh) / 4) & 0xff)\n" /* PrologLen */
);

/* The hand-written .pdata entry (label pdata_start in the asm above),
   made visible to C so test_eh can print its fields.  */
extern IMAGE_CE_RUNTIME_FUNCTION_ENTRY pdata_start;

/* Asm labels placed inside test_eh.  They are not callable functions;
   they are declared this way only so their addresses can be taken.  */
extern void test_eh_prolog_end ();
extern void test_eh_end ();

/* Handler record for test_eh: one word holding the address of
   _eh_handler followed by one zero word (see struct eh_data).

   Should go directly before the function it represents.
   This probably breaks with -ffunction-sections.  */

__asm__ (
"\t.text\n"
"\t.global test_eh_data\n"
"test_eh_data:\n"
"\t.word _eh_handler\n"
"\t.word 0\n"
);

/* The function covered by the hand-written .pdata entry.  Prints the
   addresses and lengths that went into the entry, next to an equivalent
   entry built in C for comparison, then deliberately reads through a
   null pointer so that _eh_handler gets invoked.

   The two asm labels mark the end of the prologue and the end of the
   function; the .pdata entry computes PrologLen and FuncLen from them.  */
void test_eh (void)
{
  /* Needed by the .pdata entry.  I'm not sure asm volatile is enough to
     force gcc to not spill prologue insns below it.  */

  __asm__ __volatile__ (
"test_eh_prolog_end:\n");

#ifndef USE_THROW
  /* longjmp variant: _eh_handler jumps back here with value 1.  */
  if (setjmp (jbuf) == 1)
    {
      printf ("returned from handler\n");
      return;
    }
#endif

  /* %lu needs an unsigned long; pointers and promoted bit-fields are
     not, so convert explicitly.  Output is unchanged on the 32-bit
     target.  */
  printf ("test_eh = %lu\n", (unsigned long) &test_eh);
  printf ("test_eh_prolog_end = %lu\n", (unsigned long) &test_eh_prolog_end);
  printf ("test_eh_end = %lu\n", (unsigned long) &test_eh_end);

  printf ("test_eh_data = %lu\n", (unsigned long) &test_eh_data);
  printf ("handler = %lu\n", (unsigned long) test_eh_data.handler);
  printf ("data = %lu\n", (unsigned long) test_eh_data.data);

  /* Build the same entry in C to cross-check the assembler arithmetic.  */
  IMAGE_CE_RUNTIME_FUNCTION_ENTRY entry = { 0 };
  entry.FuncStart = (DWORD)test_eh;
  entry.PrologLen = ((DWORD)test_eh_prolog_end - (DWORD)test_eh) / 4;
  entry.FuncLen = ((DWORD)test_eh_end - (DWORD)test_eh) / 4;
  entry.ThirtyTwoBit = 1;
  entry.ExceptionFlag = 1;

  /* View each entry as two raw words and print the packed second word.
     NOTE(review): this is a type pun through an unrelated struct type.  */
  struct eh_data* d = (struct eh_data*)&entry;
  printf ("entry = %08x\n", (unsigned int) d->data);

  d = (struct eh_data*)&pdata_start;
  printf ("entry_g = %08x\n", (unsigned int) d->data);

  printf ("p PrologLen = %lu\n", (unsigned long) pdata_start.PrologLen);
  printf ("p FuncLen = %lu\n", (unsigned long) pdata_start.FuncLen);

  printf ("e PrologLen = %lu\n", (unsigned long) entry.PrologLen);
  printf ("e FuncLen = %lu\n", (unsigned long) entry.FuncLen);

  /* here we go ... */

  /* Deliberate access violation.  The pointer is volatile so the reads
     cannot be dropped as dead code even though the sum is never used.  */
  volatile int *zero = 0;
  int result = *zero + *zero;
  (void) result;

  /* Needed by the .pdata entry.  See remark on test_eh_prolog_end.  */

  __asm__ __volatile__ (
"test_eh_end:\n");
}

/* Entry point: run test_eh inside a catch-all and report when an
   exception arrives.  Always returns 0; argc and argv are unused.  */
int
main (int argc, char** argv)
{
  /* Wrapping the faulting code itself in try/catch inside test_eh does
     not work.  The reason is not known yet: either the prologue and
     function-end calculations get mixed up in that case, or the catch
     is optimized out.  */

  /* Faulting directly here in main has also been tried, but the handler
     does not get recorded correctly -- again probably because of the
     hidden call to __gccmain and the prologue/function-end
     calculations.  */

  try
    {
      test_eh ();
    }
  catch (...)
    {
      fputs ("in catch\n", stdout);
    }

  return 0;
}

}
