Package: gcc-3.3 Version: 1:3.3.1-0rc2 Priority: serious Compiling fftw3 3.0.1 on powerpc has failed since gcc was updated to version 1:3.3.1-0rc2 (essentially the same fftw3 package compiled successfully using -0rc1).
To make the bug easier to reproduce, I created a preprocessed .i file from the offending source file. Compiling it with optimization enabled (the file compiles when optimization is turned off) results in: [EMAIL PROTECTED]:~/fftw3-3.0.1/dft/simd/codelets$ gcc -O -maltivec -mabi=altivec -c n1fv_9.i n1fv_9.c: In function `n1fv_9': n1fv_9.c:128: error: unrecognizable insn: (insn 2983 1654 1655 2 (nil) (set (reg:V4SF 77 v0) (const_vector:V4SF [ (const_double:SF 9.3969261646270751953125e-1 [0x0.f08fb2p+0]) (const_double:SF 9.3969261646270751953125e-1 [0x0.f08fb2p+0]) (const_double:SF 9.3969261646270751953125e-1 [0x0.f08fb2p+0]) (const_double:SF 9.3969261646270751953125e-1 [0x0.f08fb2p+0]) ])) -1 (nil) (nil)) n1fv_9.c:128: internal compiler error: in extract_insn, at recog.c:2175 The file n1fv_9.i is attached. -- James (Jay) Treacy [EMAIL PROTECTED]
# 1 "n1fv_9.c" # 1 "<built-in>" # 1 "<command line>" # 1 "n1fv_9.c" # 24 "n1fv_9.c" # 1 "../../../dft/codelet-dft.h" 1 # 31 "../../../dft/codelet-dft.h" # 1 "../../../kernel/ifftw.h" 1 # 27 "../../../kernel/ifftw.h" # 1 "../../../config.h" 1 # 28 "../../../kernel/ifftw.h" 2 # 1 "/usr/include/stdlib.h" 1 3 4 # 25 "/usr/include/stdlib.h" 3 4 # 1 "/usr/include/features.h" 1 3 4 # 291 "/usr/include/features.h" 3 4 # 1 "/usr/include/sys/cdefs.h" 1 3 4 # 292 "/usr/include/features.h" 2 3 4 # 314 "/usr/include/features.h" 3 4 # 1 "/usr/include/gnu/stubs.h" 1 3 4 # 315 "/usr/include/features.h" 2 3 4 # 26 "/usr/include/stdlib.h" 2 3 4 # 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 1 3 4 # 213 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 3 4 typedef unsigned int size_t; # 325 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 3 4 typedef long int wchar_t; # 34 "/usr/include/stdlib.h" 2 3 4 # 93 "/usr/include/stdlib.h" 3 4 typedef struct { int quot; int rem; } div_t; typedef struct { long int quot; long int rem; } ldiv_t; # 137 "/usr/include/stdlib.h" 3 4 extern size_t __ctype_get_mb_cur_max (void) ; extern double atof (__const char *__nptr) __attribute__ ((__pure__)); extern int atoi (__const char *__nptr) __attribute__ ((__pure__)); extern long int atol (__const char *__nptr) __attribute__ ((__pure__)); __extension__ extern long long int atoll (__const char *__nptr) __attribute__ ((__pure__)); extern double strtod (__const char *__restrict __nptr, char **__restrict __endptr) ; # 174 "/usr/include/stdlib.h" 3 4 extern long int strtol (__const char *__restrict __nptr, char **__restrict __endptr, int __base) ; extern unsigned long int strtoul (__const char *__restrict __nptr, char **__restrict __endptr, int __base) ; __extension__ extern long long int strtoq (__const char *__restrict __nptr, char **__restrict __endptr, int __base) ; __extension__ extern unsigned long long int strtouq (__const char *__restrict __nptr, char **__restrict __endptr, 
int __base) ; __extension__ extern long long int strtoll (__const char *__restrict __nptr, char **__restrict __endptr, int __base) ; __extension__ extern unsigned long long int strtoull (__const char *__restrict __nptr, char **__restrict __endptr, int __base) ; # 264 "/usr/include/stdlib.h" 3 4 extern double __strtod_internal (__const char *__restrict __nptr, char **__restrict __endptr, int __group) ; extern float __strtof_internal (__const char *__restrict __nptr, char **__restrict __endptr, int __group) ; extern long double __strtold_internal (__const char *__restrict __nptr, char **__restrict __endptr, int __group) ; extern long int __strtol_internal (__const char *__restrict __nptr, char **__restrict __endptr, int __base, int __group) ; extern unsigned long int __strtoul_internal (__const char *__restrict __nptr, char **__restrict __endptr, int __base, int __group) ; __extension__ extern long long int __strtoll_internal (__const char *__restrict __nptr, char **__restrict __endptr, int __base, int __group) ; __extension__ extern unsigned long long int __strtoull_internal (__const char * __restrict __nptr, char **__restrict __endptr, int __base, int __group) ; extern __inline double strtod (__const char *__restrict __nptr, char **__restrict __endptr) { return __strtod_internal (__nptr, __endptr, 0); } extern __inline long int strtol (__const char *__restrict __nptr, char **__restrict __endptr, int __base) { return __strtol_internal (__nptr, __endptr, __base, 0); } extern __inline unsigned long int strtoul (__const char *__restrict __nptr, char **__restrict __endptr, int __base) { return __strtoul_internal (__nptr, __endptr, __base, 0); } # 343 "/usr/include/stdlib.h" 3 4 __extension__ extern __inline long long int strtoq (__const char *__restrict __nptr, char **__restrict __endptr, int __base) { return __strtoll_internal (__nptr, __endptr, __base, 0); } __extension__ extern __inline unsigned long long int strtouq (__const char *__restrict __nptr, char 
**__restrict __endptr, int __base) { return __strtoull_internal (__nptr, __endptr, __base, 0); } __extension__ extern __inline long long int strtoll (__const char *__restrict __nptr, char **__restrict __endptr, int __base) { return __strtoll_internal (__nptr, __endptr, __base, 0); } __extension__ extern __inline unsigned long long int strtoull (__const char * __restrict __nptr, char **__restrict __endptr, int __base) { return __strtoull_internal (__nptr, __endptr, __base, 0); } extern __inline double atof (__const char *__nptr) { return strtod (__nptr, (char **) ((void *)0)); } extern __inline int atoi (__const char *__nptr) { return (int) strtol (__nptr, (char **) ((void *)0), 10); } extern __inline long int atol (__const char *__nptr) { return strtol (__nptr, (char **) ((void *)0), 10); } __extension__ extern __inline long long int atoll (__const char *__nptr) { return strtoll (__nptr, (char **) ((void *)0), 10); } # 408 "/usr/include/stdlib.h" 3 4 extern char *l64a (long int __n) ; extern long int a64l (__const char *__s) __attribute__ ((__pure__)); # 1 "/usr/include/sys/types.h" 1 3 4 # 29 "/usr/include/sys/types.h" 3 4 # 1 "/usr/include/bits/types.h" 1 3 4 # 28 "/usr/include/bits/types.h" 3 4 # 1 "/usr/include/bits/wordsize.h" 1 3 4 # 29 "/usr/include/bits/types.h" 2 3 4 # 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 1 3 4 # 32 "/usr/include/bits/types.h" 2 3 4 typedef unsigned char __u_char; typedef unsigned short int __u_short; typedef unsigned int __u_int; typedef unsigned long int __u_long; typedef signed char __int8_t; typedef unsigned char __uint8_t; typedef signed short int __int16_t; typedef unsigned short int __uint16_t; typedef signed int __int32_t; typedef unsigned int __uint32_t; __extension__ typedef signed long long int __int64_t; __extension__ typedef unsigned long long int __uint64_t; __extension__ typedef long long int __quad_t; __extension__ typedef unsigned long long int __u_quad_t; # 128 "/usr/include/bits/types.h" 3 4 # 1 
"/usr/include/bits/typesizes.h" 1 3 4 # 129 "/usr/include/bits/types.h" 2 3 4 typedef unsigned long long int __dev_t; typedef unsigned int __uid_t; typedef unsigned int __gid_t; typedef unsigned long int __ino_t; typedef unsigned long long int __ino64_t; typedef unsigned int __mode_t; typedef unsigned int __nlink_t; typedef long int __off_t; typedef long long int __off64_t; typedef int __pid_t; typedef struct { int __val[2]; } __fsid_t; typedef long int __clock_t; typedef unsigned long int __rlim_t; typedef unsigned long long int __rlim64_t; typedef unsigned int __id_t; typedef long int __time_t; typedef unsigned int __useconds_t; typedef long int __suseconds_t; typedef int __daddr_t; typedef long int __swblk_t; typedef int __key_t; typedef int __clockid_t; typedef int __timer_t; typedef long int __blksize_t; typedef long int __blkcnt_t; typedef long long int __blkcnt64_t; typedef unsigned long int __fsblkcnt_t; typedef unsigned long long int __fsblkcnt64_t; typedef unsigned long int __fsfilcnt_t; typedef unsigned long long int __fsfilcnt64_t; typedef int __ssize_t; typedef __off64_t __loff_t; typedef __quad_t *__qaddr_t; typedef char *__caddr_t; typedef int __intptr_t; typedef unsigned int __socklen_t; # 32 "/usr/include/sys/types.h" 2 3 4 typedef __u_char u_char; typedef __u_short u_short; typedef __u_int u_int; typedef __u_long u_long; typedef __quad_t quad_t; typedef __u_quad_t u_quad_t; typedef __fsid_t fsid_t; typedef __loff_t loff_t; typedef __ino_t ino_t; # 62 "/usr/include/sys/types.h" 3 4 typedef __dev_t dev_t; typedef __gid_t gid_t; typedef __mode_t mode_t; typedef __nlink_t nlink_t; typedef __uid_t uid_t; typedef __off_t off_t; # 100 "/usr/include/sys/types.h" 3 4 typedef __pid_t pid_t; typedef __id_t id_t; typedef __ssize_t ssize_t; typedef __daddr_t daddr_t; typedef __caddr_t caddr_t; typedef __key_t key_t; # 133 "/usr/include/sys/types.h" 3 4 # 1 "/usr/include/time.h" 1 3 4 # 74 "/usr/include/time.h" 3 4 typedef __time_t time_t; # 92 
"/usr/include/time.h" 3 4 typedef __clockid_t clockid_t; # 104 "/usr/include/time.h" 3 4 typedef __timer_t timer_t; # 134 "/usr/include/sys/types.h" 2 3 4 # 147 "/usr/include/sys/types.h" 3 4 # 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 1 3 4 # 148 "/usr/include/sys/types.h" 2 3 4 typedef unsigned long int ulong; typedef unsigned short int ushort; typedef unsigned int uint; # 191 "/usr/include/sys/types.h" 3 4 typedef int int8_t __attribute__ ((__mode__ (__QI__))); typedef int int16_t __attribute__ ((__mode__ (__HI__))); typedef int int32_t __attribute__ ((__mode__ (__SI__))); typedef int int64_t __attribute__ ((__mode__ (__DI__))); typedef unsigned int u_int8_t __attribute__ ((__mode__ (__QI__))); typedef unsigned int u_int16_t __attribute__ ((__mode__ (__HI__))); typedef unsigned int u_int32_t __attribute__ ((__mode__ (__SI__))); typedef unsigned int u_int64_t __attribute__ ((__mode__ (__DI__))); typedef int register_t __attribute__ ((__mode__ (__word__))); # 213 "/usr/include/sys/types.h" 3 4 # 1 "/usr/include/endian.h" 1 3 4 # 37 "/usr/include/endian.h" 3 4 # 1 "/usr/include/bits/endian.h" 1 3 4 # 38 "/usr/include/endian.h" 2 3 4 # 214 "/usr/include/sys/types.h" 2 3 4 # 1 "/usr/include/sys/select.h" 1 3 4 # 31 "/usr/include/sys/select.h" 3 4 # 1 "/usr/include/bits/select.h" 1 3 4 # 32 "/usr/include/sys/select.h" 2 3 4 # 1 "/usr/include/bits/sigset.h" 1 3 4 # 23 "/usr/include/bits/sigset.h" 3 4 typedef int __sig_atomic_t; typedef struct { unsigned long int __val[(1024 / (8 * sizeof (unsigned long int)))]; } __sigset_t; # 35 "/usr/include/sys/select.h" 2 3 4 typedef __sigset_t sigset_t; # 1 "/usr/include/time.h" 1 3 4 # 116 "/usr/include/time.h" 3 4 struct timespec { __time_t tv_sec; long int tv_nsec; }; # 45 "/usr/include/sys/select.h" 2 3 4 # 1 "/usr/include/bits/time.h" 1 3 4 # 67 "/usr/include/bits/time.h" 3 4 struct timeval { __time_t tv_sec; __suseconds_t tv_usec; }; # 47 "/usr/include/sys/select.h" 2 3 4 typedef __suseconds_t suseconds_t; 
typedef long int __fd_mask; # 67 "/usr/include/sys/select.h" 3 4 typedef struct { __fd_mask __fds_bits[1024 / (8 * sizeof (__fd_mask))]; } fd_set; typedef __fd_mask fd_mask; # 99 "/usr/include/sys/select.h" 3 4 extern int select (int __nfds, fd_set *__restrict __readfds, fd_set *__restrict __writefds, fd_set *__restrict __exceptfds, struct timeval *__restrict __timeout) ; # 122 "/usr/include/sys/select.h" 3 4 # 217 "/usr/include/sys/types.h" 2 3 4 # 1 "/usr/include/sys/sysmacros.h" 1 3 4 # 220 "/usr/include/sys/types.h" 2 3 4 # 231 "/usr/include/sys/types.h" 3 4 typedef __blkcnt_t blkcnt_t; typedef __fsblkcnt_t fsblkcnt_t; typedef __fsfilcnt_t fsfilcnt_t; # 266 "/usr/include/sys/types.h" 3 4 # 1 "/usr/include/bits/pthreadtypes.h" 1 3 4 # 23 "/usr/include/bits/pthreadtypes.h" 3 4 # 1 "/usr/include/bits/sched.h" 1 3 4 # 83 "/usr/include/bits/sched.h" 3 4 struct __sched_param { int __sched_priority; }; # 24 "/usr/include/bits/pthreadtypes.h" 2 3 4 typedef int __atomic_lock_t; struct _pthread_fastlock { long int __status; __atomic_lock_t __spinlock; }; typedef struct _pthread_descr_struct *_pthread_descr; typedef struct __pthread_attr_s { int __detachstate; int __schedpolicy; struct __sched_param __schedparam; int __inheritsched; int __scope; size_t __guardsize; int __stackaddr_set; void *__stackaddr; size_t __stacksize; } pthread_attr_t; typedef struct { struct _pthread_fastlock __c_lock; _pthread_descr __c_waiting; } pthread_cond_t; typedef struct { int __dummy; } pthread_condattr_t; typedef unsigned int pthread_key_t; typedef struct { int __m_reserved; int __m_count; _pthread_descr __m_owner; int __m_kind; struct _pthread_fastlock __m_lock; } pthread_mutex_t; typedef struct { int __mutexkind; } pthread_mutexattr_t; typedef int pthread_once_t; # 142 "/usr/include/bits/pthreadtypes.h" 3 4 typedef unsigned long int pthread_t; # 267 "/usr/include/sys/types.h" 2 3 4 # 415 "/usr/include/stdlib.h" 2 3 4 extern long int random (void) ; extern void srandom (unsigned int 
__seed) ; extern char *initstate (unsigned int __seed, char *__statebuf, size_t __statelen) ; extern char *setstate (char *__statebuf) ; struct random_data { int32_t *fptr; int32_t *rptr; int32_t *state; int rand_type; int rand_deg; int rand_sep; int32_t *end_ptr; }; extern int random_r (struct random_data *__restrict __buf, int32_t *__restrict __result) ; extern int srandom_r (unsigned int __seed, struct random_data *__buf) ; extern int initstate_r (unsigned int __seed, char *__restrict __statebuf, size_t __statelen, struct random_data *__restrict __buf) ; extern int setstate_r (char *__restrict __statebuf, struct random_data *__restrict __buf) ; extern int rand (void) ; extern void srand (unsigned int __seed) ; extern int rand_r (unsigned int *__seed) ; extern double drand48 (void) ; extern double erand48 (unsigned short int __xsubi[3]) ; extern long int lrand48 (void) ; extern long int nrand48 (unsigned short int __xsubi[3]) ; extern long int mrand48 (void) ; extern long int jrand48 (unsigned short int __xsubi[3]) ; extern void srand48 (long int __seedval) ; extern unsigned short int *seed48 (unsigned short int __seed16v[3]) ; extern void lcong48 (unsigned short int __param[7]) ; struct drand48_data { unsigned short int __x[3]; unsigned short int __old_x[3]; unsigned short int __c; unsigned short int __init; unsigned long long int __a; }; extern int drand48_r (struct drand48_data *__restrict __buffer, double *__restrict __result) ; extern int erand48_r (unsigned short int __xsubi[3], struct drand48_data *__restrict __buffer, double *__restrict __result) ; extern int lrand48_r (struct drand48_data *__restrict __buffer, long int *__restrict __result) ; extern int nrand48_r (unsigned short int __xsubi[3], struct drand48_data *__restrict __buffer, long int *__restrict __result) ; extern int mrand48_r (struct drand48_data *__restrict __buffer, long int *__restrict __result) ; extern int jrand48_r (unsigned short int __xsubi[3], struct drand48_data *__restrict 
__buffer, long int *__restrict __result) ; extern int srand48_r (long int __seedval, struct drand48_data *__buffer) ; extern int seed48_r (unsigned short int __seed16v[3], struct drand48_data *__buffer) ; extern int lcong48_r (unsigned short int __param[7], struct drand48_data *__buffer) ; extern void *malloc (size_t __size) __attribute__ ((__malloc__)); extern void *calloc (size_t __nmemb, size_t __size) __attribute__ ((__malloc__)); extern void *realloc (void *__ptr, size_t __size) __attribute__ ((__malloc__)); extern void free (void *__ptr) ; extern void cfree (void *__ptr) ; # 1 "/usr/include/alloca.h" 1 3 4 # 25 "/usr/include/alloca.h" 3 4 # 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 1 3 4 # 26 "/usr/include/alloca.h" 2 3 4 extern void *alloca (size_t __size) ; # 577 "/usr/include/stdlib.h" 2 3 4 extern void *valloc (size_t __size) __attribute__ ((__malloc__)); # 590 "/usr/include/stdlib.h" 3 4 extern void abort (void) __attribute__ ((__noreturn__)); extern int atexit (void (*__func) (void)) ; extern int on_exit (void (*__func) (int __status, void *__arg), void *__arg) ; extern void exit (int __status) __attribute__ ((__noreturn__)); # 622 "/usr/include/stdlib.h" 3 4 extern char *getenv (__const char *__name) ; extern char *__secure_getenv (__const char *__name) ; extern int putenv (char *__string) ; extern int setenv (__const char *__name, __const char *__value, int __replace) ; extern int unsetenv (__const char *__name) ; extern int clearenv (void) ; # 661 "/usr/include/stdlib.h" 3 4 extern char *mktemp (char *__template) ; extern int mkstemp (char *__template) ; # 688 "/usr/include/stdlib.h" 3 4 extern char *mkdtemp (char *__template) ; extern int system (__const char *__command) ; # 712 "/usr/include/stdlib.h" 3 4 extern char *realpath (__const char *__restrict __name, char *__restrict __resolved) ; typedef int (*__compar_fn_t) (__const void *, __const void *); extern void *bsearch (__const void *__key, __const void *__base, size_t __nmemb, 
size_t __size, __compar_fn_t __compar); extern void qsort (void *__base, size_t __nmemb, size_t __size, __compar_fn_t __compar); extern int abs (int __x) __attribute__ ((__const__)); extern long int labs (long int __x) __attribute__ ((__const__)); extern div_t div (int __numer, int __denom) __attribute__ ((__const__)); extern ldiv_t ldiv (long int __numer, long int __denom) __attribute__ ((__const__)); # 776 "/usr/include/stdlib.h" 3 4 extern char *ecvt (double __value, int __ndigit, int *__restrict __decpt, int *__restrict __sign) ; extern char *fcvt (double __value, int __ndigit, int *__restrict __decpt, int *__restrict __sign) ; extern char *gcvt (double __value, int __ndigit, char *__buf) ; extern char *qecvt (long double __value, int __ndigit, int *__restrict __decpt, int *__restrict __sign) ; extern char *qfcvt (long double __value, int __ndigit, int *__restrict __decpt, int *__restrict __sign) ; extern char *qgcvt (long double __value, int __ndigit, char *__buf) ; extern int ecvt_r (double __value, int __ndigit, int *__restrict __decpt, int *__restrict __sign, char *__restrict __buf, size_t __len) ; extern int fcvt_r (double __value, int __ndigit, int *__restrict __decpt, int *__restrict __sign, char *__restrict __buf, size_t __len) ; extern int qecvt_r (long double __value, int __ndigit, int *__restrict __decpt, int *__restrict __sign, char *__restrict __buf, size_t __len) ; extern int qfcvt_r (long double __value, int __ndigit, int *__restrict __decpt, int *__restrict __sign, char *__restrict __buf, size_t __len) ; extern int mblen (__const char *__s, size_t __n) ; extern int mbtowc (wchar_t *__restrict __pwc, __const char *__restrict __s, size_t __n) ; extern int wctomb (char *__s, wchar_t __wchar) ; extern size_t mbstowcs (wchar_t *__restrict __pwcs, __const char *__restrict __s, size_t __n) ; extern size_t wcstombs (char *__restrict __s, __const wchar_t *__restrict __pwcs, size_t __n) ; extern int rpmatch (__const char *__response) ; # 908 
"/usr/include/stdlib.h" 3 4 extern int getloadavg (double __loadavg[], int __nelem) ; # 30 "../../../kernel/ifftw.h" 2 # 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stdarg.h" 1 3 4 # 43 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stdarg.h" 3 4 typedef __builtin_va_list __gnuc_va_list; # 105 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stdarg.h" 3 4 typedef __gnuc_va_list va_list; # 31 "../../../kernel/ifftw.h" 2 # 1 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 1 3 4 # 151 "/usr/lib/gcc-lib/powerpc-linux/3.3.1/include/stddef.h" 3 4 typedef int ptrdiff_t; # 32 "../../../kernel/ifftw.h" 2 # 1 "/usr/include/stdint.h" 1 3 4 # 27 "/usr/include/stdint.h" 3 4 # 1 "/usr/include/bits/wchar.h" 1 3 4 # 28 "/usr/include/stdint.h" 2 3 4 # 1 "/usr/include/bits/wordsize.h" 1 3 4 # 29 "/usr/include/stdint.h" 2 3 4 # 49 "/usr/include/stdint.h" 3 4 typedef unsigned char uint8_t; typedef unsigned short int uint16_t; typedef unsigned int uint32_t; __extension__ typedef unsigned long long int uint64_t; typedef signed char int_least8_t; typedef short int int_least16_t; typedef int int_least32_t; __extension__ typedef long long int int_least64_t; typedef unsigned char uint_least8_t; typedef unsigned short int uint_least16_t; typedef unsigned int uint_least32_t; __extension__ typedef unsigned long long int uint_least64_t; typedef signed char int_fast8_t; typedef int int_fast16_t; typedef int int_fast32_t; __extension__ typedef long long int int_fast64_t; typedef unsigned char uint_fast8_t; typedef unsigned int uint_fast16_t; typedef unsigned int uint_fast32_t; __extension__ typedef unsigned long long int uint_fast64_t; # 126 "/usr/include/stdint.h" 3 4 typedef int intptr_t; typedef unsigned int uintptr_t; # 138 "/usr/include/stdint.h" 3 4 __extension__ typedef long long int intmax_t; __extension__ typedef unsigned long long int uintmax_t; # 39 "../../../kernel/ifftw.h" 2 # 1 "/usr/include/inttypes.h" 1 3 4 # 35 "/usr/include/inttypes.h" 3 4 typedef long int __gwchar_t; # 
274 "/usr/include/inttypes.h" 3 4 # 288 "/usr/include/inttypes.h" 3 4 typedef struct { long long int quot; long long int rem; } imaxdiv_t; extern intmax_t imaxabs (intmax_t __n) __attribute__ ((__const__)); extern imaxdiv_t imaxdiv (intmax_t __numer, intmax_t __denom) __attribute__ ((__const__)); extern intmax_t strtoimax (__const char *__restrict __nptr, char **__restrict __endptr, int __base) ; extern uintmax_t strtoumax (__const char *__restrict __nptr, char ** __restrict __endptr, int __base) ; extern intmax_t wcstoimax (__const __gwchar_t *__restrict __nptr, __gwchar_t **__restrict __endptr, int __base) ; extern uintmax_t wcstoumax (__const __gwchar_t *__restrict __nptr, __gwchar_t ** __restrict __endptr, int __base) ; # 396 "/usr/include/inttypes.h" 3 4 extern __inline intmax_t strtoimax (__const char *__restrict nptr, char **__restrict endptr, int base) { return __strtoll_internal (nptr, endptr, base, 0); } # 414 "/usr/include/inttypes.h" 3 4 extern __inline uintmax_t strtoumax (__const char *__restrict nptr, char **__restrict endptr, int base) { return __strtoull_internal (nptr, endptr, base, 0); } __extension__ extern long long int __wcstoll_internal (__const __gwchar_t * __restrict __nptr, __gwchar_t **__restrict __endptr, int __base, int __group) ; extern __inline intmax_t wcstoimax (__const __gwchar_t *__restrict nptr, __gwchar_t **__restrict endptr, int base) { return __wcstoll_internal (nptr, endptr, base, 0); } __extension__ extern unsigned long long int __wcstoull_internal (__const __gwchar_t * __restrict __nptr, __gwchar_t ** __restrict __endptr, int __base, int __group) ; extern __inline uintmax_t wcstoumax (__const __gwchar_t *__restrict nptr, __gwchar_t **__restrict endptr, int base) { return __wcstoull_internal (nptr, endptr, base, 0); } # 43 "../../../kernel/ifftw.h" 2 typedef float R; # 80 "../../../kernel/ifftw.h" typedef struct problem_s problem; typedef struct plan_s plan; typedef struct solver_s solver; typedef struct planner_s planner; 
/*
 * Preprocessed excerpt of FFTW's kernel/ifftw.h, part of the n1fv_9.i
 * reproducer attached to the bug report above.  Lines of the form
 * `# N "file"` are cpp linemarkers (original line number and file), not code.
 * Keep the code tokens exactly as they are: this text exists to reproduce
 * a compiler error.
 */

/* Opaque handles; the struct bodies for printer_s and scanner_s follow below. */
typedef struct printer_s printer;
typedef struct scanner_s scanner;
# 157 "../../../kernel/ifftw.h"
extern void fftwf_assertion_failed(const char *s, int line, const char *file);
# 171 "../../../kernel/ifftw.h"
extern void fftwf_debug(const char *format, ...);

/*
 * Allocation tags.  MALLOC_WHAT_LAST is the final enumerator
 * (presumably a count/sentinel -- confirm against the allocator).
 */
enum malloc_tag {
    EVERYTHING, PLANS, SOLVERS, PROBLEMS, BUFFERS, HASHT, TENSORS,
    PLANNERS, SLVDESCS, TWIDDLES, STRIDES, OTHER, MALLOC_WHAT_LAST
};
extern void fftwf_ifree(void *ptr);
extern void fftwf_ifree0(void *ptr);
# 207 "../../../kernel/ifftw.h"
extern void *fftwf_malloc_plain(size_t sz);
# 231 "../../../kernel/ifftw.h"

/* Operation counter: four double-valued tallies (add, mul, fma, other). */
typedef struct {
    double add;
    double mul;
    double fma;
    double other;
} opcnt;

/* Helpers operating on opcnt values; their definitions are not part of this dump. */
void fftwf_ops_zero(opcnt *dst);
void fftwf_ops_other(int o, opcnt *dst);
void fftwf_ops_cpy(const opcnt *src, opcnt *dst);
void fftwf_ops_add(const opcnt *a, const opcnt *b, opcnt *dst);
void fftwf_ops_add2(const opcnt *a, opcnt *dst);
void fftwf_ops_madd(int m, const opcnt *a, const opcnt *b, opcnt *dst);
void fftwf_ops_madd2(int m, const opcnt *a, opcnt *dst);

/* Small integer helpers (declarations only). */
int fftwf_imax(int a, int b);
int fftwf_imin(int a, int b);
int fftwf_iabs(int a);

/*
 * MD5 state: md5sig is an array of four md5uint (unsigned long) words;
 * the md5 struct carries a signature s, a 64-byte buffer c and an
 * unsigned field l.
 */
typedef unsigned long md5uint;
typedef md5uint md5sig[4];
typedef struct {
    md5sig s;
    unsigned char c[64];
    unsigned l;
} md5;
void fftwf_md5begin(md5 *p);
void fftwf_md5putb(md5 *p, const void *d_, int len);
void fftwf_md5puts(md5 *p, const char *s);
void fftwf_md5putc(md5 *p, unsigned char c);
void fftwf_md5int(md5 *p, int i);
void fftwf_md5unsigned(md5 *p, unsigned i);
void fftwf_md5ptrdiff(md5 *p, ptrdiff_t d);
void fftwf_md5end(md5 *p);

/*
 * iodim: three ints n, is, os (presumably length, input stride and
 * output stride -- confirm).
 */
typedef struct {
    int n;
    int is;
    int os;
} iodim;

/*
 * tensor: a rank plus an array of iodim.
 * NOTE(review): dims is declared with one element; this looks like the
 * pre-C99 trailing-array idiom -- confirm how fftwf_mktensor allocates it
 * before treating the array as fixed-size.
 */
typedef struct {
    int rnk;
    iodim dims[1];
} tensor;
# 322 "../../../kernel/ifftw.h"
typedef enum { INPLACE_IS, INPLACE_OS } inplace_kind;

/* Tensor constructors, queries, copies and destructors (declarations only). */
tensor *fftwf_mktensor(int rnk);
tensor *fftwf_mktensor_0d(void);
tensor *fftwf_mktensor_1d(int n, int is, int os);
tensor *fftwf_mktensor_2d(int n0, int is0, int os0, int n1, int is1, int os1);
int fftwf_tensor_sz(const tensor *sz);
void fftwf_tensor_md5(md5 *p, const tensor *t);
int fftwf_tensor_max_index(const tensor *sz);
int fftwf_tensor_min_istride(const tensor *sz);
int fftwf_tensor_min_ostride(const tensor *sz);
int fftwf_tensor_min_stride(const tensor *sz);
int fftwf_tensor_inplace_strides(const tensor *sz);
int fftwf_tensor_inplace_strides2(const tensor *a, const tensor *b);
tensor *fftwf_tensor_copy(const tensor *sz);
int fftwf_tensor_kosherp(const tensor *x);
tensor *fftwf_tensor_copy_inplace(const tensor *sz, inplace_kind k);
tensor *fftwf_tensor_copy_except(const tensor *sz, int except_dim);
tensor *fftwf_tensor_copy_sub(const tensor *sz, int start_dim, int rnk);
tensor *fftwf_tensor_compress(const tensor *sz);
tensor *fftwf_tensor_compress_contiguous(const tensor *sz);
tensor *fftwf_tensor_append(const tensor *a, const tensor *b);
void fftwf_tensor_split(const tensor *sz, tensor **a, int a_rnk, tensor **b);
int fftwf_tensor_tornk1(const tensor *t, int *n, int *is, int *os);
void fftwf_tensor_destroy(tensor *sz);
void fftwf_tensor_destroy2(tensor *a, tensor *b);
void fftwf_tensor_destroy4(tensor *a, tensor *b, tensor *c, tensor *d);
void fftwf_tensor_print(const tensor *sz, printer *p);
int fftwf_dimcmp(const iodim *a, const iodim *b);

/* Function-pointer table for a problem: hash, zero, print, destroy. */
typedef struct {
    void (*hash) (const problem *ego, md5 *p);
    void (*zero) (const problem *ego);
    void (*print) (problem *ego, printer *p);
    void (*destroy) (problem *ego);
} problem_adt;

/* A problem object holds only a pointer to its function table. */
struct problem_s {
    const problem_adt *adt;
};
problem *fftwf_mkproblem(size_t sz, const problem_adt *adt);
void fftwf_problem_destroy(problem *ego);

/* Printer object: four function pointers plus two int indentation fields. */
struct printer_s {
    void (*print)(printer *p, const char *format, ...);
    void (*vprint)(printer *p, const char *format, va_list ap);
    void (*putchr)(printer *p, char c);
    void (*cleanup)(printer *p);
    int indent;
    int indent_incr;
};
printer *fftwf_mkprinter(size_t size, void (*putchr)(printer *p, char c), void (*cleanup)(printer *p));
void fftwf_printer_destroy(printer *p);

/* Scanner object: scan/vscan/getchr function pointers plus an int ungotc field. */
struct scanner_s {
    int (*scan)(scanner *sc, const char *format, ...);
    int (*vscan)(scanner *sc, const char *format, va_list ap);
    int (*getchr)(scanner *sc);
    int ungotc;
};
scanner *fftwf_mkscanner(size_t size, int (*getchr)(scanner *sc));
void fftwf_scanner_destroy(scanner *sc);

/* Function-pointer table for a plan: solve, awake, print, destroy. */
typedef struct {
    void (*solve)(const plan *ego, const problem *p);
    void (*awake)(plan *ego, int flag);
    void (*print)(const plan *ego, printer *p);
    void (*destroy)(plan *ego);
} plan_adt;

/* Plan object: function table, an int awake_refcnt, an opcnt and a double pcost. */
struct plan_s {
    const plan_adt *adt;
    int awake_refcnt;
    opcnt ops;
    double pcost;
};
plan *fftwf_mkplan(size_t size, const plan_adt *adt);
void fftwf_plan_destroy_internal(plan *ego);
void fftwf_plan_awake(plan *ego, int flag);
void fftwf_plan_null_destroy(plan *ego);

/* Function-pointer table for a solver: a single mkplan entry returning a plan pointer. */
typedef struct {
    plan *(*mkplan)(const solver *ego, const problem *p, planner *plnr);
} solver_adt;

/* Solver object: function table plus an int refcnt. */
struct solver_s {
    const solver_adt *adt;
    int refcnt;
};
solver *fftwf_mksolver(size_t size, const solver_adt *adt);
void fftwf_solver_use(solver *ego);
void fftwf_solver_destroy(solver *ego);
void fftwf_solver_register(planner *plnr, solver *s);

/* Solver descriptor: solver pointer, registration name, name hash and registration id. */
typedef struct slvdesc_s {
    solver *slv;
    const char *reg_nam;
    unsigned nam_hash;
    int reg_id;
} slvdesc;
typedef struct solution_s solution;

/* Anonymous enum of single-bit flag values 0x1 .. 0x8. */
enum {
    DESTROY_INPUT = 0x1,
    NO_SIMD = 0x2,
    CONSERVE_MEMORY = 0x4,
    NO_DHT_R2HC = 0x8
};

/*
 * Second anonymous enum of single-bit flag values plus two composites:
 * IMPATIENCE_FLAGS = ESTIMATE | (ESTIMATE - 1) evaluates to 0x1fff, i.e.
 * the ESTIMATE bit and every bit below it; NONIMPATIENCE_FLAGS equals
 * BLESSING (0x4000).
 */
enum {
    BELIEVE_PCOST = 0x1,
    DFT_R2HC_ICKY = 0x2,
    NONTHREADED_ICKY = 0x4,
    NO_BUFFERING = 0x8,
    NO_EXHAUSTIVE = 0x10,
    NO_INDIRECT_OP = 0x20,
    NO_LARGE_GENERIC = 0x40,
    NO_RANK_SPLITS = 0x80,
    NO_VRANK_SPLITS = 0x100,
    NO_VRECURSE = 0x200,
    NO_UGLY = 0x400,
    NO_SEARCH = 0x800,
    ESTIMATE = 0x1000,
    IMPATIENCE_FLAGS = (ESTIMATE | (ESTIMATE - 1)),
    BLESSING = 0x4000,
    H_VALID = 0x8000,
    NONIMPATIENCE_FLAGS = BLESSING
};
# 507 "../../../kernel/ifftw.h"
typedef enum { FORGET_ACCURSED, FORGET_EVERYTHING } amnesia;

/*
 * Function-pointer table for a planner: register_solver, mkplan, forget,
 * exprt and imprt (imprt returns int, the others void or a plan pointer).
 */
typedef struct {
    void (*register_solver)(planner *ego, solver *s);
    plan *(*mkplan)(planner *ego, problem *p);
    void (*forget)(planner *ego, amnesia a);
    void (*exprt)(planner *ego, printer *p);
    int (*imprt)(planner *ego, scanner *sc);
} planner_adt;
/*
 * Preprocessed declarations, kept verbatim: this text is the reproducer
 * attached to a GCC internal-compiler-error report, so no token is altered.
 * The embedded "# N file" markers name the originating headers:
 * kernel/ifftw.h, dft/codelet-dft.h, dft/simd/n1f.h, simd/simd.h and
 * simd/simd-altivec.h.
 * NOTE(review): the markers sit mid-line in this copy; a preprocessor
 * linemarker normally occupies its own line, so the line structure was
 * probably lost in transit -- left as found.
 *
 * From kernel/ifftw.h:
 *  - struct planner_s: an adt pointer, a hook callback, the solver
 *    descriptor array (slvdescs, nslvdesc, slvdescsiz), the solutions
 *    pointer with hashsiz/nelem, flag words, and a group of int/double
 *    fields (nplan, pcost, epcost, nprob, lookup, ..., nrehash) --
 *    presumably bookkeeping; their use is not visible here.
 *  - stride is a typedef for int; trigreal is a typedef for double.
 *  - solvtab is an array of struct solvtab_s {reg, reg_nam}.
 *  - TW_* enum, tw_instr, twid, and prototypes for twiddle, prime/modular,
 *    rader_tl, transpose and miscellaneous helpers (declarations only).
 *  - E is a typedef for R.  FMA(a,b,c) returns a*b + c, FMS returns
 *    a*b - c, FNMA returns -(a*b + c), FNMS returns -(a*b - c); each does
 *    the multiply and the add/subtract as two separate statements.
 *
 * From dft/codelet-dft.h:
 *  - kdft_desc / kdft_genus / kdft and ct_desc / ct_genus / kdft_dit /
 *    kdft_difsq / kdft_dif types, their *_register prototypes, and three
 *    extern solvtab tables.
 *
 * From simd/simd-altivec.h:
 *  - vec_perm, vec_sel, vec_ld, vec_ste, vec_st, vec_mergeh, vec_xor:
 *    static inline wrappers that cast their float-vector arguments to
 *    integer vector types and forward to a __builtin_altivec_* call.
 *  - vec_mergel: same shape, but implemented with an inline asm
 *    "vmrglw" instead of a builtin.
 *  - V: 16-byte vector of float.
 *  - VMUL(a,b): vmaddfp(a, b, zero) where zero holds four -0.0 lanes.
 *  - VFMS(a,b,c): vsubfp(VMUL(a,b), c).
 *  - LDA: ignores ivs and aligned_like; returns vec_ld(0, x).
 *  - LD: with fivs = 4*ivs, builds a permute mask by selecting between
 *    lvsl(0, aligned_like) and lvsr(fivs + 8, aligned_like) under
 *    fftwf_altivec_ld_selmsk, then permutes vec_ld(0,x) with vec_ld(fivs,x).
 *  - STH: permutes v by lvsr(0, aligned_like), then vec_ste at byte
 *    offsets 0 and 4.  STL: permutes by lvsr(4*ovs + 8, aligned_like),
 *    then vec_ste at offsets 4*ovs and 4*ovs + 4.  ST calls STH then STL.
 *  - STA: vec_st(v, 0, x); its ovs and aligned_like parameters are unused.
 *  - FLIP_RI: vec_perm(x, x, fftwf_altivec_flipri_perm).
 *  - CHS_R: vec_xor(x, fftwf_altivec_chsr_msk).
 *  - VBYI(x): CHS_R(FLIP_RI(x)).
 *  - VFMAI(b,c): vmaddfp(FLIP_RI(b), fftwf_altivec_chsr_sgn, c).
 */
struct planner_s { const planner_adt *adt; void (*hook)(plan *pln, const problem *p, int optimalp); slvdesc *slvdescs; unsigned nslvdesc, slvdescsiz; const char *cur_reg_nam; int cur_reg_id; solution *solutions; unsigned hashsiz, nelem; int nthr; unsigned problem_flags; unsigned short planner_flags; int nplan; double pcost, epcost; int nprob; int lookup, succ_lookup, lookup_iter; int insert, insert_iter, insert_unknown; int nrehash; }; planner *fftwf_mkplanner(void); void fftwf_planner_destroy(planner *ego); # 575 "../../../kernel/ifftw.h" plan *fftwf_mkplan_d(planner *ego, problem *p); # 593 "../../../kernel/ifftw.h" typedef int stride; # 607 "../../../kernel/ifftw.h" struct solvtab_s { void (*reg)(planner *); const char *reg_nam; }; typedef struct solvtab_s solvtab[]; void fftwf_solvtab_exec(const solvtab tbl, planner *p); int fftwf_pickdim(int which_dim, const int *buddies, int nbuddies, const tensor *sz, int oop, int *dp); enum { TW_COS = 0, TW_SIN = 1, TW_TAN = 2, TW_NEXT = 3, TW_FULL = 4, TW_GENERIC = 5 }; typedef struct { unsigned char op; unsigned char v; short i; } tw_instr; typedef struct twid_s { R *W; int n, r, m; int refcnt; const tw_instr *instr; struct twid_s *cdr; } twid; void fftwf_mktwiddle(twid **pp, const tw_instr *instr, int n, int r, int m); void fftwf_twiddle_destroy(twid **pp); int fftwf_twiddle_length(int r, const tw_instr *p); void fftwf_twiddle_awake(int flg, twid **pp, const tw_instr *instr, int n, int r, int m); typedef double trigreal; extern trigreal fftwf_cos2pi(int, int); extern trigreal fftwf_sin2pi(int, int); extern trigreal fftwf_tan2pi(int, int); extern trigreal fftwf_sincos(trigreal m, trigreal n, int sinp); # 676 "../../../kernel/ifftw.h" int fftwf_power_mod(int n, int m, int p); int fftwf_find_generator(int p); int fftwf_first_divisor(int n); int fftwf_is_prime(int n); int fftwf_next_prime(int n); typedef struct rader_tls rader_tl; void fftwf_rader_tl_insert(int k1, int k2, int k3, R *W, rader_tl **tl); R 
*fftwf_rader_tl_find(int k1, int k2, int k3, rader_tl *t); void fftwf_rader_tl_delete(R *W, rader_tl **tl); void fftwf_transpose(R *A, int n, int m, int d, int N, R *buf); void fftwf_transpose_slow(R *a, int nx, int ny, int N, char *move, int move_size, R *buf); int fftwf_transposable(const iodim *a, const iodim *b, int vl, int s, R *ri, R *ii); void fftwf_transpose_dims(const iodim *a, const iodim *b, int *n, int *m, int *d, int *nd, int *md); int fftwf_transpose_simplep(const iodim *a, const iodim *b, int vl, int s, R *ri, R *ii); int fftwf_transpose_slowp(const iodim *a, const iodim *b, int N); void fftwf_null_awake(plan *ego, int awake); int fftwf_square(int x); double fftwf_measure_execution_time(plan *pln, const problem *p); int fftwf_alignment_of(R *p); unsigned fftwf_hash(const char *s); int fftwf_compute_nbuf(int n, int vl, int nbuf, int maxbufsz); int fftwf_ct_uglyp(int min_n, int n, int r); R *fftwf_taint(R *p, int s); R *fftwf_join_taint(R *p1, R *p2); # 744 "../../../kernel/ifftw.h" typedef R E; # 763 "../../../kernel/ifftw.h" static __inline__ E FMA(E a, E b, E c) { E x = a * b; x = x + c; return x; } static __inline__ E FMS(E a, E b, E c) { E x = a * b; x = x - c; return x; } static __inline__ E FNMA(E a, E b, E c) { E x = a * b; x = - (x + c); return x; } static __inline__ E FNMS(E a, E b, E c) { E x = a * b; x = - (x - c); return x; } # 32 "../../../dft/codelet-dft.h" 2 typedef struct kdft_desc_s kdft_desc; typedef struct { int (*okp)( const kdft_desc *desc, const R *ri, const R *ii, const R *ro, const R *io, int is, int os, int vl, int ivs, int ovs, const planner *plnr); int vl; } kdft_genus; struct kdft_desc_s { int sz; const char *nam; opcnt ops; const kdft_genus *genus; int is; int os; int ivs; int ovs; }; typedef void (*kdft) (const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int vl, int ivs, int ovs); void fftwf_kdft_register(planner *p, kdft codelet, const kdft_desc *desc); typedef struct ct_desc_s ct_desc; typedef struct { int 
(*okp)( const struct ct_desc_s *desc, const R *rio, const R *iio, int ios, int vs, int m, int dist, const planner *plnr); int vl; } ct_genus; struct ct_desc_s { int radix; const char *nam; const tw_instr *tw; opcnt ops; const ct_genus *genus; int s1; int s2; int dist; }; typedef const R *(*kdft_dit) (R *rioarray, R *iioarray, const R *W, stride ios, int m, int dist); void fftwf_kdft_dit_register(planner *p, kdft_dit codelet, const ct_desc *desc); typedef const R *(*kdft_difsq) (R *rioarray, R *iioarray, const R *W, stride is, stride vs, int m, int dist); void fftwf_kdft_difsq_register(planner *p, kdft_difsq codelet, const ct_desc *desc); typedef const R *(*kdft_dif) (R *rioarray, R *iioarray, const R *W, stride ios, int m, int dist); void fftwf_kdft_dif_register(planner *p, kdft_dif codelet, const ct_desc *desc); extern const solvtab fftwf_solvtab_dft_standard; extern const solvtab fftwf_solvtab_dft_inplace; extern const solvtab fftwf_solvtab_dft_simd; # 25 "n1fv_9.c" 2 # 40 "n1fv_9.c" # 1 "../../../dft/simd/n1f.h" 1 # 22 "../../../dft/simd/n1f.h" # 1 "../../../simd/simd.h" 1 # 30 "../../../simd/simd.h" # 1 "../../../simd/simd-altivec.h" 1 # 41 "../../../simd/simd-altivec.h" static inline __attribute__((vector_size(16))) float vec_perm (__attribute__((vector_size(16))) float a1, __attribute__((vector_size(16))) float a2, __attribute__((vector_size(16))) unsigned char a3) { return (__attribute__((vector_size(16))) float) __builtin_altivec_vperm_4si ((__attribute__((vector_size(16))) signed int) a1, (__attribute__((vector_size(16))) signed int) a2, (__attribute__((vector_size(16))) signed char) a3); } static inline __attribute__((vector_size(16))) float vec_sel (__attribute__((vector_size(16))) float a1, __attribute__((vector_size(16))) float a2, __attribute__((vector_size(16))) unsigned int a3) { return (__attribute__((vector_size(16))) float) __builtin_altivec_vsel_4si ((__attribute__((vector_size(16))) signed int) a1, (__attribute__((vector_size(16))) signed int) 
a2, (__attribute__((vector_size(16))) signed int) a3); } static inline __attribute__((vector_size(16))) float vec_ld (int a1, float *a2) { return (__attribute__((vector_size(16))) float) __builtin_altivec_lvx (a1, (void *) a2); } static inline void vec_ste (__attribute__((vector_size(16))) float a1, int a2, void *a3) { __builtin_altivec_stvewx ((__attribute__((vector_size(16))) signed int) a1, a2, (void *) a3); } static inline void vec_st (__attribute__((vector_size(16))) float a1, int a2, void *a3) { __builtin_altivec_stvx ((__attribute__((vector_size(16))) signed int) a1, a2, (void *) a3); } static inline __attribute__((vector_size(16))) float vec_mergeh (__attribute__((vector_size(16))) float a1, __attribute__((vector_size(16))) float a2) { return (__attribute__((vector_size(16))) float) __builtin_altivec_vmrghw ((__attribute__((vector_size(16))) signed int) a1, (__attribute__((vector_size(16))) signed int) a2); } static inline __attribute__((vector_size(16))) float vec_mergel (__attribute__((vector_size(16))) float a1, __attribute__((vector_size(16))) float a2) { __attribute__((vector_size(16))) float ret; __asm__("vmrglw %0, %1, %2" : "=v"(ret) : "v"(a1), "v"(a2)); return ret; } static inline __attribute__((vector_size(16))) float vec_xor (__attribute__((vector_size(16))) float a1, __attribute__((vector_size(16))) float a2) { return (__attribute__((vector_size(16))) float) __builtin_altivec_vxor ((__attribute__((vector_size(16))) signed int) a1, (__attribute__((vector_size(16))) signed int) a2); } # 103 "../../../simd/simd-altivec.h" typedef __attribute__((vector_size(16))) float V; static inline V VMUL(V a, V b) { const V zero = (__attribute__((vector_size(16))) float){-0.0, -0.0, -0.0, -0.0}; return __builtin_altivec_vmaddfp(a, b, zero); } static inline V VFMS(V a, V b, V c) { return __builtin_altivec_vsubfp(VMUL(a, b), c); } extern const __attribute__((vector_size(16))) unsigned int fftwf_altivec_ld_selmsk; static inline V LDA(const R *x, int ivs, const R 
*aligned_like) { (void)ivs; (void)aligned_like; return vec_ld(0, (R *)x); } static inline V LD(const R *x, int ivs, const R *aligned_like) { int fivs = 4 * ivs; __attribute__((vector_size(16))) unsigned char ml = ((__attribute__((vector_size(16))) unsigned char) __builtin_altivec_lvsr ((fivs + 8), ((R *)aligned_like))); __attribute__((vector_size(16))) unsigned char mh = ((__attribute__((vector_size(16))) unsigned char) __builtin_altivec_lvsl ((0), ((R *)aligned_like))); __attribute__((vector_size(16))) unsigned char msk = (__attribute__((vector_size(16))) unsigned char)vec_sel((V)mh, (V)ml, fftwf_altivec_ld_selmsk); return vec_perm(vec_ld(0, (R *)x), vec_ld(fivs, (R *)x), msk); } static inline void STH(R *x, V v, const R *aligned_like) { v = vec_perm(v, v, ((__attribute__((vector_size(16))) unsigned char) __builtin_altivec_lvsr ((0), ((R *)aligned_like)))); vec_ste(v, 0, x); vec_ste(v, 4, x); } static inline void STL(R *x, V v, int ovs, const R *aligned_like) { int fovs = 4 * ovs; v = vec_perm(v, v, ((__attribute__((vector_size(16))) unsigned char) __builtin_altivec_lvsr ((fovs + 8), ((R *)aligned_like)))); vec_ste(v, fovs, x); vec_ste(v, 4 + fovs, x); } static inline void STA(R *x, V v, int ovs, const R *aligned_like) { vec_st(v, 0, x); } static inline void ST(R *x, V v, int ovs, const R *aligned_like) { STH(x, v, aligned_like); STL(x, v, ovs, aligned_like); } extern const __attribute__((vector_size(16))) unsigned int fftwf_altivec_flipri_perm; static inline V FLIP_RI(V x) { return vec_perm(x, x, (const __attribute__((vector_size(16))) unsigned char)fftwf_altivec_flipri_perm); } extern const __attribute__((vector_size(16))) float fftwf_altivec_chsr_msk; extern const __attribute__((vector_size(16))) float fftwf_altivec_chsr_sgn; static inline V CHS_R(V x) { return vec_xor(x, fftwf_altivec_chsr_msk); } static inline V VBYI(V x) { return CHS_R(FLIP_RI(x)); } static inline V VFMAI(V b, V c) { return __builtin_altivec_vmaddfp(FLIP_RI(b), fftwf_altivec_chsr_sgn, c); } 
/*
 * Tail of the preprocessed n1fv_9.c translation unit: the last
 * simd-altivec.h helpers, then the size-9 codelet and its registration.
 * Kept verbatim: the attached bug report says compiling this file with
 * -O -maltivec fails with "unrecognizable insn" inside n1fv_9, and the
 * failing insn loads a four-lane float constant 0.93969261646..., which
 * matches KP939692620 below once rounded to float.
 *
 * VFNMSI(b, c): vnmsubfp(FLIP_RI(b), fftwf_altivec_chsr_sgn, c).
 * BYTW(t, sr):  reads one vector tx from t (viewed as const V *), forms
 *               tr = vec_mergeh(tx, tx) and ti = vec_mergel(tx, tx), and
 *               returns vmaddfp(ti, VBYI(sr), VMUL(tr, sr)).
 * BYTWJ(t, sr): identical except the final call is vnmsubfp.
 * None of these three helpers is called by the code in this block.
 *
 * n1fv_9(ri, ii, ro, io, is, os, v, ivs, ovs):
 *  - Declares 14 const V values named KP...; each has four identical
 *    lanes.
 *  - Sets xi = ri and xo = ro; ii and io are never read.
 *  - Loop: i starts at v and drops by 2 while i > 0; each pass advances
 *    xi by 2*ivs and xo by 2*ovs.
 *  - Each pass does nine LD loads from xi[is*k], k = 0..8, and nine ST
 *    stores to xo[os*k], k = 0..8.  The aligned_like argument is
 *    &xi[0] / &xo[0] for even k and &xi[is*1] / &xo[os*1] for odd k.
 *  - Arithmetic uses only vaddfp, vsubfp, vmaddfp, vnmsubfp, VMUL and
 *    VBYI.  NOTE(review): the nesting order of the multiply-add chain
 *    is part of the reproducer; do not reassociate.
 *
 * desc: kdft_desc initialised with sz = 9, nam = "n1fv_9",
 *       ops = {30, 10, 16, 0} (opcnt's members are not visible here),
 *       genus = &fftwf_dft_n1fsimd_genus, and is/os/ivs/ovs all 0.
 * fftwf_codelet_n1fv_9(p): calls fftwf_kdft_register(p, n1fv_9, &desc).
 */
static inline V VFNMSI(V b, V c) { return __builtin_altivec_vnmsubfp(FLIP_RI(b), fftwf_altivec_chsr_sgn, c); } static inline V BYTW(const R *t, V sr) { const V *twp = (const V *)t; V si = VBYI(sr); V tx = twp[0]; V tr = vec_mergeh(tx, tx); V ti = vec_mergel(tx, tx); return __builtin_altivec_vmaddfp(ti, si, VMUL(tr, sr)); } static inline V BYTWJ(const R *t, V sr) { const V *twp = (const V *)t; V si = VBYI(sr); V tx = twp[0]; V tr = vec_mergeh(tx, tx); V ti = vec_mergel(tx, tx); return __builtin_altivec_vnmsubfp(ti, si, VMUL(tr, sr)); } # 31 "../../../simd/simd.h" 2 # 23 "../../../dft/simd/n1f.h" 2 extern const kdft_genus fftwf_dft_n1fsimd_genus; # 41 "n1fv_9.c" 2 static void n1fv_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs) { const V KP342020143 = (__attribute__((vector_size(16))) float){+0.342020143325668733044099614682259580763083368, +0.342020143325668733044099614682259580763083368, +0.342020143325668733044099614682259580763083368, +0.342020143325668733044099614682259580763083368}; const V KP813797681 = (__attribute__((vector_size(16))) float){+0.813797681349373692844693217248393223289101568, +0.813797681349373692844693217248393223289101568, +0.813797681349373692844693217248393223289101568, +0.813797681349373692844693217248393223289101568}; const V KP939692620 = (__attribute__((vector_size(16))) float){+0.939692620785908384054109277324731469936208134, +0.939692620785908384054109277324731469936208134, +0.939692620785908384054109277324731469936208134, +0.939692620785908384054109277324731469936208134}; const V KP296198132 = (__attribute__((vector_size(16))) float){+0.296198132726023843175338011893050938967728390, +0.296198132726023843175338011893050938967728390, +0.296198132726023843175338011893050938967728390, +0.296198132726023843175338011893050938967728390}; const V KP642787609 = (__attribute__((vector_size(16))) float){+0.642787609686539326322643409907263432907559884, +0.642787609686539326322643409907263432907559884, 
+0.642787609686539326322643409907263432907559884, +0.642787609686539326322643409907263432907559884}; const V KP663413948 = (__attribute__((vector_size(16))) float){+0.663413948168938396205421319635891297216863310, +0.663413948168938396205421319635891297216863310, +0.663413948168938396205421319635891297216863310, +0.663413948168938396205421319635891297216863310}; const V KP556670399 = (__attribute__((vector_size(16))) float){+0.556670399226419366452912952047023132968291906, +0.556670399226419366452912952047023132968291906, +0.556670399226419366452912952047023132968291906, +0.556670399226419366452912952047023132968291906}; const V KP766044443 = (__attribute__((vector_size(16))) float){+0.766044443118978035202392650555416673935832457, +0.766044443118978035202392650555416673935832457, +0.766044443118978035202392650555416673935832457, +0.766044443118978035202392650555416673935832457}; const V KP984807753 = (__attribute__((vector_size(16))) float){+0.984807753012208059366743024589523013670643252, +0.984807753012208059366743024589523013670643252, +0.984807753012208059366743024589523013670643252, +0.984807753012208059366743024589523013670643252}; const V KP150383733 = (__attribute__((vector_size(16))) float){+0.150383733180435296639271897612501926072238258, +0.150383733180435296639271897612501926072238258, +0.150383733180435296639271897612501926072238258, +0.150383733180435296639271897612501926072238258}; const V KP852868531 = (__attribute__((vector_size(16))) float){+0.852868531952443209628250963940074071936020296, +0.852868531952443209628250963940074071936020296, +0.852868531952443209628250963940074071936020296, +0.852868531952443209628250963940074071936020296}; const V KP173648177 = (__attribute__((vector_size(16))) float){+0.173648177666930348851716626769314796000375677, +0.173648177666930348851716626769314796000375677, +0.173648177666930348851716626769314796000375677, +0.173648177666930348851716626769314796000375677}; const V KP500000000 = 
(__attribute__((vector_size(16))) float){+0.500000000000000000000000000000000000000000000, +0.500000000000000000000000000000000000000000000, +0.500000000000000000000000000000000000000000000, +0.500000000000000000000000000000000000000000000}; const V KP866025403 = (__attribute__((vector_size(16))) float){+0.866025403784438646763723170752936183471402627, +0.866025403784438646763723170752936183471402627, +0.866025403784438646763723170752936183471402627, +0.866025403784438646763723170752936183471402627}; int i; const R *xi; R *xo; xi = ri; xo = ro; ; for (i = v; i > 0; i = i - 2, xi = xi + (2 * ivs), xo = xo + (2 * ovs)) { V T5, Ts, Tj, To, Tf, Tn, Tp, Tu, Tl, Ta, Tk, Tm, Tt; { V T1, T2, T3, T4; T1 = LD(&(xi[0]), ivs, &(xi[0])); T2 = LD(&(xi[(is * 3)]), ivs, &(xi[(is * 1)])); T3 = LD(&(xi[(is * 6)]), ivs, &(xi[0])); T4 = __builtin_altivec_vaddfp(T2, T3); T5 = __builtin_altivec_vaddfp(T1, T4); Ts = VMUL(KP866025403, __builtin_altivec_vsubfp(T3, T2)); Tj = __builtin_altivec_vnmsubfp(KP500000000, T4, T1); } { V Tb, Te, Tc, Td; Tb = LD(&(xi[(is * 2)]), ivs, &(xi[0])); Tc = LD(&(xi[(is * 5)]), ivs, &(xi[(is * 1)])); Td = LD(&(xi[(is * 8)]), ivs, &(xi[0])); Te = __builtin_altivec_vaddfp(Tc, Td); To = __builtin_altivec_vsubfp(Td, Tc); Tf = __builtin_altivec_vaddfp(Tb, Te); Tn = __builtin_altivec_vnmsubfp(KP500000000, Te, Tb); Tp = __builtin_altivec_vmaddfp(KP173648177, Tn, VMUL(KP852868531, To)); Tu = __builtin_altivec_vnmsubfp(KP984807753, Tn, VMUL(KP150383733, To)); } { V T6, T9, T7, T8; T6 = LD(&(xi[(is * 1)]), ivs, &(xi[(is * 1)])); T7 = LD(&(xi[(is * 4)]), ivs, &(xi[0])); T8 = LD(&(xi[(is * 7)]), ivs, &(xi[(is * 1)])); T9 = __builtin_altivec_vaddfp(T7, T8); Tl = __builtin_altivec_vsubfp(T8, T7); Ta = __builtin_altivec_vaddfp(T6, T9); Tk = __builtin_altivec_vnmsubfp(KP500000000, T9, T6); Tm = __builtin_altivec_vmaddfp(KP766044443, Tk, VMUL(KP556670399, Tl)); Tt = __builtin_altivec_vnmsubfp(KP642787609, Tk, VMUL(KP663413948, Tl)); } { V Ti, Tg, Th, Tz, TA; Ti = 
VBYI(VMUL(KP866025403, __builtin_altivec_vsubfp(Tf, Ta))); Tg = __builtin_altivec_vaddfp(Ta, Tf); Th = __builtin_altivec_vnmsubfp(KP500000000, Tg, T5); ST(&(xo[0]), __builtin_altivec_vaddfp(T5, Tg), ovs, &(xo[0])); ST(&(xo[(os * 3)]), __builtin_altivec_vaddfp(Th, Ti), ovs, &(xo[(os * 1)])); ST(&(xo[(os * 6)]), __builtin_altivec_vsubfp(Th, Ti), ovs, &(xo[0])); Tz = __builtin_altivec_vmaddfp(KP173648177, Tk, __builtin_altivec_vnmsubfp(KP296198132, To, __builtin_altivec_vnmsubfp(KP939692620, Tn, __builtin_altivec_vnmsubfp(KP852868531, Tl, Tj)))); TA = VBYI(__builtin_altivec_vsubfp(__builtin_altivec_vnmsubfp(KP342020143, Tn, __builtin_altivec_vnmsubfp(KP150383733, Tl, __builtin_altivec_vnmsubfp(KP984807753, Tk, VMUL(KP813797681, To)))), Ts)); ST(&(xo[(os * 7)]), __builtin_altivec_vsubfp(Tz, TA), ovs, &(xo[(os * 1)])); ST(&(xo[(os * 2)]), __builtin_altivec_vaddfp(Tz, TA), ovs, &(xo[0])); { V Tr, Tx, Tw, Ty, Tq, Tv; Tq = __builtin_altivec_vaddfp(Tm, Tp); Tr = __builtin_altivec_vaddfp(Tj, Tq); Tx = __builtin_altivec_vmaddfp(KP866025403, __builtin_altivec_vsubfp(Tt, Tu), __builtin_altivec_vnmsubfp(KP500000000, Tq, Tj)); Tv = __builtin_altivec_vaddfp(Tt, Tu); Tw = VBYI(__builtin_altivec_vaddfp(Ts, Tv)); Ty = VBYI(__builtin_altivec_vaddfp(Ts, __builtin_altivec_vnmsubfp(KP500000000, Tv, VMUL(KP866025403, __builtin_altivec_vsubfp(Tp, Tm))))); ST(&(xo[(os * 8)]), __builtin_altivec_vsubfp(Tr, Tw), ovs, &(xo[0])); ST(&(xo[(os * 4)]), __builtin_altivec_vaddfp(Tx, Ty), ovs, &(xo[0])); ST(&(xo[(os * 1)]), __builtin_altivec_vaddfp(Tw, Tr), ovs, &(xo[(os * 1)])); ST(&(xo[(os * 5)]), __builtin_altivec_vsubfp(Tx, Ty), ovs, &(xo[(os * 1)])); } } } ; } static const kdft_desc desc = { 9, "n1fv_9", {30, 10, 16, 0}, &fftwf_dft_n1fsimd_genus, 0, 0, 0, 0 }; void fftwf_codelet_n1fv_9 (planner *p) { fftwf_kdft_register (p, n1fv_9, &desc); }