/* * This file is part of pocketfft. * Licensed under a 3-clause BSD style license - see LICENSE.md */ /* * Main implementation file. * * Copyright (C) 2004-2018 Max-Planck-Society * \author Martin Reinecke */ #define NPY_NO_DEPRECATED_API NPY_API_VERSION #include "Python.h" #include "numpy/arrayobject.h" #include #include #include #include "npy_config.h" #define restrict NPY_RESTRICT #define RALLOC(type,num) \ ((type *)malloc((num)*sizeof(type))) #define DEALLOC(ptr) \ do { free(ptr); (ptr)=NULL; } while(0) #define SWAP(a,b,type) \ do { type tmp_=(a); (a)=(b); (b)=tmp_; } while(0) #ifdef __GNUC__ #define NOINLINE __attribute__((noinline)) #define WARN_UNUSED_RESULT __attribute__ ((warn_unused_result)) #else #define NOINLINE #define WARN_UNUSED_RESULT #endif struct cfft_plan_i; typedef struct cfft_plan_i * cfft_plan; struct rfft_plan_i; typedef struct rfft_plan_i * rfft_plan; // adapted from https://stackoverflow.com/questions/42792939/ // CAUTION: this function only works for arguments in the range [-0.25; 0.25]! static void my_sincosm1pi (double a, double *restrict res) { double s = a * a; /* Approximate cos(pi*x)-1 for x in [-0.25,0.25] */ double r = -1.0369917389758117e-4; r = fma (r, s, 1.9294935641298806e-3); r = fma (r, s, -2.5806887942825395e-2); r = fma (r, s, 2.3533063028328211e-1); r = fma (r, s, -1.3352627688538006e+0); r = fma (r, s, 4.0587121264167623e+0); r = fma (r, s, -4.9348022005446790e+0); double c = r*s; /* Approximate sin(pi*x) for x in [-0.25,0.25] */ r = 4.6151442520157035e-4; r = fma (r, s, -7.3700183130883555e-3); r = fma (r, s, 8.2145868949323936e-2); r = fma (r, s, -5.9926452893214921e-1); r = fma (r, s, 2.5501640398732688e+0); r = fma (r, s, -5.1677127800499516e+0); s = s * a; r = r * s; s = fma (a, 3.1415926535897931e+0, r); res[0] = c; res[1] = s; } NOINLINE static void calc_first_octant(size_t den, double * restrict res) { size_t n = (den+4)>>3; if (n==0) return; res[0]=1.; res[1]=0.; if (n==1) return; size_t l1=(size_t)sqrt(n); for (size_t i=1; in) end = n-start; for (size_t i=1; i>2; size_t i=0, idx1=0, idx2=2*ndone-2; for (; i+1>1; double * p = res+n-1; calc_first_octant(n<<2, p); int i4=0, in=n, i=0; for (; i4<=in-i4; ++i, i4+=4) // octant 0 { res[2*i] = p[2*i4]; res[2*i+1] = p[2*i4+1]; } for (; i4-in <= 0; ++i, i4+=4) // octant 1 { int xm = in-i4; res[2*i] = p[2*xm+1]; res[2*i+1] = p[2*xm]; } for (; i4<=3*in-i4; ++i, i4+=4) // octant 2 { int xm = i4-in; res[2*i] = -p[2*xm+1]; res[2*i+1] = p[2*xm]; } for (; i>2; if ((n&7)==0) res[quart] = res[quart+1] = hsqt2; for (size_t i=2, j=2*quart-2; i>1; if ((n&3)==0) for (size_t i=0; i>1))<<1)==n) { res=2; n=tmp; } size_t limit=(size_t)sqrt(n+0.01); for (size_t x=3; x<=limit; x+=2) while (((tmp=(n/x))*x)==n) { res=x; n=tmp; limit=(size_t)sqrt(n+0.01); } if (n>1) res=n; return res; } NOINLINE static double cost_guess (size_t n) { const double lfp=1.1; // penalty for non-hardcoded larger factors size_t ni=n; double result=0.; size_t tmp; while (((tmp=(n>>1))<<1)==n) { result+=2; n=tmp; } size_t limit=(size_t)sqrt(n+0.01); for (size_t x=3; x<=limit; x+=2) while ((tmp=(n/x))*x==n) { result+= (x<=5) ? x : lfp*x; // penalize larger prime factors n=tmp; limit=(size_t)sqrt(n+0.01); } if (n>1) result+=(n<=5) ? n : lfp*n; return result*ni; } /* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */ NOINLINE static size_t good_size(size_t n) { if (n<=6) return n; size_t bestfac=2*n; for (size_t f2=1; f2=n) bestfac=f235711; return bestfac; } typedef struct cmplx { double r,i; } cmplx; #define NFCT 25 typedef struct cfftp_fctdata { size_t fct; cmplx *tw, *tws; } cfftp_fctdata; typedef struct cfftp_plan_i { size_t length, nfct; cmplx *mem; cfftp_fctdata fct[NFCT]; } cfftp_plan_i; typedef struct cfftp_plan_i * cfftp_plan; #define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; } #define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; } #define SCALEC(a,b) { a.r*=b; a.i*=b; } #define ROT90(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; } #define ROTM90(a) { double tmp_=-a.r; a.r=a.i; a.i=tmp_; } #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))] #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))] #define WA(x,i) wa[(i)-1+(x)*(ido-1)] /* a = b*c */ #define A_EQ_B_MUL_C(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; } /* a = conj(b)*c*/ #define A_EQ_CB_MUL_C(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; } #define PMSIGNC(a,b,c,d) { a.r=c.r+sign*d.r; a.i=c.i+sign*d.i; b.r=c.r-sign*d.r; b.i=c.i-sign*d.i; } /* a = b*c */ #define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-sign*b.i*c.i; a.i=b.r*c.i+sign*b.i*c.r; } /* a *= b */ #define MULPMSIGNCEQ(a,b) { double xtmp=a.r; a.r=b.r*a.r-sign*b.i*a.i; a.i=b.r*a.i+sign*b.i*xtmp; } NOINLINE static void pass2b (size_t ido, size_t l1, const cmplx * restrict cc, cmplx * restrict ch, const cmplx * restrict wa) { const size_t cdim=2; if (ido==1) for (size_t k=0; kip) iwal-=ip; cmplx xwal=wal[iwal]; iwal+=l; if (iwal>ip) iwal-=ip; cmplx xwal2=wal[iwal]; for (size_t ik=0; ikip) iwal-=ip; cmplx xwal=wal[iwal]; for (size_t ik=0; iklength==1) return 0; size_t len=plan->length; size_t l1=1, nf=plan->nfct; cmplx *ch = RALLOC(cmplx, len); if (!ch) return -1; cmplx *p1=c, *p2=ch; for(size_t k1=0; k1fct[k1].fct; size_t l2=ip*l1; size_t ido = len/l2; if (ip==4) sign>0 ? pass4b (ido, l1, p1, p2, plan->fct[k1].tw) : pass4f (ido, l1, p1, p2, plan->fct[k1].tw); else if(ip==2) sign>0 ? pass2b (ido, l1, p1, p2, plan->fct[k1].tw) : pass2f (ido, l1, p1, p2, plan->fct[k1].tw); else if(ip==3) sign>0 ? pass3b (ido, l1, p1, p2, plan->fct[k1].tw) : pass3f (ido, l1, p1, p2, plan->fct[k1].tw); else if(ip==5) sign>0 ? pass5b (ido, l1, p1, p2, plan->fct[k1].tw) : pass5f (ido, l1, p1, p2, plan->fct[k1].tw); else if(ip==7) pass7 (ido, l1, p1, p2, plan->fct[k1].tw, sign); else if(ip==11) pass11(ido, l1, p1, p2, plan->fct[k1].tw, sign); else { if (passg(ido, ip, l1, p1, p2, plan->fct[k1].tw, plan->fct[k1].tws, sign)) { DEALLOC(ch); return -1; } SWAP(p1,p2,cmplx *); } SWAP(p1,p2,cmplx *); l1=l2; } if (p1!=c) { if (fct!=1.) for (size_t i=0; ilength; size_t nfct=0; while ((length%4)==0) { if (nfct>=NFCT) return -1; plan->fct[nfct++].fct=4; length>>=2; } if ((length%2)==0) { length>>=1; // factor 2 should be at the front of the factor list if (nfct>=NFCT) return -1; plan->fct[nfct++].fct=2; SWAP(plan->fct[0].fct, plan->fct[nfct-1].fct,size_t); } size_t maxl=(size_t)(sqrt((double)length))+1; for (size_t divisor=3; (length>1)&&(divisor=NFCT) return -1; plan->fct[nfct++].fct=divisor; length/=divisor; } maxl=(size_t)(sqrt((double)length))+1; } if (length>1) plan->fct[nfct++].fct=length; plan->nfct=nfct; return 0; } NOINLINE static size_t cfftp_twsize (cfftp_plan plan) { size_t twsize=0, l1=1; for (size_t k=0; knfct; ++k) { size_t ip=plan->fct[k].fct, ido= plan->length/(l1*ip); twsize+=(ip-1)*(ido-1); if (ip>11) twsize+=ip; l1*=ip; } return twsize; } NOINLINE WARN_UNUSED_RESULT static int cfftp_comp_twiddle (cfftp_plan plan) { size_t length=plan->length; double *twid = RALLOC(double, 2*length); if (!twid) return -1; sincos_2pibyn(length, twid); size_t l1=1; size_t memofs=0; for (size_t k=0; knfct; ++k) { size_t ip=plan->fct[k].fct, ido= length/(l1*ip); plan->fct[k].tw=plan->mem+memofs; memofs+=(ip-1)*(ido-1); for (size_t j=1; jfct[k].tw[(j-1)*(ido-1)+i-1].r = twid[2*j*l1*i]; plan->fct[k].tw[(j-1)*(ido-1)+i-1].i = twid[2*j*l1*i+1]; } if (ip>11) { plan->fct[k].tws=plan->mem+memofs; memofs+=ip; for (size_t j=0; jfct[k].tws[j].r = twid[2*j*l1*ido]; plan->fct[k].tws[j].i = twid[2*j*l1*ido+1]; } } l1*=ip; } DEALLOC(twid); return 0; } static cfftp_plan make_cfftp_plan (size_t length) { if (length==0) return NULL; cfftp_plan plan = RALLOC(cfftp_plan_i,1); if (!plan) return NULL; plan->length=length; plan->nfct=0; for (size_t i=0; ifct[i]=(cfftp_fctdata){0,0,0}; plan->mem=0; if (length==1) return plan; if (cfftp_factorize(plan)!=0) { DEALLOC(plan); return NULL; } size_t tws=cfftp_twsize(plan); plan->mem=RALLOC(cmplx,tws); if (!plan->mem) { DEALLOC(plan); return NULL; } if (cfftp_comp_twiddle(plan)!=0) { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } return plan; } static void destroy_cfftp_plan (cfftp_plan plan) { DEALLOC(plan->mem); DEALLOC(plan); } typedef struct rfftp_fctdata { size_t fct; double *tw, *tws; } rfftp_fctdata; typedef struct rfftp_plan_i { size_t length, nfct; double *mem; rfftp_fctdata fct[NFCT]; } rfftp_plan_i; typedef struct rfftp_plan_i * rfftp_plan; #define WA(x,i) wa[(i)+(x)*(ido-1)] #define PM(a,b,c,d) { a=c+d; b=c-d; } /* (a+ib) = conj(c+id) * (e+if) */ #define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; } #define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))] #define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))] NOINLINE static void radf2 (size_t ido, size_t l1, const double * restrict cc, double * restrict ch, const double * restrict wa) { const size_t cdim=2; for (size_t k=0; k1) { for (size_t j=1, jc=ip-1; j=ip) iang-=ip; double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; iang+=l; if (iang>=ip) iang-=ip; double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; iang+=l; if (iang>=ip) iang-=ip; double ar3=csarr[2*iang], ai3=csarr[2*iang+1]; iang+=l; if (iang>=ip) iang-=ip; double ar4=csarr[2*iang], ai4=csarr[2*iang+1]; for (size_t ik=0; ik=ip) iang-=ip; double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; iang+=l; if (iang>=ip) iang-=ip; double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; for (size_t ik=0; ik=ip) iang-=ip; double ar=csarr[2*iang], ai=csarr[2*iang+1]; for (size_t ik=0; ikip) iang-=ip; double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; iang+=l; if(iang>ip) iang-=ip; double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; iang+=l; if(iang>ip) iang-=ip; double ar3=csarr[2*iang], ai3=csarr[2*iang+1]; iang+=l; if(iang>ip) iang-=ip; double ar4=csarr[2*iang], ai4=csarr[2*iang+1]; for (size_t ik=0; ikip) iang-=ip; double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; iang+=l; if(iang>ip) iang-=ip; double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; for (size_t ik=0; ikip) iang-=ip; double war=csarr[2*iang], wai=csarr[2*iang+1]; for (size_t ik=0; iklength==1) return 0; size_t n=plan->length; size_t l1=n, nf=plan->nfct; double *ch = RALLOC(double, n); if (!ch) return -1; double *p1=c, *p2=ch; for(size_t k1=0; k1fct[k].fct; size_t ido=n / l1; l1 /= ip; if(ip==4) radf4(ido, l1, p1, p2, plan->fct[k].tw); else if(ip==2) radf2(ido, l1, p1, p2, plan->fct[k].tw); else if(ip==3) radf3(ido, l1, p1, p2, plan->fct[k].tw); else if(ip==5) radf5(ido, l1, p1, p2, plan->fct[k].tw); else { radfg(ido, ip, l1, p1, p2, plan->fct[k].tw, plan->fct[k].tws); SWAP (p1,p2,double *); } SWAP (p1,p2,double *); } copy_and_norm(c,p1,n,fct); DEALLOC(ch); return 0; } WARN_UNUSED_RESULT static int rfftp_backward(rfftp_plan plan, double c[], double fct) { if (plan->length==1) return 0; size_t n=plan->length; size_t l1=1, nf=plan->nfct; double *ch = RALLOC(double, n); if (!ch) return -1; double *p1=c, *p2=ch; for(size_t k=0; kfct[k].fct, ido= n/(ip*l1); if(ip==4) radb4(ido, l1, p1, p2, plan->fct[k].tw); else if(ip==2) radb2(ido, l1, p1, p2, plan->fct[k].tw); else if(ip==3) radb3(ido, l1, p1, p2, plan->fct[k].tw); else if(ip==5) radb5(ido, l1, p1, p2, plan->fct[k].tw); else radbg(ido, ip, l1, p1, p2, plan->fct[k].tw, plan->fct[k].tws); SWAP (p1,p2,double *); l1*=ip; } copy_and_norm(c,p1,n,fct); DEALLOC(ch); return 0; } WARN_UNUSED_RESULT static int rfftp_factorize (rfftp_plan plan) { size_t length=plan->length; size_t nfct=0; while ((length%4)==0) { if (nfct>=NFCT) return -1; plan->fct[nfct++].fct=4; length>>=2; } if ((length%2)==0) { length>>=1; // factor 2 should be at the front of the factor list if (nfct>=NFCT) return -1; plan->fct[nfct++].fct=2; SWAP(plan->fct[0].fct, plan->fct[nfct-1].fct,size_t); } size_t maxl=(size_t)(sqrt((double)length))+1; for (size_t divisor=3; (length>1)&&(divisor=NFCT) return -1; plan->fct[nfct++].fct=divisor; length/=divisor; } maxl=(size_t)(sqrt((double)length))+1; } if (length>1) plan->fct[nfct++].fct=length; plan->nfct=nfct; return 0; } static size_t rfftp_twsize(rfftp_plan plan) { size_t twsize=0, l1=1; for (size_t k=0; knfct; ++k) { size_t ip=plan->fct[k].fct, ido= plan->length/(l1*ip); twsize+=(ip-1)*(ido-1); if (ip>5) twsize+=2*ip; l1*=ip; } return twsize; return 0; } WARN_UNUSED_RESULT NOINLINE static int rfftp_comp_twiddle (rfftp_plan plan) { size_t length=plan->length; double *twid = RALLOC(double, 2*length); if (!twid) return -1; sincos_2pibyn_half(length, twid); size_t l1=1; double *ptr=plan->mem; for (size_t k=0; knfct; ++k) { size_t ip=plan->fct[k].fct, ido=length/(l1*ip); if (knfct-1) // last factor doesn't need twiddles { plan->fct[k].tw=ptr; ptr+=(ip-1)*(ido-1); for (size_t j=1; jfct[k].tw[(j-1)*(ido-1)+2*i-2] = twid[2*j*l1*i]; plan->fct[k].tw[(j-1)*(ido-1)+2*i-1] = twid[2*j*l1*i+1]; } } if (ip>5) // special factors required by *g functions { plan->fct[k].tws=ptr; ptr+=2*ip; plan->fct[k].tws[0] = 1.; plan->fct[k].tws[1] = 0.; for (size_t i=1; i<=(ip>>1); ++i) { plan->fct[k].tws[2*i ] = twid[2*i*(length/ip)]; plan->fct[k].tws[2*i+1] = twid[2*i*(length/ip)+1]; plan->fct[k].tws[2*(ip-i) ] = twid[2*i*(length/ip)]; plan->fct[k].tws[2*(ip-i)+1] = -twid[2*i*(length/ip)+1]; } } l1*=ip; } DEALLOC(twid); return 0; } NOINLINE static rfftp_plan make_rfftp_plan (size_t length) { if (length==0) return NULL; rfftp_plan plan = RALLOC(rfftp_plan_i,1); if (!plan) return NULL; plan->length=length; plan->nfct=0; plan->mem=NULL; for (size_t i=0; ifct[i]=(rfftp_fctdata){0,0,0}; if (length==1) return plan; if (rfftp_factorize(plan)!=0) { DEALLOC(plan); return NULL; } size_t tws=rfftp_twsize(plan); plan->mem=RALLOC(double,tws); if (!plan->mem) { DEALLOC(plan); return NULL; } if (rfftp_comp_twiddle(plan)!=0) { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } return plan; } NOINLINE static void destroy_rfftp_plan (rfftp_plan plan) { DEALLOC(plan->mem); DEALLOC(plan); } typedef struct fftblue_plan_i { size_t n, n2; cfftp_plan plan; double *mem; double *bk, *bkf; } fftblue_plan_i; typedef struct fftblue_plan_i * fftblue_plan; NOINLINE static fftblue_plan make_fftblue_plan (size_t length) { fftblue_plan plan = RALLOC(fftblue_plan_i,1); if (!plan) return NULL; plan->n = length; plan->n2 = good_size(plan->n*2-1); plan->mem = RALLOC(double, 2*plan->n+2*plan->n2); if (!plan->mem) { DEALLOC(plan); return NULL; } plan->bk = plan->mem; plan->bkf = plan->bk+2*plan->n; /* initialize b_k */ double *tmp = RALLOC(double,4*plan->n); if (!tmp) { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } sincos_2pibyn(2*plan->n,tmp); plan->bk[0] = 1; plan->bk[1] = 0; size_t coeff=0; for (size_t m=1; mn; ++m) { coeff+=2*m-1; if (coeff>=2*plan->n) coeff-=2*plan->n; plan->bk[2*m ] = tmp[2*coeff ]; plan->bk[2*m+1] = tmp[2*coeff+1]; } /* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */ double xn2 = 1./plan->n2; plan->bkf[0] = plan->bk[0]*xn2; plan->bkf[1] = plan->bk[1]*xn2; for (size_t m=2; m<2*plan->n; m+=2) { plan->bkf[m] = plan->bkf[2*plan->n2-m] = plan->bk[m] *xn2; plan->bkf[m+1] = plan->bkf[2*plan->n2-m+1] = plan->bk[m+1] *xn2; } for (size_t m=2*plan->n;m<=(2*plan->n2-2*plan->n+1);++m) plan->bkf[m]=0.; plan->plan=make_cfftp_plan(plan->n2); if (!plan->plan) { DEALLOC(tmp); DEALLOC(plan->mem); DEALLOC(plan); return NULL; } if (cfftp_forward(plan->plan,plan->bkf,1.)!=0) { DEALLOC(tmp); DEALLOC(plan->mem); DEALLOC(plan); return NULL; } DEALLOC(tmp); return plan; } NOINLINE static void destroy_fftblue_plan (fftblue_plan plan) { DEALLOC(plan->mem); destroy_cfftp_plan(plan->plan); DEALLOC(plan); } NOINLINE WARN_UNUSED_RESULT static int fftblue_fft(fftblue_plan plan, double c[], int isign, double fct) { size_t n=plan->n; size_t n2=plan->n2; double *bk = plan->bk; double *bkf = plan->bkf; double *akf = RALLOC(double, 2*n2); if (!akf) return -1; /* initialize a_k and FFT it */ if (isign>0) for (size_t m=0; m<2*n; m+=2) { akf[m] = c[m]*bk[m] - c[m+1]*bk[m+1]; akf[m+1] = c[m]*bk[m+1] + c[m+1]*bk[m]; } else for (size_t m=0; m<2*n; m+=2) { akf[m] = c[m]*bk[m] + c[m+1]*bk[m+1]; akf[m+1] =-c[m]*bk[m+1] + c[m+1]*bk[m]; } for (size_t m=2*n; m<2*n2; ++m) akf[m]=0; if (cfftp_forward (plan->plan,akf,fct)!=0) { DEALLOC(akf); return -1; } /* do the convolution */ if (isign>0) for (size_t m=0; m<2*n2; m+=2) { double im = -akf[m]*bkf[m+1] + akf[m+1]*bkf[m]; akf[m ] = akf[m]*bkf[m] + akf[m+1]*bkf[m+1]; akf[m+1] = im; } else for (size_t m=0; m<2*n2; m+=2) { double im = akf[m]*bkf[m+1] + akf[m+1]*bkf[m]; akf[m ] = akf[m]*bkf[m] - akf[m+1]*bkf[m+1]; akf[m+1] = im; } /* inverse FFT */ if (cfftp_backward (plan->plan,akf,1.)!=0) { DEALLOC(akf); return -1; } /* multiply by b_k */ if (isign>0) for (size_t m=0; m<2*n; m+=2) { c[m] = bk[m] *akf[m] - bk[m+1]*akf[m+1]; c[m+1] = bk[m+1]*akf[m] + bk[m] *akf[m+1]; } else for (size_t m=0; m<2*n; m+=2) { c[m] = bk[m] *akf[m] + bk[m+1]*akf[m+1]; c[m+1] =-bk[m+1]*akf[m] + bk[m] *akf[m+1]; } DEALLOC(akf); return 0; } WARN_UNUSED_RESULT static int cfftblue_backward(fftblue_plan plan, double c[], double fct) { return fftblue_fft(plan,c,1,fct); } WARN_UNUSED_RESULT static int cfftblue_forward(fftblue_plan plan, double c[], double fct) { return fftblue_fft(plan,c,-1,fct); } WARN_UNUSED_RESULT static int rfftblue_backward(fftblue_plan plan, double c[], double fct) { size_t n=plan->n; double *tmp = RALLOC(double,2*n); if (!tmp) return -1; tmp[0]=c[0]; tmp[1]=0.; memcpy (tmp+2,c+1, (n-1)*sizeof(double)); if ((n&1)==0) tmp[n+1]=0.; for (size_t m=2; mn; double *tmp = RALLOC(double,2*n); if (!tmp) return -1; for (size_t m=0; mblueplan=0; plan->packplan=0; if ((length<50) || (largest_prime_factor(length)<=sqrt(length))) { plan->packplan=make_cfftp_plan(length); if (!plan->packplan) { DEALLOC(plan); return NULL; } return plan; } double comp1 = cost_guess(length); double comp2 = 2*cost_guess(good_size(2*length-1)); comp2*=1.5; /* fudge factor that appears to give good overall performance */ if (comp2blueplan=make_fftblue_plan(length); if (!plan->blueplan) { DEALLOC(plan); return NULL; } } else { plan->packplan=make_cfftp_plan(length); if (!plan->packplan) { DEALLOC(plan); return NULL; } } return plan; } static void destroy_cfft_plan (cfft_plan plan) { if (plan->blueplan) destroy_fftblue_plan(plan->blueplan); if (plan->packplan) destroy_cfftp_plan(plan->packplan); DEALLOC(plan); } WARN_UNUSED_RESULT static int cfft_backward(cfft_plan plan, double c[], double fct) { if (plan->packplan) return cfftp_backward(plan->packplan,c,fct); // if (plan->blueplan) return cfftblue_backward(plan->blueplan,c,fct); } WARN_UNUSED_RESULT static int cfft_forward(cfft_plan plan, double c[], double fct) { if (plan->packplan) return cfftp_forward(plan->packplan,c,fct); // if (plan->blueplan) return cfftblue_forward(plan->blueplan,c,fct); } typedef struct rfft_plan_i { rfftp_plan packplan; fftblue_plan blueplan; } rfft_plan_i; static rfft_plan make_rfft_plan (size_t length) { if (length==0) return NULL; rfft_plan plan = RALLOC(rfft_plan_i,1); if (!plan) return NULL; plan->blueplan=0; plan->packplan=0; if ((length<50) || (largest_prime_factor(length)<=sqrt(length))) { plan->packplan=make_rfftp_plan(length); if (!plan->packplan) { DEALLOC(plan); return NULL; } return plan; } double comp1 = 0.5*cost_guess(length); double comp2 = 2*cost_guess(good_size(2*length-1)); comp2*=1.5; /* fudge factor that appears to give good overall performance */ if (comp2blueplan=make_fftblue_plan(length); if (!plan->blueplan) { DEALLOC(plan); return NULL; } } else { plan->packplan=make_rfftp_plan(length); if (!plan->packplan) { DEALLOC(plan); return NULL; } } return plan; } static void destroy_rfft_plan (rfft_plan plan) { if (plan->blueplan) destroy_fftblue_plan(plan->blueplan); if (plan->packplan) destroy_rfftp_plan(plan->packplan); DEALLOC(plan); } WARN_UNUSED_RESULT static int rfft_backward(rfft_plan plan, double c[], double fct) { if (plan->packplan) return rfftp_backward(plan->packplan,c,fct); else // if (plan->blueplan) return rfftblue_backward(plan->blueplan,c,fct); } WARN_UNUSED_RESULT static int rfft_forward(rfft_plan plan, double c[], double fct) { if (plan->packplan) return rfftp_forward(plan->packplan,c,fct); else // if (plan->blueplan) return rfftblue_forward(plan->blueplan,c,fct); } static PyObject * execute_complex(PyObject *a1, int is_forward, double fct) { PyArrayObject *data = (PyArrayObject *)PyArray_FromAny(a1, PyArray_DescrFromType(NPY_CDOUBLE), 1, 0, NPY_ARRAY_ENSURECOPY | NPY_ARRAY_DEFAULT | NPY_ARRAY_ENSUREARRAY | NPY_ARRAY_FORCECAST, NULL); if (!data) return NULL; int npts = PyArray_DIM(data, PyArray_NDIM(data) - 1); cfft_plan plan=NULL; int nrepeats = PyArray_SIZE(data)/npts; double *dptr = (double *)PyArray_DATA(data); int fail=0; Py_BEGIN_ALLOW_THREADS; plan = make_cfft_plan(npts); if (!plan) fail=1; if (!fail) for (int i = 0; i < nrepeats; i++) { int res = is_forward ? cfft_forward(plan, dptr, fct) : cfft_backward(plan, dptr, fct); if (res!=0) { fail=1; break; } dptr += npts*2; } if (plan) destroy_cfft_plan(plan); Py_END_ALLOW_THREADS; if (fail) { Py_XDECREF(data); return PyErr_NoMemory(); } return (PyObject *)data; } static PyObject * execute_real_forward(PyObject *a1, double fct) { rfft_plan plan=NULL; int fail = 0; PyArrayObject *data = (PyArrayObject *)PyArray_FromAny(a1, PyArray_DescrFromType(NPY_DOUBLE), 1, 0, NPY_ARRAY_DEFAULT | NPY_ARRAY_ENSUREARRAY | NPY_ARRAY_FORCECAST, NULL); if (!data) return NULL; int ndim = PyArray_NDIM(data); const npy_intp *odim = PyArray_DIMS(data); int npts = odim[ndim - 1]; npy_intp *tdim=(npy_intp *)malloc(ndim*sizeof(npy_intp)); if (!tdim) { Py_XDECREF(data); return NULL; } for (int d=0; d