CUDA - cuFFT output not correct
I have a problem with this program:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cufft.h>
#include <cuComplex.h>

#define SIGNAL_SIZE 1024

/*
 * Question listing: builds a sawtooth-like signal, runs a forward and then an
 * inverse FFT on it in place, and prints the original next to the round-trip
 * result.
 *
 * NOTE(review): this is the listing that reproduces the reported problem.
 * The buffers hold cuDoubleComplex elements, but the plan and the exec calls
 * use the single-precision C2C transform (with pointer casts). That mismatch
 * is what the answer below the question addresses; it is left in place here
 * on purpose.
 */
int main(int argc, char **argv)
{
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    // Allocate host memory for the signal
    cuDoubleComplex *h_signal =
        (cuDoubleComplex *) malloc(sizeof(cuDoubleComplex) * SIGNAL_SIZE);

    // Initialize the memory for the signal
    for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
        if ((double)i / SIGNAL_SIZE >= 0 && (double)i / SIGNAL_SIZE < 0.5)
            h_signal[i].x = (double)i / SIGNAL_SIZE;
        else if ((double)i / SIGNAL_SIZE >= 0.5 && (double)i / SIGNAL_SIZE < 1)
            h_signal[i].x = (double)i / SIGNAL_SIZE - 1;
        h_signal[i].y = 0;
    }

    // Allocate device memory for the signal
    cuDoubleComplex *d_signal;
    cudaMalloc((void **) &d_signal, SIGNAL_SIZE * sizeof(cuDoubleComplex));

    // Copy host memory to device
    cudaMemcpy(d_signal, h_signal, SIGNAL_SIZE * sizeof(cuDoubleComplex),
               cudaMemcpyHostToDevice);

    cudaEventRecord(start, 0);

    cufftHandle plan;
    cufftPlan1d(&plan, SIGNAL_SIZE, CUFFT_C2C, 1);

    // FFT computation (forward, then inverse, both in place)
    cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal,
                 CUFFT_FORWARD);
    cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal,
                 CUFFT_INVERSE);

    cuDoubleComplex *h_signal_inv =
        (cuDoubleComplex *) malloc(sizeof(cuDoubleComplex) * SIGNAL_SIZE);
    cudaMemcpy(h_signal_inv, d_signal, sizeof(cuDoubleComplex) * SIGNAL_SIZE,
               cudaMemcpyDeviceToHost);

    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);

    float elapsedTime;
    cudaEventElapsedTime(&elapsedTime, start, stop);
    printf("elapsed time: %3.1f ms\n", elapsedTime);

    for (int i = 0; i < SIGNAL_SIZE; i++)
        printf("\n%f %f", h_signal[i].x, h_signal_inv[i].x);

    cufftDestroy(plan);
    free(h_signal);
    free(h_signal_inv);
    cudaFree(d_signal);
    cudaDeviceReset();
    return 0;
}
I'd like to transform the signal and then come back with the inverse transform, but the output is wrong in the first half.
Can you help me find the errors?
Thank you very much!
You are getting your datatypes confused. `cufftDoubleComplex` is not the same as `cufftComplex`. When using `cufftDoubleComplex`, your transform type should be Z2Z, not C2C.
Also, in order to see data parity when doing a forward transform followed by an inverse transform using cuFFT, it's necessary to divide the result by the signal size:
> cuFFT performs un-normalized FFTs; that is, performing a forward FFT on an input data set followed by an inverse FFT on the resulting set yields data that is equal to the input, scaled by the number of elements. Scaling either transform by the reciprocal of the size of the data set is left for the user to perform as seen fit.
The following code has the above issues addressed and should give better results:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cuda_runtime.h>
#include <cufft.h>
#include <cuComplex.h>

#define SIGNAL_SIZE 1024

/* Abort with file/line context when a CUDA runtime call reports an error. */
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t err_ = (call);                                           \
        if (err_ != cudaSuccess) {                                           \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,    \
                    cudaGetErrorString(err_));                               \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)

/* Abort with file/line context when a cuFFT call reports an error. */
#define CUFFT_CHECK(call)                                                    \
    do {                                                                     \
        cufftResult res_ = (call);                                           \
        if (res_ != CUFFT_SUCCESS) {                                         \
            fprintf(stderr, "cuFFT error %s:%d: code %d\n", __FILE__,        \
                    __LINE__, (int)res_);                                    \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)

/*
 * Builds a SIGNAL_SIZE-point complex signal (real part is i/N in the first
 * half and i/N - 1 in the second half, imaginary part zero), runs a
 * double-precision forward FFT followed by an inverse FFT in place on the
 * device, and prints each original sample next to the round-trip sample.
 *
 * The buffers are cuDoubleComplex, so the plan and exec calls use the Z2Z
 * transform. The round-trip values are divided by SIGNAL_SIZE when printed
 * so they can be compared with the input.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if an allocation, a CUDA
 * runtime call, or a cuFFT call fails.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    cudaEvent_t start, stop;
    CUDA_CHECK(cudaEventCreate(&start));
    CUDA_CHECK(cudaEventCreate(&stop));

    const size_t bytes = sizeof(cuDoubleComplex) * SIGNAL_SIZE;

    // Allocate host memory for the signal and for the round-trip result
    cuDoubleComplex *h_signal = (cuDoubleComplex *) malloc(bytes);
    cuDoubleComplex *h_signal_inv = (cuDoubleComplex *) malloc(bytes);
    if (h_signal == NULL || h_signal_inv == NULL) {
        fprintf(stderr, "host allocation of %zu bytes failed\n", bytes);
        free(h_signal);
        free(h_signal_inv);
        return EXIT_FAILURE;
    }

    // Initialize the signal: t in [0, 0.5) maps to t, t in [0.5, 1) to t - 1
    for (unsigned int i = 0; i < SIGNAL_SIZE; ++i) {
        const double t = (double)i / SIGNAL_SIZE;
        h_signal[i].x = (t < 0.5) ? t : t - 1;
        h_signal[i].y = 0;
    }

    // Allocate device memory for the signal
    cuDoubleComplex *d_signal;
    CUDA_CHECK(cudaMalloc((void **) &d_signal, bytes));

    // Copy host memory to device
    CUDA_CHECK(cudaMemcpy(d_signal, h_signal, bytes, cudaMemcpyHostToDevice));

    CUDA_CHECK(cudaEventRecord(start, 0));

    // Double-precision complex-to-complex plan, one batch
    cufftHandle plan;
    CUFFT_CHECK(cufftPlan1d(&plan, SIGNAL_SIZE, CUFFT_Z2Z, 1));

    // FFT computation (forward, then inverse, both in place)
    CUFFT_CHECK(cufftExecZ2Z(plan, d_signal, d_signal, CUFFT_FORWARD));
    CUFFT_CHECK(cufftExecZ2Z(plan, d_signal, d_signal, CUFFT_INVERSE));

    // Copy the round-trip result back to the host
    CUDA_CHECK(cudaMemcpy(h_signal_inv, d_signal, bytes,
                          cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaEventRecord(stop, 0));
    CUDA_CHECK(cudaEventSynchronize(stop));

    float elapsedTime;
    CUDA_CHECK(cudaEventElapsedTime(&elapsedTime, start, stop));
    printf("elapsed time: %3.1f ms\n", elapsedTime);

    // Divide by SIGNAL_SIZE so the round-trip values line up with the input
    for (int i = 0; i < SIGNAL_SIZE; i++)
        printf("\n%f %f", h_signal[i].x, h_signal_inv[i].x / SIGNAL_SIZE);

    CUFFT_CHECK(cufftDestroy(plan));
    CUDA_CHECK(cudaEventDestroy(start));
    CUDA_CHECK(cudaEventDestroy(stop));
    free(h_signal);
    free(h_signal_inv);
    CUDA_CHECK(cudaFree(d_signal));
    CUDA_CHECK(cudaDeviceReset());
    return 0;
}
cuda cufft
No comments:
Post a Comment