/*
 * SDSPFFT  FFT S-function.
 * DSP Blockset S-Function to perform an FFT on real or complex inputs. 
 * The outputs are always complex.
 *
 * This SIMULINK S-function computes the FFT of the input vector; 
 * It uses a Cooley-Tukey radix-2, decimation-in-frequency FFT algorithm.
 *
 *  Copyright 1995-2000 The MathWorks, Inc.
 *  $Revision: 1.20 $  $Date: 2000/06/12 19:32:47 $
 */
#define S_FUNCTION_NAME sdspfft
#define S_FUNCTION_LEVEL 2

#define DATA_TYPES_IMPLEMENTED

#include "dsp_sim.h"
#include "dspfft_rt.h"

enum {INPORT=0, NUM_INPORTS}; 
enum {OUTPORT=0, NUM_OUTPORTS}; 
enum {NCHANS_ARGC=0, NUM_ARGS};
#define NCHANS_ARG (ssGetSFcnParam(S,NCHANS_ARGC))


#define MDL_CHECK_PARAMETERS
static void mdlCheckParameters(SimStruct *S)
{
#ifdef MATLAB_MEX_FILE
    real_T d;
    int_T  i;

    /* Number of channels */
    if OK_TO_CHECK_VAR(S, NCHANS_ARG) { 
        if ((mxGetNumberOfElements(NCHANS_ARG) != 1) || 
            !mxIsNumeric(NCHANS_ARG)  || 
            !mxIsDouble(NCHANS_ARG) || 
            mxIsEmpty(NCHANS_ARG)   ||
            mxIsComplex(NCHANS_ARG)) {
            THROW_ERROR(S, "Number of channels must be a scalar.");
        }

        d = mxGetPr(NCHANS_ARG)[0];
        i = (int_T)d;
        if ( (d != i) || (i < 1) ) {
            THROW_ERROR(S, "Number of channels must be an integer > 0");
        }
    }
#endif
}



static void mdlInitializeSizes(SimStruct *S)
{
    ssSetNumSFcnParams(S, NUM_ARGS);

#if defined(MATLAB_MEX_FILE)
    if (ssGetNumSFcnParams(S) != ssGetSFcnParamsCount(S)) return;
    mdlCheckParameters(S);
    if (ssGetErrorStatus(S) != NULL) return;
#endif

	/* Parameters */
    ssSetSFcnParamNotTunable(S, NCHANS_ARGC);
    
	/* Inputs: */
    if (!ssSetNumInputPorts(S, NUM_INPORTS)) return;
    ssSetInputPortWidth(            S, INPORT, DYNAMICALLY_SIZED);
    ssSetInputPortDirectFeedThrough(S, INPORT, 1);
    ssSetInputPortComplexSignal(    S, INPORT, COMPLEX_INHERITED);
    ssSetInputPortReusable(         S, INPORT, 1);
    ssSetInputPortOverWritable(     S, INPORT, 1); 

    /* Outputs: */
    if (!ssSetNumOutputPorts(S,NUM_OUTPORTS)) return;
    ssSetOutputPortWidth(        S, OUTPORT, DYNAMICALLY_SIZED);
    ssSetOutputPortComplexSignal(S, OUTPORT, COMPLEX_YES);
    ssSetOutputPortReusable(     S, OUTPORT, 1);

    ssSetNumSampleTimes(S, 1);
    ssSetOptions(       S, SS_OPTION_EXCEPTION_FREE_CODE |
                        SS_OPTION_USE_TLC_WITH_ACCELERATOR);
}


static void mdlInitializeSampleTimes(SimStruct *S)
{
    ssSetSampleTime(S, 0, INHERITED_SAMPLE_TIME);
    ssSetOffsetTime(S, 0, FIXED_IN_MINOR_STEP_OFFSET);
}



#define MDL_START
static void mdlStart(SimStruct *S)
{
#ifdef MATLAB_MEX_FILE
    const int_T     nchans    = (int_T)mxGetPr(NCHANS_ARG)[0];
    const int_T     N         = ssGetInputPortWidth(S,INPORT) / nchans;
    int_T           dummy;

    if (ssGetSampleTime(S,0) == CONTINUOUS_SAMPLE_TIME) {
        THROW_ERROR(S, "Continuous sample times not allowed for FFT block.");
    }

    /* Check to make sure input size is a power of 2: */
    if (frexp((real_T)N, &dummy) != 0.5) {
        THROW_ERROR(S,"Width of input to FFT must be a power of 2");
    }
    if (N == 1) {
      mexWarnMsgTxt("Computing the FFT of a scalar input.\n");
    }
#endif
}


static void mdlOutputs(SimStruct *S, int_T tid)
{
    const boolean_T c0        = (boolean_T)(ssGetInputPortComplexSignal(S,INPORT) == COMPLEX_YES);
    const int_T     nchans    = (int_T)mxGetPr(NCHANS_ARG)[0];
    const int_T     N         = ssGetInputPortWidth(S,INPORT) / nchans;
    creal_T        *y         = (creal_T *)ssGetOutputPortSignal(S,OUTPORT);
    const boolean_T need_copy = (boolean_T)(ssGetInputPortBufferDstPort(S, INPORT) != OUTPORT);
    
    if (!c0) {
        /* Real */
        const int_T N2 = N >> 1;  /* Length of complex FFT is half width */
        const int_T N4 = N2 >> 1; /* Half length of complex FFT  */
        InputRealPtrsType uptr = ssGetInputPortRealSignalPtrs(S, INPORT);
        
        int_T f;
        
        if(!need_copy) THROW_ERROR(S,"Real input cannot share buffer with complex output.");
        
        for (f = 0; f++ < nchans; ) {
            
            /* Scalar */
            if (N == 1) {
                y->re = **uptr++;
                (y++)->im = 0.0;
                
            } else {
                /* A length-N real input is interpreted as a length-N/2 complex input */
                
                /* Real input will never share a buffer with complex output. */
                /* Copy inputs to outputs for dspfft in-place algorithm.     */
                int_T i;
                for (i=N2; i-- > 0;) {
                    y->re     = **uptr++;
                    (y++)->im = **uptr++;
                }
                y -= N2;
                
                /* Compute length N/2 FFT: */
                dspfft(N2, y);
                
                {
                    creal_T      W     = {1.0, 0.0};
                    const real_T theta = -8 * atan(1.0) / N;
                    creal_T      twid;
                    int_T        i;
                    
                    /* Complex exponential: cos+jsin */
                    twid.re = cos(theta);
                    twid.im = sin(theta);
                    
                    y[N2].re = y[0].re - y[0].im;
                    y[N2].im = 0.0;
                    
                    y[0].re += y[0].im; 
                    y[0].im  = 0.0;
                    
                    for (i = 1; i < N4; i++) {
                        creal_T a, b, c, d;
                        
                        a = y[i];
                        b = y[N2-i];
                        
                        c.re = 0.5 * (a.re + b.re);
                        c.im = 0.5 * (a.im - b.im);
                        
                        d.re =  0.5 * (a.im + b.im);
                        d.im = -0.5 * (a.re - b.re);
                        
                        {
                            creal_T ctemp;
                            
                            /* W *= twid */
                            ctemp.re = CMULT_RE(W, twid);
                            ctemp.im = CMULT_IM(W, twid);
                            W = ctemp;
                            
                            /* Precompute W*d: */
                            ctemp.re = CMULT_RE(W, d);
                            ctemp.im = CMULT_IM(W, d);
                            
                            /* y[i] = c + W * d */
                            y[i].re = c.re + ctemp.re;
                            y[i].im = c.im + ctemp.im;
                            
                            /* y[N2 - i] = conj(c - W*d) */
                            y[N2 - i].re =  c.re - ctemp.re;
                            y[N2 - i].im = -c.im + ctemp.im;
                        }
                    }            
                    y[N4].im = -y[N4].im;
                    
                    /* y[i+N2] = conj(y[N2-i]) */
                    for (i = 1; i < N2; i++) {
                        y[i + N2].re =  y[N2 - i].re;
                        y[i + N2].im = -y[N2 - i].im;
                    }
                }
                /* Make sure uptr and y are set up for next channel */
                y += N;
                
            } /* end of else N > 1 case */
        } /* end of nChans for loop */
        
    } else {    /* Complex */
        
        InputPtrsType uptr = ssGetInputPortSignalPtrs(S,INPORT);
        int_T f;
        
        for (f = 0; f++ < nchans; ) {
            
            /* Copy inputs to outputs if not sharing buffer */
            if (need_copy) {
                int_T i;
                
                /* Copy inputs into outputs, since FFT operates in place: */
                for (i=0; i++ < N;) {
                    *y++ = *((creal_T *)(*uptr++));
                }
                y -= N;
            }
            
            dspfft(N, y);
            y += N;
        }
    }
}


static void mdlTerminate(SimStruct *S)
{
}


#ifdef	MATLAB_MEX_FILE
#include "simulink.c"
#else
#include "cg_sfun.h"
#endif
