/*
 * SDSPRDCT  Real DCT S-function.
 * DSP Blockset S-Function to perform a DCT on real data.
 *
 *  Copyright 1995-2000 The MathWorks, Inc. 
 *  $Revision: 1.7 $  $Date: 2000/05/05 20:16:05 $
 */

#define S_FUNCTION_NAME sdsprdct

#include <math.h>
#include "dsp_sim.h"

extern void fft(int_T n, real_T *xr, real_T *xi);


static void mdlInitializeSizes(SimStruct *S)
{
  ssSetNumInputs(        S, DYNAMICALLY_SIZED);
  ssSetNumOutputs(       S, DYNAMICALLY_SIZED);
  ssSetDirectFeedThrough(S, 1);
  ssSetNumSampleTimes(   S, 1);
  ssSetNumRWork(         S, DYNAMICALLY_SIZED); 
  ssSetOptions(          S, SS_OPTION_EXCEPTION_FREE_CODE |
                            SS_OPTION_USING_ssGetUPtrs    ); 
}


static void mdlInitializeSampleTimes(SimStruct *S)
{
  ssSetSampleTimeEvent(S, 0, INHERITED_SAMPLE_TIME);
  ssSetOffsetTimeEvent(S, 0, 0.0);
}


static void mdlInitializeConditions(real_T *x0, SimStruct *S)
{
  int_T numinputs = ssGetNumInputs(S);
  
#ifdef MATLAB_MEX_FILE
  if (numinputs != -1) {
    int_T dummy;
    if (frexp((real_T)numinputs, &dummy) != 0.5) {
      ssSetErrorStatus(S,"Width of input to DCT must be a power of 2");
      return;
    }
    if (numinputs == 1) {
      mexWarnMsgTxt("Computing the DCT of a scalar input.\n");
    }
  }
#endif
  
  /*
   * Need to compute weights to multiply DFT coefficients.
   * By computing the real and imag parts
   *
   * ww = (exp(-i*(0:n-1)*pi/(2*n))/sqrt(2*n)).';
   * ww(1) = ww(1) / sqrt(2);
   *
   * exp(-i*x) = cos(x) - i*sin(x) 
   */
  
  {
    const int_T  N    = ssGetNumInputs(S);
    const real_T piN2 = 2.0*atan(1.0)/N;  /* pi / (2*N) */
    real_T       den  = sqrt(2*N)/2.0;
    real_T      *ww   = ssGetRWork(S);
    int_T        i;

    for (i = 0; i < N; i++) {
      ww[i]     =  cos(i*piN2) / den;
      ww[i + N] = -sin(i*piN2) / den;
    }

    ww[0] /= sqrt(2.0);
  }
}


static void mdlOutputs(real_T *y, const real_T *x, const real_T *u, 
                       SimStruct *S, int_T tid)
{
  
  if (ssIsMajorTimeStep(S)) {
    
    /* 
     * Use a length N/2 complex FFT to compute a length N real FFT.
     * Equivalent MATLAB code:
     *    function y=rfft(x)
     *    
     *      N=length(x);
     *      xe = x(1:2:N);
     *      xo = x(2:2:N);
     *      V = fft(xe + j*xo);
     *    
     *      X0 =    1/2*(V+cflip(conj(V)));
     *      X1 = -j*1/2*(V-cflip(conj(V)));
     *    
     *      W=exp(-j*2*pi*(0:N/2-1)/N);
     *      y=X0(:) + W(:).*X1(:);
     *      y=[y; X0(1) - X1(1); conj(y(N/2:-1:2))];
     *
     * cflip: keeps first element, flips remaining, performs conjugate
     */
    
    const int_T     N    = ssGetNumInputs(S); /* length of real input vector */
    const int_T     N2   = N / 2;             /* length of complex FFT       */
    const int_T     N4   = N2 / 2;            /* half length of complex FFT  */
    UPtrsType       uptr = ssGetUPtrs(S);
    real_T         *yr   = y;
    real_T         *yi   = y + N;
    int_T           i;

    if (N < 4) {
        *yr = **uptr;
        *yi = ((N==1) ? 0.0 :**(uptr+1));
		
	} else {
      for(i = 0; i<N4; i++) {
		*yr++ = **(uptr + 4*i);
		*yi++ = **(uptr + 4*i + 2);
	  }
      
      for(i = 0; i<N4; i++){
		*yr++ = **(uptr + (N - 1) - 4*i);
		*yi++ = **(uptr + (N - 1) - 4*i - 2);
      }
    }		
    
    /* Compute length N/2 FFT: */
    fft(N2, y, y + N);
    
    {
      real_T Wr = 1.0;
      real_T Wi = 0.0;
      /* Use "volatile" to fix PPC/MrC optimizer bug */
      volatile real_T theta = -8 * atan(1.0) / N;
      real_T ct = cos(theta);
      real_T st = sin(theta);
      
      real_T *X0 = y;
      real_T *X1 = y+N;
      
      yr = y;
      yi = y+N;
      
      yr[N2] = X0[0] - X1[0];
      yi[N2] = 0.0;
      
      yr[0]  = X0[0] + X1[0];
      yi[0]  = 0.0;
      
      for (i = 1; i < N4; i++) {
		real_T X0_1  = X0[i];
		real_T X1_1  = X1[i];
		real_T X0_2  = X0[N2 - i];
		real_T X1_2  = X1[N2 - i];
		real_T X0_1_ =  0.5 * (X0_1 + X0_2);
		real_T X0_2_ =  0.5 * (X1_1 - X1_2);
		real_T X1_1_ =  0.5 * (X1_1 + X1_2);
		real_T X1_2_ = -0.5 * (X0_1 - X0_2);
		real_T Wr_t  = Wr;
		Wr = Wr_t * ct - Wi * st;
		Wi = Wr_t * st + Wi * ct;
		yr[i]      =  X0_1_ + Wr * X1_1_ - Wi * X1_2_;
		yi[i]      =  X0_2_ + Wr * X1_2_ + Wi * X1_1_;
		yr[N2 - i] =  X0_1_ - Wr * X1_1_ + Wi * X1_2_;
		yi[N2 - i] = -X0_2_ + Wr * X1_2_ + Wi * X1_1_;
      }
      
      yr[N4] =  X0[N4];
      yi[N4] = -X1[N4];
      
      for (i = 1; i < N2; i++) {
		yr[i + N2] =  yr[N2 - i];
		yi[i + N2] = -yi[N2 - i];
      }
    }
    
    /* Multiply by weights: */
    {
      real_T *wr = ssGetRWork(S);
      real_T *wi = wr + N;
      int_T   i;

      yr = y;
      yi = y + N; 
      
      for (i=0; i<N; i++) {
		*y++ = *yr++ * *wr++ - *yi++ * *wi++;
	  }
    } 
  } 
}


static void mdlUpdate(real_T *x, const real_T *u, SimStruct *S, int_T tid)
{
}


static void mdlDerivatives(real_T *dx, const real_T *x, const real_T *u, 
                           SimStruct *S, int_T tid)
{
}


static void mdlTerminate(SimStruct *S)
{
}

#ifdef MATLAB_MEX_FILE

#define MDL_GET_INPUT_PORT_WIDTH
/* Function: mdlGetInputPortWidth =============================================
 *
 */
static int_T mdlGetInputPortWidth(SimStruct *S, int_T outputPortWidth)
{
  if (outputPortWidth % 2 != 0) {
    ssSetErrorStatus(S,"Invalid output port width");
    return(1);
  }
  return(outputPortWidth/2);
}

#define MDL_GET_OUTPUT_PORT_WIDTH
/* Function: mdlGetOutputPortWidth ============================================
 *
 */
static int_T mdlGetOutputPortWidth(SimStruct *S, int_T inputPortWidth)
{
  return(inputPortWidth*2);
}

#endif

/* Function: mdlSetWorkWidths =================================================
 * Abstract:
 *
 */
#ifdef MATLAB_MEX_FILE
#define MDL_SET_WORK_WIDTHS
static void mdlSetWorkWidths(SimStruct *S)
{
  ssSetNumRWork(S, (int_T)(ssGetNumInputs(S)*2));
}
#endif

#include "dsp_trailer.c"
 
/* [EOF]: sdsprdct.c */