/*
* File: sdspqre2.c
*
* Abstract:
*      DSP Blockset S-function for Economy-sized QR factorization with column pivoting
*
* Copyright 1995-2000 The MathWorks, Inc.
* $Revision: 1.12 $ $Date: 2000/06/11 23:24:15 $
*/
#define S_FUNCTION_NAME  sdspqre2
#define S_FUNCTION_LEVEL 2

#include "dsp_sim.h"
#include "dspqrsl_rt.h"
#include "dspqrdc_rt.h"

enum {QRAUX_IDX=0, WORK_IDX, S_IDX, MAX_NUM_DWORKS};
enum {JPVT_IDX, NUM_IWORKS};
enum {INPORT_A=0, NUM_INPORTS};
enum {OUTPORT_Q=0, OUTPORT_R, OUTPORT_E, NUM_OUTPORTS};
enum {NUM_PARAMS=0};

static void mdlInitializeSizes(SimStruct *S)
{
    ssSetNumSFcnParams(S,  NUM_PARAMS);
    if (ssGetNumSFcnParams(S) != ssGetSFcnParamsCount(S)) return;

    if (!ssSetNumInputPorts(S, NUM_INPORTS)) return;
    if (!ssSetInputPortDimensionInfo(S, INPORT_A, DYNAMIC_DIMENSION)) return;
    ssSetInputPortFrameData(         S, INPORT_A, FRAME_INHERITED);
    ssSetInputPortDirectFeedThrough( S, INPORT_A, 1);
    ssSetInputPortComplexSignal(     S, INPORT_A, COMPLEX_INHERITED);
    ssSetInputPortReusable(          S, INPORT_A, 1);
    ssSetInputPortOverWritable(      S, INPORT_A, 1);
    ssSetInputPortDataType(          S, INPORT_A, SS_DOUBLE);
    
    if (!ssSetNumOutputPorts(S, NUM_OUTPORTS)) return;

    if (!ssSetOutputPortDimensionInfo(S, OUTPORT_Q, DYNAMIC_DIMENSION)) return;
    ssSetOutputPortFrameData(         S, OUTPORT_Q, FRAME_NO);
    ssSetOutputPortComplexSignal(     S, OUTPORT_Q, COMPLEX_INHERITED);
    ssSetOutputPortReusable(          S, OUTPORT_Q, 1);
    ssSetOutputPortDataType(          S, OUTPORT_Q, SS_DOUBLE);
    
    if (!ssSetOutputPortDimensionInfo(S, OUTPORT_R, DYNAMIC_DIMENSION)) return;
    ssSetOutputPortFrameData(         S, OUTPORT_R, FRAME_NO);
    ssSetOutputPortComplexSignal(     S, OUTPORT_R, COMPLEX_INHERITED);
    ssSetOutputPortReusable(          S, OUTPORT_R, 1);
    ssSetOutputPortDataType(          S, OUTPORT_R, SS_DOUBLE);
    
    if (!ssSetOutputPortDimensionInfo(S, OUTPORT_E, DYNAMIC_DIMENSION)) return;
    ssSetOutputPortFrameData(         S, OUTPORT_E, FRAME_NO);
    ssSetOutputPortComplexSignal(     S, OUTPORT_E, COMPLEX_NO);
    ssSetOutputPortReusable(          S, OUTPORT_E, 1);
    ssSetOutputPortDataType(          S, OUTPORT_E, SS_DOUBLE);
    
    if(!ssSetNumDWork(S, DYNAMICALLY_SIZED)) return;
    ssSetNumIWork(S, DYNAMICALLY_SIZED);
    
    ssSetNumSampleTimes(S, 1);
    ssSetOptions(S, SS_OPTION_EXCEPTION_FREE_CODE |
                 SS_OPTION_USE_TLC_WITH_ACCELERATOR);
}



static void mdlInitializeSampleTimes(SimStruct *S)
{
    ssSetSampleTime(S, 0, INHERITED_SAMPLE_TIME);
    ssSetOffsetTime(S, 0, 0.0);
}


/*
 * sdspqre accepts a copy of A in the m-by-n R and returns
 * its QR factorization in Q, R and the permutation vector E.
 * qraux, work and jpvt are used as auxilliary work vectors.
 * If the system is overdetermined, the n-by-n economy-sized
 * upper triangular version of R is copied into S.
 */
static void sdspqre_real(
    int_T	m,
    int_T	n,
    real_T *q,
    real_T	*r,
    real_T	*e,
    real_T *qraux,
    real_T	*work,
    int_T	*jpvt,
    real_T	*s
    )
{
    int_T	i, j, minmn, *pjpvt;
    real_T	*pq, *pr, *ps, *pe, Zero = 0.0, One = 1.0;
    
    dspqrdc_real(m, n, r, qraux, jpvt, work);
    
    /* explicitly form q by manipulating identity */
    minmn = MIN(m,n);
    pq = q;
    for(j=m*minmn; j-- > 0; ) {
        *pq++ = Zero;
    }
    pq = q;	/* pointer to q(j,j) */
    for(j=minmn; j-- > 0; ) {
        *pq = One;
        pq += m+1;
    }
    /*
    * Convert cols of identity into cols of q.
    * sdspqrsl1 uses info stored in lower triangle of r and in
    * vector qraux to work on columns of identity matrix I and
    * transform them into q*I(:,j) i.e. the columns of q.
    */
    pq = q;	/* pointer to q(1,j) */
    for (j=minmn; j-- > 0; ) {
        dspqrsl1_real(m, n, r, qraux, pq);
        pq += m;
    }
    
    if (m > n) { /* Copy upper triangle of r to s */
        pr = r;	/* pointer to r(1:j,j) */
        ps = s;	/* pointer to s(1:j,j) */
        for (j=0; j < n; j++) {
            for(i=0; i <= j; i++) {
                *ps++ = *pr++;
            }
            pr += m-j-1;
            ps += n-j-1;
        }
    } else { /* Zero strict lower triangle of r */
        pr = r + (m-1)*m - 1; /* pointer to r(j:end,j-1) */
        for(j=m-1; j-->0; ) {
            for(i=m; i-- > j+1; ) {
                *pr-- = Zero;
            }
            pr -= (j+1);
        }
    }
    
    /* form permutation vector e */
    pe = e;			/* pointer to e(j) */
    pjpvt = jpvt;	/* pointer to jpvt(j) */
    for (j=n; j-- > 0; ) {
        *pe = (real_T)(*pjpvt) + 1;
        pe++;
        pjpvt++;
    }
}


static void sdspqre_cplx(
    int_T	 m,
    int_T	 n,
    creal_T *q,
    creal_T *r,
    real_T	 *e,
    creal_T *qraux,
    creal_T *work,
    int_T	 *jpvt,
    creal_T *s
    )
{
    int_T	i, j, minmn, *pjpvt;
    creal_T	*pq, *pr, *ps, Zero = {0.0, 0.0}, One = {1.0, 0.0};
    real_T	*pe;
    
    
    dspqrdc_cplx(m, n, r, qraux, jpvt, work);
    
    /* explicitly form q by manipulating identity */
    minmn = MIN(m,n);
    pq = q;
    for(j=m*minmn; j-- > 0; ) {
        *pq++ = Zero;
    }
    pq = q;	/* pointer to q(j,j) */
    for(j=minmn; j-- > 0; ) {
        *pq = One;
        pq += m+1;
    }
    
    /*
    * Convert cols of identity into cols of q.
    * sdspqrsl1 uses info stored in lower triangle of r and in vector qraux
    * to work on columns of identity matrix I and transform them into
    * q*I(:,j) i.e. the columns of q.
    */
    pq = q;	/* pointer to q(1,j) */
    for (j=minmn; j-- > 0; ) {
        dspqrsl1_cplx(m, n, r, qraux, pq);
        pq += m;
    }
    
    if (m > n) { /* Copy upper triangle of r to s */
        pr = r;	/* pointer to r(1:j,j) */
        ps = s;	/* pointer to s(1:j,j) */
        for (j=0; j < n; j++) {
            for(i=0; i <= j; i++) {
                *ps++ = *pr++;
            }
            pr += m-j-1;
            ps += n-j-1;
        }
    } else { /* Zero strict lower triangle of r */
        pr = r + (m-1)*m - 1; /* pointer to r(j:end,j-1) */
        for(j=m-1; j-->0; ) {
            for(i=m; i-- > j+1; ) {
                *pr-- = Zero;
            }
            pr -= (j+1);
        }
    }
    
    /* form permutation vector e */
    pe = e;			/* pointer to e(j) */
    pjpvt = jpvt;	/* pointer to jpvt(j) */
    for (j=n; j-- > 0; ) {
        *pe++ = (real_T)(*pjpvt++) + 1;
    }
    
    /*
    * At this point, MATLAB checks whether Q and R have all-zero
    * imaginary parts in which case it frees that memory.
    * Is there an S-Function equivalent?
    */
}


/*
* Compute the economy-sized qr of m-by-n input A:
* MATLAB command: [Q,R,E] = qr(A,0);
* Q is m-by-min(m,n), R is min(m,n)-by-n and E is 1-by-n
* Q and R have the complexity of A; E is always real.
* qraux and work are length n and have A's complexity.
* If m>n then A is copied to m-by-n work matrix S and
* at the end its n-by-n upper triangle is copied to R.
* If m<=n then A is copied directly to m-by-n output R
* and work matrix S need not be allocated.
*/
static void mdlOutputs(SimStruct *S, int_T tid)
{
    boolean_T cA	= (boolean_T)(ssGetInputPortComplexSignal(S,INPORT_A) == COMPLEX_YES);
    
    const int_T numDimsA = ssGetInputPortNumDimensions(S, INPORT_A);
    const int_T *dimsA   = ssGetInputPortDimensions(S, INPORT_A); 
    int_T        M       = dimsA[0];
    int_T        N       = (numDimsA == 2) ? dimsA[1] : 1;
    int_T        MN      = M * N;

    const boolean_T need_copy = (boolean_T)(ssGetInputPortBufferDstPort(S, INPORT_A) != OUTPORT_R);
    {
        if (need_copy) {
            if (cA) {
                InputPtrsType pA = ssGetInputPortSignalPtrs(S, INPORT_A);
                if (M > N) {
                    creal_T *pS = (creal_T *)(ssGetDWork(S, S_IDX));
                    while(MN-- > 0) {
                        *pS++ = *((creal_T *)(*pA++));
                    }
                } else {
                    creal_T *pR = (creal_T *)(ssGetOutputPortSignal(S, OUTPORT_R));
                    while(MN-- > 0) {
                        *pR++ = *((creal_T *)(*pA++));
                    }
                }
            } else {
                InputRealPtrsType pA = ssGetInputPortRealSignalPtrs(S, INPORT_A);
                if (M > N) {
                    real_T *pS = (real_T *)(ssGetDWork(S, S_IDX));
                    while(MN-- > 0) {
                        *pS++ = **pA++;
                    }
                } else {
                    real_T *pR = (real_T *)(ssGetOutputPortSignal(S, OUTPORT_R));
                    while(MN-- > 0) {
                        *pR++ = **pA++;
                    }
                }
            }
        }
    }
    
    /* Compute the qr factorization */
    {
        void  *pQ     = ssGetOutputPortSignal(S, OUTPORT_Q);
        void  *pR     = ssGetOutputPortSignal(S, OUTPORT_R);
        void  *pE     = ssGetOutputPortSignal(S, OUTPORT_E);
        void  *pqraux = ssGetDWork(S, QRAUX_IDX);
        void  *pwork  = (creal_T *)ssGetDWork(S, WORK_IDX);
        int_T *pjpvt  = ssGetIWork(S);
        void  *pS     = (M > N) ? ssGetDWork(S, S_IDX) : (void *)0;
        

        /* Reset the pivot indices: */
        memset(pjpvt, 0, N * sizeof(int_T));

        if (cA) {
            if (M > N) {
                sdspqre_cplx(M, N, (creal_T *)pQ, (creal_T *)pS, (real_T *)pE,
                    (creal_T *)pqraux, (creal_T *)pwork, pjpvt, (creal_T *)pR);
            } else {
                sdspqre_cplx(M, N, (creal_T *)pQ, (creal_T *)pR, (real_T *)pE,
                    (creal_T *)pqraux, (creal_T *)pwork, pjpvt, (creal_T *)pS);
            }
        } else {
            if (M > N) {
                sdspqre_real(M, N, (real_T *)pQ, (real_T *)pS, (real_T *)pE,
                    (real_T *)pqraux, (real_T *)pwork, pjpvt, (real_T *)pR);
            } else {
                sdspqre_real(M, N, (real_T *)pQ, (real_T *)pR, (real_T *)pE,
                    (real_T *)pqraux, (real_T *)pwork, pjpvt, (real_T *)pS);
            }
        }
    }
}


static void mdlTerminate(SimStruct *S)
{
}



#if defined(MATLAB_MEX_FILE)
#define MDL_SET_INPUT_PORT_DIMENSION_INFO
static void mdlSetInputPortDimensionInfo(SimStruct *S, 
                                      int_T port,
                                      const DimsInfo_T *dimsInfo)
{
    if(!ssSetInputPortDimensionInfo(S, port, dimsInfo)) return;

    /* No input error checking since input can be any dimension. 
     * Unoriented inputs are interpreted as an oriented column vectors. 
     */

    {
        /* Set output ports now that M and N are known:
         * 
         *  OUTPORT_Q: M x MIN(M,N) 
         *  OUTPORT_R: MIN(M,N) x N 
         *  OUTPORT_E: Always unoriented, with N elements
         */

        const int_T numDims = dimsInfo->numDims;
        const int_T M = dimsInfo->dims[0];
        const int_T N = (numDims == 2) ? dimsInfo->dims[1] : 1;
        const int_T P = MIN(M,N);

        if (isOutputDynamicallySized(S, OUTPORT_Q)) {
            if (!ssSetOutputPortMatrixDimensions(S, OUTPORT_Q, M, P)) return;
        }

        if (isOutputDynamicallySized(S, OUTPORT_R)) {
            if (!ssSetOutputPortMatrixDimensions(S, OUTPORT_R, P, N)) return;
        }

        if (isOutputDynamicallySized(S, OUTPORT_E)) {
            if (!ssSetOutputPortVectorDimension( S, OUTPORT_E, N)) return;
        }
    }
}


# define MDL_SET_OUTPUT_PORT_DIMENSION_INFO
static void mdlSetOutputPortDimensionInfo(SimStruct        *S, 
                                          int_T            port,
                                          const DimsInfo_T *dimsInfo)
{
    if(!ssSetOutputPortDimensionInfo(S, port, dimsInfo)) return;

    /* Output error checking: 
     *  Port E must be unoriented 
     *  Ports Q and R must be oriented.
     */
    if (port == OUTPORT_E) {
        ErrorIfOutputIsOriented(S, port);
    } else {
        ErrorIfOutputIsUnoriented(S, port);
    }

    /* Set other ports: 
     */
    
    /* If R is known, E can be set */
    if ((port == OUTPORT_R) && (isOutputDynamicallySized(S, OUTPORT_E))) {
        if (!ssSetOutputPortVectorDimension(S, OUTPORT_E, dimsInfo->dims[1])) return;
    }

    /* If Q and R are known and not scalars, A can be set */
    if ((!isOutputDynamicallySized(S, OUTPORT_Q)) &&  (!isOutputScalar(S, OUTPORT_Q)) &&
        (!isOutputDynamicallySized(S, OUTPORT_R)) &&  (!isOutputScalar(S, OUTPORT_R))
       ) {


        const int_T numDimsR = ssGetInputPortNumDimensions(S, INPORT_A);
        const int_T *dims_Q  = ssGetInputPortDimensions(S, OUTPORT_Q); 
        const int_T *dims_R  = ssGetInputPortDimensions(S, OUTPORT_R); 
        const int_T  M       = dims_Q[0];
        const int_T  N       = (numDimsR == 2) ? dims_R[1] : 1;

        if (!ssSetInputPortMatrixDimensions(S, INPORT_A, M, N)) return;
    }
}


#define MDL_SET_INPUT_PORT_FRAME_DATA
static void mdlSetInputPortFrameData(SimStruct *S, 
                                      int_T port,
                                      Frame_T frameData)
{
    ssSetInputPortFrameData(S, port, frameData);
}


#define MDL_SET_WORK_WIDTHS
static void mdlSetWorkWidths(SimStruct *S)
{
    const int_T  numDimsA = ssGetInputPortNumDimensions(S, INPORT_A);
    const int_T *dims     = ssGetInputPortDimensions(S,INPORT_A);
    const int_T  M        = dims[0];
    const int_T  N        = (numDimsA ==2) ? dims[1] : 1;
    const int_T  MN	  = M * N;

    const int_T N_DWORKS = (M > N) ? MAX_NUM_DWORKS : MAX_NUM_DWORKS-1;
    CSignal_T Cplx = ssGetInputPortComplexSignal(S, INPORT_A);
    
    if(!ssSetNumDWork(      S, N_DWORKS)) return;
    
    ssSetDWorkWidth(        S, QRAUX_IDX, N);
    ssSetDWorkDataType(     S, QRAUX_IDX, SS_DOUBLE);
    ssSetDWorkComplexSignal(S, QRAUX_IDX, Cplx);
    
    ssSetDWorkWidth(        S, WORK_IDX, N);
    ssSetDWorkDataType(     S, WORK_IDX, SS_DOUBLE);
    ssSetDWorkComplexSignal(S, WORK_IDX, Cplx);
    
    if (N_DWORKS == MAX_NUM_DWORKS) {
        ssSetDWorkWidth(        S, S_IDX, MN);
        ssSetDWorkDataType(     S, S_IDX, SS_DOUBLE);
        ssSetDWorkComplexSignal(S, S_IDX, Cplx);
    }

    ssSetNumIWork(S, N);
 }
#endif


#include "dsp_trailer.c"

/* [EOF] sdspqre2.c */
