/*
 * File: sdspldl2.c
 *
 * Abstract:
 *      S-function for LDL factorization
 *
 * Copyright 1995-2000 The MathWorks, Inc.
 * $Revision: 1.15 $
 * $Date: 2000/07/21 18:32:41 $
 */
#define S_FUNCTION_NAME  sdspldl2
#define S_FUNCTION_LEVEL 2

#include "dsp_sim.h"

enum {V_IDX=0, NUM_DWORKS};

enum {INPORT_A=0, NUM_INPORTS};

enum {OUTPORT_LDL=0, NUM_OUTPORTS};

enum {ERR_ARGC=0, NUM_PARAMS};
#define ERR_ARG(S) (ssGetSFcnParam(S,ERR_ARGC))

typedef enum {ERR_IGNORE=1, ERR_WARNING, ERR_ERROR} ErrMode;

#define MDL_CHECK_PARAMETERS
static void mdlCheckParameters(SimStruct *S) {
#ifdef MATLAB_MEX_FILE
    if (!IS_FLINT_IN_RANGE(ERR_ARG(S),1,3)) {
        THROW_ERROR(S, "Error mode must be 1=IGNORE, 2=WARNING, or 3=ERROR");
    }
#endif
}

/*
 *  This code does not check for realness on the main diagonal,
 *  but to prevent any imaginary part from affecting the results,
 *  we set it to zero. The code could take this into account, but
 *  it does not.
 *
 *  Only the real part of the diagonal and the lower triangle of the
 *  input matrix is used.
 */
static boolean_T ldl_cplx(
    creal_T    *A,
    creal_T    *V,
    const int_T n
)
{
    int_T j;
    creal_T *Aji;
    creal_T *Vi;

    for(j=0; j<n; j++) {
        Aji = A+j;
        Vi = V;
        {
            creal_T *Aii = A;
            int_T   i;

            (Aji+j*n)->im = 0.0; /* Remove imaginary part on diagonal */
            for (i=j; i-- > 0; ) {
                Vi->re     = CMULT_XCONJ_RE(*Aji, *Aii);
                (Vi++)->im = CMULT_XCONJ_IM(*Aji, *Aii);
                Aii += n+1;
                Aji += n;
            }
        }
        {
            /* At this point, Aji points to A[j,j], vi points to v[j] */
            creal_T *Vj  = Vi;
            Vi  = V;
            *Vj = *Aji;
            Aji = A+j;  /* Point to A[j,i] for i=1 */
            {
                real_T Vjsum = Vj->re;
                int_T  i;
                for(i=j; i-- > 0; ) {
                    /* Diag is real, no need to compute imaginary part */
                    Vjsum -= CMULT_RE(*Aji, *Vi);
                    Vi++;
                    Aji += n;
                }
				/* Check for positive definiteness */
				if (Vjsum <= 0.0) {
					return((boolean_T)1);  /* Return with error flag set */
				}
                Vj->re = Vjsum;
                Vj->im = 0.0;
            }

            /* At this point, Aji points to A[j,j] */
            *Aji = *Vj;
            {
                creal_T *Ajpki = A+j+1;
                int_T   i;
                Vi = V;
                for (i=j; i-- > 0; ) {
                    creal_T *Ajik = Aji;  /* init to subdiagonal */
                    const creal_T Vi_val = *Vi++;
                    int_T k;
                    for (k=n-j-1; k-- > 0; ) {
                        (++Ajik)->re -= CMULT_RE(*Ajpki, Vi_val);
                        Ajik->im     -= CMULT_IM(*Ajpki, Vi_val);
                        Ajpki++;
                    }
                    /* At this point, Ajpki points to A[1, i+1]
                     * Increment so Ajpki points to A[j+1, i+1]
                     */
                    Ajpki += j+1;
                }
            }
            {
                creal_T *Ajik = Aji;
                int_T k;
#if 1
                /* Better accuracy: */
                const creal_T Vjval = *Vj;
                for (k=n-j-1; k-- > 0; ) {
                    creal_T Ajik_val;
                    Ajik_val = *(++Ajik);
                    CDIV(Ajik_val, Vjval, *Ajik);
                }
#else
                /* More efficient: */
                const creal_T Vjrecip = CRECIP(*Vj);
                for (k=n-j-1; k-- > 0; ) {
                    creal_T Ajik_val;
                    Ajik_val = *(++Ajik);
                    Ajik->re = CMULT_RE(Ajik_val, Vjrecip);
                    Ajik->im = CMULT_IM(Ajik_val, Vjrecip);
                }
#endif
            }
        }
    } /* j loop */

    /* Transpose and copy upper subtriang to lower */
    {
        int_T c;
        for (c=0; c<n; c++) {
            int_T r;
            for (r=c; r<n; r++) {
                A[r*n+c].re =  A[c*n+r].re;
                A[r*n+c].im = -A[c*n+r].im; /* Hermitian transpose */
            }
        }
    }
    return((boolean_T)0);
}


/*
 *  Only the lower triangle of the input matrix is used.
 */
static boolean_T ldl_real(
    real_T     *A,
    real_T     *V,
    const int_T n
)
{
    int_T j;
    for(j=0; j<n; j++) {
        real_T *Aji = A+j;
        real_T *Vi = V;
        {
            real_T *Aii = A;
            int_T   i;
            for (i=j; i-- > 0; ) {
                *Vi++ = *Aji * *Aii;  /* conj(Aji) */
                Aii += n+1;
                Aji += n;
            }
        }
        {
            /* At this point, Aji points to A[j,j], vi points to v[j] */
            real_T *Vj  = Vi;
            Vi  = V;
            *Vj = *Aji;
            Aji = A+j;  /* Point to A[j,i] for i=1 */
            {
                real_T Vjsum = *Vj;
                int_T  i;
                for(i=j; i-- > 0; ) {
                    Vjsum -= *Aji * *Vi++;
                    Aji += n;
                }
		/* Check for pos def */
		if ( Vjsum <= 0.0 ) {
		  return((boolean_T)1);  /* Return with error flag set */
		}
                *Vj = Vjsum;
            }

            /* At this point, Aji points to A[j,j] */
            *Aji = *Vj;
            {
                real_T *Ajpki = A+j+1;
                int_T   i;
                Vi = V;
                for (i=j; i-- > 0; ) {
                    real_T *Ajik = Aji;  /* init to subdiagonal */
                    const real_T Vi_val = *Vi++;
                    int_T k;
                    for (k=n-j-1; k-- > 0; ) {
                        *(++Ajik) -= *Ajpki++ * Vi_val;
                    }
                    /* At this point, Ajpki points to A[1, i+1]
                     * Increment so Ajpki points to A[j+1, i+1]
                     */
                    Ajpki += j+1;
                }
            }
            {
                real_T *Ajik = Aji;
                int_T k;
#if 1
                /* Better accuracy: */
                const real_T Vjval = *Vj;
                for (k=n-j-1; k-- > 0; ) {
                    *(++Ajik) /= Vjval;
                }
#else
                /* More efficient: */
                const real_T Vjrecip = 1.0 / *Vj;
                for (k=n-j-1; k-- > 0; ) {
                    *(++Ajik) *= Vjrecip;
                }
#endif
            }
        }
    } /* j loop */

    /* Transpose and copy upper subtriang to lower */
    {
        int_T c;
        for (c=0; c<n; c++) {
            int_T r;
            for (r=c; r<n; r++) {
                A[r*n+c] = A[c*n+r];
            }
        }
    }
    return((boolean_T)0);
}


static void mdlInitializeSizes(SimStruct *S)
{
    ssSetNumSFcnParams(S, NUM_PARAMS);
    
#if defined(MATLAB_MEX_FILE)
    if (ssGetNumSFcnParams(S) != ssGetSFcnParamsCount(S)) return;
    mdlCheckParameters(S);
    if (ssGetErrorStatus(S) != NULL) return;
#endif
        
    if (!ssSetNumInputPorts(S, NUM_INPORTS)) return;
    if (!ssSetInputPortDimensionInfo(S, INPORT_A, DYNAMIC_DIMENSION)) return;
    ssSetInputPortFrameData(         S, INPORT_A, FRAME_INHERITED);
    ssSetInputPortDirectFeedThrough( S, INPORT_A, 1);
    ssSetInputPortComplexSignal(     S, INPORT_A, COMPLEX_INHERITED);
    ssSetInputPortReusable(          S, INPORT_A, 1);
    ssSetInputPortOverWritable(      S, INPORT_A, 1);
    ssSetInputPortDataType(          S, INPORT_A, SS_DOUBLE);

    if (!ssSetNumOutputPorts(S, NUM_OUTPORTS)) return;
    if (!ssSetOutputPortDimensionInfo(S, OUTPORT_LDL, DYNAMIC_DIMENSION)) return;
    ssSetOutputPortFrameData(         S, OUTPORT_LDL, FRAME_NO);
    ssSetOutputPortComplexSignal(     S, OUTPORT_LDL, COMPLEX_INHERITED);
    ssSetOutputPortReusable(          S, OUTPORT_LDL, 1);
    ssSetOutputPortDataType(          S, OUTPORT_LDL, SS_DOUBLE);

    if(!ssSetNumDWork(S, DYNAMICALLY_SIZED)) return;

    ssSetNumSampleTimes(S, 1);
    ssSetOptions(S, SS_OPTION_EXCEPTION_FREE_CODE |
                 SS_OPTION_USE_TLC_WITH_ACCELERATOR);
}


static void mdlInitializeSampleTimes(SimStruct *S)
{
    ssSetSampleTime(S, 0, INHERITED_SAMPLE_TIME);
    ssSetOffsetTime(S, 0, 0.0);
}


static void mdlOutputs(SimStruct *S, int_T tid)
{
    boolean_T cA = (boolean_T)(ssGetInputPortComplexSignal(S,INPORT_A) == COMPLEX_YES);
    const ErrMode err_mode = (ErrMode)((int_T)(mxGetPr(ERR_ARG(S))[0]));

    /* Copy input elements to output storage area: */
    {
        const boolean_T need_copy = (boolean_T)(ssGetInputPortBufferDstPort(S, INPORT_A) != OUTPORT_LDL);
        if (need_copy) {
            int_T i = ssGetInputPortWidth(S, INPORT_A);

            if (cA) {
                InputPtrsType pA = ssGetInputPortSignalPtrs(S, INPORT_A);
                creal_T      *y  = (creal_T *)ssGetOutputPortSignal(S, OUTPORT_LDL);
                while(i-- > 0) {
                    *y++ = *((creal_T *)(*pA++));
                }
            } else {
                InputRealPtrsType pA = ssGetInputPortRealSignalPtrs(S, INPORT_A);
                real_T           *y  = ssGetOutputPortRealSignal(S, OUTPORT_LDL);
                while(i-- > 0) {
                    *y++ = **pA++;
                }
            }
        }
    }
    
    /* Compute LDL decomposition: */
    {
        const int_T *dims = ssGetOutputPortDimensions(S,OUTPORT_LDL);
        const int_T  N    = dims[0];
        void        *pLDL = ssGetOutputPortSignal(S, OUTPORT_LDL);
        void        *V    = ssGetDWork(S, V_IDX);
        boolean_T    err;

        if (cA) {
            err = ldl_cplx((creal_T *)pLDL, (creal_T *)V, N);
        } else {
            err = ldl_real((real_T  *)pLDL, (real_T *)V,  N);
        }

#ifdef MATLAB_MEX_FILE
        if (err) {
            /* Input matrix not positive definite */
            static char header[] = "Input matrix to block '%s' is not positive definite.";
            static char msg_err[] = "Input matrix is not positive definite.";
            static char msg_warn[256];
			int_T i;
            if (err_mode == ERR_WARNING) {
				if (strlen(header) - 2 + strlen(ssGetPath(S)) > 255) {
					strcpy(msg_warn,"Input matrix to LDL Factorization is not positive definite.");
                } else {
					sprintf(msg_warn,header, ssGetPath(S));
					for (i=strlen(msg_warn); i-- > 0; ) {
						if (msg_warn[i] == '\n') msg_warn[i] = ' ';
					}
					mexWarnMsgTxt(msg_warn);
				}
            } else if (err_mode == ERR_ERROR) {
                ssSetErrorStatus(S, msg_err);
            }
        }
#endif

    }
}


static void mdlTerminate(SimStruct *S)
{
}


#if defined(MATLAB_MEX_FILE)
#define MDL_SET_INPUT_PORT_DIMENSION_INFO
static void mdlSetInputPortDimensionInfo(SimStruct *S, 
                                      int_T port,
                                      const DimsInfo_T *dimsInfo)
{
    if(!ssSetInputPortDimensionInfo(S, port, dimsInfo)) return;

    ErrorIfInputIsNotSquareMatrix(S, port);

    /* Set output port */
    if (isOutputDynamicallySized(S, OUTPORT_LDL)) {
        if(!ssSetOutputPortMatrixDimensions(S, OUTPORT_LDL, dimsInfo->dims[0], dimsInfo->dims[0])) return;
    }
}


# define MDL_SET_OUTPUT_PORT_DIMENSION_INFO
static void mdlSetOutputPortDimensionInfo(SimStruct        *S, 
                                          int_T            port,
                                          const DimsInfo_T *dimsInfo)
{
    if(!ssSetOutputPortDimensionInfo(S, port, dimsInfo)) return;
    
    ErrorIfOutputIsNotSquareMatrix(S, port);

    /* Set input port */
    if (!isOutputScalar(S, port) && isInputDynamicallySized(S, INPORT_A)) {
        if (!ssSetInputPortDimensionInfo(S, INPORT_A, dimsInfo)) return;
    }
}


#define MDL_SET_INPUT_PORT_FRAME_DATA
static void mdlSetInputPortFrameData(SimStruct *S, 
                                      int_T port,
                                      Frame_T frameData)
{
    ssSetInputPortFrameData(S, port, frameData);
}


#define MDL_SET_WORK_WIDTHS
static void mdlSetWorkWidths(SimStruct *S)
{
    const int_T *dims = ssGetOutputPortDimensions(S,OUTPORT_LDL);
    const int_T  N    = dims[0];
    CSignal_T    Cplx = ssGetOutputPortComplexSignal(S, OUTPORT_LDL);

    if(!ssSetNumDWork(      S, NUM_DWORKS)) return;
    ssSetDWorkWidth(        S, V_IDX, N);
    ssSetDWorkDataType(     S, V_IDX, SS_DOUBLE);
    ssSetDWorkComplexSignal(S, V_IDX, Cplx);
}
#endif

#include "dsp_cplxhs11.c"

#include "dsp_trailer.c"

/* [EOF] sdspldl2.c */
