/* $Revision: 1.17 $ */
/*============================================================================
 *  Syntax: [sys, x0, str, ts] =
 *          sviterbi(t, x, u, flag, tran_func, leng, tran_prob, plot_flag);
 *SVITERBA Simulink file for convolution decoding using viterbi algorithm.
 *  This file is designed to be used in a Simulink S-Function block.
 *  The function requires the system inputs
 *  code_param = [N, K, M, num_state, num_std_sta]; % prepared by simviter
 *  tran_func  = [A, B; C, D];                      % prepared by simviter
 *  leng                                            % memory length
 *  tran_prob                                       % transfer probability
 *     % when it is not a three row matrix, take the code as hard decision one.
 *  plot_flag                                       % should plot or not.
 *     % when it is not a positive integer, don't plot.
 *     % when it is a positive integer, keep the plot have the time length
 *     % as required.
 *  This function keeps a number of discrete-time states:
 *  figure number. -Inf: to be opened; 0 not need to plot; positive: handle
 *  figure posit.  Current figure position.
 *  trace_num.     current trace number.
 *  trace_flag.    flag to indicate that computation is not under leng.
 *  stater.        variable in keeping the very beinging of state.
 *  trace.         a LENG-by-NUM_STD_STA matrix storage the traces.
 *  solut.         a LENG-by-NUM_STD_STA matrix storage the possible msg.
 *  expense.       a 2-by-NUM_STD_STA matrix storage the expense.
 *
 *=============================================================================
 * Originally designed by Wes Wang,
 * Copyright 1996-2000 The MathWorks, Inc.
 *
 * Jun Wu,  The MathWorks, Inc.
 * Feb-07, 1996
 *
 *===========================================================================*/
#define S_FUNCTION_NAME sviterba

#ifdef MATLAB_MEX_FILE
#include "mex.h"
#endif

/* need to include simstruc.h for the definition of the SimStruct and
 * its associated macro definitions.
 */ 
#include "simstruc.h"
#include "tmwtypes.h"
#include <math.h>

/* For RTW */
#if defined(RT) || defined(NRT)  
#undef  mexPrintf
#define mexPrintf printf
#endif

#define NUM_ARGS            4           /* four additional input arguments */
#define TRAN_FUNC       ssGetArg(S,0)   /* the length of  output vector */
#define LENG            ssGetArg(S,1)   /* the memory length */
#define TRAN_PROB       ssGetArg(S,2)   /* the transfer probability */
#define PLOT_FLAG       ssGetArg(S,3)   /* the flag of plot */

#define Prim 2
static void de2bi(int_T *pde, int_T dim, int_T pow_dim, int_T *pbi)
{
  int_T     i,j, tmp;

  if( pde[0] < 0 ){
    for(i=0; i < pow_dim; i++){
      tmp = i;
      for(j=0; j < dim; j++){
        pbi[i+j*pow_dim] = tmp % Prim;
        if(j < dim-1)
          tmp = (int_T)(tmp/Prim);
      }
    }
  }else{
    for(i=0; i < pow_dim; i++){
      tmp = pde[i];
      for(j=0; j < dim; j++){
        pbi[i+j*pow_dim] = tmp % Prim;
        if(j < dim-1)
          tmp = (int_T)(tmp/Prim);
      }
    }
  }
}
static void bi2de(int_T *pbi, int_T pow_dim, int_T dim, int_T *pde)
{
  int_T     i, j, k;
  
  for(i=0; i<pow_dim; i++)
    pde[i] = 0;
  
  for (i=0; i < pow_dim; i++){
    for (j=0; j < dim; j++){
      if (pbi[i+j*pow_dim] != 0){
        if(j > 0){
          for (k=0; k < j; k++)
            pbi[i+j*pow_dim] = pbi[i+j*pow_dim]*Prim;
        }
      }
      pde[i] = pde[i] + (int_T)pbi[i+j*pow_dim];
    }
  }
}

static void shortdbl(real_T *expense, real_T *sol, int_T n_std_sta, real_T *Rwork, int_T *Iwork)
{        
  int_T     i, j, k, PowPowM, aft_indx, len_indx, len, indx;
  int_T     *wei_indx, *sub_indx;
  real_T  max;
  real_T  *wei_mat_exp, *wei_sum, *sol_tmp;
  real_T  wei_mat_sol;    
  
  wei_mat_exp = Rwork;
  wei_sum     = Rwork + n_std_sta;
  sol_tmp     = Rwork + 2*n_std_sta;
  wei_indx    = Iwork;
  sub_indx    = Iwork + n_std_sta;
  
  PowPowM = n_std_sta * n_std_sta;
  for(i=0; i < PowPowM; i++)
    sol_tmp[i] = sol[i];
  for(i=1; i <= n_std_sta; i++){
    aft_indx = i * n_std_sta - n_std_sta;
    for(j=1; j <= n_std_sta; j++){
      for(k=0; k < n_std_sta; k++)
        wei_mat_exp[k] = expense[k * n_std_sta + j - 1];
      len_indx = 0;
      for(k=0; k < n_std_sta; k++){
        wei_mat_sol = sol_tmp[aft_indx + k];
        if ( wei_mat_exp[k] > 0 || wei_mat_sol > 0 ) {
          wei_sum[k] = 1;
	    }else{
          wei_sum[k] = wei_mat_exp[k] + wei_mat_sol;
          wei_indx[len_indx] = k;
          len_indx++;
	    }
      }
      
      if (len_indx > 0) {
        len = 0;
        max = wei_sum[wei_indx[0]];
        sub_indx[0] = wei_indx[0];                    
        k = 1;
        while (k < len_indx) {
          if ( max < wei_sum[wei_indx[k]] ) {
            max = wei_sum[wei_indx[k]];
            sub_indx[0] = wei_indx[k];
          }
          k++;
        }
        for(k=0; k < len_indx; k++){
          if (wei_sum[wei_indx[k]] == max ){
            sub_indx[len] = wei_indx[k];
            len++;
          }
        }
        indx = sub_indx[0];                        
        if (len > 1){
          max = wei_mat_exp[sub_indx[0]];
          k = 1;
          while(k < len){
            if( max < wei_mat_exp[sub_indx[k]] ) {
              max = wei_mat_exp[sub_indx[k]];
              indx = sub_indx[k];
            }
            k++;
          }
        }
        sol[aft_indx + j - 1] = wei_sum[indx];
      }else{
        sol[aft_indx + j - 1] = 1;
      }
    }
  }
}
static void shortint(int_T *expense, int_T *sol, int_T n_std_sta, int_T *Iwork)
{
  int_T     i, j, k, PowPowM, aft_indx, len_indx, len, indx;
  int_T     min;
  int_T     wei_mat_sol;    
  int_T     *wei_mat_exp, *wei_sum, *sol_tmp, *wei_indx, *sub_indx;
  
  wei_mat_exp = Iwork; 
  wei_sum     = Iwork + n_std_sta;
  wei_indx    = Iwork + 2*n_std_sta;
  sub_indx    = Iwork + 3*n_std_sta;
  sol_tmp     = Iwork + 4*n_std_sta;
  
  PowPowM = n_std_sta * n_std_sta;
  for(i=0; i < PowPowM; i++)
    sol_tmp[i] = sol[i];
  for(i=1; i <= n_std_sta; i++){
    aft_indx = i * n_std_sta - n_std_sta;
    for(j=1; j <= n_std_sta; j++){
      for(k=0; k < n_std_sta; k++)
	    wei_mat_exp[k] =expense[k * n_std_sta + j - 1];
      len_indx = 0;
      for(k=0; k < n_std_sta; k++){                
	    wei_mat_sol = sol_tmp[aft_indx + k];
	    if ( wei_mat_exp[k] < 0 || wei_mat_sol < 0 ) {
	       wei_sum[k] = -1;
	    }else{
	       wei_sum[k] = wei_mat_exp[k] + wei_mat_sol;
	       wei_indx[len_indx] = k;
	       len_indx++;
	    }
      }
      
      if (len_indx > 0) {
	     len = 0;
	     min = wei_sum[wei_indx[0]];
	     sub_indx[0] = wei_indx[0];                    
	     k = 1;
	     while (k < len_indx) {
	       if ( min > wei_sum[wei_indx[k]] ) {
	         min = wei_sum[wei_indx[k]];
	         sub_indx[0] = wei_indx[k];
	       }
	       k++;
	     }
	     for(k=0; k < len_indx; k++){
	       if (wei_sum[wei_indx[k]] == min ){
	         sub_indx[len] = wei_indx[k];
	         len++;
	       }
	     }
	     indx = sub_indx[0];                        
	     if (len > 1){
	       min = wei_mat_exp[sub_indx[0]];
	       k = 1;
	       while(k < len){
	         if( min > wei_mat_exp[sub_indx[k]] ) {
	           min = wei_mat_exp[sub_indx[k]];
	           indx = sub_indx[k];
	         }
	         k++;
	       }
	     }
	     sol[aft_indx + j - 1] = wei_sum[indx];
      }else{
	     sol[aft_indx + j - 1] = -1;
      }
    }
  }
}
static void mdlInitializeSizes(SimStruct *S)
{
  int_T     i;  
  int_T     N, M, K, K2, colFunc, rowFunc, n_tran_prob, m_tran_prob;
  int_T     num_state, n_std_sta, PowPowM;
  int_T     leng, plot_flag, expen_flag;
  
  leng = (int_T)mxGetPr(LENG)[0];
  plot_flag = (int_T)mxGetPr(PLOT_FLAG)[0];
  rowFunc = mxGetM(TRAN_FUNC);
  colFunc = mxGetN(TRAN_FUNC);
  n_tran_prob = mxGetM(TRAN_PROB);
  m_tran_prob = mxGetN(TRAN_PROB);
  
  if ( n_tran_prob == 3 )
    expen_flag = 1;
  else
    expen_flag = 0;
  
  if( mxGetPr(TRAN_FUNC)[rowFunc*colFunc-1] < 0 ){
    N = (int_T)mxGetPr(TRAN_FUNC)[rowFunc*(colFunc-1)];
    K = (int_T)mxGetPr(TRAN_FUNC)[rowFunc*(colFunc-1)+1];
    M = (int_T)mxGetPr(TRAN_FUNC)[rowFunc*(colFunc-1)+2];
  }else{
    M = (int_T)mxGetPr(TRAN_FUNC)[1];
    N = (int_T)mxGetPr(TRAN_FUNC)[rowFunc];
    K = (int_T)mxGetPr(TRAN_FUNC)[rowFunc+1];
  }
  
  num_state = M;
  n_std_sta = 1;
  for(i=0; i < M; i++)
    n_std_sta = n_std_sta * 2;
  PowPowM = n_std_sta * n_std_sta;
  K2 = 1;
  for(i=0; i < K; i++)
    K2 = K2 * 2;
  
  ssSetNumContStates(     S, 0);          /* number of continuous states */
  ssSetNumDiscStates(     S, 6+2*leng*PowPowM+leng*N+K); /* number of discrete states */
  ssSetNumOutputs(        S, K+1);        /* number of outputs */
  ssSetNumInputs(         S, N+1);        /* number of inputs */
  ssSetDirectFeedThrough( S, 0);          /* direct feedthrough flag */
  ssSetNumSampleTimes(    S, 1);          /* number of sample times */
  ssSetNumInputArgs(      S, NUM_ARGS);   /* number of input arguments */
  if( expen_flag == 1 ){
    ssSetNumRWork(      S, n_tran_prob*m_tran_prob+(leng+3)*PowPowM+K+1+2*n_std_sta);/* number of real working space */
    /* tran_prob_tmp -------- n_tran_prob*m_tran_prob
     * expense -------- leng*PowPowM
     * Y -------------- K+1
     * sol ------------ PowPowM
     * handles -------- 5+2*n_std_sta
     * expen_tmp ------ PowPowM
     * tmpRwork ------- 2*n_std_sta+PowPowM
     */
    if( mxGetPr(TRAN_FUNC)[rowFunc*colFunc-1] < 0 ){
      ssSetNumIWork( S,  (M+N)*(M+K)+leng*(PowPowM+N)+K*(K2+1)+(4+M)*n_std_sta+3*N+2*M);
      /* A -------------- M*M
       * B -------------- M*K
       * C -------------- N*M
       * D -------------- N*K
       * solu ----------- leng*PowPowM
       * code ----------- leng*N
       * inp_pre -------- K*2^K
       * cur_sta_pre ---- M*2^M
       * expen_work ----- N
       * pre_state ------	n_std_sta
       * cur_sta -------- M
       * inp ------------ K
       * nex_sta -------- M
       * out ------------ N
       * expenOut ------- N
       * aft_state ------	n_std_sta
       * tmpIwork ------- 2*n_std_sta
       */
    }else{
      ssSetNumIWork( S, (rowFunc-2)*(M+N+2)+leng*(N+PowPowM)+K*(K2+1)+(4+M)*n_std_sta+3*N+2*M);
      /* TRAN_A --------- rowFunc-2
       * TRAN_B --------- rowFunc-2
       * A -------------- (rowFunc-2)*M
       * B -------------- (rowFunc-2)*N
       * same as above from *solu
       */
    }
  }else{
    ssSetNumRWork(          S, 0);/* number of real working space */
    /* handles -------- 5+2*n_std_sta */
    if( mxGetPr(TRAN_FUNC)[rowFunc*colFunc-1] < 0 ){
      ssSetNumIWork( S,  (M+N)*(M+K)+(2*leng+3)*PowPowM+K*(K2+2)+(6+M)*n_std_sta+(leng+2)*N+2*M+1);
      /* A -------------- M*M
       * B -------------- M*K
       * C -------------- N*M
       * D -------------- N*K
       * expense1 ------- leng*PowPowM
       * solu ----------- leng*PowPowM
       * code ----------- leng*N
       * Y1 ------------- K+1
       * inp_pre -------- K*2^K
       * cur_sta_pre ---- M*2^M
       * pre_state ------	n_std_sta
       * cur_sta -------- M
       * inp ------------ K
       * nex_sta -------- M
       * out ------------ N
       * expenOut ------- N
       * aft_state ------	n_std_sta
       * sol1 ----------- PowPowM
       * expen_tmp1 ----- PowPowM
       * tmpIwork ------- 4*n_std_sta+PowPowM
       */
    }else{
      ssSetNumIWork( S, (rowFunc-2)*(M+N+2)+(2*leng+3)*PowPowM+K*(K2+2)+(6+M)*n_std_sta+(leng+2)*N+2*M+1);
      /* TRAN_A --------- rowFunc-2
       * TRAN_B --------- rowFunc-2
       * A -------------- (rowFunc-2)*M
       * B -------------- (rowFunc-2)*N
       * same as above from *expense1
       */
    }
  }
  ssSetNumPWork(          S, 0);
}

/*
 * mdlInitializeConditions - initialize the states
 * Initialize the states, Integers and real-numbers
 */
static void mdlInitializeConditions(real_T *x0, SimStruct *S)
{
  /*  [A, B, C, D, N, K, M] = gen2abcd(tran_func);
   *  num_state = M;
   *  n_std_sta = 2^M;
   *  PowPowM = n_std_sta^2;
   *
   *  [n_tran_prob, m_tran_prob] = size(tran_prob);
   *  if n_tran_prob == 3
   *    expen_flag = 1;    % expense flag == 0; BSC code. == 1, with TRAN_PROB.
   *  else
   *    expen_flag = 0;
   *  end;
   *
   *  % veraible to record the weight trace back to the first state.
   *  % the column number is the state number - 1.
   *  % the first line is the previous. The second line is the current.
   *  expense = ones(leng, PowPowM) * NaN;
   *  expense(leng, [1:n_std_sta]) = zeros(1, n_std_sta);
   *  solu = zeros(leng, PowPowM);
   *
   *  % the column number is the state + 1.
   *  % the contents is the state it transferred from.
   *  % the starter is always single number.
   *  starter = 0;
   *
   *  % the solution in each of the transfer as above.
   *  % ith row is the transition input (msg) from (i-1)th row of trace to ith row
   *  % of trace. When i = 1, the transfer is from starter to trace.
   *
   *  if plot_flag > 0
   *    x0 = -Inf;
   *  else
   *    x0 = 0;
   *  end;
   *  code = zeros(leng, N);
   *  y = zeros(K+1, 1);
   *
   *  % add output storage.
   *  x0 = [x0; 0; 0; 0; starter; expense(:); solu(:); code(:); y];
   *%        |  |  |  |      |       |        |        \- output
   *%        \  \  \  \      \       \        \ decode
   *%         \  \  \  \      \start  \-weight, expense
   *%          \  \  \  \-trace_flag
   *%           \  \  \-trace_num
   *%            \  \-fig_position
   *%             \-figure handle
   *  sys = [0; length(x0); K+1; N+1; 0; 0; 1];
   *  ts = [-1, 0];
   */
  int_T     i, j;
  int_T     len_C, leng, plot_flag, NaN;
  int_T     N, M, K, K2, colFunc, rowFunc, n_tran_prob, m_tran_prob;
  int_T     num_state, n_std_sta, PowPowM;
  int_T     fig_position, trace_num, trace_flag, starter, expen_flag;
  int_T     *A, *B, *C, *D, *TRAN_A, *TRAN_B, *expense1, *expen_tmp1, *solu, *code, *Y1;
  int_T     *inp_pre, *cur_sta_pre, *expen_work, *pre_state, *cur_sta, *inp;
  int_T     *nex_sta, *out, *expenOut, *aft_state, *sol1, *tmpIwork; 
  real_T  *expense, *expen_tmp, *Y, *sol, *handles, *tmpRwork;
  real_T  *tran_prob_tmp;
  
  leng = (int_T)mxGetPr(LENG)[0];
  plot_flag = (int_T)mxGetPr(PLOT_FLAG)[0];
  colFunc = mxGetN(TRAN_FUNC);
  rowFunc = mxGetM(TRAN_FUNC);
  n_tran_prob = mxGetM(TRAN_PROB);
  m_tran_prob = mxGetN(TRAN_PROB);
  
  if ( n_tran_prob == 3 ){
    expen_flag = 1;
    NaN = 1;
  }else{
    expen_flag = 0;
    NaN = -1;
  }
  
  if( mxGetPr(TRAN_FUNC)[rowFunc*colFunc-1] < 0 ){
    N = (int_T)mxGetPr(TRAN_FUNC)[ rowFunc*(colFunc-1) ];
    K = (int_T)mxGetPr(TRAN_FUNC)[ rowFunc*(colFunc-1)+1 ];
    M = (int_T)mxGetPr(TRAN_FUNC)[ rowFunc*(colFunc-1)+2 ];
    len_C = M*N;
    
    A = ssGetIWork(S);
    B = A + M*M;
    C = B + M*K;
    D = C + N*M;
    
    /* Get the input Matrix A, B, C, D */
    for( i=0; i < M+N; i++ ){
      for( j=0; j < M+K; j++ ){
        if( i<M   && j<M )
          A[i+j*M] = (int_T)mxGetPr(TRAN_FUNC)[i+j*(M+N)];
    	if( i>M-1 && j<M )
	      C[i+j*N-M] = (int_T)mxGetPr(TRAN_FUNC)[i+j*(M+N)];
        if( i<M   && j>M-1 )
          B[i+j*M-M*M] = (int_T)mxGetPr(TRAN_FUNC)[i+j*(M+N)];
    	if( i>M-1 && j>M-1 )
          D[i+j*N-M*(N+1)] = (int_T)mxGetPr(TRAN_FUNC)[i+j*(M+N)];
      }
    }
  }else{
    N = (int_T)mxGetPr(TRAN_FUNC)[rowFunc];
    K = (int_T)mxGetPr(TRAN_FUNC)[rowFunc+1];
    M = (int_T)mxGetPr(TRAN_FUNC)[1];
    len_C = 0;

    /* Assignment */
    TRAN_A =    ssGetIWork(S);/*   !--- size of *TRAN_A */
    TRAN_B =    ssGetIWork(S) + (rowFunc-2);/* <--- size of *TRAN_B */
    A =         ssGetIWork(S) + 2*(rowFunc-2);/*    !----- size of *A */ 
    B =         ssGetIWork(S) + 2*(rowFunc-2) + (rowFunc-2)*M;/*    !----- size of *B */
    
    /* Get the input Matrix A, B */
    for(i=0; i < rowFunc-2; i++){
      TRAN_A[i] = (int_T)mxGetPr(TRAN_FUNC)[i+2];
      TRAN_B[i] = (int_T)mxGetPr(TRAN_FUNC)[rowFunc+i+2];
    }
    de2bi(TRAN_A, M, rowFunc-2, A);
    de2bi(TRAN_B, N, rowFunc-2, B);
  }

  num_state = M;
  n_std_sta = 1;
  for(i=0; i < M; i++)
    n_std_sta = n_std_sta * 2;
  PowPowM = n_std_sta * n_std_sta;
  K2 = 1;
  for(i=0; i < K; i++)
    K2 = K2 * 2;
  
  if( expen_flag != 0 ){
    tran_prob_tmp    = ssGetRWork(S);
    expense     = ssGetRWork(S) + n_tran_prob*m_tran_prob;
    Y           = ssGetRWork(S) + n_tran_prob*m_tran_prob + leng*PowPowM;
    sol         = ssGetRWork(S) + n_tran_prob*m_tran_prob + leng*PowPowM + (K+1);
    expen_tmp   = ssGetRWork(S) + n_tran_prob*m_tran_prob + leng*PowPowM + (K+1) + PowPowM;
    tmpRwork    = expen_tmp + PowPowM;
    
    if( mxGetPr(TRAN_FUNC)[rowFunc*colFunc-1] < 0 )
      solu = D + N*K;     /* size of *code is leng*N */
    else
      solu = B + N*(rowFunc-2);
    
    code = solu + leng*PowPowM;        
    inp_pre = code + leng*N;        /* allocate K*2^K for *inp_pre */
    cur_sta_pre = inp_pre + K2*K;   /*  M*2^M for *cur_sta_pre. */
    expen_work = cur_sta_pre + M*n_std_sta;  /* allocate N for *expen_work */
    pre_state = expen_work + N;     /* allocate n_std_sta for *pre_state */
    cur_sta = pre_state + n_std_sta;/* allocate M for *cur_sta */
    inp = cur_sta + M;              /* allocate K for *inp */
    nex_sta = inp + K;              /* allocate M for *nex_sta */
    out = nex_sta + M;              /* allocate N for *out */
    expenOut = out + N;             /* allocate N for *expenOut */
    aft_state = expenOut + N;       /* allocate n_std_sta for *aft_state */
    tmpIwork    = aft_state + 2*n_std_sta;
    
    /*  % veraible to record the weight trace back to the first state.
     *  % the column number is the state number - 1.
     *  % the first line is the previous. The second line is the current.
     *  expense = ones(leng, PowPowM) * NaN;
     *  expense(leng, [1:n_std_sta]) = zeros(1, n_std_sta);
     *      |NaN ......NaN|
     *      |NaN ......NaN|           (leng*PowPowM)
     *      |.............|   <------- Matrix *expense
     *      |NaN ......NaN|
     *      |0 . .0NaN.NaN| 
     *
     *  solu = zeros(leng, PowPowM);
     *      |0 0 ..... 0 0|
     *      |0 0 ..... 0 0|           (leng*PowPowM)
     *      |.............|   <------- Matrix *solu
     *      |0 0 ..... 0 0|
     *      |0 0 ..... 0 0|
     */
    for(i=0; i < leng*PowPowM; i++){
      expense[i] = NaN;
      solu[i] = 0;
    }        
    for(i=0; i < n_std_sta; i++)
      expense[leng+i*leng-1] = 0;
    
    starter = 0;
    
    if(plot_flag > 0)
      x0[0] = -1;
    else
      x0[0] = 0;
    
    for(i=0; i < leng*N; i++)
      code[i] = 0;
    for(i=0; i < K+1; i++)
      Y[i] = 0;
    fig_position = 0;
    trace_num = 0;
    trace_flag = 0;
    
    x0[1] = (real_T)fig_position;
    x0[2] = (real_T)trace_num;
    x0[3] = (real_T)trace_flag;
    x0[4] = (real_T)starter;
    for(i=5; i < 5+leng*PowPowM; i++)
      x0[i] = expense[i-5];
    for(i=0; i < leng*PowPowM; i++)
      x0[i+5+leng*PowPowM] = (real_T)solu[i];
    for(i=0; i < leng*N; i++)
      x0[i+5+2*leng*PowPowM] = (real_T)code[i];
    for(i=0; i < K+1; i++)
      x0[i+5+2*leng*PowPowM+leng*N] = Y[i];
    
  }else{ /* tran_prob is not 3-row matrix */
    
    if( mxGetPr(TRAN_FUNC)[rowFunc*colFunc-1] < 0 )
      expense1 = D + N*K;
    else
      expense1 = B + N*(rowFunc-2);
    
    solu = expense1 + leng*PowPowM;    
    code = solu + leng*PowPowM;
    Y1 = code + leng*N;              /* size of *Y1 is (K+1) */
    inp_pre = Y1 + (K+1);            /* allocate K*2^K for *inp_pre */
    cur_sta_pre = inp_pre + K2*K;   /*  M*2^M for *cur_sta_pre. */
    pre_state =  cur_sta_pre + M*n_std_sta;  /* allocate n_std_sta for *pre_state */
    cur_sta = pre_state + n_std_sta;/* allocate M for *cur_sta */
    inp = cur_sta + M;              /* allocate K for *inp */
    nex_sta = inp + K;              /* allocate M for *nex_sta */
    out = nex_sta + M;              /* allocate N for *out */
    expenOut = out + N;             /* allocate N for *expenOut */
    aft_state = expenOut + N;       /* allocate n_std_sta for *aft_state */
    sol1 = aft_state + n_std_sta;    /* allocate PowPowM for *sol1 */
    expen_tmp1 = sol1 + PowPowM;
    tmpIwork = expen_tmp1 + PowPowM;
    
    for(i=0; i < leng*PowPowM; i++){
      expense1[i] = NaN;
      solu[i] = 0;
    }        
    for(i=0; i < n_std_sta; i++)
      expense1[leng+i*leng-1] = 0;
    
    starter = 0;
    
    if(plot_flag > 0)
      x0[0] = -1;
    else
      x0[0] = 0;
    
    for(i=0; i < leng*N; i++)
      code[i] = 0;
    for(i=0; i < K+1; i++)
      Y1[i] = 0;
    fig_position = 0;
    trace_num = 0;
    trace_flag = 0;
    
    x0[1] = (real_T)fig_position;
    x0[2] = (real_T)trace_num;
    x0[3] = (real_T)trace_flag;
    x0[4] = (real_T)starter;
    for(i=0; i < leng*PowPowM; i++)
      x0[i+5] = (real_T)expense1[i];
    for(i=0; i < leng*PowPowM; i++)
      x0[i+5+leng*PowPowM] = (real_T)solu[i];
    for(i=0; i < leng*N; i++)
      x0[i+5+2*leng*PowPowM] = (real_T)code[i];
    for(i=0; i < K+1; i++)
      x0[i+5+2*leng*PowPowM+leng*N] = (real_T)Y1[i];
  }
}

/*
 * mdlInitializeSampleTimes - initialize the sample times array
 *
 * This function is used to specify the sample time(s) for your S-function.
 * If your S-function is continuous, you must specify a sample time of 0.0.
 * Sample times must be registered in ascending order.
 */
static void mdlInitializeSampleTimes(SimStruct *S)
{
    ssSetSampleTimeEvent(S, 0, INHERITED_SAMPLE_TIME);
    ssSetOffsetTimeEvent(S, 0, FIXED_IN_MINOR_STEP_OFFSET);
}

/*
 * mdlOutputs - compute the outputs
 *
 * In this function, you compute the outputs of your S-function
 * block.  The outputs are placed in the y variable.
 */
static void mdlOutputs(real_T *y, const real_T *x, const real_T *u, SimStruct *S, int_T tid)
{
  /*  if u(length(u)) > 0.2
   *    K = tran_func(2, size(tran_func, 2));
   *    len_x = length(x);
   *    sys = x(len_x - K: len_x);
   *  end;
   */
  int_T     i;
  int_T     colFunc, rowFunc, M, K, N, leng, len_x, n_std_sta, PowPowM;
  real_T  max;
  
  leng = (int_T)mxGetPr(LENG)[0];
  rowFunc = mxGetM(TRAN_FUNC);
  colFunc = mxGetN(TRAN_FUNC);
  
  if( mxGetPr(TRAN_FUNC)[rowFunc*colFunc-1] < 0 ){
    N = (int_T)mxGetPr(TRAN_FUNC)[rowFunc*(colFunc-1)];
    K = (int_T)mxGetPr(TRAN_FUNC)[rowFunc*(colFunc-1)+1];
    M = (int_T)mxGetPr(TRAN_FUNC)[rowFunc*(colFunc-1)+2];
  }else{
    M = (int_T)mxGetPr(TRAN_FUNC)[1];
    N = (int_T)mxGetPr(TRAN_FUNC)[rowFunc];
    K = (int_T)mxGetPr(TRAN_FUNC)[rowFunc+1];
  }
  
  n_std_sta = 1;
  for(i=0; i < M; i++)
    n_std_sta = n_std_sta * 2;
  PowPowM = n_std_sta * n_std_sta;
  
  len_x = 6+2*leng*PowPowM+leng*N+K; /* number of discrete states */
  
  if( u[N] > 0.2 ){
    for(i=0; i < K+1; i++)
      y[i] = x[len_x-1-K+i];
  }
}

/*
 * mdlUpdate - perform action at major integration time step
 *
 * This function is called once for every major integration time step.
 * Discrete states are typically updated here, but this function is useful
 * for performing any tasks that should only take place once per integration
 * step. PS: flag = 2.
 */
static void mdlUpdate(real_T *x, const real_T *u, SimStruct *S, int_T tid)
{
  int_T     NaN;
  int_T     i, j, l, j2, jj, j_k, indx_j, j_pre, num_N, num_K;
  int_T     leng, plot_flag, len_C, len_x,lenIndx0, lenIndx1, len_aft_state, dec, tmp;
  int_T     N, M, K, K2, colFunc, rowFunc, n_tran_prob, m_tran_prob;
  int_T     num_state, n_std_sta, PowPowM, tran_indx, nex_sta_de;
  int_T     fig_position, trace_num, trace_flag, starter, expen_flag, trace_eli;
  int_T     loc_tmp, plot_flag_test, trace_pre, len_pre_state, numnotnan, loc_exp1, loca_exp1;  
  int_T     *A, *B, *C, *D, *expense1, *solu, *code, *Y1, *TRAN_A, *TRAN_B;
  int_T     *inp_pre, *cur_sta_pre, *expen_work, *expen_tmp1, *pre_state, *cur_sta, *inp;
  int_T     *nex_sta, *out, *expenOut, *aft_state, *sol1, *tmpIwork; 
  real_T  max, min, loca_exp, loc_exp;
  real_T  *expense, *expen_tmp, *Y, *sol, *handles, *tmpRwork;
  real_T  *tran_prob_tmp;
  
  leng = (int_T)mxGetPr(LENG)[0];
  plot_flag = (int_T)mxGetPr(PLOT_FLAG)[0];
  rowFunc = mxGetM(TRAN_FUNC);
  colFunc = mxGetN(TRAN_FUNC);
  n_tran_prob = mxGetM(TRAN_PROB);
  m_tran_prob = mxGetN(TRAN_PROB);
  
  /*  % the major processing routine.
   *  if u(length(u)) < .2
   *    % in the case of no signal, no processing.
   *    return;
   *  end;
   *  % otherwise, there is a signal, have to process.
   *   u = u(:)';
   *
   *  % initial parameters.
   *  [A, B, C, D, N, K, M] = gen2abcd(tran_func);
   *  num_state = M;
   *  n_std_sta = 2^M;
   *  PowPowM = n_std_sta^2;
   *  K2 = 2^K;
   *  inp_pre = de2bi([0:K2-1]', K); 
   *  cur_sta_pre = de2bi([0:n_std_sta-1], M);
   */
  if ( n_tran_prob == 3 ){
    expen_flag = 1;
    NaN = 1; 
  }else{
    expen_flag = 0;
    NaN = -1;
  }
  
  if( mxGetPr(TRAN_FUNC)[rowFunc*colFunc-1] < 0 ){
    N = (int_T)mxGetPr(TRAN_FUNC)[ rowFunc*(colFunc-1) ];
    K = (int_T)mxGetPr(TRAN_FUNC)[ rowFunc*(colFunc-1)+1 ];
    M = (int_T)mxGetPr(TRAN_FUNC)[ rowFunc*(colFunc-1)+2 ];
    len_C = M*N;
    
    A = ssGetIWork(S);
    B = A + M*M;
    C = B + M*K;
    D = C + N*M;
    
    /* Get the input Matrix A, B, C, D */
    for( i=0; i < M+N; i++ ){
      for( j=0; j < M+K; j++ ){
        if( i<M   && j<M )
          A[i+j*M] = (int_T)mxGetPr(TRAN_FUNC)[i+j*(M+N)];
    	if( i>M-1 && j<M )
          C[i+j*N-M] = (int_T)mxGetPr(TRAN_FUNC)[i+j*(M+N)];
    	if( i<M   && j>M-1 )
    	  B[i+j*M-M*M] = (int_T)mxGetPr(TRAN_FUNC)[i+j*(M+N)];
    	if( i>M-1 && j>M-1 )
    	  D[i+j*N-M*(N+1)] = (int_T)mxGetPr(TRAN_FUNC)[i+j*(M+N)];
      }
    }
  }else{
    N = (int_T)mxGetPr(TRAN_FUNC)[rowFunc];
    K = (int_T)mxGetPr(TRAN_FUNC)[rowFunc+1];
    M = (int_T)mxGetPr(TRAN_FUNC)[1];
    len_C = 0;
    
    /* Assignment */
    TRAN_A =    ssGetIWork(S);/*   !--- size of *TRAN_A */
    TRAN_B =    ssGetIWork(S) + (rowFunc-2);/* <--- size of *TRAN_B */
    A =         ssGetIWork(S) + 2*(rowFunc-2);/*    !----- size of *A */ 
    B =         ssGetIWork(S) + 2*(rowFunc-2) + (rowFunc-2)*M;/*    !----- size of *B */
    
    /* Get the input Matrix A, B */
    for(i=0; i < rowFunc-2; i++){
      TRAN_A[i] = (int_T)mxGetPr(TRAN_FUNC)[i+2];
      TRAN_B[i] = (int_T)mxGetPr(TRAN_FUNC)[rowFunc+i+2];
    }
    de2bi(TRAN_A, M, rowFunc-2, A);
    de2bi(TRAN_B, N, rowFunc-2, B);
  }
  
  num_state = M;
  n_std_sta = 1;
  for(i=0; i < M; i++)
    n_std_sta = n_std_sta * 2;
  PowPowM = n_std_sta * n_std_sta;
  K2 = 1;
  for(i=0; i < K; i++)
    K2 = K2 * 2;
  
  if ( u[N] >= 0.2 ){
    if( expen_flag != 0 ){  /* Tran_prob is a THREE rows Matrix */
      tran_prob_tmp    = ssGetRWork(S);
      expense     = ssGetRWork(S) + n_tran_prob*m_tran_prob;
      Y           = ssGetRWork(S) + n_tran_prob*m_tran_prob + leng*PowPowM;
      sol         = ssGetRWork(S) + n_tran_prob*m_tran_prob + leng*PowPowM + (K+1);
      expen_tmp   = ssGetRWork(S) + n_tran_prob*m_tran_prob + leng*PowPowM + (K+1) + PowPowM;
      tmpRwork    = expen_tmp + PowPowM;
      
      if( mxGetPr(TRAN_FUNC)[rowFunc*colFunc-1] < 0 )
	     solu = D + N*K;
      else
	     solu = B + N*(rowFunc-2);
      
      code        = solu + leng*PowPowM;        
      inp_pre     = code + leng*N;        /* allocate K*2^K for *inp_pre */
      cur_sta_pre = inp_pre + K2*K;   /*  M*2^M for *cur_sta_pre. */
      expen_work  = cur_sta_pre + M*n_std_sta;  /* allocate N for *expen_work */
      pre_state   = expen_work + N;     /* allocate n_std_sta for *pre_state */
      cur_sta     = pre_state + n_std_sta;/* allocate M for *cur_sta */
      inp         = cur_sta + M;              /* allocate K for *inp */
      nex_sta     = inp + K;              /* allocate M for *nex_sta */
      out         = nex_sta + M;              /* allocate N for *out */
      expenOut    = out + N;             /* allocate N for *expenOut */
      aft_state   = expenOut + N;       /* allocate n_std_sta for *aft_state */
      tmpIwork    = aft_state + 2*n_std_sta;
      
      inp_pre[0] = -1;
      cur_sta_pre[0] = -1;
      de2bi(inp_pre, K, K2, inp_pre);
      de2bi(cur_sta_pre, M, n_std_sta, cur_sta_pre);

      starter = (int_T)x[4];
      plot_flag_test = (int_T)x[5];
      loc_tmp = 8;
      
      /*  [n_tran_prob, m_tran_prob] = size(tran_prob);
       *  if n_tran_prob == 3
       *    expen_flag = 1;     % expense flag == 0; BSC code. == 1, with TRAN_PROB.
       *    expen_work = zeros(1, N); % work space.
       *    tran_prob(2:3, :) = log10(tran_prob(2:3, :));
       *  else
       *    expen_flag = 0;
       *  end;
       */
      for(i=0; i < N; i++)
	     expen_work[i] = 0;
      
      for(i=0; i < m_tran_prob; i++){
	     tran_prob_tmp[i*n_tran_prob] = mxGetPr(TRAN_PROB)[i*n_tran_prob];
	     tran_prob_tmp[i*n_tran_prob+1] = log10(mxGetPr(TRAN_PROB)[i*n_tran_prob+1]);
	     tran_prob_tmp[i*n_tran_prob+2] = log10(mxGetPr(TRAN_PROB)[i*n_tran_prob+2]);
      }
      
      /*  % veraible to record the weight trace back to the first state.
       *  % the column number is the state number - 1.
       *  % the first line is the previous. The second line is the current.
       *  starter = x(5);
       *  solu = zeros(leng, PowPowM);
       *  expense = solu;
       *  code = zeros(leng, N);
       *  loc_tmp  = 6;
       *  expense(:) = x(loc_tmp : leng*PowPowM + loc_tmp - 1); 
       *  solu(:) = x(loc_tmp + leng * PowPowM : 2 * leng * PowPowM + loc_tmp - 1);
       *  code(:) = x(loc_tmp + 2 * leng * PowPowM : loc_tmp + 2 * leng * PowPowM -1 + leng * N);
       */
      starter = (int_T)x[4];
      loc_tmp = 6;
      
      for(i=0; i < leng*PowPowM; i++)
    	expense[i] = x[loc_tmp-1+i];
      for(i=0; i < leng*PowPowM; i++)
	    solu[i] = (int_T)x[loc_tmp+leng*PowPowM-1+i];
      for(i=0; i < leng*N; i++)
    	code[i] = (int_T)x[loc_tmp+2*leng*PowPowM-1+i];
      
      /*  fig_position = x(2) + 1;
       *  if (x(1) > 0) & (rem(fig_position-leng, plot_flag - leng) == 0)
       *             & (fig_position >= plot_flag) & plot_flag_test
       *  see also sviplot2.m
       */
      fig_position = x[1] + 1;
      
      /*  trace_num = x(3) + 1;
       *  trace_flag = x(4);
       *  code(trace_num, :) = u(1:length(u)-1);
       *
       *  if (trace_flag == 0) & (trace_num == leng)
       *      trace_flag = 1;
       *  end;
       *  trace_pre = rem(trace_num - 2 + leng, leng) + 1;
       */
      trace_num = (int_T)x[2] + 1;
      trace_flag = (int_T)x[3];
      
      for(i=0; i < N; i++)
    	code[trace_num-1+i*leng] = (int_T)u[i];
      
      if(trace_flag == 0 && trace_num == leng)
    	trace_flag = 1;
      
      trace_pre = (trace_num - 2 + leng) % leng + 1;
      
      /*  % fill up trace and solut
       *  if (trace_flag == 0) & (trace_num == 1)
       *      pre_state = starter + 1;
       *  else
       *      pre_state = [];
       *      for j2 = 1 : n_std_sta
       *          if max(~isnan(expense(trace_pre, [1-n_std_sta : 0] + j2*n_std_sta)))
       *              pre_state = [pre_state, j2];
       *          end;
       *      end;
       *  end;
       */
      len_pre_state = 0;
      if( trace_flag == 0 && trace_num == 1 ){
    	pre_state[0] = starter + 1;
    	len_pre_state = 1;
      }else{
    	 for(j2=0; j2 < n_std_sta; j2++){
	       numnotnan = 0;
    	   for(i=0; i < n_std_sta; i++){
	         if( expense[trace_pre-1 + i*leng+j2*leng*n_std_sta] <= 0 )
	            numnotnan ++;
	       }
	       if(numnotnan != 0){
	         pre_state[len_pre_state] = j2 + 1;
	         len_pre_state++;
	       }
	     }
      }
      
      /*  expense(trace_num, :) = expense(trace_num,:) * NaN;
       *  if expen_flag
       *      for j = 1 : length(pre_state)
       *          jj = pre_state(j) - 1;           % index number - 1 is the state.
       *          cur_sta = cur_sta_pre(pre_state(j),:)';
       *          indx_j = (pre_state(j) - 1) * n_std_sta;
       *          for num_N = 1 : N
       *              expen_work(num_N) = max(find(tran_prob(1,:) <= code(trace_num, num_N)));
       *          end;
       *          for num_K = 1 : K2
       *              inp = inp_pre(num_K, :)';
       *              if isempty(C)
       *                  tran_indx = pre_state(j) + (num_K -1) * n_std_sta;
       *                  nex_sta = A(tran_indx, :)';
       *                  out = B(tran_indx, :)';
       *              else
       *                  out = rem(C * cur_sta + D * inp,2);
       *                  nex_sta = rem(A * cur_sta + B * inp, 2);
       *              end;
       */
      for(i=0; i < PowPowM; i++)
    	expense[trace_num-1+i*leng] = NaN;
      
      for(j=0; j < len_pre_state; j++){
    	jj = pre_state[j] - 1;
    	for(i=0; i < M; i++)
    	  cur_sta[i] = cur_sta_pre[jj + i*n_std_sta];
        indx_j = jj * n_std_sta;
    	for(num_N=0; num_N < N; num_N++){
          max = 0;
          for(i=0; i < m_tran_prob; i++){
            if( tran_prob_tmp[i*n_tran_prob] <= code[trace_num-1+num_N*leng] )
              max = i;
          }
	      expen_work[num_N] = max + 1;
	    }
        for(num_K=0; num_K < K2; num_K++){
          for(i=0; i < K; i++)
            inp[i] = inp_pre[num_K+i*K2];
          if( len_C == 0 ){
            tran_indx = pre_state[j] + num_K*n_std_sta;
    	    for(i=0; i < M; i++)
    	      nex_sta[i] = A[tran_indx-1+i*(rowFunc-2)];
    	    for(i=0; i < N; i++)
    	      out[i] = B[tran_indx-1+i*(rowFunc-2)];
    	  }else{
    	    for(i=0; i < N; i++){
	          out[i] = 0;
	          for(l=0; l < M; l++)
		        out[i] = out[i] + C[i+l*N]*cur_sta[l];
	          for(l=0; l < K; l++)    
		        out[i] = out[i] + D[i+l*N]*inp[l];
	          out[i] = out[i] % 2;
	        }    
	        for(i=0; i < M; i++){
	          nex_sta[i] = 0;
	          for(l=0; l < M; l++)
		        nex_sta[i] = nex_sta[i] + A[i+l*M]*cur_sta[l];
	          for(l=0; l < K; l++)    
		        nex_sta[i] = nex_sta[i] + B[i+l*M]*inp[l];
	          nex_sta[i] = nex_sta[i] % 2;
	        }
	     }
	  /*              nex_sta_de = bi2de(nex_sta') + 1;
	   *              % find the expense by the transfer probability
	   *              expen_0 = find(out' <= 0.5);
	   *              expen_1 = find(out' > 0.5);
	   *              loca_exp = sum([tran_prob(2,expen_work(expen_0)) 0])...
	   *                  +sum([tran_prob(3,expen_work(expen_1)) 0]);
	   *              tmp = (nex_sta_de-1)*n_std_sta + pre_state(j);
	   *              if isnan(expense(trace_num, tmp))
	   *                  expense(trace_num, tmp) = loca_exp;
	   *                  solu(trace_num, nex_sta_de + indx_j) = num_K;   
	   *              elseif expense(trace_num, tmp) < loca_exp
	   *                  expense(trace_num, tmp) = loca_exp;
	   *                  solu(trace_num, nex_sta_de + indx_j) = num_K;
	   *              end;
	   *          end;
	   *      end;
	   */
	  bi2de(nex_sta,1, M, &nex_sta_de);
	  nex_sta_de = nex_sta_de + 1;
	  lenIndx0= 0;
	  for(i=0; i < N; i++){
	    if( out[i] <= 0.5 ){
	      expenOut[lenIndx0] = i;
	      lenIndx0++;
	    }
	  }
	  lenIndx1 = 0;
	  for(i=0; i < N; i++){
	    if( out[i] > 0.5 ){
	      expenOut[lenIndx1+lenIndx0] = i;
	      lenIndx1++;
	    }
	  }
	  loca_exp = 0;
	  for(i=0; i < lenIndx0; i++)
	    loca_exp = loca_exp + tran_prob_tmp[1 + n_tran_prob*(expen_work[ expenOut[i] ]-1) ];
	  for(i=0; i < lenIndx1; i++)
	    loca_exp = loca_exp + tran_prob_tmp[2 + n_tran_prob*(expen_work[expenOut[i+lenIndx0]]-1)];
	  tmp = (nex_sta_de - 1) * n_std_sta + pre_state[j] - 1;
	  if( expense[trace_num - 1 + tmp*leng] > 0 ){
	    expense[trace_num - 1 + tmp*leng] = loca_exp;
	    solu[trace_num - 1 + leng*(nex_sta_de+indx_j-1)] = num_K + 1;
	  }else if( expense[trace_num - 1 + tmp*leng] < loca_exp ){
	    expense[trace_num - 1 + tmp*leng] = loca_exp;
	    solu[trace_num - 1 + leng*(nex_sta_de+indx_j-1)] = num_K + 1;
	  }
	}
      }
      /*  aft_state = [];
       *  for j2 = 1 : n_std_sta
       *      if max(~isnan(expense(trace_num, [1-n_std_sta : 0] + j2*n_std_sta)))
       *          aft_state = [aft_state, j2];
       *      end
       *  end;
       */
      len_aft_state = 0;
      for(j2=0; j2 < n_std_sta; j2++){
    	numnotnan = 0;
    	for(i=0; i < n_std_sta; i++){
          if( expense[trace_num-1+i*leng+j2*leng*n_std_sta] <=0 )
	         numnotnan ++;
	    }
        if(numnotnan != 0){
          aft_state[len_aft_state] = j2 + 1;
    	  len_aft_state++;
	    }
      }
      /*  %%%%% begin plot related %%%%% */
      
      /*  % decision making.
       *  if trace_flag
       *    sol = expense(trace_num,:);
       *    trace_eli = rem(trace_num, leng) + 1;
       *    % strike out the unnecessary.
       *    for j_k = 1 : leng - 2
       *      j_pre = rem(trace_num - j_k - 1 + leng, leng) + 1;
       *      sol = vitshort(expense(j_pre, :), sol, n_std_sta, expen_flag);
       *    end;
       *
       *    tmp = (ones(n_std_sta,1) * expense(trace_eli, [starter+1:n_std_sta:PowPowM]))';
       *    sol = sol + tmp(:)';
       */
      if( trace_flag != 0 ){
        trace_eli = (trace_num % leng) + 1;
	
        for( i=0; i < PowPowM; i++)
          sol[i] = expense[trace_num-1 + i*leng];
	
        for( j_k=1; j_k <= leng-2; j_k++){
          j_pre =(trace_num -j_k -1 + leng) % leng + 1;
          for( i=0; i < PowPowM; i++)
            expen_tmp[i] = expense[j_pre-1 + i*leng];
          shortdbl(expen_tmp, sol, n_std_sta, tmpRwork, tmpIwork);
        }
        for(j=0; j < n_std_sta; j++){
          for(i=0; i < n_std_sta; i++){
            if( expense[trace_eli-1+(starter+i*n_std_sta)*leng] > 0 )
              sol[i+j*n_std_sta] = 1;
	        else
	          sol[i+j*n_std_sta] = sol[i+j*n_std_sta] + expense[trace_eli-1+(starter+i*n_std_sta)*leng];
          }
        }

        /*    if expen_flag
         *      loc_exp =  max(sol(find(~isnan(sol))));   
         *    else
         *      loc_exp =  min(sol(find(~isnan(sol))));
         *    end
         *    dec = find(sol == loc_exp);
         *    dec = dec(1);
         *    dec = rem((dec - 1), n_std_sta);
         *    num_K = solu(trace_eli, starter*n_std_sta+dec+1);
         *    inp = inp_pre(num _K, :)';
         *    if isempty(C)
         *        tran_indx =  starter+1 + (num_K -1) * n_std_sta;
         *        out = B(tran_indx, :)';
         *    else
         *        cur_sta = cur_sta_pre(starter+1, :)';
         *        out = rem(C * cur_sta + D * inp,2);
         *    end;
         */
	/* here, expen_flag != 0 */ 
	
	for(i=0; i < PowPowM; i++){
	  if( sol[i] <= 0 ){
	    loc_exp = sol[i];
	    i = PowPowM;
	  }
	}
	for(i=0; i < PowPowM; i++){
	  if( sol[i] <= 0 && loc_exp < sol[i])
	    loc_exp = sol[i];
	}
	for(i=0; i < PowPowM; i++){
	  if( sol[i] == loc_exp ){
	    dec = i;
	    i = PowPowM;
	  }
	}
	dec = dec % n_std_sta;
	num_K = solu[trace_eli-1+leng*(starter*n_std_sta+dec)];
	for(i=0; i < K; i++)
	  inp[i] = inp_pre[num_K-1+i*K2];
	
	if( len_C == 0 ){
	  tran_indx = starter + 1 + (num_K-1)*n_std_sta;
	  for(i=0; i < N; i++)
	    out[i] = B[tran_indx-1+i*(rowFunc-2)];
	}else{
	  for(i=0; i < N; i++){
	    out[i] = 0;
	    for(l=0; l < M; l++)
	      out[i] = out[i] + C[i+l*N]*cur_sta[l];
	    for(l=0; l < K; l++)    
	      out[i] = out[i] + D[i+l*N]*inp[l];
	    out[i] = out[i] % 2;
	  }
	  for(i=0; i < M; i++)
	    cur_sta[i] = cur_sta_pre[starter + i*n_std_sta];
	}
        /*    if expen_flag
         *      % find the expense by the transfer probability
         *      expen_0 = find(out' <= 0.5);
         *      expen_1 = find(out' > 0.5);
         *      loc_exp = sum([tran_prob(2,expen_work(expen_0)) 0])...
         *               +sum([tran_prob(3,expen_work(expen_1)) 0]);
         *    else
         *      loc_exp = sum(rem(code(trace_eli, :) + out', 2));
         *    end
         */
	lenIndx0= 0;
	for(i=0; i < N; i++){
	  if( out[i] <= 0.5 ){
	    expenOut[lenIndx0] = i;
	    lenIndx0++;
	  }
	}
	lenIndx1 = 0;
	for(i=0; i < N; i++){
	  if( out[i] > 0.5 ){
	    expenOut[lenIndx1+lenIndx0] = i;
	    lenIndx1++;
	  }
	}
	loc_exp = 0;
	for(i=0; i < lenIndx0; i++)
	  loc_exp = loc_exp + tran_prob_tmp[1+n_tran_prob*(expen_work[expenOut[i]]-1)];
	for(i=0; i < lenIndx1; i++)
	  loc_exp = loc_exp + tran_prob_tmp[2+n_tran_prob*(expen_work[expenOut[i+lenIndx0]]-1)];
	

        /*    starter = dec;
         *    output = [inp(:); loca_exp];
         *  else %(if trace_flag)
         *    output = zeros(K+1, 1);
         *    starter = 0;
         *  end; %(if trace_flag)
         *
         *  trace_num = rem(trace_num, leng);
         *  sys = [x(1); fig_position; trace_num; trace_flag; starter; expense(:); solu(:); code(:); output(:)];
         */
	starter = dec;
	for(i=0; i < K; i++)
	  Y[i] = (real_T)inp[i];
	Y[K] = loca_exp;    
      }else{  /* if (trace_flag != 0 ) */
	for(i=0; i < K+1; i++)
	  Y[i] = 0;
	starter = 0;
      }  /* the end of "if (trace_flag != 0 )" */
      
      trace_num = trace_num % leng;
      
      x[1] = (real_T)fig_position;
      x[2] = (real_T)trace_num;
      x[3] = (real_T)trace_flag;
      x[4] = (real_T)starter;
      for(i=0; i < leng*PowPowM; i++)
	x[i+5] = expense[i];
      for(i=0; i < leng*PowPowM; i++)
	x[i+5+leng*PowPowM] = (real_T)solu[i];
      for(i=0; i < leng*N; i++)
	x[i+5+2*leng*PowPowM] = (real_T)code[i];
      for(i=0; i < K+1; i++)
	x[i+5+2*leng*PowPowM+leng*N] = Y[i];
      /*  the end of "if (expen_flag != 0 ) */
    }else{ /* tran_prob is not 3-row matrix */
      /* In this kind of TRAN_PROB, the type of all of variables is integer */
      if( mxGetPr(TRAN_FUNC)[rowFunc*colFunc-1] < 0 )
	expense1 = D + N*K;
      else
	expense1 = B + N*(rowFunc-2);
      
      solu = expense1 + leng*PowPowM;    
      code = solu + leng*PowPowM;
      Y1 = code + leng*N;              /* size of *Y1 is (K+1) */
      inp_pre = Y1 + (K+1);            /* allocate K*2^K for *inp_pre */
      cur_sta_pre = inp_pre + K2*K;   /*  M*2^M for *cur_sta_pre. */
      pre_state = cur_sta_pre + M*n_std_sta;  /* allocate n_std_sta for *pre_state */
      cur_sta = pre_state + n_std_sta;/* allocate M for *cur_sta */
      inp = cur_sta + M;              /* allocate K for *inp */
      nex_sta = inp + K;              /* allocate M for *nex_sta */
      out = nex_sta + M;              /* allocate N for *out */
      expenOut = out + N;             /* allocate N for *expenOut */
      aft_state = expenOut + N;       /* allocate n_std_sta for *aft_state */
      sol1 = aft_state + n_std_sta;    /* allocate PowPowM for *sol */
      expen_tmp1 = sol1 + PowPowM;
      tmpIwork = expen_tmp1 + PowPowM;
      
      inp_pre[0] = -1;
      cur_sta_pre[0] = -1;
      de2bi(cur_sta_pre, M, n_std_sta, cur_sta_pre);
      de2bi(inp_pre, K, K2, inp_pre);
      
      
      starter = (int_T)x[4];
      loc_tmp = 6;
      
      for(i=0; i < leng*PowPowM; i++)
	expense1[i] = (int_T)x[loc_tmp-1+i];
      for(i=0; i < leng*PowPowM; i++)
	solu[i] = (int_T)x[loc_tmp+leng*PowPowM-1+i];
      for(i=0; i < leng*N; i++)
	code[i] = (int_T)x[loc_tmp+2*leng*PowPowM-1+i];
      
      fig_position = x[1] + 1;
      if( x[0] > 0 && ((fig_position-leng)%(plot_flag - leng) == 0) && fig_position >= plot_flag && plot_flag_test != 0 ){
	
      }
      
      trace_num = (int_T)x[2] + 1;
      trace_flag = (int_T)x[3];
      
      for(i=0; i < N; i++)
	code[trace_num-1+i*leng] = (int_T)u[i];
      
      if(trace_flag == 0 && trace_num == leng)
	trace_flag = 1;
      
      trace_pre = (trace_num - 2 + leng) % leng + 1;
      
      len_pre_state = 0;
      if( trace_flag == 0 && trace_num == 1 ){
	pre_state[0] = starter + 1;
	len_pre_state = 1;
      }else{
	for(j2=0; j2 < n_std_sta; j2++){
	  numnotnan = 0;
	  for(i=0; i < n_std_sta; i++){
	    if( expense1[trace_pre-1 + i*leng+j2*leng*n_std_sta] >= 0 )
	      numnotnan ++;
	  }
	  if(numnotnan != 0){
	    pre_state[len_pre_state] = j2 + 1;
	    len_pre_state++;
	  }
	}
      }
      
      for(i=0; i < PowPowM; i++)
	expense1[trace_num-1+i*leng] = NaN;
      
      for(j=0; j < len_pre_state; j++){
	jj = pre_state[j] - 1;
	for(i=0; i < M; i++)
	  cur_sta[i] = cur_sta_pre[jj + i*n_std_sta];
	indx_j = jj * n_std_sta;
	for(num_K=0; num_K < K2; num_K++){
	  for(i=0; i < K; i++)
	    inp[i] = inp_pre[num_K+i*K2];
	  if( len_C == 0 ){
	    tran_indx = pre_state[j] + num_K*n_std_sta;
	    for(i=0; i < M; i++)
	      nex_sta[i] = A[tran_indx-1+i*(rowFunc-2)];
	    for(i=0; i < N; i++)
	      out[i] = B[tran_indx-1+i*(rowFunc-2)];
	  }else{
	    for(i=0; i < N; i++){
	      out[i] = 0;
	      for(l=0; l < M; l++)
		out[i] = out[i] + C[i+l*N]*cur_sta[l];
	      for(l=0; l < K; l++)    
		out[i] = out[i] + D[i+l*N]*inp[l];
	      out[i] = out[i] % 2;
	    }    
	    for(i=0; i < M; i++){
	      nex_sta[i] = 0;
	      for(l=0; l < M; l++)
		nex_sta[i] = nex_sta[i] + A[i+l*M]*cur_sta[l];
	      for(l=0; l < K; l++)
		nex_sta[i] = nex_sta[i] + B[i+l*M]*inp[l];
	      nex_sta[i] = nex_sta[i] % 2;
	    }
	  }
	  bi2de(nex_sta, 1, M, &nex_sta_de);
	  nex_sta_de = nex_sta_de + 1;
	  loca_exp1 = 0;
	  for(i=0; i < N; i++)
	    loca_exp1 = loca_exp1 + (code[trace_num-1+leng*i] + out[i]) % 2;
	  tmp = (nex_sta_de - 1) * n_std_sta + pre_state[j] - 1;
	  if( expense1[trace_num - 1 + tmp*leng] < 0 || expense1[trace_num - 1 + tmp*leng] > loca_exp1 ){
	    expense1[trace_num - 1 + tmp*leng] = loca_exp1;
	    solu[trace_num - 1 + leng*(nex_sta_de+indx_j-1)] = num_K + 1;
	  }else if( expense1[trace_num - 1 + tmp*leng] > loca_exp1 ){
	    expense1[trace_num - 1 + tmp*leng] = loca_exp1;
	    solu[trace_num - 1 + leng*(nex_sta_de+indx_j-1)] = num_K + 1;
	  }
	}
      }
      
      len_aft_state = 0;
      for(j2=0; j2 < n_std_sta; j2++){
	numnotnan = 0;
	for(i=0; i < n_std_sta; i++){
	  if( expense1[trace_num-1+i*leng+j2*leng*n_std_sta] >= 0 )
	    numnotnan ++;
	}
	if(numnotnan != 0){
	  aft_state[len_aft_state] = j2 + 1;
	  len_aft_state++;
	}
      }

      /*  %%%%% begin plot related %%%%% */
      
      if( trace_flag != 0 ){
	trace_eli = (trace_num % leng) + 1;
	
	for( i=0; i < PowPowM; i++)
	  sol1[i] = expense1[trace_num-1 + i*leng];
	
	for( j_k=1; j_k <= leng-2; j_k++){
	  j_pre =(trace_num - j_k -1 + leng) % leng + 1;
	  for( i=0; i < PowPowM; i++)
	    expen_tmp1[i] = expense1[j_pre-1 + i*leng];
	  shortint(expen_tmp1, sol1, n_std_sta, tmpIwork);
	}
	for(j=0; j < n_std_sta; j++){
	  for(i=0; i < n_std_sta; i++){
	    if( expense1[trace_eli-1+(starter+i*n_std_sta)*leng] < 0 )
	      sol1[i+j*n_std_sta] = -1;
	    else
	      sol1[i+j*n_std_sta] = sol1[i+j*n_std_sta] + expense1[trace_eli-1+(starter+i*n_std_sta)*leng];
	  }
	}
	
	for(i=0; i < PowPowM; i++){
	  if( sol1[i] >= 0 ){
	    loc_exp1 = sol1[i];
	    i = PowPowM;
	  }
	}
	for(i=0; i < PowPowM; i++){
	  if( sol1[i] >= 0 && loc_exp1 > sol1[i])
	    loc_exp1 = sol1[i];
	}
	for(i=0; i < PowPowM; i++){
	  if( sol1[i] == loc_exp1 ){
	    dec = i;
	    i = PowPowM;
	  }
	}
	dec = dec % n_std_sta;
	num_K = solu[trace_eli-1+leng*(starter*n_std_sta+dec)];
	for(i=0; i < K; i++)
	  inp[i] = inp_pre[num_K-1+i*K2];
	
	if( len_C == 0 ){
	  tran_indx = starter + 1 + (num_K-1)*n_std_sta;
	  for(i=0; i < N; i++)
	    out[i] = B[tran_indx-1+i*(rowFunc-2)];
	}else{
	  for(i=0; i < N; i++){
	    out[i] = 0;
	    for(l=0; l < M; l++)
	      out[i] = out[i] + C[i+l*N]*cur_sta[l];
	    for(l=0; l < K; l++)    
	      out[i] = out[i] + D[i+l*N]*inp[l];
	    out[i] = out[i] % 2;
	  }
	  for(i=0; i < M; i++)
	    cur_sta[i] = cur_sta_pre[starter + i*n_std_sta];
	}
                
	/*  loc_exp = sum(rem(code(trace_eli, :) + out', 2));   */
	loc_exp1 = 0;
	for(i=0; i < N; i++)
	  loc_exp1 = loc_exp1 + (code[trace_eli-1+i*leng] + out[i]) % 2;
	
	starter = dec;
	for(i=0; i < K; i++)
	  Y1[i] = inp[i];
	Y1[K] = loca_exp1;    
      }else{  /* if (trace_flag != 0 ) */
	for(i=0; i < K+1; i++)
	  Y1[i] = 0;
	starter = 0;
      }  /* the end of "if (trace_flag != 0 )" */
      
      trace_num = trace_num % leng;
      
      x[1] = (real_T)fig_position;
      x[2] = (real_T)trace_num;
      x[3] = (real_T)trace_flag;
      x[4] = (real_T)starter;
      for(i=0; i < leng*PowPowM; i++)
	x[i+5] = (real_T)expense1[i];
      for(i=0; i < leng*PowPowM; i++)
	x[i+5+leng*PowPowM] = (real_T)solu[i];
      for(i=0; i < leng*N; i++)
	x[i+5+2*leng*PowPowM] = (real_T)code[i];
      for(i=0; i < K+1; i++)
	x[i+5+2*leng*PowPowM+leng*N] = (real_T)Y1[i];
    }/* the end of " if( expen_flag != 0 )" */
  } /*  the end of "if ( u[N] >= 0.2 ) */
}

/*
 * mdlDerivatives - compute the derivatives
 *
 * In this function, you compute the S-function block's derivatives.
 * The derivatives are placed in the dx variable.
 */
static void mdlDerivatives(real_T *dx, const real_T *x, const real_T *u, SimStruct *S, int_T tid)
{
}

/*
 * mdlTerminate - called when the simulation is terminated.
 *
 * In this function, you should perform any actions that are necessary
 * at the termination of a simulation.  For example, if memory was allocated
 * in mdlInitializeConditions, this is the place to free it.
 */
static void mdlTerminate(SimStruct *S)
{
}

#ifdef      MATLAB_MEX_FILE    /* Is this file being compiled as a MEX-file? */
#include "simulink.c"      /* MEX-file interface mechanism */
#else
#include "cg_sfun.h"       /* code generation registration function */
#endif
