/* Copyright (c) 1984-98 by The MathWorks, Inc. */
/* $Revision: 1.6 $ $Date: 1998/12/18 16:28:53 $ */
/*
 * TEXTREAD Read formatted text files.
 *   [A,B,C,...] = TEXTREAD(FILENAME,FORMAT,N) reads data from the file FILENAME
 *   into the variables A,B,C,etc. The number and type of the return arguments
 *   are given by the FORMAT string. The number of return arguments is
 *   the number of conversion specifiers in the FORMAT string. The FORMAT string
 *   supports a subset of the specifiers and conventions of the C language FSCANF function.
 *   If N is specified, the format string will be reused N times. Values of N smaller
 *   than zero cause TEXTREAD to read the entire file (the default).
 *
 *   The FORMAT string can contain whitespace characters (which are ignored), ordinary
 *   characters (which are expected to match the next non-white space character
 *   in the input), or conversion specifications.
 *
 *   Supported conversion specifications:
 *     %d - read a signed integer value (output is a double array)
 *     %u - read an unsigned integer value (output is a double array)
 *     %f - read a floating point value (output is a double array)
 *     %s - read a whitespace separated string (output is a cellstr)
 *     %q - read a (possibly double quoted) string (output is a cellstr)
 *     %c - read characters (including white space) (output is char array)
 *     %[...] - reads the longest string containing characters in the set
 *              between the brackets (output is a cellstr)
 *     %[^...] - reads the longest non-empty string containing characters not
 *               in the set between brackets (output is a cellstr)
 *
 *   Using %* instead of % in a conversion causes TEXTREAD to skip the matching
 *   characters in the input (and no output is created for this conversion). The %
 *   can be followed by an optional field width to handle fixed width fields. For
 *   example %5d reads a 5 digit integer. In addition the %f format supports the
 *   form %<width>.<prec>f.
 *
 *   TEXTREAD works by matching and converting groups of characters from the input.
* These input fields are defined as a string of non-white space characters that
 *   extends to the next white space character or until the maximum field width is
 *   exhausted.
 *
 *   [...] = TEXTREAD(...,param,value,...) allows param/value pairs to be used to
 *   customize the behavior of TEXTREAD. Possible param/value options are:
 *     'whitespace'   - vector of characters to treat as whitespace
 *     'delimiter'    - delimiter characters (default is none)
 *     'expchars'     - exponent characters (default is 'eEdD')
 *     'bufsize'      - buffer size in bytes
 *     'headerlines'  - Number of lines at beginning of file to skip
 *     'commentstyle' - one of
 *          'matlab' -- characters after % are ignored
 *          'shell'  -- characters after # are ignored
 *          'c'      -- characters between slash-star and star-slash are ignored
 *          'c++'    -- characters after // are ignored
 *
 *   TEXTREAD is useful for reading text files with a known format. Both fixed and
 *   free format files can be handled.
 *
 *   Examples:
 *    Suppose the text file mydata.dat contains data in the following form:
 *        Sally Type1 12.34 45 Yes
 *        Joe Type2 23.54 60 No
 *        Bill Type1 34.90 12 No
 *
 *    This could be read using the following command
 *        [names,types,x,y,answer] = textread('mydata.dat','%s %s %f %d %s');
 *
 *    Read file as a fixed format file while skipping the doubles
 *        [names,types,y,answer] = textread('mydata.dat','%9c %5s %*f %2d %3s');
 *
 *    Read file and match Type literal
 *        [names,typenum,x,y,answer] = textread('mydata.dat','%s Type%d %f %d %s');
 *
 *    Read m-file into cell array of strings
 *        file = textread('fft.m','%s','delimiter','\n','whitespace','');
 *
 *   See also DLMREAD, SSCANF.
 *
 * Clay M.
Thompson 3-3-98 */ static char rcsid[] = "$Id: textread.c,v 1.6 1998/12/18 16:28:53 clay Exp $"; #include #include #define NDEBUG 1 /* MATLAB API header file */ #include "mex.h" #include "matrix.h" /* File defines */ #define OPEN_SET '[' #define CLOSE_SET ']' #define ToNumber(c) ((c) - '0') #define DEFAULTBUFSIZE 4095 #define UNSPECIFIED -1 #define READ_WHOLE_FILE -1 #define DEFAULT_LINKED_BUFFER_LENGTH 100 #define PRIVATE static /* File globals */ PRIVATE char *delimiter = NULL; PRIVATE int headerlines = 0; PRIVATE int commentstyle = 0; PRIVATE char *whitespace = NULL; PRIVATE char defaultWhitespace[] = " \t\r\n\b"; PRIVATE char *expchars = NULL; PRIVATE char defaultExpchars[] = "eEdD"; PRIVATE char noDelimiter[] = ""; PRIVATE FILE *fp = NULL; PRIVATE int bufsize = DEFAULTBUFSIZE; PRIVATE char *buf; PRIVATE bool firsttime = true; PRIVATE int pushBackBuffer[4] = {'\0','\0','\0','\0'}; PRIVATE int pushBackLength = 0; PRIVATE int *matches = NULL; /* used to keep track of param/value matches */ typedef enum {READ_SUCCESS, READ_FAILURE, READ_BUFFER_OVERFLOW} readStatus; /************************************************************ * Character I/O ************************************************************/ /* * GetCharacter -- Get one character */ PRIVATE int GetCharacter(void) { int result; if (pushBackLength == 0) { result = getc(fp); } else { result = pushBackBuffer[--pushBackLength]; } return result; } /* * UngetCharacter -- Unget one character * * Up to four characters can be pushed back. 
*/ PRIVATE void UngetCharacter(int ch) { mxAssert(pushBackLength < 4,"Overflowed internal buffer."); pushBackBuffer[pushBackLength++] = ch; } PRIVATE void InitializePushBackBuffer(void) { int i; for (i=0; i< 4; i++) pushBackBuffer[i] = '\0'; pushBackLength = 0; } /********************************************************* * Mex utilities *********************************************************/ /* * muIsDoubleScalar * * True if input is a real double scalar */ PRIVATE bool muIsDoubleScalar(const mxArray *A) { return mxIsDouble(A) && (mxGetNumberOfElements(A)==1) && !mxIsComplex(A); } /* * muGetDoubleScalar * * Purpose: Get double value from MATLAB array; error out if array is * empty or not double * * Inputs: A --- MATLAB array * name --- string identifying what the input array is supposed * to represent. name is used to construct a * meaningful error message. * Outputs: none * Return: double-precision value * */ PRIVATE double muGetDoubleScalar(const mxArray *A, char *name) { double result; char error_message[255]; static char trailer[] = " must be a scalar double."; mxAssertS(A != NULL, "NULL mxArray pointer"); mxAssertS(name != NULL, "NULL char pointer"); if (!muIsDoubleScalar(A)) { strncpy(error_message, name, 255-strlen(trailer)-1); strcat(error_message, trailer); mexErrMsgTxt(error_message); } result = *((double *) mxGetPr(A)); return(result); } /* * muGetIntegerScalar * * Purpose: Get an integer value from MATLAB array; error out if array is * empty or not an integer. * * Inputs: A --- MATLAB array * name --- string identifying what the input array is supposed * to represent. name is used to construct a * meaningful error message. 
* Outputs: none * Return: integer value * */ PRIVATE int muGetIntegerScalar(const mxArray *A, char *name) { double d; int result; char error_message[255]; static char trailer[] = " must be a scalar integer."; mxAssertS(A != NULL, "NULL mxArray pointer"); mxAssertS(name != NULL, "NULL char pointer"); if (!muIsDoubleScalar(A)) { strncpy(error_message, name, 255-strlen(trailer)-1); strcat(error_message, trailer); mexErrMsgTxt(error_message); } d = *((double *) mxGetPr(A)); result = (int)d; if (((double) result) != d) { strncpy(error_message, name, 255-strlen(trailer)-1); strcat(error_message, trailer); mexErrMsgTxt(error_message); } return(result); } /* * muGetString * * Purpose: Get C string from MATLAB string array; error out if * array is not a string array * * Inputs: A --- MATLAB array * name --- string identifying what the input array is supposed * to represent. name is used to construct a * meaningful error message. * Outputs: none * Return: string * * Note: This function allocates memory to hold the string. * The calling function is responsible for freeing the memory when done * with it! */ PRIVATE char *muGetString(const mxArray *A, char *name) { int numEl; char *result = NULL; char error_message[255]; static char trailer[] = " must be a string."; mxAssertS(A != NULL, "NULL mxArray pointer"); mxAssertS(name != NULL, "NULL char pointer"); if (!mxIsChar(A)) { strncpy(error_message, name, 255-strlen(trailer)-1); strcat(error_message, trailer); mexErrMsgTxt(error_message); } numEl = mxGetM(A) * mxGetN(A); result = mxCalloc(numEl+1, sizeof(*result)); mxGetString(A, result, numEl+1); return(result); } /* * muGetStringCC * * Purpose: Get C string from MATLAB string array (with carriage control); * error out if array is not a string array or if \c carriage * control is badly formed. * * Inputs: A --- MATLAB array * name --- string identifying what the input array is supposed * to represent. name is used to construct a * meaningful error message. 
* Outputs: none * Return: string * * Note: This function allocates memory to hold the string. * The calling function is responsible for freeing the memory when done * with it! */ PRIVATE char *muGetStringCC(const mxArray *A, char *name) { int numEl; char *result = NULL; char error_message[255]; static char trailer[] = " must be a string."; static char cc_trailer[] = " has bad \\ constant."; char *p, *q; mxAssertS(A != NULL, "NULL mxArray pointer"); mxAssertS(name != NULL, "NULL char pointer"); if (!mxIsChar(A)) { strncpy(error_message, name, 255-strlen(trailer)-1); strcat(error_message, trailer); mexErrMsgTxt(error_message); } numEl = mxGetM(A) * mxGetN(A); result = mxCalloc(numEl+1, sizeof(*result)); mxGetString(A, result, numEl+1); /* Convert carriage control sequences \c into equivalent characters */ p = q = result; while (*p != '\0') { if (*p != '\\') *q++ = *p++; else if (*p == '\\') { p++; switch (*p) { case '\0': strncpy(error_message, name, 255-strlen(trailer)-1); strcat(error_message, cc_trailer); mexErrMsgTxt(error_message); break; case 'n': *q++ = '\n'; break; case 'r': *q++ = '\r'; break; case 't': *q++ = '\t'; break; case 'b': *q++ = '\b'; break; case '\\': *q++ = '\\'; break; default: strncpy(error_message, name, 255-strlen(trailer)-1); strcat(error_message, cc_trailer); mexErrMsgTxt(error_message); } p++; } } *q++ = '\0'; return result; } /********************************************************* * Whitespace and delimiters *********************************************************/ /* * DestroyWhitespace -- Free whitespace character buffer */ PRIVATE void DestroyWhitespace(void) { mxFree(whitespace); whitespace = NULL; } /* * DestroyExpchars -- Free expchars character buffer */ PRIVATE void DestroyExpchars(void) { if (expchars != defaultExpchars) mxFree(expchars); expchars = NULL; } /* * DestroyDelimiter -- Free delimiter character buffer if necessary */ PRIVATE void DestroyDelimiter(void) { if (delimiter != noDelimiter && delimiter != NULL) { 
mxFree(delimiter);
        delimiter = NULL;
    }
}

/*
 * Look for param/value pair of the form 'Bufsize',N. Set
 * the bufsize global to the integer specified or the default
 * set if the param/value pair isn't found.
 *
 * NOTE(review): the text of this file is damaged inside the for-statement
 * below.  Everything between the loop header and the tail of what appears
 * to be CreateInputfield() is missing (the rest of this function and, it
 * seems, the inputfield / LinkedBuffer type definitions).  The surviving
 * tokens are kept exactly as found; restore from the pristine source.
 */
PRIVATE void LookforAndGetBufsize(
    const mxArray *prhs[],
    int start,
    int end
)
{
    char *tmp;
    int i;

    bufsize = DEFAULTBUFSIZE;
    mxAssert(matches != NULL,"");

    for (i=start; itype = UNKNOWN_FORMAT;
    /* Remaining defaults for a freshly created input field. */
    inpf->width = UNSPECIFIED;
    inpf->prec = UNSPECIFIED;
    inpf->format_chars = NULL;
    inpf->nchars = 0;
    inpf->skip = false;
    inpf->count = 0;
    inpf->next = NULL;

    return inpf;
}

/*
 * Destroy inputfield list
 *
 * Walks the list through the next links and releases every node with
 * mxFree.  Only the nodes themselves are freed.
 */
PRIVATE void DestroyInputfieldList(inputfield *head)
{
    while (head != NULL)
    {
        inputfield *next;

        /* Remember the successor before the node is released. */
        next = head->next;
        mxFree(head);
        head = next;
    }
}

/*
 * Field type string from enum.
 *
 * Returns a constant, human readable description of field type t; any
 * value not listed yields "unknown".
 */
PRIVATE const char *FieldTypeStr(
    inputfieldtype t
)
{
    switch (t)
    {
    case LITERAL_FORMAT: return "literal string";
    case D_FORMAT: return "integer";
    case U_FORMAT: return "unsigned integer";
    case F_FORMAT: return "floating point number";
    case C_FORMAT: return "characters";
    case S_FORMAT:
    case Q_FORMAT:
    case SET_FORMAT:
    case NSET_FORMAT: return "string";
    case UNKNOWN_FORMAT: return "unknown";
    }
    return "unknown";
}

/*
 * Return true if character c is one of set_chars
 *
 * set_chars is scanned up to its terminating NUL, so an empty set never
 * matches and c == '\0' never matches.
 */
PRIVATE bool InSet(
    char c,
    const char *set_chars
)
{
    const char *p = set_chars;

    while (*p != '\0')
    {
        if (c == *p++)
            return true;
    }
    return false;
}

/*
 * Return true if character c is one of the N set_chars
 *
 * NOTE(review): the text is damaged inside the for-statement below.  The
 * rest of this function and the start of what appears to be
 * CreateLinkedBuffer() are missing; the surviving tokens are kept exactly
 * as found.
 */
PRIVATE bool InSetn(
    char c,
    const char *set_chars,
    int nchars
)
{
    const char *p = set_chars;
    int i;

    for (i=0; idata = mxCalloc(n,size);
    result->n = n;
    result->size = size;
    result->next = NULL;

    /* Assume that this is a standalone buffer */
    result->head = result;
    result->segment = 0;
    result->totalelements = n;

    return result;
}

/*
 * GrowLinkedBuffer -- Add a buffer to the list that buffer is part of
 *
 * On return *buffer points at the newly appended node.
 */
PRIVATE void GrowLinkedBuffer(
    LinkedBuffer **buffer
)
{
    LinkedBuffer *p, *q;

    mxAssert(*buffer != NULL,"");

    p = *buffer;

    /* Find
the end of the linked list */ while (p->next != NULL) { p = p->next; } q = CreateLinkedBuffer(p->n,p->size); /* Add the new buffer to the list */ q->segment = p->segment + 1; q->head = p->head; p->next = q; /* Add to totalelements */ q->head->totalelements += q->n; *buffer = q; } /* * DestroyLinkedBuffer -- Destroy the entire linked list */ PRIVATE void DestroyLinkedBuffer( LinkedBuffer *buffer ) { LinkedBuffer *p, *q; mxAssert(buffer != NULL,""); /* Start at the head and free everything */ p = buffer->head; while (p != NULL) { q = p; p = p->next; if (q->data != NULL) mxFree(q->data); mxFree(q); } } /* * GetLinkedBufferElementPtr -- Return void pointer to linked list element. * * Returns a void pointer to the memory at specified offset along linked list * of buffers. Allows code to treat the linked list of buffers like a huge * array. Buffer can be anywhere along the chain of buffers. Performance is * improved if the data in buffer happens to the right one for the given offset. * * An assertion is thrown if the element is off the list. * * Equivalent to * * &p[offset] * * for a normal array. Updates buffer to point to node where element is found. */ PRIVATE void *GetLinkedBufferElementPtr( LinkedBuffer **buffer, /* One of the buffers along the chain */ int offset /* Offset from beginning of linked buffer as if a huge array */ ) { int seg; LinkedBuffer *p = *buffer; mxAssert(p != NULL,""); if ((seg = GetLinkSegmentFromOffset(p->n,offset)) != p->segment) { /* Look for correct segment starting from the head */ p = p->head; while (p != NULL && p->segment < seg) { p = p->next; } /* Update buffer so that next search will be faster */ *buffer = p; } mxAssert(p->segment == seg,"Segment out of range."); return (void *) (((char *)p->data) + GetBufferPositionFromOffset(p->n,offset)*p->size); } /* * GetLinkedBufferElementPtrWithGrowth -- Return void pointer to linked list element * Grow the buffer if necessary. 
*/
PRIVATE void *GetLinkedBufferElementPtrWithGrowth(
    LinkedBuffer **buffer, /* One of the buffers along the chain */
    int offset             /* Offset from beginning of linked buffer as if a huge array */
)
{
    mxAssert(*buffer != NULL,"");

    /* One more segment is appended when offset lies beyond the current capacity. */
    if (offset >= (*buffer)->head->totalelements)
        GrowLinkedBuffer(buffer);

    return GetLinkedBufferElementPtr(buffer,offset);
}

/***************************************************************
 * Format parsing routines                                      *
 ***************************************************************/

/*
 * Parse format spec. Return number of characters parsed.
 *
 * p must point at the '%' that starts the specification.  On success
 * inpf->type, and where present inpf->skip, inpf->width and inpf->prec, are
 * filled in; for %[...] and %[^...] inpf->format_chars points at the first
 * set character inside the format string and inpf->nchars is the number of
 * set characters.  On a malformed specification inpf->type is set to
 * UNKNOWN_FORMAT.
 */
PRIVATE int ParseFormatSpec(
    inputfield *inpf, /* input field to fill in */
    const char *p     /* pointer into format string where format spec starts */
)
{
    bool done = false;
    int count = 1;    /* Accounts for the leading % */

    mxAssert(*p=='%',"All formats start with '%'");
    ++p;

    while (!done)
    {
        /*
         * Optional '-', '*', width and .precision may precede the conversion
         * character.  The <ctype.h> classifiers are only defined for values
         * representable as unsigned char, hence the casts.
         */
        if (*p == '-')                              /* Ignore justification on reading */
        {
            ++p; ++count;
        }
        else if (*p == '*')                         /* '*' is the skip indicator */
        {
            inpf->skip = true;
            ++p; ++count;
        }
        else if (isdigit((unsigned char)*p))        /* Get width value */
        {
            inpf->width = 0;
            while (isdigit((unsigned char)*p))
            {
                inpf->width = inpf->width * 10 + ((int)(*p - '0'));
                ++p; ++count;
            }
        }
        else if (*p == '.')                         /* Get precision value */
        {
            ++p; ++count;
            inpf->prec = 0;
            while (isdigit((unsigned char)*p))
            {
                inpf->prec = inpf->prec * 10 + ((int)(*p - '0'));
                ++p; ++count;
            }
        }
        else if (*p == OPEN_SET)
        {
            ++p; ++count;
            if (*p == '^')                          /* Check for negation character */
            {
                inpf->type = NSET_FORMAT;
                ++p; ++count;
            }
            else
            {
                inpf->type = SET_FORMAT;
            }
            inpf->format_chars = p;
            inpf->nchars = 0;

            /* Special case: Check for CLOSE_SET as first character.
             * If so CLOSE_SET is in the scanset.
             */
            if (*p == CLOSE_SET)
            {
                ++p; ++count;
                inpf->nchars++;
            }

            /* Search for end of set */
            while (*p != '\0' && *p != CLOSE_SET)
            {
                ++p; ++count;
                inpf->nchars++;
            }
            if (*p != CLOSE_SET)
                goto handle_error;                  /* unterminated set */

            ++count;                                /* the closing bracket */
            done = true;
        }
        else
        {
            /* Anything else must be one of the conversion characters. */
            switch (*p)
            {
            case 'd': inpf->type = D_FORMAT; break;
            case 'u': inpf->type = U_FORMAT; break;
            case 'f': inpf->type = F_FORMAT; break;
            case 's': inpf->type = S_FORMAT; break;
            case 'q': inpf->type = Q_FORMAT; break;
            case 'c': inpf->type = C_FORMAT; break;
            default:
                goto handle_error;
            }
            ++count;
            done = true;
        }
    }

    /* Special case: %c implies %1c */
    if (inpf->type == C_FORMAT && inpf->width == UNSPECIFIED)
        inpf->width = 1;

    return count;

handle_error:
    inpf->type = UNKNOWN_FORMAT;
    return count;
}

/*
 * Parse format string into inputfields.  Returns a linked list of
 * inputfield definitions or NULL if format was badly formed.
 *
 * The caller is responsible for calling DestroyInputfieldList() on
 * the return argument when done with it.
*/ PRIVATE inputfield *ParseFormat( const char *format ) { const char *p = format; inputfield *head = NULL; inputfield *tail = NULL; inputfield *inpf; while (*p != '\0') { if (*p == '%' && *(p+1) != '%') { int n; /* Begin parsing format */ inpf = CreateInputfield(); if (head == NULL) { head = inpf; tail = inpf; } else { tail->next = inpf; tail = inpf; } n = ParseFormatSpec(inpf,p); if (inpf->type == UNKNOWN_FORMAT) goto handle_error; p += n; } else if (InSet(*p,whitespace)) /* skip over whitespace */ { ++p; } else { /* * Everything that isn't a format spec or whitespace must be a literal */ inpf = CreateInputfield(); if (head == NULL) { head = inpf; tail = inpf; } else { tail->next = inpf; tail = inpf; } inpf->type = LITERAL_FORMAT; inpf->format_chars = p; inpf->nchars = 0; inpf->skip = true; /* Always skip literals */ /* Build literal from regular characters or the special %% */ while (*p != '\0' && ((*p != '%' && !InSet(*p,whitespace)) || (*p == '%' && *(p+1) == '%'))) { if (*p == '%') ++p; /* Skip over extra % */ ++p; inpf->nchars++; } } } return head; handle_error: DestroyInputfieldList(head); return NULL; } /* * Create mxArray based on input field type. Return NULL if field is to * be skipped. */ PRIVATE mxArray *CreateArrayFromInputField( const inputfield *inpf ) { mxArray *result = NULL; int dims[2]; dims[0] = inpf->count; dims[1] = 1; if (!(inpf->skip)) { switch (inpf->type) { case LITERAL_FORMAT: break; /* skip literals (no output) */ case D_FORMAT: case U_FORMAT: case F_FORMAT: result = mxCreateNumericArray(2,dims,mxDOUBLE_CLASS,mxREAL); break; case C_FORMAT: mxAssert(inpf->width != UNSPECIFIED,""); dims[1] = inpf->width; result = mxCreateCharArray(2,dims); break; case S_FORMAT: case Q_FORMAT: case SET_FORMAT: case NSET_FORMAT: result = mxCreateCellMatrix(inpf->count,1); break; default: mxAssertS(1,"Unknown format"); break; } } return result; } /* * Create Linked Buffer based on input field type. 
*/
PRIVATE LinkedBuffer *CreateBufferFromInputField(
    const inputfield *inpf,
    int nrecycle /* Number of rows or READ_WHOLE_FILE to use default */
)
{
    LinkedBuffer *result = NULL;
    int n;

    /* Segment length: a fixed default when the row count is not known in advance. */
    if (nrecycle == READ_WHOLE_FILE)
        n = DEFAULT_LINKED_BUFFER_LENGTH;
    else
        n = nrecycle;

    if (!(inpf->skip))
    {
        switch (inpf->type)
        {
        case LITERAL_FORMAT:
            break; /* skip literals (no output) */

        case D_FORMAT:
        case U_FORMAT:
        case F_FORMAT:
            /* One double per row */
            result = CreateLinkedBuffer(n,sizeof(double));
            break;

        case C_FORMAT:
            /* inpf->width mxChars per row */
            mxAssert(inpf->width != UNSPECIFIED,"");
            result = CreateLinkedBuffer(n,sizeof(mxChar)*inpf->width);
            break;

        case S_FORMAT:
        case Q_FORMAT:
        case SET_FORMAT:
        case NSET_FORMAT:
            /* One mxArray pointer per row */
            result = CreateLinkedBuffer(n,sizeof(mxArray *));
            break;

        default:
            /* NOTE(review): the asserted condition is the constant 1, so this can
             * never fire; presumably 0 was intended. */
            mxAssertS(1,"Unknown format");
            break;
        }
    }
    /* Skipped fields and literals leave result NULL; callers are expected not to ask for them. */
    mxAssert(result != NULL,"");
    return result;
}

/*
 * AllocateBuffers based on format type.
 *
 * This routine assumes that the number of unskipped input fields
 * matches nlhs.
 *
 * Returns an mxCalloc'ed array of nlhs buffer pointers, one per unskipped
 * field, in the order the fields appear in field_defs.
 */
PRIVATE LinkedBuffer **AllocateBuffers(
    int nlhs,
    const inputfield *field_defs,
    int nrecycle /* Number of rows to allocate or READ_WHOLE_FILE for auto grow */
)
{
    LinkedBuffer *tmp;
    const inputfield *inpf;
    int i = 0;
    LinkedBuffer **buffers;

    buffers = mxCalloc(nlhs,sizeof(LinkedBuffer *));

    inpf = field_defs;
    while (inpf != NULL)
    {
        if (!inpf->skip)
        {
            mxAssert(i < nlhs,"");
            tmp = CreateBufferFromInputField(inpf,nrecycle);
            buffers[i] = tmp;
            i++;
        }
        inpf = inpf->next;
    }
    mxAssert(i == nlhs,"");

    return buffers;
}

/*
 * DestroyBuffers
 *
 * NOTE(review): the text is damaged inside the for-statement below.  The rest
 * of this function and the start of the routine that copies a linked buffer
 * into an mxArray (called as CopyBufferIntoArray further down) are missing;
 * the surviving tokens are kept exactly as found.
 */
PRIVATE void DestroyBuffers(
    LinkedBuffer *buffers[],
    int n
)
{
    int i;

    for (i=0; itype)
    {
    case LITERAL_FORMAT:
        break; /* skip literals (no output) */

    case D_FORMAT:
    case U_FORMAT:
    case F_FORMAT:
        mxAssert(mxIsNumeric(array),"");
        p = buffer->head;
        q = (char *)mxGetData(array);
        if (n <= p->n)
        {
            /* If the first buffer isn't full, we can just replace the data pointer */
            mxFree(q);
            mxSetPr(array,p->data);
            p->data = NULL; /* the array now refers to this memory; detach it from the buffer node */
        }
        else
        {
            /* Copy any whole buffers into the array */
            while (p != NULL && n > p->n)
            {
                memcpy(q,p->data,p->n*p->size);
                q += p->n*p->size;
                n -= p->n;
                p = p->next;
            }

            /* Copy the data in the last buffer */
            if (p != NULL)
            {
                memcpy(q,p->data,n*p->size);
            }
        }
        break;

    case C_FORMAT:
        /* Transpose the data during the copy so that the strings come out row-wise */
        mxAssert(mxGetClassID(array) == mxCHAR_CLASS,"");
        ch = (mxChar *)mxGetData(array);
        nchars = inpf->width;
        /* NOTE(review): text is missing inside the for-statement below; what follows
         * the gap appears to be the loop that creates one output array per unskipped
         * field and stores it in plhs. */
        for (i=0; iskip)
        {
            mxAssert(i < nlhs,"");
            tmp = CreateArrayFromInputField(inpf);
            if (tmp != NULL)
            {
                CopyBufferIntoArray(tmp,buffers[i],inpf,inpf->count);
                plhs[i] = tmp;
            }
            i++;
        }
        inpf = inpf->next;
    }
}

/*
 * SkipHeader -- Skip over lines in the file
 *
 * NOTE(review): text is missing inside the for-statement below.  What follows
 * the gap is the comment-skipping switch of a whitespace-skipping routine
 * (presumably SkipWhitespace); the first surviving tokens are the tail of a
 * comment whose opening was lost.  Tokens are kept exactly as found.
 */
PRIVATE void SkipHeader(int n)
{
    int i;
    int ch;

    for (i=0; i are ignored */
            if (ch == '%')
            {
                /* Discard the rest of the line */
                while ((ch = GetCharacter()) != EOF && ch != '\n' && ch != '\r')
                    /* Ignore this character */;
            }
            break;

        case 2: /* C: slash-star star-slash are ignored */
            if (ch == '/')
            {
                /* One character of look-ahead decides whether a comment starts here */
                ch2 = ch;
                ch = GetCharacter();
                if (ch == '*')
                {
                    inComment = true;
                    while ((ch = GetCharacter()) != EOF && inComment)
                    {
                        while (ch != EOF && ch == '*' && inComment)
                        {
                            ch = GetCharacter();
                            if (ch == '/')
                                inComment = false;
                        }
                    }
                }
                else
                {
                    /* Not a comment: restore the look-ahead and keep the slash */
                    UngetCharacter(ch);
                    ch = ch2;
                }
            }
            break;

        case 3: /* C++: slash-slash are ignored */
            if (ch == '/')
            {
                ch2 = ch;
                ch = GetCharacter();
                if (ch == '/')
                {
                    while ((ch = GetCharacter()) != EOF && ch != '\n' && ch != '\r')
                        /* Ignore this character */;
                }
                else
                {
                    /* Not a comment: restore the look-ahead and keep the slash */
                    UngetCharacter(ch);
                    ch = ch2;
                }
            }
            break;

        case 4: /* Shell: # are ignored */
            if (ch == '#')
            {
                while ((ch = GetCharacter()) != EOF && ch != '\n' && ch != '\r')
                    /* Ignore this character */;
            }
            break;

        default:
            mxAssert(0,"Out of range comment style");
        }

        /* Check for non-whitespace character */
        if (!InSet(ch,whitespace))
        {
            break;
        }
    }
    /* Leave the first non-whitespace character for the next reader */
    UngetCharacter(ch);
}

/*
 * SkipDelimiter -- Skip over a delimiter character
 *
 * Reads one character and consumes it only if it is in the delimiter set,
 * so at most one delimiter is skipped per call.
 */
PRIVATE void SkipDelimiter(void)
{
    int ch;

    ch = GetCharacter();

    /* Put it back if it isn't a delimiter */
    if (!InSet(ch,delimiter))
        UngetCharacter(ch);
}

/*
 * ReadNaNInf -- Read a NaN or INF value from the file into
result. * * Returns READ_SUCCESS and the result if read was successful. * Returns READ_FAILURE and the result is undefined if read did not encounter a NaN or Inf. */ PRIVATE readStatus ReadNaNInf( const inputfield *inpf, double *result, bool read_signed /* True to read signed inf */ ) { char ch[4]; int n = 0; bool valid = false; int count; count = inpf->width; /* Need at least 3 characters for a NaN or Inf */ if (count > 0 && count < 3) return READ_FAILURE; ch[n++] = GetCharacter(); if (read_signed && ch[n-1] == '-') /* Try to get -inf or -Inf */ { if (count > 0 && count < 4) /* Need at least 4 characters or a -Inf */ goto done; ch[n++] = GetCharacter(); if (ch[n-1] == 'I' || ch[n-1] == 'i') { ch[n++] = GetCharacter(); if (ch[n-1] == 'n') { ch[n++] = GetCharacter(); if (ch[n-1] == 'f') { *result = -mxGetInf(); valid = true; } } } } else if (ch[n-1] == 'I' || ch[n-1] == 'i') /* Try to get inf or Inf */ { ch[n++] = GetCharacter(); if (ch[n-1] == 'n') { ch[n++] = GetCharacter(); if (ch[n-1] == 'f') { *result = mxGetInf(); valid = true; } } } else if (ch[n-1] == 'N' || ch[n-1] == 'n') /* Try to get NaN or nan */ { ch[n++] = GetCharacter(); if (ch[n-1] == 'a') { ch[n++] = GetCharacter(); if (ch[n-1] == 'N' || ch[n-1] == 'n') { *result = mxGetNaN(); valid = true; } } } done: if (valid) return READ_SUCCESS; else { while (n>0) UngetCharacter(ch[--n]); return READ_FAILURE; } } /* * ReadDFormat -- Read a signed integer from the file into result. * * Returns READ_SUCCESS and the result if read was successful. * Returns READ_FAILURE and the result is undefined if read encountered bad data. * * When delimiter is defined, return zero for an empty field. 
*/ PRIVATE readStatus ReadDFormat( const inputfield *inpf, double *result ) { int ch; int n = 0; bool negative = false; int count; int valid = false; count = inpf->width; ch = GetCharacter(); if (InSet(ch,delimiter)) /* If delimiter is defined, empty fields return 0 */ { *result = 0; valid = true; } else if (ch != EOF) { UngetCharacter(ch); /* Look for NaNInf */ if (ReadNaNInf(inpf,result,true) == READ_SUCCESS) return READ_SUCCESS; ch = GetCharacter(); /* Look for sign */ if (ch == '-') { negative = true; --count; } else if (ch == '+') { negative = false; --count; } else UngetCharacter(ch); /* Read digits */ while ((ch = GetCharacter()) != EOF && isdigit(ch) && count--) { n = n*10 + ToNumber(ch); valid = true; } if (negative) n = -n; *result = n; } UngetCharacter(ch); if (valid) return READ_SUCCESS; else return READ_FAILURE; } /* * ReadUFormat -- Read an unsigned integer from the file into result. * * Returns READ_SUCCESS and the result if read was successful. * Returns READ_FAILURE and the result is undefined if read encountered bad data. * * When delimiter is defined, return zero for an empty field. */ PRIVATE readStatus ReadUFormat( const inputfield *inpf, double *result ) { int ch; unsigned int n = 0; int count; int valid = false; count = inpf->width; /* Look for NaNInf */ if (ReadNaNInf(inpf,result,false) == READ_SUCCESS) return READ_SUCCESS; /* Sanity check of input */ ch = GetCharacter(); if (ch != EOF && isdigit(ch)) { UngetCharacter(ch); /* Read digits */ while ((ch = GetCharacter()) != EOF && isdigit(ch) && count--) { n = n*10 + ToNumber(ch); } *result = n; valid = true;; } else if (InSet(ch,delimiter)) /* If delimiter is defined, empty fields return 0 */ { *result = 0; valid = true; } UngetCharacter(ch); if (valid) return READ_SUCCESS; else return READ_FAILURE; } /* * ReadFFormat -- Read a double from the file into result. * * A floating point number has the format [0-9].[0-9][eEdD][0-9] * * Returns READ_SUCCESS and the result if read was successful. 
* Returns READ_FAILURE and the result is undefined if read encountered bad data. * * When delimiter is defined, return zero for an empty field. */ PRIVATE readStatus ReadFFormat( const inputfield *inpf, double *result ) { int ch; int i = 0; int count; int prec; double d = 0.0; bool valid = false; count = inpf->width; /* Limit numbers to less than bufsize characters for now */ if (count < 0 || count > bufsize) count = bufsize-1; prec = inpf->prec; ch = GetCharacter(); if (InSet(ch,delimiter)) /* If delimiter is defined, empty fields return 0 */ { *result = 0.0; valid = true; UngetCharacter(ch); } else if (ch != EOF) { UngetCharacter(ch); /* Look for NaNInf */ if (ReadNaNInf(inpf,result,true) == READ_SUCCESS) return READ_SUCCESS; ch = GetCharacter(); /* Look for sign */ if (ch == '-' || ch == '+') { buf[i++] = ch; --count; } else UngetCharacter(ch); if (count == 0 || ch == EOF) goto done; /* Read leading mantissa digits */ while ((ch = GetCharacter()) != EOF && isdigit(ch) && count--) { buf[i++] = ch; valid = true; } if (count == 0 || ch == EOF) { UngetCharacter(ch); goto done; } /* Look for decimal */ if (ch == '.' 
&& count--) { buf[i++] = ch; /* Read fractional part */ while ((ch = GetCharacter()) != EOF && isdigit(ch) && count-- && prec--) { buf[i++] = ch; valid = true; } if (!valid || count == 0 || prec == 0 || ch == EOF) { UngetCharacter(ch); if (!valid) UngetCharacter('.'); goto done; } } /* If no numbers have been seen yet, this is not a valid float */ if (!valid) { UngetCharacter(ch); goto done; } /* Look for exponent */ if (count > 1 && InSet(ch,expchars)) { buf[i++] = 'e'; count--; /* Look for exponent sign */ if (count > 1 && (ch = GetCharacter()) != EOF && (ch == '-' || ch == '+')) { buf[i++] = ch; count--; } else UngetCharacter(ch); /* Read exponent */ while ((ch = GetCharacter()) != EOF && isdigit(ch) && count--) { buf[i++] = ch; } UngetCharacter(ch); if (count == 0 || ch == EOF) goto done; } else UngetCharacter(ch); done: buf[i] = '\0'; if (valid) *result = atof(buf); } if (valid) return READ_SUCCESS; else return READ_FAILURE; } /* * ReadSFormat -- Read a string from the file into buffer. * * Returns READ_SUCCESS and the result if read was successful. * Returns READ_FAILURE and the result is undefined if read was unsuccessful. * * It is possible to read 0 characters. */ PRIVATE readStatus ReadSFormat( const inputfield *inpf, char *result, /* buffer to write result into */ int buflen /* maximum number of characters in buffer */ ) { int ch; int i = 0; int count; count = inpf->width; /* Limit strings to buflen-1 */ if (count < 0 || count > buflen-1) count = buflen-1; while ((ch = GetCharacter()) != EOF && !InSet(ch,delimiter) && !InSet(ch,whitespace) && count--) { result[i++] = ch; } if (ch != EOF) UngetCharacter(ch); result[i++] = '\0'; if (i == buflen) return READ_BUFFER_OVERFLOW; else if (i > 0) return READ_SUCCESS; else return READ_FAILURE; /* No way to get here? */ } /* * ReadQFormat -- Read a (possibly " quoted) string from the file into buffer. * * The surrounding quotes in a quoted string are not returned. 
Embedded quotes * must show up repeated in the string. * * Returns READ_SUCCESS and the result if read was successful. * Returns READ_FAILURE and the result is undefined if read was unsuccessful. * * It is possible to read 0 characters. */ PRIVATE readStatus ReadQFormat( const inputfield *inpf, char *result, /* buffer to write result into */ int buflen /* maximum number of characters in buffer */ ) { int ch; int i = 0; int count; bool inQuote = false; count = inpf->width; /* Limit strings to buflen-1 */ if (count < 0 || count > buflen-1) count = buflen-1; while ((ch = GetCharacter()) != EOF && (inQuote || (!InSet(ch,delimiter) && !InSet(ch,whitespace))) && count--) { if (ch == '"') { /* Output opening quote unless this is the first character */ if (i > 0 && !inQuote) result[i++] = ch; inQuote = !inQuote; } else result[i++] = ch; } if (ch != EOF) UngetCharacter(ch); result[i++] = '\0'; if (i == buflen) return READ_BUFFER_OVERFLOW; else if (i > 0) return READ_SUCCESS; else return READ_FAILURE; /* No way to get here? */ } /* * ReadCFormat -- Read characters from the file into buffer. */ PRIVATE readStatus ReadCFormat( const inputfield *inpf, char *result, /* buffer to write result into */ int buflen /* maximum number of characters in buffer */ ) { int ch; int i = 0; int count; count = inpf->width; mxAssert(count != UNSPECIFIED,""); /* Limit strings to buflen-1 */ if (count < 0 || count > buflen-1) count = buflen-1; while ((ch = GetCharacter()) != EOF && count--) { result[i++] = ch; } if (ch != EOF) UngetCharacter(ch); result[i++] = '\0'; if (i == buflen) return READ_BUFFER_OVERFLOW; else if (i > inpf->width) return READ_SUCCESS; else return READ_FAILURE; } /* * ReadSetFormat -- Read characters that match set from the file into buffer. 
*/
PRIVATE readStatus ReadSetFormat(
    const inputfield *inpf,
    char *result,           /* buffer to write result into */
    int buflen              /* maximum number of characters in buffer */
)
{
    const char *members = inpf->format_chars;   /* the set, not NUL terminated */
    int nmembers = inpf->nchars;
    int remaining = inpf->width;
    int len = 0;
    int ch;

    /* Limit strings to buflen-1 */
    if (remaining < 0 || remaining > buflen-1)
        remaining = buflen-1;

    /* Copy while the input stays inside the set and the width lasts */
    for (;;)
    {
        ch = GetCharacter();
        if (ch == EOF)
            break;
        if (!InSetn(ch,members,nmembers))
            break;
        if (remaining-- == 0)
            break;
        result[len++] = ch;
    }
    if (ch != EOF)
        UngetCharacter(ch);

    result[len++] = '\0';

    if (len == buflen)
        return READ_BUFFER_OVERFLOW;
    if (len > 1) /* Must get at least one match */
        return READ_SUCCESS;
    return READ_FAILURE;
}

/*
 * ReadNsetFormat -- Read characters not in set from the file into buffer.
 *
 * Mirror image of ReadSetFormat: copying stops at the first character that
 * IS in the set.  At least one character must be read for success.
 */
PRIVATE readStatus ReadNsetFormat(
    const inputfield *inpf,
    char *result,           /* buffer to write result into */
    int buflen              /* maximum number of characters in buffer */
)
{
    const char *members = inpf->format_chars;   /* the set, not NUL terminated */
    int nmembers = inpf->nchars;
    int remaining = inpf->width;
    int len = 0;
    int ch;

    /* Limit strings to buflen-1 */
    if (remaining < 0 || remaining > buflen-1)
        remaining = buflen-1;

    for (;;)
    {
        ch = GetCharacter();
        if (ch == EOF)
            break;
        if (InSetn(ch,members,nmembers))
            break;
        if (remaining-- == 0)
            break;
        result[len++] = ch;
    }
    if (ch != EOF)
        UngetCharacter(ch);

    result[len++] = '\0';

    if (len == buflen)
        return READ_BUFFER_OVERFLOW;
    if (len > 1) /* Must get at least one character */
        return READ_SUCCESS;
    return READ_FAILURE;
}

/*
 * ReadLiteralFormat -- Read a literal from the file into buffer.
*/ PRIVATE readStatus ReadLiteralFormat( const inputfield *inpf, char *result, /* buffer to write result into */ int buflen /* maximum number of characters in buffer */ ) { int ch; int i = 0; int j = 0; int count; const char *literal; int nchars; count = inpf->width; literal = inpf->format_chars; nchars = inpf->nchars; /* Limit strings to buflen-1 */ if (count < 0 || count > buflen-1) count = buflen-1; while ((ch = GetCharacter()) != EOF && i < nchars && ch == literal[i]) { result[i++] = ch; } if (ch != EOF) UngetCharacter(ch); result[i++] = '\0'; if (i == buflen) return READ_BUFFER_OVERFLOW; else if (i > nchars) return READ_SUCCESS; else return READ_FAILURE; } /* * Display helpful error message traceback */ PRIVATE void ErrorAndShowInfo( inputfieldtype t, int i, int j, readStatus status ) { int n; int ch; char buf[120]; if (status == READ_BUFFER_OVERFLOW) { sprintf(buf,"Buffer overflow (bufsize = %d) while reading %s from\nfile (row %d, field %d) ==> ", bufsize,FieldTypeStr(t),i+1,j+1); } else { sprintf(buf,"Trouble reading %s from file (row %d, field %d) ==> ", FieldTypeStr(t),i+1,j+1); } n = strlen(buf); /* Append the rest of the unread line to error message */ while ((ch = GetCharacter()) != EOF && n < 100 && ch != '\n' && ch != '\r') { buf[n++] = ch; } /* Add a trailing \n at the end of a line */ if (ch == '\n' || ch == '\r') { buf[n++] = '\\'; buf[n++] = 'n'; } buf[n] = '\0'; mexErrMsgTxt(buf); } /********************************************************* * Main routines *********************************************************/ /* * ReadFile -- Read file using inputfield definitions into buffers. * The arrays are grown if necessary. 
 *
 * After skipping the header lines, the list of field definitions is
 * applied repeatedly (once per "row") until end of file, or until nrecycle
 * rows have been started when nrecycle is not READ_WHOLE_FILE.
 *
 * For every field the leading whitespace is skipped, the value is read by
 * the reader that matches inpf->type, and -- unless the field is marked
 * skip -- stored in the next output buffer at row i.  Any status other
 * than READ_SUCCESS is reported through ErrorAndShowInfo.
 *
 * nlhs is not referenced in the body.
 */
PRIVATE void ReadFile(
    int nlhs,
    LinkedBuffer *buffers[],
    inputfield *field_defs,
    int nrecycle /* Number of times to reuse formats or READ_WHOLE_FILE for read whole file */
    )
{
    int i = 0;          /* current row (number of passes over the format) */
    int j;              /* index of the next output buffer within the row */
    int field;          /* index of the current field, skipped ones included */
    inputfield *inpf;   /* current field definition */
    readStatus status;

    SkipHeader(headerlines);

    while (!feof(fp) && (nrecycle == READ_WHOLE_FILE || i < nrecycle)) {
        j = 0;
        field = 0;
        inpf = field_defs;
        while (!feof(fp) && inpf != NULL) {
            mxArray **pa;
            double d,*pd;
            int n;
            mxChar *p;
            char *q;

            SkipWhitespace();
            /* Skipping whitespace may have reached end of file; in that
             * case nothing is read and the inner loop ends on its next
             * feof test. */
            if (!feof(fp)) {
                switch (inpf->type) {
                case LITERAL_FORMAT:
                    status = ReadLiteralFormat(inpf,buf,bufsize);
                    mxAssert(inpf->skip,"Literals are always skipped.");
                    break;
                case D_FORMAT:
                    status = ReadDFormat(inpf,&d);
                    if (!inpf->skip && status == READ_SUCCESS) {
                        pd = (double *)GetLinkedBufferElementPtrWithGrowth(&buffers[j++],i);
                        *pd = d;
                    }
                    break;
                case U_FORMAT:
                    status = ReadUFormat(inpf,&d);
                    if (!inpf->skip && status == READ_SUCCESS) {
                        pd = (double *)GetLinkedBufferElementPtrWithGrowth(&buffers[j++],i);
                        *pd = d;
                    }
                    break;
                case F_FORMAT:
                    status = ReadFFormat(inpf,&d);
                    if (!inpf->skip && status == READ_SUCCESS) {
                        pd = (double *)GetLinkedBufferElementPtrWithGrowth(&buffers[j++],i);
                        *pd = d;
                    }
                    break;
                case C_FORMAT:
                    status = ReadCFormat(inpf,buf,bufsize);
                    if (!inpf->skip && status == READ_SUCCESS) {
                        /* Copy at most inpf->width characters from buf,
                         * widening each char to an mxChar */
                        p = (mxChar *)GetLinkedBufferElementPtrWithGrowth(&buffers[j++],i);
                        q = buf;
                        n = inpf->width;
                        while (*q != '\0' && n--) {
                            *p++ = *q++;
                        }
                    }
                    break;
                case S_FORMAT:
                    status = ReadSFormat(inpf,buf,bufsize);
                    if (!inpf->skip && status == READ_SUCCESS) {
                        pa = (mxArray **)GetLinkedBufferElementPtrWithGrowth(&buffers[j++],i);
                        *pa = mxCreateString(buf);
                    }
                    break;
                case Q_FORMAT:
                    status = ReadQFormat(inpf,buf,bufsize);
                    if (!inpf->skip && status == READ_SUCCESS) {
                        pa = (mxArray **)GetLinkedBufferElementPtrWithGrowth(&buffers[j++],i);
                        *pa = mxCreateString(buf);
                    }
                    break;
                case SET_FORMAT:
                    status = ReadSetFormat(inpf,buf,bufsize);
                    if (!inpf->skip && status == READ_SUCCESS) {
                        pa = (mxArray **)GetLinkedBufferElementPtrWithGrowth(&buffers[j++],i);
                        *pa = mxCreateString(buf);
                    }
                    break;
                case NSET_FORMAT:
                    status = ReadNsetFormat(inpf,buf,bufsize);
                    if (!inpf->skip && status == READ_SUCCESS) {
                        pa = (mxArray **)GetLinkedBufferElementPtrWithGrowth(&buffers[j++],i);
                        *pa = mxCreateString(buf);
                    }
                    break;
                case UNKNOWN_FORMAT:
                    mexErrMsgTxt("Attempt to read unknown format from file.");
                    break;
                }
                /* NOTE(review): status is assigned in every case except
                 * UNKNOWN_FORMAT; this test relies on mexErrMsgTxt not
                 * returning in that case -- confirm. */
                if (status != READ_SUCCESS)
                    ErrorAndShowInfo(inpf->type,i,field,status);

                field++;
                inpf->count++;      /* one more value read for this field */
                inpf = inpf->next;

                /* With an explicit delimiter, consume it (and any
                 * whitespace in front of it) after each field */
                if (delimiter != noDelimiter) {
                    SkipWhitespace();
                    SkipDelimiter();
                }
            }
        }
        i++;
    }
    mxAssertS(nrecycle == READ_WHOLE_FILE || i == nrecycle,"Overflowed buffer.");
}

/*
 * AtExitFcn -- Close any open files.
 *
 * Closes fp if it is open and resets it to NULL.  Registered with
 * mexAtExit by mexFunction.
 */
PRIVATE void AtExitFcn(void)
{
    if (fp != NULL)
        fclose(fp);
    fp = NULL;
}

/*
 * mexFunction gateway
 *
 * prhs[0] is the file name and prhs[1] the format string; an optional
 * numeric prhs[2] is the format recycle count, and any remaining inputs
 * are param/value pairs.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    char *filename;
    char *format;
    int nrecycle = READ_WHOLE_FILE;
    inputfield *field_defs;
    inputfield *inpf;
    int count;
    LinkedBuffer **buffers;
    mxArray *lhs,*rhs;
    int i;

    /* First time through define the AtExit fcn */
    if (firsttime) {
        if (mexAtExit(AtExitFcn))
            mexErrMsgTxt("Couldn't install the AtExit function.");
        firsttime = false;
    }

    /*
     * Input argument checking
     */
    if (nrhs < 2)
        mexErrMsgTxt("Not enough input arguments.");

    if (!mxIsChar(prhs[0]))
        mexErrMsgTxt("First input must be a filename.");

    filename = muGetString(prhs[0],"Filename");

    /* Get format with carriage control */
    format = muGetStringCC(prhs[1],"Format");

    /* Exactly three inputs: the third one is the recycle count */
    if (nrhs == 3)
        nrecycle = (int) muGetDoubleScalar(prhs[2],"Format recycle count");

    if (nrhs > 3) {
        int paramStart;

        /* The param/value pairs start after the recycle count when one
         * is given, otherwise right after the format */
        if (muIsDoubleScalar(prhs[2])) {
            nrecycle = (int) muGetDoubleScalar(prhs[2],"Format recycle count");
            paramStart = 3;
        } else
            paramStart = 2;

        /* Consistency check: Param/value pairs come in pairs */
        if ((nrhs - paramStart) % 2 != 0)
            mexErrMsgTxt("Param/value pairs must come in pairs.");

        /* NOTE(review): matches is declared outside this function;
         * presumably it records which inputs were recognized -- confirm. */
        matches = mxCalloc(nrhs,sizeof(int));

        /* Get parameter values if they exist */
        LookforAndGetWhitespace(prhs,paramStart,nrhs);
        LookforAndGetExpchars(prhs,paramStart,nrhs);
        LookforAndGetDelimiter(prhs,paramStart,nrhs);
        commentstyle = LookforAndGetCommentStyle(prhs,paramStart,nrhs);
        LookforAndGetBufsize(prhs,paramStart,nrhs);
        LookforAndGetHeaderLines(prhs,paramStart,nrhs);
        RemoveDelimiterFromWhitespace();

        /* Check for unknown param/value pairs
         * (i.e. parameters that haven't been matched) */
        /* NOTE(review): the text of the following statement appears to be
         * truncated in this copy of the file (everything between the loop
         * condition and the test of a skip member is missing, including
         * the end of the param/value block and the code that builds
         * field_defs and starts counting unskipped fields).  Restore it
         * from a pristine copy of the source before building. */
    for (i=paramStart; iskip) ++count; inpf = inpf->next; }

    if (count != nlhs && !(nlhs == 0 && count == 1)) {
        mexErrMsgTxt(
            "Number of outputs must match the number of unskipped input fields.");
    }

    /* Handle the zero output case (for ans) */
    if (nlhs == 0 && count == 1)
        nlhs = 1;

    /* Close any open files */
    if (fp != NULL)
        fclose(fp);

    InitializePushBackBuffer();

    /* Try to open the file assuming we have a full path */
    fp = fopen(filename,"r");
    if (fp == NULL) {
        /* Try looking on path: ask MATLAB's "which" to resolve the name
         * given in prhs[0] and retry with its answer */
        rhs = (mxArray *)prhs[0];
        mexCallMATLAB(1,&lhs,1,&rhs,"which");
        if (mxIsEmpty(lhs))
            mexErrMsgTxt("File not found or permission denied.");
        mxFree(filename);
        filename = muGetString(lhs,"Filename");
        mxDestroyArray(lhs);
        fp = fopen(filename,"r");
        if (fp == NULL)
            mexErrMsgTxt("File not found or permission denied.");
    }

    /* Read the file into intermediate buffers, then copy them to plhs */
    buffers = AllocateBuffers(nlhs,field_defs,nrecycle);

    ReadFile(nlhs,buffers,field_defs,nrecycle);
    fclose(fp);
    fp = NULL;

    CopyBuffersIntoOutputs(nlhs,plhs,field_defs,buffers);

    /* Release everything allocated for this call */
    DestroyInputfieldList(field_defs);
    DestroyBuffers(buffers,nlhs);
    DestroyWhitespace();
    DestroyDelimiter();
    DestroyExpchars();
    mxFree(filename);
    mxFree(format);
    mxFree(buf);
}