/* csv - read write comma separated value format
 * Copyright (c) 2003 Michael B. Allen <mba2000 ioplex.com>
 *
 * The MIT License
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
#include <wchar.h>
#include <wctype.h>

#include "mba/msgno.h"
#include "mba/csv.h"

/* States of the row-parsing state machine used by csv_parse_str and
 * csv_parse_wcs.
 */
/* At the beginning of an element: leading whitespace and an opening quote
 * are handled here before collection starts. */
#define ST_START     1
/* Copying element characters into the output buffer (quoted or unquoted). */
#define ST_COLLECT   2
/* Whitespace has been seen after a closing quote; only more whitespace, a
 * separator or a newline may follow. */
#define ST_TAILSPACE 3
/* A quote was seen inside a quoted element: it either closes the element or
 * is the first half of an escaped (doubled) quote. */
#define ST_END_QUOTE 4

/* Byte-oriented input source for csv_parse_str: either a stdio stream or a
 * memory buffer, read one character at a time through snextch.
 */
struct sinput {
	/* Stream to read from; when NULL, snextch reads from src/sn instead. */
	FILE *in;
	/* Next unread byte of the memory buffer (used only when in is NULL). */
	const unsigned char *src;
	/* Number of bytes still available at src. */
	size_t sn;
	/* Running total of characters handed out by snextch. */
	size_t count;
};
/* Wide-character input source for csv_parse_wcs, read one character at a
 * time through wnextch.
 */
struct winput {
	/* Next unread wide character. */
	const wchar_t *src;
	/* Number of wide characters still available at src. */
	size_t sn;
	/* Running total of characters handed out by wnextch. */
	size_t count;
};

/* Fetch the next byte from a byte input source.
 *
 * Reads from the memory buffer when no stream is attached, otherwise from
 * the stream with fgetc. Every byte handed out bumps in->count.
 *
 * Returns the byte value, 0 when the source is exhausted, or -1 if the
 * stream reports a read error (the error is recorded with PMNO). A zero
 * byte in the input is therefore indistinguishable from end of input.
 */
static int
snextch(struct sinput *in)
{
	int c;

	if (in->in == NULL) {
		/* memory buffer source */
		if (in->sn == 0) {
			return 0;
		}
		in->sn--;
		c = *in->src++;
	} else {
		/* stream source */
		c = fgetc(in->in);
		if (c == EOF) {
			if (!ferror(in->in)) {
				return 0; /* plain end of file */
			}
			PMNO(errno);
			return -1;
		}
	}

	in->count++;
	return c;
}
/* Fetch the next wide character from a wide input source.
 *
 * Returns the character converted to int, or 0 once in->sn characters have
 * been consumed. Every character handed out bumps in->count.
 */
static int
wnextch(struct winput *in)
{
	int c;

	if (in->sn != 0) {
		c = *in->src++;
		in->sn--;
		in->count++;
		return c;
	}

	return 0;
}

/* Parse one CSV row from a byte source into NUL-terminated elements.
 *
 * Characters are pulled from `in` one at a time and copied into `buf`. Each
 * completed element is NUL-terminated in place and its start address is
 * stored in the next slot of `row`. Parsing stops at a newline, at end of
 * input (snextch returning 0, which also happens on a zero byte), once `rn`
 * elements have been stored, or when the `bn` budget is used up.
 *
 * in     input source (stream or memory buffer)
 * buf    output storage for the element text
 * bn     number of bytes available at buf
 * row    receives pointers to the elements; all rn slots are cleared first
 * rn     number of slots in row
 * sep    element separator character
 * flags  CSV_TRIM drops whitespace around unquoted elements;
 *        CSV_QUOTES enables double-quoted elements, inside which a doubled
 *        quote stands for one literal quote
 *
 * Returns the number of characters consumed from `in`, or -1 on error with
 * errno set: EILSEQ for a misplaced or unterminated quote, E2BIG when the
 * buffer budget reaches zero. A read error reported by snextch also
 * yields -1.
 */
static int
csv_parse_str(struct sinput *in,
			unsigned char *buf,
			size_t bn,
			unsigned char *row[],
			int rn,
			int sep,
			int flags)
{
	/* j = characters written to the current element,
	 * t = length of the element after trimming (t <= j),
	 * r = index of the next free slot in row. */
	int trim, quotes, ch, state, r, j, t, inquotes;

	trim = flags & CSV_TRIM;
	quotes = flags & CSV_QUOTES;
	state = ST_START;
	inquotes = 0;
	ch = r = j = t = 0;

	memset(row, 0, sizeof(unsigned char *) * rn);

	while (rn && bn && (ch = snextch(in)) > 0) {
		switch (state) {
			case ST_START:
				if (ch != '\n' && ch != sep && isspace(ch)) {
					/* leading whitespace: kept only when not trimming */
					if (!trim) {
						buf[j++] = ch; bn--;
						t = j;
					}
					break;
				} else if (quotes && ch == '"') {
					/* opening quote: discard anything collected so far */
					j = t = 0;
					state = ST_COLLECT;
					inquotes = 1;
					break;
				}
				state = ST_COLLECT;
				/* fallthrough: ch is the first character of the element */
			case ST_COLLECT:
				if (inquotes) {
					if (ch == '"') {
						state = ST_END_QUOTE;
						break;
					}
				} else if (ch == sep || ch == '\n') {
					row[r++] = buf; rn--;
					if (ch == '\n' && t && buf[t - 1] == '\r') {
						t--; bn++; /* crlf -> lf */
					}
					buf[t] = '\0'; bn--;
					buf += t + 1;
					j = t = 0;
					state = ST_START;
					inquotes = 0;
					if (ch == '\n') {
						rn = 0; /* end of row: leave the loop */
					}
					break;
				} else if (quotes && ch == '"') {
					PMNF(errno = EILSEQ, ": unexpected quote in element %d", (r + 1));
					return -1;
				}
				buf[j++] = ch; bn--;
				if (!trim || isspace(ch) == 0) {
					t = j; /* trailing whitespace stays beyond t when trimming */
				}
				break;
			case ST_TAILSPACE:
			case ST_END_QUOTE:
				if (ch == sep || ch == '\n') {
					/* quoted element complete; its content is kept verbatim (j, not t) */
					row[r++] = buf; rn--;
					buf[j] = '\0'; bn--;
					buf += j + 1;
					j = t =  0;
					state = ST_START;
					inquotes = 0;
					if (ch == '\n') {
						rn = 0;
					}
					break;
				} else if (quotes && ch == '"' && state != ST_TAILSPACE) {
					buf[j++] = '"';	bn--;		 /* nope, just an escaped quote */
					t = j;
					state = ST_COLLECT;
					break;
				} else if (isspace(ch)) {
					state = ST_TAILSPACE;
					break;
				}
				errno = EILSEQ;
				PMNF(errno, ": bad end quote in element %d", (r + 1));
				return -1;
		}
	}
	if (ch == -1) {
		/* snextch reported a read error */
		AMSG("");
		return -1;
	}
	if (bn == 0) {
		PMNO(errno = E2BIG);
		return -1;
	}
	if (rn) {
		/* Input ended without a newline: finish the element in progress. */
		if (state == ST_END_QUOTE || state == ST_TAILSPACE) {
			/* The quote was closed (possibly followed by whitespace), so the
			 * element is valid; keep the quoted text verbatim exactly as the
			 * separator/newline path does. */
			t = j;
		} else if (inquotes) {
			/* still inside an unterminated quoted element */
			PMNO(errno = EILSEQ);
			return -1;
		}
		row[r] = buf;
		buf[t] = '\0';
	}

	return (int)in->count;
}
/* Parse one CSV row from a wide-character source into terminated elements.
 *
 * Wide-character counterpart of csv_parse_str. Characters are pulled from
 * `in` one at a time and copied into `buf`. Each completed element is
 * terminated with L'\0' in place and its start address is stored in the
 * next slot of `row`. Parsing stops at a newline, at end of input (wnextch
 * returning 0, which also happens on a zero character), once `rn` elements
 * have been stored, or when the `bn` budget is used up.
 *
 * in     wide-character input source
 * buf    output storage for the element text
 * bn     number of wide characters available at buf
 * row    receives pointers to the elements; all rn slots are cleared first
 * rn     number of slots in row
 * sep    element separator character
 * flags  CSV_TRIM drops whitespace around unquoted elements;
 *        CSV_QUOTES enables double-quoted elements, inside which a doubled
 *        quote stands for one literal quote
 *
 * Returns the number of characters consumed from `in`, or -1 on error with
 * errno set: EILSEQ for a misplaced or unterminated quote, E2BIG when the
 * buffer budget reaches zero.
 */
static int
csv_parse_wcs(struct winput *in, wchar_t *buf, size_t bn, wchar_t *row[], int rn, wint_t sep, int flags)
{
	/* j = characters written to the current element,
	 * t = length of the element after trimming (t <= j),
	 * r = index of the next free slot in row. */
	int trim, quotes, state, r, j, t, inquotes;
	wint_t ch;

	trim = flags & CSV_TRIM;
	quotes = flags & CSV_QUOTES;
	state = ST_START;
	inquotes = 0;
	ch = r = j = t = 0;

	memset(row, 0, sizeof(wchar_t *) * rn);

	while (rn && bn && (ch = wnextch(in)) > 0) {
		switch (state) {
			case ST_START:
				if (ch != L'\n' && ch != sep && iswspace(ch)) {
					/* leading whitespace: kept only when not trimming */
					if (!trim) {
						buf[j++] = ch; bn--;
						t = j;
					}
					break;
				} else if (quotes && ch == L'"') {
					/* opening quote: discard anything collected so far */
					j = t = 0;
					state = ST_COLLECT;
					inquotes = 1;
					break;
				}
				state = ST_COLLECT;
				/* fallthrough: ch is the first character of the element */
			case ST_COLLECT:
				if (inquotes) {
					if (ch == L'"') {
						state = ST_END_QUOTE;
						break;
					}
				} else if (ch == sep || ch == L'\n') {
					row[r++] = buf; rn--;
					if (ch == L'\n' && t && buf[t - 1] == L'\r') {
						t--; bn++; /* crlf -> lf, as in csv_parse_str */
					}
					buf[t] = L'\0'; bn--;
					buf += t + 1;
					j = t = 0;
					state = ST_START;
					inquotes = 0;
					if (ch == L'\n') {
						rn = 0; /* end of row: leave the loop */
					}
					break;
				} else if (quotes && ch == L'"') {
					PMNF(errno = EILSEQ, ": unexpected quote in element %d", (r + 1));
					return -1;
				}
				buf[j++] = ch; bn--;
				if (!trim || iswspace(ch) == 0) {
					t = j; /* trailing whitespace stays beyond t when trimming */
				}
				break;
			case ST_TAILSPACE:
			case ST_END_QUOTE:
				if (ch == sep || ch == L'\n') {
					/* quoted element complete; its content is kept verbatim (j, not t) */
					row[r++] = buf; rn--;
					buf[j] = L'\0'; bn--;
					buf += j + 1;
					j = t =  0;
					state = ST_START;
					inquotes = 0;
					if (ch == L'\n') {
						rn = 0;
					}
					break;
				} else if (quotes && ch == L'"' && state != ST_TAILSPACE) {
					buf[j++] = L'"'; bn--;		 /* nope, just an escaped quote */
					t = j;
					state = ST_COLLECT;
					break;
				} else if (iswspace(ch)) {
					state = ST_TAILSPACE;
					break;
				}
				PMNF(errno = EILSEQ, ": bad end quote in element %d", (r + 1));
				return -1;
		}
	}
	/* Kept in parallel with csv_parse_str; wnextch as written in this file
	 * has no error return of its own. */
	if (ch == (wint_t)-1) {
		AMSG("");
		return -1;
	}
	if (bn == 0) {
		PMNO(errno = E2BIG);
		return -1;
	}
	if (rn) {
		/* Input ended without a newline: finish the element in progress. */
		if (state == ST_END_QUOTE || state == ST_TAILSPACE) {
			/* The quote was closed (possibly followed by whitespace), so the
			 * element is valid; keep the quoted text verbatim exactly as the
			 * separator/newline path does. */
			t = j;
		} else if (inquotes) {
			/* still inside an unterminated quoted element */
			PMNO(errno = EILSEQ);
			return -1;
		}
		row[r] = buf;
		buf[t] = L'\0';
	}

	return (int)in->count;
}
int
csv_row_parse_wcs(const wchar_t *src, size_t sn, wchar_t *buf, size_t bn, wchar_t *row[], int rn, int sep, int trim)
{
	struct winput input;
	input.src = src;
	input.sn = sn;
	input.count = 0;
	return csv_parse_wcs(&input, buf, bn, row, rn, (wint_t)sep, trim);
}
int
csv_row_parse_str(const unsigned char *src, size_t sn, unsigned char *buf, size_t bn, unsigned char *row[], int rn, int sep, int trim)
{
	struct sinput input;
	input.in = NULL;
	input.src = src;
	input.sn = sn;
	input.count = 0;
	return csv_parse_str(&input, buf, bn, row, rn, sep, trim);
}
/* Parse one CSV row read from a stdio stream.
 *
 * Wraps `in` in a sinput source and hands it to csv_parse_str together with
 * the output buffer (buf, bn) and the element pointer array (row, numcols).
 * The `trim` argument is forwarded unchanged as the parser's flags word.
 *
 * The memory-buffer members of the source are explicitly cleared: snextch
 * falls back to them when the stream pointer is NULL, so leaving them
 * uninitialized would read indeterminate values. With them cleared, a NULL
 * stream is treated as empty input.
 *
 * Returns whatever csv_parse_str returns: the number of characters
 * consumed, or -1 on error.
 */
int
csv_row_fread(FILE *in, unsigned char *buf, size_t bn, unsigned char *row[], int numcols, int sep, int trim)
{
	struct sinput input;

	input.in = in;
	input.src = NULL;
	input.sn = 0;
	input.count = 0;

	return csv_parse_str(&input, buf, bn, row, numcols, sep, trim);
}



/* syntax highlighted by Code2HTML, v. 0.9.1 */