#include "csvtoken.h"

#include <mbstring.h>
#include <mbctype.h>
#include <tchar.h>

/*
 * SBCS(ASCII)
 */
/*
 * Scans forward from ind and returns the position of the first ',' that is
 * not enclosed in double quotes, or max_ when no such comma exists.
 * Every '"' encountered flips the "inside quotes" state.
 */
scsv_tokenizer::const_pointer
scsv_tokenizer::next_comma(const_pointer ind) {
  bool quoted = false;
  for ( ; ind < max_; ++ind ) {
    const char_type c = *ind;
    if ( c == '"' ) {
      quoted = !quoted;
    } else if ( c == ',' && !quoted ) {
      // Field delimiter found; leave ind pointing at it.
      break;
    }
  }
  return ind;
}

/*
 * Sets up tokenizing of the NUL-terminated string src: the cursor cur_
 * starts at the first character and max_ marks the terminating NUL.
 */
scsv_tokenizer::scsv_tokenizer(const_pointer src) : src_(src) {
  const_pointer terminator = src + strlen(src);
  max_ = terminator;
  cur_ = src;
}

/*
 * Extracts the next comma-separated field and moves the cursor past its
 * delimiter.
 *
 * Quote handling: a '"' met outside a quoted section opens one and is
 * dropped.  Inside a quoted section a doubled quote yields one literal '"',
 * while a single '"' closes the section and is dropped.  As a result an
 * empty quoted field decodes to the empty string and a quoted field holding
 * only an escaped quote decodes to a single '"'.
 *
 * A trailing comma (and an empty input string) produces one final empty
 * field, because the cursor only passes max_ after the last field is read.
 *
 * Returns a NUL-terminated buffer allocated with new[] that the caller must
 * release with delete[], or 0 when no fields remain.
 */
scsv_tokenizer::pointer scsv_tokenizer::next() {
  if ( cur_ > max_ ) {
    return 0;
  }
  const_pointer st = cur_;
  cur_ = next_comma(cur_);
  // Unquoting never lengthens the text, so raw length + terminator suffices.
  pointer buf = new char_type[cur_ - st + 1];
  pointer p = buf;
  bool inquote = false;
  while ( st < cur_ ) {
    char_type ch = *st++;
    if ( ch != '"' ) {
      *p++ = ch;
    } else if ( !inquote ) {
      // Opening quote: not part of the value.
      inquote = true;
    } else if ( (st < cur_) && *st == '"' ) {
      // Doubled quote inside a quoted section: one literal quote.
      *p++ = ch;
      ++st;
    } else {
      // Closing quote: not part of the value.
      inquote = false;
    }
  }
  *p = '\0';
  // Step over the comma, or past max_ after the last field so that
  // empty() becomes true.
  ++cur_;
  return buf;
}

/*
 * True once the cursor has moved beyond max_, which is the state in which
 * next() returns 0.
 */
bool scsv_tokenizer::empty() const {
  return max_ < cur_;
}

/*
 * MBCS(Shift_JIS)
 */
/*
 * Scans forward from ind, one multibyte character at a time, and returns the
 * position of the first single-byte ',' that is not enclosed in double
 * quotes, or the position where scanning stopped at max_.
 * Only bytes classified as _MBC_SINGLE are examined for ',' and '"'.
 */
mcsv_tokenizer::const_pointer mcsv_tokenizer::next_comma(const_pointer ind) {
  bool quoted = false;
  for ( ; ind < max_; ind = _mbsinc(ind) ) {
    const char_type c = *ind;
    if ( _mbbtype(c, 0) != _MBC_SINGLE ) {
      // Not a single-byte character: cannot be a quote or a delimiter.
      continue;
    }
    if ( c == '"' ) {
      quoted = !quoted;
    } else if ( c == ',' && !quoted ) {
      // Field delimiter found; leave ind pointing at it.
      break;
    }
  }
  return ind;
}

/*
 * Sets up tokenizing of the NUL-terminated multibyte string src: the cursor
 * cur_ starts at the first byte and max_ marks the terminating NUL.
 * The length is measured in bytes, hence strlen on the raw bytes.
 */
mcsv_tokenizer::mcsv_tokenizer(const_pointer src) : src_(src) {
  const_pointer terminator = src + strlen(reinterpret_cast<const char*>(src));
  max_ = terminator;
  cur_ = src;
}

/*
 * Extracts the next comma-separated field from the multibyte string and
 * moves the cursor past its delimiter.
 *
 * Quote handling (single-byte '"' only): a quote met outside a quoted
 * section opens one and is dropped.  Inside a quoted section a doubled quote
 * yields one literal '"', while a single quote closes the section and is
 * dropped.  As a result an empty quoted field decodes to the empty string.
 *
 * A trailing comma (and an empty input string) produces one final empty
 * field, because the cursor only passes max_ after the last field is read.
 *
 * Returns a NUL-terminated buffer allocated with new[] that the caller must
 * release with delete[], or 0 when no fields remain.
 */
mcsv_tokenizer::pointer mcsv_tokenizer::next() {
  if ( cur_ > max_ ) {
    return 0;
  }
  const_pointer st = cur_;
  cur_ = next_comma(cur_);
  // Unquoting never lengthens the text, so raw length + terminator suffices.
  pointer buf = new char_type[cur_ - st + 1];
  pointer p = buf;
  bool inquote = false;
  while ( st < cur_ ) {
    const_pointer chbeg = st;
    char_type ch = *st;
    st = _mbsinc(st);
    if ( _mbbtype(ch, 0) != _MBC_SINGLE || ch != '"' ) {
      // Copy exactly the bytes _mbsinc stepped over.  This keeps the output
      // no longer than the input even if a lead byte sits directly before
      // the terminator and is therefore advanced by only one byte.
      while ( chbeg < st ) {
        *p++ = *chbeg++;
      }
    } else if ( !inquote ) {
      // Opening quote: not part of the value.
      inquote = true;
    } else if ( (st < cur_) && _mbbtype(*st, 0) == _MBC_SINGLE && *st == '"' ) {
      // Doubled quote inside a quoted section: one literal quote.
      *p++ = ch;
      ++st;
    } else {
      // Closing quote: not part of the value.
      inquote = false;
    }
  }
  *p = '\0';
  // cur_ rests on a single-byte ',' or on the terminator at max_, so a plain
  // one-byte step is enough; after the last field this moves past max_ so
  // that empty() becomes true.
  ++cur_;
  return buf;
}

/*
 * True once the cursor has moved beyond max_, which is the state in which
 * next() returns 0.
 */
bool mcsv_tokenizer::empty() const {
  return max_ < cur_;
}

/*
 * Wide character (Unicode, wchar_t)
 */
/*
 * Scans forward from ind and returns the position of the first L',' that is
 * not enclosed in double quotes, or max_ when no such comma exists.
 * Every L'"' encountered flips the "inside quotes" state.
 */
wcsv_tokenizer::const_pointer wcsv_tokenizer::next_comma(const_pointer ind) {
  bool quoted = false;
  for ( ; ind < max_; ++ind ) {
    const char_type c = *ind;
    if ( c == L'"' ) {
      quoted = !quoted;
    } else if ( c == L',' && !quoted ) {
      // Field delimiter found; leave ind pointing at it.
      break;
    }
  }
  return ind;
}

/*
 * Sets up tokenizing of the NUL-terminated wide string src: the cursor cur_
 * starts at the first character and max_ marks the terminating L'\0'.
 */
wcsv_tokenizer::wcsv_tokenizer(const_pointer src) : src_(src) {
  const_pointer terminator = src + wcslen(src);
  max_ = terminator;
  cur_ = src;
}

/*
 * Extracts the next comma-separated field from the wide string and moves the
 * cursor past its delimiter.
 *
 * Quote handling: a L'"' met outside a quoted section opens one and is
 * dropped.  Inside a quoted section a doubled quote yields one literal
 * L'"', while a single quote closes the section and is dropped.  As a result
 * an empty quoted field decodes to the empty string and a quoted field
 * holding only an escaped quote decodes to a single L'"'.
 *
 * A trailing comma (and an empty input string) produces one final empty
 * field, because the cursor only passes max_ after the last field is read.
 *
 * Returns a L'\0'-terminated buffer allocated with new[] that the caller
 * must release with delete[], or 0 when no fields remain.
 */
wcsv_tokenizer::pointer wcsv_tokenizer::next() {
  if ( cur_ > max_ ) {
    return 0;
  }
  const_pointer st = cur_;
  cur_ = next_comma(cur_);
  // Unquoting never lengthens the text, so raw length + terminator suffices.
  pointer buf = new char_type[cur_ - st + 1];
  pointer p = buf;
  bool inquote = false;
  while ( st < cur_ ) {
    char_type ch = *st++;
    if ( ch != L'"' ) {
      *p++ = ch;
    } else if ( !inquote ) {
      // Opening quote: not part of the value.
      inquote = true;
    } else if ( (st < cur_) && *st == L'"' ) {
      // Doubled quote inside a quoted section: one literal quote.
      *p++ = ch;
      ++st;
    } else {
      // Closing quote: not part of the value.
      inquote = false;
    }
  }
  *p = L'\0';
  // Step over the comma, or past max_ after the last field so that
  // empty() becomes true.
  ++cur_;
  return buf;
}

/*
 * True once the cursor has moved beyond max_, which is the state in which
 * next() returns 0.
 */
bool wcsv_tokenizer::empty() const {
  return max_ < cur_;
}
