/*
 * csv2xml : convert CSV to XML using mcsv_tokenizer
 *    compile : cl -GX csv2xml.cpp csvtoken.cpp
 */

// C++ libs
#include <string>    // string
#include <iostream>  // cin, cout, cerr
#include <fstream>   // ofstream
#include <vector>    // vector;

// C libs
#include <cstdio>     // vsprintf
#include <cstdarg>    // va_start, va_end

// MSVC libs
#include <mbstring.h> // _mbbtype
#include <mbctype.h>  // _MBC_SINGLE, _MBC_LEAD

// csv tokenizer
#include "csvtoken.h" // scsv_tokenizer

using namespace std;

/*
 * global variables
 *
 * Program-wide settings.  main() fills them in from the command line and
 * the make*() generators read them; colnames is filled in by makexml().
 */
string         docname   = "doc";   // name of the document (root) element; set by -doc
string         rowname   = "row";   // name of the element written for each CSV line; set by -row
vector<string> colnames;            // element name for each column, in column order
bool           gencol    = false;   // -col: take the column names from the first CSV line

bool           genxsl    = false;   // -xsl: also write <output>.xsl
bool           gendtd    = false;   // -dtd: also write <output>.dtd

string         output;              // base name of the generated files (no extension)

/*
 * printf-like formatter
 *
 * Formats the arguments according to `format` (printf syntax) and returns
 * a NUL-terminated C string holding the result.
 *
 * The result lives in a single buffer shared by every call, so it is only
 * valid until the next call to form(): stream or copy it immediately, and
 * never pass a previous result back in as an argument.  Not thread-safe.
 *
 * The buffer starts at 512 bytes and grows whenever a result does not fit,
 * so arbitrarily long fields cannot overrun it.  If the formatting itself
 * fails, an empty string is returned.
 */
const char* form(const char* format, ...) {
  static std::vector<char> buffer(512);

  va_list marker;
  va_start(marker, format);

  // The first attempt consumes `marker`; keep a copy for a possible retry.
  va_list retry;
  va_copy(retry, marker);

  const int needed = std::vsnprintf(&buffer[0], buffer.size(), format, marker);
  if ( needed < 0 ) {
    // encoding error reported by vsnprintf: hand back an empty string
    buffer[0] = '\0';
  } else if ( static_cast<std::size_t>(needed) >= buffer.size() ) {
    // output was truncated: enlarge to the exact size (+1 for the NUL) and redo
    buffer.resize(static_cast<std::size_t>(needed) + 1);
    std::vsnprintf(&buffer[0], buffer.size(), format, retry);
  }

  va_end(retry);
  va_end(marker);
  return &buffer[0];
}

/*
 * escapes special characters( & " ' < > )
 */
string escape(const unsigned char* str) {
  string result;
  while ( *str ) {
    int type = _mbbtype(*str,0);
    if ( type == _MBC_SINGLE ) {
      switch ( *str ) {
      case '&'  : result += "&amp;"; break;
      case '"'  : result += "&quot;"; break;
      case '\'' : result += "&apos;"; break;
      case '<'  : result += "&lt;"; break;
      case '>'  : result += "&gt;"; break;
      default   : result += *str;
      }
    } else {
      if ( type == _MBC_LEAD ) {
        result += *str++;
      }
      result += *str;
    }
    ++str;
  }
  return result;
}

/*
 * convert a csv-line to xml
 */
ostream& line2xml(ostream& strm, const string& line) {
  mcsv_tokenizer csv((mcsv_tokenizer::const_pointer)line.c_str());
  const char* row = rowname.c_str();
  strm << form("  <%s>\n", row);
  for ( int index = 0; !csv.empty(); ++index ) {
    mcsv_tokenizer::pointer token = csv.next();
    const char* col = colnames[index].c_str();
    strm << form("    <%s>%s</%s>\n", col, escape(token).c_str(), col);
    delete[] token;
  }
  strm << form("  </%s>\n", row);
  return strm;
}

/*
 * print usage
 *
 * Writes the command-line synopsis and the list of options to the standard
 * error stream, followed by an empty line.
 */
void usage() {
  std::cerr << "csv2xml [option...] <output>\n"
               "  -doc <doc_tag> : document name (default:<doc>)\n"
               "  -row <row_tag> : row name (default:<row>)\n"
               "  -col           : 1'st row as column names (default:<column##>)\n"
               "  -xsl           : create XSL(stylesheet) <output>.xsl\n"
               "  -dtd           : create DTD <output>.dtd\n"
               "<output>         : create XML <output>.xml\n"
            << std::endl;
}

/*
 * Encoding name placed in the encoding="..." attribute of the XML
 * declarations that makexml, makexsl and makedtd write.
 * The name is only written out; nothing in this file converts the data
 * to it, so it has to describe the encoding the CSV input already uses.
 * You may choose an EUC encoding name instead if you compile/run under UNIX.
 */
#define ENCODING "Shift_JIS"

/*
 * make XML
 */
void makexml() {
  ofstream strm;
  strm.open((output+".xml").c_str());
  strm << "<?xml version=\"1.0\" encoding=\"" ENCODING "\"?>\n"
          "<!-- created by csv2xml -->\n" 
       << endl;
  if ( genxsl ) {
    strm << form("<?xml-stylesheet type=\"text/xsl\" href=\"%s.xsl\"?>\n",output.c_str()) << endl;
  }
  if ( gendtd ) {
    strm << form("<!DOCTYPE %s SYSTEM \"%s.dtd\">\n", docname.c_str(), output.c_str()) << endl;
  }

  strm << form("<%s>\n", docname.c_str());
  bool at_first = true;
  string line;
  for ( getline(cin, line); !cin.eof(); getline(cin, line) ) {
    if ( at_first ) {
      at_first = false;
      mcsv_tokenizer csv((mcsv_tokenizer::const_pointer)line.c_str());
      while ( !csv.empty() ) {
        mcsv_tokenizer::pointer token = csv.next();
        if ( gencol ) {
          colnames.push_back((const char*)token);
        } else {
          colnames.push_back(form("column%d",colnames.size()));
        }
        delete[] token;
      }
      if ( !gencol ) {
        line2xml(strm, line) << endl;
      }
    } else {
      line2xml(strm, line) << endl;
    }
  }
  strm << form("</%s>\n", docname.c_str());
  strm.close();
}

/*
 * make XSL
 */
void makexsl() {
  ofstream strm;
  strm.open((output+".xsl").c_str());
  strm << "<?xml version=\"1.0\" encoding=\"" ENCODING "\"?>\n"
          "<!-- created by csv2xml -->\n"
          "<xsl:stylesheet xmlns:xsl=\"http://www.w3.org/TR/WD-xsl\" >\n\n";

  strm << "<xsl:template match=\"/\">\n"
          "  <html>\n"
          "    <header>\n"
          "      <title>" << docname << "</title>\n"
          "    </header>\n"
          "    <body>\n"
          "      <xsl:apply-templates select=\"" << docname << "\"/>\n"
          "    </body>\n"
          "  </html>\n"
          "</xsl:template>\n\n";

  strm << "<xsl:template match=\"" << docname << "\">\n"
          "  <table border=\"3\">\n"
          "  <tr>\n";
  for ( int i = 0; i < colnames.size(); ++i ) {
    strm << form("    <th>%s</th>\n", colnames[i].c_str());
  }
  strm << "  </tr>\n"
       << form("  <xsl:for-each select=\"%s\">\n", rowname.c_str())
       << "    <tr>\n";
  for ( i = 0; i < colnames.size(); ++i ) {
    strm << form("      <td><xsl:value-of select=\"%s\"/></td>\n", colnames[i].c_str());
  }
  strm << "    </tr>\n"
          "  </xsl:for-each>\n"
          "  </table>\n"
          "</xsl:template>\n\n"
          "</xsl:stylesheet>\n"
       << endl;
  strm.close();
}

/*
 * make DTD
 */
void makedtd() {
  ofstream strm;
  strm.open((output+".dtd").c_str());
  strm << form("<?xml version=\"1.0\" encoding=\"%s\"?>\n",ENCODING)
       << "<!-- created by csv2xml -->\n\n";
  strm << form("<!ELEMENT %s (%s*)>\n\n", docname.c_str(), rowname.c_str());
  strm << form("<!ELEMENT %s (", rowname.c_str());
  for ( int i = 0; i < colnames.size(); ++i ) {
    strm << colnames[i] << ( i == colnames.size()-1 ? ')' : ',');
  }
  strm << ">\n\n";
  for ( i = 0; i < colnames.size(); ++i ) {
    strm << form("<!ELEMENT %s (#PCDATA)>\n", colnames[i].c_str());
  }
  strm.close();
}

/*
 * main
 *
 * Parses the command line into the global settings, then generates
 * <output>.xml and, if requested, <output>.xsl and <output>.dtd.  The name
 * of each file is written to the standard error stream before it is made.
 *
 * Returns 1 after printing the usage text when the command line is invalid:
 * no arguments, an unknown option, -doc/-row without a value, or no
 * <output> name.  Returns 0 otherwise.
 */
int main(int argc, char* argv[]) {
  /*
   * parse command line
   */
  if ( argc <= 1 ) {
    usage();
    return 1;
  }
  for ( int i = 1; i < argc; ++i ) {
    const std::string arg = argv[i];
    if ( arg == "-doc" || arg == "-row" ) {
      // these options take a value; argv[argc] is a null pointer, so make
      // sure a value is really there before consuming it
      if ( i + 1 >= argc ) {
        usage();
        return 1;
      }
      const char* value = argv[++i];
      if ( arg == "-doc" ) {
        docname = value;
      } else {
        rowname = value;
      }
    } else if ( arg == "-col" ) {
      gencol = true;
    } else if ( arg == "-xsl" ) {
      genxsl = true;
    } else if ( arg == "-dtd" ) {
      gendtd = true;
    } else if ( !arg.empty() && arg[0] == '-' ) {
      // unknown option
      usage();
      return 1;
    } else {
      // anything else is the output base name; the last one given wins
      output = arg;
    }
  }

  if ( output.empty() ) {
    usage();
    return 1;
  }

  /*
   * generate XML, XSL, and DTD
   * (the XML goes first: it is what collects the column names)
   */
  std::cerr << output << ".xml " << std::flush;
  makexml();
  if ( genxsl ) {
    std::cerr << output << ".xsl " << std::flush;
    makexsl();
  }
  if ( gendtd ) {
    std::cerr << output << ".dtd " << std::flush;
    makedtd();
  }
  std::cerr << std::endl;

  return 0;
}
