/* Module to read and write CSV files
   By Albert Graf and Eddie Rucker
   8 July 2007

   == Reading ==
   
   Regular expressions are used to parse CSV files. This is more or less the 
   same as described in http://www.hotblue.com/article0000.aspx?a=0006, but 
   also allows whitespace surrounding field values.

   Example:
   ==> csvread "\"try \"\"this\"\"\",A,\"34\", 4.5\n , C, \"00\", 100\n"
   [["try \"this\"","A","34",4.5],["","C","00",100]]

   Each field must be terminated either with a field delimiter "," or a record
   delimiter "\n". Newlines embedded in double quotes are handled properly. 

   The useful reading functions are:
   1. To read an entire file at once use 'csvreadfile F'.
   2. To read a file, line-by-line, use 'csvreadline F'.
   3. To parse a CSV formatted string to a list, use 'csvread S'.

   == Writing ==

   Note that csvread and csvwrite are inverses (up to whitespace surrounding a
   field value, which is removed by csvread). That is,
     csvread (csvwrite LIST) => LIST and
     csvwrite (csvread STRING) => STRING
   Therefore, input to csvwrite is a list of lists. Again newlines embedded in
   double quotes are handled properly.

   Example:
   ==> csvread (csvwrite [["A",45]])
   [["A",45]]
   ==> csvwrite (csvread "testing,1,2,\"3\"\n")
   "testing,1,2,\"3\"\n"

   Example 2:
   ==> csvwrite [["try\n\"this\"", 34, "00"],["give it a try", 48, "91"]]
   "\"try\n\"\"this\"\"\",34,\"00\"\n\"give it a try\",48,\"91\"\n"
   
   The useful writing functions are:
   1. To write an entire file at once, use 'csvwritefile F L'.
   2. To write a file, line-by-line, use 'csvwriteline F L'.
   3. To make a CSV formatted string from a list, use 'csvwrite L'.
*/

// Public reading interface: each of these ends up calling csvread.
public csvreadfile F, csvreadline F, csvread S;
// Public writing interface: each of these ends up calling csvwrite.
public csvwritefile F L, csvwriteline F L, csvwrite L;

// Internal helpers: parse assembles the records from the regex matches,
// fieldVal decodes one field when reading, and fieldStr/fieldStr_ encode
// one field when writing.
private parse Xss Ys, fieldVal S, fieldStr S, fieldStr_ Xs Ys DQF;

/* csvreadfile F: fetch the contents of file F with fget and hand the text
   to csvread; the result is whatever csvread returns for that text. */
csvreadfile F:File
	= csvread Text where Text = fget F;

/* csvreadline F: fetch one line from file F with fgets and hand it to
   csvread.
   NOTE(review): csvread only accepts fields terminated by "," or "\n", so
   this presumably relies on fgets keeping the trailing newline -- confirm. */
csvreadline F:File
	= csvread Line where Line = fgets F;

/* csvread S: parse the CSV text S into a list of records (lists of fields).

   A global regex search splits S into fields. Each match consists of
   optional blanks/tabs, an optional value (either unquoted text without
   double quote, comma or newline, or a double-quoted text in which "" stands
   for a literal double quote), optional blanks/tabs, and a terminating ","
   (group 7) or "\n" (group 8).

   For every match a triple is produced:
     (last regs, regskip, fieldVal $ reg (hd regs))
   i.e. the last matched group number (7 or 8 is what parse expects), the
   text skipped before the match, and the decoded field value. The triples
   are then folded into records by parse. */
csvread S:String
	= parse [[]] Fields
	    where Fields = regex "g"
	      "([ \t]*(([^\",\n]+)|\"(([^\"]|\"\")*)\")?[ \t]*)((,)|(\n))"
	      S (last regs,regskip,fieldVal$reg (hd regs));

/* parse Xss Ys: assemble records from the (Delim, Skipped, Value) triples
   produced by csvread.

   Xss is the accumulator: its head is the record currently being built (in
   reverse field order), its tail holds the finished records (newest first).
   Ys  is the list of remaining triples.

   The equations are tried in order, so the error check comes before the
   delimiter cases. */

// No triples left: drop the record under construction and return the
// finished records in input order.
parse [_|Done] []
	= reverse Done;
// Text was skipped between two matches, i.e. the input contained something
// the field pattern could not match.
parse _ [(_,Skipped,_)|_]
	= error (sprintf "invalid field contents at or near %s" (str Skipped))
	    if not null Skipped;
// Delimiter group 7 (","): the field belongs to the current record.
parse [Row|Done] [(7,_,Val)|Rest]
	= parse [[Val|Row]|Done] Rest;
// Delimiter group 8 ("\n"): the field closes the current record; start a
// fresh empty one.
parse [Row|Done] [(8,_,Val)|Rest]
	= parse [[],reverse [Val|Row]|Done] Rest;

/* isnumq X: true if X is a quoted number, i.e. a term of the form 'N with
   N of type Num; false for anything else.
   Declared `special` so that the argument is passed as written instead of
   being evaluated first (callers pass the quoted result of valq). */
special isnumq X;
// quoted value whose content is a Num
isnumq 'X:Num 
	= true;
// everything else
isnumq _
	= false otherwise;

/* fieldVal S: decode the raw text S of one CSV field into a value.

   After surrounding blanks/tabs are removed (giving R), the first
   applicable case wins:
     1. R parses (via valq, i.e. without being evaluated) to a number:
        the number is returned.
     2. R is enclosed in double quotes: the text between the quotes is
        returned, with every "" replaced by a single double quote.
     3. otherwise R itself is returned as a string. */
fieldVal S
// strip leading and trailing blanks/tabs; R is the trimmed field text
  where [R] = regex "" "^[ \t]*((.*[^ \t])?)[ \t]*$" S (reg 1):
// the number check must come first: a quoted field such as \"32\" still has
// its quotes here, so it is not a number and stays a string
	 = N if isnumq 'N where 'N = valq R;
// convert \"\" to \": each match contributes the text skipped before it plus
// one double quote; the regskip appended at the end supplies the text that
// follows the last match
	 = strcat (regex "g" "\"\"" W (regskip++"\"")) ++ regskip
// strip the enclosing \" \" from the field; W is the text between them
	     where [W] = regex "" "^\"(.*)\"$" R (reg 1);
// neither a number nor a quoted string: return the trimmed text unchanged
	 = R;

/* csvwritefile F L: format the list of records L with csvwrite and write
   the resulting text to file F with fputs. */
csvwritefile F:File L:List
	= fputs F Text where Text = csvwrite L;

/* csvwriteline F L: write a single record to file F.
   The second argument is a non-empty list of records; only its first
   record is formatted (via csvwrite) and written with fputs, the remaining
   records are ignored. */
csvwriteline F:File [L|_]
	= fputs F Line where Line = csvwrite [L];

/* csvwrite L: format the list of records L (a list of lists of field
   values) as CSV text. Fields are rendered with fieldStr and separated by
   ","; every record, including the last one, is terminated by "\n".

   The result is always a string. In particular an empty list of records
   gives the empty string "" (not the empty list), so that the result can be
   passed to fputs and csvwrite (csvread "") yields "" again. */
csvwrite []
	= "";
csvwrite L:List
	= (join "\n" (map (\T . (join "," (map fieldStr T))) L)) ++ "\n";
 
/* fieldStr S: render one field value S as CSV text.

   - A string that reads as a number (e.g. "32") is enclosed in double
     quotes, so that reading it back yields a string and not a number.
   - A number is written with str.
   - Any other string is passed to fieldStr_, which encloses it in double
     quotes if it contains whitespace, a comma or a double quote
     (DQF = Double Quote Flag, initially false).

   The "reads as a number" test uses valq together with isnumq, exactly as
   fieldVal does when reading. valq only parses the text; the field contents
   are never evaluated as a Q expression, which matters because they are
   arbitrary data.
*/
fieldStr S:String
   = sprintf "\"%s\"" S if isnumq 'N where 'N = valq S;
fieldStr S:Num
   = str S;
fieldStr S:String
   = fieldStr_ (chars S) [] false;
 
/* fieldStr_ Xs Ys DQF: worker for fieldStr.
   Xs  = characters of the field still to be processed
   Ys  = output pieces collected so far, in reverse order
   DQF = true once a character was seen that requires the field to be
         enclosed in double quotes */

// Input exhausted: join the collected pieces, enclosing them in double
// quotes if the flag is set.
fieldStr_ [] Acc DQF
	= "\"" ++ strcat (reverse Acc) ++ "\"" if DQF;
	= strcat (reverse Acc) otherwise;
// A double quote is doubled and forces quoting.
fieldStr_ ["\""|Rest] Acc _   = fieldStr_ Rest ["\"\""|Acc] true;
// Blank, tab, newline and comma are copied unchanged and force quoting.
fieldStr_ [" "|Rest]  Acc _   = fieldStr_ Rest [" "|Acc] true;
fieldStr_ ["\t"|Rest] Acc _   = fieldStr_ Rest ["\t"|Acc] true;
fieldStr_ ["\n"|Rest] Acc _   = fieldStr_ Rest ["\n"|Acc] true;
fieldStr_ [","|Rest]  Acc _   = fieldStr_ Rest [","|Acc] true;
// Any other character is copied and leaves the flag as it is.
fieldStr_ [C|Rest]    Acc DQF = fieldStr_ Rest [C|Acc] DQF;