{- |
   module: Text.CSV 
   license: MIT 
   maintainer: Jaap Weel <weel at ugcs dot caltech dot edu> 
   stability: provisional 
   portability: ghc 

   This module parses and dumps documents that are formatted more or
   less according to RFC 4180, \"Common Format and MIME Type for
   Comma-Separated Values (CSV) Files\",
   <http://www.rfc-editor.org/rfc/rfc4180.txt>.

   There are some issues with this RFC. I will describe what these
   issues are and how I deal with them.

   First, the RFC prescribes CRLF standard network line breaks, but
   you are likely to run across CSV files with other line endings, so
   we accept any sequence of CRs and LFs as a line break. 

   Second, there is an optional header line, but the format for the
   header line is exactly like a regular record and you can only
   figure out whether it exists from the mime type, which may not be
   available. I ignore the issues of header lines and simply turn them
   into regular records.
   
   Third, there is an inconsistency, in that the formal grammar
   specifies that fields can contain only certain US ASCII characters,
   but the specification of the MIME type allows for other character
   sets. I will allow all characters in fields, except for commas, CRs
   and LFs in unquoted fields. This should make it possible to parse
   CSV files in any encoding, but it allows for characters such as
   tabs that the RFC may be interpreted to forbid even in non-US-ASCII
   character sets. 

   NOTE: Several people have asked me to implement extensions that are
   used in non-US versions Microsoft Excel. This library implements
   RFC-compliant CSV, not Microsoft Excel CSV. If you want to write a
   library that deals with the CSV-like formats used by non-US versions
   of Excel or any other software, you should write a separate library. I
   suggest you call it Text.SSV, for "Something Separated Values."
-}

{- Copyright (c) Jaap Weel 2007.  Permission is hereby granted, free
of charge, to any person obtaining a copy of this software and
associated documentation files (the "Software"), to deal in the
Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.  THE
SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -}

module Text.CSV (CSV
                 , Record
                 , Field
                 , csv
                 , parseCSV
                 , parseCSVFromFile
                 , parseCSVTest
                 , printCSV
                 ) where

import Text.ParserCombinators.Parsec
import Data.List (intersperse)

-- | A CSV file is a series of records. According to the RFC, the
-- records all have to have the same length. As an extension, I
-- allow variable length records.
type CSV = [Record]

-- | A record is a series of fields
type Record = [Field]

-- | A field is a string
type Field = String

-- | A Parsec parser for parsing CSV files
csv :: Parser CSV
csv :: Parser CSV
csv = do CSV
x <- Parser Record
record Parser Record -> ParsecT String () Identity String -> Parser CSV
forall s (m :: * -> *) t u a sep.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a]
`sepEndBy` ParsecT String () Identity Char
-> ParsecT String () Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 (String -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
oneOf String
"\n\r")
         ParsecT String () Identity ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof
         CSV -> Parser CSV
forall (m :: * -> *) a. Monad m => a -> m a
return CSV
x

record :: Parser Record
record :: Parser Record
record = (ParsecT String () Identity String
quotedField ParsecT String () Identity String
-> ParsecT String () Identity String
-> ParsecT String () Identity String
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT String () Identity String
field) ParsecT String () Identity String
-> ParsecT String () Identity Char -> Parser Record
forall s (m :: * -> *) t u a sep.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a]
`sepBy` Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
','

field :: Parser Field
field :: ParsecT String () Identity String
field = ParsecT String () Identity Char
-> ParsecT String () Identity String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (String -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
",\n\r\"")

quotedField :: Parser Field
quotedField :: ParsecT String () Identity String
quotedField = ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> ParsecT String () Identity String
-> ParsecT String () Identity String
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'"') (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'"') (ParsecT String () Identity String
 -> ParsecT String () Identity String)
-> ParsecT String () Identity String
-> ParsecT String () Identity String
forall a b. (a -> b) -> a -> b
$
              ParsecT String () Identity Char
-> ParsecT String () Identity String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (String -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
"\"" ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> ParsecT String () Identity Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT String () Identity Char -> ParsecT String () Identity Char
forall tok st a. GenParser tok st a -> GenParser tok st a
try (String -> ParsecT String () Identity String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"\"\"" ParsecT String () Identity String
-> ParsecT String () Identity Char
-> ParsecT String () Identity Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> ParsecT String () Identity Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
'"'))

-- | Given a file name (used only for error messages) and a string to
-- parse, run the parser.
parseCSV :: FilePath -> String -> Either ParseError CSV
parseCSV :: String -> String -> Either ParseError CSV
parseCSV = Parser CSV -> String -> String -> Either ParseError CSV
forall s t a.
Stream s Identity t =>
Parsec s () a -> String -> s -> Either ParseError a
parse Parser CSV
csv

-- | Given a file name, read from that file and run the parser
parseCSVFromFile :: FilePath -> IO (Either ParseError CSV)
parseCSVFromFile :: String -> IO (Either ParseError CSV)
parseCSVFromFile = Parser CSV -> String -> IO (Either ParseError CSV)
forall a. Parser a -> String -> IO (Either ParseError a)
parseFromFile Parser CSV
csv

-- | Given a string, run the parser, and print the result on stdout.
parseCSVTest :: String -> IO ()
parseCSVTest :: String -> IO ()
parseCSVTest = Parser CSV -> String -> IO ()
forall s t a.
(Stream s Identity t, Show a) =>
Parsec s () a -> s -> IO ()
parseTest Parser CSV
csv

-- | Given an object of type CSV, generate a CSV formatted
-- string. Always uses escaped fields.
printCSV :: CSV -> String
printCSV :: CSV -> String
printCSV CSV
records = Record -> String
unlines (Record -> String
printRecord (Record -> String) -> CSV -> Record
forall a b. (a -> b) -> [a] -> [b]
`map` CSV
records)
    where printRecord :: Record -> String
printRecord = Record -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (Record -> String) -> (Record -> Record) -> Record -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Record -> Record
forall a. a -> [a] -> [a]
intersperse String
"," (Record -> Record) -> (Record -> Record) -> Record -> Record
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> String) -> Record -> Record
forall a b. (a -> b) -> [a] -> [b]
map String -> String
forall (t :: * -> *). Foldable t => t Char -> String
printField
          printField :: t Char -> String
printField t Char
f = String
"\"" String -> String -> String
forall a. [a] -> [a] -> [a]
++ (Char -> String) -> t Char -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> String
escape t Char
f String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\""
          escape :: Char -> String
escape Char
'"' = String
"\"\""
          escape Char
x = [Char
x]
          unlines :: Record -> String
unlines = Record -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (Record -> String) -> (Record -> Record) -> Record -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Record -> Record
forall a. a -> [a] -> [a]
intersperse String
"\n"