source: project/release/4/csv/csv.scm @ 15806

Last change on this file since 15806 was 15806, checked in by Ivan Raikov, 11 years ago

added copyright notice to csv and json-abnf

File size: 5.1 KB
Line 
1;;
2;;
3;;  A parser for comma-separated values.
4;;
5;;  Based in part on RFC 4180, "Common Format and MIME Type for
6;;  Comma-Separated Values (CSV) Files", and on the Haskell Text.CSV
7;;  module by Jaap Weel.
8;;
9;;
10;;  Differences with the RFC:
11;;
12;;   1) the RFC prescribes CRLF standard network line breaks, but many
13;;   CSV files have platform-dependent line endings, so this library
14;;   accepts any sequence of CRs and LFs as a line break.
15;;
16;;   2) The format of header lines is exactly like a regular record
17;;   and the presence of a header can only be determined from the mime
18;;   type.  available. This library treats all lines as regular
19;;   records.
20;;
21;;   3) The formal grammar specifies that fields can contain only
22;;   certain US ASCII characters, but the specification of the MIME
23;;   type allows for other character sets. This library allow all
24;;   characters in fields, except for the field delimiter character,
25;;   CRs and LFs in unquoted fields. This should make it possible to
26;;   parse CSV files in any encoding, but it allows for characters
27;;   such as tabs that the RFC may be interpreted to forbid even in
28;;   non-US-ASCII character sets.
29;;
30;;   4) According to the RFC, the records all have to have the same
31;;   length. This library allows variable length records.
32;;
33;;   5) The delimiter character is specified by the user and can be
34;;   a character other than comma, or an SRFI-14 character set.
35;;
36;;  Copyright 2009 Ivan Raikov.
37;;
38;;  Redistribution and use in source and binary forms, with or without
39;;  modification, are permitted provided that the following conditions
40;;  are met:
41;;
42;;  - Redistributions of source code must retain the above copyright
43;;  notice, this list of conditions and the following disclaimer.
44;;
45;;  - Redistributions in binary form must reproduce the above
46;;  copyright notice, this list of conditions and the following
47;;  disclaimer in the documentation and/or other materials provided
48;;  with the distribution.
49;;
50;;  - Neither name of the copyright holders nor the names of its
51;;  contributors may be used to endorse or promote products derived
52;;  from this software without specific prior written permission.
53;;
54;;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE
55;;  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
56;;  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
57;;  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
58;;  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR THE
59;;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60;;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61;;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
62;;  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
63;;  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64;;  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
65;;  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66;;  POSSIBILITY OF SUCH DAMAGE.
67;;
68
69(module csv
70
71        (make-parser)
72
73        (import scheme chicken data-structures srfi-1 srfi-14)
74
75        (require-library abnf abnf-consumers)
76        (import (prefix abnf abnf:) 
77                (prefix abnf-consumers abnf:) 
78                )
79
80
81(define (non-escaped delim)
82  (abnf:bind-consumed->string
83   (abnf:repetition
84    (abnf:set
85     (char-set-complement
86      (char-set-union
87       (if (char? delim) (char-set delim)
88           delim)
89       (string->char-set "\n\r\"")))))))
90
91
92(define escaped-dquote
93  (abnf:lit "\\\""))
94
95
96(define escaped
97  (abnf:concatenation
98   (abnf:drop-consumed abnf:dquote)
99   (abnf:bind-consumed->string
100    (abnf:repetition 
101     (abnf:alternatives
102      escaped-dquote
103      (abnf:set
104       (char-set-complement
105        (char-set #\"))))))
106   (abnf:drop-consumed abnf:dquote)))
107
108
109(define (field delim)
110  (abnf:alternatives 
111   (non-escaped delim) 
112   escaped))
113
114 
115(define (record delim)
116  (abnf:bind-consumed-strings->list 'record
117   (abnf:concatenation
118    (field delim)
119    (abnf:repetition
120     (abnf:concatenation
121      (abnf:drop-consumed 
122       (if (char? delim) (abnf:char delim)
123           (abnf:set delim)))
124      (field delim))))))
125
126
127(define (csv delim)
128  (abnf:repetition
129   (abnf:concatenation 
130    (record delim)
131    (abnf:drop-consumed
132     (abnf:repetition1 
133      (abnf:set-from-string "\r\n"))))))
134
135(define (->char-list s)
136  (if (string? s) (string->list s) s))
137
138(define (check-delimiter d)
139  (if (not (or (char? d) (char-set? d)))
140      (error 'make-parser "delimiter is not a character or a character set"))
141  (cond ((char? d)
142         (case d
143           ((#\newline #\return #\")
144            (error 
145             'make-parser
146             "delimiter character is one of newline, carriage return or quotation mark"))))
147        ((char-set? d)
148         (if (or (char-set-contains? d #\newline)
149                 (char-set-contains? d #\return)
150                 (char-set-contains? d #\"))
151            (error 
152             'make-parser
153             "delimiter character set includes newline, carriage return or quotation mark")))))
154             
155                         
156
157(define (make-parser . rest)
158  (let-optionals rest ((delimiter #\,))
159   (check-delimiter delimiter)
160   (let ((p (abnf:longest (csv delimiter))))
161     (lambda (s)
162       (reverse (caar (p identity `((() ,(->char-list s))))))))))
163
164)
Note: See TracBrowser for help on using the repository browser.