1 | ;; |
---|
2 | ;; |
---|
3 | ;; A set of routines to read and extract fields from email form |
---|
4 | ;; submissions generated by the FormMail.pl script. |
---|
5 | ;; |
---|
6 | ;; Copyright 2008-2009 Ivan Raikov and the Okinawa Institute of |
---|
7 | ;; Science and Technology. |
---|
8 | ;; |
---|
9 | ;; This program is free software: you can redistribute it and/or |
---|
10 | ;; modify it under the terms of the GNU General Public License as |
---|
11 | ;; published by the Free Software Foundation, either version 3 of the |
---|
12 | ;; License, or (at your option) any later version. |
---|
13 | ;; |
---|
14 | ;; This program is distributed in the hope that it will be useful, but |
---|
15 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
17 | ;; General Public License for more details. |
---|
18 | ;; |
---|
19 | ;; A full copy of the GPL license can be found at |
---|
20 | ;; <http://www.gnu.org/licenses/>. |
---|
21 | ;; |
---|
22 | |
---|
23 | (require-extension utf8) |
---|
24 | (require-extension utf8-srfi-13) |
---|
25 | (require-extension utf8-srfi-14) |
---|
26 | |
---|
27 | (require-extension srfi-1) |
---|
28 | (require-extension rfc822) |
---|
29 | (require-extension srfi-40) |
---|
30 | (require-extension stream-ext) |
---|
31 | (require-extension stream-sections) |
---|
32 | (require-extension rb-tree) |
---|
33 | |
---|
34 | (define-extension formular) |
---|
35 | |
---|
36 | |
---|
37 | (declare (export multiline-field-start multiline-field-end |
---|
38 | field-delim form-delim form-delim-len |
---|
39 | read-form mbox->form-stream form-stream->tree)) |
---|
40 | |
---|
41 | |
---|
42 | (define multiline-field-start #\") |
---|
43 | (define multiline-field-end #\") |
---|
44 | (define field-delim #\:) |
---|
45 | (define form-delim (string->list "------")) |
---|
46 | (define form-delim-len (length form-delim)) |
---|
47 | |
---|
48 | (define-record-printer (section x out) |
---|
49 | (fprintf out "#(section depth=~S name=~S id=~S)" |
---|
50 | (section-depth x) |
---|
51 | (stream->string (section-name x)) |
---|
52 | (let ((id (section-id x))) |
---|
53 | (if (stream? id) (stream->string id) id)))) |
---|
54 | |
---|
55 | (define (take-one-line doc . rest) |
---|
56 | (let-optionals rest ((newline? #t)) |
---|
57 | (cond |
---|
58 | ((stream-null? doc) (values #f stream-null stream-null)) |
---|
59 | ((char=? (stream-car doc) #\newline) |
---|
60 | (values #f (if newline? (stream #\newline) stream-null) (stream-cdr doc))) |
---|
61 | (else |
---|
62 | (receive (result skip rest) |
---|
63 | (take-one-line (stream-cdr doc) newline?) |
---|
64 | (values result (stream-cons (stream-car doc) skip) rest)))))) |
---|
65 | |
---|
66 | |
---|
67 | (define $ (compose string->symbol stream->string )) |
---|
68 | |
---|
69 | ;; The next two procedures follow the interface of document->stream |
---|
70 | ;; from the stream-sections library to recognize the start of an RFC |
---|
71 | ;; 822 message, or the start of a form field, respectively. |
---|
72 | |
---|
73 | (define (mbox-recognize-start strm) |
---|
74 | (let-values (((skip line rest) (take-one-line strm))) |
---|
75 | (let* ((from-match (stream-prefix= line (string->list "From "))) |
---|
76 | (name (and from-match (stream-drop line 5)))) |
---|
77 | (values (and name (make-section 1 name)) (if from-match stream-null line) rest)))) |
---|
78 | |
---|
79 | |
---|
80 | (define (form-fields-recognize-start strm) |
---|
81 | (let-values (((skip line rest) (take-one-line strm))) |
---|
82 | (let ((delim-match (stream-prefix= line form-delim))) |
---|
83 | (if delim-match rest (form-fields-recognize-start rest))))) |
---|
84 | |
---|
85 | |
---|
86 | |
---|
87 | (define (read-field-value strm) |
---|
88 | (let ((multiline? (char=? multiline-field-start (stream-car strm)))) |
---|
89 | (let loop ((lst (list)) (strm (if multiline? (stream-cdr strm) strm))) |
---|
90 | (if (or (stream-null? strm) |
---|
91 | (and multiline? (char=? multiline-field-end (stream-car strm))) |
---|
92 | (and (not multiline?) (char=? (stream-car strm) #\newline))) |
---|
93 | (values (list->string (reverse lst)) strm) |
---|
94 | (loop (cons (stream-car strm) lst) (stream-cdr strm)))))) |
---|
95 | |
---|
96 | |
---|
97 | (define (read-form-fields strm) |
---|
98 | (let ((start (form-fields-recognize-start strm))) |
---|
99 | (let loop ((fields (list)) (strm start)) |
---|
100 | (if (or (stream-null? strm) (section? (stream-car strm))) |
---|
101 | (values (reverse fields) strm) |
---|
102 | (if (not (char-alphabetic? (stream-car strm))) |
---|
103 | (loop fields (stream-cdr strm)) |
---|
104 | (let-values (((field-name field-rest) |
---|
105 | (stream-break (lambda (x) (char=? x field-delim)) strm))) |
---|
106 | (let ((field-name ($ field-name)) |
---|
107 | (field-rest (and (not (stream-null? field-rest)) |
---|
108 | (stream-drop-while char-whitespace? (stream-cdr field-rest))))) |
---|
109 | (if (not field-rest) |
---|
110 | (error 'read-form-fields "premature end of field" field-name)) |
---|
111 | (let-values (((field-value rest) (read-field-value field-rest))) |
---|
112 | (loop (cons (list field-name field-value) fields) rest))))))))) |
---|
113 | |
---|
114 | ;; Given an RFC 822 envelope and message streams, parses the headers |
---|
115 | ;; and fields that are found in the message stream, and returns an |
---|
116 | ;; s-exp of the form: |
---|
117 | ;; |
---|
118 | ;; (form (envelope <envelope-string>) |
---|
119 | ;; (headers <RFC 822 header alist>) |
---|
120 | ;; (fields <field alist>)) |
---|
121 | ;; |
---|
122 | (define (read-form envelope strm) |
---|
123 | (let* ((reader-strm strm) |
---|
124 | (reader (lambda (ignore) |
---|
125 | (let-values (((skip line rest) (take-one-line reader-strm #f))) |
---|
126 | (set! reader-strm rest) |
---|
127 | (let ((str (stream->string line))) |
---|
128 | str)))) |
---|
129 | (headers (rfc822-header->list reader-strm reader: reader))) |
---|
130 | (let-values (((fields rest) (read-form-fields reader-strm))) |
---|
131 | (values `(form (envelope ,(stream->string envelope)) (headers ,headers) (fields ,fields)) |
---|
132 | rest)))) |
---|
133 | |
---|
134 | ;; |
---|
135 | ;; Given the name of a Unix mbox file, reads the file and returns an |
---|
136 | ;; SRFI-40 stream representation of its contents, where each element |
---|
137 | ;; in the stream is a form s-expression in the format created by the |
---|
138 | ;; read-form procedure. |
---|
139 | ;; |
---|
140 | (define (mbox->form-stream fname) |
---|
141 | (let ((doc (port->stream (open-input-file fname)))) |
---|
142 | (let loop ((strm (document->sections mbox-recognize-start doc))) |
---|
143 | (stream-delay |
---|
144 | (if (stream-null? strm) stream-null |
---|
145 | (let ((sec (stream-car strm))) |
---|
146 | (if (not (section? sec)) |
---|
147 | (loop (stream-cdr strm)) |
---|
148 | (let-values (((form rest) (read-form (section-name sec) (stream-cdr strm)))) |
---|
149 | (stream-cons form (loop rest)))))))))) |
---|
150 | |
---|
151 | ;; |
---|
152 | ;; Given an SRFI-40 stream created by the mbox->form-stream procedure, |
---|
153 | ;; returns an ordered dictionary structure, where the key is the email |
---|
154 | ;; address of the form sender, and the value is the list of all forms |
---|
155 | ;; submitted by that sender. The API of the tree object follows that |
---|
156 | ;; of the e.g. treap and rb-tree libraries. |
---|
157 | ;; |
---|
158 | (define (form-stream->tree strm) |
---|
159 | (define (s<= x y) |
---|
160 | (cond ((string-ci< x y) -1) |
---|
161 | ((string-ci= x y) 0) |
---|
162 | (else 1))) |
---|
163 | (define (subm< x y) |
---|
164 | (let ((x-seconds (alist-ref 'date-seconds (cdr x))) |
---|
165 | (y-seconds (alist-ref 'date-seconds (cdr y)))) |
---|
166 | (< x-seconds y-seconds))) |
---|
167 | (let loop ((tree (make-rb-tree s<=)) (strm strm)) |
---|
168 | (let ((lookup (tree 'get)) |
---|
169 | (update (tree 'put))) |
---|
170 | (if (stream-null? strm) tree |
---|
171 | (let ((form (stream-car strm))) |
---|
172 | (match form |
---|
173 | (('form ('envelope envelope) ('headers headers) ('fields fields)) |
---|
174 | (let ((date (rfc822-header-ref headers "date") ) |
---|
175 | (from (rfc822-field->tokens (rfc822-header-ref headers "from") ))) |
---|
176 | (let ((date-seconds (rfc822-date->seconds date)) |
---|
177 | (from-address (match from |
---|
178 | ((u #\@ v) (string-append u "@" v)) |
---|
179 | ((u v) (string-append u "@" v)) |
---|
180 | (else (error 'form-stream->tree "invalid from address: " from))))) |
---|
181 | (let ((exists (lookup from-address #f)) |
---|
182 | (submission `(submission (date-seconds ,date-seconds) (fields . ,fields)))) |
---|
183 | (if exists |
---|
184 | (loop (update from-address |
---|
185 | (merge (list submission) (cdr exists) subm<)) |
---|
186 | (stream-cdr strm)) |
---|
187 | (loop (update from-address (list submission)) (stream-cdr strm))))))) |
---|
188 | (else (error 'form-stream->tree "invalid form: " form)))))))) |
---|
189 | |
---|