source: project/release/4/internet-message/trunk/internet-message.scm @ 18839

Last change on this file since 18839 was 18839, checked in by Ivan Raikov, 11 years ago

converting internet-message to typeclass interface

File size: 30.7 KB
Line 
1;;
2;;  Parser for the grammar defined in RFC 5322, "Internet Message Format".
3;;
4;;  Based on the Haskell Rfc2822 module by Peter Simons.
5;;
6;;  Copyright 2009-2010 Ivan Raikov and the Okinawa Institute of
7;;  Science and Technology.
8;;
9;;
10;;  This program is free software: you can redistribute it and/or
11;;  modify it under the terms of the GNU General Public License as
12;;  published by the Free Software Foundation, either version 3 of the
13;;  License, or (at your option) any later version.
14;;
15;;  This program is distributed in the hope that it will be useful, but
16;;  WITHOUT ANY WARRANTY; without even the implied warranty of
17;;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18;;  General Public License for more details.
19;;
20;;  A full copy of the GPL license can be found at
21;;  <http://www.gnu.org/licenses/>.
22
23(module internet-message
24
25        (CoreABNF->InetMessage <InetMessage>)
26
27        (import scheme chicken data-structures srfi-1 srfi-14)
28
29        (require-extension typeclass)
30        (require-library srfi-1 srfi-13 abnf abnf-consumers)
31        (import (prefix abnf abnf:) 
32                (prefix abnf-consumers abnf:) 
33                (only abnf <CoreABNF> <Input> <Token> <CharLex> )
34                (only srfi-13 string-downcase)
35                )
36
37
38
39(define-class <InetMessage> (<CoreABNF> A) 
40  comment fields body message parts addr-spec text ftext msg-id)
41
42(define (char-list-titlecase x)
43  (if (null? x) x (cons (char-upcase (car x)) (map char-downcase (cdr x)))))
44
45;; construct symbols from consumed chars
46(define consumed-chars->tsymbol
47  (abnf:consumed-chars->list 
48   (compose string->symbol 
49            list->string 
50            char-list-titlecase 
51            abnf:trim-ws-char-list)))
52
53;; shortcut for (abnf:bind consumed-chars->tsymbol (abnf:longest ... ))
54(define-syntax bind-consumed->tsymbol
55  (syntax-rules () 
56    ((_ p)    (abnf:bind consumed-chars->tsymbol (abnf:longest p)))
57    ))
58
59(define consumed-objects-lift-any
60  (abnf:consumed-objects-lift
61   (abnf:consumed-objects identity)))
62
63;; Construct a parser for a message header line from the header's name
64;; and a parser for the body.
65
66(define=> (header <CoreABNF>)
67  (lambda (s p) 
68    (let ((ss (->string s)))
69      (lambda (#!key (crlf crlf) (alist #f))
70        (if alist
71            (let ((value (abnf:bind (consumed-objects-lift-any)
72                                    (abnf:concatenation
73                                     p
74                                     (abnf:drop-consumed crlf)))))
75              (lambda (kv)
76                (and (string=? (string-downcase ss) (string-downcase (car kv)))
77                     (list ss (value (cdr kv))))))
78            (abnf:bind (consumed-objects-lift-any)
79                       (abnf:concatenation
80                        (bind-consumed->tsymbol (lit ss))
81                        (abnf:drop-consumed (char #\:))
82                        p
83                        (abnf:drop-consumed crlf)
84                        ))
85            )))))
86
87
88;; Primitive parsers (section 3.2.1)
89
90
91;; Matches any US-ASCII character except for nul \r \n
92
93(define=> (text <CoreABNF>)
94  (set (char-set-difference
95        char-set:ascii 
96        (char-set (integer->char 0) 
97                  (integer->char 10) 
98                  (integer->char 13) ))))
99
100
101;; Folding white space and comments (section 3.2.3)
102
103(define=> (fws <CoreABNF>)
104  (abnf:concatenation
105   (abnf:optional-sequence 
106    (abnf:concatenation
107     (abnf:repetition wsp)
108     (abnf:drop-consumed 
109      (abnf:alternatives crlf lf cr))))
110   (abnf:repetition1 wsp)))
111
112(define (between-fws-drop p fws)
113  (abnf:concatenation
114   (abnf:drop-consumed (abnf:optional-sequence fws)) p 
115   (abnf:drop-consumed (abnf:optional-sequence fws))))
116                             
117;; helper macro for mutually-recursive parser definitions
118
119(define-syntax vac
120  (syntax-rules ()
121    ((_ fn) (lambda args (apply fn args)))))
122
123
124;; Matches any non-whitespace, non-control character except for ( ) and \
125
126(define=> (ctext <CoreABNF>)
127  (set (char-set-difference char-set:graphic (char-set #\( #\) #\\))))
128
129;; Matches comments. That is any combination of ctext, quoted pairs,
130;; and fws between brackets. Comments may nest.
131
132(define=> (ccontent <CoreABNF>)
133  (lambda (comment ctext)
134    (abnf:alternatives ctext quoted-pair comment)))
135
136(define=> (comment <CoreABNF>)
137  (lambda (ccontent fws)
138    (abnf:concatenation 
139     (char #\( )
140     (abnf:longest
141      (abnf:repetition
142       (abnf:concatenation
143        (abnf:optional-sequence fws)
144        ccontent
145        )))
146     (abnf:optional-sequence fws)
147     (char #\))
148     )))
149
150;; Matches any combination of fws and comments
151
152(define=> (cfws <CoreABNF>)
153  (lambda (comment fws)
154    (abnf:alternatives
155     (abnf:concatenation
156      (abnf:repetition1
157       (abnf:concatenation
158        (abnf:optional-sequence fws)
159        (abnf:drop-consumed comment)))
160      (abnf:optional-sequence fws))
161     fws)))
162                 
163
164;;  A combinator for sequences (optional cfws) p (optional cfws)
165
166(define (between-cfws p cfws)
167  (abnf:concatenation
168   (abnf:optional-sequence cfws) p 
169   (abnf:optional-sequence cfws) ))
170                             
171(define (between-cfws-drop p cfws)
172  (abnf:concatenation
173   (abnf:drop-consumed (abnf:optional-sequence cfws)) p 
174   (abnf:drop-consumed (abnf:optional-sequence cfws) )))
175                             
176
177;; Atom (section 3.2.4)
178
179;; Matches any US-ASCII character except for control characters,
180;; specials, or space. atom and dot-atom are made up of this.
181
182(define=> (atext <CoreABNF>)
183  (abnf:alternatives
184   alpha
185   decimal
186   (set-from-string "!#$%&'*+-/=?^_`{|}~")))
187
188;; Matches one or more atext characters and skip any preceeding or
189;; trailing cfws
190
191(define=> (atom <CoreABNF>)
192  (lambda (atext cfws)
193    (abnf:bind-consumed->string
194     (between-cfws (abnf:repetition1 atext) cfws))))
195
196
197;; Matches two or more atext elements interspersed by dots.
198
199(define=> (dot-atom-text <CoreABNF>)
200  (lambda (atext)
201    (abnf:concatenation
202     (abnf:repetition1 atext)
203     (abnf:repetition 
204      (abnf:concatenation
205       (char #\.)
206       (abnf:repetition1 atext) 
207       )))))
208
209;; Matches dot-atom-text and skips any preceeding or trailing cfws.
210
211(define=> (dot-atom <CoreABNF>)
212  (lambda (dot-atom-text cfws)
213    (abnf:bind-consumed->string
214     (between-cfws dot-atom-text cfws))))
215
216;; Quoted strings (section 3.2.4)
217
218;;; Matches any non-whitespace, non-control US-ASCII character except
219;;; for \ and "
220
221(define char-set:quoted (char-set-difference char-set:printing (char-set #\\ #\")))
222(define=> (qtext <CoreABNF>) (set char-set:quoted))
223
224
225;; Matches either qtext or quoted-pair
226
227(define=> (qcontent  <CoreABNF>)
228  (lambda (qtext)
229    (abnf:repetition1 
230     (abnf:alternatives
231      qtext quoted-pair))))
232
233;; Matches any number of qcontent between double quotes.
234
235(define=> (quoted-string <CoreABNF>)
236  (lambda (qcontent fws cfws)
237    (abnf:bind-consumed->string
238     (between-cfws
239      (abnf:concatenation
240       (abnf:drop-consumed dquote)
241       (abnf:repetition
242        (abnf:concatenation
243         (abnf:optional-sequence fws)
244         qcontent))
245       (abnf:optional-sequence fws)
246       (abnf:drop-consumed dquote))
247      cfws)
248      )))
249
250;; Miscellaneous tokens (section 3.2.5)
251
252;;; Matches either atom or quoted-string
253
254(define=> (word <CoreABNF>)
255  (lambda (atom quoted-string)
256    (abnf:alternatives atom quoted-string)))
257 
258;; Matches either one or more word elements
259
260(define=> (phrase  <CoreABNF>)
261  (lambda (word)
262    (abnf:bind-consumed-strings->list 
263     (abnf:repetition1 word))))
264
265
266;; Matches any number of utext tokens.
267;;
268;; Unstructured text is used in free text fields such as subject.
269
270(define=> (unstructured <CoreABNF>)
271  (lambda (fws)
272    (abnf:bind-consumed->string
273     (abnf:concatenation
274      (abnf:repetition 
275       (abnf:concatenation
276        (abnf:optional-sequence fws)
277        vchar))
278      (abnf:repetition wsp)))))
279
280;; Date and Time Specification (section 3.3)
281
282;; Parses a date and time specification of the form
283;;
284;;   Thu, 19 Dec 2002 20:35:46 +0200
285;;
286;; where the weekday specification (Thu) is optional. The parser
287;; This parser will not perform any consistency checking.
288;; It will accept
289;;
290;;    40 Apr 2002 13:12 +0100
291;;
292;;  as a perfectly valid date.
293
294                             
295;; Matches the abbreviated weekday names
296
297(define=> (day-name <CoreABNF>)
298  (abnf:alternatives
299   (lit "Mon")
300   (lit "Tue")
301   (lit "Wed")
302   (lit "Thu")
303   (lit "Fri")
304   (lit "Sat")
305   (lit "Sun")))
306
307;; Matches a day-name, optionally wrapped in folding whitespace
308
309(define=> (day-of-week  <CoreABNF>)
310  (lambda (day-name fws)
311    (abnf:bind-consumed-strings->list 
312     'day-of-week 
313     (between-fws-drop 
314      (abnf:bind-consumed->string day-name)
315      fws))))
316
317
318;; Matches a four digit decimal number
319
320(define=> (year  <CoreABNF>)
321  (lambda (fws)
322    (between-fws-drop
323     (abnf:bind-consumed->string (abnf:repetition-n 4 decimal))
324     fws)))
325
326;; Matches the abbreviated month names
327
328
329(define=> (month-name <CoreABNF>)
330  (abnf:alternatives
331   (lit "Jan")
332   (lit "Feb")
333   (lit "Mar")
334   (lit "Apr")
335   (lit "May")
336   (lit "Jun")
337   (lit "Jul")
338   (lit "Aug")
339   (lit "Sep")
340   (lit "Oct")
341   (lit "Nov")
342   (lit "Dec")))
343
344;; Matches a month-name, optionally wrapped in folding whitespace
345
346(define=> (month <CoreABNF>)
347  (lambda (month-name fws)
348    (between-fws-drop (abnf:bind-consumed->string month-name)
349                      fws)))
350
351
352;; Matches a one or two digit number
353
354(define=> (day <CoreABNF>)
355  (lambda (fws)
356    (abnf:concatenation
357     (abnf:drop-consumed (abnf:optional-sequence fws))
358     (abnf:alternatives 
359      (abnf:bind-consumed->string (abnf:variable-repetition 1 2 decimal))
360      (abnf:drop-consumed fws)))))
361
362;; Matches a date of the form dd:mm:yyyy
363(define=> (date  <CoreABNF>)
364  (lambda (day month year)
365    (abnf:bind-consumed-strings->list 'date 
366      (abnf:concatenation day month year))))
367
368;; Matches a two-digit number
369
370(define=> (hour <CoreABNF>)
371  (abnf:bind-consumed->string (abnf:repetition-n 2 decimal)))
372(define=> (minute <CoreABNF>)
373  (abnf:bind-consumed->string (abnf:repetition-n 2 decimal)))
374(define=> (isecond <CoreABNF>)
375  (abnf:bind-consumed->string (abnf:repetition-n 2 decimal)))
376
377
378;; Matches a time-of-day specification of hh:mm or hh:mm:ss.
379
380(define=> (time-of-day <CoreABNF>)
381  (lambda (hour minute isecond)
382    (abnf:concatenation
383     hour (abnf:drop-consumed (char #\:))
384     minute (abnf:optional-sequence 
385             (abnf:concatenation (abnf:drop-consumed (char #\:))
386                                 isecond)))))
387
388;; Matches a timezone specification of the form
389;; +hhmm or -hhmm
390
391(define=> (zone <CoreABNF>)
392  (lambda (hour minute fws)
393    (abnf:concatenation 
394     (abnf:drop-consumed fws)
395     (abnf:bind-consumed->string (abnf:alternatives (char #\-) (char #\+)))
396     hour minute)))
397
398;; Matches a time-of-day specification followed by a zone.
399
400(define=> (itime  <CoreABNF>)
401  (lambda (time-of-day zone)
402    (abnf:bind-consumed-strings->list 'time 
403      (abnf:concatenation time-of-day zone))))
404
405(define=> (date-time <CoreABNF>)
406  (lambda (day-of-week date itime cfws)
407    (abnf:concatenation
408     (abnf:optional-sequence
409      (abnf:concatenation
410       day-of-week
411       (abnf:drop-consumed (char #\,))))
412     date
413     itime
414     (abnf:drop-consumed (abnf:optional-sequence cfws)))))
415
416
417;; Address Specification (section 3.4)
418
419
420;; Parses and returns a "local part" of an addr-spec. That is either
421;; a dot-atom or a quoted-string.
422
423(define=> (local-part <CoreABNF>)
424  (lambda (dot-atom quoted-string)
425    (abnf:alternatives dot-atom quoted-string)))
426
427
428;; Parses and returns any ASCII characters except [ ] and \
429
430(define=> (dtext <CoreABNF>)
431  (set (char-set-difference char-set:printing (char-set #\[ #\] #\\))))
432
433
434;; Parses a domain literal. That is a [ character, followed by any
435;; amount of dcontent, followed by a terminating ] character.
436
437(define=> (domain-literal <CoreABNF>)
438  (lambda (dtext cfws )
439    (between-cfws
440     (abnf:concatenation
441      (abnf:drop-consumed (char #\[))
442      (abnf:bind-consumed->string 
443       (abnf:repetition 
444        (abnf:concatenation
445         (abnf:drop-consumed (abnf:optional-sequence fws))
446         dtext)))
447      (abnf:drop-consumed (abnf:optional-sequence fws))
448      (abnf:drop-consumed (char #\])))
449     cfws)))
450
451;; Parses and returns a domain part of an addr-spec. That is either
452;; a dot-atom or a domain-literal.
453
454(define=> (domain <CoreABNF>)
455  (lambda (dot-atom domain-literal)
456    (abnf:alternatives dot-atom domain-literal)))
457
458
459;; Addr-spec specification (section 3.4.1)
460
461;; Parses an address specification. That is, a local-part, followed
462;; by an \ character, followed by a domain.
463
464(define=> (addr-spec <CoreABNF>)
465  (lambda (local-part domain)
466    (abnf:concatenation
467     (abnf:bind-consumed-strings->list 'local-part local-part)
468     (abnf:drop-consumed (char #\@))
469     (abnf:bind-consumed-strings->list 'domain domain))))
470
471;; Parses an angle-addr
472
473(define=> (angle-addr <CoreABNF>)
474  (lambda (addr-spec cfws)
475    (between-cfws-drop
476     (abnf:concatenation
477      (abnf:drop-consumed (char #\<))
478      addr-spec
479      (abnf:drop-consumed (char #\>))
480      )
481    cfws)))
482
483
484;; Parses and returns a phrase.
485(define=> (display-name <CoreABNF>)
486  (lambda (phrase)
487    (abnf:bind-consumed-pairs->list 'display-name phrase)))
488
489;; Matches an angle-addr, optionally prefaced with a display-name
490
491(define=> (name-addr <CoreABNF>)
492  (lambda (display-name angle-addr)
493    (abnf:concatenation
494     (abnf:optional-sequence display-name)
495     angle-addr)))
496
497;; Matches a name-addr or an addr-spec and returns the address.
498
499(define=> (mailbox  <CoreABNF>)
500  (lambda (name-addr addr-spec)
501    (abnf:bind-consumed-pairs->list 'mailbox 
502     (abnf:alternatives name-addr addr-spec))))
503
504;; Parses a list of mailbox addresses, every two addresses being
505;; separated by a comma, and returns the list of found address(es).
506
507(define=> (mailbox-list <CoreABNF>)
508  (lambda (mailbox)
509    (abnf:bind-consumed-pairs->list 'mailbox-list
510     (abnf:concatenation
511      mailbox
512      (abnf:repetition
513       (abnf:concatenation
514        (abnf:drop-consumed (char #\,))
515        mailbox))))))
516
517
518;; Parses a group of addresses. That is, a display-name, followed
519;; by a colon, optionally followed by a mailbox-list, followed by a
520;; semicolon. The found address(es) are returned - what may be none.
521;; Here is an example:
522;;
523;;    my group: user1@example.org, user2@example.org;
524
525(define=> (group <CoreABNF>)
526  (lambda (display-name group-list cfws)
527    (abnf:bind-consumed-pairs->list 'group 
528      (abnf:concatenation
529       display-name
530       (abnf:drop-consumed (char #\:))
531       (abnf:optional-sequence group-list)
532       (abnf:drop-consumed (char #\;))
533       (abnf:drop-consumed (abnf:optional-sequence cfws))))))
534 
535(define=> (group-list <CoreABNF>)
536  (lambda (mailbox-list cfws)
537    (abnf:alternatives 
538     mailbox-list
539     (abnf:drop-consumed cfws))))
540
541;; Matches a single mailbox or an address group
542
543(define=> (address <CoreABNF>)
544  (lambda (mailbox group)
545    (abnf:alternatives mailbox group)))
546
547;; Parses a list of address addresses, every two addresses being
548;; separated by a comma, and returns the list of found address(es).
549
550(define=> (address-list <CoreABNF>)
551  (lambda (address)
552    (abnf:concatenation
553     address
554     (abnf:repetition
555      (abnf:concatenation
556       (abnf:drop-consumed (char #\,))
557       address)))))
558
559;;  Overall message syntax (section 3.5)
560                             
561;; This parser will return a message body as specified by the RFC;
562;; that is basically any number of text characters, which may be
563;; divided into separate lines by crlf.
564
565(define=> (body <CoreABNF>)
566  (lambda (text)
567    (abnf:repetition
568     (abnf:concatenation
569      (abnf:repetition 
570       (abnf:concatenation
571        (abnf:bind-consumed->string
572         (abnf:repetition text))
573        (abnf:drop-consumed 
574         (abnf:repetition crlf))))
575      (abnf:bind-consumed->string
576       (abnf:repetition text))))))
577
578;; Field definitions (section 3.6)
579
580;; The origination date field (section 3.6.1)
581
582;; Parses a Date: header and returns the date as a list
583;; (year month dom hour min sec tz dow)
584
585(define (orig-date header date-time)  (header "Date" date-time))
586
587;; Originator fields (section 3.6.2)
588
589;; Parses a From: header and returns the mailbox-list address(es)
590;; contained in it.
591
592(define (from header mailbox-list)      (header "From" mailbox-list))
593
594;; Parses a Sender: header and returns the mailbox address contained in
595;; it.
596
597(define (sender header mailbox)    (header "Sender" mailbox))
598
599;; Parses a Reply-To: header and returns the address-list address(es)
600;; contained in it.
601
602(define (reply-to header address-list)  (header "Reply-To" address-list))
603
604;; Destination address fields (section 3.6.3)
605
606;; Parses a To: header and returns the address-list address(es)
607;; contained in it.
608
609(define (to header address-list)        (header "To" address-list))
610
611;; Parses a Cc: header and returns the address-list address(es)
612;; contained in it.
613
614(define (cc header address-list)        (header "Cc" address-list))
615
616;; Parses a Bcc: header and returns the address-list address(es)
617;; contained in it.
618
619(define (bcc header address-list cfws)
620  (header "Bcc" (abnf:optional-sequence 
621                 (abnf:alternatives
622                  address-list
623                  (abnf:drop-consumed cfws)))))
624
625;; Identification fields (section 3.6.4)
626
627;; Parses one or more occurences of dtext or quoted-pair and returns the
628;; concatenated string. This makes up the id-right of a msg-id.
629
630(define=> (no-fold-literal <CoreABNF>)
631  (lambda (dtext)
632    (abnf:concatenation
633     (abnf:drop-consumed (char #\[))
634     (abnf:repetition dtext)
635     (abnf:drop-consumed (char #\])))))
636
637;; Parses a left ID part of a msg-id. This is almost identical to
638;; the local-part of an e-mail address, but with stricter rules
639;; about folding and whitespace.
640
641(define (id-left dot-atom-text)   dot-atom-text )
642
643;; Parses a right ID part of a msg-id. This is almost identical to the
644;; domain of an e-mail address, but with stricter rules about folding
645;; and whitespace.
646
647(define (id-right dot-atom-text no-fold-literal)
648  (abnf:alternatives dot-atom-text no-fold-literal))
649
650;; Parses a message ID and returns it. A message ID is almost identical
651;; to an angle-addr, but with stricter rules about folding and
652;; whitespace.
653
654(define=> (msg-id <CoreABNF>)
655  (lambda (id-left id-right cfws)
656    (abnf:bind-consumed-strings->list 'message-id
657      (between-cfws-drop
658       (abnf:concatenation
659        (abnf:drop-consumed (char #\<))
660        (abnf:bind-consumed->string id-left)
661        (abnf:drop-consumed (char #\@))
662        (abnf:bind-consumed->string id-right)
663        (abnf:drop-consumed (char #\>))
664        )
665       cfws))))
666
667
668;; Parses a In-Reply-To header and returns the list of msg-id's
669;; contained in it.
670
671(define (in-reply-to header msg-id) (header "In-Reply-To" (abnf:repetition1 msg-id)))
672
673;; Parses a References: header and returns the list of msg-id's
674;; contained in it.
675
676(define (references header msg-id)  (header "References"  (abnf:repetition1 msg-id)))
677
678;; Parses a Message-Id: header and returns the msg-id contained
679;; in it.
680
681(define (message-id header msg-id) (header "Message-ID" msg-id))
682                                           
683;; Informational fields (section 3.6.5)
684
685;; Parses a Subject: header and returns its contents verbatim.
686
687(define (subject header unstructured) (header "Subject"  unstructured))
688
689;; Parses a Comments: header and returns its contents verbatim.
690
691(define (comments header unstructured) (header "Comments" unstructured))
692
693;; Parses a Keywords: header and returns the list of phrases
694;; found. Please note that each phrase is again a list of atoms, as
695;; returned by the phrase parser.
696
697(define=> (kwd-list <CoreABNF>)
698  (lambda (phrase)
699    (abnf:concatenation
700     phrase
701     (abnf:repetition
702      (abnf:concatenation
703       (abnf:drop-consumed (char #\,))
704       phrase)))))
705
706(define (keywords header kwd-list)  (header "Keywords" kwd-list))
707                           
708
709;; Resent fields (section 3.6.6)
710
711;; Parses a Resent-Date: header and returns the date it contains as
712;; CalendarTime
713
714(define (resent-date header date-time) (header "Resent-Date" date-time))
715
716;; Parses a Resent-From: header and returns the mailbox-list address(es)
717;; contained in it.
718
719(define (resent-from header mailbox-list)  (header "Resent-From" mailbox-list))
720
721;; Parses a Resent-Sender: header and returns the mailbox-list
722;; address(es) contained in it.
723
724(define (resent-sender header mailbox) (header "Resent-Sender" mailbox))
725
726;; Parses a Resent-To header and returns the mailbox address contained
727;; in it.
728
729(define (resent-to header address-list)  (header "Resent-To" address-list))
730
731;; Parses a Resent-Cc header and returns the address-list address(es)
732;; contained in it.
733
734(define (resent-cc header address-list) (header "Resent-Cc" address-list))
735
736;; Parses a Resent-Bcc: header and returns the address-list
737;; address(es) contained in it. (This list may be empty.)
738
739(define (resent-bcc header address-list cfws)
740  (header "Resent-Bcc"
741          (abnf:alternatives 
742           address-list
743           (abnf:drop-consumed 
744            (abnf:optional-sequence cfws)))))
745
746
747;; Parses a Resent-Message-ID: header and returns the msg-id contained
748;; in it.
749
750(define (resent-msg-id header msg-id) 
751  (header "Resent-Message-ID" msg-id))
752
753
754;; Parses a Resent-Reply-To: header and returns the address-list
755;; contained in it.
756
757(define (resent-reply-to header address-list)
758  (header "Resent-Reply-To" address-list))
759
760
761;; Trace fields (section 3.6.7)
762
763                         
764(define=> (path <CoreABNF>)
765  (lambda (angle-addr cfws)
766    (abnf:alternatives 
767     angle-addr
768     (between-cfws-drop
769      (abnf:concatenation
770       (abnf:drop-consumed (char #\<))
771       (abnf:drop-consumed (abnf:optional-sequence cfws))
772       (abnf:drop-consumed (char #\>)))
773      cfws))))
774
775(define (return-path header path)  (header "Return-Path" path))
776
777(define=> (received-token <CoreABNF>)
778  (lambda (domain angle-addr addr-spec word)
779    (abnf:bind-consumed-strings->list
780     'received-token
781     (abnf:alternatives domain angle-addr addr-spec word))))
782
783(define=> (received-token-list <CoreABNF>)
784  (lambda (received-token date-time)
785    (abnf:concatenation
786     (abnf:repetition received-token)
787     (abnf:drop-consumed (char #\;))
788     date-time)))
789 
790
791(define (received header received-token-list) 
792  (header "Received"  received-token-list))
793
794
795;; Optional fields (section 3.6.8)
796
797;; Matches and returns any ASCII character except for control
798;; characters, whitespace, and :
799
800(define=> (ftext <CoreABNF>)
801  (set (char-set-difference char-set:graphic
802                            (char-set #\:))))
803
804
805;; Parses and returns an arbitrary header field name. That is one or
806;; more ftext characters.
807
808(define=> (field-name <CoreABNF>)
809  (lambda (ftext)
810    (bind-consumed->tsymbol (abnf:repetition1 ftext))))
811
812;; Parses an arbitrary header field and returns a tuple containing the
813;; field-name and unstructured text of the header. The name will not
814;; contain the terminating colon.
815
816(define=> (optional-field  <CoreABNF>)
817  (lambda (field-name unstructured)
818    (lambda (#!key (crlf crlf) (alist #f))
819      (abnf:bind (consumed-objects-lift-any)
820                 (abnf:concatenation
821                  (if alist
822                      abnf:pass
823                      (abnf:concatenation
824                       field-name
825                       (abnf:drop-consumed (char #\:))))
826                  unstructured
827                  (abnf:drop-consumed crlf))))))
828 
829;; This parser will parse an arbitrary number of header fields as
830;; defined in this RFC. For each field, an appropriate 'Field' value
831;; is created, all of them making up the 'Field' list that this parser
832;; returns.
833
834;; Fields that contain syntax errors fall back to the catch-all
835;; optional-field. Thus, this parser will hardly ever return a syntax
836;; error -- what conforms with the idea that any message that can
837;; possibly be accepted /should/ be.
838
839(define=> (fields <CoreABNF>)
840  (lambda (from sender return-path reply-to to cc bcc message-id in-reply-to 
841           references subject comments keywords orig-date resent-date
842           resent-from resent-sender resent-to resent-cc resent-bcc 
843           resent-msg-id resent-reply-to received optional-field)
844    (lambda (#!key (crlf crlf))
845      (abnf:longest
846       (abnf:repetition
847        (abnf:alternatives
848         (from           crlf: crlf)
849         (sender         crlf: crlf)
850         (return-path    crlf: crlf)
851         (reply-to       crlf: crlf)
852         (to             crlf: crlf)
853         (cc             crlf: crlf)
854         (bcc            crlf: crlf)
855         (message-id     crlf: crlf)
856         (in-reply-to    crlf: crlf)
857         (references     crlf: crlf)
858         (subject        crlf: crlf)
859         (comments       crlf: crlf)
860         (keywords       crlf: crlf)
861         (orig-date      crlf: crlf)
862         (resent-date    crlf: crlf)
863         (resent-from    crlf: crlf)
864         (resent-sender  crlf: crlf)
865         (resent-to      crlf: crlf)
866         (resent-cc      crlf: crlf)
867         (resent-bcc     crlf: crlf)
868         (resent-msg-id    crlf: crlf)
869         (resent-reply-to  crlf: crlf)
870         (received         crlf: crlf)
871         (optional-field   crlf: crlf))))
872      )))
873
874
875;; Parses a complete message as defined by the RFC and returns
876;; the separate header fields and the message body.
877
878(define=> (message <CoreABNF>)
879  (lambda (fields body)
880    (lambda (#!key (crlf crlf))
881      (abnf:bind-consumed-pairs->list 
882       'message
883       (abnf:concatenation 
884        (abnf:bind-consumed-pairs->list 'fields 
885         (fields crlf: crlf))
886        (abnf:optional-sequence
887         (abnf:concatenation 
888          (abnf:drop-consumed crlf)
889          (abnf:bind-consumed-strings->list 'body body)))
890        )))))
891
892;; Given an alist of headers and a body, parses all header values and
893;; the body, and returns a list of the form
894;;
895;; (PARSED-HEADERS PARSED-BODY)
896;;
897(define=> (parts <CoreABNF>)
898  (lambda (from sender return-path reply-to to cc bcc message-id in-reply-to 
899           references subject comments keywords orig-date resent-date
900           resent-from resent-sender resent-to resent-cc resent-bcc 
901           resent-msg-id resent-reply-to received optional-field)
902    (lambda (#!key (crlf crlf))
903      (let* (
904             (header-parsers
905              (map
906               (lambda (p) (p alist: #t crlf: crlf))
907               (list from
908                     sender
909                     return-path
910                     reply-to
911                     to
912                     cc
913                     bcc
914                     message-id
915                     in-reply-to
916                     references
917                     subject
918                     comments
919                     keywords
920                     orig-date
921                     resent-date
922                     resent-from
923                     resent-sender
924                     resent-to
925                     resent-cc
926                     resent-bcc
927                     resent-msg-id
928                     resent-reply-to
929                     received
930                     optional-field)))
931             (try-header
932              (lambda (kv)
933                (let loop ((fs header-parsers))
934                  (if (null? fs) kv
935                      (let ((kv1 (apply (car fs) kv)))
936                        (or kv1 (loop (cdr fs)))))))))
937        (lambda (unparsed-headers unparsed-body)
938          (let ((parsed-headers (map try-header unparsed-headers))
939                (parsed-body (body (unparsed-body))))
940            (list parsed-headers parsed-body)))))))
941
942
943(define (CoreABNF->InetMessage  A) 
944  (letrec  (
945             ;; parsers for various header components
946             (*header        (header A))
947             (*fws           (fws A))
948             (*text          (text A))
949             (*ctext         (ctext A))
950             (*ccontent      (vac ((ccontent A) *comment *ctext)))
951             (*comment       ((comment A) *ccontent *fws))
952             (*cfws          ((cfws A) *comment *fws))
953
954             (*ftext         (ftext A))
955             (*atext         (atext A))
956             (*atom          ((atom A) *atext *cfws))
957             (*dot-atom-text ((dot-atom-text A) *atext))
958             (*dot-atom      ((dot-atom A) *dot-atom-text *cfws))
959
960             (*qtext         (qtext A))
961             (*qcontent      ((qcontent A) *qtext))
962             (*quoted-string ((quoted-string A) *qcontent *fws *cfws))
963             (*word          ((word A) *atom *quoted-string))
964             (*phrase        ((phrase A) *word))
965             (*display-name  ((display-name A) *phrase))
966
967             (*local-part     ((local-part A) *dot-atom *quoted-string))
968             (*dtext          (dtext A))
969             (*domain-literal ((domain-literal A) *dtext *cfws))
970             (*domain         ((domain A) *dot-atom *domain-literal))
971
972             (*addr-spec     ((addr-spec A) *local-part *domain))
973             (*angle-addr    ((angle-addr A) *addr-spec *cfws))
974             (*name-addr     ((name-addr A) *display-name *angle-addr))
975
976             (*mailbox       ((mailbox  A) *name-addr *addr-spec))
977             (*mailbox-list  ((mailbox-list A) *mailbox))
978             (*group         (vac ((group A) *display-name *group-list *cfws)))
979             (*group-list    ((group-list A) *mailbox-list *cfws))
980             (*address       ((address A) *mailbox *group))
981             (*address-list  ((address-list A) *address))
982
983             (*path          ((path A) *angle-addr *cfws))
984
985             (*no-fold-literal  ((no-fold-literal A) *dtext))
986             (*id-left          (id-left *dot-atom-text))
987             (*id-right         (id-right *dot-atom-text *no-fold-literal))
988             (*msg-id           ((msg-id A) *id-left *id-right *cfws))
989
990             (*unstructured     ((unstructured A) *fws))
991
992             (*kwd-list         ((kwd-list A) *phrase))
993             
994             (*day-name         (day-name A))
995             (*day-of-week      ((day-of-week A) *day-name *fws))
996             (*year             ((year A) *fws))
997             (*month-name       (month-name A))
998             (*month            ((month A) *month-name *fws))
999             (*day              ((day A) *fws))
1000             (*date             ((date A) *day *month *year ))
1001             (*hour             (hour A))
1002             (*minute           (minute A))
1003             (*isecond          (isecond A))
1004             (*time-of-day      ((time-of-day A) *hour *minute *isecond))
1005             (*zone             ((zone A) *hour *minute *fws))
1006             (*itime            ((itime  A) *time-of-day *zone))
1007             (*date-time        ((date-time A) *day-of-week *date *itime *cfws))
1008
1009             (*received-token   ((received-token A) *domain *angle-addr *addr-spec *word))
1010             (*received-token-list ((received-token-list A) *received-token *date-time))
1011             
1012             (*field-name       ((field-name A) *ftext))
1013             (*optional-field   ((optional-field A) *field-name *unstructured)))
1014    (let* (
1015
1016
1017             ;; header parsers
1018           (from             (from *header *mailbox-list))
1019           (sender           (sender *header *mailbox))
1020           (return-path      (return-path *header *path))
1021           (reply-to         (reply-to *header *address-list))
1022           (to               (to *header *address-list))
1023           (cc               (cc *header *address-list))
1024           (bcc              (bcc *header *address-list *cfws))
1025           (message-id       (message-id *header *msg-id))
1026           (in-reply-to      (in-reply-to *header *msg-id))
1027           (references       (references *header *msg-id))
1028           (subject          (subject *header *unstructured))
1029           (comments         (comments *header *unstructured))
1030           (keywords         (keywords *header *kwd-list))
1031           (orig-date        (orig-date *header *date-time))
1032           (received         (received *header *received-token-list))
1033           (resent-date      (resent-date *header *date-time))
1034           (resent-from      (resent-from *header *mailbox-list))
1035           (resent-sender    (resent-sender *header *mailbox))
1036           (resent-to        (resent-to *header *address-list))
1037           (resent-cc        (resent-cc *header *address-list))
1038           (resent-bcc       (resent-bcc *header *address-list *cfws))
1039           (resent-msg-id    (resent-msg-id *header *msg-id))
1040           (resent-reply-to  (resent-reply-to *header *address-list))
1041           
1042           ;; parsers for various components of the message
1043           
1044           (fields          ((fields A)
1045                             from sender return-path reply-to to cc bcc 
1046                             message-id in-reply-to  references subject 
1047                             comments keywords orig-date resent-date
1048                             resent-from resent-sender resent-to 
1049                             resent-cc resent-bcc resent-msg-id 
1050                             resent-reply-to received *optional-field))
1051           
1052           (body            ((body A) *text))
1053           
1054           (message         ((message A) fields body))
1055           
1056           (parts           ((parts A)
1057                             from sender return-path reply-to to cc bcc 
1058                             message-id in-reply-to references subject 
1059                             comments keywords orig-date resent-date
1060                             resent-from resent-sender resent-to 
1061                             resent-cc resent-bcc resent-msg-id 
1062                             resent-reply-to received *optional-field))
1063           )
1064      (make-<InetMessage> A *comment fields body message parts *addr-spec *text *ftext *msg-id)
1065      )))
1066 
1067
1068 
1069)
Note: See TracBrowser for help on using the repository browser.