780 lines
13 KiB
Plaintext
780 lines
13 KiB
Plaintext
%{
|
|
#include "common.h"
|
|
#include "smtp.h"
|
|
#include <ctype.h>
|
|
|
|
#define YYMAXDEPTH 500 /* was default 150 */
|
|
|
|
char *yylp; /* next character to be lex'd */
|
|
int yydone; /* tell yylex to give up */
|
|
char *yybuffer; /* first parsed character */
|
|
char *yyend; /* end of buffer to be parsed */
|
|
Node *root;
|
|
Field *firstfield;
|
|
Field *lastfield;
|
|
Node *usender;
|
|
Node *usys;
|
|
Node *udate;
|
|
char *startfield, *endfield;
|
|
int originator;
|
|
int destination;
|
|
int date;
|
|
int received;
|
|
int messageid;
|
|
%}
|
|
|
|
/*
 * Tokens returned by yylex().  The declaration order is kept exactly
 * as it was, since yacc numbers tokens in the order it meets them.
 */
%term	WORD
%term	DATE RESENT_DATE
%term	RETURN_PATH FROM SENDER REPLY_TO
%term	RESENT_FROM RESENT_SENDER RESENT_REPLY_TO
%term	SUBJECT
%term	TO CC BCC
%term	RESENT_TO RESENT_CC RESENT_BCC
%term	REMOTE
%term	PRECEDENCE
%term	MIMEVERSION CONTENTTYPE MESSAGEID
%term	RECEIVED
%term	MAILER
%term	BADTOKEN

%start	msg
|
|
%%
|
|
|
|
/* a header is a list of fields, optionally preceded by a unix "From " line */
msg		: fields
		| unixfrom '\n' fields
		;

/* a newline on its own ends the header and tells yylex() to stop */
fields		: '\n'
			{
				yydone = 1;
			}
		| field '\n'
		| field '\n' fields
		;

/* record which classes of field have been seen */
field		: dates
			{
				date = 1;
			}
		| originator
			{
				originator = 1;
			}
		| destination
			{
				destination = 1;
			}
		| subject
		| optional
		| ignored
		| received
		| precedence
		| error '\n' field
		;

/* keep the sender, date and system nodes; the keyword nodes are discarded */
unixfrom	: FROM route_addr unix_date_time REMOTE FROM word
			{
				freenode($1);
				freenode($4);
				freenode($5);
				usender = $2;
				udate = $3;
				usys = $6;
			}
		;
|
|
/* originator fields are recorded with source == 1 */
originator	: REPLY_TO ':' address_list
			{
				newfield(link3($1, $2, $3), 1);
			}
		| RETURN_PATH ':' route_addr
			{
				newfield(link3($1, $2, $3), 1);
			}
		| FROM ':' mailbox_list
			{
				newfield(link3($1, $2, $3), 1);
			}
		| SENDER ':' mailbox
			{
				newfield(link3($1, $2, $3), 1);
			}
		| RESENT_REPLY_TO ':' address_list
			{
				newfield(link3($1, $2, $3), 1);
			}
		| RESENT_SENDER ':' mailbox
			{
				newfield(link3($1, $2, $3), 1);
			}
		| RESENT_FROM ':' mailbox
			{
				newfield(link3($1, $2, $3), 1);
			}
		;

dates		: DATE ':' date_time
			{
				newfield(link3($1, $2, $3), 0);
			}
		| RESENT_DATE ':' date_time
			{
				newfield(link3($1, $2, $3), 0);
			}
		;

/* every destination field may be empty or carry an address list */
destination	: TO ':'
			{
				newfield(link2($1, $2), 0);
			}
		| TO ':' address_list
			{
				newfield(link3($1, $2, $3), 0);
			}
		| RESENT_TO ':'
			{
				newfield(link2($1, $2), 0);
			}
		| RESENT_TO ':' address_list
			{
				newfield(link3($1, $2, $3), 0);
			}
		| CC ':'
			{
				newfield(link2($1, $2), 0);
			}
		| CC ':' address_list
			{
				newfield(link3($1, $2, $3), 0);
			}
		| RESENT_CC ':'
			{
				newfield(link2($1, $2), 0);
			}
		| RESENT_CC ':' address_list
			{
				newfield(link3($1, $2, $3), 0);
			}
		| BCC ':'
			{
				newfield(link2($1, $2), 0);
			}
		| BCC ':' address_list
			{
				newfield(link3($1, $2, $3), 0);
			}
		| RESENT_BCC ':'
			{
				newfield(link2($1, $2), 0);
			}
		| RESENT_BCC ':' address_list
			{
				newfield(link3($1, $2, $3), 0);
			}
		;
|
|
subject		: SUBJECT ':' things
			{
				newfield(link3($1, $2, $3), 0);
			}
		| SUBJECT ':'
			{
				newfield(link2($1, $2), 0);
			}
		;

/* count the Received lines as they go by */
received	: RECEIVED ':' things
			{
				newfield(link3($1, $2, $3), 0);
				received++;
			}
		| RECEIVED ':'
			{
				newfield(link2($1, $2), 0);
				received++;
			}
		;

precedence	: PRECEDENCE ':' things
			{
				newfield(link3($1, $2, $3), 0);
			}
		| PRECEDENCE ':'
			{
				newfield(link2($1, $2), 0);
			}
		;

/* known headers whose bodies are kept as-is, not parsed further */
ignored		: ignoredhdr ':' things
			{
				newfield(link3($1, $2, $3), 0);
			}
		| ignoredhdr ':'
			{
				newfield(link2($1, $2), 0);
			}
		;

ignoredhdr	: MIMEVERSION
		| CONTENTTYPE
		| MESSAGEID
			{
				messageid = 1;
			}
		| MAILER
		;

/*
 * Any other field.  The lexer is the same for field names and field
 * bodies, so the name is checked after the fact; a bad name frees the
 * nodes and makes yyparse() return 1.
 */
optional	: fieldwords ':' things
			{
				if(badfieldname($1)){
					freenode($1);
					freenode($2);
					freenode($3);
					return 1;
				}
				newfield(link3($1, $2, $3), 0);
			}
		| fieldwords ':'
			{
				if(badfieldname($1)){
					freenode($1);
					freenode($2);
					return 1;
				}
				newfield(link2($1, $2), 0);
			}
		;
|
|
address_list	: address
		| address_list ',' address
			{
				$$ = link3($1, $2, $3);
			}
		;

address		: mailbox
		| group
		;

/* phrase ':' [address_list] ';' */
group		: phrase ':' address_list ';'
			{
				$$ = link2($1, link3($2, $3, $4));
			}
		| phrase ':' ';'
			{
				$$ = link3($1, $2, $3);
			}
		;

mailbox_list	: mailbox
		| mailbox_list ',' mailbox
			{
				$$ = link3($1, $2, $3);
			}
		;

mailbox		: route_addr
		| phrase brak_addr
			{
				$$ = link2($1, $2);
			}
		| brak_addr
		;

/* an empty <> is turned into the anonymous user by nobody() */
brak_addr	: '<' route_addr '>'
			{
				$$ = link3($1, $2, $3);
			}
		| '<' '>'
			{
				$$ = nobody($2);
				freenode($1);
			}
		;

/* the pieces of an address are concatenated into one node marked as an address */
route_addr	: route ':' at_addr
			{
				$$ = address(concat($1, concat($2, $3)));
			}
		| addr_spec
		;

route		: '@' domain
			{
				$$ = concat($1, $2);
			}
		| route ',' '@' domain
			{
				$$ = concat($1, concat($2, concat($3, $4)));
			}
		;

addr_spec	: local_part
			{
				$$ = address($1);
			}
		| at_addr
		;

at_addr		: local_part '@' domain
			{
				$$ = address(concat($1, concat($2, $3)));
			}
		| at_addr '@' domain
			{
				$$ = address(concat($1, concat($2, $3)));
			}
		;

local_part	: word
		;

domain		: word
		;

phrase		: word
		| phrase word
			{
				$$ = link2($1, $2);
			}
		;
|
|
/* free-form field body: any run of words and special characters */
things		: thing
		| things thing
			{
				$$ = link2($1, $2);
			}
		;

thing		: word
		| '<'
		| '>'
		| '@'
		| ':'
		| ';'
		| ','
		;

date_time	: things
		;

/* the six parts of the unix date are relinked in the order $1 $3 $2 $6 $4 $5 */
unix_date_time	: word word word unix_time word word
			{
				$$ = link3($1, $3, link3($2, $6, link2($4, $5)));
			}
		;

unix_time	: word
		| unix_time ':' word
			{
				$$ = link3($1, $2, $3);
			}
		;

/* a keyword appearing where a word is expected is just a word */
word		: WORD
		| DATE
		| RESENT_DATE
		| RETURN_PATH
		| FROM
		| SENDER
		| REPLY_TO
		| RESENT_FROM
		| RESENT_SENDER
		| RESENT_REPLY_TO
		| TO
		| CC
		| BCC
		| RESENT_TO
		| RESENT_CC
		| RESENT_BCC
		| REMOTE
		| SUBJECT
		| PRECEDENCE
		| MIMEVERSION
		| CONTENTTYPE
		| MESSAGEID
		| RECEIVED
		| MAILER
		;

/* candidate name of an optional field; vetted later by badfieldname() */
fieldwords	: fieldword
		| WORD
		| fieldwords fieldword
			{
				$$ = link2($1, $2);
			}
		| fieldwords word
			{
				$$ = link2($1, $2);
			}
		;

fieldword	: '<'
		| '>'
		| '@'
		| ';'
		| ','
		;
|
|
%%
|
|
|
|
/*
|
|
* Initialize the parsing. Done once for each header field.
|
|
*/
|
|
void
|
|
yyinit(char *p, int len)
|
|
{
|
|
yybuffer = p;
|
|
yylp = p;
|
|
yyend = p + len;
|
|
firstfield = lastfield = 0;
|
|
received = 0;
|
|
}
|
|
|
|
/*
|
|
* keywords identifying header fields we care about
|
|
*/
|
|
typedef struct Keyword Keyword;
|
|
struct Keyword {
|
|
char *rep;
|
|
int val;
|
|
};
|
|
|
|
/* field names that we need to recognize */
|
|
Keyword key[] = {
|
|
{ "date", DATE },
|
|
{ "resent-date", RESENT_DATE },
|
|
{ "return_path", RETURN_PATH },
|
|
{ "from", FROM },
|
|
{ "sender", SENDER },
|
|
{ "reply-to", REPLY_TO },
|
|
{ "resent-from", RESENT_FROM },
|
|
{ "resent-sender", RESENT_SENDER },
|
|
{ "resent-reply-to", RESENT_REPLY_TO },
|
|
{ "to", TO },
|
|
{ "cc", CC },
|
|
{ "bcc", BCC },
|
|
{ "resent-to", RESENT_TO },
|
|
{ "resent-cc", RESENT_CC },
|
|
{ "resent-bcc", RESENT_BCC },
|
|
{ "remote", REMOTE },
|
|
{ "subject", SUBJECT },
|
|
{ "precedence", PRECEDENCE },
|
|
{ "mime-version", MIMEVERSION },
|
|
{ "content-type", CONTENTTYPE },
|
|
{ "message-id", MESSAGEID },
|
|
{ "received", RECEIVED },
|
|
{ "mailer", MAILER },
|
|
{ "who-the-hell-cares", WORD }
|
|
};
|
|
|
|
/*
|
|
* Lexical analysis for an rfc822 header field. Continuation lines
|
|
* are handled in yywhite() when skipping over white space.
|
|
*
|
|
*/
|
|
yylex(void)
|
|
{
|
|
String *t;
|
|
int quoting;
|
|
int escaping;
|
|
char *start;
|
|
Keyword *kp;
|
|
int c, d;
|
|
|
|
/* print("lexing\n"); /**/
|
|
if(yylp >= yyend)
|
|
return 0;
|
|
if(yydone)
|
|
return 0;
|
|
|
|
quoting = escaping = 0;
|
|
start = yylp;
|
|
yylval = malloc(sizeof(Node));
|
|
yylval->white = yylval->s = 0;
|
|
yylval->next = 0;
|
|
yylval->addr = 0;
|
|
yylval->start = yylp;
|
|
for(t = 0; yylp < yyend; yylp++){
|
|
c = *yylp & 0xff;
|
|
|
|
/* dump nulls, they can't be in header */
|
|
if(c == 0)
|
|
continue;
|
|
|
|
if(escaping) {
|
|
escaping = 0;
|
|
} else if(quoting) {
|
|
switch(c){
|
|
case '\\':
|
|
escaping = 1;
|
|
break;
|
|
case '\n':
|
|
d = (*(yylp+1))&0xff;
|
|
if(d != ' ' && d != '\t'){
|
|
quoting = 0;
|
|
yylp--;
|
|
continue;
|
|
}
|
|
break;
|
|
case '"':
|
|
quoting = 0;
|
|
break;
|
|
}
|
|
} else {
|
|
switch(c){
|
|
case '\\':
|
|
escaping = 1;
|
|
break;
|
|
case '(':
|
|
case ' ':
|
|
case '\t':
|
|
case '\r':
|
|
goto out;
|
|
case '\n':
|
|
if(yylp == start){
|
|
yylp++;
|
|
/* print("lex(c %c)\n", c); /**/
|
|
yylval->end = yylp;
|
|
return yylval->c = c;
|
|
}
|
|
goto out;
|
|
case '@':
|
|
case '>':
|
|
case '<':
|
|
case ':':
|
|
case ',':
|
|
case ';':
|
|
if(yylp == start){
|
|
yylp++;
|
|
yylval->white = yywhite();
|
|
/* print("lex(c %c)\n", c); /**/
|
|
yylval->end = yylp;
|
|
return yylval->c = c;
|
|
}
|
|
goto out;
|
|
case '"':
|
|
quoting = 1;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
if(t == 0)
|
|
t = s_new();
|
|
s_putc(t, c);
|
|
}
|
|
out:
|
|
yylval->white = yywhite();
|
|
if(t) {
|
|
s_terminate(t);
|
|
} else /* message begins with white-space! */
|
|
return yylval->c = '\n';
|
|
yylval->s = t;
|
|
for(kp = key; kp->val != WORD; kp++)
|
|
if(cistrcmp(s_to_c(t), kp->rep)==0)
|
|
break;
|
|
/* print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/
|
|
yylval->end = yylp;
|
|
return yylval->c = kp->val;
|
|
}
|
|
|
|
/*
 *  parse errors are not reported; the grammar's error rule resumes
 *  at the next field
 */
void
yyerror(char *x)
{
	USED(x);
}
|
|
|
|
/*
|
|
* parse white space and comments
|
|
*/
|
|
String *
|
|
yywhite(void)
|
|
{
|
|
String *w;
|
|
int clevel;
|
|
int c;
|
|
int escaping;
|
|
|
|
escaping = clevel = 0;
|
|
for(w = 0; yylp < yyend; yylp++){
|
|
c = *yylp & 0xff;
|
|
|
|
/* dump nulls, they can't be in header */
|
|
if(c == 0)
|
|
continue;
|
|
|
|
if(escaping){
|
|
escaping = 0;
|
|
} else if(clevel) {
|
|
switch(c){
|
|
case '\n':
|
|
/*
|
|
* look for multiline fields
|
|
*/
|
|
if(*(yylp+1)==' ' || *(yylp+1)=='\t')
|
|
break;
|
|
else
|
|
goto out;
|
|
case '\\':
|
|
escaping = 1;
|
|
break;
|
|
case '(':
|
|
clevel++;
|
|
break;
|
|
case ')':
|
|
clevel--;
|
|
break;
|
|
}
|
|
} else {
|
|
switch(c){
|
|
case '\\':
|
|
escaping = 1;
|
|
break;
|
|
case '(':
|
|
clevel++;
|
|
break;
|
|
case ' ':
|
|
case '\t':
|
|
case '\r':
|
|
break;
|
|
case '\n':
|
|
/*
|
|
* look for multiline fields
|
|
*/
|
|
if(*(yylp+1)==' ' || *(yylp+1)=='\t')
|
|
break;
|
|
else
|
|
goto out;
|
|
default:
|
|
goto out;
|
|
}
|
|
}
|
|
if(w == 0)
|
|
w = s_new();
|
|
s_putc(w, c);
|
|
}
|
|
out:
|
|
if(w)
|
|
s_terminate(w);
|
|
return w;
|
|
}
|
|
|
|
/*
|
|
* link two parsed entries together
|
|
*/
|
|
Node*
|
|
link2(Node *p1, Node *p2)
|
|
{
|
|
Node *p;
|
|
|
|
for(p = p1; p->next; p = p->next)
|
|
;
|
|
p->next = p2;
|
|
return p1;
|
|
}
|
|
|
|
/*
|
|
* link three parsed entries together
|
|
*/
|
|
Node*
|
|
link3(Node *p1, Node *p2, Node *p3)
|
|
{
|
|
Node *p;
|
|
|
|
for(p = p2; p->next; p = p->next)
|
|
;
|
|
p->next = p3;
|
|
|
|
for(p = p1; p->next; p = p->next)
|
|
;
|
|
p->next = p2;
|
|
|
|
return p1;
|
|
}
|
|
|
|
/*
|
|
* make a:b, move all white space after both
|
|
*/
|
|
Node*
|
|
colon(Node *p1, Node *p2)
|
|
{
|
|
if(p1->white){
|
|
if(p2->white)
|
|
s_append(p1->white, s_to_c(p2->white));
|
|
} else {
|
|
p1->white = p2->white;
|
|
p2->white = 0;
|
|
}
|
|
|
|
s_append(p1->s, ":");
|
|
if(p2->s)
|
|
s_append(p1->s, s_to_c(p2->s));
|
|
|
|
if(p1->end < p2->end)
|
|
p1->end = p2->end;
|
|
freenode(p2);
|
|
return p1;
|
|
}
|
|
|
|
/*
|
|
* concatenate two fields, move all white space after both
|
|
*/
|
|
Node*
|
|
concat(Node *p1, Node *p2)
|
|
{
|
|
char buf[2];
|
|
|
|
if(p1->white){
|
|
if(p2->white)
|
|
s_append(p1->white, s_to_c(p2->white));
|
|
} else {
|
|
p1->white = p2->white;
|
|
p2->white = 0;
|
|
}
|
|
|
|
if(p1->s == nil){
|
|
buf[0] = p1->c;
|
|
buf[1] = 0;
|
|
p1->s = s_new();
|
|
s_append(p1->s, buf);
|
|
}
|
|
|
|
if(p2->s)
|
|
s_append(p1->s, s_to_c(p2->s));
|
|
else {
|
|
buf[0] = p2->c;
|
|
buf[1] = 0;
|
|
s_append(p1->s, buf);
|
|
}
|
|
|
|
if(p1->end < p2->end)
|
|
p1->end = p2->end;
|
|
freenode(p2);
|
|
return p1;
|
|
}
|
|
|
|
/*
|
|
* look for disallowed chars in the field name
|
|
*/
|
|
int
|
|
badfieldname(Node *p)
|
|
{
|
|
for(; p; p = p->next){
|
|
/* field name can't contain white space */
|
|
if(p->white && p->next)
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* mark as an address
|
|
*/
|
|
Node *
|
|
address(Node *p)
|
|
{
|
|
p->addr = 1;
|
|
return p;
|
|
}
|
|
|
|
/*
 *  case independent string compare
 *
 *  Returns 0 if s1 and s2 are equal apart from the case of letters,
 *  -1 at the first character of s1 that differs from s2 (including
 *  s2 running out first), and the next character of s2 if s1 is a
 *  proper prefix of it.  Only zero/non-zero is an ordering-free
 *  answer; the result does not sort strings.
 */
int
cistrcmp(char *s1, char *s2)
{
	int c1, c2;

	for(; *s1; s1++, s2++){
		/*
		 *  the ctype routines are only defined for values that
		 *  fit in an unsigned char, and header text may carry
		 *  bytes with the top bit set
		 */
		c1 = (unsigned char)*s1;
		c2 = (unsigned char)*s2;
		if(isupper(c1))
			c1 = tolower(c1);
		if(isupper(c2))
			c2 = tolower(c2);
		if(c1 != c2)
			return -1;
	}
	return *s2;
}
|
|
|
|
/*
|
|
* free a node
|
|
*/
|
|
void
|
|
freenode(Node *p)
|
|
{
|
|
Node *tp;
|
|
|
|
while(p){
|
|
tp = p->next;
|
|
if(p->s)
|
|
s_free(p->s);
|
|
if(p->white)
|
|
s_free(p->white);
|
|
free(p);
|
|
p = tp;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* an anonymous user
|
|
*/
|
|
Node*
|
|
nobody(Node *p)
|
|
{
|
|
if(p->s)
|
|
s_free(p->s);
|
|
p->s = s_copy("pOsTmAsTeR");
|
|
p->addr = 1;
|
|
return p;
|
|
}
|
|
|
|
/*
|
|
* add anything that was dropped because of a parse error
|
|
*/
|
|
void
|
|
missing(Node *p)
|
|
{
|
|
Node *np;
|
|
char *start, *end;
|
|
Field *f;
|
|
String *s;
|
|
|
|
start = yybuffer;
|
|
if(lastfield != nil){
|
|
for(np = lastfield->node; np; np = np->next)
|
|
start = np->end+1;
|
|
}
|
|
|
|
end = p->start-1;
|
|
|
|
if(end <= start)
|
|
return;
|
|
|
|
if(strncmp(start, "From ", 5) == 0)
|
|
return;
|
|
|
|
np = malloc(sizeof(Node));
|
|
np->start = start;
|
|
np->end = end;
|
|
np->white = nil;
|
|
s = s_copy("BadHeader: ");
|
|
np->s = s_nappend(s, start, end-start);
|
|
np->next = nil;
|
|
|
|
f = malloc(sizeof(Field));
|
|
f->next = 0;
|
|
f->node = np;
|
|
f->source = 0;
|
|
if(firstfield)
|
|
lastfield->next = f;
|
|
else
|
|
firstfield = f;
|
|
lastfield = f;
|
|
}
|
|
|
|
/*
|
|
* create a new field
|
|
*/
|
|
void
|
|
newfield(Node *p, int source)
|
|
{
|
|
Field *f;
|
|
|
|
missing(p);
|
|
|
|
f = malloc(sizeof(Field));
|
|
f->next = 0;
|
|
f->node = p;
|
|
f->source = source;
|
|
if(firstfield)
|
|
lastfield->next = f;
|
|
else
|
|
firstfield = f;
|
|
lastfield = f;
|
|
endfield = startfield;
|
|
startfield = yylp;
|
|
}
|
|
|
|
/*
|
|
* fee a list of fields
|
|
*/
|
|
void
|
|
freefield(Field *f)
|
|
{
|
|
Field *tf;
|
|
|
|
while(f){
|
|
tf = f->next;
|
|
freenode(f->node);
|
|
free(f);
|
|
f = tf;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* add some white space to a node
|
|
*/
|
|
Node*
|
|
whiten(Node *p)
|
|
{
|
|
Node *tp;
|
|
|
|
for(tp = p; tp->next; tp = tp->next)
|
|
;
|
|
if(tp->white == 0)
|
|
tp->white = s_copy(" ");
|
|
return p;
|
|
}
|
|
|
|
void
|
|
yycleanup(void)
|
|
{
|
|
Field *f, *fnext;
|
|
Node *np, *next;
|
|
|
|
for(f = firstfield; f; f = fnext){
|
|
for(np = f->node; np; np = next){
|
|
if(np->s)
|
|
s_free(np->s);
|
|
if(np->white)
|
|
s_free(np->white);
|
|
next = np->next;
|
|
free(np);
|
|
}
|
|
fnext = f->next;
|
|
free(f);
|
|
}
|
|
firstfield = lastfield = 0;
|
|
}
|