From 48eb530b1f37dbc8d6889f8e8ebf9f76769a2f89 Mon Sep 17 00:00:00 2001 From: Mechiel Lukkien Date: Fri, 11 Aug 2023 14:07:49 +0200 Subject: [PATCH] improve message parsing: allow bare carriage return (unless in pedantic mode), allow empty header, and no longer treat a message with only headers as a message with only a body --- ctl.go | 11 ++-- imapserver/server.go | 10 +--- imapserver/status_test.go | 4 +- message/part.go | 104 ++++++++++++++++++++++++++------------ message/part_test.go | 30 +++++++++-- message/writer.go | 23 ++++++--- message/writer_test.go | 13 ++--- queue/dsn.go | 2 +- smtpserver/server.go | 11 +--- store/account_test.go | 2 +- 10 files changed, 130 insertions(+), 80 deletions(-) diff --git a/ctl.go b/ctl.go index 1eacaaeefe..9709e400f1 100644 --- a/ctl.go +++ b/ctl.go @@ -327,21 +327,16 @@ func servectlcmd(ctx context.Context, ctl *ctl, shutdown func()) { log.Check(err, "closing temporary message file") } }() - mw := &message.Writer{Writer: msgFile} + mw := message.NewWriter(msgFile) ctl.xwriteok() ctl.xstreamto(mw) err = msgFile.Sync() ctl.xcheck(err, "syncing message to storage") - msgPrefix := []byte{} - if !mw.HaveHeaders { - msgPrefix = []byte("\r\n\r\n") - } m := &store.Message{ - Received: time.Now(), - Size: int64(len(msgPrefix)) + mw.Size, - MsgPrefix: msgPrefix, + Received: time.Now(), + Size: mw.Size, } a.WithWLock(func() { diff --git a/imapserver/server.go b/imapserver/server.go index 75dd4043e8..b15b634919 100644 --- a/imapserver/server.go +++ b/imapserver/server.go @@ -2683,7 +2683,7 @@ func (c *conn) cmdAppend(tag, cmd string, p *parser) { } }() defer c.xtrace(mlog.LevelTracedata)() - mw := &message.Writer{Writer: msgFile} + mw := message.NewWriter(msgFile) msize, err := io.Copy(mw, io.LimitReader(c.br, size)) c.xtrace(mlog.LevelTrace) // Restore. if err != nil { @@ -2693,11 +2693,6 @@ func (c *conn) cmdAppend(tag, cmd string, p *parser) { if msize != size { xserverErrorf("read %d bytes for message, expected %d (%w)", msize, size, errIO) } - msgPrefix := []byte{} - // todo: should we treat the message as body? i believe headers are required in messages, and bodies are optional. so would make more sense to treat the data as headers. perhaps only if the headers are valid? - if !mw.HaveHeaders { - msgPrefix = []byte("\r\n") - } if utf8 { line := c.readline(false) @@ -2736,8 +2731,7 @@ func (c *conn) cmdAppend(tag, cmd string, p *parser) { Received: tm, Flags: storeFlags, Keywords: keywords, - Size: size + int64(len(msgPrefix)), - MsgPrefix: msgPrefix, + Size: size, } mb.Add(m.MailboxCounts()) diff --git a/imapserver/status_test.go b/imapserver/status_test.go index f1abdcce76..1ba4b491ba 100644 --- a/imapserver/status_test.go +++ b/imapserver/status_test.go @@ -25,10 +25,10 @@ func TestStatus(t *testing.T) { // Again, now with a message in the mailbox. tc.transactf("ok", "append inbox {4+}\r\ntest") tc.transactf("ok", "status inbox (messages uidnext uidvalidity unseen deleted size recent appendlimit)") - tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 0, "SIZE": 6, "RECENT": 0, "APPENDLIMIT": 0}}) + tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 0, "SIZE": 4, "RECENT": 0, "APPENDLIMIT": 0}}) tc.client.Select("inbox") tc.client.StoreFlagsSet("1", true, `\Deleted`) tc.transactf("ok", "status inbox (messages uidnext uidvalidity unseen deleted size recent appendlimit)") - tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 1, "SIZE": 6, "RECENT": 0, "APPENDLIMIT": 0}}) + tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 1, "SIZE": 4, "RECENT": 0, "APPENDLIMIT": 0}}) } diff --git a/message/part.go b/message/part.go index f9786a23b1..ce0bb13c23 100644 --- a/message/part.go +++ b/message/part.go @@ -4,7 +4,6 @@ package message // todo: allow more invalid content-type values, we now stop parsing on: empty media type (eg "content-type: ; name=..."), empty value for property (eg "charset=", missing quotes for characters that should be quoted (eg boundary containing "=" but without quotes), duplicate properties (two charsets), empty pairs (eg "text/html;;"). // todo: what should our max line length be? rfc says 1000. messages exceed that. we should enforce 1000 for outgoing messages. // todo: should we be forgiving when closing boundary in multipart message is missing? seems like spam messages do this... -// todo: allow bare \r (without \n)? this does happen in messages. // todo: should we allow base64 messages where a line starts with a space? and possibly more whitespace. is happening in messages. coreutils base64 accepts it, encoding/base64 does not. // todo: handle comments in headers? // todo: should we just always store messages with \n instead of \r\n? \r\n seems easier for use with imap. @@ -45,7 +44,8 @@ var ( errLineTooLong = errors.New("line too long") errMissingBoundaryParam = errors.New("missing/empty boundary content-type parameter") errMissingClosingBoundary = errors.New("eof without closing boundary") - errHalfLineSep = errors.New("invalid CR or LF without the other") + errBareLF = errors.New("invalid bare line feed") + errBareCR = errors.New("invalid bare carriage return") errUnexpectedEOF = errors.New("unexpected eof") ) @@ -269,8 +269,12 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) { hb := &bytes.Buffer{} for { line, _, err := b.ReadLine(true) + if err == io.EOF { + // No body is valid. + break + } if err != nil { - return p, err + return p, fmt.Errorf("reading header line: %w", err) } hb.Write(line) if len(line) == 2 { @@ -279,13 +283,18 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) { } p.BodyOffset = b.offset - h, err := parseHeader(hb) - if err != nil { - return p, fmt.Errorf("parsing header: %w", err) + // Don't attempt to parse empty header, mail.ReadMessage doesn't like it. + if p.HeaderOffset == p.BodyOffset { + p.header = textproto.MIMEHeader{} + } else { + h, err := parseHeader(hb) + if err != nil { + return p, fmt.Errorf("parsing header: %w", err) + } + p.header = h } - p.header = h - ct := h.Get("Content-Type") + ct := p.header.Get("Content-Type") mt, params, err := mime.ParseMediaType(ct) if err != nil && ct != "" { return p, fmt.Errorf("%w: %s: %q", ErrBadContentType, err, ct) @@ -300,12 +309,12 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) { p.ContentTypeParams = params } - p.ContentID = h.Get("Content-Id") - p.ContentDescription = h.Get("Content-Description") - p.ContentTransferEncoding = strings.ToUpper(h.Get("Content-Transfer-Encoding")) + p.ContentID = p.header.Get("Content-Id") + p.ContentDescription = p.header.Get("Content-Description") + p.ContentTransferEncoding = strings.ToUpper(p.header.Get("Content-Transfer-Encoding")) if parent == nil { - p.Envelope, err = parseEnvelope(mail.Header(h)) + p.Envelope, err = parseEnvelope(mail.Header(p.header)) if err != nil { return p, err } @@ -347,6 +356,10 @@ func (p *Part) Header() (textproto.MIMEHeader, error) { if p.header != nil { return p.header, nil } + if p.HeaderOffset == p.BodyOffset { + p.header = textproto.MIMEHeader{} + return p.header, nil + } h, err := parseHeader(p.HeaderReader()) p.header = h return h, err @@ -357,6 +370,7 @@ func (p *Part) HeaderReader() io.Reader { return io.NewSectionReader(p.r, p.HeaderOffset, p.BodyOffset-p.HeaderOffset) } +// parse a header, only call this on non-empty input (even though that is a valid header). func parseHeader(r io.Reader) (textproto.MIMEHeader, error) { // We read using mail.ReadMessage instead of textproto.ReadMIMEHeaders because the // first handles email messages properly, while the second only works for HTTP @@ -444,7 +458,7 @@ func parseAddressList(h mail.Header, k string) []Address { // ParseNextPart parses the next (sub)part of this multipart message. // ParseNextPart returns io.EOF and a nil part when there are no more parts. -// Only use for initial parsing of message. Once parsed, use p.Parts. +// Only used for initial parsing of message. Once parsed, use p.Parts. func (p *Part) ParseNextPart() (*Part, error) { if len(p.bound) == 0 { return nil, errNotMultipart @@ -602,14 +616,15 @@ func (p *Part) RawReader() io.Reader { } p.RawLineCount = 0 if p.parent == nil { - return &offsetReader{p, p.BodyOffset, true} + return &offsetReader{p, p.BodyOffset, true, false} } - return &boundReader{p: p, b: &bufAt{r: p.r, offset: p.BodyOffset}, lastnewline: true} + return &boundReader{p: p, b: &bufAt{r: p.r, offset: p.BodyOffset}, prevlf: true} } // bufAt is a buffered reader on an underlying ReaderAt. +// bufAt verifies that lines end with crlf. type bufAt struct { - offset int64 // Offset in r currently consumed, i.e. ignoring any buffered data. + offset int64 // Offset in r currently consumed, i.e. not including any buffered data. r io.ReaderAt buf []byte // Buffered data. @@ -649,7 +664,7 @@ func (b *bufAt) ensure() error { } // ReadLine reads a line until \r\n is found, returning the line including \r\n. -// If not found, or a single \r or \n is encountered, ReadLine returns an error, e.g. io.EOF. +// If not found, or a bare \n is encountered, or a bare \r is enountered in pedantic mode, ReadLine returns an error. func (b *bufAt) ReadLine(requirecrlf bool) (buf []byte, crlf bool, err error) { return b.line(true, requirecrlf) } @@ -664,14 +679,18 @@ func (b *bufAt) line(consume, requirecrlf bool) (buf []byte, crlf bool, err erro } for i, c := range b.buf[:b.nbuf] { if c == '\n' { - return nil, false, errHalfLineSep + // Should have seen a \r, which should have been handled below. + return nil, false, errBareLF } if c != '\r' { continue } i++ if i >= b.nbuf || b.buf[i] != '\n' { - return nil, false, errHalfLineSep + if moxvar.Pedantic { + return nil, false, errBareCR + } + continue } b.scratch = b.scratch[:i+1] copy(b.scratch, b.buf[:i+1]) @@ -708,10 +727,13 @@ func (b *bufAt) PeekByte() (byte, error) { return b.buf[0], nil } +// offsetReader reads from p.r starting from offset, and RawLineCount on p. +// offsetReader validates lines end with \r\n. type offsetReader struct { - p *Part - offset int64 - lastnewline bool + p *Part + offset int64 + prevlf bool + prevcr bool } func (r *offsetReader) Read(buf []byte) (int, error) { @@ -720,10 +742,18 @@ func (r *offsetReader) Read(buf []byte) (int, error) { r.offset += int64(n) for _, c := range buf[:n] { - if r.lastnewline { + if r.prevlf { r.p.RawLineCount++ } - r.lastnewline = c == '\n' + if err == nil || err == io.EOF { + if c == '\n' && !r.prevcr { + err = errBareLF + } else if c != '\n' && r.prevcr && moxvar.Pedantic { + err = errBareCR + } + } + r.prevlf = c == '\n' + r.prevcr = c == '\r' } } if err == io.EOF { @@ -735,13 +765,15 @@ func (r *offsetReader) Read(buf []byte) (int, error) { var crlf = []byte("\r\n") // boundReader is a reader that stops at a closing multipart boundary. +// boundReader ensures lines end with crlf through its use of bufAt. type boundReader struct { - p *Part - b *bufAt - buf []byte // Data from previous line, to be served first. - nbuf int // Number of valid bytes in buf. - crlf []byte // Possible crlf, to be returned if we do not yet encounter a boundary. - lastnewline bool // If last char return was a newline. For counting lines. + p *Part + b *bufAt + buf []byte // Data from previous line, to be served first. + nbuf int // Number of valid bytes in buf. + crlf []byte // Possible crlf, to be returned if we do not yet encounter a boundary. + prevlf bool // If last char returned was a newline. For counting lines. + prevcr bool } func (b *boundReader) Read(buf []byte) (count int, rerr error) { @@ -749,10 +781,18 @@ func (b *boundReader) Read(buf []byte) (count int, rerr error) { defer func() { if count > 0 { for _, c := range origBuf[:count] { - if b.lastnewline { + if b.prevlf { b.p.RawLineCount++ } - b.lastnewline = c == '\n' + if rerr == nil || rerr == io.EOF { + if c == '\n' && !b.prevcr { + rerr = errBareLF + } else if c != '\n' && b.prevcr && moxvar.Pedantic { + rerr = errBareCR + } + } + b.prevlf = c == '\n' + b.prevcr = c == '\r' } } }() diff --git a/message/part_test.go b/message/part_test.go index c3fc6216ec..9e92460411 100644 --- a/message/part_test.go +++ b/message/part_test.go @@ -10,6 +10,8 @@ import ( "reflect" "strings" "testing" + + "github.com/mjl-/mox/moxvar" ) func tcheck(t *testing.T, err error, msg string) { @@ -204,11 +206,29 @@ func TestLongLine(t *testing.T) { } func TestHalfCrLf(t *testing.T) { - _, err := Parse(strings.NewReader("test\rtest")) - tfail(t, err, errHalfLineSep) - - _, err = Parse(strings.NewReader("test\ntest")) - tfail(t, err, errHalfLineSep) + parse := func(s string) error { + p, err := Parse(strings.NewReader(s)) + if err != nil { + return err + } + return p.Walk(nil) + } + err := parse("subject: test\ntest\r\n") + tfail(t, err, errBareLF) + err = parse("\r\ntest\ntest\r\n") + tfail(t, err, errBareLF) + + moxvar.Pedantic = true + err = parse("subject: test\rtest\r\n") + tfail(t, err, errBareCR) + err = parse("\r\ntest\rtest\r\n") + tfail(t, err, errBareCR) + moxvar.Pedantic = false + + err = parse("subject: test\rtest\r\n") + tcheck(t, err, "header with bare cr") + err = parse("\r\ntest\rtest\r\n") + tcheck(t, err, "body with bare cr") } func TestMissingClosingBoundary(t *testing.T) { diff --git a/message/writer.go b/message/writer.go index bb535f5ee1..79e1920271 100644 --- a/message/writer.go +++ b/message/writer.go @@ -7,17 +7,24 @@ import ( // Writer is a write-through helper, collecting properties about the written // message. type Writer struct { - Writer io.Writer - HaveHeaders bool - Has8bit bool // Whether a byte with the high/8bit has been read. So whether this is 8BITMIME instead of 7BIT. - Size int64 - tail [3]byte // For detecting crlfcrlf. + writer io.Writer + + HaveBody bool // Body is optional. ../rfc/5322:343 + Has8bit bool // Whether a byte with the high/8bit has been read. So whether this is 8BITMIME instead of 7BIT. + Size int64 + + tail [3]byte // For detecting header/body-separating crlf. // todo: should be parsing headers here, as we go } +func NewWriter(w io.Writer) *Writer { + // Pretend we already saw \r\n, for handling empty header. + return &Writer{writer: w, tail: [3]byte{0, '\r', '\n'}} +} + // Write implements io.Writer. func (w *Writer) Write(buf []byte) (int, error) { - if !w.HaveHeaders && len(buf) > 0 { + if !w.HaveBody && len(buf) > 0 { get := func(i int) byte { if i < 0 { return w.tail[3+i] @@ -27,7 +34,7 @@ func (w *Writer) Write(buf []byte) (int, error) { for i, b := range buf { if b == '\n' && get(i-3) == '\r' && get(i-2) == '\n' && get(i-1) == '\r' { - w.HaveHeaders = true + w.HaveBody = true break } } @@ -47,7 +54,7 @@ func (w *Writer) Write(buf []byte) (int, error) { } } } - n, err := w.Writer.Write(buf) + n, err := w.writer.Write(buf) if n > 0 { w.Size += int64(n) } diff --git a/message/writer_test.go b/message/writer_test.go index 2155729c2e..49b9991d26 100644 --- a/message/writer_test.go +++ b/message/writer_test.go @@ -10,23 +10,23 @@ func TestMsgWriter(t *testing.T) { t.Helper() b := &strings.Builder{} - mw := &Writer{Writer: b} + mw := NewWriter(b) if _, err := mw.Write([]byte(data)); err != nil { t.Fatalf("write for message %q: %s", data, err) } - if mw.HaveHeaders != want { - t.Fatalf("got %v, expected %v, for message %q", mw.HaveHeaders, want, data) + if mw.HaveBody != want { + t.Fatalf("got %v, expected %v, for message %q", mw.HaveBody, want, data) } b = &strings.Builder{} - mw = &Writer{Writer: b} + mw = NewWriter(b) for i := range data { if _, err := mw.Write([]byte(data[i : i+1])); err != nil { t.Fatalf("write for message %q: %s", data, err) } } - if mw.HaveHeaders != want { - t.Fatalf("got %v, expected %v, for message %q", mw.HaveHeaders, want, data) + if mw.HaveBody != want { + t.Fatalf("got %v, expected %v, for message %q", mw.HaveBody, want, data) } } @@ -38,4 +38,5 @@ func TestMsgWriter(t *testing.T) { check("key: value\r\rbody", false) check("\r\n\r\n", true) check("\r\n\r\nbody", true) + check("\r\nbody", true) } diff --git a/queue/dsn.go b/queue/dsn.go index 68e5c3b2ce..d9545af25b 100644 --- a/queue/dsn.go +++ b/queue/dsn.go @@ -162,7 +162,7 @@ func queueDSN(log *mlog.Log, m Msg, remoteMTA dsn.NameIP, secodeOpt, errmsg stri } }() - msgWriter := &message.Writer{Writer: msgFile} + msgWriter := message.NewWriter(msgFile) if _, err := msgWriter.Write(msgData); err != nil { qlog("writing dsn message", err) return diff --git a/smtpserver/server.go b/smtpserver/server.go index 7f28320ba4..f23ba9b861 100644 --- a/smtpserver/server.go +++ b/smtpserver/server.go @@ -1505,7 +1505,7 @@ func (c *conn) cmdData(p *parser) { c.log.Check(err, "removing temporary message file") } }() - msgWriter := &message.Writer{Writer: dataFile} + msgWriter := message.NewWriter(dataFile) dr := smtp.NewDataReader(c.r) n, err := io.Copy(&limitWriter{maxSize: c.maxMessageSize, w: msgWriter}, dr) c.xtrace(mlog.LevelTrace) // Restore. @@ -1534,7 +1534,7 @@ func (c *conn) cmdData(p *parser) { // Basic sanity checks on messages before we send them out to the world. Just // trying to be strict in what we do to others and liberal in what we accept. if c.submission { - if !msgWriter.HaveHeaders { + if !msgWriter.HaveBody { // ../rfc/6409:541 xsmtpUserErrorf(smtp.C554TransactionFailed, smtp.SeMsg6Other0, "message requires both header and body section") } @@ -1771,10 +1771,6 @@ func (c *conn) submit(ctx context.Context, recvHdrFor func(string) string, msgWr } xmsgPrefix := append([]byte(recvHdrFor(rcptAcc.rcptTo.String())), msgPrefix...) - // todo: don't convert the headers to a body? it seems the body part is optional. does this have consequences for us in other places? ../rfc/5322:343 - if !msgWriter.HaveHeaders { - xmsgPrefix = append(xmsgPrefix, "\r\n"...) - } msgSize := int64(len(xmsgPrefix)) + msgWriter.Size if _, err := queue.Add(ctx, c.log, c.account.Name, *c.mailFrom, rcptAcc.rcptTo, msgWriter.Has8bit, c.smtputf8, msgSize, messageID, xmsgPrefix, dataFile, nil, i == len(c.recipients)-1); err != nil { @@ -2283,9 +2279,6 @@ func (c *conn) deliver(ctx context.Context, recvHdrFor func(string) string, msgW receivedSPF.Header() + recvHdrFor(rcptAcc.rcptTo.String()), ) - if !msgWriter.HaveHeaders { - msgPrefix = append(msgPrefix, "\r\n"...) - } m := &store.Message{ Received: time.Now(), diff --git a/store/account_test.go b/store/account_test.go index 38eb8a2c36..2bb4d001b9 100644 --- a/store/account_test.go +++ b/store/account_test.go @@ -45,7 +45,7 @@ func TestMailbox(t *testing.T) { t.Fatalf("creating temp msg file: %s", err) } defer msgFile.Close() - msgWriter := &message.Writer{Writer: msgFile} + msgWriter := message.NewWriter(msgFile) if _, err := msgWriter.Write([]byte(" message")); err != nil { t.Fatalf("writing to temp message: %s", err) }