Skip to content

Commit

Permalink
improve message parsing: allow bare carriage return (unless in pedant…
Browse files Browse the repository at this point in the history
…ic mode), allow empty header, and no longer treat a message with only headers as a message with only a body
  • Loading branch information
mjl- committed Aug 11, 2023
1 parent 79d0618 commit 48eb530
Show file tree
Hide file tree
Showing 10 changed files with 130 additions and 80 deletions.
11 changes: 3 additions & 8 deletions ctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,21 +327,16 @@ func servectlcmd(ctx context.Context, ctl *ctl, shutdown func()) {
log.Check(err, "closing temporary message file")
}
}()
mw := &message.Writer{Writer: msgFile}
mw := message.NewWriter(msgFile)
ctl.xwriteok()

ctl.xstreamto(mw)
err = msgFile.Sync()
ctl.xcheck(err, "syncing message to storage")
msgPrefix := []byte{}
if !mw.HaveHeaders {
msgPrefix = []byte("\r\n\r\n")
}

m := &store.Message{
Received: time.Now(),
Size: int64(len(msgPrefix)) + mw.Size,
MsgPrefix: msgPrefix,
Received: time.Now(),
Size: mw.Size,
}

a.WithWLock(func() {
Expand Down
10 changes: 2 additions & 8 deletions imapserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -2683,7 +2683,7 @@ func (c *conn) cmdAppend(tag, cmd string, p *parser) {
}
}()
defer c.xtrace(mlog.LevelTracedata)()
mw := &message.Writer{Writer: msgFile}
mw := message.NewWriter(msgFile)
msize, err := io.Copy(mw, io.LimitReader(c.br, size))
c.xtrace(mlog.LevelTrace) // Restore.
if err != nil {
Expand All @@ -2693,11 +2693,6 @@ func (c *conn) cmdAppend(tag, cmd string, p *parser) {
if msize != size {
xserverErrorf("read %d bytes for message, expected %d (%w)", msize, size, errIO)
}
msgPrefix := []byte{}
// todo: should we treat the message as body? i believe headers are required in messages, and bodies are optional. so would make more sense to treat the data as headers. perhaps only if the headers are valid?
if !mw.HaveHeaders {
msgPrefix = []byte("\r\n")
}

if utf8 {
line := c.readline(false)
Expand Down Expand Up @@ -2736,8 +2731,7 @@ func (c *conn) cmdAppend(tag, cmd string, p *parser) {
Received: tm,
Flags: storeFlags,
Keywords: keywords,
Size: size + int64(len(msgPrefix)),
MsgPrefix: msgPrefix,
Size: size,
}

mb.Add(m.MailboxCounts())
Expand Down
4 changes: 2 additions & 2 deletions imapserver/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ func TestStatus(t *testing.T) {
// Again, now with a message in the mailbox.
tc.transactf("ok", "append inbox {4+}\r\ntest")
tc.transactf("ok", "status inbox (messages uidnext uidvalidity unseen deleted size recent appendlimit)")
tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 0, "SIZE": 6, "RECENT": 0, "APPENDLIMIT": 0}})
tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 0, "SIZE": 4, "RECENT": 0, "APPENDLIMIT": 0}})

tc.client.Select("inbox")
tc.client.StoreFlagsSet("1", true, `\Deleted`)
tc.transactf("ok", "status inbox (messages uidnext uidvalidity unseen deleted size recent appendlimit)")
tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 1, "SIZE": 6, "RECENT": 0, "APPENDLIMIT": 0}})
tc.xuntagged(imapclient.UntaggedStatus{Mailbox: "Inbox", Attrs: map[string]int64{"MESSAGES": 1, "UIDVALIDITY": 1, "UIDNEXT": 2, "UNSEEN": 1, "DELETED": 1, "SIZE": 4, "RECENT": 0, "APPENDLIMIT": 0}})
}
104 changes: 72 additions & 32 deletions message/part.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ package message
// todo: allow more invalid content-type values, we now stop parsing on: empty media type (eg "content-type: ; name=..."), empty value for property (eg "charset=", missing quotes for characters that should be quoted (eg boundary containing "=" but without quotes), duplicate properties (two charsets), empty pairs (eg "text/html;;").
// todo: what should our max line length be? rfc says 1000. messages exceed that. we should enforce 1000 for outgoing messages.
// todo: should we be forgiving when closing boundary in multipart message is missing? seems like spam messages do this...
// todo: allow bare \r (without \n)? this does happen in messages.
// todo: should we allow base64 messages where a line starts with a space? and possibly more whitespace. is happening in messages. coreutils base64 accepts it, encoding/base64 does not.
// todo: handle comments in headers?
// todo: should we just always store messages with \n instead of \r\n? \r\n seems easier for use with imap.
Expand Down Expand Up @@ -45,7 +44,8 @@ var (
errLineTooLong = errors.New("line too long")
errMissingBoundaryParam = errors.New("missing/empty boundary content-type parameter")
errMissingClosingBoundary = errors.New("eof without closing boundary")
errHalfLineSep = errors.New("invalid CR or LF without the other")
errBareLF = errors.New("invalid bare line feed")
errBareCR = errors.New("invalid bare carriage return")
errUnexpectedEOF = errors.New("unexpected eof")
)

Expand Down Expand Up @@ -269,8 +269,12 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) {
hb := &bytes.Buffer{}
for {
line, _, err := b.ReadLine(true)
if err == io.EOF {
// No body is valid.
break
}
if err != nil {
return p, err
return p, fmt.Errorf("reading header line: %w", err)
}
hb.Write(line)
if len(line) == 2 {
Expand All @@ -279,13 +283,18 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) {
}
p.BodyOffset = b.offset

h, err := parseHeader(hb)
if err != nil {
return p, fmt.Errorf("parsing header: %w", err)
// Don't attempt to parse empty header, mail.ReadMessage doesn't like it.
if p.HeaderOffset == p.BodyOffset {
p.header = textproto.MIMEHeader{}
} else {
h, err := parseHeader(hb)
if err != nil {
return p, fmt.Errorf("parsing header: %w", err)
}
p.header = h
}
p.header = h

ct := h.Get("Content-Type")
ct := p.header.Get("Content-Type")
mt, params, err := mime.ParseMediaType(ct)
if err != nil && ct != "" {
return p, fmt.Errorf("%w: %s: %q", ErrBadContentType, err, ct)
Expand All @@ -300,12 +309,12 @@ func newPart(r io.ReaderAt, offset int64, parent *Part) (p Part, rerr error) {
p.ContentTypeParams = params
}

p.ContentID = h.Get("Content-Id")
p.ContentDescription = h.Get("Content-Description")
p.ContentTransferEncoding = strings.ToUpper(h.Get("Content-Transfer-Encoding"))
p.ContentID = p.header.Get("Content-Id")
p.ContentDescription = p.header.Get("Content-Description")
p.ContentTransferEncoding = strings.ToUpper(p.header.Get("Content-Transfer-Encoding"))

if parent == nil {
p.Envelope, err = parseEnvelope(mail.Header(h))
p.Envelope, err = parseEnvelope(mail.Header(p.header))
if err != nil {
return p, err
}
Expand Down Expand Up @@ -347,6 +356,10 @@ func (p *Part) Header() (textproto.MIMEHeader, error) {
if p.header != nil {
return p.header, nil
}
if p.HeaderOffset == p.BodyOffset {
p.header = textproto.MIMEHeader{}
return p.header, nil
}
h, err := parseHeader(p.HeaderReader())
p.header = h
return h, err
Expand All @@ -357,6 +370,7 @@ func (p *Part) HeaderReader() io.Reader {
return io.NewSectionReader(p.r, p.HeaderOffset, p.BodyOffset-p.HeaderOffset)
}

// parse a header, only call this on non-empty input (even though that is a valid header).
func parseHeader(r io.Reader) (textproto.MIMEHeader, error) {
// We read using mail.ReadMessage instead of textproto.ReadMIMEHeaders because the
// first handles email messages properly, while the second only works for HTTP
Expand Down Expand Up @@ -444,7 +458,7 @@ func parseAddressList(h mail.Header, k string) []Address {

// ParseNextPart parses the next (sub)part of this multipart message.
// ParseNextPart returns io.EOF and a nil part when there are no more parts.
// Only use for initial parsing of message. Once parsed, use p.Parts.
// Only used for initial parsing of message. Once parsed, use p.Parts.
func (p *Part) ParseNextPart() (*Part, error) {
if len(p.bound) == 0 {
return nil, errNotMultipart
Expand Down Expand Up @@ -602,14 +616,15 @@ func (p *Part) RawReader() io.Reader {
}
p.RawLineCount = 0
if p.parent == nil {
return &offsetReader{p, p.BodyOffset, true}
return &offsetReader{p, p.BodyOffset, true, false}
}
return &boundReader{p: p, b: &bufAt{r: p.r, offset: p.BodyOffset}, lastnewline: true}
return &boundReader{p: p, b: &bufAt{r: p.r, offset: p.BodyOffset}, prevlf: true}
}

// bufAt is a buffered reader on an underlying ReaderAt.
// bufAt verifies that lines end with crlf.
type bufAt struct {
offset int64 // Offset in r currently consumed, i.e. ignoring any buffered data.
offset int64 // Offset in r currently consumed, i.e. not including any buffered data.

r io.ReaderAt
buf []byte // Buffered data.
Expand Down Expand Up @@ -649,7 +664,7 @@ func (b *bufAt) ensure() error {
}

// ReadLine reads a line until \r\n is found, returning the line including \r\n.
// If not found, or a single \r or \n is encountered, ReadLine returns an error, e.g. io.EOF.
// If not found, or a bare \n is encountered, or a bare \r is enountered in pedantic mode, ReadLine returns an error.
func (b *bufAt) ReadLine(requirecrlf bool) (buf []byte, crlf bool, err error) {
return b.line(true, requirecrlf)
}
Expand All @@ -664,14 +679,18 @@ func (b *bufAt) line(consume, requirecrlf bool) (buf []byte, crlf bool, err erro
}
for i, c := range b.buf[:b.nbuf] {
if c == '\n' {
return nil, false, errHalfLineSep
// Should have seen a \r, which should have been handled below.
return nil, false, errBareLF
}
if c != '\r' {
continue
}
i++
if i >= b.nbuf || b.buf[i] != '\n' {
return nil, false, errHalfLineSep
if moxvar.Pedantic {
return nil, false, errBareCR
}
continue
}
b.scratch = b.scratch[:i+1]
copy(b.scratch, b.buf[:i+1])
Expand Down Expand Up @@ -708,10 +727,13 @@ func (b *bufAt) PeekByte() (byte, error) {
return b.buf[0], nil
}

// offsetReader reads from p.r starting from offset, and RawLineCount on p.
// offsetReader validates lines end with \r\n.
type offsetReader struct {
p *Part
offset int64
lastnewline bool
p *Part
offset int64
prevlf bool
prevcr bool
}

func (r *offsetReader) Read(buf []byte) (int, error) {
Expand All @@ -720,10 +742,18 @@ func (r *offsetReader) Read(buf []byte) (int, error) {
r.offset += int64(n)

for _, c := range buf[:n] {
if r.lastnewline {
if r.prevlf {
r.p.RawLineCount++
}
r.lastnewline = c == '\n'
if err == nil || err == io.EOF {
if c == '\n' && !r.prevcr {
err = errBareLF
} else if c != '\n' && r.prevcr && moxvar.Pedantic {
err = errBareCR
}
}
r.prevlf = c == '\n'
r.prevcr = c == '\r'
}
}
if err == io.EOF {
Expand All @@ -735,24 +765,34 @@ func (r *offsetReader) Read(buf []byte) (int, error) {
var crlf = []byte("\r\n")

// boundReader is a reader that stops at a closing multipart boundary.
// boundReader ensures lines end with crlf through its use of bufAt.
type boundReader struct {
p *Part
b *bufAt
buf []byte // Data from previous line, to be served first.
nbuf int // Number of valid bytes in buf.
crlf []byte // Possible crlf, to be returned if we do not yet encounter a boundary.
lastnewline bool // If last char return was a newline. For counting lines.
p *Part
b *bufAt
buf []byte // Data from previous line, to be served first.
nbuf int // Number of valid bytes in buf.
crlf []byte // Possible crlf, to be returned if we do not yet encounter a boundary.
prevlf bool // If last char returned was a newline. For counting lines.
prevcr bool
}

func (b *boundReader) Read(buf []byte) (count int, rerr error) {
origBuf := buf
defer func() {
if count > 0 {
for _, c := range origBuf[:count] {
if b.lastnewline {
if b.prevlf {
b.p.RawLineCount++
}
b.lastnewline = c == '\n'
if rerr == nil || rerr == io.EOF {
if c == '\n' && !b.prevcr {
rerr = errBareLF
} else if c != '\n' && b.prevcr && moxvar.Pedantic {
rerr = errBareCR
}
}
b.prevlf = c == '\n'
b.prevcr = c == '\r'
}
}
}()
Expand Down
30 changes: 25 additions & 5 deletions message/part_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"reflect"
"strings"
"testing"

"github.com/mjl-/mox/moxvar"
)

func tcheck(t *testing.T, err error, msg string) {
Expand Down Expand Up @@ -204,11 +206,29 @@ func TestLongLine(t *testing.T) {
}

func TestHalfCrLf(t *testing.T) {
_, err := Parse(strings.NewReader("test\rtest"))
tfail(t, err, errHalfLineSep)

_, err = Parse(strings.NewReader("test\ntest"))
tfail(t, err, errHalfLineSep)
parse := func(s string) error {
p, err := Parse(strings.NewReader(s))
if err != nil {
return err
}
return p.Walk(nil)
}
err := parse("subject: test\ntest\r\n")
tfail(t, err, errBareLF)
err = parse("\r\ntest\ntest\r\n")
tfail(t, err, errBareLF)

moxvar.Pedantic = true
err = parse("subject: test\rtest\r\n")
tfail(t, err, errBareCR)
err = parse("\r\ntest\rtest\r\n")
tfail(t, err, errBareCR)
moxvar.Pedantic = false

err = parse("subject: test\rtest\r\n")
tcheck(t, err, "header with bare cr")
err = parse("\r\ntest\rtest\r\n")
tcheck(t, err, "body with bare cr")
}

func TestMissingClosingBoundary(t *testing.T) {
Expand Down
23 changes: 15 additions & 8 deletions message/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,24 @@ import (
// Writer is a write-through helper, collecting properties about the written
// message.
type Writer struct {
Writer io.Writer
HaveHeaders bool
Has8bit bool // Whether a byte with the high/8bit has been read. So whether this is 8BITMIME instead of 7BIT.
Size int64
tail [3]byte // For detecting crlfcrlf.
writer io.Writer

HaveBody bool // Body is optional. ../rfc/5322:343
Has8bit bool // Whether a byte with the high/8bit has been read. So whether this is 8BITMIME instead of 7BIT.
Size int64

tail [3]byte // For detecting header/body-separating crlf.
// todo: should be parsing headers here, as we go
}

func NewWriter(w io.Writer) *Writer {
// Pretend we already saw \r\n, for handling empty header.
return &Writer{writer: w, tail: [3]byte{0, '\r', '\n'}}
}

// Write implements io.Writer.
func (w *Writer) Write(buf []byte) (int, error) {
if !w.HaveHeaders && len(buf) > 0 {
if !w.HaveBody && len(buf) > 0 {
get := func(i int) byte {
if i < 0 {
return w.tail[3+i]
Expand All @@ -27,7 +34,7 @@ func (w *Writer) Write(buf []byte) (int, error) {

for i, b := range buf {
if b == '\n' && get(i-3) == '\r' && get(i-2) == '\n' && get(i-1) == '\r' {
w.HaveHeaders = true
w.HaveBody = true
break
}
}
Expand All @@ -47,7 +54,7 @@ func (w *Writer) Write(buf []byte) (int, error) {
}
}
}
n, err := w.Writer.Write(buf)
n, err := w.writer.Write(buf)
if n > 0 {
w.Size += int64(n)
}
Expand Down
Loading

0 comments on commit 48eb530

Please sign in to comment.