Skip to content

Commit

Permalink
add strict mode when parsing messages, typically enabled for incoming…
Browse files Browse the repository at this point in the history
… special-use messages like tls/dmarc reports, subjectpass emails

and pass a logger to the message parser, so problems with message parsing get
the cid logged.
  • Loading branch information
mjl- committed Aug 15, 2023
1 parent f5f953b commit 34c2dcd
Show file tree
Hide file tree
Showing 24 changed files with 312 additions and 153 deletions.
4 changes: 2 additions & 2 deletions ctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -800,7 +800,7 @@ func servectlcmd(ctx context.Context, ctl *ctl, shutdown func()) {
m.Size = correctSize

mr := acc.MessageReader(m)
part, err := message.EnsurePart(mr, m.Size)
part, err := message.EnsurePart(log, false, mr, m.Size)
if err != nil {
_, werr := fmt.Fprintf(w, "parsing message %d again: %v (continuing)\n", m.ID, err)
ctl.xcheck(werr, "write")
Expand Down Expand Up @@ -890,7 +890,7 @@ func servectlcmd(ctx context.Context, ctl *ctl, shutdown func()) {
return q.ForEach(func(m store.Message) error {
lastID = m.ID
mr := acc.MessageReader(m)
p, err := message.EnsurePart(mr, m.Size)
p, err := message.EnsurePart(log, false, mr, m.Size)
if err != nil {
_, err := fmt.Fprintf(w, "parsing message %d: %v (continuing)\n", m.ID, err)
ctl.xcheck(err, "write")
Expand Down
13 changes: 7 additions & 6 deletions dmarcrpt/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"strings"

"github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/moxio"
)

Expand All @@ -33,17 +34,17 @@ func ParseReport(r io.Reader) (*Feedback, error) {
// ParseMessageReport parses an aggregate feedback report from a mail message. The
// maximum message size is 15MB, the maximum report size after decompression is
// 20MB.
func ParseMessageReport(r io.ReaderAt) (*Feedback, error) {
func ParseMessageReport(log *mlog.Log, r io.ReaderAt) (*Feedback, error) {
// ../rfc/7489:1801
p, err := message.Parse(&moxio.LimitAtReader{R: r, Limit: 15 * 1024 * 1024})
p, err := message.Parse(log, true, &moxio.LimitAtReader{R: r, Limit: 15 * 1024 * 1024})
if err != nil {
return nil, fmt.Errorf("parsing mail message: %s", err)
}

return parseMessageReport(p)
return parseMessageReport(log, p)
}

func parseMessageReport(p message.Part) (*Feedback, error) {
func parseMessageReport(log *mlog.Log, p message.Part) (*Feedback, error) {
// Pretty much any mime structure is allowed. ../rfc/7489:1861
// In practice, some parties will send the report as the only (non-multipart)
// content of the message.
Expand All @@ -53,14 +54,14 @@ func parseMessageReport(p message.Part) (*Feedback, error) {
}

for {
sp, err := p.ParseNextPart()
sp, err := p.ParseNextPart(log)
if err == io.EOF {
return nil, ErrNoReport
}
if err != nil {
return nil, err
}
report, err := parseMessageReport(*sp)
report, err := parseMessageReport(log, *sp)
if err == ErrNoReport {
continue
} else if err != nil || report != nil {
Expand Down
10 changes: 7 additions & 3 deletions dmarcrpt/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@ import (
"reflect"
"strings"
"testing"

"github.com/mjl-/mox/mlog"
)

var xlog = mlog.New("dmarcrpt")

const reportExample = `<?xml version="1.0" encoding="UTF-8" ?>
<feedback>
<report_metadata>
Expand Down Expand Up @@ -130,15 +134,15 @@ func TestParseMessageReport(t *testing.T) {
if err != nil {
t.Fatalf("open %q: %s", p, err)
}
_, err = ParseMessageReport(f)
_, err = ParseMessageReport(xlog, f)
if err != nil {
t.Fatalf("ParseMessageReport: %q: %s", p, err)
}
f.Close()
}

// No report in a non-multipart message.
_, err = ParseMessageReport(strings.NewReader("From: <[email protected]>\r\n\r\nNo report.\r\n"))
_, err = ParseMessageReport(xlog, strings.NewReader("From: <[email protected]>\r\n\r\nNo report.\r\n"))
if err != ErrNoReport {
t.Fatalf("message without report, got err %#v, expected ErrNoreport", err)
}
Expand All @@ -164,7 +168,7 @@ MIME-Version: 1.0
--===============5735553800636657282==--
`, "\n", "\r\n")
_, err = ParseMessageReport(strings.NewReader(multipartNoreport))
_, err = ParseMessageReport(xlog, strings.NewReader(multipartNoreport))
if err != ErrNoReport {
t.Fatalf("message without report, got err %#v, expected ErrNoreport", err)
}
Expand Down
4 changes: 3 additions & 1 deletion dsn/dsn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (
"github.com/mjl-/mox/smtp"
)

var xlog = mlog.New("dsn")

func xparseDomain(s string) dns.Domain {
d, err := dns.ParseDomain(s)
if err != nil {
Expand All @@ -33,7 +35,7 @@ func xparseIPDomain(s string) dns.IPDomain {

func tparseMessage(t *testing.T, data []byte, nparts int) (*Message, *message.Part) {
t.Helper()
m, p, err := Parse(bytes.NewReader(data))
m, p, err := Parse(xlog, bytes.NewReader(data))
if err != nil {
t.Fatalf("parsing dsn: %v", err)
}
Expand Down
7 changes: 4 additions & 3 deletions dsn/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/smtp"
)

Expand All @@ -22,17 +23,17 @@ import (
// The first return value is the machine-parsed DSN message. The second value is
// the entire MIME multipart message. Use its Parts field to access the
// human-readable text and optional original message/headers.
func Parse(r io.ReaderAt) (*Message, *message.Part, error) {
func Parse(log *mlog.Log, r io.ReaderAt) (*Message, *message.Part, error) {
// DSNs can mix and match subtypes with and without utf-8. ../rfc/6533:441

part, err := message.Parse(r)
part, err := message.Parse(log, false, r)
if err != nil {
return nil, nil, fmt.Errorf("parsing message: %v", err)
}
if part.MediaType != "MULTIPART" || part.MediaSubType != "REPORT" {
return nil, nil, fmt.Errorf(`message has content-type %q, must have "message/report"`, strings.ToLower(part.MediaType+"/"+part.MediaSubType))
}
err = part.Walk(nil)
err = part.Walk(log, nil)
if err != nil {
return nil, nil, fmt.Errorf("parsing message parts: %v", err)
}
Expand Down
2 changes: 1 addition & 1 deletion imapserver/append_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func TestAppend(t *testing.T) {
tc3.transactf("ok", "noop")
tc3.xuntagged() // Inbox is not selected, nothing to report.

tc2.transactf("ok", "append inbox (\\Seen) \" 1-Jan-2022 10:10:00 +0100\" UTF8 ({34+}\r\ncontent-type: text/plain;;\r\n\r\ntest)")
tc2.transactf("ok", "append inbox (\\Seen) \" 1-Jan-2022 10:10:00 +0100\" UTF8 ({47+}\r\ncontent-type: just completely invalid;;\r\n\r\ntest)")
tc2.xuntagged(imapclient.UntaggedExists(2))
tc2.xcodeArg(imapclient.CodeAppendUID{UIDValidity: 1, UID: 2})

Expand Down
2 changes: 1 addition & 1 deletion import.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ func importctl(ctx context.Context, ctl *ctl, mbox bool) {
mb.Add(m.MailboxCounts())

// Parse message and store parsed information for later fast retrieval.
p, err := message.EnsurePart(msgf, m.Size)
p, err := message.EnsurePart(ctl.log, false, msgf, m.Size)
if err != nil {
ctl.log.Infox("parsing message, continuing", err, mlog.Field("path", origPath))
}
Expand Down
6 changes: 4 additions & 2 deletions junk.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,14 +309,16 @@ func cmdJunkPlay(c *cmd) {

var nbad, nnodate, nham, nspam, nsent int

jlog := mlog.New("junkplay")

scanDir := func(dir string, ham, sent bool) {
for _, name := range listDir(dir) {
path := dir + "/" + name
mf, err := os.Open(path)
xcheckf(err, "open %q", path)
fi, err := mf.Stat()
xcheckf(err, "stat %q", path)
p, err := message.EnsurePart(mf, fi.Size())
p, err := message.EnsurePart(jlog, false, mf, fi.Size())
if err != nil {
nbad++
if err := mf.Close(); err != nil {
Expand Down Expand Up @@ -396,7 +398,7 @@ func cmdJunkPlay(c *cmd) {
}()
fi, err := mf.Stat()
xcheckf(err, "stat %q", path)
p, err := message.EnsurePart(mf, fi.Size())
p, err := message.EnsurePart(jlog, false, mf, fi.Size())
if err != nil {
log.Printf("bad sent message %q: %s", path, err)
return
Expand Down
6 changes: 3 additions & 3 deletions junk/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,7 @@ func (f *Filter) ClassifyMessagePath(ctx context.Context, path string) (probabil
}

func (f *Filter) ClassifyMessageReader(ctx context.Context, mf io.ReaderAt, size int64) (probability float64, words map[string]struct{}, nham, nspam int, rerr error) {
m, err := message.EnsurePart(mf, size)
m, err := message.EnsurePart(f.log, false, mf, size)
if err != nil && errors.Is(err, message.ErrBadContentType) {
// Invalid content-type header is a sure sign of spam.
//f.log.Infox("parsing content", err)
Expand Down Expand Up @@ -567,7 +567,7 @@ func (f *Filter) Train(ctx context.Context, ham bool, words map[string]struct{})
}

func (f *Filter) TrainMessage(ctx context.Context, r io.ReaderAt, size int64, ham bool) error {
p, _ := message.EnsurePart(r, size)
p, _ := message.EnsurePart(f.log, false, r, size)
words, err := f.ParseMessage(p)
if err != nil {
return fmt.Errorf("parsing mail contents: %v", err)
Expand All @@ -576,7 +576,7 @@ func (f *Filter) TrainMessage(ctx context.Context, r io.ReaderAt, size int64, ha
}

func (f *Filter) UntrainMessage(ctx context.Context, r io.ReaderAt, size int64, ham bool) error {
p, _ := message.EnsurePart(r, size)
p, _ := message.EnsurePart(f.log, false, r, size)
words, err := f.ParseMessage(p)
if err != nil {
return fmt.Errorf("parsing mail contents: %v", err)
Expand Down
2 changes: 1 addition & 1 deletion junk/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func (f *Filter) tokenizeMail(path string) (bool, map[string]struct{}, error) {
if err != nil {
return false, nil, err
}
p, _ := message.EnsurePart(mf, fi.Size())
p, _ := message.EnsurePart(f.log, false, mf, fi.Size())
words, err := f.ParseMessage(p)
return true, words, err
}
Expand Down
34 changes: 24 additions & 10 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -1443,11 +1443,13 @@ headers prepended.
c.Usage()
}

clog := mlog.New("dkimsign")

msgf, err := os.Open(args[0])
xcheckf(err, "open message")
defer msgf.Close()

p, err := message.Parse(msgf)
p, err := message.Parse(clog, true, msgf)
xcheckf(err, "parsing message")

if len(p.Envelope.From) != 1 {
Expand Down Expand Up @@ -1591,7 +1593,7 @@ can be found in message headers.

data, err := io.ReadAll(os.Stdin)
xcheckf(err, "read message")
dmarcFrom, _, err := message.From(bytes.NewReader(data))
dmarcFrom, _, err := message.From(mlog.New("dmarcverify"), false, bytes.NewReader(data))
xcheckf(err, "extract dmarc from message")

const ignoreTestMode = false
Expand Down Expand Up @@ -1621,10 +1623,12 @@ understand email deliverability problems.
c.Usage()
}

clog := mlog.New("dmarcparsereportmsg")

for _, arg := range args {
f, err := os.Open(arg)
xcheckf(err, "open %q", arg)
feedback, err := dmarcrpt.ParseMessageReport(f)
feedback, err := dmarcrpt.ParseMessageReport(clog, f)
xcheckf(err, "parse report in %q", arg)
meta := feedback.ReportMetadata
fmt.Printf("Report: period %s-%s, organisation %q, reportID %q, %s\n", time.Unix(meta.DateRange.Begin, 0).UTC().String(), time.Unix(meta.DateRange.End, 0).UTC().String(), meta.OrgName, meta.ReportID, meta.Email)
Expand Down Expand Up @@ -1673,9 +1677,11 @@ func cmdDMARCDBAddReport(c *cmd) {

mustLoadConfig()

clog := mlog.New("dmarcdbaddreport")

fromdomain := xparseDomain(args[0], "domain")
fmt.Fprintln(os.Stderr, "reading report message from stdin")
report, err := dmarcrpt.ParseMessageReport(os.Stdin)
report, err := dmarcrpt.ParseMessageReport(clog, os.Stdin)
xcheckf(err, "parse message")
err = dmarcdb.AddReport(context.Background(), report, fromdomain)
xcheckf(err, "add dmarc report")
Expand Down Expand Up @@ -1712,10 +1718,12 @@ The report is printed in formatted JSON.
c.Usage()
}

clog := mlog.New("tlsrptparsereportmsg")

for _, arg := range args {
f, err := os.Open(arg)
xcheckf(err, "open %q", arg)
report, err := tlsrpt.ParseMessage(f)
report, err := tlsrpt.ParseMessage(clog, f)
xcheckf(err, "parse report in %q", arg)
// todo future: only print the highlights?
enc := json.NewEncoder(os.Stdout)
Expand Down Expand Up @@ -1855,19 +1863,21 @@ func cmdTLSRPTDBAddReport(c *cmd) {

mustLoadConfig()

clog := mlog.New("tlsrptdbaddreport")

// First read message, to get the From-header. Then parse it as TLSRPT.
fmt.Fprintln(os.Stderr, "reading report message from stdin")
buf, err := io.ReadAll(os.Stdin)
xcheckf(err, "reading message")
part, err := message.Parse(bytes.NewReader(buf))
part, err := message.Parse(clog, true, bytes.NewReader(buf))
xcheckf(err, "parsing message")
if part.Envelope == nil || len(part.Envelope.From) != 1 {
log.Fatalf("message must have one From-header")
}
from := part.Envelope.From[0]
domain := xparseDomain(from.Host, "domain")

report, err := tlsrpt.ParseMessage(bytes.NewReader(buf))
report, err := tlsrpt.ParseMessage(clog, bytes.NewReader(buf))
xcheckf(err, "parsing tls report in message")

mailfrom := from.User + "@" + from.Host // todo future: should escape and such
Expand Down Expand Up @@ -2276,6 +2286,8 @@ func cmdEnsureParsed(c *cmd) {
c.Usage()
}

clog := mlog.New("ensureparsed")

mustLoadConfig()
a, err := store.OpenAccount(args[0])
xcheckf(err, "open account")
Expand All @@ -2298,7 +2310,7 @@ func cmdEnsureParsed(c *cmd) {
}
for _, m := range l {
mr := a.MessageReader(m)
p, err := message.EnsurePart(mr, m.Size)
p, err := message.EnsurePart(clog, false, mr, m.Size)
if err != nil {
log.Printf("parsing message %d: %v (continuing)", m.ID, err)
}
Expand Down Expand Up @@ -2351,13 +2363,15 @@ func cmdMessageParse(c *cmd) {
c.Usage()
}

clog := mlog.New("messageparse")

f, err := os.Open(args[0])
xcheckf(err, "open")
defer f.Close()

part, err := message.Parse(f)
part, err := message.Parse(clog, false, f)
xcheckf(err, "parsing message")
err = part.Walk(nil)
err = part.Walk(clog, nil)
xcheckf(err, "parsing nested parts")
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", "\t")
Expand Down
5 changes: 3 additions & 2 deletions message/from.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"net/textproto"

"github.com/mjl-/mox/dns"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/smtp"
)

Expand All @@ -16,12 +17,12 @@ import (
// From headers may be present. From returns an error if there is not exactly
// one address. This address can be used for evaluating a DMARC policy against
// SPF and DKIM results.
func From(r io.ReaderAt) (raddr smtp.Address, header textproto.MIMEHeader, rerr error) {
func From(log *mlog.Log, strict bool, r io.ReaderAt) (raddr smtp.Address, header textproto.MIMEHeader, rerr error) {
// ../rfc/7489:1243

// todo: only allow utf8 if enabled in session/message?

p, err := Parse(r)
p, err := Parse(log, strict, r)
if err != nil {
// todo: should we continue with p, perhaps headers can be parsed?
return raddr, nil, fmt.Errorf("parsing message: %v", err)
Expand Down
Loading

0 comments on commit 34c2dcd

Please sign in to comment.