Skip to content

Commit

Permalink
feat: add option to disable character detection (#342)
Browse files Browse the repository at this point in the history
* feat: add option to disable character detection

Resolves #340

---------

Signed-off-by: James Hillyerd <[email protected]>
Co-authored-by: James Hillyerd <[email protected]>
  • Loading branch information
axllent and jhillyerd authored Aug 31, 2024
1 parent 16957e7 commit a9fae7a
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 1 deletion.
12 changes: 12 additions & 0 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,15 @@ func (o disableTextConversionOption) apply(p *Parser) {
func DisableTextConversion(disableTextConversion bool) Option {
return disableTextConversionOption(disableTextConversion)
}

// disableCharacterDetectionOption is the bool-backed option value returned by
// DisableCharacterDetection.
type disableCharacterDetectionOption bool

// apply copies the option's value into the parser's disableCharacterDetection field.
func (o disableCharacterDetectionOption) apply(p *Parser) {
p.disableCharacterDetection = bool(o)
}

// DisableCharacterDetection sets the disableCharacterDetection option. When true, the parser
// ignores the detected character set for any message part that declares a character set and
// converts the part using the declared one. Parts that declare no character set are still
// subject to detection.
func DisableCharacterDetection(disableCharacterDetection bool) Option {
return disableCharacterDetectionOption(disableCharacterDetection)
}
1 change: 1 addition & 0 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ type Parser struct {
customParseMediaType CustomParseMediaType
stripMediaTypeInvalidCharacters bool
disableTextConversion bool
disableCharacterDetection bool
}

// defaultParser is a Parser with default configuration.
Expand Down
3 changes: 2 additions & 1 deletion part.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,8 @@ func (p *Part) convertFromDetectedCharset(r io.Reader, readPartErrorPolicy ReadP
// Restore r.
r = bytes.NewReader(buf)

if cs == nil || cs.Confidence < minCharsetConfidence || len(bytes.Runes(buf)) < minCharsetRuneLength {
if (p.parser.disableCharacterDetection && p.Charset != "") ||
(cs == nil || cs.Confidence < minCharsetConfidence || len(bytes.Runes(buf)) < minCharsetRuneLength) {
// Detection disabled with a declared charset, low confidence, or not enough characters: use declared character set.
return p.convertFromStatedCharset(r), nil
}
Expand Down
25 changes: 25 additions & 0 deletions part_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1318,3 +1318,28 @@ func TestCtypeInvalidCharacters(t *testing.T) {

test.ComparePart(t, p, wantp)
}

func TestDisableCharacterDetectionPart(t *testing.T) {
var wantp *enmime.Part

// chardet considers this test file to be ISO-8859-1.
r := test.OpenTestData("parts", "chardet-detection.raw")
parser := enmime.NewParser(enmime.DisableCharacterDetection(true))
p, err := parser.ReadParts(r)

// Examine root
if err != nil {
t.Fatalf("Unexpected parse error: %+v", err)
}
if p == nil {
t.Fatal("Root node should not be nil")
}

wantp = &enmime.Part{
ContentType: "text/plain",
PartID: "0",
Charset: "utf-8",
}

test.ComparePart(t, p, wantp)
}
8 changes: 8 additions & 0 deletions testdata/parts/chardet-detection.raw
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable

Loggen Sie sich ein, um die Einladung zu akzeptieren oder geben Sie den fol=
gen1233

Nachricht:
=C3=B6o=C3=B6o

0 comments on commit a9fae7a

Please sign in to comment.