Skip to content

Commit

Permalink
regexp-based solution
Browse files Browse the repository at this point in the history
Doesn't yet pass all tests, but wanted to save progress.
  • Loading branch information
mmcloughlin committed Feb 5, 2020
1 parent b9c7196 commit 8bc4dfa
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 162 deletions.
185 changes: 41 additions & 144 deletions format.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@ package main

import (
"bytes"
"errors"
"go/ast"
"go/format"
"go/parser"
"go/token"
"regexp"
"strings"
"unicode"
"unicode/utf8"

"golang.org/x/tools/go/ast/astutil"
)
Expand All @@ -24,17 +22,7 @@ func Format(src []byte) ([]byte, error) {
}

// Apply transform.
transformed := CommentTransform(f, func(text string) string {
newtext, errf := formula(text)
if errf != nil {
err = errf
return text
}
return newtext
})
if err != nil {
return nil, err
}
transformed := CommentTransform(f, formula)

// Format.
buf := bytes.NewBuffer(nil)
Expand Down Expand Up @@ -66,9 +54,16 @@ func CommentTransform(root ast.Node, transform func(string) string) ast.Node {

// Fixed data structures required for formula processing.
var (
replacer *strings.Replacer // replacer for symbols.
super = map[rune]rune{} // replacement map for superscript characters.
sub = map[rune]rune{} // replacement map for subscript characters.
// Symbol replacer.
replacer *strings.Replacer

// Regular expressions for super/subscripts.
supregexp *regexp.Regexp
subregexp *regexp.Regexp

// Rune replacement maps.
super = map[rune]rune{}
sub = map[rune]rune{}
)

func init() {
Expand All @@ -79,151 +74,53 @@ func init() {
}
replacer = strings.NewReplacer(oldnew...)

// Build super/subscript replacement maps.
// Build super/subscript character classes and replacement maps.
var superclass, subclass []rune
for _, char := range chars {
if char.Super != None {
superclass = append(superclass, char.Char)
super[char.Char] = char.Super
}
if char.Sub != None {
subclass = append(subclass, char.Char)
sub[char.Char] = char.Sub
}
}
}

// formula processes a formula in s, writing the result to w.
func formula(s string) (string, error) {
if len(s) == 0 {
return "", nil
}

// Replace symbols.
s = replacer.Replace(s)

// Replace super/subscripts.
buf := bytes.NewBuffer(nil)
last := None
for len(s) > 0 {
r, size := utf8.DecodeRuneInString(s)

// Look for a super/subscript character.
var repl map[rune]rune
switch r {
case '^':
repl = super
case '_':
repl = sub
default:
buf.WriteRune(r)
last = r
s = s[size:]
continue
}

// Perform replacement.
if unicode.IsPrint(last) && !unicode.IsSpace(last) {
var err error
s, err = supsub(buf, s, repl)
if err != nil {
return "", err
}
} else {
buf.WriteRune(r)
s = s[size:]
}

last = None
}

return buf.String(), nil
// Build regular expressions.
supregexp = regexp.MustCompile(`(\b[A-Za-z0-9]|\pS)\^(\d+|\{` + charclass(superclass) + `+\})`)
subregexp = regexp.MustCompile(`(\b[A-Za-z]|\pS)_(\d+\b|\{` + charclass(subclass) + `+\})`)
}

// supsub processes a super/subscript starting at s, writing the result to w.
// The repl map provides the mapping from runes to the corresponding
// super/subscripted versions. Note the first character of s should be the "^"
// or "_" operator.
func supsub(w *bytes.Buffer, s string, repl map[rune]rune) (string, error) {
arg, rest, err := parsearg(s[1:])
if err != nil {
return "", err
}

// If we could not parse an argument, or it's not replaceable, just write the
// super/subscript operator and return.
if len(arg) == 0 || !replaceable(arg, repl) {
w.WriteByte(s[0])
return s[1:], nil
}

// Perform the replacement.
for _, r := range arg {
w.WriteRune(repl[r])
}

return rest, nil
// charclass builds a regular expression character class that matches exactly
// the given runes, e.g. []rune{'a', '-', 'b'} yields `[a\-b]`.
//
// Runes that carry special meaning inside a character class are
// backslash-escaped so that every rune is treated as a literal: '-' (range
// operator), '^' (negation when it comes first), ']' (class terminator), '['
// (could begin a POSIX class such as [:alpha:]) and '\' (escape character).
func charclass(runes []rune) string {
	var b strings.Builder
	// Two brackets plus, in the common case, one byte per rune.
	b.Grow(len(runes) + 2)
	b.WriteByte('[')
	for _, r := range runes {
		switch r {
		case '\\', ']', '[', '^', '-':
			b.WriteByte('\\')
		}
		b.WriteRune(r)
	}
	b.WriteByte(']')
	return b.String()
}

// parsearg parses the argument to a super/subscript.
func parsearg(s string) (string, string, error) {
if len(s) == 0 {
return "", "", nil
}

// Braced.
if s[0] == '{' {
arg, rest, err := parsebraces(s)
if err != nil {
return "", "", err
}
return arg[1 : len(arg)-1], rest, nil
}

// Look for a numeral.
i := 0
for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ {
}
if i > 0 {
return s[:i], s[i:], nil
}

// Default to the first rune.
_, i = utf8.DecodeRuneInString(s)
return s[:i], s[i:], nil
}

// parsebraces parses matching braces starting at the beginning of s.
func parsebraces(s string) (string, string, error) {
if len(s) == 0 || s[0] != '{' {
return "", "", errors.New("expected {")
}

depth := 0
for i, r := range s {
// Adjust depth if we see open or close brace.
switch r {
case '{':
depth++
case '}':
depth--
}
// formula applies symbol, superscript and subscript replacements to the
// formula text in s and returns the result.
func formula(s string) string {
// Replace symbols.
s = replacer.Replace(s)

// Continue if we have not reached matched braces.
if depth > 0 {
continue
}
// Replace superscripts.
s = supregexp.ReplaceAllStringFunc(s, subsupreplacer(super))

// Return the matched braces.
return s[:i+1], s[i+1:], nil
}
// Replace subscripts.
s = subregexp.ReplaceAllStringFunc(s, subsupreplacer(sub))

return "", "", errors.New("unmatched braces")
return s
}

// replaceable returns whether every rune in s has a replacement in repl.
func replaceable(s string, repl map[rune]rune) bool {
for _, r := range s {
if _, ok := repl[r]; !ok {
return false
// subsupreplacer returns a string-rewriting function for use as the callback
// to regexp.ReplaceAllStringFunc. The returned function keeps the first rune
// of its argument unchanged and, for every later rune r, emits repl[r] when
// that entry is not None; all other runes are dropped.
func subsupreplacer(repl map[rune]rune) func(string) string {
return func(s string) string {
var runes []rune
for i, r := range s {
// i is a byte offset, so i == 0 selects exactly the first rune.
if i == 0 {
runes = append(runes, r)
} else if repl[r] != None {
runes = append(runes, repl[r])
}
}
return string(runes)
}
return true
}
75 changes: 58 additions & 17 deletions format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,31 @@ func TestFormula(t *testing.T) {

// Symbols.
{Name: "basic_symbol", Input: "x +- y", Expect: "x ± y"},
{Name: "basic_latex_symbol", Input: "x \\oplus y", Expect: "x ⊕ y"},
{Name: "basic_latex_symbol", Input: `x \oplus y`, Expect: "x ⊕ y"},
{Name: "multi_symbols", Input: "2 <= x <= 10", Expect: "2 ⩽ x ⩽ 10"},

// Super/subscripts.
{Name: "sup_brace_replaceable", Input: "x^{i+j}ab", Expect: "xⁱ⁺ʲab"},
{Name: "sup_numeral_replaceable", Input: "x^123a", Expect: "x¹²³a"},
{Name: "sup_char_replaceable", Input: "x^ijk", Expect: "xⁱjk"},
{Name: "sup_char_replaceable", Input: "x^ijk", Expect: "x^ijk"},

{Name: "sup_brace_nonreplaceable", Input: "x^{p+q}pq", Expect: "x^{p+q}pq"},
{Name: "sup_char_nonreplaceable", Input: "x^qrs", Expect: "x^qrs"},

{Name: "sub_brace_replaceable", Input: "x_{i+j}ab", Expect: "xᵢ₊ⱼab"},
{Name: "sub_numeral_replaceable", Input: "x_123a", Expect: "x₁₂₃a"},
{Name: "sub_char_replaceable", Input: "x_ijk", Expect: "xᵢjk"},
{Name: "sub_digit_brace_replaceable", Input: "2_{i+j}ab", Expect: "2_{i+j}ab"},
{Name: "sub_numeral_boundary_replaceable", Input: "x_123 a", Expect: "x₁₂₃ a"},
{Name: "sub_numeral_non_boundary", Input: "x_123a", Expect: "x_123a"},
{Name: "sub_char_replaceable", Input: "x_ijk", Expect: "x_ijk"},

{Name: "sub_brace_nonreplaceable", Input: "x_{w+x}wx", Expect: "x_{w+x}wx"},
{Name: "sub_char_nonreplaceable", Input: "x_wxy", Expect: "x_wxy"},

// Combination.
{Name: "sup_with_symbol", Input: "\\oplus^23", Expect: "⊕²³"},
{Name: "sub_with_symbol", Input: "\\oplus_23", Expect: "⊕₂₃"},
{Name: "sup_with_symbol", Input: `\oplus^23`, Expect: "⊕²³"},
{Name: "sub_with_symbol", Input: `\oplus_23`, Expect: "⊕₂₃"},
{Name: "sup_brace_with_symbol", Input: `\oplus^{i+j}`, Expect: "⊕ⁱ⁺ʲ"},
{Name: "sub_brace_with_symbol", Input: `\oplus_{i+j}`, Expect: "⊕ᵢ₊ⱼ"},

// Malformed.
{Name: "sup_first_char", Input: "^a", Expect: "^a"},
Expand All @@ -52,21 +56,58 @@ func TestFormula(t *testing.T) {
{Name: "sub_consecutive", Input: "pre _______a post", Expect: "pre _______a post"},

// Regression.
{Name: "sup_with_minus", Input: "2^32-1", Expect: "2³²-1"},
{
Name: "sup_with_minus",
Input: "2^32-1",
Expect: "2³²-1",
},
{
Name: "exp_with_minus",
Input: "p256Invert calculates |out| = |in|^{-1}",
Expect: "p256Invert calculates |out| = |in|⁻¹",
},
}
for _, c := range cases {
c := c // scopelint
t.Run(c.Name, func(t *testing.T) {
got, err := formula(c.Input)
if err != nil {
t.Fatal(err)
}
if got != c.Expect {
t.Logf("input = %q", c.Input)
t.Logf("got = %q", got)
t.Logf("expect = %q", c.Expect)
t.FailNow()
}
AssertFormulaOutput(t, c.Input, c.Expect)
})
}
}

// TestFormulaNoChange is a regression test: every line below must be returned
// by formula exactly as given.
func TestFormulaNoChange(t *testing.T) {
	unchanged := []string{
		// golang.org/x/crypto
		"\"_acme-challenge\" name of the domain being validated.",                                                            // subscript "_a"
		"echo -n cert | base64 | tr -d '=' | tr '/+' '_-'",                                                                   // subscript "_-"
		"thumbprint is precomputed for testKeyEC in jws_test.go",                                                             // subscript "_t"
		"The \"signature_algorithms\" extension, if present, limits the key exchange",                                        // subscript "_a"
		"testGetCertificate_tokenCache tests the fallback of token certificate fetches",                                      // subscript "_t"
		"https://en.wikipedia.org/wiki/Automated_Certificate_Management_Environment#CAs_&_PKIs_that_offer_ACME_certificates", // subscripts in URL
		"g8TuAS9g5zhq8ELQ3kmjr-KV86GAMgI6VAcGlq3QrzpTCf_30Ab7-zawrfRaFON",                                                    // subscript "_30"
		"JAumQ_I2fjj98_97mk3ihOY4AgVdCDj1z_GCoZkG5Rq7nbCGyosyKWyDX00Zs-n",                                                    // subscript "_97"
		"xiToPMinus1Over3 is ξ^((p-1)/3) where ξ = i+3.",                                                                     // superscript "^("
		"FrobeniusP2 computes (xτ²+yτ+z)^(p²) = xτ^(2p²) + yτ^(p²) + z",                                                      // superscript "^("
		"x for a moment, then after applying the Frobenius, we have x̄ω^(2p)",                                                 // superscript "^("
		"x̄ξ^((p-1)/3)ω² and applying the inverse isomorphism eliminates the",                                                 // superscript "^("
		"be called when the vector facility is available. Implementation in asm_s390x.s.",                                    // subscript "_s"
		"Z_2^u Z_2^v Z_2^u",                                                                                                  // combination of sub/superscript
		"[1] http://csrc.nist.gov/publications/drafts/fips-202/fips_202_draft.pdf",                                           // subscript "_202"
		"Cert generated by ssh-keygen OpenSSH_6.8p1 OS X 10.10.3",                                                            // subscript "_6"
	}
	for i := range unchanged {
		text := unchanged[i]
		// The expected output is the input itself.
		AssertFormulaOutput(t, text, text)
	}
}

// AssertFormulaOutput runs formula on input and marks the test as failed,
// without stopping it, when the result differs from expect. The input, actual
// and expected values are logged on mismatch.
func AssertFormulaOutput(t *testing.T, input, expect string) {
	t.Helper()
	actual := formula(input)
	if actual == expect {
		return
	}
	t.Logf("input = %q", input)
	t.Logf("got = %q", actual)
	t.Logf("expect = %q", expect)
	t.Fail()
}
2 changes: 1 addition & 1 deletion testdata/poly1305.in
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ func shiftRightBy2(a uint128) uint128 {
// updateGeneric absorbs msg into the state.h accumulator. For each chunk m of
// 128 bits of message, it computes
//
// h_+ = (h + m) * r mod 2^130 - 5
// h_{+} = (h + m) * r mod 2^130 - 5
//
// If the msg length is not a multiple of TagSize, it assumes the last
// incomplete chunk is the final one.
Expand Down

0 comments on commit 8bc4dfa

Please sign in to comment.