diff --git a/replacer.go b/replacer.go index 9301f2a..62a8265 100644 --- a/replacer.go +++ b/replacer.go @@ -22,41 +22,54 @@ func Replace(input string) string { start := -1 var lastEnd int - for index, char := range input { - if char == ':' { - if start == -1 { - start = index - continue - } + // Instead of ranging over the string rune by rune, we index it byte by byte to + // save CPU cycles. + for index := 0; index < len(input); index++ { + char := input[index] + // Even though the input may contain code points outside the ASCII range, no + // byte of a multi-byte UTF-8 sequence can be a colon: all such bytes have the + // high bit set (>= 0x80), whereas ':' is 0x3A. The test + // TestThatPartOfARuneCannotBeColon checks this empirically. + if char != ':' { + continue + } - // Occurence of something like "Hello :: World", in which case we needn't do anything. - if index-start == 1 { - start = -1 - continue - } + if start == -1 { + start = index + continue + } - emojiSequence := input[start+1 : index] - emojified, contains := EmojiMap[emojiSequence] + // Occurrence of something like "Hello :: World", in which case we needn't do anything. + if index-start == 1 { + start = -1 + continue + } + + emojiSequence := input[start+1 : index] + emojified, contains := EmojiMap[emojiSequence] + if !contains { + // The first lookup is case sensitive; only if it misses do we lowercase the + // sequence and look it up again, so the best case pays for no conversion. + emojiSequence = strings.ToLower(input[start+1 : index]) + emojified, contains = EmojiMap[emojiSequence] if !contains { - // Since the previous check is case sensitive, we do the same in a case - // insensitive manner to make use of the best case performance. - emojiSequence = strings.ToLower(input[start+1 : index]) - emojified, contains = EmojiMap[emojiSequence] - if !contains { - start = -1 - continue - } + start = -1 + // Step back so that this colon is re-examined as a possible opening colon. + // This handles input such as ":sunglassesö:sunglasses:", where the second 
+ // sequence would otherwise not be resolved. Thanks, Marvin. + index-- + continue } + } - if len(buffer) == 0 { - // Potentially allocate a bit more than required, but not having to reallocate - buffer = make([]byte, 0, len(input)-len(emojiSequence)-2+len(emojified)) - } - buffer = append(buffer, input[lastEnd:start]...) - buffer = append(buffer, emojified...) - start = -1 - lastEnd = index + 1 + if len(buffer) == 0 { + // Potentially allocate a bit more than required, but not having to reallocate + buffer = make([]byte, 0, len(input)-len(emojiSequence)-2+len(emojified)) } + buffer = append(buffer, input[lastEnd:start]...) + buffer = append(buffer, emojified...) + start = -1 + lastEnd = index + 1 } // Since we only ever append after we've found a matching diff --git a/replacer_test.go b/replacer_test.go index 71d89f5..6f2e05a 100644 --- a/replacer_test.go +++ b/replacer_test.go @@ -1,9 +1,11 @@ package discordemojimap import ( + "fmt" "regexp" "strings" "testing" + "unicode" ) func TestReplace(t *testing.T) { @@ -36,6 +38,7 @@ var inputVariations = [][2]string{ {"empty string", ""}, {"just a colon", ":"}, {"empty emoji sequence", "::"}, + {"invalid emoji sequence with invalid characters", ":sunglassesö:sunglasses:"}, {"valid single letter emoji sequence", ":a:"}, {"no emoji sequence", "Hello"}, {"no emoji sequence, but single colon", "Hello :"}, @@ -47,6 +50,7 @@ var inputVariations = [][2]string{ {"invalid emoji sequence with word after", ":invalidinvalid: Hello"}, {"invalid emoji sequence with word before and after", "Hello :invalidinvalid: Hello"}, {"very long string with invalid emoji sequence in the middle", strings.Repeat("a", 1000) + ":invalidinvalid:" + strings.Repeat("b", 1000)}, + {"very long string with valid emoji sequence in the middle", strings.Repeat("a", 1000) + ":sunglasses:" + strings.Repeat("b", 1000)}, {"standalone valid emoji sequence", ":sunglasses:"}, {"standalone valid uppercased emoji sequence", ":SUNGLASSES:"}, {"valid emoji sequence with word before", "hello :sunglasses:"}, @@ 
-78,6 +82,17 @@ func oldRegexReplace(input string) string { }) } +func TestThatPartOfARuneCannotBeColon(t *testing.T) { + for i := '~'; i <= unicode.MaxRune; i++ { + data := string(i) + for j := 0; j < len(data); j++ { + if data[j] == ':' { + panic(fmt.Sprintf(": found in %s at index %d", string(data), j)) + } + } + } +} + func TestNewReplaceAndOldReplaceBehaveTheSame(t *testing.T) { for _, test := range inputVariations { a := oldRegexReplace(test[1])