Skip to content

Commit

Permalink
echo: handle multibyte escape sequences
Browse files Browse the repository at this point in the history
Bug was reported, with root cause analysis, by kkew3
Added tests were derived from test cases provided by kkew3
See #6741
  • Loading branch information
andrewliebenow committed Oct 20, 2024
1 parent 5b881a0 commit 9691afb
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 22 deletions.
53 changes: 31 additions & 22 deletions src/uu/echo/src/echo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ impl Base {
}

/// Parse the numeric part of the `\xHH` and `\0NNN` escape sequences
fn parse_code(input: &mut Peekable<Chars>, base: Base) -> Option<char> {
fn parse_code(input: &mut Peekable<Chars>, base: Base) -> Option<u8> {
// All arithmetic on `ret` needs to be wrapping, because octal input can
// take 3 digits, which is 9 bits, and therefore more than what fits in a
// `u8`. GNU just seems to wrap these values.
Expand All @@ -60,14 +60,16 @@ fn parse_code(input: &mut Peekable<Chars>, base: Base) -> Option<char> {
let _ = input.next();
}

Some(ret.into())
Some(ret)
}

fn print_escaped(input: &str, mut output: impl Write) -> io::Result<ControlFlow<()>> {
let mut iter = input.chars().peekable();

while let Some(c) = iter.next() {
if c != '\\' {
write!(output, "{c}")?;

continue;
}

Expand All @@ -76,40 +78,47 @@ fn print_escaped(input: &str, mut output: impl Write) -> io::Result<ControlFlow<
// would be the \0NNN syntax.
if let Some('1'..='8') = iter.peek() {
if let Some(parsed) = parse_code(&mut iter, Base::Oct) {
write!(output, "{parsed}")?;
output.write_all(&[parsed])?;

continue;
}
}

if let Some(next) = iter.next() {
let unescaped = match next {
'\\' => '\\',
'a' => '\x07',
'b' => '\x08',
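// Backing storage for the single byte produced by `\xHH`; declared outside
// the `match` so the `&[u8]` borrowed from it outlives the arm that fills it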
let sl: [u8; 1_usize];

let unescaped: &[u8] = match next {
'\\' => br"\",
'a' => b"\x07",
'b' => b"\x08",
'c' => return Ok(ControlFlow::Break(())),
'e' => '\x1b',
'f' => '\x0c',
'n' => '\n',
'r' => '\r',
't' => '\t',
'v' => '\x0b',
'e' => b"\x1b",
'f' => b"\x0c",
'n' => b"\n",
'r' => b"\r",
't' => b"\t",
'v' => b"\x0b",
'x' => {
if let Some(c) = parse_code(&mut iter, Base::Hex) {
c
if let Some(ue) = parse_code(&mut iter, Base::Hex) {
sl = [ue];

&sl
} else {
write!(output, "\\")?;
'x'
br"\x"
}
}
'0' => parse_code(&mut iter, Base::Oct).unwrap_or('\0'),
'0' => &[parse_code(&mut iter, Base::Oct).unwrap_or(b'\0')],
c => {
write!(output, "\\")?;
c
write!(output, "\\{c}")?;

continue;
}
};
write!(output, "{unescaped}")?;

output.write_all(unescaped)?;
} else {
write!(output, "\\")?;
output.write_all(br"\")?;
}
}

Expand Down
35 changes: 35 additions & 0 deletions tests/by-util/test_echo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,3 +303,38 @@ fn partial_version_argument() {
fn partial_help_argument() {
// Passing the abbreviated flag `--he` must produce it verbatim on stdout,
// followed by a newline, i.e. it is echoed as ordinary text.
new_ucmd!().arg("--he").succeeds().stdout_is("--he\n");
}

#[test]
fn multibyte_escape_unicode() {
// Checks that with `-e` every `\xHH` escape contributes exactly one raw
// byte to the output, so that several escapes in a row can spell out a
// multibyte UTF-8 sequence.
//
// spell-checker:disable-next-line
// Tests suggested by kkew3
// https://github.com/uutils/coreutils/issues/6741

// \u{1F602} is:
//
// "Face with Tears of Joy"
// U+1F602
// "😂"
//
// Its UTF-8 encoding is the four bytes F0 9F 98 82 used below.

// Case 1: the four escapes alone must come out as the single character,
// followed by the trailing newline.
new_ucmd!()
.args(&["-e", r"\xf0\x9f\x98\x82"])
.succeeds()
.stdout_only("\u{1F602}\n");

// Case 2: the same sequence wrapped in `\x41` ('A') and `\x42` ('B');
// the single-byte escapes around it must not disturb the multibyte one.
new_ucmd!()
.args(&["-e", r"\x41\xf0\x9f\x98\x82\x42"])
.succeeds()
.stdout_only("A\u{1F602}B\n");

// Case 3: `\x41` is inserted after the first byte, splitting the sequence.
// The expected output is therefore not valid UTF-8 and is compared as raw
// bytes instead of as a `str`.
new_ucmd!()
.args(&["-e", r"\xf0\x41\x9f\x98\x82"])
.succeeds()
.stdout_is_bytes(b"\xF0A\x9F\x98\x82\n")
.no_stderr();

// Case 4: `\c` appears in the middle of the sequence. The expected output
// ends right after the bytes that precede it: nothing that follows `\c`
// is printed, and there is no trailing newline.
new_ucmd!()
.args(&["-e", r"\x41\xf0\c\x9f\x98\x82"])
.succeeds()
.stdout_is_bytes(b"A\xF0")
.no_stderr();
}

0 comments on commit 9691afb

Please sign in to comment.