Skip to content

Commit

Permalink
refactor: remove lifetimes, better validate python/pcre lookbehinds
Browse files Browse the repository at this point in the history
  • Loading branch information
Aloso committed May 18, 2024
1 parent b25a559 commit 8d05633
Show file tree
Hide file tree
Showing 60 changed files with 720 additions and 366 deletions.
16 changes: 8 additions & 8 deletions pomsky-bin/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use pomsky::{
};

pub(crate) fn run_tests(
parsed: Expr<'_>,
parsed: Expr,
input: &str,
options: CompileOptions,
errors: &mut Vec<Diagnostic>,
Expand Down Expand Up @@ -53,7 +53,7 @@ pub(crate) fn run_tests(
}
}

fn check_test_match(regex: &Regex, test_case: TestCaseMatch<'_>, errors: &mut Vec<Diagnostic>) {
fn check_test_match(regex: &Regex, test_case: TestCaseMatch, errors: &mut Vec<Diagnostic>) {
let result = regex.captures(test_case.literal.content.as_bytes());
match result {
Ok(Some(captures)) => {
Expand All @@ -67,9 +67,9 @@ fn check_test_match(regex: &Regex, test_case: TestCaseMatch<'_>, errors: &mut Ve
}

for capture in &test_case.captures {
let Some(got_capture) = (match capture.ident {
let Some(got_capture) = (match &capture.ident {
CaptureIdent::Name(name) => captures.name(name),
CaptureIdent::Index(idx) => captures.get(idx as usize),
&CaptureIdent::Index(idx) => captures.get(idx as usize),
}) else {
errors.push(Diagnostic::test_failure(
capture.ident_span,
Expand Down Expand Up @@ -103,7 +103,7 @@ fn check_test_match(regex: &Regex, test_case: TestCaseMatch<'_>, errors: &mut Ve

fn check_all_test_matches(
regex: &Regex,
test_case: TestCaseMatchAll<'_>,
test_case: TestCaseMatchAll,
errors: &mut Vec<Diagnostic>,
) {
let captures_iter = regex
Expand Down Expand Up @@ -144,9 +144,9 @@ fn check_all_test_matches(
}

for capture in &test_case.captures {
let Some(got_capture) = (match capture.ident {
let Some(got_capture) = (match &capture.ident {
CaptureIdent::Name(name) => captures.name(name),
CaptureIdent::Index(idx) => captures.get(idx as usize),
&CaptureIdent::Index(idx) => captures.get(idx as usize),
}) else {
errors.push(Diagnostic::test_failure(
capture.ident_span,
Expand All @@ -173,7 +173,7 @@ fn check_all_test_matches(
}
}

fn check_test_reject(regex: &Regex, test_case: TestCaseReject<'_>, errors: &mut Vec<Diagnostic>) {
fn check_test_reject(regex: &Regex, test_case: TestCaseReject, errors: &mut Vec<Diagnostic>) {
let result = regex.captures(test_case.literal.content.as_bytes());
match result {
Ok(Some(captures)) => {
Expand Down
1 change: 1 addition & 0 deletions pomsky-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ exclude = ["tests/**", "fuzz/**", "afl-fuzz/**"]
default = []
dbg = ["pomsky-syntax/dbg"]
suggestions = ["pomsky-syntax/suggestions"]
arbitrary = ["dep:arbitrary", "pomsky-syntax/arbitrary"]

[dependencies]
pomsky-syntax = { version = "0.11.0", path = "../pomsky-syntax" }
Expand Down
3 changes: 3 additions & 0 deletions pomsky-lib/afl-fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pomsky-lib/afl-fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ afl = "0.14.3"
arbitrary = "1.3.2"
regex = "1"
regex-test = { path = "../../regex-test" }
pomsky = { path = "..", features = ["arbitrary"] }
pomsky = { path = "..", features = ["dbg", "arbitrary"] }

# Prevent this from interfering with workspaces
[workspace]
Expand Down
33 changes: 33 additions & 0 deletions pomsky-lib/afl-fuzz/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# AFL fuzzer

This fuzzer checks that the Pomsky compiler does not crash for any input, and produces valid regular expressions.

The latter requirement is tested by compiling the regex with the respective regex engine. This requires the following programs to be installed:

- deno (for JavaScript)
- javac
- python
- mcs (for .NET)

## Usage

It is recommended to use [just](https://github.com/casey/just). When fuzzing Pomsky for the first time, run

```sh
just fuzz_init
just fuzz in
```

When you want to resume a previous fuzzing session, you can just

```sh
just fuzz
```

## Analyze crashes

When you found a crash, you might find it in `errors.txt`. If it's not in `errors.txt`, that likely means that there was an unexpected panic. To minimize it, run `just tmin <path>`, where `<path>` is the path to a file in the `out/default/crashes` folder. This command minimizes the input for the crash and creates a logfile at `log.txt` that should make it possible to identify the bug.

## Report the bug

Please report the bug [here](https://github.com/pomsky-lang/pomsky/issues). If you think it could be a security vulnerability, please disclose it directly per email: [email protected].
13 changes: 11 additions & 2 deletions pomsky-lib/afl-fuzz/ignored_errors.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
Ruby|Oniguruma error: never ending recursion
# Ruby|Oniguruma error: too big number for repeat range

Rust|empty character classes are not allowed
Rust|Compiled regex exceeds size limit
PCRE|error compiling pattern at offset \d+: lookbehind assertion is not fixed length
Py|look-behind requires fixed-width pattern

PCRE|branch too long in variable-length lookbehind assertion
PCRE|regular expression is too large
# repetitions can be at most 65535 (2^16)
PCRE|number too big in \{\} quantifier
# lookbehind must not match more than 65535 (2^16) code points
PCRE|lookbehind assertion is too long

Java|Look-behind group does not have an obvious maximum length
12 changes: 10 additions & 2 deletions pomsky-lib/afl-fuzz/justfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
fuzz_init:
cargo install cargo-afl
cargo afl system-config

fuzz in='-':
cargo afl build
cargo afl fuzz -i {{in}} -o out target/debug/afl-fuzz

tmin input:
rm log.txt
FUZZ_LOG=1 AFL_DEBUG=1 AFL_MAP_SIZE=100000 cargo afl tmin -i {{input}} -o out.txt -- ./target/debug/afl-fuzz
rm -f log.txt
FUZZ_LOG=1 AFL_DEBUG=1 AFL_MAP_SIZE=100000 cargo afl tmin -i {{input}} -o out.txt -- ./target/debug/afl-fuzz
58 changes: 10 additions & 48 deletions pomsky-lib/afl-fuzz/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,13 @@ fn main() {

afl::fuzz(true, |data| {
let mut u = Unstructured::new(data);
let Ok((input, compile_options)) = Arbitrary::arbitrary(&mut u) else { return };
let input = transform_input(input);
let Ok((compile_options, input)) = Arbitrary::arbitrary(&mut u) else { return };

debug!(f, "\n{:?} -- {:?}\n", input, compile_options);
debug!(f, "\n\n{:#?}\n{:?} -- {:?}\n", input, input, compile_options);

let result = Expr::parse_and_compile(&input, compile_options);
let result = Expr::compile(&input, "", compile_options);

if let (Some(regex), _warnings, _tests) = result {
if let (Some(regex), _warnings) = result {
debug!(f, " compiled;");

let features = compile_options.allowed_features;
Expand All @@ -52,7 +51,7 @@ fn main() {
// - don't validate raw regexes, which may be invalid
if regex.len() < 2048 && !regex.is_empty() && features == { features }.regexes(false) {
debug!(f, " check");
check(&regex, &ignored_errors, compile_options.flavor, f, ef);
check(input, &regex, &ignored_errors, compile_options.flavor, f, ef);
} else {
debug!(f, " SKIPPED (too long or `regex` feature enabled)");
}
Expand All @@ -63,6 +62,7 @@ fn main() {
}

fn check(
expr: Expr,
regex: &str,
ignored_errors: &HashMap<RegexFlavor, RegexSet>,
flavor: RegexFlavor,
Expand All @@ -89,7 +89,7 @@ fn check(
}
}

debug!(ef, "{flavor:?}|{regex:?}|{e}\n");
debug!(ef, "\n{expr:?}\n{flavor:?}|{regex:?}|{e}\n");
debug!(f, " {regex:?} ({flavor:?}) failed:\n{e}");
panic!("Regex {regex:?} is invalid in the {flavor:?} flavor:\n{e}");
}
Expand All @@ -101,6 +101,9 @@ fn parse_ignored_errors() -> HashMap<RegexFlavor, RegexSet> {
let ignored_errors = ignored_errors
.lines()
.filter_map(|line| {
if line.starts_with('#') || line.is_empty() {
return None;
}
Some(match line.split_once('|') {
Some(("JS" | "JavaScript", err)) => (RegexFlavor::JavaScript, err),
Some(("Java", err)) => (RegexFlavor::Java, err),
Expand All @@ -121,44 +124,3 @@ fn parse_ignored_errors() -> HashMap<RegexFlavor, RegexSet> {

ignored_errors.into_iter().map(|(k, v)| (k, RegexSet::new(v).unwrap())).collect()
}

fn transform_input(input: &str) -> String {
input.chars().fold(String::with_capacity(input.len()), |mut acc, c| match c {
// increase likelihood of generating these key words and important sequences by chance
'à' => acc + " Codepoint ",
'á' => acc + " Grapheme ",
'â' => acc + " Start ",
'ã' => acc + " End ",
'ä' => acc + " lazy ",
'å' => acc + " greedy ",
'æ' => acc + " enable ",
'ç' => acc + " disable ",
'è' => acc + " unicode ",
'é' => acc + " test {",
'ê' => acc + " match ",
'ë' => acc + " reject ",
'ì' => acc + " in ",
'í' => acc + " as ",
'î' => acc + " if ",
'ï' => acc + " else ",
'ð' => acc + " regex ",
'ñ' => acc + " recursion ",
'ò' => acc + " range ",
'ó' => acc + " base ",
'ô' => acc + " let ",
'õ' => acc + " U+1FEFF ",
'ö' => acc + ":bla(",
'ø' => acc + "::bla ",
'ù' => acc + "<< ",
'ú' => acc + ">> ",
'û' => acc + "'test'",
'ü' => acc + "atomic",
'ý' => acc + " U+FEFF ",
// 'þ' => acc + "",
// 'ÿ' => acc + "",
_ => {
acc.push(c);
acc
}
})
}
2 changes: 1 addition & 1 deletion pomsky-lib/src/capturing_groups.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ impl RuleVisitor<CompileError> for CapturingGroupsCollector {
}

fn visit_group(&mut self, group: &exprs::Group) -> Result<(), CompileError> {
match group.kind {
match &group.kind {
GroupKind::Capturing(Capture { name: Some(name) }) => {
if self.variable_nesting > 0 {
return Err(CompileErrorKind::CaptureInLet.at(group.span));
Expand Down
10 changes: 5 additions & 5 deletions pomsky-lib/src/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,27 @@ use crate::{
regex::Regex,
};

pub(crate) type CompileResult<'i> = Result<Regex<'i>, CompileError>;
pub(crate) type CompileResult = Result<Regex, CompileError>;

#[derive(Clone)]
pub(crate) struct CompileState<'c, 'i> {
pub(crate) struct CompileState<'i> {
pub(crate) next_idx: u32,
pub(crate) used_names_vec: Vec<Option<String>>,
pub(crate) used_names: HashMap<String, CapturingGroupIndex>,
pub(crate) groups_count: u32,
pub(crate) numbered_groups_count: u32,
pub(crate) in_lookbehind: bool,

pub(crate) variables: Vec<(&'i str, &'c Rule<'i>)>,
pub(crate) variables: Vec<(&'i str, &'i Rule)>,
pub(crate) current_vars: HashSet<usize>,

pub(crate) diagnostics: Vec<Diagnostic>,
}

impl<'c, 'i> CompileState<'c, 'i> {
impl<'i> CompileState<'i> {
pub(crate) fn new(
capt_groups: CapturingGroupsCollector,
variables: Vec<(&'i str, &'c Rule<'i>)>,
variables: Vec<(&'i str, &'i Rule)>,
) -> Self {
let used_names = capt_groups.names;
let groups_count = capt_groups.count_named + capt_groups.count_numbered;
Expand Down
18 changes: 18 additions & 0 deletions pomsky-lib/src/diagnose/compile_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ pub(crate) enum CompileErrorKind {
RubyLookaheadInLookbehind {
was_word_boundary: bool,
},
UnsupportedInLookbehind {
flavor: RegexFlavor,
feature: Feature,
},
LookbehindNotConstantLength {
flavor: RegexFlavor,
},
NestedTest,
}

Expand Down Expand Up @@ -186,6 +193,17 @@ impl core::fmt::Display for CompileErrorKind {
CompileErrorKind::NestedTest => {
write!(f, "Unit tests may only appear at the top level of the expression")
}
CompileErrorKind::UnsupportedInLookbehind { flavor, feature } => {
write!(f, "Feature `{feature:?}` is not supported within lookbehinds in the {flavor:?} flavor")
}
CompileErrorKind::LookbehindNotConstantLength { flavor } => match flavor {
RegexFlavor::Pcre | RegexFlavor::Python => write!(
f,
"In the {flavor:?} flavor, lookbehinds must have a {} length",
if flavor == &RegexFlavor::Pcre { "bounded" } else { "constant" }
),
_ => write!(f, "This kind of lookbehind is not supported in the {flavor:?} flavor"),
},
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions pomsky-lib/src/diagnose/diagnostic_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ diagnostic_code! {
IllegalNegation = 317,
DotNetNumberedRefWithMixedGroups = 318,
RubyLookaheadInLookbehind = 319,
UnsupportedInLookbehind = 320,
LookbehindNotConstantLength = 321,

// Warning indicating something might not be supported
PossiblyUnsupported = 400,
Expand Down Expand Up @@ -222,6 +224,8 @@ impl<'a> From<&'a CompileErrorKind> for DiagnosticCode {
C::JsWordBoundaryInUnicodeMode => Self::UnsupportedInUnicodeMode,
C::DotNetNumberedRefWithMixedGroups => Self::DotNetNumberedRefWithMixedGroups,
C::RubyLookaheadInLookbehind { .. } => Self::RubyLookaheadInLookbehind,
C::UnsupportedInLookbehind { .. } => Self::UnsupportedInLookbehind,
C::LookbehindNotConstantLength { .. } => Self::LookbehindNotConstantLength,
C::NestedTest => Self::NestedTest,
}
}
Expand Down
4 changes: 3 additions & 1 deletion pomsky-lib/src/diagnose/diagnostic_kind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ impl From<&CompileErrorKind> for DiagnosticKind {
| K::NestedTest
| K::NegatedHorizVertSpace
| K::DotNetNumberedRefWithMixedGroups
| K::RubyLookaheadInLookbehind { .. } => DiagnosticKind::Unsupported,
| K::RubyLookaheadInLookbehind { .. }
| K::UnsupportedInLookbehind { .. }
| K::LookbehindNotConstantLength { .. } => DiagnosticKind::Unsupported,
K::RangeIsTooBig(_) => DiagnosticKind::Limits,
}
}
Expand Down
Loading

0 comments on commit 8d05633

Please sign in to comment.