Skip to content

Commit

Permalink
Add a unique ID to each rule
Browse files (browse the repository at this point in the history)
This commit adds an `id` field to each Nosey Parker rule.
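It also extends the `noseyparker rules check` command with additional checks that every rule ID is valid: IDs must be unique across all loaded rules, at most 20 characters long, and well-formed (alphanumeric sections delimited by hyphens or periods, e.g. `np.aws.1`).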

The IDs are not currently used for anything.
  • Loading branch information
bradlarsen authored Jul 14, 2023
1 parent 74098e8 commit d82ff54
Show file tree
Hide file tree
Showing 55 changed files with 266 additions and 36 deletions.
72 changes: 54 additions & 18 deletions crates/noseyparker-cli/src/bin/noseyparker/cmd_rules.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use anyhow::{Context, Result, bail};
use vectorscan::{Pattern, BlockDatabase, Scan, Flag};
use anyhow::{bail, Context, Result};
use regex::Regex;
use std::collections::HashSet;
use vectorscan::{BlockDatabase, Flag, Pattern, Scan};

use tracing::{debug_span, error, error_span, info, warn};

Expand All @@ -16,12 +18,47 @@ pub fn run(global_args: &args::GlobalArgs, args: &args::RulesArgs) -> Result<()>
fn cmd_rules_check(_global_args: &args::GlobalArgs, args: &args::RulesCheckArgs) -> Result<()> {
let _span = debug_span!("cmd_rules_check").entered();

let rules = Rules::from_paths(&args.inputs)
.context("Failed to load input rules")?;
let rules = Rules::from_paths(&args.inputs).context("Failed to load input rules")?;
let mut num_errors = 0;
let mut num_warnings = 0;
let num_rules = rules.rules.len();

// ensure IDs are globally unique
{
let mut seen_ids = HashSet::<&str>::new();
for rule in rules.rules.iter() {
let rule_id = &rule.id;
if !seen_ids.insert(rule_id) {
error!("Rule ID {rule_id} is not unique");
num_errors += 1;
}
}
}

// ensure IDs are well-formed
{
let id_pat = Regex::new(r"^[a-zA-Z0-9]+(?:[.-][a-zA-Z0-9]+)*$")
.expect("ID validator pattern should compile");

for rule in rules.rules.iter() {
let rule_id = &rule.id;
const ID_LIMIT: usize = 20;
let rule_id_len = rule_id.len();
if rule_id_len > ID_LIMIT {
error!("Rule ID {rule_id} is too long ({rule_id_len} characters: \
should be {ID_LIMIT} characters max)");
num_errors += 1;
}

if !id_pat.is_match(rule_id) {
error!("Rule ID {rule_id} is not well-formed: \
it should consist only of alphanumeric sections \
delimited by hyphens or periods");
num_errors += 1;
}
}
}

// compile the rules individually
for (rule_num, rule) in rules.rules.iter().enumerate() {
let stats = check_rule(rule_num, rule)?;
Expand All @@ -30,8 +67,8 @@ fn cmd_rules_check(_global_args: &args::GlobalArgs, args: &args::RulesCheckArgs)
}

// compile the rules all together
let _rules_db = RulesDatabase::from_rules(rules)
.context("Failed to compile rules database")?;
let _rules_db =
RulesDatabase::from_rules(rules).context("Failed to compile combined rules database")?;

if num_warnings == 0 && num_errors == 0 {
println!("{num_rules} rules: no issues detected");
Expand All @@ -40,11 +77,11 @@ fn cmd_rules_check(_global_args: &args::GlobalArgs, args: &args::RulesCheckArgs)
}

if num_errors != 0 {
bail!("{} errors in rules", num_errors);
bail!("{num_errors} errors in rules");
}

if num_warnings != 0 && args.warnings_as_errors {
bail!("{} warnings; warnings being treated as errors", num_warnings);
bail!("{num_warnings} warnings; warnings being treated as errors");
}

Ok(())
Expand All @@ -64,7 +101,6 @@ fn hs_compile_pattern(pat: &str) -> Result<BlockDatabase> {
// Ok(db)
// }


struct CheckStats {
num_warnings: usize,
num_errors: usize,
Expand All @@ -84,7 +120,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {

match rule.as_regex() {
Err(e) => {
error!("Regex: failed to compile pattern: {}", e);
error!("Regex: failed to compile pattern: {e}");
num_errors += 1;
}
Ok(pat) => {
Expand All @@ -94,7 +130,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {
// Check positive examples
for (example_num, example) in rule.examples.iter().enumerate() {
if pat.find(example.as_bytes()).is_none() {
error!("Regex: failed to match example {}", example_num);
error!("Regex: failed to match example {example_num}");
num_failed += 1;
num_errors += 1;
} else {
Expand All @@ -105,7 +141,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {
// Check negative examples
for (example_num, example) in rule.negative_examples.iter().enumerate() {
if pat.find(example.as_bytes()).is_some() {
error!("Regex: incorrectly matched negative example {}", example_num);
error!("Regex: incorrectly matched negative example {example_num}");
num_failed += 1;
num_errors += 1;
} else {
Expand All @@ -115,7 +151,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {

let num_total = num_succeeded + num_failed;
if num_total > 0 {
info!("Regex: {}/{} examples succeeded", num_succeeded, num_total);
info!("Regex: {num_succeeded}/{num_total} examples succeeded");
}
}
};
Expand All @@ -130,7 +166,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {

match hs_compile_pattern(&rule.uncommented_pattern()) {
Err(e) => {
error!("Vectorscan: failed to compile pattern: {}", e);
error!("Vectorscan: failed to compile pattern: {e}");
num_errors += 1;
}
Ok(db) => {
Expand All @@ -147,7 +183,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {
Scan::Continue
})?;
if !matched {
error!("Vectorscan: failed to match example {}", example_num);
error!("Vectorscan: failed to match example {example_num}");
num_failed += 1;
num_errors += 1;
} else {
Expand All @@ -163,7 +199,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {
Scan::Continue
})?;
if matched {
error!("Vectorscan: incorrectly matched negative example {}", example_num);
error!("Vectorscan: incorrectly matched negative example {example_num}");
num_failed += 1;
num_errors += 1;
} else {
Expand All @@ -173,15 +209,15 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {

let num_total = num_succeeded + num_failed;
if num_total > 0 {
info!("Vectorscan: {}/{} examples succeeded", num_succeeded, num_total);
info!("Vectorscan: {num_succeeded}/{num_total} examples succeeded");
}
}
}

if num_warnings == 0 && num_errors == 0 {
info!("No issues detected");
} else {
info!("{} errors and {} warnings", num_errors, num_warnings);
info!("{num_errors} errors and {num_warnings} warnings");
}

Ok(CheckStats {
Expand Down
2 changes: 2 additions & 0 deletions crates/noseyparker/data/default/rules/adobe.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
rules:

- name: Adobe OAuth Client Secret
id: np.adobe.1

pattern: |
(?x)(?i)
\b
Expand Down
2 changes: 2 additions & 0 deletions crates/noseyparker/data/default/rules/age.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
rules:

- name: Age Recipient (X25519 public key)
id: np.age.1
pattern: '\bage1[0-9a-z]{58}\b'

examples:
Expand All @@ -13,6 +14,7 @@ rules:


- name: Age Identity (X22519 secret key)
id: np.age.2
pattern: '\bAGE-SECRET-KEY-1[0-9A-Z]{58}\b'

examples:
Expand Down
1 change: 1 addition & 0 deletions crates/noseyparker/data/default/rules/artifactory.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
rules:

- name: Artifactory API Key
id: np.artifactory.1
pattern: '(?i)artifactory.{0,50}\b([a-z0-9]{73})\b'

examples:
Expand Down
14 changes: 14 additions & 0 deletions crates/noseyparker/data/default/rules/aws.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
rules:

- name: AWS API Key
id: np.aws.1

pattern: '\b((?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16})\b'

references:
- https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html
- https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html
Expand All @@ -21,7 +24,10 @@ rules:


- name: AWS Secret Access Key
id: np.aws.2

pattern: '(?i)\baws_?(?:secret)?_?(?:access)?_?(?:key)?["'']?\s{0,30}(?::|=>|=)\s{0,30}["'']?([a-z0-9/+=]{40})\b'

references:
- https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html
- https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html
Expand All @@ -44,6 +50,8 @@ rules:


- name: AWS Account ID
id: np.aws.3

pattern: '(?i)aws_?(?:account)_?(?:id)?["''`]?\s{0,30}(?::|=>|=)\s{0,30}["''`]?([0-9]{4}-?[0-9]{4}-?[0-9]{4})'

examples:
Expand Down Expand Up @@ -83,6 +91,7 @@ rules:


- name: AWS Session Token
id: np.aws.4
pattern: '(?i)(?:aws.?session|aws.?session.?token|aws.?token)["''`]?\s{0,30}(?::|=>|=)\s{0,30}["''`]?([a-z0-9/+=]{16,200})[^a-z0-9/+=]'

negative_examples:
Expand All @@ -98,6 +107,7 @@ rules:
- name: Amazon MWS Auth Token
id: np.aws.5
pattern: '(?i)amzn\.mws\.([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'

examples:
Expand All @@ -108,6 +118,8 @@ rules:
- name: AWS S3 Bucket (subdomain style)
id: np.s3.1

pattern: |
(?x)
(?: ^ | [\s/"'] | %2F )
Expand Down Expand Up @@ -161,6 +173,8 @@ rules:


- name: AWS S3 Bucket (path style)
id: np.s3.2

pattern: |
(?x)
(?: ^ | [\s/"'] | %2F )
Expand Down
4 changes: 4 additions & 0 deletions crates/noseyparker/data/default/rules/azure.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
rules:

- name: Azure Connection String
id: np.azure.1

# XXX There are a bunch of other keys that seem to have secret content assigned to them:
#
# - SharedAccessSignature
Expand Down Expand Up @@ -49,6 +51,8 @@ rules:


- name: Azure App Configuration Connection String
id: np.azure.2

pattern: |
(?x)
(https://[a-zA-Z0-9-]+\.azconfig\.io);
Expand Down
4 changes: 4 additions & 0 deletions crates/noseyparker/data/default/rules/codeclimate.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
rules:

- name: CodeClimate
id: np.codeclimate.1

pattern: '(?i)codeclima.{0,50}\b([a-f0-9]{64})\b'

references:
- https://github.com/codeclimate/ruby-test-reporter/issues/34

examples:
- ' - RAILS_ENV=test CODECLIMATE_REPO_TOKEN=d37a8b9e09642cb73cfcf4e1284815fc3d6a55a7714110187ac59856ae4ab5ad'
1 change: 1 addition & 0 deletions crates/noseyparker/data/default/rules/crates.io.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
rules:

- name: crates.io API Key
id: np.cratesio.1

# It's a 32-character alphanumeric identifier prefixed by `cio`
pattern: '\bcio[a-zA-Z0-9]{32}\b'
Expand Down
6 changes: 6 additions & 0 deletions crates/noseyparker/data/default/rules/digitalocean.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
rules:

- name: DigitalOcean Application Access Token
id: np.digitalocean.1

pattern: |
(?x)(?i)
\b
Expand All @@ -15,6 +17,8 @@ rules:


- name: DigitalOcean Personal Access Token
id: np.digitalocean.2

pattern: |
(?x)(?i)
\b
Expand All @@ -29,6 +33,8 @@ rules:


- name: DigitalOcean Refresh Token
id: np.digitalocean.3

pattern: |
(?x)(?i)
\b
Expand Down
2 changes: 2 additions & 0 deletions crates/noseyparker/data/default/rules/dynatrace.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
rules:

- name: Dynatrace Token
id: np.dynatrace.1

pattern: '\b(dt0[a-zA-Z]{1}[0-9]{2}\.[A-Z0-9]{24}\.[A-Z0-9]{64})\b'

examples:
Expand Down
4 changes: 4 additions & 0 deletions crates/noseyparker/data/default/rules/facebook.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
rules:

- name: Facebook Secret Key
id: np.facebook.1

pattern: |
(?x)(?i)
\b (?: facebook | fb )
Expand All @@ -21,6 +23,8 @@ rules:


- name: Facebook Access Token
id: np.facebook.2

pattern: '\b(EAACEdEose0cBA[a-zA-Z0-9]+)\b'

references:
Expand Down
1 change: 1 addition & 0 deletions crates/noseyparker/data/default/rules/figma.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
rules:

- name: Figma Personal Access Token
id: np.figma.1

# The key material looks like a v4 UUID with an extra 4 hex digits up front
pattern: |
Expand Down
Loading

0 comments on commit d82ff54

Please sign in to comment.