Skip to content

Commit

Permalink
analysis: add a dedupe pass (#797)
Browse files Browse the repository at this point in the history
See #796.
  • Loading branch information
woodruffw authored Jan 10, 2021
1 parent b991f9e commit 7273e5f
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 0 deletions.
26 changes: 26 additions & 0 deletions src/analysis/pass/dedupe/dedupe
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# dedupe: filter out any cohorts whose inputs have already appeared at least once.

require "json"
require "set"

STDERR.puts "[+] pass: dedupe"

# Every cohort input observed so far on this run.
seen = Set.new
# Number of cohorts dropped because their input was already observed.
count = 0

STDIN.each_line do |line|
  cohort = JSON.parse(line, symbolize_names: true)

  # Set#add? inserts the element and returns the set itself (truthy) when the
  # element is new, or nil when it was already present -- so a single call
  # both records the input and tells us whether this cohort is a duplicate.
  if seen.add?(cohort[:input])
    STDOUT.puts cohort.to_json
  else
    count += 1
  end
end

STDERR.puts "[+] pass: dedupe done: #{count} filtered"
3 changes: 3 additions & 0 deletions src/analysis/pass/dedupe/spec.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
name: dedupe
desc: Filter out any duplicate cohorts (by input)
run: dedupe
1 change: 1 addition & 0 deletions src/analysis/pass/minimize-input/minimize-input
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ STDIN.each_line do |line|

# If the maximum ndecoded is 0, then all are 0 and we should skip
# this cohort entirely.
# In effect, this is probably identical to filter-all-failure.
if max_ndecoded.zero?
count += 1
next
Expand Down
4 changes: 4 additions & 0 deletions src/analysis/passes.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
default:
- dedupe
- filter-all-failure
- filter-ndecoded
- minimize-input
Expand All @@ -15,19 +16,22 @@ same-size-different-decodings:

# Find inputs on which the workers disagree: some succeed while others fail.
status-discrepancies:
- dedupe
- filter-all-failure
- filter-all-success
- minimize-input
- normalize

destroy-capstone:
- dedupe
- filter-all-success
- filter-ndecoded
- filter-destroy-capstone
- minimize-input
- normalize

destroy-bddisasm:
- dedupe
- filter-all-success
- filter-ndecoded
- filter-destroy-bddisasm
Expand Down

0 comments on commit 7273e5f

Please sign in to comment.