Skip to content

Commit

Permalink
Bug Fix: Phishing Detection Dataset Discrepancies (#1032)
Browse files Browse the repository at this point in the history
<!--
Note: This checklist is a reminder of our shared engineering
expectations.
-->

Please review the release process for BrowserServicesKit
[here](https://app.asana.com/0/1200194497630846/1200837094583426).

**Required**:

Task/Issue URL:
https://app.asana.com/0/1204023833050360/1208567121137949/f
iOS PR: duckduckgo/iOS#3469
macOS PR: duckduckgo/macos-browser#3440
What kind of version bump will this require?: Patch

**Optional**:

Tech Design URL:
CC:

**Description**:
In [Implement desktop integration efficacy tests - 5-7
days](https://app.asana.com/0/1207943168535188/1207205745934704/f) it
was discovered that Swift's client-side caching results in out-of-date
datasets and significant dataset discrepancies between different
clients. For example, it's very common for the same request to return
different results from the backend, resulting in a client believing they
are updating to a newer revision than they are. Over time, this
compounds and results in disparate versions of the same dataset across
different clients, putting users at risk of landing on newer phishing
pages.

Fix: 
 - Remove Client Side Caching in PhishingDetectionClient.swift
- Ensure embedded dataset is used to replace the on-disk dataset when
the revision of the embedded dataset > on disk dataset
<!--
Tagging instructions
If this PR isn't ready to be merged for whatever reason it should be
marked with the `DO NOT MERGE` label (particularly if it's a draft)
If it's pending Product Review/PFR, please add the `Pending Product
Review` label.

If at any point it isn't actively being worked on/ready for
review/otherwise moving forward (besides the above PR/PFR exception)
strongly consider closing it (or not opening it in the first place). If
you decide not to close it, make sure it's labelled to make it clear the
PRs state and comment with more information.
-->

**Steps to test this PR**:
1. Check unit tests
3. Change on-disk revision:
4. `echo "1650000" >
"/System/Volumes/Data/Users/<user>/Library/Application
Support/com.duckduckgo.macos.browser.debug/revision.txt"`
5. Build the browser
6. Visit https://privacy-test-pages.site/security/badware/phishing.html
7. Ensure blocked
8. Check on-disk revision:
9. `cat "/System/Volumes/Data/Users/<user>/Library/Application
Support/com.duckduckgo.macos.browser.debug/revision.txt"`
10. Should be > 1650000


<!--
Before submitting a PR, please ensure you have tested the combinations
you expect the reviewer to test, then delete configurations you *know*
do not need explicit testing.

Using a simulator where a physical device is unavailable is acceptable.
-->

**OS Testing**:

* [ ] iOS 14
* [ ] iOS 15
* [ ] iOS 16
* [ ] macOS 10.15
* [ ] macOS 11
* [ ] macOS 12

---
###### Internal references:
[Software Engineering
Expectations](https://app.asana.com/0/59792373528535/199064865822552)
[Technical Design
Template](https://app.asana.com/0/59792373528535/184709971311943)
  • Loading branch information
not-a-rootkit authored Nov 1, 2024
1 parent 8602894 commit d39d04c
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 9 deletions.
49 changes: 40 additions & 9 deletions Sources/PhishingDetection/PhishingDetectionDataStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -136,29 +136,43 @@ public class PhishingDetectionDataStore: PhishingDetectionDataSaving {
}

private func loadHashPrefix() -> Set<String> {
guard let data = fileStorageManager.read(from: hashPrefixFilename) else { return dataProvider.loadEmbeddedHashPrefixes() }
guard let data = fileStorageManager.read(from: hashPrefixFilename) else {
return dataProvider.loadEmbeddedHashPrefixes()
}
let decoder = JSONDecoder()
do {
return Set(try decoder.decode(Set<String>.self, from: data))
if loadRevisionFromDisk() < dataProvider.embeddedRevision {
return dataProvider.loadEmbeddedHashPrefixes()
}
let onDiskHashPrefixes = Set(try decoder.decode(Set<String>.self, from: data))
return onDiskHashPrefixes
} catch {
Logger.phishingDetectionDataStore.error("Error decoding \(self.hashPrefixFilename): \(error.localizedDescription)")
return dataProvider.loadEmbeddedHashPrefixes()
}
}

private func loadFilterSet() -> Set<Filter> {
guard let data = fileStorageManager.read(from: filterSetFilename) else { return dataProvider.loadEmbeddedFilterSet() }
guard let data = fileStorageManager.read(from: filterSetFilename) else {
return dataProvider.loadEmbeddedFilterSet()
}
let decoder = JSONDecoder()
do {
return Set(try decoder.decode(Set<Filter>.self, from: data))
if loadRevisionFromDisk() < dataProvider.embeddedRevision {
return dataProvider.loadEmbeddedFilterSet()
}
let onDiskFilterSet = Set(try decoder.decode(Set<Filter>.self, from: data))
return onDiskFilterSet
} catch {
Logger.phishingDetectionDataStore.error("Error decoding \(self.filterSetFilename): \(error.localizedDescription)")
return dataProvider.loadEmbeddedFilterSet()
}
}

private func loadRevision() -> Int {
guard let data = fileStorageManager.read(from: revisionFilename) else { return dataProvider.embeddedRevision }
private func loadRevisionFromDisk() -> Int {
guard let data = fileStorageManager.read(from: revisionFilename) else {
return dataProvider.embeddedRevision
}
let decoder = JSONDecoder()
do {
return try decoder.decode(Int.self, from: data)
Expand All @@ -167,22 +181,39 @@ public class PhishingDetectionDataStore: PhishingDetectionDataSaving {
return dataProvider.embeddedRevision
}
}

private func loadRevision() -> Int {
guard let data = fileStorageManager.read(from: revisionFilename) else {
return dataProvider.embeddedRevision
}
let decoder = JSONDecoder()
do {
let loadedRevision = try decoder.decode(Int.self, from: data)
if loadedRevision < dataProvider.embeddedRevision {
return dataProvider.embeddedRevision
}
return loadedRevision
} catch {
Logger.phishingDetectionDataStore.error("Error decoding \(self.revisionFilename): \(error.localizedDescription)")
return dataProvider.embeddedRevision
}
}
}

extension PhishingDetectionDataStore {
public func saveFilterSet(set: Set<Filter>) {
writeFilterSet()
self.filterSet = set
writeFilterSet()
}

public func saveHashPrefixes(set: Set<String>) {
writeHashPrefixes()
self.hashPrefixes = set
writeHashPrefixes()
}

public func saveRevision(_ revision: Int) {
writeRevision()
self.currentRevision = revision
writeRevision()
}
}

Expand Down
101 changes: 101 additions & 0 deletions Tests/PhishingDetectionTests/PhishingDetectionDataStoreTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,60 @@ class PhishingDetectionDataStoreTests: XCTestCase {
XCTAssertEqual(actualHashPrefix, expectedHashPrefix)
}

func testWhenEmbeddedRevisionNewerThanOnDisk_ThenLoadEmbedded() async {
let encoder = JSONEncoder()
// On Disk Data Setup
fileStorageManager.write(data: "1".utf8data, to: "revision.txt")
let onDiskFilterSet = Set([Filter(hashValue: "other", regex: "other")])
let filterSetData = try! encoder.encode(Array(onDiskFilterSet))
let onDiskHashPrefix = Set(["faffa"])
let hashPrefixData = try! encoder.encode(Array(onDiskHashPrefix))
fileStorageManager.write(data: filterSetData, to: "filterSet.json")
fileStorageManager.write(data: hashPrefixData, to: "hashPrefixes.json")

// Embedded Data Setup
mockDataProvider.embeddedRevision = 5
let embeddedFilterSet = Set([Filter(hashValue: "some", regex: "some")])
let embeddedHashPrefix = Set(["sassa"])
mockDataProvider.shouldReturnFilterSet(set: embeddedFilterSet)
mockDataProvider.shouldReturnHashPrefixes(set: embeddedHashPrefix)

let actualRevision = dataStore.currentRevision
let actualFilterSet = dataStore.filterSet
let actualHashPrefix = dataStore.hashPrefixes

XCTAssertEqual(actualFilterSet, embeddedFilterSet)
XCTAssertEqual(actualHashPrefix, embeddedHashPrefix)
XCTAssertEqual(actualRevision, 5)
}

func testWhenEmbeddedRevisionOlderThanOnDisk_ThenDontLoadEmbedded() async {
let encoder = JSONEncoder()
// On Disk Data Setup
fileStorageManager.write(data: "6".utf8data, to: "revision.txt")
let onDiskFilterSet = Set([Filter(hashValue: "other", regex: "other")])
let filterSetData = try! encoder.encode(Array(onDiskFilterSet))
let onDiskHashPrefix = Set(["faffa"])
let hashPrefixData = try! encoder.encode(Array(onDiskHashPrefix))
fileStorageManager.write(data: filterSetData, to: "filterSet.json")
fileStorageManager.write(data: hashPrefixData, to: "hashPrefixes.json")

// Embedded Data Setup
mockDataProvider.embeddedRevision = 1
let embeddedFilterSet = Set([Filter(hashValue: "some", regex: "some")])
let embeddedHashPrefix = Set(["sassa"])
mockDataProvider.shouldReturnFilterSet(set: embeddedFilterSet)
mockDataProvider.shouldReturnHashPrefixes(set: embeddedHashPrefix)

let actualRevision = dataStore.currentRevision
let actualFilterSet = dataStore.filterSet
let actualHashPrefix = dataStore.hashPrefixes

XCTAssertEqual(actualFilterSet, onDiskFilterSet)
XCTAssertEqual(actualHashPrefix, onDiskHashPrefix)
XCTAssertEqual(actualRevision, 6)
}

func testWriteAndLoadData() async {
// Get and write data
let expectedHashPrefixes = Set(["aabb"])
Expand Down Expand Up @@ -93,4 +147,51 @@ class PhishingDetectionDataStoreTests: XCTestCase {
XCTFail("Failed to decode stored PhishingDetection data")
}
}

func testLazyLoadingDoesNotReturnStaleData() async {
clearDatasets()

// Set up initial data
let initialFilterSet = Set([Filter(hashValue: "initial", regex: "initial")])
let initialHashPrefixes = Set(["initialPrefix"])
mockDataProvider.shouldReturnFilterSet(set: initialFilterSet)
mockDataProvider.shouldReturnHashPrefixes(set: initialHashPrefixes)

// Access the lazy-loaded properties to trigger loading
let loadedFilterSet = dataStore.filterSet
let loadedHashPrefixes = dataStore.hashPrefixes

// Validate loaded data matches initial data
XCTAssertEqual(loadedFilterSet, initialFilterSet)
XCTAssertEqual(loadedHashPrefixes, initialHashPrefixes)

// Update in-memory data
let updatedFilterSet = Set([Filter(hashValue: "updated", regex: "updated")])
let updatedHashPrefixes = Set(["updatedPrefix"])
dataStore.saveFilterSet(set: updatedFilterSet)
dataStore.saveHashPrefixes(set: updatedHashPrefixes)

// Access lazy-loaded properties again
let reloadedFilterSet = dataStore.filterSet
let reloadedHashPrefixes = dataStore.hashPrefixes

// Validate reloaded data matches updated data
XCTAssertEqual(reloadedFilterSet, updatedFilterSet)
XCTAssertEqual(reloadedHashPrefixes, updatedHashPrefixes)

// Validate on-disk data is also updated
let storedFilterSetData = fileStorageManager.read(from: "filterSet.json")
let storedHashPrefixesData = fileStorageManager.read(from: "hashPrefixes.json")

let decoder = JSONDecoder()
if let storedFilterSet = try? decoder.decode(Set<Filter>.self, from: storedFilterSetData!),
let storedHashPrefixes = try? decoder.decode(Set<String>.self, from: storedHashPrefixesData!) {

XCTAssertEqual(storedFilterSet, updatedFilterSet)
XCTAssertEqual(storedHashPrefixes, updatedHashPrefixes)
} else {
XCTFail("Failed to decode stored PhishingDetection data after update")
}
}

}

0 comments on commit d39d04c

Please sign in to comment.