From d39d04cf36b8522f894eebc3e11ee5fe65d880fa Mon Sep 17 00:00:00 2001 From: Thom Espach Date: Fri, 1 Nov 2024 11:02:30 +0000 Subject: [PATCH] Bug Fix: Phishing Detection Dataset Discrepancies (#1032) Please review the release process for BrowserServicesKit [here](https://app.asana.com/0/1200194497630846/1200837094583426). **Required**: Task/Issue URL: https://app.asana.com/0/1204023833050360/1208567121137949/f iOS PR: https://github.com/duckduckgo/iOS/pull/3469 macOS PR: https://github.com/duckduckgo/macos-browser/pull/3440 What kind of version bump will this require?: Patch **Optional**: Tech Design URL: CC: **Description**: In [Implement desktop integration efficacy tests - 5-7 days](https://app.asana.com/0/1207943168535188/1207205745934704/f) it was discovered that Swift's client-side caching results in out-of-date datasets and significant dataset discrepancies between different clients. For example, it's very common for the same request to return different results from the backend, resulting in a client believing they are updating to a newer revision than they are. Over time, this compounds and results in disparate versions of the same dataset across different clients, putting users at risk of landing on newer phishing pages. Fix: - Remove Client Side Caching in PhishingDetectionClient.swift - Ensure embedded dataset is used to replace the on-disk dataset when the revision of the embedded dataset > on disk dataset **Steps to test this PR**: 1. Check unit tests 3. Change on-disk revision: 4. `echo "1650000" > "/System/Volumes/Data/Users//Library/Application Support/com.duckduckgo.macos.browser.debug/revision.txt"` 5. Build the browser 6. Visit https://privacy-test-pages.site/security/badware/phishing.html 7. Ensure blocked 8. Check on-disk revision: 9. `cat "/System/Volumes/Data/Users//Library/Application Support/com.duckduckgo.macos.browser.debug/revision.txt"` 10. Should be > 1650000 **OS Testing**: * [ ] iOS 14 * [ ] iOS 15 * [ ] iOS 16 * [ ] macOS 10.15 * [ ] macOS 11 * [ ] macOS 12 --- ###### Internal references: [Software Engineering Expectations](https://app.asana.com/0/59792373528535/199064865822552) [Technical Design Template](https://app.asana.com/0/59792373528535/184709971311943) --- .../PhishingDetectionDataStore.swift | 49 +++++++-- .../PhishingDetectionDataStoreTests.swift | 101 ++++++++++++++++++ 2 files changed, 141 insertions(+), 9 deletions(-) diff --git a/Sources/PhishingDetection/PhishingDetectionDataStore.swift b/Sources/PhishingDetection/PhishingDetectionDataStore.swift index ab761f272..f247f90b8 100644 --- a/Sources/PhishingDetection/PhishingDetectionDataStore.swift +++ b/Sources/PhishingDetection/PhishingDetectionDataStore.swift @@ -136,10 +136,16 @@ public class PhishingDetectionDataStore: PhishingDetectionDataSaving { } private func loadHashPrefix() -> Set { - guard let data = fileStorageManager.read(from: hashPrefixFilename) else { return dataProvider.loadEmbeddedHashPrefixes() } + guard let data = fileStorageManager.read(from: hashPrefixFilename) else { + return dataProvider.loadEmbeddedHashPrefixes() + } let decoder = JSONDecoder() do { - return Set(try decoder.decode(Set.self, from: data)) + if loadRevisionFromDisk() < dataProvider.embeddedRevision { + return dataProvider.loadEmbeddedHashPrefixes() + } + let onDiskHashPrefixes = Set(try decoder.decode(Set.self, from: data)) + return onDiskHashPrefixes } catch { Logger.phishingDetectionDataStore.error("Error decoding \(self.hashPrefixFilename): \(error.localizedDescription)") return dataProvider.loadEmbeddedHashPrefixes() @@ -147,18 +153,26 @@ public class PhishingDetectionDataStore: PhishingDetectionDataSaving { } private func loadFilterSet() -> Set { - guard let data = fileStorageManager.read(from: filterSetFilename) else { return dataProvider.loadEmbeddedFilterSet() } + guard let data = fileStorageManager.read(from: filterSetFilename) else { + return dataProvider.loadEmbeddedFilterSet() + } let decoder = JSONDecoder() do { - return Set(try decoder.decode(Set.self, from: data)) + if loadRevisionFromDisk() < dataProvider.embeddedRevision { + return dataProvider.loadEmbeddedFilterSet() + } + let onDiskFilterSet = Set(try decoder.decode(Set.self, from: data)) + return onDiskFilterSet } catch { Logger.phishingDetectionDataStore.error("Error decoding \(self.filterSetFilename): \(error.localizedDescription)") return dataProvider.loadEmbeddedFilterSet() } } - private func loadRevision() -> Int { - guard let data = fileStorageManager.read(from: revisionFilename) else { return dataProvider.embeddedRevision } + private func loadRevisionFromDisk() -> Int { + guard let data = fileStorageManager.read(from: revisionFilename) else { + return dataProvider.embeddedRevision + } let decoder = JSONDecoder() do { return try decoder.decode(Int.self, from: data) @@ -167,22 +181,39 @@ public class PhishingDetectionDataStore: PhishingDetectionDataSaving { return dataProvider.embeddedRevision } } + + private func loadRevision() -> Int { + guard let data = fileStorageManager.read(from: revisionFilename) else { + return dataProvider.embeddedRevision + } + let decoder = JSONDecoder() + do { + let loadedRevision = try decoder.decode(Int.self, from: data) + if loadedRevision < dataProvider.embeddedRevision { + return dataProvider.embeddedRevision + } + return loadedRevision + } catch { + Logger.phishingDetectionDataStore.error("Error decoding \(self.revisionFilename): \(error.localizedDescription)") + return dataProvider.embeddedRevision + } + } } extension PhishingDetectionDataStore { public func saveFilterSet(set: Set) { - writeFilterSet() self.filterSet = set + writeFilterSet() } public func saveHashPrefixes(set: Set) { - writeHashPrefixes() self.hashPrefixes = set + writeHashPrefixes() } public func saveRevision(_ revision: Int) { - writeRevision() self.currentRevision = revision + writeRevision() } } diff --git a/Tests/PhishingDetectionTests/PhishingDetectionDataStoreTests.swift b/Tests/PhishingDetectionTests/PhishingDetectionDataStoreTests.swift index 95a51eb05..79e9fb500 100644 --- a/Tests/PhishingDetectionTests/PhishingDetectionDataStoreTests.swift +++ b/Tests/PhishingDetectionTests/PhishingDetectionDataStoreTests.swift @@ -61,6 +61,60 @@ class PhishingDetectionDataStoreTests: XCTestCase { XCTAssertEqual(actualHashPrefix, expectedHashPrefix) } + func testWhenEmbeddedRevisionNewerThanOnDisk_ThenLoadEmbedded() async { + let encoder = JSONEncoder() + // On Disk Data Setup + fileStorageManager.write(data: "1".utf8data, to: "revision.txt") + let onDiskFilterSet = Set([Filter(hashValue: "other", regex: "other")]) + let filterSetData = try! encoder.encode(Array(onDiskFilterSet)) + let onDiskHashPrefix = Set(["faffa"]) + let hashPrefixData = try! encoder.encode(Array(onDiskHashPrefix)) + fileStorageManager.write(data: filterSetData, to: "filterSet.json") + fileStorageManager.write(data: hashPrefixData, to: "hashPrefixes.json") + + // Embedded Data Setup + mockDataProvider.embeddedRevision = 5 + let embeddedFilterSet = Set([Filter(hashValue: "some", regex: "some")]) + let embeddedHashPrefix = Set(["sassa"]) + mockDataProvider.shouldReturnFilterSet(set: embeddedFilterSet) + mockDataProvider.shouldReturnHashPrefixes(set: embeddedHashPrefix) + + let actualRevision = dataStore.currentRevision + let actualFilterSet = dataStore.filterSet + let actualHashPrefix = dataStore.hashPrefixes + + XCTAssertEqual(actualFilterSet, embeddedFilterSet) + XCTAssertEqual(actualHashPrefix, embeddedHashPrefix) + XCTAssertEqual(actualRevision, 5) + } + + func testWhenEmbeddedRevisionOlderThanOnDisk_ThenDontLoadEmbedded() async { + let encoder = JSONEncoder() + // On Disk Data Setup + fileStorageManager.write(data: "6".utf8data, to: "revision.txt") + let onDiskFilterSet = Set([Filter(hashValue: "other", regex: "other")]) + let filterSetData = try! encoder.encode(Array(onDiskFilterSet)) + let onDiskHashPrefix = Set(["faffa"]) + let hashPrefixData = try! encoder.encode(Array(onDiskHashPrefix)) + fileStorageManager.write(data: filterSetData, to: "filterSet.json") + fileStorageManager.write(data: hashPrefixData, to: "hashPrefixes.json") + + // Embedded Data Setup + mockDataProvider.embeddedRevision = 1 + let embeddedFilterSet = Set([Filter(hashValue: "some", regex: "some")]) + let embeddedHashPrefix = Set(["sassa"]) + mockDataProvider.shouldReturnFilterSet(set: embeddedFilterSet) + mockDataProvider.shouldReturnHashPrefixes(set: embeddedHashPrefix) + + let actualRevision = dataStore.currentRevision + let actualFilterSet = dataStore.filterSet + let actualHashPrefix = dataStore.hashPrefixes + + XCTAssertEqual(actualFilterSet, onDiskFilterSet) + XCTAssertEqual(actualHashPrefix, onDiskHashPrefix) + XCTAssertEqual(actualRevision, 6) + } + func testWriteAndLoadData() async { // Get and write data let expectedHashPrefixes = Set(["aabb"]) @@ -93,4 +147,51 @@ class PhishingDetectionDataStoreTests: XCTestCase { XCTFail("Failed to decode stored PhishingDetection data") } } + + func testLazyLoadingDoesNotReturnStaleData() async { + clearDatasets() + + // Set up initial data + let initialFilterSet = Set([Filter(hashValue: "initial", regex: "initial")]) + let initialHashPrefixes = Set(["initialPrefix"]) + mockDataProvider.shouldReturnFilterSet(set: initialFilterSet) + mockDataProvider.shouldReturnHashPrefixes(set: initialHashPrefixes) + + // Access the lazy-loaded properties to trigger loading + let loadedFilterSet = dataStore.filterSet + let loadedHashPrefixes = dataStore.hashPrefixes + + // Validate loaded data matches initial data + XCTAssertEqual(loadedFilterSet, initialFilterSet) + XCTAssertEqual(loadedHashPrefixes, initialHashPrefixes) + + // Update in-memory data + let updatedFilterSet = Set([Filter(hashValue: "updated", regex: "updated")]) + let updatedHashPrefixes = Set(["updatedPrefix"]) + dataStore.saveFilterSet(set: updatedFilterSet) + dataStore.saveHashPrefixes(set: updatedHashPrefixes) + + // Access lazy-loaded properties again + let reloadedFilterSet = dataStore.filterSet + let reloadedHashPrefixes = dataStore.hashPrefixes + + // Validate reloaded data matches updated data + XCTAssertEqual(reloadedFilterSet, updatedFilterSet) + XCTAssertEqual(reloadedHashPrefixes, updatedHashPrefixes) + + // Validate on-disk data is also updated + let storedFilterSetData = fileStorageManager.read(from: "filterSet.json") + let storedHashPrefixesData = fileStorageManager.read(from: "hashPrefixes.json") + + let decoder = JSONDecoder() + if let storedFilterSet = try? decoder.decode(Set.self, from: storedFilterSetData!), + let storedHashPrefixes = try? decoder.decode(Set.self, from: storedHashPrefixesData!) { + + XCTAssertEqual(storedFilterSet, updatedFilterSet) + XCTAssertEqual(storedHashPrefixes, updatedHashPrefixes) + } else { + XCTFail("Failed to decode stored PhishingDetection data after update") + } + } + }