Skip to content

Commit

Permalink
Firestore: FIRQueryTests.mm: Add test testBloomFilterShouldCorrectlyE…
Browse files Browse the repository at this point in the history
…ncodeComplexUnicodeCharacters (#11679)
  • Loading branch information
dconeybe authored Sep 7, 2023
1 parent 2eccf79 commit 0e3f78b
Show file tree
Hide file tree
Showing 7 changed files with 361 additions and 58 deletions.
140 changes: 140 additions & 0 deletions Firestore/Example/Tests/Integration/API/FIRQueryTests.mm
Original file line number Diff line number Diff line change
Expand Up @@ -1285,4 +1285,144 @@ - (void)testResumingAQueryShouldUseBloomFilterToAvoidFullRequery {
}
}

- (void)testBloomFilterShouldCorrectlyEncodeComplexUnicodeCharacters {
// TODO(b/291365820): Stop skipping this test when running against the Firestore emulator once
// the emulator is improved to include a bloom filter in the existence filter messages that it
// sends.
XCTSkipIf([FSTIntegrationTestCase isRunningAgainstEmulator],
"Skip this test when running against the Firestore emulator because the emulator does "
"not include a bloom filter when it sends existence filter messages, making it "
"impossible for this test to verify the correctness of the bloom filter.");

// Set this test to stop when the first failure occurs because some test assertion failures make
// the rest of the test not applicable or will even crash.
[self setContinueAfterFailure:NO];

// Define a comparator that compares `NSString` objects in a way that orders canonically-
// equivalent, but distinct, strings in a consistent manner by using `NSForcedOrderingSearch`.
// Otherwise, the bare `[NSString compare:]` method considers canonically-equivalent, but
// distinct, strings as "equal" and orders them indeterminately.
NSComparator sortComparator = ^(NSString *string1, NSString *string2) {
return [string1 compare:string2 options:NSForcedOrderingSearch];
};

// Firestore does not do any Unicode normalization on the document IDs. Therefore, two document
// IDs that are canonically-equivalent (i.e. they visually appear identical) but are represented
// by a different sequence of Unicode code points are treated as distinct document IDs.
NSArray<NSString *> *testDocIds;
{
NSMutableArray<NSString *> *testDocIdsAccumulator = [[NSMutableArray alloc] init];
[testDocIdsAccumulator addObject:@"DocumentToDelete"];
// The next two strings both end with "e" with an accent: the first uses the dedicated Unicode
// code point for this character, while the second uses the standard lowercase "e" followed by
// the accent combining character.
[testDocIdsAccumulator addObject:@"LowercaseEWithAcuteAccent_\u00E9"];
[testDocIdsAccumulator addObject:@"LowercaseEWithAcuteAccent_\u0065\u0301"];
// The next two strings both end with an "e" with two different accents applied via the
// following two combining characters. The combining characters are specified in a different
// order and Firestore treats these document IDs as unique, despite the order of the combining
// characters being irrelevant.
[testDocIdsAccumulator addObject:@"LowercaseEWithMultipleAccents_\u0065\u0301\u0327"];
[testDocIdsAccumulator addObject:@"LowercaseEWithMultipleAccents_\u0065\u0327\u0301"];
// The next string contains a character outside the BMP (the "basic multilingual plane"); that
// is, its code point is greater than 0xFFFF. Since NSString stores text in sequences of 16-bit
// code units, using the UTF-16 encoding (according to
// https://www.objc.io/issues/9-strings/unicode) it is stored as a surrogate pair, two 16-bit
// code units U+D83D and U+DE00, to represent this character. Make sure that its presence is
// correctly tested in the bloom filter, which uses UTF-8 encoding.
[testDocIdsAccumulator addObject:@"Smiley_\U0001F600"];

testDocIds = [NSArray arrayWithArray:testDocIdsAccumulator];
}

// Verify assumptions about the equivalence of strings in `testDocIds`.
XCTAssertEqualObjects(testDocIds[1].decomposedStringWithCanonicalMapping,
testDocIds[2].decomposedStringWithCanonicalMapping);
XCTAssertEqualObjects(testDocIds[3].decomposedStringWithCanonicalMapping,
testDocIds[4].decomposedStringWithCanonicalMapping);
XCTAssertEqual([testDocIds[5] characterAtIndex:7], 0xD83D);
XCTAssertEqual([testDocIds[5] characterAtIndex:8], 0xDE00);

// Create the mapping from document ID to document data for the document IDs specified in
// `testDocIds`.
NSMutableDictionary<NSString *, NSDictionary<NSString *, id> *> *testDocs =
[[NSMutableDictionary alloc] init];
for (NSString *testDocId in testDocIds) {
[testDocs setValue:@{@"foo" : @42} forKey:testDocId];
}

// Create the documents whose names contain complex Unicode characters in a new collection.
FIRCollectionReference *collRef = [self collectionRefWithDocuments:testDocs];

// Run a query to populate the local cache with documents that have names with complex Unicode
// characters.
{
FIRQuerySnapshot *querySnapshot1 = [self readDocumentSetForRef:collRef
source:FIRFirestoreSourceDefault];
XCTAssertEqualObjects(
[FIRQuerySnapshotGetIDs(querySnapshot1) sortedArrayUsingComparator:sortComparator],
[testDocIds sortedArrayUsingComparator:sortComparator],
@"querySnapshot1 has the wrong documents");
}

// Delete one of the documents so that the next call to collection.get() will experience an
// existence filter mismatch. Use a different Firestore instance to avoid affecting the local
// cache.
FIRDocumentReference *documentToDelete = [collRef documentWithPath:@"DocumentToDelete"];
{
FIRFirestore *db2 = [self firestore];
[self deleteDocumentRef:[db2 documentWithPath:documentToDelete.path]];
}

// Wait for 10 seconds, during which Watch will stop tracking the query and will send an
// existence filter rather than "delete" events when the query is resumed.
[NSThread sleepForTimeInterval:10.0f];

// Resume the query and save the resulting snapshot for verification. Use some internal testing
// hooks to "capture" the existence filter mismatches.
__block FIRQuerySnapshot *querySnapshot2;
NSArray<FSTTestingHooksExistenceFilterMismatchInfo *> *existenceFilterMismatches =
[FSTTestingHooks captureExistenceFilterMismatchesDuringBlock:^{
querySnapshot2 = [self readDocumentSetForRef:collRef source:FIRFirestoreSourceDefault];
}];

// Verify that the snapshot from the resumed query contains the expected documents; that is, that
// it contains the documents whose names contain complex Unicode characters and _not_ the document
// that was deleted.
{
NSMutableArray<NSString *> *querySnapshot2ExpectedDocumentIds =
[NSMutableArray arrayWithArray:testDocIds];
[querySnapshot2ExpectedDocumentIds removeObject:documentToDelete.documentID];
XCTAssertEqualObjects(
[FIRQuerySnapshotGetIDs(querySnapshot2) sortedArrayUsingComparator:sortComparator],
[querySnapshot2ExpectedDocumentIds sortedArrayUsingComparator:sortComparator],
@"querySnapshot2 has the wrong documents");
}

// Verify that Watch sent an existence filter with the correct counts.
XCTAssertEqual(existenceFilterMismatches.count, 1u,
@"Watch should have sent exactly 1 existence filter");
FSTTestingHooksExistenceFilterMismatchInfo *existenceFilterMismatchInfo =
existenceFilterMismatches[0];
XCTAssertEqual(existenceFilterMismatchInfo.localCacheCount, (int)testDocIds.count);
XCTAssertEqual(existenceFilterMismatchInfo.existenceFilterCount, (int)testDocIds.count - 1);

// Verify that Watch sent a valid bloom filter.
FSTTestingHooksBloomFilter *bloomFilter = existenceFilterMismatchInfo.bloomFilter;
XCTAssertNotNil(bloomFilter, "Watch should have included a bloom filter in the existence filter");

// The bloom filter application should statistically be successful almost every time; the _only_
// time when it would _not_ be successful is if there is a false positive when testing for
// 'DocumentToDelete' in the bloom filter. So verify that the bloom filter application is
// successful, unless there was a false positive.
BOOL isFalsePositive = [bloomFilter mightContain:documentToDelete];
XCTAssertEqual(bloomFilter.applied, !isFalsePositive);

// Verify that the bloom filter contains the document paths with complex Unicode characters.
for (FIRDocumentSnapshot *documentSnapshot in querySnapshot2.documents) {
XCTAssertTrue([bloomFilter mightContain:documentSnapshot.reference],
@"The bloom filter should contain %@", documentSnapshot.documentID);
}
}

@end
3 changes: 3 additions & 0 deletions Firestore/Example/Tests/Util/FSTTestingHooks.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ NS_ASSUME_NONNULL_BEGIN
/** The number of bits of padding in the last byte of the bloom filter. */
@property(nonatomic, readonly) int padding;

/** Returns whether the bloom filter contains the given document. */
- (BOOL)mightContain:(FIRDocumentReference*)documentRef;

@end // @interface FSTTestingHooksBloomFilter

#pragma mark - FSTTestingHooksExistenceFilterMismatchInfo
Expand Down
45 changes: 38 additions & 7 deletions Firestore/Example/Tests/Util/FSTTestingHooks.mm
Original file line number Diff line number Diff line change
Expand Up @@ -47,34 +47,63 @@ @interface FSTTestingHooksBloomFilter ()
- (instancetype)initWithApplied:(BOOL)applied
hashCount:(int)hashCount
bitmapLength:(int)bitmapLength
padding:(int)padding NS_DESIGNATED_INITIALIZER;
padding:(int)padding
projectId:(std::string)projectId
databaseId:(std::string)databaseId
bloomFilter:(absl::optional<BloomFilter>)bloomFilter NS_DESIGNATED_INITIALIZER;

- (instancetype)initWithBloomFilterInfo:(const TestingHooks::BloomFilterInfo&)bloomFilterInfo;
- (instancetype)initWithBloomFilterInfo:(const TestingHooks::BloomFilterInfo&)bloomFilterInfo
projectId:(std::string)projectId
databaseId:(std::string)databaseId;

@end

#pragma mark - FSTTestingHooksBloomFilter implementation

@implementation FSTTestingHooksBloomFilter
@implementation FSTTestingHooksBloomFilter {
std::string _projectId;
std::string _databaseId;
absl::optional<BloomFilter> _bloomFilter;
}

- (instancetype)initWithApplied:(BOOL)applied
hashCount:(int)hashCount
bitmapLength:(int)bitmapLength
padding:(int)padding {
padding:(int)padding
projectId:(std::string)projectId
databaseId:(std::string)databaseId
bloomFilter:(absl::optional<BloomFilter>)bloomFilter {
if (self = [super init]) {
_applied = applied;
_hashCount = hashCount;
_bitmapLength = bitmapLength;
_padding = padding;
_projectId = std::move(projectId);
_databaseId = std::move(databaseId);
_bloomFilter = std::move(bloomFilter);
}
return self;
}

- (instancetype)initWithBloomFilterInfo:(const TestingHooks::BloomFilterInfo&)bloomFilterInfo {
- (instancetype)initWithBloomFilterInfo:(const TestingHooks::BloomFilterInfo&)bloomFilterInfo
projectId:(std::string)projectId
databaseId:(std::string)databaseId {
return [self initWithApplied:bloomFilterInfo.applied
hashCount:bloomFilterInfo.hash_count
bitmapLength:bloomFilterInfo.bitmap_length
padding:bloomFilterInfo.padding];
padding:bloomFilterInfo.padding
projectId:std::move(projectId)
databaseId:std::move(databaseId)
bloomFilter:bloomFilterInfo.bloom_filter];
}

- (BOOL)mightContain:(FIRDocumentReference*)documentRef {
if (!_bloomFilter.has_value()) {
return NO;
}
std::string document_path = StringFormat("projects/%s/databases/%s/documents/%s", _projectId,
_databaseId, MakeStringView(documentRef.path));
return _bloomFilter->MightContain(document_path);
}

@end
Expand Down Expand Up @@ -114,7 +143,9 @@ - (instancetype)initWithExistenceFilterMismatchInfo:
FSTTestingHooksBloomFilter* bloomFilter;
if (existenceFilterMismatchInfo.bloom_filter.has_value()) {
bloomFilter = [[FSTTestingHooksBloomFilter alloc]
initWithBloomFilterInfo:existenceFilterMismatchInfo.bloom_filter.value()];
initWithBloomFilterInfo:existenceFilterMismatchInfo.bloom_filter.value()
projectId:existenceFilterMismatchInfo.project_id
databaseId:existenceFilterMismatchInfo.database_id];
} else {
bloomFilter = nil;
}
Expand Down
58 changes: 38 additions & 20 deletions Firestore/core/src/remote/remote_event.cc
Original file line number Diff line number Diff line change
Expand Up @@ -216,21 +216,25 @@ namespace {

TestingHooks::ExistenceFilterMismatchInfo
create_existence_filter_mismatch_info_for_testing_hooks(
BloomFilterApplicationStatus status,
int local_cache_count,
const ExistenceFilterWatchChange& existence_filter) {
absl::optional<TestingHooks::BloomFilterInfo> bloom_filter;
const ExistenceFilterWatchChange& existence_filter,
const DatabaseId& database_id,
absl::optional<BloomFilter> bloom_filter,
BloomFilterApplicationStatus status) {
absl::optional<TestingHooks::BloomFilterInfo> bloom_filter_info;
if (existence_filter.filter().bloom_filter_parameters().has_value()) {
const BloomFilterParameters& bloom_filter_parameters =
existence_filter.filter().bloom_filter_parameters().value();
bloom_filter = {status == BloomFilterApplicationStatus::kSuccess,
bloom_filter_parameters.hash_count,
static_cast<int>(bloom_filter_parameters.bitmap.size()),
bloom_filter_parameters.padding};
bloom_filter_info = {
status == BloomFilterApplicationStatus::kSuccess,
bloom_filter_parameters.hash_count,
static_cast<int>(bloom_filter_parameters.bitmap.size()),
bloom_filter_parameters.padding, std::move(bloom_filter)};
}

return {local_cache_count, existence_filter.filter().count(),
std::move(bloom_filter)};
database_id.project_id(), database_id.database_id(),
std::move(bloom_filter_info)};
}

} // namespace
Expand Down Expand Up @@ -264,8 +268,13 @@ void WatchChangeAggregator::HandleExistenceFilter(
int current_size = GetCurrentDocumentCountForTarget(target_id);
if (current_size != expected_count) {
// Apply bloom filter to identify and mark removed documents.
absl::optional<BloomFilter> bloom_filter =
ParseBloomFilter(existence_filter);
BloomFilterApplicationStatus status =
ApplyBloomFilter(existence_filter, current_size);
bloom_filter.has_value()
? ApplyBloomFilter(bloom_filter.value(), existence_filter,
current_size)
: BloomFilterApplicationStatus::kSkipped;
if (status != BloomFilterApplicationStatus::kSuccess) {
// If bloom filter application fails, we reset the mapping and
// trigger re-run of the query.
Expand All @@ -279,18 +288,20 @@ void WatchChangeAggregator::HandleExistenceFilter(

TestingHooks::GetInstance().NotifyOnExistenceFilterMismatch(
create_existence_filter_mismatch_info_for_testing_hooks(
status, current_size, existence_filter));
current_size, existence_filter,
target_metadata_provider_->GetDatabaseId(),
std::move(bloom_filter), status));
}
}
}
}

BloomFilterApplicationStatus WatchChangeAggregator::ApplyBloomFilter(
const ExistenceFilterWatchChange& existence_filter, int current_count) {
absl::optional<BloomFilter> WatchChangeAggregator::ParseBloomFilter(
const ExistenceFilterWatchChange& existence_filter) {
const absl::optional<BloomFilterParameters>& bloom_filter_parameters =
existence_filter.filter().bloom_filter_parameters();
if (!bloom_filter_parameters.has_value()) {
return BloomFilterApplicationStatus::kSkipped;
return absl::nullopt;
}

util::StatusOr<BloomFilter> maybe_bloom_filter =
Expand All @@ -300,23 +311,30 @@ BloomFilterApplicationStatus WatchChangeAggregator::ApplyBloomFilter(
if (!maybe_bloom_filter.ok()) {
LOG_WARN("Creating BloomFilter failed: %s",
maybe_bloom_filter.status().error_message());
return BloomFilterApplicationStatus::kSkipped;
return absl::nullopt;
}

BloomFilter bloom_filter = std::move(maybe_bloom_filter).ValueOrDie();

if (bloom_filter.bit_count() == 0) {
return BloomFilterApplicationStatus::kSkipped;
return absl::nullopt;
}

return bloom_filter;
}

BloomFilterApplicationStatus WatchChangeAggregator::ApplyBloomFilter(
const BloomFilter& bloom_filter,
const ExistenceFilterWatchChange& existence_filter,
int current_count) {
int expected_count = existence_filter.filter().count();

int removed_document_count =
FilterRemovedDocuments(bloom_filter, existence_filter.target_id());

int expected_count = existence_filter.filter().count();
if (expected_count != (current_count - removed_document_count)) {
return BloomFilterApplicationStatus::kFalsePositive;
}
return BloomFilterApplicationStatus::kSuccess;
return (expected_count == (current_count - removed_document_count))
? BloomFilterApplicationStatus::kSuccess
: BloomFilterApplicationStatus::kFalsePositive;
}

int WatchChangeAggregator::FilterRemovedDocuments(
Expand Down
11 changes: 10 additions & 1 deletion Firestore/core/src/remote/remote_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -417,12 +417,21 @@ class WatchChangeAggregator {
bool TargetContainsDocument(model::TargetId target_id,
const model::DocumentKey& key);

/**
* Parse the bloom filter from the "unchanged_names" field of an existence
* filter.
*/
static absl::optional<BloomFilter> ParseBloomFilter(
const ExistenceFilterWatchChange& existence_filter);

/**
* Apply bloom filter to remove the deleted documents, and return the
* application status.
*/
BloomFilterApplicationStatus ApplyBloomFilter(
const ExistenceFilterWatchChange& existence_filter, int current_count);
const BloomFilter& bloom_filter,
const ExistenceFilterWatchChange& existence_filter,
int current_count);

/**
* Filter out removed documents based on bloom filter membership result and
Expand Down
Loading

0 comments on commit 0e3f78b

Please sign in to comment.