-
Notifications
You must be signed in to change notification settings - Fork 195
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Compatibility test coverage #1057
base: mainline
Are you sure you want to change the base?
Changes from 11 commits
949b741
3a0ad93
c4eda2b
bab9337
a834c9f
44d37f9
4d6f2ae
fb07372
4c6cb9f
5b5eea6
9921576
64047cf
6cd7885
4bc2084
f2e5068
c95be2c
f84825d
b940b01
cf68c99
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
import pytest | ||
from tests.compatibility_tests.base_test_case.base_compatibility_test import BaseCompatibilityTestCase | ||
|
||
@pytest.mark.marqo_version('2.0.0')
class TestAddDocuments(BaseCompatibilityTestCase):
    """Compatibility test for the add_documents API.

    ``prepare`` (run against the *from* Marqo version) creates one structured
    and one unstructured index, feeds ``text_docs`` into both, fetches each
    document back, and snapshots the results to a file.  ``test_add_doc``
    (run against the *to* version) re-fetches the same documents and asserts
    that they are returned unchanged.
    """

    structured_index_name = "test_add_doc_api_structured_index"
    unstructured_index_name = "test_add_doc_api_unstructured_index"

    # One structured and one unstructured index so both add_documents code
    # paths are exercised.
    indexes_to_test_on = [
        {
            "indexName": structured_index_name,
            "type": "structured",
            "model": "sentence-transformers/all-MiniLM-L6-v2",
            "normalizeEmbeddings": False,
            "allFields": [
                {"name": "Title", "type": "text"},
                {"name": "Description", "type": "text"},
                {"name": "Genre", "type": "text"},
            ],
            "tensorFields": ["Title", "Description", "Genre"],
        },
        {
            "indexName": unstructured_index_name,
            "type": "unstructured",
            "model": "sentence-transformers/all-MiniLM-L6-v2",
            "normalizeEmbeddings": False,
        },
    ]

    text_docs = [
        {
            "Title": "The Travels of Marco Polo",
            "Description": "A 13th-century travelogue describing the travels of Polo",
            "Genre": "History",
            "_id": "article_602",
        },
        {
            "Title": "Extravehicular Mobility Unit (EMU)",
            "Description": "The EMU is a spacesuit that provides environmental protection",
            "_id": "article_591",
            "Genre": "Science",
        },
    ]

    @classmethod
    def setUpClass(cls) -> None:
        super().setUpClass()

    @classmethod
    def tearDownClass(cls) -> None:
        # Index deletion is deliberately deferred to tearDownClass (i.e. after
        # the Test-mode run) rather than done in prepare mode, so the indexes
        # created by prepare() survive until they have been fully tested.
        cls.indexes_to_delete = [index['indexName'] for index in cls.indexes_to_test_on]
        super().tearDownClass()

    def prepare(self):
        """Create the indexes, add the documents, and snapshot each document.

        The snapshot (index name -> doc id -> document) is written out via
        ``save_results_to_file`` so that ``test_add_doc`` can compare against
        it after the upgrade.
        """
        self.logger.info(f"Creating indexes {self.indexes_to_test_on} in test case: {self.__class__.__name__}")
        self.create_indexes(self.indexes_to_test_on)

        self.logger.debug(f'Feeding documents to {self.indexes_to_test_on}')
        for index in self.indexes_to_test_on:
            # Fixed: the old condition re-fetched 'type' twice
            # (get(...) is not None and get(...) == 'structured'); a single
            # equality check is equivalent, since None never equals 'structured'.
            if index.get('type') == 'structured':
                # Structured indexes declare their tensor fields at creation
                # time, so none are passed here.
                self.client.index(index_name=index['indexName']).add_documents(documents=self.text_docs)
            else:
                self.client.index(index_name=index['indexName']).add_documents(
                    documents=self.text_docs,
                    tensor_fields=["Description", "Genre", "Title"])

        # The original wrapped the loop above in `try: ... except Exception as e:
        # raise e`, which is a no-op re-raise; removed.
        self.logger.debug(f"Finished running prepare method for test case: {self.__class__.__name__}")

        all_results = {}
        for index in self.indexes_to_test_on:
            index_name = index['indexName']
            all_results[index_name] = {}
            for doc in self.text_docs:
                doc_id = doc['_id']
                try:
                    all_results[index_name][doc_id] = self.client.index(index_name).get_document(doc_id)
                except Exception:
                    # Deliberately non-fatal: failing prepare mode would not be
                    # visible to the later Test-mode run anyway; test_add_doc
                    # reports the missing key instead.
                    self.logger.error(
                        f"Exception when getting documents with id: {doc_id} from index: {index_name}",
                        exc_info=True)

        self.save_results_to_file(all_results)

    def test_add_doc(self):
        """Verify documents stored before the upgrade are returned unchanged after it."""
        self.logger.info(f"Running test_add_doc on {self.__class__.__name__}")
        stored_results = self.load_results_from_file()
        for index in self.indexes_to_test_on:
            index_name = index['indexName']
            for doc in self.text_docs:
                doc_id = doc['_id']
                try:
                    expected_doc = stored_results[index_name][doc_id]
                except KeyError:
                    self.logger.error(f"The key {doc_id} doesn't exist in the stored results. Skipping the test for this document.")
                    continue
                self.logger.debug(f"Printing expected doc {expected_doc}")
                actual_doc = self.client.index(index_name).get_document(doc_id)
                # Fixed: this line previously logged expected_doc twice.
                self.logger.debug(f"Printing actual doc {actual_doc}")
                self.assertEqual(expected_doc, actual_doc)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
import logging | ||
from abc import abstractmethod, ABC | ||
from pathlib import Path | ||
from marqo_test import MarqoTestCase | ||
from tests.compatibility_tests.base_test_case.marqo_test import MarqoTestCase | ||
|
||
|
||
class BaseCompatibilityTestCase(MarqoTestCase, ABC): | ||
|
@@ -40,6 +40,8 @@ def tearDownClass(cls) -> None: | |
cls.delete_indexes(cls.indexes_to_delete) | ||
cls.logger.debug(f"Deleting indexes {cls.indexes_to_delete}") | ||
|
||
cls.delete_file() | ||
|
||
@classmethod | ||
def save_results_to_file(cls, results): | ||
"""Save results to a JSON file.""" | ||
|
@@ -61,8 +63,11 @@ def load_results_from_file(cls): | |
@classmethod
def delete_file(cls):
    """Delete the saved-results file, if it was ever created.

    ``prepare`` may fail before writing any results, so a missing file is
    not an error here — it is logged and skipped.
    """
    filepath = cls.get_results_file_path()
    if filepath.exists():
        filepath.unlink()
        cls.logger.debug(f"Results file deleted: {filepath}")
    else:
        # Fixed: was an f-string with no placeholders (F541).
        cls.logger.debug("Not deleting, as the results file was never created in the first place.")
|
||
@abstractmethod | ||
def prepare(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Non-normalized indexes need to be covered, but they are unusual. If we cover non-normalized indexes, we should also cover normalized ones.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed