Skip to content

Commit

Permalink
Fixed the migrations for EBI Search and MGX.
Browse files Browse the repository at this point in the history
The test tests/me/test_populate_metagenomics_exchange.py::TestMeAPI::test_removals_dry_mode is passing now.
  • Loading branch information
mberacochea committed Jan 29, 2024
1 parent e229f5c commit a6e9eda
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 46 deletions.
6 changes: 0 additions & 6 deletions emgapi/management/commands/populate_metagenomics_exchange.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,6 @@ def add_arguments(self, parser):
required=False,
type=float,
)
parser.add_argument(
"--dev",
action="store_true",
required=False,
help="Populate dev API",
)
parser.add_argument(
"--dry-run",
action="store_true",
Expand Down
12 changes: 6 additions & 6 deletions emgapi/metagenomics_exchange.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def check_analysis(self, source_id: str, sequence_id: str, public=None, metadata
}
endpoint = f"sequences/{sequence_id}"
response = self.get_request(endpoint=endpoint, params=params)
analysis_registryID = ""
analysis_registry_id = None
metadata_match = True
if response.ok:
data = response.json()
Expand All @@ -105,21 +105,21 @@ def check_analysis(self, source_id: str, sequence_id: str, public=None, metadata
if source_id in sourceIDs:
found_record = [item for item in datasets if item.get("sourceID") == source_id][0]
logging.info(f"{source_id} exists in ME")
analysis_registryID = found_record.get("registryID")
analysis_registry_id = found_record.get("registryID")
if metadata:
for metadata_record in metadata:
if not(metadata_record in found_record):
metadata_match = False
return analysis_registryID, metadata_match
return analysis_registry_id , metadata_match
else:
if metadata[metadata_record] != found_record[metadata_record]:
metadata_match = False
logging.info(f"Incorrect field {metadata[metadata_record]} in ME ({found_record[metadata_record]})")
return analysis_registryID, metadata_match
return analysis_registryID, metadata_match
return analysis_registry_id, metadata_match
return analysis_registry_id , metadata_match
else:
logging.info(f"{source_id} does not exist in ME")
return analysis_registryID, metadata_match
return analysis_registry_id, metadata_match

def delete_analysis(self, registry_id: str):
response = self.delete_request(endpoint=f"datasets/{registry_id}")
Expand Down
18 changes: 18 additions & 0 deletions emgapi/migrations/0014_analysisjob_last_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.18 on 2024-01-18 13:55

from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds the LAST_UPDATE column to AnalysisJob so the indexing querysets
    # (EBI Search / Metagenomics Exchange) can compare it against the
    # last-indexed timestamps and pick up records changed since indexing.

    dependencies = [
        ('emgapi', '0013_auto_20240118_1220'),
    ]

    operations = [
        migrations.AddField(
            model_name='analysisjob',
            name='last_update',
            # auto_now=True: Django refreshes this timestamp on every save().
            field=models.DateTimeField(auto_now=True, db_column='LAST_UPDATE'),
        ),
    ]
26 changes: 18 additions & 8 deletions emgapi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,16 @@ class Meta:
abstract = True


class IndexableModel(models.Model):
    """Abstract base that stamps each row with a last-modification time.

    ``last_update`` is refreshed automatically on every save (``auto_now``),
    so querysets can compare it with per-index timestamps to find records
    that changed since they were last indexed.
    """

    last_update = models.DateTimeField(db_column='LAST_UPDATE', auto_now=True)

    class Meta:
        abstract = True


class IndexableModelQueryset(models.QuerySet):
"""
to_delete: Objects that have been suppressed since they were last indexed,
Expand All @@ -288,8 +298,7 @@ class IndexableModelQueryset(models.QuerySet):
or that have been indexed but updated since.
"""
def to_delete(self):
not_indexed_filter = {f"{self.index_field}__isnull": False}
updated_after_indexing = Q(last_update__gte=F(self.index_field), **not_indexed_filter)
updated_after_indexing = Q(last_update__gte=F(self.index_field), **{f"{self.index_field}__isnull": False})

try:
self.model._meta.get_field("suppressed_at")
Expand All @@ -303,9 +312,8 @@ def to_delete(self):
)

def to_add(self):
not_indexed_filter = {f"{self.index_field}__isnull": False}
updated_after_indexing = Q(last_update__gte=F(self.index_field), **not_indexed_filter)
never_indexed = Q(last_indexed__isnull=True)
updated_after_indexing = Q(last_update__gte=F(self.index_field), **{f"{self.index_field}__isnull": False})
never_indexed = Q(**{f"{self.index_field}__isnull": True})

try:
self.model._meta.get_field("is_suppressed")
Expand All @@ -330,7 +338,7 @@ class EBISearchIndexQueryset(IndexableModelQueryset):
index_field = "last_ebi_search_indexed"


class EBISearchIndexedModel(models.Model):
class EBISearchIndexedModel(IndexableModel):

last_ebi_search_indexed = models.DateTimeField(
db_column='LAST_EBI_SEARCH_INDEXED',
Expand All @@ -352,6 +360,10 @@ class MetagenomicsExchangeQueryset(IndexableModelQueryset):

class MetagenomicsExchangeIndexedModel(models.Model):
"""Model to track Metagenomics Exchange indexation of analysis jobs
TODO: this model should have the last_update field as it's a requirement.
The current implementation of this works because the analysis jobs are
also extending the EBISearchIndexable model which provided the
last_update field.
"""
last_mgx_indexed = models.DateTimeField(
db_column='LAST_MGX_INDEXED',
Expand Down Expand Up @@ -1094,8 +1106,6 @@ def _custom_pk(self):
db_column='AUTHOR_EMAIL', max_length=100, blank=True, null=True)
author_name = models.CharField(
db_column='AUTHOR_NAME', max_length=100, blank=True, null=True)
last_update = models.DateTimeField(
db_column='LAST_UPDATE', auto_now=True)
submission_account_id = models.CharField(
db_column='SUBMISSION_ACCOUNT_ID',
max_length=15, blank=True, null=True)
Expand Down
38 changes: 15 additions & 23 deletions tests/me/test_populate_metagenomics_exchange.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import pytest

from unittest.mock import patch
from unittest import mock

from django.core.management import call_command

Expand All @@ -27,53 +27,45 @@

@pytest.mark.django_db
class TestMeAPI:
@pytest.mark.usefixtures('run_multiple_analysis')
def test_population_dry_mode(
self,
caplog
):
@pytest.mark.usefixtures("run_multiple_analysis")
def test_population_dry_mode(self, caplog):
call_command(
"populate_metagenomics_exchange",
dev=True,
dry_run=True,
)
assert "Dry-mode run: no addition to real ME for MGYA00001234" in caplog.text
assert "Dry-mode run: no addition to real ME for MGYA00005678" in caplog.text
assert "Dry-mode run: no addition to real ME for MGYA00466090" in caplog.text
assert "Processing 0 analyses to remove" in caplog.text

@pytest.mark.usefixtures('suppressed_analysis_jobs')
def test_removals_dry_mode(
self,
caplog
):
@mock.patch("emgapi.metagenomics_exchange.MetagenomicsExchangeAPI.check_analysis")
@pytest.mark.usefixtures("suppressed_analysis_jobs")
def test_removals_dry_mode(self, mock_check_analysis, caplog):
mock_check_analysis.return_value = None, False
call_command(
"populate_metagenomics_exchange",
dev=True,
dry_run=True,
)
ajobs = AnalysisJob.objects.all()
for job in ajobs:
assert f"No {job.accession} in ME, nothing to delete" in caplog.text
assert "Processing 0 new analyses" in caplog.text
assert (
f"{job.accession} doesn't exist in the registry, nothing to delete"
in caplog.text
)
assert "Indexing 0 new analyses" in caplog.text

@pytest.mark.usefixtures('analysis_existed_in_me')
@pytest.mark.usefixtures("analysis_existed_in_me")
def test_update_dry_mode(self, caplog):
call_command(
"populate_metagenomics_exchange",
dev=True,
dry_run=True,
)
assert "Incorrect field None in ME (ERR1806500)" in caplog.text
assert "Dry-mode run: no patch to real ME for MGYA00147343" in caplog.text
assert "Processing 0 analyses to remove" in caplog.text

@pytest.mark.usefixtures('run_multiple_analysis')
def test_population(
self,
caplog
):
@pytest.mark.usefixtures("run_multiple_analysis")
def test_population(self, caplog):
call_command(
"populate_metagenomics_exchange",
dev=True,
)
6 changes: 3 additions & 3 deletions tests/test_utils/emg_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -1107,15 +1107,15 @@ def suppressed_analysis_jobs(ena_suppressed_runs):
suppressed_analysisjobs = make_suppressed_analysis_jobs(quantity=5,
emg_props={"is_suppressed": True,
"suppressed_at": '1980-01-01 00:00:00',
'last_populated_me': '1970-01-01 00:00:00'})
'last_mgx_indexed': '1970-01-01 00:00:00'})
return suppressed_analysisjobs

@pytest.fixture
def analysis_existed_in_me():
emg_props = {
"job_id": 147343,
"last_populated_me": '1970-01-01 00:00:00',
"last_updated_me": '1980-01-01 00:00:00',
"last_mgx_indexed": '1970-01-01 00:00:00',
"last_update": '1980-01-01 00:00:00',
"is_suppressed": False,
"is_private": False
}
Expand Down

0 comments on commit a6e9eda

Please sign in to comment.