Skip to content

Commit

Permalink
Merge pull request #3613 from specify/recursive_cte_experimental
Browse files Browse the repository at this point in the history
Begin adding recursive CTE to tree queries
  • Loading branch information
realVinayak authored Nov 8, 2023
2 parents f4a963b + a16a613 commit 807f04d
Show file tree
Hide file tree
Showing 15 changed files with 554 additions and 293 deletions.
9 changes: 7 additions & 2 deletions specifyweb/settings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,17 @@
},
}

SA_DATABASE_URL = 'mysql://%s:%s@%s:%s/%s?charset=utf8' % (
def get_sa_db_url(db_name):
return 'mysql://%s:%s@%s:%s/%s?charset=utf8' % (
MASTER_NAME,
MASTER_PASSWORD,
DATABASE_HOST,
DATABASE_PORT or 3306,
DATABASE_NAME)
db_name)

SA_DATABASE_URL = get_sa_db_url(DATABASE_NAME)

SA_TEST_DB_URL = get_sa_db_url(f'test_{DATABASE_NAME}')

# Prevent MySQL connection timeouts
SA_POOL_RECYCLE = 3600
Expand Down
3 changes: 2 additions & 1 deletion specifyweb/specify/api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
from django.db.models import Max
from django.test import TestCase, Client

from specifyweb.businessrules.exceptions import BusinessRuleException
from specifyweb.permissions.models import UserPolicy
from specifyweb.specify import api, models
from specifyweb.specify.views import fix_record_data

def get_table(name: str):
return getattr(models, name.capitalize())

class MainSetupTearDown:
def setUp(self):
Expand Down
211 changes: 211 additions & 0 deletions specifyweb/specify/test_trees.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
from specifyweb.specify import models
from specifyweb.specify.api_tests import ApiTests, get_table
from specifyweb.specify.tree_stats import get_tree_stats
from specifyweb.stored_queries.tests import SQLAlchemySetup

class TestTreeSetup(ApiTests):
def setUp(self) -> None:
super().setUp()
self.geographytreedef.treedefitems.create(name='Continent', rankid=100)
self.geographytreedef.treedefitems.create(name='Country', rankid=200)
self.geographytreedef.treedefitems.create(name='State', rankid=300)
self.geographytreedef.treedefitems.create(name='County', rankid=400)
self.geographytreedef.treedefitems.create(name='City', rankid=500)


self.taxontreedef = models.Taxontreedef.objects.create(name="Test Taxonomy")
self.taxontreedef.treedefitems.create(name='Taxonomy Root', rankid=0)
self.taxontreedef.treedefitems.create(name='Kingdom', rankid=10)
self.taxontreedef.treedefitems.create(name='Phylum', rankid=30)
self.taxontreedef.treedefitems.create(name='Class', rankid=60)
self.taxontreedef.treedefitems.create(name='Order', rankid=100)
self.taxontreedef.treedefitems.create(name='Superfamily', rankid=130)
self.taxontreedef.treedefitems.create(name='Family', rankid=140)
self.taxontreedef.treedefitems.create(name='Genus', rankid=180)
self.taxontreedef.treedefitems.create(name='Subgenus', rankid=190)
self.taxontreedef.treedefitems.create(name='Species', rankid=220)
self.taxontreedef.treedefitems.create(name='Subspecies', rankid=230)

class TestTree:
def setUp(self)->None:
super().setUp()
self.earth = get_table('Geography').objects.create(
name="Earth",
definitionitem=get_table('Geographytreedefitem').objects.get(name="Planet"),
definition=self.geographytreedef,
)

self.na = get_table('Geography').objects.create(
name="North America",
definitionitem=get_table('Geographytreedefitem').objects.get(name="Continent"),
definition=self.geographytreedef,
parent=self.earth,
)

self.usa = get_table('Geography').objects.create(
name="USA",
definitionitem=get_table('Geographytreedefitem').objects.get(name="Country"),
definition=self.geographytreedef,
parent=self.na,
)

self.kansas = get_table('Geography').objects.create(
name="Kansas",
definitionitem=get_table('Geographytreedefitem').objects.get(name="State"),
definition=self.geographytreedef,
parent=self.usa,
)

self.mo = get_table('Geography').objects.create(
name="Missouri",
definitionitem=get_table('Geographytreedefitem').objects.get(name="State"),
definition=self.geographytreedef,
parent=self.usa,
)

self.ohio = get_table('Geography').objects.create(
name="Ohio",
definitionitem=get_table('Geographytreedefitem').objects.get(name="State"),
definition=self.geographytreedef,
parent=self.usa,
)

self.ill = get_table('Geography').objects.create(
name="Illinois",
definitionitem=get_table('Geographytreedefitem').objects.get(name="State"),
definition=self.geographytreedef,
parent=self.usa,
)

self.doug = get_table('Geography').objects.create(
name="Douglas",
definitionitem=get_table('Geographytreedefitem').objects.get(name="County"),
definition=self.geographytreedef,
parent=self.kansas,
)

self.greene = get_table('Geography').objects.create(
name="Greene",
definitionitem=get_table('Geographytreedefitem').objects.get(name="County"),
definition=self.geographytreedef,
parent=self.mo,
)

self.greeneoh = get_table('Geography').objects.create(
name="Greene",
definitionitem=get_table('Geographytreedefitem').objects.get(name="County"),
definition=self.geographytreedef,
parent=self.ohio,
)

self.sangomon = get_table('Geography').objects.create(
name="Sangamon",
definitionitem=get_table('Geographytreedefitem').objects.get(name="County"),
definition=self.geographytreedef,
parent=self.ill,
)

self.springmo = get_table('Geography').objects.create(
name="Springfield",
definitionitem=get_table('Geographytreedefitem').objects.get(name="City"),
definition=self.geographytreedef,
parent=self.greene,
)

self.springill = get_table('Geography').objects.create(
name="Springfield",
definitionitem=get_table('Geographytreedefitem').objects.get(name="City"),
definition=self.geographytreedef,
parent=self.sangomon,
)

class GeographyTree(TestTree, TestTreeSetup): pass

class SqlTreeSetup(SQLAlchemySetup, GeographyTree): pass

class TreeStatsTest(SqlTreeSetup):

def setUp(self):
super().setUp()
locality_1 = models.Locality.objects.create(
localityname="somewhere1",
srclatlongunit=0,
discipline=self.discipline,
geography=self.usa
)
locality_2 = models.Locality.objects.create(
localityname="somewhere2",
srclatlongunit=0,
discipline=self.discipline,
geography=self.springill
)
locality_3 = models.Locality.objects.create(
localityname="somewhere3",
srclatlongunit=0,
discipline=self.discipline,
geography=self.ill
)
collecting_event_1 = models.Collectingevent.objects.create(
discipline=self.discipline,
locality=locality_1
)
collecting_event_2 = models.Collectingevent.objects.create(
discipline=self.discipline,
locality=locality_2
)
collecting_event_3 = models.Collectingevent.objects.create(
discipline=self.discipline,
locality=locality_3
)
self.collectionobjects[0].collectingevent = collecting_event_1
self.collectionobjects[1].collectingevent = collecting_event_2
self.collectionobjects[2].collectingevent = collecting_event_3
self.collectionobjects[3].collectingevent = collecting_event_3
self.collectionobjects[4].collectingevent = collecting_event_3

_saved = [co.save() for co in self.collectionobjects]

second_collection = models.Collection.objects.create(
catalognumformatname='test',
collectionname='TestCollection2',
isembeddedcollectingevent=False,
discipline=self.discipline)

collection_object_another_collection = models.Collectionobject.objects.create(
collection=second_collection,
catalognumber="num-5"
)

def _run_nn_and_cte(*args, **kwargs):
cte_results = get_tree_stats(*args, **kwargs, session_context=TreeStatsTest.test_session_context, using_cte=True)
node_number_results = get_tree_stats(*args, **kwargs, session_context=TreeStatsTest.test_session_context, using_cte=False)
self.assertCountEqual(cte_results, node_number_results)
return cte_results

self.validate_tree_stats = lambda *args, **kwargs: (
lambda true_results: self.assertCountEqual(_run_nn_and_cte(*args, **kwargs), true_results))

def test_counts_correctness(self):
correct_results = {
self.earth.id: [(self.na.id, 0, 5)],
self.na.id: [(self.usa.id, 1, 5)],
self.usa.id: [
(self.kansas.id, 0, 0),
(self.mo.id, 0, 0),
(self.ohio.id, 0, 0),
(self.ill.id, 3, 4),
],
self.ill.id: [
(self.sangomon.id, 0, 1),
],
self.sangomon.id: [
(self.springill.id, 1, 1)
]
}

_results = [
self.validate_tree_stats(self.geographytreedef.id, 'geography', parent_id, self.collection)(correct)
for parent_id, correct in correct_results.items()
]


Loading

0 comments on commit 807f04d

Please sign in to comment.