Skip to content

Commit

Permalink
Fix Order Bug (#301)
Browse files Browse the repository at this point in the history
* fix order bug

* beta

* more logging, new beta

* fix version

* fix xl order call

* add index to rid column

* version
  • Loading branch information
willronchetti authored Jul 25, 2024
1 parent 6d8a461 commit 8f6113a
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 10 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ snovault
Change Log
----------

11.20.0
=======

* Bug fix: use loadxl_order() in staggered reindexing
* Add B-tree index to rid column in propsheets to optimize revision history retrieval


11.19.0
=======

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dcicsnovault"
version = "11.19.0"
version = "11.20.0"
description = "Storage support for 4DN Data Portals."
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion snovault/commands/create_mapping_on_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def _run_create_mapping(app, args):
log.info('Overriding deploy_cfg and wiping ES')
deploy_cfg['WIPE_ES'] = True
run_create_mapping(app, check_first=(not deploy_cfg['WIPE_ES']), purge_queue=args.clear_queue,
item_order=loadxl_order)
item_order=loadxl_order())
except Exception as e:
log.error("Exception encountered while gathering deployment information or running create_mapping")
log.error(str(e))
Expand Down
2 changes: 2 additions & 0 deletions snovault/dev_servers.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ def cleanup_process():

# now clear the queues and queue items for indexing
create_mapping.run(app, check_first=True, strict=True, purge_queue=False)
# To test prod setup:
# create_mapping.reindex_by_type_staggered(app)

PRINT('Started. ^C to exit.')

Expand Down
16 changes: 9 additions & 7 deletions snovault/elasticsearch/create_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
# from ..commands.es_index_data import run as run_index_data
from ..schema_utils import combine_schemas
from ..tools import make_indexer_testapp
from ..project_app import app_project
from ..util import (
add_default_embeds, IndexSettings,
NUM_SHARDS, NUM_REPLICAS, SEARCH_MAX, KW_IGNORE_ABOVE, MIN_NGRAM,
Expand Down Expand Up @@ -874,11 +875,11 @@ def build_index(app, es, index_name, in_type, mapping, uuids_to_index, dry_run,
start = timer()
coll_uuids = set(get_uuids_for_types(app.registry, types=[in_type]))
end = timer()
log.info('Time to get collection uuids: %s' % str(end-start), cat='fetch time',
duration=str(end-start), collection=in_type)
log.warning('Time to get collection uuids: %s' % str(end-start), cat='fetch time',
duration=str(end-start), collection=in_type)
uuids_to_index[in_type] = coll_uuids
log.info('MAPPING: will queue all %s items in the new index %s for reindexing' %
(len(coll_uuids), in_type), cat='items to queue', count=len(coll_uuids), collection=in_type)
log.warning('MAPPING: will queue all %s items in the new index %s for reindexing' %
(len(coll_uuids), in_type), cat='items to queue', count=len(coll_uuids), collection=in_type)


def check_if_index_exists(es, in_type):
Expand Down Expand Up @@ -1383,7 +1384,7 @@ def reindex_by_type_staggered(app):
registry = app.registry
es = registry[ELASTIC_SEARCH]
indexer_queue = registry[INDEXER_QUEUE]
all_types = registry[COLLECTIONS].by_item_type
all_types = app_project().loadxl_order()

log.warning('Running staggered create_mapping command - wiping and reindexing indices sequentially'
' to minimize downtime.')
Expand All @@ -1396,10 +1397,11 @@ def reindex_by_type_staggered(app):
uuids = {}
build_index(app, es, namespaced_index, i_type, mapping, uuids, False)
mapping_end = timer()
to_index_list = flatten_and_sort_uuids(app.registry, uuids, None)
to_index_list = flatten_and_sort_uuids(app.registry, uuids, None) # passing None as the item order is fine here since the types were already pre-ordered
indexer_queue.add_uuids(app.registry, to_index_list, strict=True,
target_queue='secondary')
log.warning(f'Queued type {i_type} in {mapping_end - current_start}')
log.warning(f'Queued type {i_type} ({len(to_index_list)} total items) in {mapping_end - current_start}')
log.warning(f'First 10 items: {list(uuid for uuid in to_index_list[0:10])}')
time.sleep(10) # give queue some time to catch up
while not indexer_queue.queue_is_empty():
time.sleep(10) # check every 10 seconds
Expand Down
3 changes: 2 additions & 1 deletion snovault/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,11 +784,12 @@ class PropertySheet(Base):
)
# The sid column also serves as the order.
sid = Column(types.Integer, autoincrement=True, primary_key=True)
# B-tree index on rid here greatly optimizes retrieval of revision history
rid = Column(UUID,
ForeignKey('resources.rid',
deferrable=True,
initially='DEFERRED'),
nullable=False)
nullable=False, index=True)
name = Column(types.String, nullable=False)
properties = Column(JSON)
resource = orm.relationship('Resource')
Expand Down

0 comments on commit 8f6113a

Please sign in to comment.