Skip to content

Commit

Permalink
Fix for records that are too large by reducing the OpenSearch bulk update size
Browse files Browse the repository at this point in the history
(Some items recently had thousands or more authors, which ballooned the record size)
  • Loading branch information
sfisher committed Nov 15, 2024
1 parent aac4682 commit c22e747
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions ezidapp/management/commands/opensearch-update.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
urllib3.disable_warnings(InsecureRequestWarning)
# end suppression of urllib3 InsecureRequestWarning

SPLIT_SIZE = 100
SPLIT_SIZE = 5
DB_PAGE_SIZE = 100

# run: python manage.py opensearch-update
# optional parameters: --starting_id 1234 --updated_since 2023-10-10T00:00:00Z
Expand All @@ -37,7 +38,7 @@

class Command(BaseCommand):
def handle(self, *args, **options):
# Get all items from Identifier table 100 at a time manually since
# Get all items from Identifier table DB_PAGE_SIZE at a time manually since
# I had lockup issues with the ORM, even with constructs that were
# supposed to be lazy and handle large datasets. :shrug:
#
Expand Down Expand Up @@ -68,7 +69,7 @@ def handle(self, *args, **options):

while True:
iden_arr = (SearchIdentifier.objects.filter(id__gt=start_after_id)
.filter(additional_filter).order_by('id')[:100])
.filter(additional_filter).order_by('id')[:DB_PAGE_SIZE])

# break when we run out of items
if not iden_arr:
Expand Down

0 comments on commit c22e747

Please sign in to comment.