From 561173f8adff85b468f6d1f046d2da2664277304 Mon Sep 17 00:00:00 2001
From: Jennifer Richards
Date: Wed, 24 Jul 2024 14:25:40 -0700
Subject: [PATCH 1/4] chore: nudge timeouts

---
 ietf/doc/tasks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ietf/doc/tasks.py b/ietf/doc/tasks.py
index 9f5aa4976a..4ace33446c 100644
--- a/ietf/doc/tasks.py
+++ b/ietf/doc/tasks.py
@@ -122,7 +122,7 @@ def generate_draft_bibxml_files_task(days=7, process_all=False):
             log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}")
 
 
-@shared_task(bind=True, time_limit=30, soft_time_limit=28)
+@shared_task(bind=True, time_limit=32, soft_time_limit=30)
 def pdfize_document_task(self, name, rev):
     doc = Document.objects.filter(name=name).first()
     if doc is None:
@@ -130,7 +130,7 @@ def pdfize_document_task(self, name, rev):
         return
     # There is a very slight race condition between the task time_limit and the lock expiration
     # time. We can tolerate this task running twice if that unlikely timing ever works out.
-    with celery_task_lock(self, 30) as acquired:
+    with celery_task_lock(self, 32) as acquired:
         if not acquired:
             log.log(f"{self.name}({name}, {rev}) skipped because it's already running")
             return

From 69a672479830ac7a5d53f3c15227c7a00d092ff6 Mon Sep 17 00:00:00 2001
From: Jennifer Richards
Date: Thu, 25 Jul 2024 15:29:41 -0700
Subject: [PATCH 2/4] feat: UnprocessableDocument model+migration

---
 .../migrations/0022_unprocessabledocument.py  | 46 +++++++++++++++++++
 ietf/doc/models.py                            | 20 ++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 ietf/doc/migrations/0022_unprocessabledocument.py

diff --git a/ietf/doc/migrations/0022_unprocessabledocument.py b/ietf/doc/migrations/0022_unprocessabledocument.py
new file mode 100644
index 0000000000..5ab5a57ec9
--- /dev/null
+++ b/ietf/doc/migrations/0022_unprocessabledocument.py
@@ -0,0 +1,46 @@
+# Generated by Django 4.2.14 on 2024-07-25 22:28
+
+from django.db import migrations, models
+import django.db.models.deletion
+import django.utils.timezone
+import ietf.utils.models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("doc", "0021_narrativeminutes"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="UnprocessableDocument",
+            fields=[
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("rev", models.CharField(max_length=16, verbose_name="revision")),
+                (
+                    "proc_type",
+                    models.CharField(
+                        choices=[("pdfize", "Pdfize")],
+                        help_text="type of processing that fails for this document",
+                        max_length=16,
+                    ),
+                ),
+                ("time", models.DateTimeField(default=django.utils.timezone.now)),
+                (
+                    "document",
+                    ietf.utils.models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE, to="doc.document"
+                    ),
+                ),
+            ],
+        ),
+    ]
diff --git a/ietf/doc/models.py b/ietf/doc/models.py
index 7d3f4658af..38f8ab68f6 100644
--- a/ietf/doc/models.py
+++ b/ietf/doc/models.py
@@ -1483,3 +1483,23 @@ class BofreqEditorDocEvent(DocEvent):
 class BofreqResponsibleDocEvent(DocEvent):
     """ Capture the responsible leadership (IAB and IESG members) for a BOF Request """
     responsible = models.ManyToManyField('person.Person', blank=True)
+
+
+
+class UnprocessableDocument(models.Model):
+    """Record that a type of processing (e.g., pdfize) fails for a revision of a Document"""
+    class ProcTypes(models.TextChoices):
+        PDFIZE = "pdfize"
+
+    document = ForeignKey(Document)
+    rev = models.CharField(
+        verbose_name="revision",
+        max_length=16,
+        blank=False,
+    )
+    proc_type = models.CharField(
+        max_length=16,
+        choices=ProcTypes.choices,
+        help_text="type of processing that fails for this document",
+    )
+    time = models.DateTimeField(default=timezone.now)

From c7acd2049fba8c0ba98f38d8546084532d8a1eeb Mon Sep 17 00:00:00 2001
From: Jennifer Richards
Date: Thu, 25 Jul 2024 15:30:00 -0700
Subject: [PATCH 3/4] feat: remember pdfize failures

---
 ietf/doc/tasks.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/ietf/doc/tasks.py b/ietf/doc/tasks.py
index 4ace33446c..0bb831bb83 100644
--- a/ietf/doc/tasks.py
+++ b/ietf/doc/tasks.py
@@ -12,6 +12,7 @@
 from django.conf import settings
 from django.utils import timezone
 
+from ietf.doc.models import UnprocessableDocument
 from ietf.utils import log
 from ietf.utils.celery import celery_task_lock
 from ietf.utils.timezone import datetime_today
@@ -140,3 +141,11 @@ def pdfize_document_task(self, name, rev):
             PdfizedDoc(doc).update_cache()
         except SoftTimeLimitExceeded:
             log.log(f"Failed to pdfize document {name} rev {rev}: exceeded task time limit")
+            UnprocessableDocument.objects.update_or_create(
+                document=doc,
+                rev=doc.rev,
+                proc_type=UnprocessableDocument.ProcTypes.PDFIZE,
+                defaults={
+                    "time": timezone.now(),
+                },
+            )

From e97c1b30d657c285b192337268ad4b9da7b59f82 Mon Sep 17 00:00:00 2001
From: Jennifer Richards
Date: Thu, 25 Jul 2024 16:55:40 -0700
Subject: [PATCH 4/4] chore: give up if doc is unprocessable

---
 ietf/doc/views_doc.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py
index 56b600c317..5fd8889067 100644
--- a/ietf/doc/views_doc.py
+++ b/ietf/doc/views_doc.py
@@ -57,7 +57,7 @@
 from ietf.doc.models import ( Document, DocHistory, DocEvent, BallotDocEvent, BallotType,
     ConsensusDocEvent, NewRevisionDocEvent, TelechatDocEvent, WriteupDocEvent, IanaExpertDocEvent,
     IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor,
-    RelatedDocument, RelatedDocHistory)
+    RelatedDocument, RelatedDocHistory, UnprocessableDocument)
 from ietf.doc.utils import (augment_events_with_revision,
     can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, investigate_fragment,
     needed_ballot_positions, nice_consensus, update_telechat, has_same_ballot,
@@ -1067,7 +1067,11 @@ def document_pdfized(request, name, rev=None, ext=None):
     pdf = PdfizedDoc(doc).get()
     if pdf:
         return HttpResponse(pdf, content_type="application/pdf")
-    
+    elif UnprocessableDocument.objects.filter(
+        document=doc, rev=doc.rev, proc_type=UnprocessableDocument.ProcTypes.PDFIZE
+    ).exists():
+        raise Http404()
+
     pdfize_document_task.delay(name=doc.name, rev=doc.rev)
     refresh_time_seconds = 5
     path=request.path