Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: track docs that fail pdfize task #7754

Draft
wants to merge 4 commits into
base: feat/pdfize-async
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions ietf/doc/migrations/0022_unprocessabledocument.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Generated by Django 4.2.14 on 2024-07-25 22:28

from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
import ietf.utils.models


class Migration(migrations.Migration):
    """Create the ``UnprocessableDocument`` model in the ``doc`` app."""

    dependencies = [("doc", "0021_narrativeminutes")]

    operations = [
        migrations.CreateModel(
            name="UnprocessableDocument",
            fields=[
                # Auto-created surrogate primary key.
                (
                    "id",
                    models.AutoField(
                        verbose_name="ID",
                        serialize=False,
                        auto_created=True,
                        primary_key=True,
                    ),
                ),
                # Revision of the document that the record applies to.
                (
                    "rev",
                    models.CharField(verbose_name="revision", max_length=16),
                ),
                # Which kind of processing fails; "pdfize" is the only choice.
                (
                    "proc_type",
                    models.CharField(
                        max_length=16,
                        choices=[("pdfize", "Pdfize")],
                        help_text="type of processing that fails for this document",
                    ),
                ),
                # Timestamp of the record; defaults to the current time.
                (
                    "time",
                    models.DateTimeField(default=django.utils.timezone.now),
                ),
                # Owning document; rows are removed together with it (CASCADE).
                (
                    "document",
                    ietf.utils.models.ForeignKey(
                        to="doc.document",
                        on_delete=django.db.models.deletion.CASCADE,
                    ),
                ),
            ],
        ),
    ]
20 changes: 20 additions & 0 deletions ietf/doc/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1483,3 +1483,23 @@ class BofreqEditorDocEvent(DocEvent):
class BofreqResponsibleDocEvent(DocEvent):
""" Capture the responsible leadership (IAB and IESG members) for a BOF Request """
responsible = models.ManyToManyField('person.Person', blank=True)



class UnprocessableDocument(models.Model):
    """Record that a kind of processing fails for one revision of a Document.

    Each row ties a document and revision to a processing type (see
    ``ProcTypes``) together with a timestamp.
    """

    class ProcTypes(models.TextChoices):
        """Kinds of processing that can be recorded as failing."""

        # The label is written out; it equals the one Django derives from the name.
        PDFIZE = "pdfize", "Pdfize"

    document = ForeignKey(Document)
    # ``blank`` is left at its default (False), so an empty revision is rejected
    # by form/model validation.
    rev = models.CharField(verbose_name="revision", max_length=16)
    proc_type = models.CharField(
        choices=ProcTypes.choices,
        max_length=16,
        help_text="type of processing that fails for this document",
    )
    # Defaults to the current time when the row is created.
    time = models.DateTimeField(default=timezone.now)
13 changes: 11 additions & 2 deletions ietf/doc/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from django.conf import settings
from django.utils import timezone

from ietf.doc.models import UnprocessableDocument
from ietf.utils import log
from ietf.utils.celery import celery_task_lock
from ietf.utils.timezone import datetime_today
Expand Down Expand Up @@ -122,15 +123,15 @@ def generate_draft_bibxml_files_task(days=7, process_all=False):
log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}")


@shared_task(bind=True, time_limit=30, soft_time_limit=28)
@shared_task(bind=True, time_limit=32, soft_time_limit=30)
def pdfize_document_task(self, name, rev):
doc = Document.objects.filter(name=name).first()
if doc is None:
log.log(f"Failed to pdfize document {name} rev {rev}: Document does not exist")
return
# There is a very slight race condition between the task time_limit and the lock expiration
# time. We can tolerate this task running twice if that unlikely timing ever works out.
with celery_task_lock(self, 30) as acquired:
with celery_task_lock(self, 32) as acquired:
if not acquired:
log.log(f"{self.name}({name}, {rev}) skipped because it's already running")
return
Expand All @@ -140,3 +141,11 @@ def pdfize_document_task(self, name, rev):
PdfizedDoc(doc).update_cache()
except SoftTimeLimitExceeded:
log.log(f"Failed to pdfize document {name} rev {rev}: exceeded task time limit")
UnprocessableDocument.objects.update_or_create(
document=doc,
rev=doc.rev,
proc_type=UnprocessableDocument.ProcTypes.PDFIZE,
defaults={
"time": timezone.now(),
},
)
8 changes: 6 additions & 2 deletions ietf/doc/views_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
from ietf.doc.models import ( Document, DocHistory, DocEvent, BallotDocEvent, BallotType,
ConsensusDocEvent, NewRevisionDocEvent, TelechatDocEvent, WriteupDocEvent, IanaExpertDocEvent,
IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor,
RelatedDocument, RelatedDocHistory)
RelatedDocument, RelatedDocHistory, UnprocessableDocument)
from ietf.doc.utils import (augment_events_with_revision,
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, investigate_fragment,
needed_ballot_positions, nice_consensus, update_telechat, has_same_ballot,
Expand Down Expand Up @@ -1067,7 +1067,11 @@ def document_pdfized(request, name, rev=None, ext=None):
pdf = PdfizedDoc(doc).get()
if pdf:
return HttpResponse(pdf, content_type="application/pdf")

elif UnprocessableDocument.objects.filter(
document=doc, rev=doc.rev, proc_type=UnprocessableDocument.ProcTypes.PDFIZE
).exists():
return Http404()

pdfize_document_task.delay(name=doc.name, rev=doc.rev)
refresh_time_seconds = 5
path=request.path
Expand Down