Skip to content

Commit

Permalink
fix urllib.error.HTTPError: HTTP Error 403: Forbidden
Browse files: browse the repository at this point in the history
  • Loading branch information
nicola-corbellini committed Jul 29, 2023
1 parent 3f0c8b9 commit 8ef3c3e
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions core/cat/rabbit_hole.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,12 +3,12 @@
import json
import mimetypes
from typing import List, Union
from urllib.request import urlopen
from urllib.request import urlopen, Request
from urllib.parse import urlparse
from urllib.error import HTTPError

from cat.log import log
from starlette.datastructures import UploadFile
from fastapi import HTTPException
from langchain.docstore.document import Document
from qdrant_client.http import models

Expand All @@ -33,7 +33,6 @@ def __init__(self, cat):
"text/html": BS4HTMLParser()
}


def ingest_memory(self, file: UploadFile):
"""Upload memories to the declarative memory from a JSON file.
Expand Down Expand Up @@ -198,9 +197,15 @@ def file_to_docs(
content_type = "text/html"
source = file

# Get binary content of url
with urlopen(file) as response:
file_bytes = response.read()
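# Make the request with a custom User-Agent header (a fake browser name),
# since some servers reject urllib's default agent with HTTP 403 Forbidden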
request = Request(file, headers={'User-Agent': "Magic Browser"})

try:
# Get binary content of url
with urlopen(request) as response:
file_bytes = response.read()
except HTTPError as e:
log(e, "ERROR")
else:

# Get mime type from file extension and source
Expand All @@ -218,7 +223,7 @@ def file_to_docs(
mimetype=content_type,
source=source).from_data(data=file_bytes,
mime_type=content_type)

# Parser based on the mime type
parser = MimeTypeBasedParser(handlers=self.file_handlers)

Expand Down

0 comments on commit 8ef3c3e

Please sign in to comment.