Fix urllib.error.HTTPError (HTTP Error 403: Forbidden) by sending a User-Agent header when fetching URLs

cheshire-cat-ai · Jul 29, 2023 · 8ef3c3e
1 parent 3f0c8b9
commit 8ef3c3e
Showing 1 changed file with 12 additions and 7 deletions.
diff --git a/core/cat/rabbit_hole.py b/core/cat/rabbit_hole.py
@@ -3,12 +3,12 @@
 import json
 import mimetypes
 from typing import List, Union
-from urllib.request import urlopen
+from urllib.request import urlopen, Request
 from urllib.parse import urlparse
+from urllib.error import HTTPError
 
 from cat.log import log
 from starlette.datastructures import UploadFile
-from fastapi import HTTPException
 from langchain.docstore.document import Document
 from qdrant_client.http import models
 
@@ -33,7 +33,6 @@ def __init__(self, cat):
             "text/html": BS4HTMLParser()
         }
 
-
     def ingest_memory(self, file: UploadFile):
         """Upload memories to the declarative memory from a JSON file.
 
@@ -198,9 +197,15 @@ def file_to_docs(
                 content_type = "text/html"
                 source = file
 
-                # Get binary content of url
-                with urlopen(file) as response:
-                    file_bytes = response.read()
+                # Make a request with a fake browser name
+                request = Request(file, headers={'User-Agent': "Magic Browser"})
+
+                try:
+                    # Get binary content of url
+                    with urlopen(request) as response:
+                        file_bytes = response.read()
+                except HTTPError as e:
+                    log(e, "ERROR")
             else:
 
                 # Get mime type from file extension and source
@@ -218,7 +223,7 @@ def file_to_docs(
                     mimetype=content_type,
                     source=source).from_data(data=file_bytes,
                                              mime_type=content_type)
-        
+
         # Parser based on the mime type
         parser = MimeTypeBasedParser(handlers=self.file_handlers)