Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
ilude committed May 9, 2024
1 parent 5e88a4c commit 724327c
Show file tree
Hide file tree
Showing 6 changed files with 232 additions and 195 deletions.
2 changes: 2 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
"zcompdump"
],
"python.analysis.extraPaths": [
"./app",
"./app/services",
"./app"
]
}
18 changes: 8 additions & 10 deletions app/models/layout.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from asyncio import tasks
import asyncio
import json
import logging
import os
from services.favicon_finder import FaviconFinder
import yaml
from services.favicon_store import FaviconStore
from models.bookmark import Bookmark
from models.row import Row
from models.column import Column
Expand All @@ -19,8 +17,8 @@

class Layout:
id: str = 'layout'
headers: list[Bookmark] = []
tabs: list[Tab] = []
headers: list[Bookmark] = []
bookmark_bar: list[dict] = []

def __init__(self, config_file: str = "configs/layout.yml", bookmarks_bar_file: str = "configs/bookmarks_bar.json"):
Expand All @@ -34,7 +32,7 @@ def __init__(self, config_file: str = "configs/layout.yml", bookmarks_bar_file:
except Exception as ex:
logger.error(f"Error: {ex} creating empty bookmark bar file at {self.bookmark_bar_path}")

self.favicon_finder = FaviconFinder()
self.favicon_store = FaviconStore()
self.reload()

def load_bookmarks(self):
Expand All @@ -57,10 +55,10 @@ def is_modified(self):
def mtime(self):
return os.path.getmtime(self.config_path)

def bookmark_iterator(self, bookmarks, urls=[]):
def bookmarks_list(self, bookmarks, urls=[]):
for bookmark in bookmarks:
if 'contents' in bookmark:
self.bookmark_iterator(bookmark['contents'], urls)
self.bookmarks_list(bookmark['contents'], urls)
elif 'href' in bookmark:
urls.append(bookmark['href'])
return urls
Expand All @@ -78,9 +76,9 @@ def reload(self):
self.feed_hash = {}

self.bookmark_bar = self.load_bookmarks()

bookmarks = self.bookmark_iterator(self.bookmark_bar)
self.favicon_finder.fetch_from_iterator(bookmarks)
bookmarks = self.bookmarks_list(self.bookmark_bar)
logger.debug("====== Layout calling fetch favicons!")
self.favicon_store.fetch_favicons_from(bookmarks)

logger.debug("Completed Layout reload!")

Expand Down
185 changes: 0 additions & 185 deletions app/services/favicon_finder.py

This file was deleted.

80 changes: 80 additions & 0 deletions app/services/favicon_retriever.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import logging
import os
import re
from services.favicon_utils import get_favicon_filename, normalize_domain
import requests
from bs4 import BeautifulSoup
from models.utils import pwd
from urllib.parse import urljoin

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Set this module's logger to DEBUG level.
logger.setLevel(logging.DEBUG)


class FaviconRetriever:
    """Find a site's favicon, download it, and write it into a cache directory.

    Outcomes (success or failure reason) are reported to the injected
    ``favicon_store`` through its ``save_processed_domain`` method.
    """

    def __init__(self, favicon_store, cache_dir: str, timeout: float = 10):
        """Prepare the cache directory and HTTP settings.

        Args:
            favicon_store: Object exposing ``save_processed_domain(domain, reason=...)``.
            cache_dir: Directory (joined onto the project ``pwd``) where icons
                are written; created if it does not exist.
            timeout: Seconds to wait on each HTTP request before giving up.
                Without a timeout ``requests`` can block indefinitely on a
                stalled server.
        """
        self.cache_dir = pwd.joinpath(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
        }
        self.favicon_store = favicon_store
        self.timeout = timeout

    @staticmethod
    def _is_image_response(response):
        """Return True when the response's Content-Type header is an image type."""
        return response.headers.get('content-type', '').lower().startswith('image/')

    def make_request(self, url):
        """GET ``url`` with the browser-like headers, following redirects."""
        return requests.get(
            url,
            headers=self.request_headers,
            allow_redirects=True,
            timeout=self.timeout,
        )

    def favicon_path(self, url):
        """Return the cache file path (as a string) for ``url``'s favicon."""
        favicon_filename = get_favicon_filename(url)
        return os.path.join(self.cache_dir, favicon_filename)

    def find_favicon_url(self, url):
        """Return the absolute favicon URL advertised by the page, or None.

        The page at ``url`` is tried first, then its normalised domain. If
        neither yields an icon ``<link>``, Google's favicon service is queried;
        in that case the icon is written straight to the cache and None is
        returned because there is nothing left for the caller to download.
        """
        normalized_domain = normalize_domain(url)

        # dict.fromkeys de-duplicates while keeping order, so the same URL is
        # not requested twice when ``url`` already equals its normalised form.
        for try_url in dict.fromkeys([url, normalized_domain]):
            try:
                response = self.make_request(try_url)
                if response.status_code != 200:
                    continue
                soup = BeautifulSoup(response.text, 'html.parser')
                icon_link = soup.find('link', rel=['icon', 'shortcut icon'])
                # A <link rel="icon"> without an href is useless; skip it
                # instead of raising KeyError.
                href = icon_link.get('href') if icon_link else None
                if href:
                    # Resolve against the page actually fetched (after
                    # redirects), not the original bookmark URL. urljoin
                    # leaves absolute hrefs unchanged.
                    return urljoin(response.url, href)
            except Exception as ex:
                logger.error(f"Error: find_favicon_url({try_url}): {ex}")

        # No icon link found on the site itself; fall back to Google.
        icon_url = f'http://www.google.com/s2/favicons?domain={normalized_domain}'
        try:
            response = self.make_request(icon_url)
            if response.status_code == 200 and self._is_image_response(response):
                with open(self.favicon_path(normalized_domain), 'wb') as file:
                    file.write(response.content)
                self.favicon_store.save_processed_domain(normalized_domain, reason='found in google')
        except Exception as ex:
            # Best effort: a failed fallback must not abort the caller.
            logger.error(f"Error: find_favicon_url({icon_url}): {ex}")

        return None

    def download_favicon(self, url):
        """Download the favicon for ``url`` into the cache and record the outcome.

        The cache file and the processed-domain record are keyed on the
        *site's* normalised domain (the same key the Google fallback uses),
        not on the host serving the icon, which is often a separate CDN.
        """
        logger.debug(f"download_favicon({url}) called")
        icon_url = self.find_favicon_url(url)
        if not icon_url:
            logger.debug(f"Could not download_favicon({url}) no icon url found!")
            return

        normalized_domain = normalize_domain(url)
        favicon_path = self.favicon_path(normalized_domain)

        try:
            response = self.make_request(icon_url)
            if response.status_code == 200 and self._is_image_response(response):
                with open(favicon_path, 'wb') as file:
                    file.write(response.content)
                self.favicon_store.save_processed_domain(normalized_domain, reason='success')
            else:
                self.favicon_store.save_processed_domain(
                    normalized_domain,
                    reason=f'response_code: {response.status_code} content-type: {response.headers.get("content-type", "")}'
                )
        except Exception as ex:
            self.favicon_store.save_processed_domain(normalized_domain, reason=f'{ex}')
Loading

0 comments on commit 724327c

Please sign in to comment.