Implement async feed requests to improve performance on a cold cache

traefikturkey · Mar 28, 2024 · 24c326a · 24c326a
1 parent 802ad8e
commit 24c326a
Show file tree

Hide file tree

Showing 8 changed files with 321 additions and 119 deletions.
diff --git a/app/app.py b/app/app.py
@@ -1,66 +1,100 @@
 import os
+import yaml
+import asyncio
 from datetime import datetime
 
-import feedparser
-from datetime import datetime
-from flask import Flask, render_template
+from flask import Flask, request, render_template
 from flask_caching import Cache
-from post_processor import post_processor
 
-from utils import clean_html, copy_default_to_configs, load_file
+from utils import copy_default_to_configs, load_file
+from rss import rss
 
 copy_default_to_configs()
 
 app = Flask(__name__)
 
-# 600 seconds = 10 minutes
-cache = Cache(app, config={
-  'CACHE_TYPE': 'simple',            
-  'CACHE_DEFAULT_TIMEOUT': 600
-})
+# Flask treats any FLASK_DEBUG value other than "0", "false" or "no"
+# (case-insensitive) as debug mode; mirror that here so that the documented
+# FLASK_DEBUG=1 form also disables caching, not only the literal "True".
+debug_enabled = os.environ.get("FLASK_DEBUG", "False").lower() not in ("", "0", "false", "no")
+
+if debug_enabled:
+  # 'null' selects flask-caching's backend that stores nothing
+  cache_config={
+    'CACHE_TYPE': 'null'
+  }
+else:
+  # 600 seconds = 10 minutes
+  cache_config={
+    'CACHE_TYPE': 'simple',
+    'CACHE_DEFAULT_TIMEOUT': 600
+  }
+
+cache = Cache(app, config=cache_config)
 
 @app.context_processor
 def inject_current_date():
   return {'today_date': datetime.now()}
 
+@app.route('/save_tab_name', methods=['POST'])
+def save_tab_name():
+    """Create a tab, or rename/resize an existing one, in configs/layout.yml.
+
+    Expects a JSON object with ``tab_name`` (non-empty), ``column_count``
+    (integer from 1 to 6) and optionally ``tab_index`` (position of the tab
+    to edit; omitted or null appends a new tab).
+
+    Returns:
+        A dict with a success ``message``, or a dict with an ``error`` and
+        HTTP status 400 when the payload is invalid.
+    """
+    data = request.get_json()
+    if not isinstance(data, dict):
+        return {'error': 'Invalid tab name or column count'}, 400
+
+    tab_name = data.get('tab_name')
+    tab_index = data.get('tab_index')
+    column_count = data.get('column_count')
+
+    # A missing or non-integer column count must be rejected here instead of
+    # raising TypeError in the range comparison (bool is an int subclass).
+    valid_columns = (
+        isinstance(column_count, int)
+        and not isinstance(column_count, bool)
+        and 1 <= column_count <= 6
+    )
+    if not tab_name or not valid_columns:
+        return {'error': 'Invalid tab name or column count'}, 400
+
+    with open('configs/layout.yml', 'r') as file:
+        layout = yaml.safe_load(file)
+
+    tabs = layout['tabs']
+
+    if tab_index is not None:
+        # Only accept a real position in the tab list; a negative index
+        # would otherwise silently edit a tab counted from the end.
+        valid_index = (
+            isinstance(tab_index, int)
+            and not isinstance(tab_index, bool)
+            and 0 <= tab_index < len(tabs)
+        )
+        if not valid_index:
+            return {'error': 'Invalid tab index'}, 400
+        # Edit an existing tab
+        tabs[tab_index]['name'] = tab_name
+        tabs[tab_index]['columns'] = column_count
+    else:
+        # Add a new tab
+        tabs.append({'name': tab_name, 'columns': column_count, 'widgets': []})
+
+    with open('configs/layout.yml', 'w') as file:
+        yaml.safe_dump(layout, file)
+
+    return {'message': f'Tab name "{tab_name}" with {column_count} columns saved successfully'}
+
 # Define route to render the template
 @app.route('/')
+@app.route('/<tab_name>')
 @cache.cached(timeout=600)
-def index():
+async def index(tab_name=None):
+  """
+  Render the dashboard for one tab.
+
+  tab_name is matched case-insensitively against the tab names in
+  layout.yml; when it is None or matches nothing, the first tab is used.
+  Feed widgets are loaded concurrently, bookmark widgets are built inline,
+  and each column is ordered by the widgets' 'position' value.
+  """
   # Load feeds and bookmarks
   layout = load_file('layout.yml', cache)
   headers = layout['headers']
-  widgets = layout['widgets']
 
-  # Divide feeds into three columns
-  columns = [[], [], []]
-
+  tabs = layout['tabs']
+  if tab_name is None:
+    tab = tabs[0]
+  else:
+    tab = next((tab for tab in tabs if tab["name"].lower() == tab_name.lower()), tabs[0])
+  current_tab = tab['name']
+
+  column_count = tab['columns']
+  columns = [[] for _ in range(column_count)]
+
+  tasks = []
+
   # Add feeds to the appropriate column
-  for widget in widgets:
-    column_index = (widget['column'] - 1) % 3
-    if widget['type'] == 'feed':
-      parsed_feed = feedparser.parse(widget['url'])
-      parsed_item = {
-        'title': widget['name'],
-        'link': widget['link'],
-        'type': widget['type'],
-        'summary_enabled': bool(widget.get('summary', True)),
-        'articles': [{
-          'title': " ".join(entry.get('title', 'No Title').split()).strip() , 
-          'link': entry.link, 
-          'summary': clean_html(entry.get('summary', ''))} for entry in parsed_feed.entries[:10]] if 'entries' in parsed_feed else []
-      }
-      parsed_item = post_processor.process(parsed_item['title'], parsed_item)
-      columns[column_index].append(parsed_item)
-    elif widget['type'] == 'bookmarks':
-      columns[column_index].append({
-        'title': widget['name'], 
-        'type': widget['type'], 
-        'articles': [{'title': entry['title'], 'link': entry['url']} for entry in widget['bookmarks']]
-      })
+  if tab['widgets']:
+    for widget in tab['widgets']:
+      column_index = (widget['column'] - 1) % column_count
+      if widget['type'] == 'feed':
+        # Each feed task appends its widget to the column when it finishes
+        tasks.append(asyncio.create_task(rss.load_feed(widget, columns[column_index])))
+      elif widget['type'] == 'bookmarks':
+        widget['articles'] = [{'title': entry['title'], 'link': entry['url']} for entry in widget['bookmarks']]
+        columns[column_index].append(widget)
 
+  # asyncio.wait() raises ValueError on an empty collection, which is the
+  # case for tabs with no feed widgets (or no widgets at all)
+  if tasks:
+    await asyncio.wait(tasks)
+
+  # Feeds finish in arbitrary order, so restore the configured ordering
+  for column in columns:
+    column.sort(key = lambda x: x['position'])
+
   # Pass column data to the template
-  return render_template('index.html', columns=columns, headers=headers)
+  return render_template('index.html', tabs=tabs, columns=columns, headers=headers, current_tab=current_tab)
 
 if __name__ == '__main__':
   port = int(os.environ.get("ONBOARD_PORT", 9830))

diff --git a/app/configs/layout.yml b/app/configs/layout.yml
@@ -10,27 +10,34 @@ headers:
   - name: "Youtube"
     link: "https://www.youtube.com/"
 
-widgets:
-  - name: "Lawrence Person's BattleSwarm Blog"
-    type: "feed"
-    link: "https://www.battleswarmblog.com/"
-    url: "https://www.battleswarmblog.com/?feed=rss2"
-    column: 1
-  - name: "Cafe Hayek"
-    type: "feed"
-    link: "https://cafehayek.com/"
-    url: "https://cafehayek.com/feed"
-    column: 1
-  - name: "Slashdot"
-    type: "feed"
-    link: "https://slashdot.org/"
-    url: "https://rss.slashdot.org/Slashdot/slashdotMain"
-    column: 1
-    summary: false
-  - name: "Bookmarks"
-    type: "bookmarks"
-    column: 2
-    bookmarks:
+tabs:
+  - name: "Home"
+    columns: 3
+    widgets:
+    - name: "Lawrence Person's BattleSwarm Blog"
+      type: "feed"
+      link: "https://www.battleswarmblog.com/"
+      url: "https://www.battleswarmblog.com/?feed=rss2"
+      column: 1
+      position: 1
+    - name: "Cafe Hayek"
+      type: "feed"
+      link: "https://cafehayek.com/"
+      url: "https://cafehayek.com/feed"
+      column: 1
+      position: 2
+    - name: "Slashdot"
+      type: "feed"
+      summary_enabled: False
+      link: "https://slashdot.org/"
+      url: "https://rss.slashdot.org/Slashdot/slashdotMain"
+      column: 1
+      position: 3
+    - name: "Bookmarks"
+      type: "bookmarks"
+      column: 2
+      position: 1
+      bookmarks:
       - title: Ali Express
         url: https://www.aliexpress.com/
       - title: Amazon
@@ -65,18 +72,27 @@ widgets:
         url: https://tailscale.com/
       - title: Thingiverse
         url: https://www.thingiverse.com/
-  - name: "Real Clear Politics"
-    type: "feed"
-    link: "https://www.realclearpolitics.com/"
-    url: "https://www.realclearpolitics.com/index.xml"
-    column: 2
-  - name: "Instapundit"
-    type: "feed"
-    link: "https://instapundit.com/"
-    url: "https://instapundit.com/feed/"
-    column: 3
-  - name: "Twitchy"
-    type: "feed"
-    link: "https://twitchy.com/"
-    url: "https://twitchy.com/feed"
-    column: 3
+    - name: "Real Clear Politics"
+      type: "feed"
+      link: "https://www.realclearpolitics.com/"
+      url: "https://www.realclearpolitics.com/index.xml"
+      column: 2
+      position: 2
+    - name: "Instapundit"
+      type: "feed"
+      link: "https://instapundit.com/"
+      url: "https://instapundit.com/feed/"
+      column: 3
+      position: 1
+    - name: "Twitchy"
+      type: "feed"
+      link: "https://twitchy.com/"
+      url: "https://twitchy.com/feed"
+      column: 3
+      position: 2
+  - name: "More"
+    columns: 3
+    widgets:
+  - name: "Monitoring"
+    columns: 3
+    widgets:
diff --git a/app/rss.py b/app/rss.py
@@ -1,11 +1,53 @@
+import time
+import aiohttp
 import feedparser
 import html
 import requests
-from bs4 import BeautifulSoup
+import re
+
+from post_processor import post_processor
+from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
+import warnings
+
+warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
 
 class Rss:
-  def clean_html(self, text):
-    return BeautifulSoup( html.unescape(text), 'lxml').get_text()
+
+  def clean_html(self, text: str) -> str:
+    """
+    Remove HTML tags, decode HTML entities, cut the text at the first
+    bracketed group, and strip leading and trailing whitespace.
+
+    Args:
+        text (str): The text to clean.
+
+    Returns:
+        str: The cleaned text.
+    """
+    text = text.replace('\n', ' ').replace('\r', ' ').strip()
+
+    if not text:
+      return text
+
+    text = BeautifulSoup(html.unescape(text), 'lxml').text
+    # Drop everything from the first "[...]" group onward (presumably aimed
+    # at trailing "[...]"-style read-more markers in feed summaries)
+    text = re.sub(r'\[.*?\].*$', '', text)
+    # Tag removal and the cut above can leave whitespace at the edges again
+    return text.strip()
+
+  async def load_feed(self, widget, column):
+    """
+    Fetch and parse one feed widget, then append it to its column.
+
+    Args:
+        widget (dict): Feed widget config. 'url' and 'name' are read;
+            'summary_enabled' and 'articles' are written in place.
+        column (list): Column list the finished widget is appended to.
+
+    Returns:
+        float: Seconds spent loading the feed.
+    """
+    import asyncio  # local import: only needed for the timeout exception type
+
+    start_time = time.time()
+    widget['summary_enabled'] = widget.get('summary_enabled', True)
+
+    try:
+      async with aiohttp.ClientSession() as session:
+        async with session.get(widget['url']) as response:
+          feed_text = await response.text()
+    except (aiohttp.ClientError, asyncio.TimeoutError) as error:
+      # One unreachable feed should not make its widget vanish from the page:
+      # keep the widget, just without articles
+      print(f"Failed to load feed {widget['url']}: {error}")
+      widget['articles'] = []
+      column.append(widget)
+      return (time.time() - start_time)
+
+    parsed_feed = feedparser.parse(feed_text)
+    widget['articles'] = [{
+        'title': " ".join(entry.get('title', 'No Title').split()).strip(),
+        # Not every feed entry carries a link; avoid AttributeError on those
+        'link': entry.get('link', ''),
+        'summary': self.clean_html(entry.get('summary', ''))} for entry in parsed_feed.entries[:10]] if 'entries' in parsed_feed else []
+    widget = post_processor.process(widget['name'], widget)
+    column.append(widget)
+    return (time.time() - start_time)
 
   def find_feed_links(self, url):
     response = requests.get(url)
@@ -23,6 +65,9 @@ def find_feed_links(self, url):
       print(f"Failed to retrieve content from {url}")
       return None
 
+rss = Rss()   
+
+
 
 if __name__ == "__main__":
   webpage_url = "https://blog.langchain.dev/automating-web-research/"# input("Enter the URL of the webpage: ")

diff --git a/app/static/css/index.css b/app/static/css/index.css
@@ -217,3 +217,36 @@ ul li:last-child {
   color: #ddd;
 }
 
+/* Tab action buttons: floated right with a gap on their left side */
+.new-tab-btn, .edit-tab-btn {
+  float: right;
+  margin-left: 10px;
+}
+
+.tab-buttons {
+  float: right;
+}
+
+/* Fixed, full-viewport translucent layer; hidden by default (display: none) */
+.modal {
+  display: none;
+  position: fixed;
+  top: 0;
+  left: 0;
+  width: 100%;
+  height: 100%;
+  z-index: 1;
+  overflow: auto;
+  background-color: rgba(0, 0, 0, 0.4);
+}
+
+/* Dialog panel, horizontally centred (presumably nested inside .modal) */
+.modal-content {
+  width: 30%;
+  margin: 15% auto;
+  padding: 20px;
+  border: 1px solid #888;
+  background-color: #fefefe;
+}
+