From 24c326a751a2dbc42e2b04e7e92aa74d326400b7 Mon Sep 17 00:00:00 2001
From: mike <219478+ilude@users.noreply.github.com>
Date: Thu, 28 Mar 2024 14:22:06 -0400
Subject: [PATCH] Implement async feed requests to improve performance on a
 cold cache

---
 app/app.py               | 110 +++++++++++++++++++++++++--------------
 app/configs/layout.yml   |  88 ++++++++++++++++++-------------
 app/rss.py               |  51 ++++++++++++++++--
 app/static/css/index.css |  33 ++++++++++++
 app/static/js/script.js  |  56 ++++++++++++++++++++
 app/templates/index.html |  37 +++++++++++--
 app/utils.py             |  60 +++++++++------------
 requirements.txt         |   5 +-
 8 files changed, 321 insertions(+), 119 deletions(-)
 create mode 100644 app/static/js/script.js

diff --git a/app/app.py b/app/app.py
index 4c790c2..69e0341 100644
--- a/app/app.py
+++ b/app/app.py
@@ -1,66 +1,100 @@
 import os
+import yaml
+import asyncio
 from datetime import datetime
 
-import feedparser
-from datetime import datetime
-from flask import Flask, render_template
+from flask import Flask, request, render_template
 from flask_caching import Cache
-from post_processor import post_processor
 
-from utils import clean_html, copy_default_to_configs, load_file
+from utils import copy_default_to_configs, load_file
+from rss import rss
 
 copy_default_to_configs()
 
 app = Flask(__name__)
 
-# 600 seconds = 10 minutes
-cache = Cache(app, config={
-  'CACHE_TYPE': 'simple',            
-  'CACHE_DEFAULT_TIMEOUT': 600
-})
+if os.environ.get("FLASK_DEBUG", "False") == "True":
+  cache_config={
+    'CACHE_TYPE': 'null'
+  }
+else:
+  # 600 seconds = 10 minutes
+  cache_config={
+    'CACHE_TYPE': 'simple',            
+    'CACHE_DEFAULT_TIMEOUT': 600
+  }
+  
+cache = Cache(app, config=cache_config)
 
 @app.context_processor
 def inject_current_date():
   return {'today_date': datetime.now()}
 
+@app.route('/save_tab_name', methods=['POST'])
+def save_tab_name():
+    """Rename/resize an existing tab, or append a new one, in configs/layout.yml.
+
+    Expects JSON: 'tab_name', 'column_count' (1-6), optional 0-based 'tab_index'.
+    """
+    data = request.get_json(silent=True) or {}
+    tab_name = data.get('tab_name')
+    tab_index = data.get('tab_index')
+    column_count = data.get('column_count')
+
+    # A missing column_count is None, and None >= 1 raises TypeError (HTTP 500).
+    if not tab_name or not isinstance(column_count, int) or not 1 <= column_count <= 6:
+        return {'error': 'Invalid tab name or column count'}, 400
+
+    with open('configs/layout.yml', 'r') as file:
+        layout = yaml.safe_load(file)
+    tabs = layout['tabs']
+    if tab_index is None:
+        tabs.append({'name': tab_name, 'columns': column_count, 'widgets': []})
+    elif isinstance(tab_index, int) and 0 <= tab_index < len(tabs):
+        tabs[tab_index].update(name=tab_name, columns=column_count)
+    else:
+        return {'error': 'Invalid tab index'}, 400
+    with open('configs/layout.yml', 'w') as file:
+        yaml.safe_dump(layout, file)
+    return {'message': f'Tab name "{tab_name}" with {column_count} columns saved successfully'}
+
 # Define route to render the template
 @app.route('/')
+@app.route('/<tab_name>')
 @cache.cached(timeout=600)
-def index():
+async def index(tab_name=None):
   # Load feeds and bookmarks
   layout = load_file('layout.yml', cache)
   headers = layout['headers']
-  widgets = layout['widgets']
   
-  # Divide feeds into three columns
-  columns = [[], [], []]
-
+  tabs = layout['tabs']
+  if tab_name is None:
+    tab = tabs[0]
+  else:
+    tab = next((tab for tab in tabs if tab["name"].lower() == tab_name.lower()), tabs[0])
+  current_tab = tab['name']
+  
+  column_count = tab['columns']
+  columns = [[] for _ in range(column_count)]
+  
+  tasks = []
+    
   # Add feeds to the appropriate column
-  for widget in widgets:
-    column_index = (widget['column'] - 1) % 3
-    if widget['type'] == 'feed':
-      parsed_feed = feedparser.parse(widget['url'])
-      parsed_item = {
-        'title': widget['name'],
-        'link': widget['link'],
-        'type': widget['type'],
-        'summary_enabled': bool(widget.get('summary', True)),
-        'articles': [{
-          'title': " ".join(entry.get('title', 'No Title').split()).strip() , 
-          'link': entry.link, 
-          'summary': clean_html(entry.get('summary', ''))} for entry in parsed_feed.entries[:10]] if 'entries' in parsed_feed else []
-      }
-      parsed_item = post_processor.process(parsed_item['title'], parsed_item)
-      columns[column_index].append(parsed_item)
-    elif widget['type'] == 'bookmarks':
-      columns[column_index].append({
-        'title': widget['name'], 
-        'type': widget['type'], 
-        'articles': [{'title': entry['title'], 'link': entry['url']} for entry in widget['bookmarks']]
-      })
+  # An empty "widgets:" key loads from YAML as None, hence the "or []".
+  for widget in tab['widgets'] or []:
+    column_index = (widget['column'] - 1) % column_count
+    if widget['type'] == 'feed':
+      tasks.append(asyncio.create_task(rss.load_feed(widget, columns[column_index])))
+    elif widget['type'] == 'bookmarks':
+      widget['articles'] = [{'title': entry['title'], 'link': entry['url']} for entry in widget['bookmarks']]
+      columns[column_index].append(widget)
 
+  await asyncio.gather(*tasks, return_exceptions=True)  # unlike asyncio.wait(), accepts zero tasks
+  for column in columns:
+    column.sort(key = lambda x: x['position'])
+    
   # Pass column data to the template
-  return render_template('index.html', columns=columns, headers=headers)
+  return render_template('index.html', tabs=tabs, columns=columns, headers=headers, current_tab=current_tab)
 
 if __name__ == '__main__':
   port = int(os.environ.get("ONBOARD_PORT", 9830))
diff --git a/app/configs/layout.yml b/app/configs/layout.yml
index 74725d8..ed3e878 100644
--- a/app/configs/layout.yml
+++ b/app/configs/layout.yml
@@ -10,27 +10,34 @@ headers:
   - name: "Youtube"
     link: "https://www.youtube.com/"
 
-widgets:
-  - name: "Lawrence Person's BattleSwarm Blog"
-    type: "feed"
-    link: "https://www.battleswarmblog.com/"
-    url: "https://www.battleswarmblog.com/?feed=rss2"
-    column: 1
-  - name: "Cafe Hayek"
-    type: "feed"
-    link: "https://cafehayek.com/"
-    url: "https://cafehayek.com/feed"
-    column: 1
-  - name: "Slashdot"
-    type: "feed"
-    link: "https://slashdot.org/"
-    url: "https://rss.slashdot.org/Slashdot/slashdotMain"
-    column: 1
-    summary: false
-  - name: "Bookmarks"
-    type: "bookmarks"
-    column: 2
-    bookmarks:
+tabs:
+  - name: "Home"
+    columns: 3
+    widgets:
+    - name: "Lawrence Person's BattleSwarm Blog"
+      type: "feed"
+      link: "https://www.battleswarmblog.com/"
+      url: "https://www.battleswarmblog.com/?feed=rss2"
+      column: 1
+      position: 1
+    - name: "Cafe Hayek"
+      type: "feed"
+      link: "https://cafehayek.com/"
+      url: "https://cafehayek.com/feed"
+      column: 1
+      position: 2
+    - name: "Slashdot"
+      type: "feed"
+      summary_enabled: False
+      link: "https://slashdot.org/"
+      url: "https://rss.slashdot.org/Slashdot/slashdotMain"
+      column: 1
+      position: 3
+    - name: "Bookmarks"
+      type: "bookmarks"
+      column: 2
+      position: 1
+      bookmarks:
       - title: Ali Express
         url: https://www.aliexpress.com/
       - title: Amazon
@@ -65,18 +72,27 @@ widgets:
         url: https://tailscale.com/
       - title: Thingiverse
         url: https://www.thingiverse.com/
-  - name: "Real Clear Politics"
-    type: "feed"
-    link: "https://www.realclearpolitics.com/"
-    url: "https://www.realclearpolitics.com/index.xml"
-    column: 2
-  - name: "Instapundit"
-    type: "feed"
-    link: "https://instapundit.com/"
-    url: "https://instapundit.com/feed/"
-    column: 3
-  - name: "Twitchy"
-    type: "feed"
-    link: "https://twitchy.com/"
-    url: "https://twitchy.com/feed"
-    column: 3
\ No newline at end of file
+    - name: "Real Clear Politics"
+      type: "feed"
+      link: "https://www.realclearpolitics.com/"
+      url: "https://www.realclearpolitics.com/index.xml"
+      column: 2
+      position: 2
+    - name: "Instapundit"
+      type: "feed"
+      link: "https://instapundit.com/"
+      url: "https://instapundit.com/feed/"
+      column: 3
+      position: 1
+    - name: "Twitchy"
+      type: "feed"
+      link: "https://twitchy.com/"
+      url: "https://twitchy.com/feed"
+      column: 3
+      position: 2
+  - name: "More"
+    columns: 3
+    widgets:
+  - name: "Monitoring"
+    columns: 3
+    widgets:
\ No newline at end of file
diff --git a/app/rss.py b/app/rss.py
index e932ba1..4ca5d8c 100644
--- a/app/rss.py
+++ b/app/rss.py
@@ -1,11 +1,53 @@
+import time
+import aiohttp
 import feedparser
 import html
 import requests
-from bs4 import BeautifulSoup
+import re
+
+from post_processor import post_processor
+from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
+import warnings
+
+warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
 
 class Rss:
-  def clean_html(self, text):
-    return BeautifulSoup( html.unescape(text), 'lxml').get_text()
+  
+  def clean_html(self, text: str) -> str:
+    """
+    Remove HTML tags, decode HTML entities, cut the text at the first
+    bracketed marker (e.g. "[...]"), and strip surrounding whitespace.
+
+    Args:
+        text (str): The text to clean.
+
+    Returns:
+        str: The cleaned text; blank input is returned as an empty string.
+    """
+    text = text.replace('\n', ' ').replace('\r', ' ').strip()
+
+    if not text:
+      return text
+
+    text = BeautifulSoup(html.unescape(text), 'lxml').text
+    # Drop the first bracketed marker and everything that follows it.
+    text = re.sub(r'\[.*?\].*$', '', text)
+    # Tag removal and the cut above can leave whitespace at the edges.
+    return text.strip()
+
+  async def load_feed(self, widget, column):
+    """Fetch widget['url'], attach up to 10 parsed articles, append the widget to column; return elapsed seconds."""
+    started = time.time()
+    async with aiohttp.ClientSession() as session, session.get(widget['url']) as response:
+      feed = feedparser.parse(await response.text())
+      widget['summary_enabled'] = widget.get('summary_enabled', True)
+      entries = feed.entries[:10] if 'entries' in feed else []
+      widget['articles'] = [{
+          'title': " ".join(entry.get('title', 'No Title').split()).strip(),
+          'link': entry.link,
+          'summary': self.clean_html(entry.get('summary', ''))} for entry in entries]
+      column.append(post_processor.process(widget['name'], widget))
+      return time.time() - started
   
   def find_feed_links(self, url):
     response = requests.get(url)
@@ -23,6 +65,9 @@ def find_feed_links(self, url):
       print(f"Failed to retrieve content from {url}")
       return None
 
+rss = Rss()   
+
+
 
 if __name__ == "__main__":
   webpage_url = "https://blog.langchain.dev/automating-web-research/"# input("Enter the URL of the webpage: ")
diff --git a/app/static/css/index.css b/app/static/css/index.css
index 9e510c2..24c3a32 100644
--- a/app/static/css/index.css
+++ b/app/static/css/index.css
@@ -217,3 +217,36 @@ ul li:last-child {
   color: #ddd;
 }
 
+.new-tab-btn, .edit-tab-btn {
+  float: right;
+  margin-left: 10px;
+}
+
+.tab-buttons {
+  float: right;
+}
+
+.new-tab-btn, .edit-tab-btn {
+  margin-left: 10px;
+}
+
+.modal {
+  display: none;
+  position: fixed;
+  z-index: 1;
+  left: 0;
+  top: 0;
+  width: 100%;
+  height: 100%;
+  overflow: auto;
+  background-color: rgba(0, 0, 0, 0.4);
+}
+
+.modal-content {
+  background-color: #fefefe;
+  margin: 15% auto;
+  padding: 20px;
+  border: 1px solid #888;
+  width: 30%;
+}
+
diff --git a/app/static/js/script.js b/app/static/js/script.js
new file mode 100644
index 0000000..3bdcbb1
--- /dev/null
+++ b/app/static/js/script.js
@@ -0,0 +1,56 @@
+$(document).ready(function() {
+  // Get the modal elements
+  const $modal = $('.modal');
+  const $tabNameInput = $('#tabNameInput');
+  const $columnSelect = $('#columnSelect');
+  const $saveBtn = $('#saveBtn');
+
+  // Open the modal
+  function openModal(tabName='', editIndex = -1) {
+    $tabNameInput.val(tabName);
+    $saveBtn.off('click').on('click', function() { saveTabName(editIndex) });
+    $modal.show();
+  }
+
+  // Save the tab: POST its name, column count and (when editing) its index.
+  function saveTabName(editIndex) {
+    const newTabName = $tabNameInput.val().trim();
+    if (newTabName) {
+      const payload = {
+        'tab_name': newTabName,
+        'tab_index': editIndex !== -1 ? editIndex : null,
+        // The /save_tab_name route rejects requests without a numeric column_count.
+        'column_count': parseInt($columnSelect.val(), 10)
+      };
+      $.ajax({
+        url: '/save_tab_name',
+        type: 'POST',
+        data: JSON.stringify(payload),
+        contentType: 'application/json; charset=utf-8',
+        dataType: 'json',
+        success: function(response) { console.log('Tab name saved:', response.message); },
+        error: function(xhr, status, error) {
+          console.error('Error saving tab name:', error);
+        }
+      });
+    }
+    $modal.hide();
+    $tabNameInput.val('');
+  }
+
+  // Close the modal when clicking outside of it
+  $(window).click(function(event) {
+    if (event.target === $modal[0]) {
+      $modal.hide();
+    }
+  });
+
+  // Wire up the buttons: "New Tab" opens an empty modal, "Edit Tab" one
+  // pre-filled from the link the template marked with data-current="True".
+  $('.new-tab-btn').click(function() { openModal(); });
+  $('.edit-tab-btn').click(function() {
+    const activeTab = $('.tab-bar a').filter(function() { return $(this).data('current') === 'True'; }).first();
+    openModal(activeTab.text(), activeTab.data('index'));
+  });
+  
+});
\ No newline at end of file
diff --git a/app/templates/index.html b/app/templates/index.html
index 81d67e0..02e7d89 100644
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -6,6 +6,8 @@
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   <title>OnBoard</title>
   <link rel="stylesheet" href="{{ url_for('static', filename='css/index.css') }}">
+  <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
+  <script src="{{ url_for('static', filename='js/script.js') }}"></script>
 </head>
 
 <body>
@@ -20,9 +22,34 @@
         <div class="logo">OB</div> <!-- Placeholder for the logo -->
       </div>
       <div class="tab-bar">
-        <a href="#tab1">Tab 1</a>
-        <a href="#tab2">Tab 2</a>
-        <a href="#tab3">Tab 3</a>
+        {% for tab in tabs %}{# data-index is 0-based (loop.index0) because the server indexes tabs[tab_index] #}
+          <a href="/{{tab.name}}" data-index="{{ loop.index0 }}"  data-current="{{ tab.name == current_tab }}">{{tab.name}}</a>
+        {% endfor %}
+        <div class="tab-buttons">
+          <button class="new-tab-btn">New Tab</button>
+          <button class="edit-tab-btn">Edit Tab</button>
+        </div>
+        <div class="modal">
+          <div class="modal-content">
+            <div>
+              <label for="tabNameInput">Tab Name:</label>
+              <input type="text" id="tabNameInput" placeholder="Enter tab name">
+            </div>
+            <div>
+              <label for="columnSelect">Number of Columns:</label>
+              <select id="columnSelect">
+                <option value="1">1</option>
+                <option value="2">2</option>
+                <option value="3">3</option>
+                <option value="4">4</option>
+                <option value="5">5</option>
+                <option value="6">6</option>
+              </select>
+            </div>
+            
+            <button id="saveBtn">Save</button>
+          </div>
+        </div>
       </div>
       <div class="row">
         {% for column in columns %}
@@ -30,7 +57,7 @@
           {% for feed in column %}
           <div class="box {{ feed.type }}-box">
             <div class="box-header {{ feed.type }}-header">
-              <a href="{{ feed.link }}" target="_blank">{{ feed.title }}</a></div>
+              <a href="{{ feed.link }}" target="_blank">{{ feed.name }}</a></div>
             <div class="box-content {{ feed.type }}-content">
               <ul>
                 {% for article in feed.articles %}
@@ -42,7 +69,7 @@
                       No Title
                     {% endif %}
                   </a>
-                  {% if feed.summary_enabled and article.summary and article.summary >= article.title %}
+                  {% if feed.summary_enabled and article.summary %}
                   <div class="summary {{ feed.type }}">{{ article.summary }}</div>
                   {% endif %}
                 </li>
diff --git a/app/utils.py b/app/utils.py
index 17b0043..93f8ab2 100644
--- a/app/utils.py
+++ b/app/utils.py
@@ -1,12 +1,7 @@
-import html
 import os
-import re
 import shutil
-
 import yaml
 
-from bs4 import BeautifulSoup
-
 def copy_default_to_configs():
   pwd = os.path.dirname(os.path.abspath(__file__))
   default_dir = os.path.join(pwd, 'defaults') 
@@ -27,33 +22,28 @@ def copy_default_to_configs():
     print(f"Default files synced from {default_dir} to {config_dir}.")
   else:
     print(f"No files copied from {default_dir} to {config_dir}.")    
-    
-def clean_html(text):
-  text = text.replace('\n', ' ').replace('\r', ' ')
-  text = BeautifulSoup(html.unescape(text), 'lxml').text
-  text = re.sub(r'\[.*?\].*$', '', text)
-  # text = re.sub(r'http[s]?://\S+', '', text, flags=re.IGNORECASE)
-  # text = ' '.join([x.capitalize() for x in text.split(' ')])
-
-  return text.strip()
-
-global last_modified_times
-last_modified_times = {}
-
-def load_file(file_name, cache):
-  # Adjust file path for the configs subdirectory
-  current_working_directory = os.path.dirname(os.path.realpath(__file__))
-  file_path = os.path.join(current_working_directory, 'configs', file_name)
-
-  # Check the last modification time of the file
-  current_modified_time = os.path.getmtime(file_path)
-  current_data = cache.get(file_path)
-
-  # If the file has been modified since the last check, reload it
-  if current_modified_time != last_modified_times.get(file_path) or not current_data:
-    last_modified_times[file_path] = current_modified_time
-    with open(file_path, 'r') as file:
-      current_data = yaml.safe_load(file)
-      cache.set(file_path, current_data)
-
-  return current_data
\ No newline at end of file
+
+class FileData:
+    """Contents of a config file paired with the mtime seen when it was read."""
+    def __init__(self, last_modified=0, contents=None):
+        self.last_modified, self.contents = last_modified, contents
+
+    def __getitem__(self, key, default=None):
+        return self.contents.get(key, default)
+
+file_cache = {}
+
+def load_file(file_name, cache=None):
+    """Return the parsed YAML of configs/<file_name>, re-reading it only when its mtime changes.
+
+    `cache` is unused; it is kept so existing callers that pass it keep working.
+    """
+    module_dir = os.path.dirname(os.path.realpath(__file__))
+    file_path = os.path.join(module_dir, 'configs', file_name)
+    current_modified_time = os.path.getmtime(file_path)
+    cached = file_cache.get(file_path)
+    # `!=` rather than `>`: a restored/older copy of the file must also trigger a reload.
+    if cached is None or cached.last_modified != current_modified_time:
+        with open(file_path, 'r') as file:
+            cached = file_cache[file_path] = FileData(current_modified_time, yaml.safe_load(file))
+    return cached.contents
diff --git a/requirements.txt b/requirements.txt
index b1321c2..49ece52 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,12 @@
 argcomplete
 bs4
 docker
-flask 
+flask[async]
 flask-caching
 feedparser
 lxml
 python-dotenv
 pyyaml 
 requests
-hypercorn==0.15.0
\ No newline at end of file
+hypercorn==0.15.0
+aiohttp[speedups]
\ No newline at end of file