Skip to content

Commit

Permalink
allow ai to be optional
Browse files Browse the repository at this point in the history
  • Loading branch information
ilude committed Mar 30, 2024
1 parent 4737cb0 commit e806dcf
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 35 deletions.
5 changes: 5 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
{
"cSpell.words": [
"Instapundit",
"langchain",
"llms",
"Monero",
"ollama",
"Ollama",
"OLLAMA",
"Slickdeals",
"Tailscale",
"Thingiverse"
Expand Down
51 changes: 29 additions & 22 deletions app/post_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ def to_snake_case(self, input_string):
return snake_case_string

def process(self, widget):

if 'processed' in widget and widget['processed'] and not bool(os.environ.get('FLASK_DEBUG')):
print (f"Widget {widget['name']} already processed.")
return widget

self.normalize(widget)

# Check if the class has already been loaded
Expand All @@ -47,29 +50,33 @@ def process(self, widget):
self.loaded_classes[class_name] = instance

# Call process() method of the instance with the provided data
result = instance.process(widget)
return result
widget = instance.process(widget)
widget['processed'] = True
return widget

def normalize(self, widget):
    """Rebuild each article's title/summary from the preserved originals and clean them.

    Mutates ``widget['articles']`` in place. Working from ``original_title`` /
    ``original_summary`` keeps re-processing of a cached widget idempotent.
    Articles with a falsy ``original_summary`` get no ``summary`` key at all
    (pre-existing contract — downstream must tolerate the missing key).
    """
    for article in widget['articles']:
        # Title: collapse all runs of whitespace to single spaces.
        article['title'] = article['original_title'].strip()
        article['title'] = re.sub(r'\s+', ' ', article['title'])

        if not article['original_summary']:
            continue
        article['summary'] = article['original_summary']

        article['summary'] = article['summary'].replace('\n', ' ').replace('\r', ' ').strip()
        article['summary'] = BeautifulSoup(html.unescape(article['summary']), 'lxml').text
        # Strip a trailing feed-truncation marker "[...]" / "[…]" and anything after it.
        # fix: the old pattern r'\[[\.+|…\]].*$' used a malformed character class
        # ('+' and '|' are literal inside [...]), so it also truncated summaries at
        # unrelated bracketed text such as "[+1]".
        article['summary'] = re.sub(r'\s*\[(?:\.{3}|…)\].*$', '', article['summary'])

        if not article['summary']:
            # fix: an empty cleaned summary used to fall into the
            # "summary in title" branch below ('' is a substring of anything)
            # and wrongly null out the title.
            article['summary'] = None
            continue

        if article['summary'] == article['title']:
            # Summary adds nothing over the title.
            article['summary'] = None
        elif article['title'] in article['summary'] and len(article['title']) / len(article['summary']) > 0.64:
            # Title is most of the summary: promote the fuller summary to title.
            article['title'] = article['summary']
            article['summary'] = None
        elif article['summary'] in article['title']:
            # Summary is contained in the title: keep only the title text.
            article['summary'] = article['title']
            article['title'] = None

# Instantiate loader when the module is imported
post_processor = PostProcessor()
38 changes: 35 additions & 3 deletions app/processors/instapundit.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,44 @@
import os
import re

from langchain_community.llms import Ollama
from langchain.prompts import ChatPromptTemplate

class Instapundit:
    """Feed post-processor for Instapundit: drops affiliate/open-thread posts and
    tidies titles, optionally rewriting them via an Ollama-hosted LLM."""

    def __init__(self):
        # AI is optional: the chain is only built when OLLAMA_URL is set.
        # fix: always define self.chain so process() does not raise
        # AttributeError when the environment variable is absent.
        self.chain = None
        ollama_url = os.getenv('OLLAMA_URL')
        if ollama_url:
            # NOTE(review): prompt text reproduced verbatim (including typos such
            # as "you are and news") — changing it would change LLM behavior.
            _prompt = ChatPromptTemplate.from_messages([
                ("human", """
        Title: {title}
        Summary: {summary}
      """),
                ("system", """
        you are and news article title editor that reviews and provides a concise and accurate title when given
        an existing Title and article Summary.
        Remove all links from the title.
        Title should be as short as possible, aim to be less that 70 characters long.
        Title should have an absolute minimum of punctuation.
        Do your best to keep the existing title if possible.
        DO NOT provide any additional text or thoughts before or after the title.
        DO NOT put notes in parentheses.
        Provide the title only!
      """),
            ])

            _model = Ollama(base_url=ollama_url, model="dolphin-mistral", keep_alive=5, temperature=0.0)

            self.chain = _prompt | _model

    def process(self, widget):
        """Filter and clean the widget's articles in place; return the widget."""
        # Iterate over a shallow copy so removing from the live list is safe.
        for article in widget['articles'][:]:
            # Drop affiliate-link posts and recurring open-thread posts.
            if '#CommissionEarned' in article['title'] or re.search('Open Thread', article['title'], re.IGNORECASE):
                widget['articles'].remove(article)
                # fix: was a bare `next` (a no-op expression referencing the
                # builtin), which let just-removed articles fall through and
                # still be processed below.
                continue
            # Remove inline links from the visible title.
            article['title'] = re.sub(r'http[s]?://\S+', '', article['title'], flags=re.IGNORECASE)
            if self.chain:
                # Ask the LLM for a tighter title, built from the original fields.
                title = self.chain.invoke({"title": article['original_title'], "summary": article['original_summary']})
                # strip('""') was a duplicate-character set; '"' is equivalent.
                article['title'] = title.strip().strip('"')
            else:
                article['title'] = article['title'].strip().strip('"')

        return widget
15 changes: 7 additions & 8 deletions app/rss.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
import aiohttp
from cachelib import FileSystemCache
import feedparser
import html
import requests
import re

from post_processor import post_processor
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
Expand All @@ -26,7 +24,7 @@ async def load_feed(self, widget):
cached_widget = self.feed_cache.get(widget['name'])

# check if feed is in self.feeds and that the last updated time is less than 15 minutes ago
if cached_widget and (start_time - cached_widget['last_updated']) < 60 * 15:
if cached_widget and 'last_updated' in cached_widget and (start_time - cached_widget['last_updated']) < 60 * 15:
widget['articles'] = cached_widget['articles']
# print(f"Loaded {widget['name']} from cache")
else:
Expand All @@ -42,15 +40,16 @@ async def load_feed(self, widget):
parsed_feed = feedparser.parse(await response.text())

widget['articles'] = [{
'title': entry.get('title', 'No Title').strip() ,
'original_title': entry.get('title', 'No Title').strip(),
'link': entry.link,
'summary': entry.get('summary', None)
} for entry in parsed_feed.entries[:article_limit]] if 'entries' in parsed_feed else []
'original_summary': entry.get('summary', None)
} for entry in parsed_feed.entries[:article_limit]] if 'entries' in parsed_feed else []

widget['last_updated'] = start_time
self.feed_cache.set(widget['name'], widget)

widget = post_processor.process(widget)

post_processor.process(widget)
self.feed_cache.set(widget['name'], widget)

return (time.time() - start_time)

Expand Down
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
argcomplete
bs4
docker
feedparser
flask[async]
flask-caching
Flask-Minify
feedparser
langchain
langchain-community
lxml
python-dotenv
pyyaml
requests
hypercorn==0.15.0
aiohttp[speedups]
aiohttp[speedups]

0 comments on commit e806dcf

Please sign in to comment.