Skip to content

Commit

Permalink
Migrate site probes to github action
Browse files Browse the repository at this point in the history
  • Loading branch information
polyrabbit committed Feb 27, 2024
1 parent dcc4a2a commit b5cca5a
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 1 deletion.
51 changes: 51 additions & 0 deletions .github/workflows/probe.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Workflow that runs the site probes in probe.py on pushes, on demand and on a schedule
name: Probe

on:
  # Runs on pushes targeting the default branch
  push:
    branches: [ "master" ]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

  # Allows triggering through a repository_dispatch event of type "probe-hn-sites"
  repository_dispatch:
    types: [ probe-hn-sites ]

  # Runs every 10 minutes
  schedule:
    - cron: "*/10 * * * *"

# Allow one concurrent run per triggering event type
concurrency:
  group: probe-event-${{ github.event_name }} # So UT won't be interrupted by cronjobs
  cancel-in-progress: true # Avoid batch pending when one job hangs

jobs:
  # Single job that runs every probe as its own step
  probe:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Setup Python
        # NOTE(review): the version tag of this action was garbled in the extracted
        # source ("[email protected]"); v4 is assumed here - confirm the intended pin.
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML never reads a version such as 3.10 as the float 3.1
          python-version: '3.9'
          cache: 'pip'

      - name: Install Python Dependencies
        run: pip install --upgrade -r requirements.txt

      # always() keeps each probe running even when an earlier step has failed,
      # so one broken site does not hide the status of the others.
      - name: Blog
        run: python probe.py blog
        if: ${{ always() }}

      - name: HN Summary
        run: python probe.py hn
        if: ${{ always() }}

      - name: HN Summary ZH
        run: python probe.py hn-zh
        if: ${{ always() }}
2 changes: 1 addition & 1 deletion config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
logging.basicConfig(level=logging.DEBUG if DEBUG else logging.INFO,
format='%(asctime)s %(levelname)s [%(filename)s:%(lineno)d %(funcName)s] - %(message)s',
handlers=log_handlers)
-logger = logging.getLogger()
+logger = logging.getLogger(__name__)


def int_env(name, default):
Expand Down
62 changes: 62 additions & 0 deletions probe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# coding: utf-8
import argparse
import logging
import re
from datetime import datetime, timedelta

from page_content_extractor.http import session

# Command-line interface: one positional argument selects which site to probe.
parser = argparse.ArgumentParser(description='Probe betacat.io sites')
parser.add_argument(
    "site",
    help="Specify site to probe",
    choices=['hn', 'hn-zh', 'blog'],
)
# NOTE: the arguments are parsed as soon as this module is loaded, not only
# when it is run as a script.
args = parser.parse_args()
logger = logging.getLogger(__name__)


def probe_hn_summary():
    """Probe the Hacker News summary page and assert that it is healthy and fresh.

    Fetches https://hackernews.betacat.io/ and checks that the page mentions
    "Hacker News", contains "OpenAI" more than five times, and shows a
    "Last updated" timestamp that is at most one hour old.

    Raises:
        AssertionError: if any check fails, including when the "Last updated"
            timestamp cannot be found in the page at all.
        ValueError: if the timestamp text does not match the expected
            "%Y-%m-%d %H:%M:%S %Z" format.
        Exception: whatever ``resp.raise_for_status()`` raises for an
            unsuccessful response (presumably a requests-style HTTP error;
            verify against ``page_content_extractor.http``).
    """
    url = 'https://hackernews.betacat.io/'
    resp = session.get(url)
    resp.raise_for_status()
    body = resp.text

    assert "Hacker News" in body, '"Hacker News" not in response'

    # Count once and reuse the number for the check, its message and the log line.
    openai_count = body.count("OpenAI")
    assert openai_count > 5, "Too few OpenAI summaries, only got %d" % openai_count
    logger.info('OpenAI summaries %d times', openai_count)

    pattern = r'Last updated: <span>(.*?)<\/span>'
    matches = re.search(pattern, body)
    # re.search returns None when the marker is absent; fail with a readable
    # probe error instead of an AttributeError on None.
    assert matches is not None, '"Last updated" timestamp not found in response'

    time_updated_str = matches.group(1)
    # NOTE(review): strptime's %Z produces a naive datetime, so the comparison
    # below presumes the page prints its timestamp in UTC - confirm.
    time_updated = datetime.strptime(time_updated_str, "%Y-%m-%d %H:%M:%S %Z")

    current_time = datetime.utcnow()

    assert current_time <= time_updated + timedelta(hours=1), \
        "Haven't been updated for one hour, last update: " + time_updated_str


def probe_hn_zh():
    """Probe the Chinese Hacker News summary page.

    Fetches https://hackernews.betacat.io/zh.html and asserts that the page
    text contains the marker '摘要'.

    Raises:
        AssertionError: if the marker is missing from the page.
    """
    response = session.get('https://hackernews.betacat.io/zh.html')
    # Surface an unsuccessful HTTP status before looking at the content.
    response.raise_for_status()

    assert '摘要' in response.text


def probe_blog():
    """Probe the blog front page.

    Fetches https://blog.betacat.io/ and asserts that the page text contains
    the marker '喵叔没话说'.

    Raises:
        AssertionError: if the marker is missing from the page.
    """
    response = session.get('https://blog.betacat.io/')
    # Surface an unsuccessful HTTP status before looking at the content.
    response.raise_for_status()

    assert '喵叔没话说' in response.text


if __name__ == '__main__':
    # Map each accepted value of the "site" argument to its probe function.
    site_probes = {
        'blog': probe_blog,
        'hn-zh': probe_hn_zh,
        'hn': probe_hn_summary,
    }
    selected_probe = site_probes.get(args.site)
    if selected_probe is not None:
        selected_probe()
    else:
        # Not expected to happen: argparse restricts "site" to the keys above.
        assert False

0 comments on commit b5cca5a

Please sign in to comment.