From 21977293caed33a03d1894c735bce1a7e1712600 Mon Sep 17 00:00:00 2001
From: Liam Steckler
Date: Thu, 13 Apr 2023 08:49:05 -0700
Subject: [PATCH] Initial version

---
 kuow_fetcher.py  | 68 ++++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  3 +++
 2 files changed, 71 insertions(+)
 create mode 100644 kuow_fetcher.py
 create mode 100644 requirements.txt

diff --git a/kuow_fetcher.py b/kuow_fetcher.py
new file mode 100644
--- /dev/null
+++ b/kuow_fetcher.py
@@ -0,0 +1,68 @@
+"""Post new KUOW homepage stories to Mastodon.
+
+Fetches the KUOW homepage story grid, skips every article link already
+recorded in the log file, and posts the description of each new article
+as an unlisted Mastodon status.
+"""
+import requests
+from bs4 import BeautifulSoup
+from mastodon import Mastodon, MastodonError
+
+kuow_base_url = "https://www.kuow.org"
+url = kuow_base_url + "/fragments?name=story_grid&source=homepage&view_id=1&page=1&per_page=12"
+log_file = "kuow_bot_logfile.txt"
+# Seconds to wait on a KUOW request, so a stalled connection cannot hang the bot.
+request_timeout = 30
+
+
+def load_seen_links():
+    """Return the set of article links already recorded in the log file."""
+    try:
+        with open(log_file, "r", encoding="utf-8") as fp:
+            return {line.rstrip("\n") for line in fp}
+    except FileNotFoundError:
+        # First run: the log does not exist yet, so nothing has been seen.
+        return set()
+
+
+def main():
+    """Post every homepage article whose link is not yet in the log file."""
+    mastodon = Mastodon(access_token="kuow_bot_mastodon.secret")
+    seen_links = load_seen_links()
+
+    r = requests.get(url, timeout=request_timeout)
+    soup = BeautifulSoup(r.content, "html.parser")
+    for article in soup.find_all("span", class_="txt"):
+        anchor = article.find("a")
+        if anchor is None or "href" not in anchor.attrs:
+            # A matching span without a link cannot be posted; skip it.
+            continue
+        article_link = anchor.attrs["href"]
+
+        if article_link in seen_links:
+            print("Article " + article_link + " has already been seen")
+            continue
+
+        print(article_link + " has not been seen, posting")
+        # The link is recorded before posting, so an article that fails
+        # below is not retried on later runs.
+        with open(log_file, "a", encoding="utf-8") as fp:
+            fp.write(article_link + "\n")
+        seen_links.add(article_link)
+
+        try:
+            article_lookup = requests.get(kuow_base_url + article_link, timeout=request_timeout)
+            article_soup = BeautifulSoup(article_lookup.content, "html.parser")
+            description_tag = article_soup.find("meta", attrs={"property": "description"})
+            article_description = description_tag.attrs["content"].strip()
+            mastodon.status_post(
+                status=article_description + "\n" + kuow_base_url + article_link + "\n#KUOW #News",
+                visibility="unlisted",
+            )
+        except (requests.RequestException, MastodonError, AttributeError, KeyError) as exc:
+            # AttributeError/KeyError: the page has no usable description meta tag.
+            print("Could not load a description/post this article:", exc)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4029575
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+beautifulsoup4==4.12.2
+Mastodon.py==1.8.0
+requests==2.28.2
\ No newline at end of file