kuow-mastodon-bot/kuow_fetcher.py

52 lines
1.7 KiB
Python
Raw Normal View History

2024-01-14 14:48:09 -08:00
import requests
from bs4 import BeautifulSoup
from mastodon import Mastodon
2024-01-14 14:48:09 -08:00
# Base URL of the KUOW site; relative article links are appended to it.
kuow_base_url = "https://www.kuow.org"

# Fragment endpoint queried for the homepage story grid (page 1, 12 per page).
url = (
    kuow_base_url
    + "/fragments?name=story_grid&source=homepage&view_id=1&page=1&per_page=12"
)

# Text file holding one already-posted article link per line.
log_file = "kuow_bot_logfile.txt"

# Mastodon client used for posting.
# NOTE(review): access_token is presumably the name of a credentials file
# rather than a literal token -- confirm against the Mastodon.py docs.
mastodon = Mastodon(access_token="kuow_bot_mastodon.secret")

# Fetch the story grid. Without a timeout, requests waits indefinitely on a
# stalled connection, which would hang an unattended run of this script.
r = requests.get(url, timeout=30)

# Each story entry is looked up as a <span class="txt"> element.
soup = BeautifulSoup(r.content, "html.parser")
articles = soup.find_all("span", class_="txt")
2024-01-14 14:48:09 -08:00
# Reverse articles, so that if multiple new ones have been found, they'll be
# posted in order of when published
articles.reverse()

# Load the links that were already posted once, up front, instead of
# re-reading the log file for every article. A missing log file (e.g. on the
# very first run) simply means nothing has been posted yet.
try:
    with open(log_file, "r", encoding="utf-8") as fp:
        seen_links = {row.rstrip("\n") for row in fp}
except FileNotFoundError:
    seen_links = set()

for article in articles:
    # Skip entries without a usable link instead of crashing the whole run.
    anchor = article.find("a")
    article_link = anchor.get("href") if anchor is not None else None
    if not article_link:
        print("Skipping a story entry without a link")
        continue

    if article_link in seen_links:
        print("Article " + article_link + " has already been seen")
        continue

    print(article_link + " has not been seen, posting")

    # Fetch the article page. A network failure or an HTTP error status only
    # skips this article; the remaining ones are still processed.
    try:
        article_lookup = requests.get(kuow_base_url + article_link, timeout=30)
        article_lookup.raise_for_status()
    except requests.RequestException as exc:
        print("Could not load a description/post this article: " + repr(exc))
        continue

    # The post text comes from the page's <meta property="description"> tag.
    article_soup = BeautifulSoup(article_lookup.content, "html.parser")
    description_tag = article_soup.find("meta", attrs={"property": "description"})
    raw_description = (
        description_tag.get("content") if description_tag is not None else None
    )
    if raw_description is None:
        print("Could not load a description/post this article")
        continue
    article_description = raw_description.strip()

    try:
        mastodon.status_post(
            status=article_description
            + "\n"
            + kuow_base_url
            + article_link
            + "\n#KUOW #News",
            visibility="unlisted",
        )
    except Exception as exc:
        # Per-article boundary: the Mastodon client's own error classes are
        # not imported in this file, so catch Exception here (unlike a bare
        # except, this lets KeyboardInterrupt/SystemExit through) and report
        # the reason.
        print("Could not load a description/post this article: " + repr(exc))
        continue

    # Remember the link in memory as well, so a link that appears twice in the
    # same story grid is not posted twice during this run.
    seen_links.add(article_link)
    try:
        with open(log_file, "a", encoding="utf-8") as fp:
            fp.write(article_link + "\n")
    except OSError as exc:
        # The post already went out; make the failure visible, since the
        # article will be posted again on the next run if it is not logged.
        print("Posted " + article_link + " but could not update the log: " + repr(exc))