kuow-mastodon-bot/kuow_fetcher.py

59 lines
2 KiB
Python
Raw Normal View History

2024-01-14 14:48:09 -08:00
import requests
2024-01-14 15:12:33 -08:00
import yaml
2024-01-14 14:48:09 -08:00
from bs4 import BeautifulSoup
from mastodon import Mastodon
2024-01-14 14:48:09 -08:00
# Root of the KUOW site; article links found in the story grid are appended to it.
kuow_base_url = "https://www.kuow.org"

# Query string selecting the first page (12 entries) of the homepage story grid.
_story_grid_query = "name=story_grid&source=homepage&view_id=1&page=1&per_page=12"

# Full URL of the HTML fragment that lists the homepage stories.
url = f"{kuow_base_url}/fragments?{_story_grid_query}"
2024-01-14 14:48:09 -08:00
# Text file that records the article links already posted, one link per line.
# It is read to detect previously seen articles and appended to after each
# successful post. The path is relative, so it resolves against the current
# working directory of the process.
log_file = "kuow_bot_logfile.txt"
2024-01-14 15:12:33 -08:00
# Load the Mastodon API settings from config.yml. The file is opened in a
# `with` block so the handle is closed deterministically instead of being
# left for the garbage collector.
with open("config.yml") as config_file:
    config = yaml.safe_load(config_file)

# Client used to publish statuses; all four values come from the "mastodon"
# section of the config file.
mastodon = Mastodon(
    client_id=config["mastodon"]["client_id"],
    client_secret=config["mastodon"]["client_secret"],
    access_token=config["mastodon"]["access_token"],
    api_base_url=config["mastodon"]["api_base_url"],
)
2024-01-14 14:48:09 -08:00
2024-01-14 15:12:33 -08:00
# Fetch the homepage story-grid fragment. requests waits indefinitely by
# default, so an explicit timeout (seconds) keeps an unresponsive server from
# hanging the bot forever.
kuow_response = requests.get(url, timeout=30)
soup = BeautifulSoup(kuow_response.content, "html.parser")

# Story entries are located via their <span class="txt"> elements; each one is
# expected to contain the <a> element linking to the story.
articles = soup.find_all("span", class_="txt")

# Reverse articles, so that if multiple new ones have been found, they'll be
# posted in order of when published.
articles.reverse()
# Post every article that has not been posted before, then record its link in
# the log file so later runs skip it.

# Load the already-posted links once, up front, instead of re-reading the log
# file for every article. Stripping the newline also lets a final line that
# lacks a trailing "\n" match.
try:
    with open(log_file, "r") as fp:
        seen_links = {row.rstrip("\n") for row in fp}
except FileNotFoundError:
    # First run: nothing has been posted yet. The log file is created by the
    # first append below.
    seen_links = set()

for article in articles:
    anchor = article.find("a")
    if anchor is None or "href" not in anchor.attrs:
        # Entry without a usable link: skip it rather than abort the whole run.
        print("Skipping an entry without a link")
        continue
    article_link = anchor.attrs["href"]

    if article_link in seen_links:
        print("Article " + article_link + " has already been seen")
        continue

    print(article_link + " has not been seen, posting")
    try:
        # The timeout (seconds) prevents a stalled connection from blocking
        # the remaining articles; a failed request only skips this article.
        article_lookup = requests.get(kuow_base_url + article_link, timeout=30)
        article_soup = BeautifulSoup(article_lookup.content, "html.parser")

        description_tag = article_soup.find("meta", attrs={"property": "description"})
        if description_tag is None:
            print("Could not load a description/post this article")
            continue
        article_description = description_tag.attrs["content"].strip()

        mastodon.status_post(
            status=f"{article_description}\n{kuow_base_url}{article_link}\n#KUOW #News",
            visibility="public",
        )

        # Record the link only after the post succeeded, so a failed post is
        # retried on the next run.
        with open(log_file, "a") as fp:
            fp.write(article_link + "\n")
        seen_links.add(article_link)
    except Exception as err:
        # Best effort per article: report the cause and move on to the next
        # one. Unlike a bare `except:`, this lets KeyboardInterrupt and
        # SystemExit propagate.
        print("Could not load a description/post this article: " + repr(err))