# kuow-mastodon-bot/kuow_fetcher.py
#
# 60 lines
# 2.1 KiB
# Python

import requests
import yaml
from bs4 import BeautifulSoup
from mastodon import Mastodon
# Base URL of the KUOW site; scraped article links are appended to it.
kuow_base_url = "https://www.kuow.org"
# Homepage story-grid fragment (page 1, 12 stories per page).
url = (
    kuow_base_url
    + "/fragments?name=story_grid&source=homepage&view_id=1&page=1&per_page=12"
)
# Text file holding one already-posted article link per line.
log_file = "kuow_bot_logfile.txt"

# Read the settings inside a context manager so the file handle is closed
# as soon as parsing finishes instead of leaking until interpreter exit.
with open("config.yml", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

# NOTE(review): these two values are loaded but not referenced anywhere else
# in the visible script; kept so existing config files and importers still work.
stadiamaps_api_key = config["stadiamaps"]["api_key"]
nominatim_url = config["nominatim"]["api_base_url"]

# Mastodon client used to publish the posts; credentials come from config.yml.
mastodon = Mastodon(
    client_id=config["mastodon"]["client_id"],
    client_secret=config["mastodon"]["client_secret"],
    access_token=config["mastodon"]["access_token"],
    api_base_url=config["mastodon"]["api_base_url"],
)
# Seconds to wait on any single HTTP request to KUOW; without a timeout a
# stalled connection would hang the bot indefinitely.
request_timeout = 30


def _load_seen_links(path):
    """Return the set of article links already recorded in the log at *path*.

    The log holds one link per line. A missing file (for example on the very
    first run) is treated as an empty log rather than an error.
    """
    try:
        with open(path, "r", encoding="utf-8") as fp:
            return {line.rstrip("\n") for line in fp}
    except FileNotFoundError:
        return set()


kuow_response = requests.get(url, timeout=request_timeout)
soup = BeautifulSoup(kuow_response.content, "html.parser")
articles = soup.find_all("span", class_="txt")
# Reverse articles, so that if multiple new ones have been found, they'll be
# posted in order of when published
articles.reverse()

# Read the log once up front; membership checks against a set replace the
# original re-read of the whole file for every article.
seen_links = _load_seen_links(log_file)

for article in articles:
    anchor = article.find("a")
    article_link = anchor.get("href") if anchor is not None else None
    if not article_link:
        # Skip spans that carry no link instead of crashing on None.
        continue

    if article_link in seen_links:
        print("Article " + article_link + " has already been seen")
        continue

    print(article_link + " has not been seen, posting")
    try:
        article_lookup = requests.get(
            kuow_base_url + article_link, timeout=request_timeout
        )
        article_soup = BeautifulSoup(article_lookup.content, "html.parser")
        # find() returns None when the meta tag is absent; the resulting
        # AttributeError is handled below as "could not load a description".
        article_description = (
            article_soup.find("meta", attrs={"property": "description"})
            .attrs["content"]
            .strip()
        )
        mastodon.status_post(
            status=article_description
            + "\n"
            + kuow_base_url
            + article_link
            + "\n#KUOW #News",
            visibility="unlisted",
        )
        # Record the link only after a successful post so failures are retried
        # on the next run.
        with open(log_file, "a", encoding="utf-8") as fp:
            fp.write(article_link + "\n")
        seen_links.add(article_link)
    except Exception as exc:
        # Best-effort per article: report the cause and move on to the next
        # one. Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        print("Could not load a description/post this article: " + repr(exc))