2023-04-13 08:49:05 -07:00
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from mastodon import Mastodon
|
2023-04-13 12:00:29 -07:00
|
|
|
|
|
|
|
# --- Configuration -----------------------------------------------------
# Site root, reused both for the story-grid lookup and for building the
# links that get included in posts.
kuow_base_url = "https://www.kuow.org"

# Homepage story-grid fragment endpoint: page 1, twelve stories per call.
url = kuow_base_url + "/fragments?name=story_grid&source=homepage&view_id=1&page=1&per_page=12"

# One article link per line; records everything the bot has already posted.
log_file = "kuow_bot_logfile.txt"

# NOTE(review): access_token looks like the name of a credential file
# (Mastodon.py supports passing a file path here) — confirm the file exists
# alongside the script.
mastodon = Mastodon(access_token="kuow_bot_mastodon.secret")
|
2023-04-13 08:49:05 -07:00
|
|
|
|
|
|
|
# Fetch the homepage story grid. A timeout keeps the bot from hanging
# forever if KUOW is unresponsive, and raise_for_status() aborts the run
# on an HTTP error instead of silently parsing an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html.parser")

# Each story teaser in the grid markup is a <span class="txt"> element.
articles = soup.find_all("span", class_="txt")

# Reverse articles, so that if multiple new ones have been found, they'll
# be posted in order of when published.
articles.reverse()
|
|
|
|
|
|
|
|
# Load the set of already-posted links ONCE, instead of re-opening and
# re-reading the log file for every article (the original did O(articles
# x log-lines) file I/O). Comparing stripped lines also tolerates a final
# line with no trailing newline. A missing log file (first run) simply
# means nothing has been seen yet.
try:
    with open(log_file, "r") as fp:
        seen_links = {line.strip() for line in fp if line.strip()}
except FileNotFoundError:
    seen_links = set()

for article in articles:
    # Relative link to the story page (href of the teaser's anchor).
    article_link = article.find("a").attrs["href"]

    if article_link in seen_links:
        print("Article " + article_link + " has already been seen")
        continue

    print(article_link + " has not been seen, posting")

    # Fetch the full article page so its meta description can be used as
    # the post text. Timeout added so one slow page can't stall the bot.
    article_lookup = requests.get(kuow_base_url + article_link, timeout=30)
    article_soup = BeautifulSoup(article_lookup.content, "html.parser")

    # The publish step stays best-effort, as in the original: a page with
    # no <meta property="description"> tag makes find() return None and
    # the attribute access raise. Catch Exception rather than a bare
    # except so KeyboardInterrupt/SystemExit still terminate the bot.
    try:
        article_description = (
            (article_soup.find("meta", attrs={"property": "description"}))
            .attrs["content"]
            .strip()
        )

        mastodon.status_post(
            status=article_description
            + "\n"
            + kuow_base_url
            + article_link
            + "\n#KUOW #News",
            visibility="unlisted",
        )

        # Record the link only after a successful post, and remember it
        # in memory in case the same link appears twice in one grid.
        with open(log_file, "a") as fp:
            fp.write(article_link + "\n")
        seen_links.add(article_link)
    except Exception:
        print("Could not load a description/post this article")
|