import requests
from bs4 import BeautifulSoup
from mastodon import Mastodon

kuow_base_url = "https://www.kuow.org"
url = (
    kuow_base_url
    + "/fragments?name=story_grid&source=homepage&view_id=1&page=1&per_page=12"
)
log_file = "kuow_bot_logfile.txt"

mastodon = Mastodon(access_token="kuow_bot_mastodon.secret")

# Fetch the homepage story grid fragment and collect the article links
r = requests.get(url)
soup = BeautifulSoup(r.content, "html.parser")
articles = soup.find_all("span", class_="txt")

# Reverse articles, so that if multiple new ones have been found,
# they'll be posted in order of when they were published
articles.reverse()

for article in articles:
    article_link = article.find("a").attrs["href"]

    # Check the log file to see whether this article has already been posted
    is_new_article = True
    with open(log_file, "r") as fp:
        lines = fp.readlines()
        for row in lines:
            if row == article_link + "\n":
                print("Article " + article_link + " has already been seen")
                is_new_article = False

    if is_new_article:
        print(article_link + " has not been seen, posting")
        article_lookup = requests.get(kuow_base_url + article_link)
        article_soup = BeautifulSoup(article_lookup.content, "html.parser")
        try:
            # Use the article's meta description as the body of the post
            article_description = (
                article_soup.find("meta", attrs={"property": "description"})
                .attrs["content"]
                .strip()
            )
            mastodon.status_post(
                status=article_description
                + "\n"
                + kuow_base_url
                + article_link
                + "\n#KUOW #News",
                visibility="unlisted",
            )
            # Record the link so the article isn't posted again
            with open(log_file, "a") as fp:
                fp.write(article_link + "\n")
        except Exception:
            print("Could not load a description/post this article")