"""Post newly published KUOW homepage stories to a Mastodon account.

The script fetches the KUOW homepage story grid and, for every story link
that is not yet recorded in the log file, posts the story's meta description
together with its URL as an unlisted status. Each successfully posted link is
appended to the log file so it is not posted again on a later run.
"""

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from mastodon import Mastodon

kuow_base_url = "https://www.kuow.org"
url = kuow_base_url + (
    "/fragments?name=story_grid&source=homepage&view_id=1&page=1&per_page=12"
)
log_file = "kuow_bot_logfile.txt"

# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would hang the script indefinitely.
request_timeout = 30


def _load_seen_links(path):
    """Return the set of article links recorded in the log file at *path*.

    The log holds one link per line. A missing file (e.g. on the very first
    run) is treated as an empty log instead of an error.
    """
    try:
        with open(path, "r", encoding="utf-8") as fp:
            return {line.rstrip("\n") for line in fp}
    except FileNotFoundError:
        return set()


def _fetch_description(article_url):
    """Return the stripped meta description of the page at *article_url*.

    Raises:
        requests.RequestException: the page could not be fetched, or the
            server answered with an HTTP error status.
        LookupError: the page has no ``<meta property="description">`` tag
            with non-empty ``content``.
    """
    response = requests.get(article_url, timeout=request_timeout)
    # Reject error responses so an error page's description is never posted.
    response.raise_for_status()
    page = BeautifulSoup(response.content, "html.parser")
    meta = page.find("meta", attrs={"property": "description"})
    content = meta.attrs.get("content", "").strip() if meta is not None else ""
    if not content:
        raise LookupError("no description meta tag found at " + article_url)
    return content


mastodon = Mastodon(access_token="kuow_bot_mastodon.secret")

# Read the log once up front rather than re-reading it for every article.
seen_links = _load_seen_links(log_file)

r = requests.get(url, timeout=request_timeout)
soup = BeautifulSoup(r.content, "html.parser")

for article in soup.find_all("span", class_="txt"):
    anchor = article.find("a")
    article_link = anchor.get("href") if anchor is not None else None
    if not article_link:
        # A "txt" span without a usable link is not a story entry; skip it.
        continue

    if article_link in seen_links:
        print("Article " + article_link + " has already been seen")
        continue

    print(article_link + " has not been seen, posting")
    # urljoin copes with hrefs that are already absolute or that lack a
    # leading slash, where plain concatenation would yield a broken URL.
    article_url = urljoin(kuow_base_url, article_link)

    try:
        article_description = _fetch_description(article_url)
        mastodon.status_post(
            status=article_description + "\n" + article_url + "\n#KUOW #News",
            visibility="unlisted",
        )
    except Exception as exc:
        # Best effort per article: report the failure and move on. The link
        # is not logged, so it will be retried on the next run.
        print("Could not load a description/post this article")
        print("Reason: " + repr(exc))
        continue

    # Record only after a successful post. A failure to write here is left
    # to propagate: silently continuing would re-post this story every run.
    with open(log_file, "a", encoding="utf-8") as fp:
        fp.write(article_link + "\n")
    seen_links.add(article_link)