diff --git a/config-sample.yml b/config-sample.yml
index 4fd18a4..6a9981d 100644
--- a/config-sample.yml
+++ b/config-sample.yml
@@ -6,4 +6,5 @@ mastodon:
 languages:
   - en
   - es
-default_language: en
\ No newline at end of file
+default_language: en
+exclude_tags: []
\ No newline at end of file
diff --git a/kuow_fetcher.py b/kuow_fetcher.py
index 2a67edd..9413d0b 100644
--- a/kuow_fetcher.py
+++ b/kuow_fetcher.py
@@ -43,6 +43,7 @@ def detect_article_language(article_description: str) -> str:
     except AttributeError:
         return default_language.iso_code_639_1.name
 
+
 engine = create_engine("sqlite:///kuow.db")
 Base.metadata.create_all(engine)
 
@@ -130,18 +131,39 @@ with Session(engine) as session:
         except (NameError, TypeError):
             print("Could not find or load IDs for this post")
 
+        tags: list[str] = []
         try:
-            tags = article_soup.find(
-                "script", {"class": "dfp_targeting", "data-key": "tags"}
-            )["data-value"].split("|")
+            tags.extend(
+                article_soup.find(
+                    "script", {"class": "dfp_targeting", "data-key": "tags"}
+                )["data-value"].split("|")
+            )
         except (NameError, TypeError):
-            print("Could not find or load any tags for this article")
-            tags = []
+            print("Could not find or load any tags from the 'tags' property")
+
+        try:
+            tags.extend(
+                article_soup.find(
+                    "script", {"class": "dfp_targeting", "data-key": "topics"}
+                )["data-value"].split("|")
+            )
+        except (NameError, TypeError):
+            print("Could not find or load any tags from the 'topics' property")
+
+        # Remove duplicates, keeping first-seen order so hashtags are stable
+        tags = list(dict.fromkeys(tags))
 
         additional_tag_string = ""
         for tag in tags:
-            # TODO: Do a check for tags in the config file that we don't want to tag posts with
-            additional_tag_string += " #{}".format(tag.title().replace(" ", ""))
+            tag = tag.title().replace(" ", "").replace("&", "And")
+            if tag.casefold() in (config.get("exclude_tags") or []):
+                print(
+                    "Tag {} was found in the article, but won't be included in the post".format(
+                        tag
+                    )
+                )
+            else:
+                additional_tag_string += " #{}".format(tag)
 
         try:
             article_description = (