Improve tagging #20

Merged
buckbanzai merged 1 commit from improve-tagging into main 2024-02-14 08:21:33 -08:00
2 changed files with 31 additions and 8 deletions
Showing only changes of commit 0feaae9090 - Show all commits

View file

@ -7,3 +7,4 @@ languages:
- en - en
- es - es
default_language: en default_language: en
exclude_tags: []

View file

@ -43,6 +43,7 @@ def detect_article_language(article_description: str) -> str:
except AttributeError: except AttributeError:
return default_language.iso_code_639_1.name return default_language.iso_code_639_1.name
engine = create_engine("sqlite:///kuow.db") engine = create_engine("sqlite:///kuow.db")
Base.metadata.create_all(engine) Base.metadata.create_all(engine)
@ -130,18 +131,39 @@ with Session(engine) as session:
except (NameError, TypeError): except (NameError, TypeError):
print("Could not find or load IDs for this post") print("Could not find or load IDs for this post")
tags: list[str] = []
try: try:
tags = article_soup.find( tags.extend(
article_soup.find(
"script", {"class": "dfp_targeting", "data-key": "tags"} "script", {"class": "dfp_targeting", "data-key": "tags"}
)["data-value"].split("|") )["data-value"].split("|")
)
except (NameError, TypeError): except (NameError, TypeError):
print("Could not find or load any tags for this article") print("Could not find or load any tags from the 'tags' property")
tags = []
try:
tags.extend(
article_soup.find(
"script", {"class": "dfp_targeting", "data-key": "topics"}
)["data-value"].split("|")
)
except (NameError, TypeError):
print("Could not find or load any tags from the 'topics' property")
# Remove duplicates
tags = list(set(tags))
additional_tag_string = "" additional_tag_string = ""
for tag in tags: for tag in tags:
# TODO: Do a check for tags in the config file that we don't want to tag posts with tag = tag.title().replace(" ", "").replace("&", "And")
additional_tag_string += " #{}".format(tag.title().replace(" ", "")) if tag.casefold() in config["exclude_tags"]:
print(
"Tag {} was found in the article, but won't be included in the post".format(
tag
)
)
else:
additional_tag_string += " #{}".format(tag)
try: try:
article_description = ( article_description = (