Compare commits

...

2 commits

Author SHA1 Message Date
d8a0e46c81 Merge pull request 'Improve tagging' (#20) from improve-tagging into main
All checks were successful
ci/woodpecker/push/lint Pipeline was successful
Reviewed-on: #20
2024-02-14 08:21:32 -08:00
0feaae9090 Improve tagging
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
2024-02-14 08:18:08 -08:00
2 changed files with 31 additions and 8 deletions

View file

@@ -7,3 +7,4 @@ languages:
- en
- es
default_language: en
exclude_tags: []

View file

@@ -43,6 +43,7 @@ def detect_article_language(article_description: str) -> str:
except AttributeError:
return default_language.iso_code_639_1.name
engine = create_engine("sqlite:///kuow.db")
Base.metadata.create_all(engine)
@@ -130,18 +131,39 @@ with Session(engine) as session:
except (NameError, TypeError):
print("Could not find or load IDs for this post")
tags: list[str] = []
try:
tags = article_soup.find(
"script", {"class": "dfp_targeting", "data-key": "tags"}
)["data-value"].split("|")
tags.extend(
article_soup.find(
"script", {"class": "dfp_targeting", "data-key": "tags"}
)["data-value"].split("|")
)
except (NameError, TypeError):
print("Could not find or load any tags for this article")
tags = []
print("Could not find or load any tags from the 'tags' property")
# Merge the article's topics into the tag list. They are read from the
# dfp_targeting <script> element whose data-key is "topics"; its data-value
# attribute is split on "|" into individual entries.
try:
    tags.extend(
        article_soup.find(
            "script", {"class": "dfp_targeting", "data-key": "topics"}
        )["data-value"].split("|")
    )
except (NameError, TypeError, KeyError):
    # find() returns None when the element is absent, so the subscript raises
    # TypeError; an element that lacks a data-value attribute raises KeyError.
    # Either way this is best-effort: report it and carry on without topics.
    print("Could not find or load any tags from the 'topics' property")
# Remove duplicates while keeping first-seen order. A set would also drop
# duplicates, but its iteration order for strings is not stable between runs,
# which would shuffle the order of the generated hashtags.
tags = list(dict.fromkeys(tags))
additional_tag_string = ""
for tag in tags:
# TODO: Do a check for tags in the config file that we don't want to tag posts with
additional_tag_string += " #{}".format(tag.title().replace(" ", ""))
# Normalise the raw tag into hashtag form: Title Case, spaces removed, and
# "&" spelled out as "And".
tag = tag.title().replace(" ", "").replace("&", "And")
# Config files written before the exclude_tags key existed will not contain
# it, and a null value would make the membership test raise, so both cases
# fall back to "exclude nothing".
# NOTE(review): assumes config is a dict-like mapping (presumably loaded from
# the YAML config file) — confirm.
excluded_tags = {
    str(entry).casefold() for entry in (config.get("exclude_tags") or [])
}
# Compare case-insensitively on both sides; entries are expected in the same
# space-free hashtag form as the normalised tag.
if tag.casefold() in excluded_tags:
    print(
        "Tag {} was found in the article, but won't be included in the post".format(
            tag
        )
    )
else:
    additional_tag_string += " #{}".format(tag)
try:
article_description = (