Improve tagging #20
2 changed files with 31 additions and 8 deletions
|
@@ -7,3 +7,4 @@ languages:
|
|||
- en
|
||||
- es
|
||||
default_language: en
|
||||
exclude_tags: []
|
|
@@ -43,6 +43,7 @@ def detect_article_language(article_description: str) -> str:
|
|||
except AttributeError:
|
||||
return default_language.iso_code_639_1.name
|
||||
|
||||
|
||||
engine = create_engine("sqlite:///kuow.db")
|
||||
Base.metadata.create_all(engine)
|
||||
|
||||
|
@@ -130,18 +131,39 @@ with Session(engine) as session:
|
|||
except (NameError, TypeError):
|
||||
print("Could not find or load IDs for this post")
|
||||
|
||||
tags: list[str] = []
|
||||
try:
|
||||
tags = article_soup.find(
|
||||
"script", {"class": "dfp_targeting", "data-key": "tags"}
|
||||
)["data-value"].split("|")
|
||||
tags.extend(
|
||||
article_soup.find(
|
||||
"script", {"class": "dfp_targeting", "data-key": "tags"}
|
||||
)["data-value"].split("|")
|
||||
)
|
||||
except (NameError, TypeError):
|
||||
print("Could not find or load any tags for this article")
|
||||
tags = []
|
||||
print("Could not find or load any tags from the 'tags' property")
|
||||
|
||||
# Merge the values of the "topics" dfp_targeting script element into the
# running tag list. The element's data-value is a "|"-separated string.
try:
    tags.extend(
        article_soup.find(
            "script", {"class": "dfp_targeting", "data-key": "topics"}
        )["data-value"].split("|")
    )
except (NameError, TypeError):
    # NOTE(review): presumably article_soup is a BeautifulSoup object, whose
    # find() returns None when nothing matches, so the subscript raises
    # TypeError - confirm against the parser setup earlier in the file.
    # The message names 'topics' (not 'tags') so the log identifies which
    # of the two lookups actually failed.
    print("Could not find or load any tags from the 'topics' property")
|
||||
|
||||
# Remove duplicates
|
||||
tags = list(set(tags))
|
||||
|
||||
additional_tag_string = ""
|
||||
for tag in tags:
|
||||
# TODO: Do a check for tags in the config file that we don't want to tag posts with
|
||||
additional_tag_string += " #{}".format(tag.title().replace(" ", ""))
|
||||
tag = tag.title().replace(" ", "").replace("&", "And")
|
||||
if tag.casefold() in config["exclude_tags"]:
|
||||
print(
|
||||
"Tag {} was found in the article, but won't be included in the post".format(
|
||||
tag
|
||||
)
|
||||
)
|
||||
else:
|
||||
additional_tag_string += " #{}".format(tag)
|
||||
|
||||
try:
|
||||
article_description = (
|
||||
|
|
Loading…
Reference in a new issue