Improve tagging
This commit is contained in:
parent
257d61aefc
commit
0feaae9090
2 changed files with 31 additions and 8 deletions
|
@@ -7,3 +7,4 @@ languages:
|
||||||
- en
|
- en
|
||||||
- es
|
- es
|
||||||
default_language: en
|
default_language: en
|
||||||
|
exclude_tags: []
|
|
@@ -43,6 +43,7 @@ def detect_article_language(article_description: str) -> str:
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
return default_language.iso_code_639_1.name
|
return default_language.iso_code_639_1.name
|
||||||
|
|
||||||
|
|
||||||
engine = create_engine("sqlite:///kuow.db")
|
engine = create_engine("sqlite:///kuow.db")
|
||||||
Base.metadata.create_all(engine)
|
Base.metadata.create_all(engine)
|
||||||
|
|
||||||
|
@@ -130,18 +131,39 @@ with Session(engine) as session:
|
||||||
except (NameError, TypeError):
|
except (NameError, TypeError):
|
||||||
print("Could not find or load IDs for this post")
|
print("Could not find or load IDs for this post")
|
||||||
|
|
||||||
|
tags: list[str] = []
|
||||||
try:
|
try:
|
||||||
tags = article_soup.find(
|
tags.extend(
|
||||||
|
article_soup.find(
|
||||||
"script", {"class": "dfp_targeting", "data-key": "tags"}
|
"script", {"class": "dfp_targeting", "data-key": "tags"}
|
||||||
)["data-value"].split("|")
|
)["data-value"].split("|")
|
||||||
|
)
|
||||||
except (NameError, TypeError):
|
except (NameError, TypeError):
|
||||||
print("Could not find or load any tags for this article")
|
print("Could not find or load any tags from the 'tags' property")
|
||||||
tags = []
|
|
||||||
|
try:
|
||||||
|
tags.extend(
|
||||||
|
article_soup.find(
|
||||||
|
"script", {"class": "dfp_targeting", "data-key": "topics"}
|
||||||
|
)["data-value"].split("|")
|
||||||
|
)
|
||||||
|
except (NameError, TypeError):
|
||||||
|
print("Could not find or load any tags from the 'topics' property")
|
||||||
|
|
||||||
|
# Remove duplicates
|
||||||
|
tags = list(set(tags))
|
||||||
|
|
||||||
additional_tag_string = ""
|
additional_tag_string = ""
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
# TODO: Do a check for tags in the config file that we don't want to tag posts with
|
tag = tag.title().replace(" ", "").replace("&", "And")
|
||||||
additional_tag_string += " #{}".format(tag.title().replace(" ", ""))
|
if tag.casefold() in config["exclude_tags"]:
|
||||||
|
print(
|
||||||
|
"Tag {} was found in the article, but won't be included in the post".format(
|
||||||
|
tag
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
additional_tag_string += " #{}".format(tag)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
article_description = (
|
article_description = (
|
||||||
|
|
Loading…
Reference in a new issue