Compare commits
No commits in common. "9a52805213639d24cd0be9e729b6d06e83cf88bc" and "18c60cf2d95ad3f0203c0fc5879c33cea5d63ba0" have entirely different histories.
9a52805213
...
18c60cf2d9
4 changed files with 5 additions and 35 deletions
|
@ -2,7 +2,7 @@ when:
|
||||||
branch: main
|
branch: main
|
||||||
steps:
|
steps:
|
||||||
- name: lint
|
- name: lint
|
||||||
image: python:3-slim
|
image: python:3-alpine
|
||||||
commands:
|
commands:
|
||||||
- python -m pip install --upgrade pip
|
- python -m pip install --upgrade pip
|
||||||
- python -m pip install -r requirements.txt
|
- python -m pip install -r requirements.txt
|
||||||
|
|
|
@ -3,7 +3,3 @@ mastodon:
|
||||||
client_secret:
|
client_secret:
|
||||||
access_token:
|
access_token:
|
||||||
api_base_url:
|
api_base_url:
|
||||||
languages:
|
|
||||||
- en
|
|
||||||
- es
|
|
||||||
default_language: en
|
|
|
@ -6,7 +6,6 @@ from typing import Optional
|
||||||
import requests
|
import requests
|
||||||
import yaml
|
import yaml
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from lingua import IsoCode639_1, Language, LanguageDetectorBuilder
|
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
from sqlalchemy import create_engine, select
|
from sqlalchemy import create_engine, select
|
||||||
from sqlalchemy.exc import NoResultFound
|
from sqlalchemy.exc import NoResultFound
|
||||||
|
@ -22,27 +21,13 @@ class KuowStory(Base):
|
||||||
pageview_story_id: Mapped[str] = mapped_column(primary_key=True, unique=True)
|
pageview_story_id: Mapped[str] = mapped_column(primary_key=True, unique=True)
|
||||||
dfp_targeting_id: Mapped[str] = mapped_column()
|
dfp_targeting_id: Mapped[str] = mapped_column()
|
||||||
article_link: Mapped[str] = mapped_column()
|
article_link: Mapped[str] = mapped_column()
|
||||||
article_language: Mapped[Optional[str]] = mapped_column()
|
|
||||||
last_updated_time: Mapped[datetime] = mapped_column()
|
last_updated_time: Mapped[datetime] = mapped_column()
|
||||||
post_id: Mapped[Optional[str]] = mapped_column()
|
post_id: Mapped[Optional[str]] = mapped_column()
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"KuowStory(pageview_story_id={self.pageview_story_id!r}, dfp_targeting_id={self.dfp_targeting_id!r}, article_link={self.article_link!r}, article_language={self.article_language!r}, last_updated_time={self.last_updated_time!r}, post_id={self.post_id!r})"
|
return f"KuowStory(pageview_story_id={self.pageview_story_id!r}, dfp_targeting_id={self.dfp_targeting_id!r}, article_link={self.article_link!r}, last_updated_time={self.last_updated_time!r}, post_id={self.post_id!r})"
|
||||||
|
|
||||||
|
|
||||||
def get_language_from_iso_code_639_1_str(iso_code_639_1_str: str) -> Language:
|
|
||||||
iso_code_369_1 = getattr(IsoCode639_1, iso_code_639_1_str.upper())
|
|
||||||
return Language.from_iso_code_639_1(iso_code_369_1)
|
|
||||||
|
|
||||||
|
|
||||||
def detect_article_language(article_description: str) -> str:
|
|
||||||
detector = LanguageDetectorBuilder.from_languages(*languages).build()
|
|
||||||
try:
|
|
||||||
language = detector.detect_language_of(article_description)
|
|
||||||
return language.iso_code_639_1.name
|
|
||||||
except AttributeError:
|
|
||||||
return default_language.iso_code_639_1.name
|
|
||||||
|
|
||||||
engine = create_engine("sqlite:///kuow.db")
|
engine = create_engine("sqlite:///kuow.db")
|
||||||
Base.metadata.create_all(engine)
|
Base.metadata.create_all(engine)
|
||||||
|
|
||||||
|
@ -59,11 +44,6 @@ mastodon = Mastodon(
|
||||||
api_base_url=config["mastodon"]["api_base_url"],
|
api_base_url=config["mastodon"]["api_base_url"],
|
||||||
)
|
)
|
||||||
|
|
||||||
languages = [
|
|
||||||
get_language_from_iso_code_639_1_str(language) for language in config["languages"]
|
|
||||||
]
|
|
||||||
default_language = get_language_from_iso_code_639_1_str(config["default_language"])
|
|
||||||
|
|
||||||
kuow_response = requests.get(url)
|
kuow_response = requests.get(url)
|
||||||
soup = BeautifulSoup(kuow_response.content, "html.parser")
|
soup = BeautifulSoup(kuow_response.content, "html.parser")
|
||||||
articles = soup.find_all("span", class_="txt")
|
articles = soup.find_all("span", class_="txt")
|
||||||
|
@ -127,14 +107,14 @@ with Session(engine) as session:
|
||||||
article_record.dfp_targeting_id = article_soup.find(
|
article_record.dfp_targeting_id = article_soup.find(
|
||||||
"script", {"class": "dfp_targeting", "data-key": "id"}
|
"script", {"class": "dfp_targeting", "data-key": "id"}
|
||||||
)["data-value"]
|
)["data-value"]
|
||||||
except (NameError, TypeError):
|
except NameError:
|
||||||
print("Could not find or load IDs for this post")
|
print("Could not find or load IDs for this post")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
tags = article_soup.find(
|
tags = article_soup.find(
|
||||||
"script", {"class": "dfp_targeting", "data-key": "tags"}
|
"script", {"class": "dfp_targeting", "data-key": "tags"}
|
||||||
)["data-value"].split("|")
|
)["data-value"].split("|")
|
||||||
except (NameError, TypeError):
|
except NameError:
|
||||||
print("Could not find or load any tags for this article")
|
print("Could not find or load any tags for this article")
|
||||||
tags = []
|
tags = []
|
||||||
|
|
||||||
|
@ -151,10 +131,6 @@ with Session(engine) as session:
|
||||||
)
|
)
|
||||||
if not article_record.post_id:
|
if not article_record.post_id:
|
||||||
print("Posting to Mastodon")
|
print("Posting to Mastodon")
|
||||||
|
|
||||||
article_language = detect_article_language(article_description)
|
|
||||||
article_record.article_language = article_language
|
|
||||||
|
|
||||||
mastodon_post_result = mastodon.status_post(
|
mastodon_post_result = mastodon.status_post(
|
||||||
status=article_description
|
status=article_description
|
||||||
+ "\n"
|
+ "\n"
|
||||||
|
@ -162,7 +138,6 @@ with Session(engine) as session:
|
||||||
+ article_link
|
+ article_link
|
||||||
+ "\n#KUOW #News{}".format(additional_tag_string),
|
+ "\n#KUOW #News{}".format(additional_tag_string),
|
||||||
visibility="public",
|
visibility="public",
|
||||||
language=article_language,
|
|
||||||
)
|
)
|
||||||
article_record.post_id = mastodon_post_result["id"]
|
article_record.post_id = mastodon_post_result["id"]
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -5,7 +5,6 @@ charset-normalizer==3.3.2
|
||||||
decorator==5.1.1
|
decorator==5.1.1
|
||||||
greenlet==3.0.3
|
greenlet==3.0.3
|
||||||
idna==3.6
|
idna==3.6
|
||||||
lingua-language-detector==2.0.2
|
|
||||||
Mastodon.py==1.8.1
|
Mastodon.py==1.8.1
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
python-magic==0.4.27
|
python-magic==0.4.27
|
||||||
|
@ -13,6 +12,6 @@ PyYAML==6.0.1
|
||||||
requests==2.31.0
|
requests==2.31.0
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
soupsieve==2.5
|
soupsieve==2.5
|
||||||
SQLAlchemy==2.0.27
|
SQLAlchemy==2.0.26
|
||||||
typing_extensions==4.9.0
|
typing_extensions==4.9.0
|
||||||
urllib3==2.2.0
|
urllib3==2.2.0
|
||||||
|
|
Loading…
Reference in a new issue