Compare commits

..

39 commits

Author SHA1 Message Date
53746a377e Update dependency SQLAlchemy to v2.0.36
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-11-02 03:10:39 +00:00
535f090a2d Update dependency charset-normalizer to v3.4.0
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-10-09 08:00:09 +00:00
abd93235a6 Update dependency greenlet to v3.1.1
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-09-20 18:00:08 +00:00
c252ba748a Update dependency idna to v3.10
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-09-20 16:00:09 +00:00
ea622b57ae Update dependency greenlet to v3.1.0
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-09-20 15:00:11 +00:00
23b0072d5e Update dependency urllib3 to v2.2.3
All checks were successful
ci/woodpecker/push/lint Pipeline was successful
2024-09-20 07:00:51 -07:00
4d1673d00e Update dependency SQLAlchemy to v2.0.35
Some checks are pending
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline is pending
ci/woodpecker/pull_request_closed/lint Pipeline was successful
2024-09-20 14:00:12 +00:00
b65258400e Update dependency certifi to v2024.8.30
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-09-03 13:54:56 +00:00
ba8c53d255 Update dependency idna to v3.8
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-08-23 17:00:08 +00:00
8350922f04 Update dependency soupsieve to v2.6
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-08-13 14:00:16 +00:00
25a8a6d1bc Update dependency PyYAML to v6.0.2
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-08-06 21:00:15 +00:00
a4ebfa122b Update dependency SQLAlchemy to v2.0.32
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-08-05 20:00:14 +00:00
cdaff0965e Update dependency certifi to v2024.7.4
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-07-04 02:00:14 +00:00
92e81e6f79 Update dependency typing_extensions to v4.12.2
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-07-01 18:00:16 +00:00
c2008f0205 Update dependency certifi to v2024.6.2
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-07-01 17:00:10 +00:00
ec203863e3 Update dependency urllib3 to v2.2.2
All checks were successful
ci/woodpecker/push/lint Pipeline was successful
2024-07-01 09:33:00 -07:00
83a56a2323 Update dependency SQLAlchemy to v2.0.31
Some checks are pending
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline is pending
ci/woodpecker/pull_request_closed/lint Pipeline was successful
2024-07-01 16:31:37 +00:00
2c8f2ffb2d Update dependency requests to v2.32.3
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-06-13 21:04:08 +00:00
88f2d03d33 Update dependency typing_extensions to v4.12.0
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-05-24 01:00:14 +00:00
770a8221cd Update dependency requests to v2.32.2
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-05-21 19:00:16 +00:00
a2fef7593e Update dependency requests to v2.32.1
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-05-20 23:00:14 +00:00
fd7e10d41b Update dependency requests to v2.32.0
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-05-20 17:00:14 +00:00
ee59d64287 Update dependency SQLAlchemy to v2.0.30
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-05-05 18:00:12 +00:00
e007d42fb7 Update dependency typing_extensions to v4.11.0
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-04-26 07:00:11 +00:00
32ae351f26 Update dependency idna to v3.7
All checks were successful
ci/woodpecker/push/lint Pipeline was successful
2024-04-25 23:01:00 -07:00
0b0a2b08fb Update dependency SQLAlchemy to v2.0.29
Some checks are pending
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline is pending
ci/woodpecker/pull_request_closed/lint Pipeline was successful
2024-04-26 06:00:17 +00:00
2e0bfa7842 Merge pull request 'Revert linting to Alpine' (#28) from revert-linting-to-alpine into main
All checks were successful
ci/woodpecker/push/lint Pipeline was successful
Reviewed-on: #28
2024-03-29 16:21:35 -07:00
e5631baa64 Revert linting to Alpine
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
2024-03-29 16:20:12 -07:00
0fb9cb0f00 Update dependency SQLAlchemy to v2.0.28
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-03-04 14:00:16 +00:00
b1ee78ac8b Update dependency python-dateutil to v2.9.0.post0
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-03-01 19:00:21 +00:00
259f00f3a4 Update dependency python-dateutil to v2.9.0
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-03-01 04:00:17 +00:00
c60b9bfe01 Update dependency typing_extensions to v4.10.0
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-02-25 23:00:15 +00:00
8b8f54b524 Add features to README
All checks were successful
ci/woodpecker/push/lint Pipeline was successful
2024-02-22 07:17:32 -08:00
132e628f4e Update comments
All checks were successful
ci/woodpecker/push/lint Pipeline was successful
2024-02-22 07:14:29 -08:00
77fae5eda6 Fix indent on pageview lookup
All checks were successful
ci/woodpecker/push/lint Pipeline was successful
2024-02-22 06:41:44 -08:00
84ca53d320 Merge pull request 'Also match posts by Pageview ID' (#23) from fix-duplicate-posts into main
All checks were successful
ci/woodpecker/push/lint Pipeline was successful
Reviewed-on: #23
2024-02-22 06:38:58 -08:00
1928e00d3d Also match posts by Pageview ID
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
2024-02-22 06:38:08 -08:00
5a9d5e82ac Update dependency urllib3 to v2.2.1
All checks were successful
ci/woodpecker/pr/lint Pipeline was successful
ci/woodpecker/pull_request_closed/lint Pipeline was successful
ci/woodpecker/push/lint Pipeline was successful
2024-02-18 04:00:15 +00:00
d8a0e46c81 Merge pull request 'Improve tagging' (#20) from improve-tagging into main
All checks were successful
ci/woodpecker/push/lint Pipeline was successful
Reviewed-on: #20
2024-02-14 08:21:32 -08:00
4 changed files with 41 additions and 27 deletions

View file

@ -2,9 +2,9 @@ when:
branch: main
steps:
- name: lint
image: python:3-slim
image: python:3-alpine
commands:
- python -m pip install --upgrade pip
- python -m pip install -r requirements.txt
- python -m pip install ruff
- ruff check .
- ruff check .

View file

@ -1,6 +1,12 @@
# KUOW Mastodon Bot
[![status-badge](https://ci.gruezi.net/api/badges/1/status.svg)](https://ci.gruezi.net/repos/1)
This bot scrapes the KUOW website, looks for links to news stories that haven't already been seen/posted, then posts them to Mastodon.
This bot scrapes the KUOW website, looks for news stories that haven't been seen before, then posts them to Mastodon.
PRs welcome!
PRs welcome!
## Features
- Tags based on article topics
- Matching already-seen articles by both article link and "Pageview ID"
- Re-posting articles when the published time changes
- Detecting and including metadata about the language of the posts

View file

@ -8,7 +8,7 @@ import yaml
from bs4 import BeautifulSoup
from lingua import IsoCode639_1, Language, LanguageDetectorBuilder
from mastodon import Mastodon
from sqlalchemy import create_engine, select
from sqlalchemy import create_engine, select, or_
from sqlalchemy.exc import NoResultFound
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column
@ -75,11 +75,6 @@ with Session(engine) as session:
for article in articles:
article_link = article.find("a").attrs["href"]
print("Checking {}".format(article_link))
lookup_statement = select(KuowStory).where(
KuowStory.article_link == article_link
)
lookup_result = session.scalars(lookup_statement)
is_new_article = False
article_lookup = requests.get(kuow_base_url + article_link)
@ -105,15 +100,30 @@ with Session(engine) as session:
except NameError:
print("Could not find or load the meta published time for this post")
last_updated_time = datetime.now()
try:
pageview_story_id = article_soup.find(
"script", {"class": "pageview_story"}
)["data-id"]
except (NameError, TypeError):
print(
"Could not find or load a Pageview story ID, skipping additional processing on this post"
)
continue
try:
lookup_statement = select(KuowStory).where(
or_(
KuowStory.article_link == article_link,
KuowStory.pageview_story_id == pageview_story_id,
)
)
lookup_result = session.scalars(lookup_statement)
article_record = lookup_result.one()
# Only process existing articles if the last updated time doesn't match
process_article = (
article_record.last_updated_time.astimezone() != last_updated_time
)
except NoResultFound:
# Is a new article, or at least one that doesn't match based on the link
# Is a new article
article_record = KuowStory()
process_article = True
is_new_article = True
@ -122,9 +132,6 @@ with Session(engine) as session:
print("Processing {}".format(article_link))
try:
article_record.pageview_story_id = article_soup.find(
"script", {"class": "pageview_story"}
)["data-id"]
article_record.dfp_targeting_id = article_soup.find(
"script", {"class": "dfp_targeting", "data-key": "id"}
)["data-value"]
@ -150,7 +157,7 @@ with Session(engine) as session:
except (NameError, TypeError):
print("Could not find or load any tags from the 'tags' property")
# Remove duplicates
# Remove duplicate tags
tags = list(set(tags))
additional_tag_string = ""
@ -192,6 +199,7 @@ with Session(engine) as session:
except Exception:
print("Could not load a description/post this article")
article_record.pageview_story_id = pageview_story_id
article_record.article_link = article_link
article_record.last_updated_time = last_updated_time

View file

@ -1,18 +1,18 @@
beautifulsoup4==4.12.3
blurhash==1.1.4
certifi==2024.2.2
charset-normalizer==3.3.2
certifi==2024.8.30
charset-normalizer==3.4.0
decorator==5.1.1
greenlet==3.0.3
idna==3.6
greenlet==3.1.1
idna==3.10
lingua-language-detector==2.0.2
Mastodon.py==1.8.1
python-dateutil==2.8.2
python-dateutil==2.9.0.post0
python-magic==0.4.27
PyYAML==6.0.1
requests==2.31.0
PyYAML==6.0.2
requests==2.32.3
six==1.16.0
soupsieve==2.5
SQLAlchemy==2.0.27
typing_extensions==4.9.0
urllib3==2.2.0
soupsieve==2.6
SQLAlchemy==2.0.36
typing_extensions==4.12.2
urllib3==2.2.3