From 6f2b27f00e532abf78c25834e8a59c7584d2b661 Mon Sep 17 00:00:00 2001 From: Sergey Karmanov Date: Fri, 29 May 2026 01:03:07 +0300 Subject: [PATCH] =?UTF-8?q?=D0=92=D1=8B=D0=BD=D0=B5=D1=81=20=D0=BF=D0=B0?= =?UTF-8?q?=D1=80=D1=81=D0=B5=D1=80=20=D0=B2=20=D0=BE=D1=82=D0=B4=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D0=BD=D1=8B=D0=B9=20=D1=84=D0=B0=D0=B9=D0=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 + Dockerfile | 5 ++- README.md | 18 ++++++++ crontab | 4 +- main.py | 101 ------------------------------------------ parser.py | 110 ++++++++++++++++++++++++++++++++++++++++++++++ start_telegram.py | 51 +++++++++++++++++++++ 7 files changed, 186 insertions(+), 105 deletions(-) create mode 100644 .gitignore delete mode 100644 main.py create mode 100644 parser.py create mode 100644 start_telegram.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..43ae0e2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +*.py[cod] diff --git a/Dockerfile b/Dockerfile index a422d01..f8fe452 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,8 @@ RUN pip3 install -r requirements.txt COPY crontab /tmp/crontab RUN cat /tmp/crontab > /etc/crontabs/root -COPY main.py main.py +COPY start_telegram.py start_telegram.py +COPY parser.py parser.py # run crond as main process of container -CMD ["crond", "-f", "-l", "2"] \ No newline at end of file +CMD ["crond", "-f", "-l", "2"] diff --git a/README.md b/README.md index 43a6ee9..35789cc 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,24 @@ docker run -d --name VodokanalBot \ docker compose up -d --build ``` +## Интеграции + +Общая логика парсинга, работы с `data/db.json` и поиска новых сообщений вынесена в [`parser.py`](./parser.py). + +Для новой интеграции достаточно создать свой адаптер отправки и передать его в `publish_new_posts`: + +``` Python +from parser import create_session, publish_new_posts + +session = create_session() + +publish_new_posts( + send_message=lambda message: print(message), + session=session, + url="http://www.tgnvoda.ru/avarii.php", +) +``` + ## Библиотеки * [Requests](https://requests.readthedocs.io/en/latest/) diff --git a/crontab b/crontab index f36485c..4383999 100644 --- a/crontab +++ b/crontab @@ -1,2 +1,2 @@ -@reboot cd /app && python3 /app/main.py -*/10 * * * * cd /app && python3 /app/main.py +@reboot cd /app && python3 /app/start_telegram.py +*/10 * * * * cd /app && python3 /app/start_telegram.py diff --git a/main.py b/main.py deleted file mode 100644 index b522e81..0000000 --- a/main.py +++ /dev/null @@ -1,101 +0,0 @@ -import requests, json, os -from bs4 import BeautifulSoup -from datetime import datetime - -# Config - -URL = os.environ.get('VODOKANAL_URL', 'http://www.tgnvoda.ru/avarii.php') -SEND_SILENT = os.environ.get('SEND_SILENT', False) -TELEGRAM_TOKEN = os.environ.get('TELEGRAM_TOKEN', '') -TELEGRAM_CHANNEL = os.environ.get('TELEGRAM_CHANNEL', '') -PROXY_URL = os.environ.get('PROXY_URL', '') - -if TELEGRAM_TOKEN == '': - print("Telegram token is not set") - exit() - -if TELEGRAM_CHANNEL == '': - print("Telegram channel is not set") - exit() - -# Configure HTTP client - -session = requests.Session() - -if PROXY_URL != '': - session.proxies.update({ - 'http': PROXY_URL, - 'https': PROXY_URL, - }) - -# Load database - -db = None -if (os.path.isfile('data/db.json')): - with open('data/db.json', 'r', encoding='utf-8') as f: - db = json.load(f) -else: - print("Database not loaded") - -# Get data - -req = session.get(URL) - -if (req.status_code != 200): - print("Request error: " + str(req.status_code)) - exit() - -soup = BeautifulSoup(req.content, "html.parser") - -elements = [] -for tag in soup.find_all('font', size='2', face='VERDANA'): - date = tag.select_one('font:nth-of-type(1)').b.text - if not(date.split('.')[0] == str(datetime.today().day).zfill(2) and date.split('.')[1] == str(datetime.today().month).zfill(2)): - continue - elements.append(date + "$" + tag.select_one('font:nth-of-type(2)').text.replace('\n', '')) - -if elements == []: - print("No posts") - exit() - -print("The number of posts for this day:", len(elements)) - -# Send telegram message - -def send_message(message): - req = session.get( - "https://api.telegram.org/bot" + TELEGRAM_TOKEN + "/sendMessage", - params={ - 'chat_id': TELEGRAM_CHANNEL, - 'disable_notification': str(SEND_SILENT), - 'text': message, - }, - ) - if (req.status_code != 200): - print("Telegram request error: " + str(req.status_code)) - exit() - else: - print("Telegram message sent, mess id: " + str(req.json()['result']['message_id'])) - -# Compare db and elements - -if db is not None: - diff = set(elements) - set(db) - if not diff: - print("No new posts") - exit() - - for i in diff: - send_message(i.split("$",1)[1]) -else: - for element in elements: - send_message(element.split("$",1)[1]) - -# Save database - -if not os.path.exists("data"): - os.makedirs("data") - -with open('data/db.json', 'w', encoding='utf-8') as f: - json.dump(elements, f, ensure_ascii=False) - print("Database updated") diff --git a/parser.py b/parser.py new file mode 100644 index 0000000..86a25ae --- /dev/null +++ b/parser.py @@ -0,0 +1,110 @@ +import json +import os +from dataclasses import dataclass +from datetime import datetime + +import requests +from bs4 import BeautifulSoup + + +DEFAULT_DB_PATH = "data/db.json" + + +@dataclass(frozen=True) +class Post: + date: str + text: str + + @property + def key(self): + return self.date + "$" + self.text + + +def create_session(proxy_url=""): + session = requests.Session() + + if proxy_url: + session.proxies.update({ + "http": proxy_url, + "https": proxy_url, + }) + + return session + + +def load_database(path=DEFAULT_DB_PATH): + if not os.path.isfile(path): + print("Database not loaded") + return None + + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + + +def save_database(posts, path=DEFAULT_DB_PATH): + directory = os.path.dirname(path) + if directory and not os.path.exists(directory): + os.makedirs(directory) + + with open(path, "w", encoding="utf-8") as f: + json.dump([post.key for post in posts], f, ensure_ascii=False) + print("Database updated") + + +def fetch_posts(session, url, today=None): + req = session.get(url) + + if req.status_code != 200: + raise RuntimeError("Request error: " + str(req.status_code)) + + return parse_posts(req.content, today=today) + + +def parse_posts(content, today=None): + today = today or datetime.today() + soup = BeautifulSoup(content, "html.parser") + + posts = [] + for tag in soup.find_all("font", size="2", face="VERDANA"): + date = tag.select_one("font:nth-of-type(1)").b.text + if not is_today(date, today): + continue + + text = tag.select_one("font:nth-of-type(2)").text.replace("\n", "") + posts.append(Post(date=date, text=text)) + + return posts + + +def is_today(date, today): + day, month = date.split(".")[:2] + return day == str(today.day).zfill(2) and month == str(today.month).zfill(2) + + +def get_new_posts(posts, database): + if database is None: + return posts + + database_keys = set(database) + return [post for post in posts if post.key not in database_keys] + + +def publish_new_posts(send_message, session, url, db_path=DEFAULT_DB_PATH): + database = load_database(db_path) + posts = fetch_posts(session, url) + + if not posts: + print("No posts") + return + + print("The number of posts for this day:", len(posts)) + + new_posts = get_new_posts(posts, database) + if not new_posts: + print("No new posts") + return + + for post in new_posts: + send_message(post.text) + + save_database(posts, db_path) diff --git a/start_telegram.py b/start_telegram.py new file mode 100644 index 0000000..1a3b6e0 --- /dev/null +++ b/start_telegram.py @@ -0,0 +1,51 @@ +import os + +from parser import create_session, publish_new_posts + + +URL = os.environ.get('VODOKANAL_URL', 'http://www.tgnvoda.ru/avarii.php') +SEND_SILENT = os.environ.get('SEND_SILENT', False) +TELEGRAM_TOKEN = os.environ.get('TELEGRAM_TOKEN', '') +TELEGRAM_CHANNEL = os.environ.get('TELEGRAM_CHANNEL', '') +PROXY_URL = os.environ.get('PROXY_URL', '') + + +def send_telegram_message(session, message): + req = session.get( + "https://api.telegram.org/bot" + TELEGRAM_TOKEN + "/sendMessage", + params={ + 'chat_id': TELEGRAM_CHANNEL, + 'disable_notification': str(SEND_SILENT), + 'text': message, + }, + ) + if (req.status_code != 200): + print("Telegram request error: " + str(req.status_code)) + exit() + else: + print("Telegram message sent, mess id: " + str(req.json()['result']['message_id'])) + + +def main(): + if TELEGRAM_TOKEN == '': + print("Telegram token is not set") + exit() + + if TELEGRAM_CHANNEL == '': + print("Telegram channel is not set") + exit() + + session = create_session(PROXY_URL) + try: + publish_new_posts( + send_message=lambda message: send_telegram_message(session, message), + session=session, + url=URL, + ) + except RuntimeError as error: + print(error) + exit() + + +if __name__ == "__main__": + main()