1
0
mirror of https://github.com/serega404/VodokanalBot.git synced 2026-07-01 17:39:17 +03:00

Compare commits

..

4 Commits

14 changed files with 224 additions and 2567 deletions
Executable
+9
View File
@@ -0,0 +1,9 @@
#
# https://help.github.com/articles/dealing-with-line-endings/
#
# These are explicitly windows files and should use crlf
* text=auto eol=lf
*.bat text eol=crlf
*.nsi text eol=crlf
+2
View File
@@ -0,0 +1,2 @@
__pycache__/
*.py[cod]
+3 -2
View File
@@ -11,7 +11,8 @@ RUN pip3 install -r requirements.txt
COPY crontab /tmp/crontab
RUN cat /tmp/crontab > /etc/crontabs/root
COPY main.py main.py
COPY start_telegram.py start_telegram.py
COPY parser.py parser.py
# run crond as main process of container
CMD ["crond", "-f", "-l", "2"]
CMD ["crond", "-f", "-l", "2"]
+29 -3
View File
@@ -2,7 +2,7 @@
[![MIT License](https://img.shields.io/github/license/serega404/VodokanalBot)](https://github.com/serega404/VodokanalBot)
### Запуск в Docker
## Запуск в Docker
``` Docker
docker volume create vodokanal_bot_data
@@ -15,11 +15,37 @@ docker run -d --name VodokanalBot \
ghcr.io/serega404/vodokanalbot:main
```
### Библиотеки
## Запуск в Docker Compose
Укажи `TELEGRAM_TOKEN` и `TELEGRAM_CHANNEL` в [`docker-compose.yml`](./docker-compose.yml), затем запусти:
``` Docker
docker compose up -d --build
```
## Интеграции
Общая логика парсинга, работы с `data/db.json` и поиска новых сообщений вынесена в [`parser.py`](./parser.py).
Для новой интеграции достаточно создать свой адаптер отправки и передать его в `publish_new_posts`:
``` Python
from parser import create_session, publish_new_posts
session = create_session()
publish_new_posts(
send_message=lambda message: print(message),
session=session,
url="http://www.tgnvoda.ru/avarii.php",
)
```
## Библиотеки
* [Requests](https://requests.readthedocs.io/en/latest/)
* [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/)
### Лицензия
## Лицензия
Распространяется под MIT License. Смотри файл [`LICENSE`](./LICENSE), чтобы узнать подробности.
+2 -2
View File
@@ -1,2 +1,2 @@
@reboot cd /app && python3 /app/main.py
*/10 * * * * cd /app && python3 /app/main.py
@reboot cd /app && python3 /app/start_telegram.py
*/10 * * * * cd /app && python3 /app/start_telegram.py
+16
View File
@@ -0,0 +1,16 @@
services:
vodokanalbot:
image: ghcr.io/serega404/vodokanalbot:main
container_name: VodokanalBot
restart: always
environment:
TZ: Europe/Moscow
TELEGRAM_TOKEN: TOKEN
TELEGRAM_CHANNEL: CHAT_ID
# PROXY_URL: socks5h://user:password@proxy-host:1080
# PROXY_URL: http://user:password@proxy-host:3128
volumes:
- vodokanal_bot_data:/app/data
volumes:
vodokanal_bot_data:
-83
View File
@@ -1,83 +0,0 @@
import requests, json, os
from bs4 import BeautifulSoup
from datetime import datetime
# Config: every setting comes from the environment.
URL = os.getenv('VODOKANAL_URL', 'http://www.tgnvoda.ru/avarii.php')
SEND_SILENT = os.getenv('SEND_SILENT', False)
TELEGRAM_TOKEN = os.getenv('TELEGRAM_TOKEN', '')
TELEGRAM_CHANNEL = os.getenv('TELEGRAM_CHANNEL', '')

# Both Telegram settings are mandatory; stop before doing any work if one is missing.
for setting, complaint in (
    (TELEGRAM_TOKEN, "Telegram token is not set"),
    (TELEGRAM_CHANNEL, "Telegram channel is not set"),
):
    if setting == '':
        print(complaint)
        exit()

# Load database: the list of entries stored by the previous run, or None on the first run.
db = None
if not os.path.isfile('data/db.json'):
    print("Database not loaded")
else:
    with open('data/db.json', 'r', encoding='utf-8') as db_file:
        db = json.load(db_file)

# Get data
response = requests.get(URL)
if response.status_code != 200:
    print("Request error: " + str(response.status_code))
    exit()

page = BeautifulSoup(response.content, "html.parser")

# Collect today's entries as "<date>$<text>" strings.
elements = []
for entry in page.find_all('font', size='2', face='VERDANA'):
    date = entry.select_one('font:nth-of-type(1)').b.text
    date_parts = date.split('.')
    now = datetime.today()
    # Only the day and month are compared; the year part of the date is ignored.
    if date_parts[0] != str(now.day).zfill(2) or date_parts[1] != str(now.month).zfill(2):
        continue
    body = entry.select_one('font:nth-of-type(2)').text.replace('\n', '')
    elements.append(date + "$" + body)

if not elements:
    print("No posts")
    exit()

print("The number of posts for this day:", len(elements))
# Send telegram message
def send_message(message):
    """Send ``message`` to the configured Telegram channel.

    The text is passed through ``params`` so that it is URL-encoded; when it
    was concatenated into the URL, characters such as ``&`` or ``#`` cut the
    message short or corrupted the request.

    Prints the id of the sent message on success. On any non-200 response the
    status code is printed and the script exits.
    """
    req = requests.get(
        "https://api.telegram.org/bot" + TELEGRAM_TOKEN + "/sendMessage",
        params={
            'chat_id': TELEGRAM_CHANNEL,
            'disable_notification': str(SEND_SILENT),
            'text': message,
        },
    )
    if req.status_code != 200:
        print("Telegram request error: " + str(req.status_code))
        exit()
    else:
        print("Telegram message sent, mess id: " + str(req.json()['result']['message_id']))
# Compare db and elements
if db is not None:
    # Build the difference as a list: the previous ``set(...) == []`` test was
    # never true, so "No new posts" was unreachable, and iterating a set sent
    # the messages in arbitrary order. dict.fromkeys() keeps page order while
    # still dropping duplicate entries, as the set difference did.
    known = set(db)
    diff = [element for element in dict.fromkeys(elements) if element not in known]
    if not diff:
        print("No new posts")
        exit()

    for i in diff:
        # Entries are stored as "<date>$<text>"; only the text is sent.
        send_message(i.split("$", 1)[1])
else:
    # No database yet: everything found for today is new.
    for element in elements:
        send_message(element.split("$", 1)[1])

# Save database
if not os.path.exists("data"):
    os.makedirs("data")

with open('data/db.json', 'w', encoding='utf-8') as f:
    json.dump(elements, f, ensure_ascii=False)

print("Database updated")
+110
View File
@@ -0,0 +1,110 @@
import json
import os
from dataclasses import dataclass
from datetime import datetime
import requests
from bs4 import BeautifulSoup
# Default path of the JSON database file, used by the load/save helpers below
# when no explicit path is given.
DEFAULT_DB_PATH = "data/db.json"
@dataclass(frozen=True)
class Post:
    """An immutable announcement: the date string and the announcement text."""

    date: str
    text: str

    @property
    def key(self):
        """Return the database key: ``date`` and ``text`` joined by ``$``."""
        return "$".join((self.date, self.text))
def create_session(proxy_url=""):
    """Create a ``requests.Session``, optionally routed through a proxy.

    When ``proxy_url`` is non-empty it is registered for both the ``http``
    and ``https`` schemes; otherwise the session's proxy settings are left
    untouched.
    """
    http_session = requests.Session()
    if not proxy_url:
        return http_session

    for scheme in ("http", "https"):
        http_session.proxies[scheme] = proxy_url
    return http_session
def load_database(path=DEFAULT_DB_PATH):
    """Load the stored post keys from the JSON file at ``path``.

    Returns the decoded JSON content, or ``None`` when no usable database is
    available: the file does not exist, or its content cannot be decoded.
    """
    if not os.path.isfile(path):
        print("Database not loaded")
        return None

    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as error:
        # A truncated or damaged file would otherwise raise on every run and
        # never be rewritten; report it and behave as if no database exists.
        print("Database not loaded: " + str(error))
        return None
def save_database(posts, path=DEFAULT_DB_PATH):
    """Write the keys of ``posts`` to ``path`` as a JSON list.

    The parent directory of ``path`` is created first when it is missing.
    """
    parent = os.path.dirname(path)
    needs_directory = bool(parent) and not os.path.exists(parent)
    if needs_directory:
        os.makedirs(parent)

    with open(path, "w", encoding="utf-8") as db_file:
        keys = [post.key for post in posts]
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        json.dump(keys, db_file, ensure_ascii=False)

    print("Database updated")
def fetch_posts(session, url, today=None, timeout=30):
    """Download ``url`` with ``session`` and return the posts parsed from it.

    Args:
        session: object with a ``requests``-style ``get`` method.
        url: address of the page to download.
        today: optional date forwarded to ``parse_posts``.
        timeout: seconds to wait for the server. Without a timeout a stalled
            connection would block the process indefinitely.

    Raises:
        RuntimeError: when the response status code is not 200.
    """
    req = session.get(url, timeout=timeout)
    if req.status_code != 200:
        raise RuntimeError("Request error: " + str(req.status_code))
    return parse_posts(req.content, today=today)
def parse_posts(content, today=None):
    """Extract the posts dated ``today`` from the page markup in ``content``.

    ``today`` defaults to the current local date. Each matching
    ``<font size="2" face="VERDANA">`` element supplies the date from the
    bold text of its first nested ``font`` and the text from its second
    nested ``font``, with newlines removed.
    """
    reference_day = today or datetime.today()
    document = BeautifulSoup(content, "html.parser")

    todays_posts = []
    for entry in document.find_all("font", size="2", face="VERDANA"):
        stamp = entry.select_one("font:nth-of-type(1)").b.text
        if is_today(stamp, reference_day):
            body = entry.select_one("font:nth-of-type(2)").text.replace("\n", "")
            todays_posts.append(Post(date=stamp, text=body))
    return todays_posts
def is_today(date, today):
    """Return True when ``date`` has the same day and month as ``today``.

    ``date`` is a dot-separated string whose first two fields are the
    zero-padded day and month; any further fields (such as the year) are
    ignored. A string with fewer than two fields is treated as "not today"
    instead of raising, so one malformed entry cannot abort parsing.
    """
    parts = date.split(".")
    if len(parts) < 2:
        return False
    day, month = parts[:2]
    return day == str(today.day).zfill(2) and month == str(today.month).zfill(2)
def get_new_posts(posts, database):
    """Return the posts whose key is not yet present in ``database``.

    ``database`` is an iterable of stored keys, or ``None`` when there is no
    database, in which case ``posts`` is returned unchanged.
    """
    if database is None:
        return posts

    known_keys = frozenset(database)
    unseen = []
    for candidate in posts:
        if candidate.key in known_keys:
            continue
        unseen.append(candidate)
    return unseen
def publish_new_posts(send_message, session, url, db_path=DEFAULT_DB_PATH):
    """Fetch today's posts, send the unseen ones and record what was sent.

    Args:
        send_message: callable invoked with the text of every new post.
        session: HTTP session passed on to ``fetch_posts``.
        url: page to fetch the posts from.
        db_path: location of the JSON database of stored post keys.

    The database is written only if at least one message was delivered. If
    ``send_message`` fails partway through, the error still propagates, but
    the posts already delivered are stored first so they are not sent again
    on the next run.
    """
    database = load_database(db_path)
    posts = fetch_posts(session, url)
    if not posts:
        print("No posts")
        return

    print("The number of posts for this day:", len(posts))
    new_posts = get_new_posts(posts, database)
    if not new_posts:
        print("No new posts")
        return

    sent_count = 0
    try:
        for post in new_posts:
            send_message(post.text)
            sent_count += 1
    finally:
        # Runs on success and on failure, including the SystemExit raised by exit().
        if sent_count:
            # Leave out only the posts that were not delivered, so they are
            # picked up again next time; on full success this is all of ``posts``.
            unsent_keys = {post.key for post in new_posts[sent_count:]}
            save_database(
                [post for post in posts if post.key not in unsent_keys],
                db_path,
            )
+2 -2
View File
@@ -1,2 +1,2 @@
requests
beautifulsoup4
requests[socks]
beautifulsoup4
-9
View File
@@ -1,9 +0,0 @@
<?php
// Bot configuration: this file returns the settings array to the script that includes it.
return [
// Page that is loaded and scanned for posts
'url' => 'http://www.tgnvoda.ru/avarii.php',
// Telegram
// Sent as the disable_notification parameter of the sendMessage request
'Send_silent' => 'false',
// Bot token inserted into the Telegram Bot API URL
'Telegram_token' => 'token',
// Sent as the chat_id parameter of the sendMessage request
'Telegram_channel' => '@channel'
];
?>
@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2019 S.C. Chen, John Schlick, logmanoriginal
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
File diff suppressed because it is too large Load Diff
-92
View File
@@ -1,92 +0,0 @@
<?php
// Connect library
include('libs/simplehtmldom/simple_html_dom.php');

// Load config
$config = include('config.php');

// Load the posts stored by the previous run (if any)
$db = null;
if (file_exists("db.json")) {
    $db = json_decode(file_get_contents('./db.json', true), true);
    // print_r() prints by itself; wrapping it in echo also printed its return value ("1")
    print_r($db);
} else {
    echo ("Database not exist\n");
}

// get DOM from URL
$html = file_get_html($config['url']);
if (!$html) {
    // file_get_html() returns false when the page cannot be loaded;
    // calling ->find() on false would be a fatal error
    echo "Page not loaded\n";
    exit;
}

$all_elements = null;
$dateNow = date("d.m.Y");
echo "Date now: " . $dateNow . "\n";

foreach ($html->find('td[bgcolor="#ffffff"]') as $e) {
    // First dd.mm.yyyy (or dd.mm.yy) date found in the cell text
    $date = "";
    preg_match("/(0[1-9]|1[0-9]|2[0-9]|3[01])[.](0[1-9]|1[0-2])[.](20[0-9][0-9]|[0-9][0-9])/", $e->plaintext, $date);
    if (strcasecmp(reset($date), $dateNow) == 0) { // get current date posts
        // remove date from content
        $content = str_replace(reset($date), "", $e->plaintext);
        $content = htmlspecialchars_decode($content);
        $content = str_replace("\r\n \r\n", "", $content);
        $content = str_replace("\n\n", "", $content);
        display_message($content);
        // add post to array
        $all_elements[] = $content;
    }
}

if ($all_elements == null) {
    echo "Posts not found\n";
    exit;
}

echo "The number of posts for this day: " . count($all_elements) . "\n";

if ($db != null) {
    // Send only the posts that are not in the stored database
    $diff = array_diff($all_elements, $db);
    if ($diff != null) {
        foreach ($diff as $e) {
            echo "Message sended.\n";
            send_telegram_message($e);
        }
    } else {
        echo "No changes.\n";
        exit;
    }
} else {
    // No database yet: every post found for today is sent
    foreach ($all_elements as $e) {
        echo "Message sended.\n";
        send_telegram_message($e);
    }
}

// save posts to database
$json = json_encode($all_elements, JSON_UNESCAPED_UNICODE);
file_put_contents("db.json", $json);
/**
 * Print a message framed by start and end marker lines.
 *
 * @param string $message Text to print.
 */
function display_message($message)
{
    echo "---- START MESSAGE ----\n", $message, "\n", "---- END MESSAGE ----\n";
}
/**
 * Send a message to the configured Telegram channel through the Bot API.
 *
 * Prints the id of the sent message on success, or the HTTP status line
 * (empty when no response was received) on failure.
 *
 * @param string $message Text to send.
 */
function send_telegram_message($message)
{
    global $config;

    // http_build_query() URL-encodes every value, not only the text,
    // so a chat id or flag with special characters cannot break the query
    $query = http_build_query([
        'chat_id' => $config['Telegram_channel'],
        'disable_notification' => $config['Send_silent'],
        'text' => $message,
    ], '', '&');

    $response = file_get_contents("https://api.telegram.org/bot" . $config['Telegram_token'] . "/sendMessage?" . $query);

    // $http_response_header is filled in by the HTTP wrapper; it stays unset
    // when the request failed before any response arrived
    $status_line = isset($http_response_header[0]) ? $http_response_header[0] : "";

    // Accept any protocol version with status 200 instead of one exact status line
    if ($response !== false && preg_match('#^HTTP/\S+\s+200\b#', $status_line)) {
        $obj = json_decode($response);
        // Reading a missing property does not throw, so the former try/catch
        // could never run; check for the id explicitly instead
        if (isset($obj->result->message_id)) {
            echo "Successfully sented message, mess id: " . $obj->result->message_id . "\n";
        } else {
            echo "Telegram: Не удалось записать id в файл\n";
        }
    } else {
        echo "Telegram: Не удачный запрос: " . $status_line . "\n";
    }
}
+51
View File
@@ -0,0 +1,51 @@
import os
from parser import create_session, publish_new_posts
# Page with the posts; can be overridden with VODOKANAL_URL.
URL = os.environ.get('VODOKANAL_URL', 'http://www.tgnvoda.ru/avarii.php')
# Sent (after str()) as Telegram's disable_notification parameter.
# NOTE: the default is the bool False, while a value from the environment is a string.
SEND_SILENT = os.environ.get('SEND_SILENT', False)
# Bot token and target chat; both must be non-empty or main() exits.
TELEGRAM_TOKEN = os.environ.get('TELEGRAM_TOKEN', '')
TELEGRAM_CHANNEL = os.environ.get('TELEGRAM_CHANNEL', '')
# Optional proxy URL passed to create_session(); empty means no proxy is configured here.
PROXY_URL = os.environ.get('PROXY_URL', '')
def send_telegram_message(session, message, timeout=30):
    """Send ``message`` to the configured Telegram channel.

    Args:
        session: object with a ``requests``-style ``get`` method.
        message: text of the message.
        timeout: seconds to wait for the Telegram API. Without a timeout a
            stalled connection would block the process indefinitely.

    Prints the id of the sent message on success. On any non-200 response the
    status code is printed and the process exits.
    """
    req = session.get(
        "https://api.telegram.org/bot" + TELEGRAM_TOKEN + "/sendMessage",
        params={
            'chat_id': TELEGRAM_CHANNEL,
            'disable_notification': str(SEND_SILENT),
            'text': message,
        },
        timeout=timeout,
    )
    if req.status_code != 200:
        print("Telegram request error: " + str(req.status_code))
        exit()
    else:
        print("Telegram message sent, mess id: " + str(req.json()['result']['message_id']))
def main():
    """Check the Telegram settings, then publish new posts to the channel.

    Exits after printing a message when the token or channel is missing, or
    when publishing raises ``RuntimeError``.
    """
    required_settings = (
        (TELEGRAM_TOKEN, "Telegram token is not set"),
        (TELEGRAM_CHANNEL, "Telegram channel is not set"),
    )
    for value, complaint in required_settings:
        if value == '':
            print(complaint)
            exit()

    session = create_session(PROXY_URL)

    def deliver(message):
        # The same session is used for fetching the page and for Telegram.
        return send_telegram_message(session, message)

    try:
        publish_new_posts(send_message=deliver, session=session, url=URL)
    except RuntimeError as error:
        print(error)
        exit()


if __name__ == "__main__":
    main()