mirror of
https://github.com/serega404/VodokanalBot.git
synced 2026-05-30 12:10:01 +03:00
Вынес парсер в отдельный файл
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
+2
-1
@@ -11,7 +11,8 @@ RUN pip3 install -r requirements.txt
|
||||
COPY crontab /tmp/crontab
|
||||
RUN cat /tmp/crontab > /etc/crontabs/root
|
||||
|
||||
COPY main.py main.py
|
||||
COPY start_telegram.py start_telegram.py
|
||||
COPY parser.py parser.py
|
||||
|
||||
# run crond as main process of container
|
||||
CMD ["crond", "-f", "-l", "2"]
|
||||
@@ -23,6 +23,24 @@ docker run -d --name VodokanalBot \
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
## Интеграции
|
||||
|
||||
Общая логика парсинга, работы с `data/db.json` и поиска новых сообщений вынесена в [`parser.py`](./parser.py).
|
||||
|
||||
Для новой интеграции достаточно создать свой адаптер отправки и передать его в `publish_new_posts`:
|
||||
|
||||
``` Python
|
||||
from parser import create_session, publish_new_posts
|
||||
|
||||
session = create_session()
|
||||
|
||||
publish_new_posts(
|
||||
send_message=lambda message: print(message),
|
||||
session=session,
|
||||
url="http://www.tgnvoda.ru/avarii.php",
|
||||
)
|
||||
```
|
||||
|
||||
## Библиотеки
|
||||
|
||||
* [Requests](https://requests.readthedocs.io/en/latest/)
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
@reboot cd /app && python3 /app/main.py
|
||||
*/10 * * * * cd /app && python3 /app/main.py
|
||||
@reboot cd /app && python3 /app/start_telegram.py
|
||||
*/10 * * * * cd /app && python3 /app/start_telegram.py
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
import requests, json, os
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
|
||||
# Config
|
||||
|
||||
URL = os.environ.get('VODOKANAL_URL', 'http://www.tgnvoda.ru/avarii.php')
SEND_SILENT = os.environ.get('SEND_SILENT', False)
TELEGRAM_TOKEN = os.environ.get('TELEGRAM_TOKEN', '')
TELEGRAM_CHANNEL = os.environ.get('TELEGRAM_CHANNEL', '')
PROXY_URL = os.environ.get('PROXY_URL', '')

# Missing credentials are a configuration error, so leave with a non-zero
# status instead of the status-0 exit() helper from the site module.
if TELEGRAM_TOKEN == '':
    print("Telegram token is not set")
    raise SystemExit(1)

if TELEGRAM_CHANNEL == '':
    print("Telegram channel is not set")
    raise SystemExit(1)

# Configure HTTP client

session = requests.Session()

if PROXY_URL != '':
    # The same proxy is used for both plain and TLS traffic.
    session.proxies.update({
        'http': PROXY_URL,
        'https': PROXY_URL,
    })

# Load database

db = None
if os.path.isfile('data/db.json'):
    with open('data/db.json', 'r', encoding='utf-8') as f:
        db = json.load(f)
else:
    print("Database not loaded")

# Get data

# Without a timeout a stalled server would keep this cron job alive forever.
req = session.get(URL, timeout=30)

if req.status_code != 200:
    print("Request error: " + str(req.status_code))
    raise SystemExit(1)

soup = BeautifulSoup(req.content, "html.parser")

# Read the clock once so every tag is compared against the same day, even
# when the script happens to run across midnight.
today = datetime.today()
today_day = str(today.day).zfill(2)
today_month = str(today.month).zfill(2)

elements = []
for tag in soup.find_all('font', size='2', face='VERDANA'):
    date = tag.select_one('font:nth-of-type(1)').b.text
    date_parts = date.split('.')
    # A date without a "." separator cannot be today's; skip it rather than
    # failing with IndexError.
    if len(date_parts) < 2:
        continue
    if date_parts[0] != today_day or date_parts[1] != today_month:
        continue
    # Entries are stored as "<date>$<text>" with newlines removed.
    elements.append(date + "$" + tag.select_one('font:nth-of-type(2)').text.replace('\n', ''))

if not elements:
    # Nothing published today is a normal outcome, hence status 0.
    print("No posts")
    raise SystemExit(0)

print("The number of posts for this day:", len(elements))
|
||||
|
||||
# Send telegram message
|
||||
|
||||
def send_message(message):
    """Send ``message`` to the configured Telegram channel.

    Uses the module-level ``session``, ``TELEGRAM_TOKEN``,
    ``TELEGRAM_CHANNEL`` and ``SEND_SILENT`` values.

    Args:
        message: Text passed as the ``text`` parameter of ``sendMessage``.

    Raises:
        SystemExit: With status 1 when Telegram answers with a non-200
            status code (the error is printed first).
    """
    req = session.get(
        "https://api.telegram.org/bot" + TELEGRAM_TOKEN + "/sendMessage",
        params={
            'chat_id': TELEGRAM_CHANNEL,
            'disable_notification': str(SEND_SILENT),
            'text': message,
        },
        # Bound the wait so an unreachable API cannot hang the cron job.
        timeout=30,
    )
    if req.status_code != 200:
        print("Telegram request error: " + str(req.status_code))
        # A failed delivery is an error: exit non-zero, and do it without
        # relying on the site module's exit() helper.
        raise SystemExit(1)

    print("Telegram message sent, mess id: " + str(req.json()['result']['message_id']))
|
||||
|
||||
# Compare db and elements
|
||||
|
||||
# dict.fromkeys() drops duplicates while keeping the order of the page, so
# messages are sent in a deterministic order (a set difference is unordered).
known = set(db) if db is not None else set()
new_elements = [element for element in dict.fromkeys(elements) if element not in known]

if db is not None and not new_elements:
    print("No new posts")
    raise SystemExit(0)

sent_count = 0
try:
    for element in new_elements:
        # Stored entries look like "<date>$<text>"; only the text is sent.
        send_message(element.split("$", 1)[1])
        sent_count += 1
finally:
    # Save database
    #
    # This runs even when send_message() aborts the script, so entries that
    # were already delivered are remembered and not sent again next run.
    # Undelivered entries are left out of the database and get retried.
    unsent = set(new_elements[sent_count:])
    if sent_count or not unsent:
        os.makedirs("data", exist_ok=True)

        with open('data/db.json', 'w', encoding='utf-8') as f:
            json.dump(
                [element for element in elements if element not in unsent],
                f,
                ensure_ascii=False,
            )
        print("Database updated")
|
||||
@@ -0,0 +1,110 @@
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
DEFAULT_DB_PATH = "data/db.json"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Post:
    """Immutable record pairing a post's date string with its text."""

    date: str
    text: str

    @property
    def key(self):
        """Return ``date`` and ``text`` joined by a ``$`` separator.

        This is the string form that ``save_database`` writes and that
        ``get_new_posts`` compares against the stored entries.
        """
        return "$".join((self.date, self.text))
|
||||
|
||||
|
||||
def create_session(proxy_url=""):
    """Build a ``requests.Session``, optionally routed through a proxy.

    Args:
        proxy_url: Proxy URL registered for both the ``http`` and ``https``
            schemes. A falsy value leaves the session's proxies untouched.

    Returns:
        The newly created ``requests.Session``.
    """
    http_client = requests.Session()
    if not proxy_url:
        return http_client

    http_client.proxies.update(dict.fromkeys(("http", "https"), proxy_url))
    return http_client
|
||||
|
||||
|
||||
def load_database(path=DEFAULT_DB_PATH):
    """Load the list of already published post keys from ``path``.

    Args:
        path: Location of the JSON database file.

    Returns:
        The stored list, or ``None`` when the file does not exist, cannot be
        decoded, or does not contain a JSON list. ``None`` means "nothing is
        known yet" to the caller.
    """
    if not os.path.isfile(path):
        print("Database not loaded")
        return None

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except ValueError as error:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError. A truncated or damaged file would otherwise crash every
        # run and never be rewritten; treat it like a missing database.
        print("Database is corrupted and will be rebuilt: " + str(error))
        return None

    if not isinstance(data, list):
        # Only a list of keys is ever written by save_database.
        print("Database has an unexpected format and will be rebuilt")
        return None

    return data
|
||||
|
||||
|
||||
def save_database(posts, path=DEFAULT_DB_PATH):
    """Write the keys of ``posts`` to ``path`` as a JSON list.

    Args:
        posts: Iterable of objects exposing a ``key`` attribute.
        path: Destination file; its parent directory is created if needed.
    """
    # Collect the keys before the file is opened for writing: opening with
    # "w" truncates it, so a failure while reading ``post.key`` must not be
    # able to wipe the existing database.
    keys = [post.key for post in posts]

    directory = os.path.dirname(path)
    if directory:
        # exist_ok avoids the check-then-create race between overlapping runs.
        os.makedirs(directory, exist_ok=True)

    with open(path, "w", encoding="utf-8") as f:
        json.dump(keys, f, ensure_ascii=False)
    print("Database updated")
|
||||
|
||||
|
||||
def fetch_posts(session, url, today=None, timeout=30):
    """Download ``url`` and return the posts parsed from the response.

    Args:
        session: Object with a ``requests.Session``-compatible ``get``.
        url: Address of the page to download.
        today: Optional date forwarded to ``parse_posts``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        Whatever ``parse_posts`` returns for the response body.

    Raises:
        RuntimeError: If the request fails or the status code is not 200.
    """
    try:
        req = session.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Report network failures through the same exception type as HTTP
        # errors so callers need a single handler.
        raise RuntimeError("Request error: " + str(error)) from error

    if req.status_code != 200:
        raise RuntimeError("Request error: " + str(req.status_code))

    return parse_posts(req.content, today=today)
|
||||
|
||||
|
||||
def parse_posts(content, today=None):
    """Extract today's posts from the HTML in ``content``.

    Every ``<font size="2" face="VERDANA">`` element is inspected: the date
    is read from the ``<b>`` inside its first nested ``<font>`` and the text
    from its second nested ``<font>``.

    Args:
        content: HTML document handed to BeautifulSoup.
        today: Date to match against; defaults to ``datetime.today()``.

    Returns:
        A list of ``Post`` objects, in document order, whose date passes
        ``is_today``. Elements missing any of the expected nested tags are
        skipped.
    """
    if today is None:
        today = datetime.today()
    soup = BeautifulSoup(content, "html.parser")

    posts = []
    for tag in soup.find_all("font", size="2", face="VERDANA"):
        date_tag = tag.select_one("font:nth-of-type(1)")
        text_tag = tag.select_one("font:nth-of-type(2)")
        # select_one() returns None when nothing matches; skip incomplete
        # elements instead of failing the whole run with AttributeError.
        if date_tag is None or date_tag.b is None or text_tag is None:
            continue

        date = date_tag.b.text
        if not is_today(date, today):
            continue

        posts.append(Post(date=date, text=text_tag.text.replace("\n", "")))

    return posts
|
||||
|
||||
|
||||
def is_today(date, today):
    """Tell whether ``date`` falls on the day and month of ``today``.

    Args:
        date: String beginning with ``<day>.<month>``; anything after the
            second component (such as a year) is ignored.
        today: Object with integer ``day`` and ``month`` attributes, for
            example a ``datetime``.

    Returns:
        ``True`` when day and month match ``today``. ``False`` otherwise,
        including when ``date`` does not start with two dot-separated
        numbers.
    """
    parts = date.strip().split(".")
    if len(parts) < 2:
        return False

    try:
        # Comparing numbers accepts both "05.03" and "5.3".
        day, month = int(parts[0]), int(parts[1])
    except ValueError:
        return False

    return day == today.day and month == today.month
|
||||
|
||||
|
||||
def get_new_posts(posts, database):
    """Return the posts whose key is not yet recorded in ``database``.

    Args:
        posts: Iterable of objects exposing a ``key`` attribute.
        database: Iterable of known keys, or ``None`` when nothing is known.

    Returns:
        A new list in the order of ``posts``. Each key appears at most once,
        so a post that is listed twice on the page is not published twice.
    """
    seen = set() if database is None else set(database)

    new_posts = []
    for post in posts:
        if post.key in seen:
            continue
        # Remember the key so a later duplicate of this post is dropped.
        seen.add(post.key)
        new_posts.append(post)

    return new_posts
|
||||
|
||||
|
||||
def publish_new_posts(send_message, session, url, db_path=DEFAULT_DB_PATH):
    """Fetch today's posts and hand the unseen ones to ``send_message``.

    Args:
        send_message: Callable invoked with the text of each new post.
        session: HTTP session passed to ``fetch_posts``.
        url: Page to download.
        db_path: JSON file that remembers which posts were already sent.

    Raises:
        RuntimeError: Propagated from ``fetch_posts`` when the download
            fails. Anything raised by ``send_message`` propagates as well,
            after the posts delivered so far have been recorded.
    """
    database = load_database(db_path)
    posts = fetch_posts(session, url)

    if not posts:
        print("No posts")
        return

    print("The number of posts for this day:", len(posts))

    new_posts = get_new_posts(posts, database)
    if not new_posts:
        print("No new posts")
        return

    pending = {post.key for post in new_posts}
    total = len(pending)
    try:
        for post in new_posts:
            send_message(post.text)
            pending.discard(post.key)
    finally:
        # Record progress even when send_message() raises or exits midway.
        # Otherwise the posts that were already delivered would be sent
        # again on the next run. Undelivered posts stay out of the database
        # and are retried. On full success this stores every post, as before.
        if len(pending) < total:
            save_database(
                [post for post in posts if post.key not in pending],
                db_path,
            )
|
||||
@@ -0,0 +1,51 @@
|
||||
import os
|
||||
|
||||
from parser import create_session, publish_new_posts
|
||||
|
||||
|
||||
# Runtime settings, read once from the environment when the module loads.
# Each one falls back to the default given as the second argument.
URL = os.getenv('VODOKANAL_URL', 'http://www.tgnvoda.ru/avarii.php')
SEND_SILENT = os.getenv('SEND_SILENT', False)
TELEGRAM_TOKEN = os.getenv('TELEGRAM_TOKEN', '')
TELEGRAM_CHANNEL = os.getenv('TELEGRAM_CHANNEL', '')
PROXY_URL = os.getenv('PROXY_URL', '')
|
||||
|
||||
|
||||
def send_telegram_message(session, message):
    """Send ``message`` to the configured Telegram channel.

    Calls the Bot API ``sendMessage`` method with the module-level
    ``TELEGRAM_TOKEN`` and ``TELEGRAM_CHANNEL``; ``disable_notification`` is
    sent as ``str(SEND_SILENT)``.

    Args:
        session: HTTP session with a ``requests``-compatible ``get``.
        message: Text of the message.

    Raises:
        RuntimeError: If Telegram answers with a non-200 status code. This
            is the exception type ``main`` already handles, so the failure
            is reported there instead of ending the process from a helper.
    """
    req = session.get(
        "https://api.telegram.org/bot" + TELEGRAM_TOKEN + "/sendMessage",
        params={
            'chat_id': TELEGRAM_CHANNEL,
            'disable_notification': str(SEND_SILENT),
            'text': message,
        },
        # Bound the wait so an unreachable API cannot hang the cron job.
        timeout=30,
    )
    if req.status_code != 200:
        raise RuntimeError("Telegram request error: " + str(req.status_code))

    print("Telegram message sent, mess id: " + str(req.json()['result']['message_id']))
|
||||
|
||||
|
||||
def main():
    """Check the Telegram settings and publish new posts to the channel.

    Raises:
        SystemExit: With status 1 when the token or channel is not set, or
            when publishing fails with ``RuntimeError``. The reason is
            printed first.
    """
    # SystemExit is raised directly: the exit() helper comes from the site
    # module and is intended for interactive use, and a failure should not
    # end the process with a success status.
    if TELEGRAM_TOKEN == '':
        print("Telegram token is not set")
        raise SystemExit(1)

    if TELEGRAM_CHANNEL == '':
        print("Telegram channel is not set")
        raise SystemExit(1)

    session = create_session(PROXY_URL)
    try:
        publish_new_posts(
            send_message=lambda message: send_telegram_message(session, message),
            session=session,
            url=URL,
        )
    except RuntimeError as error:
        print(error)
        raise SystemExit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user