worker-parser/src/services/worker.py


import asyncio
import json
import sys
import bs4
import requests
from src.dataclasses.loglevels import LogLevels
from src.dataclasses.task import RequestTsInfoTask
from src.services.dummy_broker import Broker
from src.services.logger import Logger

class Worker:
    """
    Worker polls tasks from the Broker and executes them in the order they appear in the queue.
    """

    def __init__(self):
        self.broker = Broker()
        self.logger = Logger(LogLevels.DEBUG, sys.stdout)

    async def process(self):
        """
        Poll tasks from the Broker in an endless loop and work on them, sleeping when the queue is empty.
        :return:
        """
        self.logger.info("Worker started")
        while True:
            task = self.broker.get_task()
            if not task:
                sleep_for = 10
                self.logger.info(f"No tasks for now. Sleeping for {sleep_for} seconds")
                await asyncio.sleep(sleep_for)
                continue
            self.logger.debug(f"Got a task with type {type(task).__name__}. Working on it.")
            match type(task).__name__:
                case RequestTsInfoTask.__name__:
                    await self.task_request_ts(task)
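                case _:
                    # Task types without a dedicated handler are dropped here;
                    # log them so dropped tasks are visible in the debug output.
                    self.logger.debug(f"No handler for task type {type(task).__name__}, skipping it.")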

    async def task_request_ts(self, task: RequestTsInfoTask):
        """
        Fetch and parse OSAGO policy data from https://nsis.ru/products/osago/check/
        :param RequestTsInfoTask task:
        :return:
        """
        headers = {
            "X-Requested-With": "XMLHttpRequest",
            "Content-Type": "multipart/form-data; boundary=---------------------------330424154228665440354056616977"
        }
        data = f'-----------------------------330424154228665440354056616977\r\nContent-Disposition: form-data; name="licenseplate"\r\n\r\n{task.license_plate}\r\n-----------------------------330424154228665440354056616977\r\nContent-Disposition: form-data; name="requestdate"\r\n\r\n{task.prior_to_date}'
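        # Note on the hand-rolled multipart body above: each part delimiter must be the
        # Content-Type boundary prefixed with "--", and RFC 2046 also expects a closing
        # "--<boundary>--" delimiter at the end of the body. If this ever stops working,
        # requests can build an equivalent multipart/form-data payload itself via its
        # `files=` parameter, which also sets the boundary in the Content-Type header.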
        with requests.Session() as s:
            response = s.post('https://nsis.ru/handle-form/1314895756519276544/', headers=headers, data=data)
            response_text = response.text
            response_json = json.loads(response_text)
            # For some reason, the backend's response contains a huge (~4 MB) 'random_str' field
            # filled with random data; blank it so it does not flood the debug log.
            response_json["random_str"] = ""
            self.logger.debug(f"First stage response (JSON): {response_json}")
            if not response_json["isSuccess"]:
                self.logger.error("Request to https://nsis.ru/handle-form/1314895756519276544 has failed. Details in [DEBUG].")
                self.logger.debug(response.reason)
                # Without the process/form identifiers there is nothing left to do for this task.
                return
            process_id = response_json["data"]["processId"]
            form_code = response_json["data"]["formCode"]
            # TODO: find optimal time for request completion
            await asyncio.sleep(7)
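            # Sketch for the TODO above (assumed response shape, not verified against the
            # real API): poll the status endpoint instead of waiting a fixed 7 seconds, e.g.
            #
            #     for _ in range(10):
            #         status = s.get(f"https://nsis.ru/api/v1/status/{process_id}/?formCode={form_code}",
            #                        headers=headers)
            #         if "modals" in status.json():
            #             break
            #         await asyncio.sleep(2)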
            response = s.get(f"https://nsis.ru/api/v1/status/{process_id}/?formCode={form_code}", headers=headers)
            response_text = response.text
            response_json = json.loads(response_text)
            response_json["random_str"] = ""
            self.logger.debug(f"Second stage response (JSON): {response_json}")
            html = response_json["modals"]["html"]
            soup = bs4.BeautifulSoup(html, "html.parser")
            log_header = f"(LP: {task.license_plate}, Date: {task.prior_to_date})"
            if soup.find("div", attrs={'id': 'modal-policy-not-found'}):
                self.logger.debug(f"{log_header} Modal policy not found for request {data} (processId: {process_id})")
                self.logger.info(f"{log_header} Modal policy was not found.")
                return
            if not soup.find('div', attrs={'class': 'modal'}):
                self.logger.error(f"{log_header} Modal policy class was not found in response. Details in [DEBUG].")
                self.logger.debug(f"{log_header} Modal policy not found. Data: {data} (processId: {process_id}). HTML from response: {html}")
                return
            self.logger.info(f"{log_header} Modal policy found.")
"""
Should contain 9 elements:
1. Серия полиса,
2. Номер полиса,
3. Статус договора ОСАГО,
4. Период использования, (it is a dd element with an extra class 'dataList__value--isChildren' and it contains span with actual data.
5. Марка и модель ТС,
6. Идентификационный номер транспортного средства,
7. Государственный регистрационный знак, (it is partially hidden, should use one from the request.
8. Страховая компания,
9. Расширение на территорию Республики Беларусь
"""
            values = soup.find_all('dd', attrs={'class': 'dataList__value'})
            if len(values) != 9:
                self.logger.error(f"{log_header} The parser found {len(values)} elements, but expected 9. The API may have changed. Additional info is present in [DEBUG]")
                self.logger.debug(f"{log_header} Array of found elements: {values}")
                return
            header_len = len(log_header)
            self.logger.info(f"\n--=={log_header}==--\n"
                             f"Данные на {task.prior_to_date}\n"
                             f"Серия полиса: {values[0].text}\n"
                             f"Номер полиса: {values[1].text}\n"
                             f"Статус договора ОСАГО: {values[2].text}\n"
                             f"Период использования: {values[3].find('span').text}\n"
                             f"Марка и модель ТС: {values[4].text}\n"
                             f"Идентификационный номер транспортного средства: {values[5].text}\n"
                             f"Государственный регистрационный знак: {task.license_plate}\n"
                             f"Страховая компания: {values[7].text}\n"
                             f"Расширение на территорию Республики Беларусь: {values[8].text}\n"
                             f"--=={'*' * header_len}==--")