From 9da589839cd977b3055cf359211da55b6a1c377b Mon Sep 17 00:00:00 2001 From: leca Date: Sat, 7 Jun 2025 20:55:16 +0300 Subject: [PATCH] basic email parsing --- CMakeLists.txt | 4 +- email_parser/emailparser.cpp | 217 ++++++++++++----------------------- email_parser/emailparser.h | 8 +- main.cpp | 7 ++ mainwindow.cpp | 96 ++++++++-------- mainwindow.h | 2 +- settingsdialog.cpp | 2 - translations/en_US.ts | 46 +++++--- translations/ru_RU.ts | 38 +++--- utils/utils.cpp | 21 ++++ utils/utils.h | 3 + 11 files changed, 205 insertions(+), 239 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1303d29..66361be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -116,7 +116,7 @@ if (BUILD_OFD_BINARYEYE_SCAN) endif() if (BUILD_EMAIL_MODE) - # list(APPEND PROJECT_SOURCES email_parser/emailparser.h email_parser/emailparser.cpp) + list(APPEND PROJECT_SOURCES email_parser/emailparser.h email_parser/emailparser.cpp) list(APPEND PROJECT_SOURCES utils/base64.h utils/base64.cpp) endif() @@ -222,7 +222,7 @@ include_directories(${Boost_INCLUDE_DIRS}) target_link_libraries(checks-parser PUBLIC ${Boost_LIBRARIES}) if (BUILD_OFD_LOCAL_QR_SCAN OR BUILD_OFD_BINARYEYE_SCAN) - find_package(OpenCV REQUIRED COMPONENTS core imgproc imgcodecs opencv_objdetect) + find_package(OpenCV REQUIRED COMPONENTS core imgproc imgcodecs opencv_objdetect videoio) target_link_libraries(checks-parser PRIVATE ${OpenCV_LIBS}) target_include_directories(checks-parser PUBLIC ${OpenCV_INCLUDE_DIRS}) include_directories( ${OpenCV_INCLUDE_DIRS} ) diff --git a/email_parser/emailparser.cpp b/email_parser/emailparser.cpp index b8e7941..5160dec 100644 --- a/email_parser/emailparser.cpp +++ b/email_parser/emailparser.cpp @@ -1,13 +1,20 @@ #include "utils/utils.h" +#include "utils/base64.h" #include +#include +#include +#include +#include +#include + #include #include #include -#include +#include #include -#include #include + #if __GNUC__ < 8 && __clang_major__ < 17 # include using namespace 
std::experimental::filesystem; @@ -16,78 +23,12 @@ using namespace std::filesystem; #endif -std::string EmailParser::get_payload_in_email(std::string &email_content) { - boost::regex content_type_and_transfer_encoding_regex("Content-Type"); - // boost::regex body_start_regex("\r\n\r\n"); //boost::regex_constants::egrep - // boost::smatch smatch; - // if (boost::regex_search(email_content, smatch, body_start_regex)) { - // return email_content.substr(smatch.position(), email_content.length()); - // } - // return ""; -} - -std::multimap EmailParser::parse_email_content_types(std::string path) { - std::ifstream input_file(path, std::ios::in); - - std::string line = ""; - std::multimap mail_options; - std::string latest_key; - while(std::getline(input_file, line)) { - // ; - - char first_char = line.substr(0, 1)[0]; - // if (line == "\0") { - // break; - // } - - std::vector split_by_colon = split(line, ":"); - std::string key = split_by_colon[0]; - std::string value = ""; - - for (int i = 1; i < split_by_colon.size(); i ++) value += split_by_colon[i]; - if (key != "Content-Type" ) { - continue; - } - - if (first_char == '\t') { - mail_options.emplace(std::make_pair(latest_key, line)); - } else { - // std::cout << "key: " << key << "\nvalue: " << value << std::endl; - mail_options.emplace(std::make_pair(key, value)); - latest_key = key; - } - } - return mail_options; -} - -// std::vector EmailParser::find_base64_blocks_in_email(std::string &email_content) { -// std::string glued_together; -// for (auto c : email_content) { -// if (c == '\n') continue; -// glued_together.push_back(c); -// } -// boost::regex base64_regex("^[-A-Za-z0-9+/]*={0,3}$"); -// } - - EmailParser::EmailParser() { } -std::vector search_content_types_in_email_content(std::string& email_content) { - std::vector content_type_positions = {}; - boost::regex image_content_type_regex("Content-Type: image/.*"); - for (boost::sregex_iterator it{email_content.begin(), email_content.end(), 
image_content_type_regex}, end{}; - it != end; it++) { - content_type_positions.push_back(it->position()); - } - return content_type_positions; -} - -// void find_and_decode_email_content() - -Check EmailParser::parse(std::string &email_content) { +std::map EmailParser::parse(std::string &email_content) { //1. Search "Content-Type: image/.*" in the .eml file. // 1.1 If found 0, go to [2] // 1.2 If found 1, try decoding it, if it's not a QR code, go to [2] @@ -95,86 +36,72 @@ Check EmailParser::parse(std::string &email_content) { //2. Try decoding content of the e-mail //3. Search "t=\d{8}T\d{4,6}&s=\d{1,6}\.\d{1,2}&fn=\d{10,16}&i=\d{6}&fp=\d{10}&n=\d". Note that in some emails = and & signs could be replaced with its code in HTTP requests: %3D, %26 // 3.1 If not found, notify the user that we could not parse the .eml file - std::vector content_type_positions = search_content_types_in_email_content(email_content); - if (content_type_positions.size() < 0) { + /* Find content-types */ + Check c; + std::vector> content_types = {}; + boost::regex content_type_regex("Content-Type: image/(gif|png|jpg)"); + boost::regex part_end_regex("--.{5,48}"); - } else if (content_type_positions.size() == 1) { + for (boost::sregex_iterator it{email_content.begin(), email_content.end(), content_type_regex}, end{}; it != end; it ++) { + unsigned int start_position = it->position(), end_position = -1; - } else { - - } - - // std::string payload = get_payload_in_email(email_content); - // Check c; - - // std::cout << payload << std::endl; - - // if (payload == "") - // return c; - - - - // return c; -} - -Check EmailParser::parse_file(std::string path) { - - // std::vector contents = read_file(path); - // unsigned int body_start = -1; - // for (unsigned int i = 0; i < contents.size(); i ++) { - // std::string &line = contents[i]; - // if (line == "\r") { - // body_start = i; - // break; - // } - // } - // if (body_start == (unsigned int) -1) throw "Not an E-Mail"; - - // for (unsigned int 
i = 0; i < contents.size(); i ++) { - // std::string &line = contents[i]; - // if (line[0] == '\t') { - // contents[i - 1] += " " + contents[i]; - // contents.erase(remove(contents.begin(), contents.end(), line), contents.end()); - // i -= 2; - // } - // } - - // for (auto &line : contents) { - // std::cout << line << std::endl; - // } - - // std::cout << contents[body_start + 1] << std::endl; - // unsigned int body_start = contents.find("\r\n\r\n"); - // if (body_start == (unsigned int)-1) - // throw "Not a E-Mail file"; - // std::cout << contents.erase(0, body_start + 4); - // std::cout << contents << std::endl; - - // std::vector> message_parts_positions; - - // while (contents.find("--") > 0) { - - // } - - return Check(); - - std::multimap content_types = parse_email_content_types(path); - bool found_qr_image = false; - for (auto &content_type : content_types) { - boost::regex image_content_type_regex("image\\/(png|gif|jpg|jpeg)"); - boost::cmatch cmatch; - if (boost::regex_match(content_type.second.c_str(), cmatch, image_content_type_regex)) { - std::cout << cmatch << std::endl; - + for (boost::sregex_iterator it2{email_content.begin() + start_position, email_content.end(), part_end_regex}, end2{}; it2 != end2; it2++) { + end_position = it2->position(); + break; } - std::cout << content_type.first << ": " << content_type.second << std::endl; + content_types.push_back(std::pair(start_position, end_position)); } - // std::ifstream ifile(path, std::ios::in | std::ios::binary); - // const unsigned int size = std::filesystem::file_size(path); - // std::string content(size, '\0'); - // ifile.read(content.data(), size); - // return parse(content); - return Check(); + /* iterate through found content-types and try searching qr codes, decode them and see if it's the needed data */ + + for (unsigned int i = 0; i < content_types.size(); i ++) { + boost::regex to_erase("(Content.{5,64}\\r\\n)+"); + boost::regex to_erase_two("--.{5,48}"); + std::string part = 
email_content.substr(content_types[i].first, content_types[i].second); + boost::erase_regex(part, to_erase); + boost::erase_regex(part, to_erase_two); + part.erase(std::remove(part.begin(), part.end(), '\r'), part.end()); + part.erase(std::remove(part.begin(), part.end(), '\n'), part.end()); + std::string decoded = base64_decode(part); + cv::Mat image; + + if (decoded.substr(1, 3) == "PNG" || decoded.substr(1, 3) == "JPG") { + std::vector data(decoded.begin(), decoded.end()); + image = cv::imdecode(cv::Mat(data), 1); + } else if (decoded.substr(0, 3) == "GIF") { + std::string gif_file_path = get_application_home_path() + "/temp.gif"; + + std::ofstream gif_output(gif_file_path, std::ios::binary); + gif_output << decoded; + gif_output.close(); + cv::VideoCapture gif(gif_file_path, cv::CAP_FFMPEG); + gif.read(image); + } + + cv::QRCodeDetector qrDecoder = cv::QRCodeDetector(); + std::string decoded_qr_params = qrDecoder.detectAndDecode(image); + boost::regex check_data_content("t=\\d+T\\d+&s=\\d+\\.\\d+&fn=\\d{16}&i=\\d{4,5}&fp=\\d{10}&n=\\d"); + if (boost::regex_match(decoded_qr_params, check_data_content)) { + std::map paramsMap = get_params_from_string(decoded_qr_params); + + return paramsMap; + } + } + + /* If the E-Mail has no QR code in it as a separate part, there's possibly a QR code inserted using an HTML img tag with a base64-encoded image. Try searching for it */ + + /* If there's no such case, the last chance is an HTML anchor (a) tag which will have a link with the needed parameters or to the QR code that should be downloaded and decoded */ + + /* If the code has reached this part and found nothing, it's most likely that there are no QR codes at all. 
*/ + // return Check(); +} + +std::map EmailParser::parse_file(std::string path) { + std::ifstream ifile(path, std::ios::in | std::ios::binary); + const unsigned int size = std::filesystem::file_size(path); + std::string content(size, '\0'); + ifile.read(content.data(), size); + return parse(content); + return std::map(); } diff --git a/email_parser/emailparser.h b/email_parser/emailparser.h index c9de132..ac2dd88 100644 --- a/email_parser/emailparser.h +++ b/email_parser/emailparser.h @@ -5,14 +5,10 @@ #include class EmailParser { - std::string get_payload_in_email(std::string &email_content); - std::multimap parse_email_content_types(std::string path); - - // std::vector find_base64_blocks_in_email(std::string &email_content); public: EmailParser(); - Check parse(std::string &email_content); - Check parse_file(std::string path); + std::map parse(std::string &email_content); + std::map parse_file(std::string path); }; diff --git a/main.cpp b/main.cpp index f5345f8..f80653d 100644 --- a/main.cpp +++ b/main.cpp @@ -25,12 +25,19 @@ #include #ifdef BUILD_EMAIL_MODE // #include +#include #endif #include #include int main(int argc, char *argv[]) { + + // EmailParser p; + // p.parse_file("/home/leca/example_email_receipts/magnit.eml"); + // p.parse_file("/home/leca/example_email_receipts/pyaterochka.eml"); + // p.parse_file("/home/leca/example_email_receipts/rzd.eml"); + // return 0; curl_global_init(CURL_GLOBAL_ALL); qRegisterMetaType("Check"); diff --git a/mainwindow.cpp b/mainwindow.cpp index ec5b228..e9a4b2b 100644 --- a/mainwindow.cpp +++ b/mainwindow.cpp @@ -10,7 +10,7 @@ #ifdef BUILD_OFD_LOCAL_QR_SCAN # include -#include +# include #endif #include @@ -26,6 +26,7 @@ # include # include # include +#include #endif MainWindow::MainWindow(QWidget *parent) @@ -49,6 +50,9 @@ MainWindow::MainWindow(QWidget *parent) #ifdef BUILD_OFD_BINARYEYE_SCAN QObject::connect(this, &MainWindow::httpErrorOccured, this, &MainWindow::notifyHttpServerFailure); connect(this, 
SIGNAL(httpNewMessage(QString)), this, SLOT(httpNewMessageHandler(QString))); +#else + ui->or_label_2->hide(); + ui->binary_eye_button->hide(); #endif #ifndef BUILD_EMAIL_MODE @@ -56,10 +60,6 @@ MainWindow::MainWindow(QWidget *parent) ui->or_label_2->hide(); #endif -#ifndef BUILD_OFD_BINARYEYE_SCAN - ui->or_label_2->hide(); - ui->binary_eye_button->hide(); -#endif #ifndef BUILD_OFD_LOCAL_QR_SCAN ui->or_label_1->hide(); ui->choose_image_button->hide(); @@ -127,19 +127,7 @@ void MainWindow::httpNewMessageHandler(QString message) { //erase /?result= from the string parametersString.erase(0, parametersString.find("=") + 1); - //TODO: punycode %26 %3D - parametersString = boost::regex_replace(parametersString, boost::regex("%26"), "&"); - parametersString = boost::regex_replace(parametersString, boost::regex("%3D"), "="); - - std::vector parameters = split(parametersString, "&"); - - std::map paramsMap; - - for (auto ¶meter : parameters) { - std::vector values = split(parameter, "="); - paramsMap.insert(std::pair (values[0], values[1])); - } - + std::map paramsMap = get_params_from_string(parametersString); emit onDataDecode(paramsMap); } @@ -167,35 +155,33 @@ void MainWindow::on_choose_image_button_clicked() { #endif //ifdef BUILD_OFD_LOCAL_QR_SCAN void MainWindow::onDataDecode(std::map data) { - ui->fn_line_edit->setText(QString::fromStdString(data["fn"])); - ui->fd_line_edit->setText(QString::fromStdString(data["i"])); - ui->fi_line_edit->setText(QString::fromStdString(data["fp"])); - - QString extractedDateTime = QString::fromStdString(data["t"]); - //TODO: some QRs contain datetime in format yyyyMMddThhmmss. Perhaps there is more different formats, should write function to detect them. 
- QDateTime datetime = QDateTime::fromString(extractedDateTime, "yyyyMMddThhmm"); - if (datetime == QDateTime::fromString(extractedDateTime, "20000101T1200")) { - datetime = QDateTime::fromString(extractedDateTime, "yyyyMMddThhmmss"); - } - ui->purchase_datetime_edit->setDateTime(datetime); - - int type = std::stoi(data["n"]); - ui->operation_type_combo_box->setCurrentIndex(type - 1); - - std::string total = data["s"]; - - ui->total_spin_box->setValue(std::stod(total)); + set_check_params(data); } #ifdef BUILD_EMAIL_MODE void MainWindow::on_parse_email_button_clicked() { - QMessageBox infoDialog; - infoDialog.setText(tr("This feature is under development. Wait it to appear in next updates.")); - infoDialog.setIcon(QMessageBox::Warning); - infoDialog.setWindowTitle(tr("Under development")); - infoDialog.exec(); - return; + QString filename = QFileDialog::getOpenFileName(); + + if (filename == "") { + QMessageBox infoDialog; + infoDialog.setText(tr("Please, select an e-mail which contains QR code")); + infoDialog.setIcon(QMessageBox::Critical); + infoDialog.setWindowTitle(tr("E-Mail was not selected")); + infoDialog.exec(); + return; + } + + EmailParser email_parser; + std::map paramsMap = email_parser.parse_file(filename.toStdString()); + + set_check_params(paramsMap); + // QMessageBox infoDialog; + // infoDialog.setText(tr("This feature is under development. 
Wait it to appear in next updates.")); + // infoDialog.setIcon(QMessageBox::Warning); + // infoDialog.setWindowTitle(tr("Under development")); + // infoDialog.exec(); + // return; } #endif // ifdef BUILD_EMAIL_MODE @@ -270,22 +256,21 @@ Check *MainWindow::parse_new_check() { return check; } catch(OfdRequestException e) { + QMessageBox infoDialog; if (!strcmp(e.what(), "Incorrect captcha")) { - QMessageBox infoDialog; + infoDialog.setText(tr("Captcha was not solved correctly!")); infoDialog.setIcon(QMessageBox::Critical); infoDialog.setWindowTitle(tr("Captcha is incorrect")); infoDialog.exec(); continue; } else if (!strcmp(e.what(), "Internal server error")) { - QMessageBox infoDialog; infoDialog.setText(tr("Internal server error. Please, try again later.")); infoDialog.setIcon(QMessageBox::Critical); infoDialog.setWindowTitle(tr("Internal server error")); infoDialog.exec(); return nullptr; } else if (!strcmp(e.what(), "Does not exist")) { - QMessageBox infoDialog; infoDialog.setText(tr("Check not found. Please, ensure correctness of entered data.")); infoDialog.setIcon(QMessageBox::Critical); infoDialog.setWindowTitle(tr("Check was not found")); @@ -316,3 +301,24 @@ void MainWindow::on_deleteSelectedButton_clicked() { ui->checkQueueTable->clearSelection(); } +void MainWindow::set_check_params(std::map paramsMap) { + ui->fn_line_edit->setText(QString::fromStdString(paramsMap["fn"])); + ui->fd_line_edit->setText(QString::fromStdString(paramsMap["i"])); + ui->fi_line_edit->setText(QString::fromStdString(paramsMap["fp"])); + + QString extractedDateTime = QString::fromStdString(paramsMap["t"]); + //TODO: some QRs contain datetime in format yyyyMMddThhmmss. Perhaps there is more different formats, should write function to detect them. 
+ QDateTime datetime = QDateTime::fromString(extractedDateTime, "yyyyMMddThhmm"); + if (datetime == QDateTime::fromString(extractedDateTime, "20000101T1200")) { + datetime = QDateTime::fromString(extractedDateTime, "yyyyMMddThhmmss"); + } + ui->purchase_datetime_edit->setDateTime(datetime); + + int type = std::stoi(paramsMap["n"]); + ui->operation_type_combo_box->setCurrentIndex(type - 1); + + std::string total = paramsMap["s"]; + + ui->total_spin_box->setValue(std::stod(total)); +} + diff --git a/mainwindow.h b/mainwindow.h index 6503baa..37389c7 100644 --- a/mainwindow.h +++ b/mainwindow.h @@ -57,7 +57,7 @@ private slots: void on_add_new_check_button_clicked(); void on_deleteSelectedButton_clicked(); - + void set_check_params(std::map); private: Ui::MainWindow *ui; std::vector checks; diff --git a/settingsdialog.cpp b/settingsdialog.cpp index f69ac75..43773c7 100644 --- a/settingsdialog.cpp +++ b/settingsdialog.cpp @@ -4,8 +4,6 @@ #include #include -#include - #include #include diff --git a/translations/en_US.ts b/translations/en_US.ts index f9487dd..dff13ae 100644 --- a/translations/en_US.ts +++ b/translations/en_US.ts @@ -273,57 +273,65 @@ Could not start http server. - + Selected image: Selected image: - This feature is under development. Wait it to appear in next updates. - This feature is under development. Wait for it to appear in next updates. + This feature is under development. Wait for it to appear in next updates. - Under development - Under development + Under development - + + Please, select an e-mail which contains QR code + + + + + E-Mail was not selected + + + + Please, add check(s) to parse Please, add check(s) to parse - + No checks to parse No checks to parse - + Captcha was not solved correctly! Captcha was not solved correctly! - + Captcha is incorrect Captcha is incorrect - + Internal server error. Please, try again later. Internal server error. Please, try again later. - + Internal server error Internal server error - + Check not found. 
Please, ensure correctness of entered data. Check not found. Please, ensure correctness of entered data. - + Check was not found Check was not found @@ -336,12 +344,12 @@ Error in parsing - + Please, select a picture where QR code that contains info about check is present Please, select a picture where QR code that contains info about check is present - + Picture was not selected Picture was not selected @@ -653,7 +661,7 @@ QObject - + Using locale: Using locale: @@ -661,17 +669,17 @@ SettingsDialog - + Кто здесь? - + You need to restart program to apply language changes You need to restart program to apply language changes - + Restart required Restart required diff --git a/translations/ru_RU.ts b/translations/ru_RU.ts index c9c59f3..6335ee1 100644 --- a/translations/ru_RU.ts +++ b/translations/ru_RU.ts @@ -261,57 +261,57 @@ Не получилось запустить HTTP сервер. - + Selected image: Выбранное изображение: - - This feature is under development. Wait it to appear in next updates. + + Please, select an e-mail which contains QR code - - Under development + + E-Mail was not selected - + Please, add check(s) to parse - + No checks to parse - + Captcha was not solved correctly! Капча была решена неверно! - + Captcha is incorrect Капча введена неверно - + Internal server error. Please, try again later. Внутренняя ошибка сервера. Пожалуйста, попробуйте снова позже. - + Internal server error Внутренняя ошибка сервера - + Check not found. Please, ensure correctness of entered data. Чек не найден. Пожалуйста, убедитесь в правильности введённых данных. - + Check was not found Чек не найден @@ -324,12 +324,12 @@ Ошибка в парсинге - + Please, select a picture where QR code that contains info about check is present Пожалуйста, выберете изображение, содержащее QR код с информацией о чеке - + Picture was not selected Изображение не было выбрано @@ -637,7 +637,7 @@ QObject - + Using locale: Использую локаль: @@ -645,17 +645,17 @@ SettingsDialog - + Кто здесь? 
- + You need to restart program to apply language changes Требуется перезагрузить программу, чтобы применить изменения языка - + Restart required Требуется перезагрузка diff --git a/utils/utils.cpp b/utils/utils.cpp index 4e342e2..f2b12e2 100644 --- a/utils/utils.cpp +++ b/utils/utils.cpp @@ -42,6 +42,7 @@ std::string get_local_ip_address() { inet_ntop(AF_INET, tmpAddrPtr, addressBuffer, INET_ADDRSTRLEN); std::string value(addressBuffer); + //TODO: better way to determine local IP address if (!strncmp(value.c_str(), "192.168", 7)) { return value; } @@ -68,6 +69,26 @@ std::string get_path_relative_to_home(std::string path) { return std::string(std::getenv("HOME")) + "/" + path; } +std::string get_application_home_path() { + return get_path_relative_to_home(".local/share/checks_parser"); +} + +std::map get_params_from_string(std::string parametersString) { + parametersString = boost::regex_replace(parametersString, boost::regex("%26"), "&"); + parametersString = boost::regex_replace(parametersString, boost::regex("%3D"), "="); + + std::vector parameters = split(parametersString, "&"); + + std::map paramsMap; + + for (auto ¶meter : parameters) { + std::vector values = split(parameter, "="); + paramsMap.insert(std::pair (values[0], values[1])); + } + return paramsMap; +} + + template bool vector_contains_element(const std::vector& vector, const T& to_find) { for (const T& element : vector) { diff --git a/utils/utils.h b/utils/utils.h index 3a35621..4233718 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -11,6 +11,9 @@ std::string to_utf8(std::wstring wide_string); std::wstring from_utf8(std::string string); std::string get_path_relative_to_home(std::string path); +std::string get_application_home_path(); + +std::map get_params_from_string(std::string); const std::map column_names = { {"date", ColumnType::date},