diff --git a/email_parser/emailparser.cpp b/email_parser/emailparser.cpp index 5160dec..e103ac2 100644 --- a/email_parser/emailparser.cpp +++ b/email_parser/emailparser.cpp @@ -1,5 +1,3 @@ -#include "utils/utils.h" -#include "utils/base64.h" #include #include @@ -9,6 +7,7 @@ #include #include +#include #include #include #include @@ -37,28 +36,21 @@ std::map EmailParser::parse(std::string &email_content //3. Search "t=\d{8}T\d{4,6}&s=\d{1,6}\.\d{1,2}&fn=\d{10,16}&i=\d{6}&fp=\d{10}&n=\d". Note that in some emails = and & signs could be replaced with its code in HTTP requests: %3D, %26 // 3.1 If not found, notify the user that we could not parse the .eml file - /* Find content-types */ - Check c; - std::vector> content_types = {}; - boost::regex content_type_regex("Content-Type: image/(gif|png|jpg)"); + /* Find image content-types */ + boost::regex images_content_type_regex("Content-Type: image/(gif|png|jpg)"); + boost::regex text_content_types_regex("Content-Type: text\\/(html|plain)"); boost::regex part_end_regex("--.{5,48}"); + // boost::regex check_data_content("t=\\d+T\\d+&s=\\d+\\.\\d+&fn=\\d{16}&i=\\d{4,5}&fp=\\d{10}&n=\\d"); + boost::regex to_erase("([\\w-]+:\\s*.{2,64}\\r\\n)+"); + boost::regex to_erase_two("--.{5,48}"); - for (boost::sregex_iterator it{email_content.begin(), email_content.end(), content_type_regex}, end{}; it != end; it ++) { - unsigned int start_position = it->position(), end_position = -1; + std::vector> images_content_parts = find_parts(images_content_type_regex, part_end_regex, email_content); - for (boost::sregex_iterator it2{email_content.begin() + start_position, email_content.end(), part_end_regex}, end2{}; it2 != end2; it2++) { - end_position = it2->position(); - break; - } - content_types.push_back(std::pair(start_position, end_position)); - } + /* iterate through found image content-types and try searching qr codes, decode them and see if it's the needed data */ - /* iterate through found content-types and try searching qr codes, decode them and see if it's the needed data */ + for (unsigned int i = 0; i < images_content_parts.size(); i ++) { - for (unsigned int i = 0; i < content_types.size(); i ++) { - boost::regex to_erase("(Content.{5,64}\\r\\n)+"); - boost::regex to_erase_two("--.{5,48}"); - std::string part = email_content.substr(content_types[i].first, content_types[i].second); + std::string part = email_content.substr(images_content_parts[i].first, images_content_parts[i].second); boost::erase_regex(part, to_erase); boost::erase_regex(part, to_erase_two); part.erase(std::remove(part.begin(), part.end(), '\r'), part.end()); @@ -80,10 +72,11 @@ std::map EmailParser::parse(std::string &email_content } cv::QRCodeDetector qrDecoder = cv::QRCodeDetector(); - std::string decoded_qr_params = qrDecoder.detectAndDecode(image); - boost::regex check_data_content("t=\\d+T\\d+&s=\\d+\\.\\d+&fn=\\d{16}&i=\\d{4,5}&fp=\\d{10}&n=\\d"); - if (boost::regex_match(decoded_qr_params, check_data_content)) { - std::map paramsMap = get_params_from_string(decoded_qr_params); + std::string decoded_qr = qrDecoder.detectAndDecode(image); + std::string parameters = find_check_parameters(decoded_qr); + + if (parameters != "") { + std::map paramsMap = get_params_from_string(parameters); return paramsMap; } @@ -91,17 +84,105 @@ std::map EmailParser::parse(std::string &email_content /* If the E-Mail has no QR code in it as a separate part, there's posibilly a QR code inserted using html's tag with base64-encoded image. Try searching it */ + std::vector> texts_content_parts = find_parts(text_content_types_regex, part_end_regex, email_content); + + for (unsigned int i = 0; i < texts_content_parts.size(); i ++) { + + std::string part = email_content.substr(texts_content_parts[i].first, texts_content_parts[i].second); + boost::erase_regex(part, to_erase); + boost::erase_regex(part, to_erase_two); + + + //If there's '<' character, most likely that the part's content is plain html, otherwise it's most likely a base64 encoded html. + if (part.find("<") == std::string::npos) { + part.erase(std::remove(part.begin(), part.end(), '\r'), part.end()); + part.erase(std::remove(part.begin(), part.end(), '\n'), part.end()); + part = base64_decode(part); + } + + std::string parameters = find_check_parameters(part); + + if (parameters != "") { + std::map paramsMap = get_params_from_string(parameters); + + return paramsMap; + } + + std::string url = extract_qr_url_from_img(part); + Net n; + std::string path = get_path_relative_to_home(".local/share/checks_parser/tmp"); + n.get_file(url, path); + + std::ifstream ifile(path, std::ios::in | std::ios::binary); + const unsigned int size = std::filesystem::file_size(path); + std::string qr_code_contents(size, '\0'); + ifile.read(qr_code_contents.data(), size); + + std::vector data(qr_code_contents.begin(), qr_code_contents.end()); + cv::Mat image = cv::imdecode(cv::Mat(data), 1); + + cv::QRCodeDetector qrDecoder = cv::QRCodeDetector(); + std::string decoded_qr = qrDecoder.detectAndDecode(image); + parameters = find_check_parameters(decoded_qr); + + if (parameters != "") { + std::map paramsMap = get_params_from_string(parameters); + + return paramsMap; + } + } /* If there's no such case, the last chance is which will have a link with needed parameters or the qr code that should be downloaded and decoded */ /* If the code has reached this part and found nothing, it's most likely that there are no QR codes at all. */ - // return Check(); + + return std::map(); } std::map EmailParser::parse_file(std::string path) { - std::ifstream ifile(path, std::ios::in | std::ios::binary); - const unsigned int size = std::filesystem::file_size(path); - std::string content(size, '\0'); - ifile.read(content.data(), size); + // std::cout << "Parsing " << path << std::endl; + // std::ifstream ifile(path, std::ios::in | std::ios::binary); + // const unsigned int size = std::filesystem::file_size(path); + // std::string content(size, '\0'); + // ifile.read(content.data(), size); + std::string content = read_file(path); return parse(content); return std::map(); } +std::vector> EmailParser::find_parts(const boost::regex &start_regex, const boost::regex &end_regex, const std::string &content) { + std::vector> parts = {}; + + for (boost::sregex_iterator it{content.begin(), content.end(), start_regex}, end{}; it != end; it ++) { + unsigned int start_position = it->position(), end_position = content.length(); + + for (boost::sregex_iterator it2{content.begin() + start_position, content.end(), end_regex}, end2{}; it2 != end2; it2++) { + end_position = it2->position(); + break; + } + parts.push_back(std::pair(start_position, end_position)); + } + + return parts; +} + +std::string EmailParser::find_check_parameters(std::string &part) { + boost::regex params_regex ("t(=|%3d)\\d+T\\d+(&|%26)s\\1\\d+\\.\\d+\\2fn\\1\\d{16}\\2i\\1\\d{3,6}\\2fp\\1\\d{9,10}\\2n\\1\\d", boost::regex::icase); + for (boost::sregex_iterator it{part.begin(), part.end(), params_regex}, end{}; it != end; it++) { + return it->str(); + } + return ""; +} + +std::string EmailParser::extract_qr_url_from_img(std::string &part) { + std::string url = ""; + boost::regex img_tag_regex(""); + boost::regex img_url_str("https?:\\/\\/.*(qr(code)?)[^\\n\\r\"]+", boost::regex::icase); + + for (boost::sregex_iterator it{part.begin(), part.end(), img_tag_regex}, end{}; it != end; it++) { + std::string img_tag = it->str(); + for (boost::sregex_iterator it2{img_tag.begin(), img_tag.end(), img_url_str}, end2{}; it2 != end2; it2++) { + return it2->str(); + } + + } + return url; +} diff --git a/email_parser/emailparser.h b/email_parser/emailparser.h index ac2dd88..9452867 100644 --- a/email_parser/emailparser.h +++ b/email_parser/emailparser.h @@ -3,13 +3,16 @@ #include #include +#include class EmailParser { public: EmailParser(); std::map parse(std::string &email_content); std::map parse_file(std::string path); - + std::vector> find_parts(const boost::regex &start_regex, const boost::regex &end_regex, const std::string &content); + std::string find_check_parameters(std::string &part); + std::string extract_qr_url_from_img(std::string &part); }; #endif // CHECKS_PARSER_EMAIL_PARSER diff --git a/main.cpp b/main.cpp index f80653d..da1cfb0 100644 --- a/main.cpp +++ b/main.cpp @@ -34,9 +34,12 @@ int main(int argc, char *argv[]) { // EmailParser p; + // p.parse_file("/home/leca/example_email_receipts/читай_город.eml"); + // p.parse_file("/home/leca/example_email_receipts/lenta.eml"); // p.parse_file("/home/leca/example_email_receipts/magnit.eml"); // p.parse_file("/home/leca/example_email_receipts/pyaterochka.eml"); // p.parse_file("/home/leca/example_email_receipts/rzd.eml"); + // p.parse_file("/home/leca/example_email_receipts/russteels.eml"); // return 0; curl_global_init(CURL_GLOBAL_ALL); qRegisterMetaType("Check"); diff --git a/net/net.cpp b/net/net.cpp index 181fa09..2a5d8e1 100644 --- a/net/net.cpp +++ b/net/net.cpp @@ -7,29 +7,42 @@ Net::Net() {} size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp) { size_t totalSize = size * nmemb; - ((std::string*)userp)->append(std::string((char*)contents)); + ((std::string*)userp)->append(std::string((char *)contents)); return totalSize; } -size_t write_data(void *buffer, size_t size, size_t nmemb, void *filename) { - FILE *f = fopen(((std::string *)filename)->c_str(), "w"); - size_t written = fwrite(buffer, size, nmemb, f); +// size_t write_data_to_file(void *buffer, size_t size, size_t nmemb, void *filename) { +// FILE *f = fopen(((std::string *)filename)->c_str(), "wb"); +// size_t written = fwrite(buffer, size, nmemb, f); - fclose(f); +// fclose(f); +// return written; +// } + +size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) { + size_t written; + written = fwrite(ptr, size, nmemb, stream); return written; } +// size_t write_data(void *buffer, size_t size, size_t nmemb, void *string_buffer) { +// *(std::string *)string_buffer = std::string((char *)buffer); +// std::cout << (char*)buffer << std::endl; +// return size; +// } + void Net::get_file(std::string url, std::string filename) { CURL *handle = curl_easy_init(); curl_easy_setopt(handle, CURLOPT_URL, url.c_str()); + FILE *f = fopen(filename.c_str(), "wb"); curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, write_data); - curl_easy_setopt(handle, CURLOPT_WRITEDATA, &filename); + curl_easy_setopt(handle, CURLOPT_WRITEDATA, f); auto success = curl_easy_perform(handle); - + fclose(f); curl_easy_cleanup(handle); } diff --git a/net/net.h b/net/net.h index eb0752b..8904a12 100644 --- a/net/net.h +++ b/net/net.h @@ -4,7 +4,7 @@ #include #include -size_t write_data(void *buffer, size_t size, size_t nmemb, void *userp); +size_t write_data_to_file(void *buffer, size_t size, size_t nmemb, void *userp); size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp); class Net @@ -14,6 +14,7 @@ public: void get_file(std::string url, std::string filename); std::string fetch_check_data_from_ofdru(std::string fn, std::string fd, std::string fi, std::string datetime, int operation, int total, std::string captcha); void get_captcha_from_ofdru(); + std::string get_data(std::string url); }; #endif // NET_H diff --git a/translations/en_US.ts b/translations/en_US.ts index dff13ae..5913a79 100644 --- a/translations/en_US.ts +++ b/translations/en_US.ts @@ -661,7 +661,7 @@ QObject - + Using locale: Using locale: diff --git a/translations/ru_RU.ts b/translations/ru_RU.ts index 6335ee1..d752f0f 100644 --- a/translations/ru_RU.ts +++ b/translations/ru_RU.ts @@ -637,7 +637,7 @@ QObject - + Using locale: Использую локаль: diff --git a/utils/utils.cpp b/utils/utils.cpp index f2b12e2..838715c 100644 --- a/utils/utils.cpp +++ b/utils/utils.cpp @@ -75,7 +75,7 @@ std::string get_application_home_path() { std::map get_params_from_string(std::string parametersString) { parametersString = boost::regex_replace(parametersString, boost::regex("%26"), "&"); - parametersString = boost::regex_replace(parametersString, boost::regex("%3D"), "="); + parametersString = boost::regex_replace(parametersString, boost::regex("%3[Dd]"), "="); std::vector parameters = split(parametersString, "&"); @@ -352,15 +352,13 @@ void generate_qr_code(std::string data) { #endif // ifdef BUILD_OFD_BINARYEYE_SCAN #ifdef BUILD_EMAIL_MODE -std::vector read_file(std::string path) { - std::ifstream stream(path); - std::vector lines; - std::string buffer; - while(getline(stream, buffer)) { - lines.push_back(buffer); - } - stream.close(); - return lines; +std::string read_file(std::string &path) { + std::ifstream ifile(path, std::ios::in | std::ios::binary); + const unsigned int size = std::filesystem::file_size(path); + std::string content(size, '\0'); + ifile.read(content.data(), size); + ifile.close(); + return content; } #endif // ifdef BUILD_EMAIL_MODE diff --git a/utils/utils.h b/utils/utils.h index 4233718..df50986 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -50,6 +50,6 @@ std::string get_local_ip_address(); void fetch_and_download_modules(); #ifdef BUILD_EMAIL_MODE -std::vector read_file(std::string path); +std::string read_file(std::string &path); #endif #endif // UTILS_H