diff --git a/email_parser/emailparser.cpp b/email_parser/emailparser.cpp index c7d7bee..657861a 100644 --- a/email_parser/emailparser.cpp +++ b/email_parser/emailparser.cpp @@ -34,10 +34,17 @@ std::map EmailParser::parse(std::string &email_content std::string parameters; parameters = search_in_images(email_content); - if (parameters != "") return get_params_from_string(parameters); + + if (parameters != "") { + std::cout << parameters << std::endl; + return get_params_from_string(parameters); + } parameters = search_in_text(email_content); - if (parameters != "") return get_params_from_string(parameters); + if (parameters != "") { + std::cout << parameters << std::endl; + return get_params_from_string(parameters); + } /* If the code has reached this part and found nothing, it's most likely that there are no QR codes at all. */ @@ -74,7 +81,6 @@ std::string EmailParser::find_check_parameters(std::string &part) { } std::string EmailParser::extract_qr_url_from_img(std::string &part) { - std::string url = ""; boost::regex img_tag_regex(""); boost::regex img_url_str("https?:\\/\\/.*(qr(code)?)[^\\n\\r\"]+", boost::regex::icase); @@ -85,7 +91,22 @@ std::string EmailParser::extract_qr_url_from_img(std::string &part) { } } - return url; + return ""; +} + +std::vector EmailParser::extract_qr_embeddings_from_part(std::string &part) { + std::vector embeddings = {}; + boost::regex img_tag_regex(""); + boost::regex img_base64_str("data:image\\/(png|jpg);base64,[\\w+\\/=]+", boost::regex::icase); + + for (boost::sregex_iterator it{part.begin(), part.end(), img_tag_regex}, end{}; it != end; it++) { + std::string img_tag = it->str(); + for (boost::sregex_iterator it2{img_tag.begin(), img_tag.end(), img_base64_str}, end2{}; it2 != end2; it2++) { + embeddings.push_back(split(it2->str(), ",")[1]); + } + + } + return embeddings; } std::string EmailParser::search_in_images(std::string &content) { @@ -106,24 +127,7 @@ std::string EmailParser::search_in_images(std::string &content) { part.erase(std::remove(part.begin(), part.end(), '\r'), part.end()); part.erase(std::remove(part.begin(), part.end(), '\n'), part.end()); std::string decoded = base64_decode(part); - cv::Mat image; - - if (decoded.substr(1, 3) == "PNG" || decoded.substr(1, 3) == "JPG") { - std::vector data(decoded.begin(), decoded.end()); - image = cv::imdecode(cv::Mat(data), 1); - } else if (decoded.substr(0, 3) == "GIF") { - std::string gif_file_path = get_application_home_path() + "/temp.gif"; - - std::ofstream gif_output(gif_file_path, std::ios::binary); - gif_output << decoded; - gif_output.close(); - cv::VideoCapture gif(gif_file_path, cv::CAP_FFMPEG); - gif.read(image); - } - - cv::QRCodeDetector qrDecoder = cv::QRCodeDetector(); - std::string decoded_qr = qrDecoder.detectAndDecode(image); - return find_check_parameters(decoded_qr); + return handle_image(decoded); } return ""; } @@ -158,22 +162,52 @@ std::string EmailParser::search_in_text(std::string &content) { // If there's no, try search anything that looks like a link to a qr code. std::string url = extract_qr_url_from_img(part); - Net n; - std::string path = get_path_relative_to_home(".local/share/checks_parser/tmp"); - n.get_file(url, path); + if (url != "") { + Net n; + std::string path = get_path_relative_to_home(".local/share/checks_parser/tmp"); + n.get_file(url, path); - std::string qr_code_contents = read_file(path); + std::string qr_code_contents = read_file(path); - std::vector data(qr_code_contents.begin(), qr_code_contents.end()); - cv::Mat image = cv::imdecode(cv::Mat(data), 1); - - cv::QRCodeDetector qrDecoder = cv::QRCodeDetector(); - std::string decoded_qr = qrDecoder.detectAndDecode(image); - parameters = find_check_parameters(decoded_qr); + std::vector data(qr_code_contents.begin(), qr_code_contents.end()); + cv::Mat image = cv::imdecode(cv::Mat(data), 1); + cv::QRCodeDetector qrDecoder = cv::QRCodeDetector(); + std::string decoded_qr = qrDecoder.detectAndDecode(image); + parameters = find_check_parameters(decoded_qr); + } if (parameters != "") return parameters; // if there's no any link that looks like a link to QR code, maybe the qr code is encoded as base64 inside an img tag. + + std::vector embeddings = extract_qr_embeddings_from_part(part); + for (std::string &embedding : embeddings) { + std::string decoded = base64_decode(embedding); + parameters = handle_image(decoded); + if (parameters != "") return parameters; + } } return ""; } + +std::string EmailParser::handle_image(std::string &content) { + cv::Mat image; + + if (content.substr(1, 3) == "PNG" || content.substr(1, 3) == "JPG") { + std::vector data(content.begin(), content.end()); + image = cv::imdecode(cv::Mat(data), 1); + } else if (content.substr(0, 3) == "GIF") { + std::string gif_file_path = get_application_home_path() + "/temp.gif"; + + std::ofstream gif_output(gif_file_path, std::ios::binary); + gif_output << content; + gif_output.close(); + cv::VideoCapture gif(gif_file_path, cv::CAP_FFMPEG); + gif.read(image); + } + if (image.empty()) return ""; + + cv::QRCodeDetector qrDecoder = cv::QRCodeDetector(); + std::string decoded_qr = qrDecoder.detectAndDecode(image); + return find_check_parameters(decoded_qr); +} diff --git a/email_parser/emailparser.h b/email_parser/emailparser.h index bcaefaf..b94037b 100644 --- a/email_parser/emailparser.h +++ b/email_parser/emailparser.h @@ -13,9 +13,12 @@ public: std::vector> find_parts(const boost::regex &start_regex, const boost::regex &end_regex, const std::string &content); std::string find_check_parameters(std::string &part); std::string extract_qr_url_from_img(std::string &part); + std::vector extract_qr_embeddings_from_part(std::string &part); std::string search_in_images(std::string &content); std::string search_in_text(std::string &content); + + std::string handle_image(std::string &content); }; #endif // CHECKS_PARSER_EMAIL_PARSER diff --git a/main.cpp b/main.cpp index 9a4985e..2382c2b 100644 --- a/main.cpp +++ b/main.cpp @@ -34,12 +34,15 @@ int main(int argc, char *argv[]) { EmailParser p; + p.parse_file("/home/leca/example_email_receipts/lamoda.eml"); + // p.parse_file("/home/leca/example_email_receipts/lamoda2.eml"); p.parse_file("/home/leca/example_email_receipts/читай_город.eml"); p.parse_file("/home/leca/example_email_receipts/lenta.eml"); p.parse_file("/home/leca/example_email_receipts/magnit.eml"); p.parse_file("/home/leca/example_email_receipts/pyaterochka.eml"); p.parse_file("/home/leca/example_email_receipts/rzd.eml"); p.parse_file("/home/leca/example_email_receipts/russteels.eml"); + p.parse_file("/home/leca/example_email_receipts/avtodor.eml"); return 0; curl_global_init(CURL_GLOBAL_ALL); qRegisterMetaType("Check"); diff --git a/translations/en_US.ts b/translations/en_US.ts index 5913a79..4d8fecd 100644 --- a/translations/en_US.ts +++ b/translations/en_US.ts @@ -661,7 +661,7 @@ QObject - + Using locale: Using locale: diff --git a/translations/ru_RU.ts b/translations/ru_RU.ts index d752f0f..fc96d0c 100644 --- a/translations/ru_RU.ts +++ b/translations/ru_RU.ts @@ -637,7 +637,7 @@ QObject - + Using locale: Использую локаль: