/*
 * EmailParser member definitions: locate a QR-code parameter string in raw
 * e-mail content and hand it to get_params_from_string to build the result map.
 *
 * NOTE(review): this copy of the file looks damaged by a text-extraction step
 * and will not compile as it stands:
 *   - every #include target and every template argument list is missing
 *     (std::map, std::vector and std::pair appear with no arguments);
 *   - the text that followed the lookbehind opener (?<= in img_url_regex and in
 *     img_base64_regex is gone, taking with it the rest of
 *     extract_qr_url_from_img, the rest of extract_qr_embeddings_from_part and
 *     the opening of the function that ends with the images_content_parts loop
 *     (presumably search_in_images, which parse calls);
 *   - the original line breaks are lost, so every // comment now swallows the
 *     code that follows it on the same physical line.
 * Restore the file from version control before changing any logic here.
 *
 * What the visible code does:
 *
 * EmailParser()
 *   Compiles three member patterns: headings_regex (a run of Name: value CRLF
 *   header lines), end_marker_regex and part_end_regex (two dashes followed by
 *   5 to 57 characters other than CR, LF and angle brackets; part_end_regex
 *   additionally excludes spaces).
 *
 * parse(email_content)
 *   Calls search_in_images, then search_in_text. The first non-empty result is
 *   passed to get_params_from_string and returned. If both are empty, prints
 *   the message Failed to parse and returns an empty map.
 *
 * parse_file(path)
 *   Prints the path, loads the file with read_file and delegates to parse.
 *
 * find_parts(start_regex, end_regex, content)
 *   Returns one pair per start_regex match. first is the offset of that match
 *   in content. second is position() of the first end_regex match found when
 *   searching from that offset, or content.length() when there is none.
 *
 * find_check_parameters(part)
 *   Case-insensitive search for t=DIGITS T DIGITS &s=D.D &fn=16 digits
 *   &i=3-6 digits &fp=9-10 digits &n=digit. The equals sign may also be spelled
 *   %3d or =3d and the ampersand &amp; or %26; the back-references force every
 *   separator to use the same spelling as its first occurrence. Returns the
 *   matched text, or an empty string when nothing matches.
 *
 * search_in_text(content)
 *   For every Content-Type: text/html or text/plain part: reads the transfer
 *   encoding via extract_content_transfer_encoding, erases the first
 *   headings_regex match and the first end_marker_regex match, then either
 *   removes quoted-printable soft line breaks (= followed by CRLF) or strips
 *   CRLF and base64-decodes. It then tries, in order:
 *     1. find_check_parameters on the text itself;
 *     2. the URL from extract_qr_url_from_img, downloaded with Net::get_file
 *        to .local/share/checks_parser/tmp under the home directory, read back
 *        with read_file and passed to handle_image;
 *     3. handle_image on each base64-decoded entry returned by
 *        extract_qr_embeddings_from_part.
 *   Returns the first non-empty result, or an empty string.
 *
 * handle_image(content)
 *   Picks a decoder from magic bytes: PNG or JPG at offset 1, or JFIF at
 *   offset 6, goes through cv::imdecode; GIF at offset 0 is written to
 *   temp.gif under get_application_home_path() and one frame is read through
 *   cv::VideoCapture. Returns an empty string when no image was decoded.
 *   Images with rows or cols >= 500 are resized to 150x150 and also written to
 *   .local/share/checks_parser/tmp.jpg. The text returned by
 *   cv::QRCodeDetector::detectAndDecode is filtered by find_check_parameters.
 *   NOTE(review): std::string::substr(pos, n) throws std::out_of_range when
 *   pos > size(), so content shorter than the probed offsets (for example an
 *   empty download) throws here; no caller checks the length first.
 *   NOTE(review): the fourth parameter of cv::resize is presumably fx, not the
 *   interpolation flag, so cv::INTER_LINEAR is passed as a scale factor that
 *   is ignored because dsize is set - confirm against the OpenCV signature.
 */
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #if __GNUC__ < 8 && __clang_major__ < 17 # include using namespace std::experimental::filesystem; #else # include using namespace std::filesystem; #endif EmailParser::EmailParser() { headings_regex = boost::regex("([\\w-]+:\\s*.{2,64}\\r\\n)+"); end_marker_regex = boost::regex("--[^\\n\\r<>]{5,57}"); part_end_regex = boost::regex("--[^\\n\\r<> ]{5,57}"); } std::map EmailParser::parse(std::string &email_content) { std::string parameters; parameters = search_in_images(email_content); if (parameters != "") { return get_params_from_string(parameters); } parameters = search_in_text(email_content); if (parameters != "") { return get_params_from_string(parameters); } std::cout << "Failed to parse" << std::endl; /* If the code has reached this part and found nothing, it's most likely that there are no QR codes at all. */ return std::map(); } std::map EmailParser::parse_file(std::string path) { std::cout << "Parsing file " << path << std::endl; std::string content = read_file(path); return parse(content); /* NOTE(review): the return below is unreachable. */ return std::map(); } std::vector> EmailParser::find_parts(const boost::regex &start_regex, const boost::regex &end_regex, const std::string &content) { std::vector> parts = {}; for (boost::sregex_iterator it{content.begin(), content.end(), start_regex}, end{}; it != end; it ++) { unsigned int start_position = it->position(), end_position = content.length(); for (boost::sregex_iterator it2{content.begin() + start_position, content.end(), end_regex}, end2{}; it2 != end2; it2++) { /* NOTE(review): position() is presumably measured from the start of the searched range (content.begin() + start_position), which would make this a length relative to start_position; callers pass it as the count argument of substr - confirm against the Boost.Regex documentation. */ end_position = it2->position(); break; } parts.push_back(std::pair(start_position, end_position)); } return parts; } std::string EmailParser::find_check_parameters(std::string &part) { boost::regex params_regex ("t(=|(%|=)3d)\\d+T\\d+(&(amp;)?|%26)s\\1\\d+\\.\\d+\\3fn\\1\\d{16}\\3i\\1\\d{3,6}\\3fp\\1\\d{9,10}\\3n\\1\\d", boost::regex::icase); boost::smatch matched; if
(boost::regex_search(part, matched, params_regex)) return matched[0].str(); return ""; } std::string EmailParser::extract_qr_url_from_img(std::string &part) { boost::regex img_url_regex("(?<= EmailParser::extract_qr_embeddings_from_part(std::string &part) { std::vector embeddings = {}; boost::regex img_base64_regex("(?<=> images_content_parts = find_parts(images_content_type_regex, part_end_regex, content); /* iterate through found image content-types and try searching qr codes, decode them and see if it's the needed data */ for (unsigned int i = 0; i < images_content_parts.size(); i ++) { std::string part = content.substr(images_content_parts[i].first, images_content_parts[i].second); boost::erase_regex(part, headings_regex); boost::erase_regex(part, end_marker_regex); boost::erase_all_regex(part, boost::regex("\\r\\n")); std::string decoded = base64_decode(part); /* NOTE(review): unconditional return inside the loop - only the first image part is ever examined, even when it yields an empty result. */ return handle_image(decoded); } return ""; } std::string EmailParser::search_in_text(std::string &content) { boost::regex text_content_types_regex("Content-Type: text\\/(html|plain)", boost::regex::icase); /* If the E-Mail has no QR code in it as a separate part, there's possibly a QR code inserted using html's tag with base64-encoded image. Try searching it */ std::vector> texts_content_parts = find_parts(text_content_types_regex, part_end_regex, content); for (unsigned int i = 0; i < texts_content_parts.size(); i ++) { std::string part = content.substr(texts_content_parts[i].first, texts_content_parts[i].second); std::string transfer_encoding = extract_content_transfer_encoding(part); boost::erase_regex(part, headings_regex); boost::erase_regex(part, end_marker_regex); if (transfer_encoding == "quoted-printable") { boost::erase_all_regex(part, boost::regex("=\\r\\n")); } else if (transfer_encoding == "base64") { boost::erase_all_regex(part, boost::regex("\\r\\n")); part = base64_decode(part); } // Try searching parameters just in plain html.
Will help if there's a link to a QR code with it's parameters passed in request. std::string parameters = find_check_parameters(part); if (parameters != "") return parameters; // If there's no, try search anything that looks like a link to a qr code. std::string url = extract_qr_url_from_img(part); if (url != "") { Net n; std::string path = get_path_relative_to_home(".local/share/checks_parser/tmp"); n.get_file(url, path); std::string qr_code_contents = read_file(path); parameters = handle_image(qr_code_contents); } if (parameters != "") return parameters; // if there's no any link that looks like a link to QR code, maybe the qr code is encoded as base64 inside an img tag. std::vector embeddings = extract_qr_embeddings_from_part(part); for (std::string &embedding : embeddings) { std::string decoded = base64_decode(embedding); parameters = handle_image(decoded); if (parameters != "") return parameters; } } return ""; } std::string EmailParser::handle_image(std::string &content) { cv::Mat image; if (content.substr(1, 3) == "PNG" || content.substr(1, 3) == "JPG" || content.substr(6, 4) == "JFIF") { std::vector data(content.begin(), content.end()); image = cv::imdecode(cv::Mat(data), 1); } else if (content.substr(0, 3) == "GIF") { std::string gif_file_path = get_application_home_path() + "/temp.gif"; std::ofstream gif_output(gif_file_path, std::ios::binary); gif_output << content; gif_output.close(); cv::VideoCapture gif(gif_file_path, cv::CAP_FFMPEG); gif.read(image); } if (image.empty()) return ""; if (image.rows >= 500 || image.cols >= 500) { cv::Mat copy(image); cv::resize(copy, image, cv::Size(150, 150), cv::INTER_LINEAR); cv::imwrite(get_path_relative_to_home(".local/share/checks_parser/tmp.jpg"), image); } cv::QRCodeDetector qrDecoder = cv::QRCodeDetector(); std::string decoded_qr = qrDecoder.detectAndDecode(image); return find_check_parameters(decoded_qr); }