further improvements
This commit is contained in:
parent
5afaf6a94f
commit
07c7e49a21
|
@ -34,10 +34,17 @@ std::map<std::string, std::string> EmailParser::parse(std::string &email_content
|
|||
|
||||
std::string parameters;
|
||||
parameters = search_in_images(email_content);
|
||||
if (parameters != "") return get_params_from_string(parameters);
|
||||
|
||||
if (parameters != "") {
|
||||
std::cout << parameters << std::endl;
|
||||
return get_params_from_string(parameters);
|
||||
}
|
||||
|
||||
parameters = search_in_text(email_content);
|
||||
if (parameters != "") return get_params_from_string(parameters);
|
||||
if (parameters != "") {
|
||||
std::cout << parameters << std::endl;
|
||||
return get_params_from_string(parameters);
|
||||
}
|
||||
|
||||
/* If the code has reached this part and found nothing, it's most likely that there are no QR codes at all. */
|
||||
|
||||
|
@ -74,7 +81,6 @@ std::string EmailParser::find_check_parameters(std::string &part) {
|
|||
}
|
||||
|
||||
std::string EmailParser::extract_qr_url_from_img(std::string &part) {
|
||||
std::string url = "";
|
||||
boost::regex img_tag_regex("<img[^\\n\\r<]*>");
|
||||
boost::regex img_url_str("https?:\\/\\/.*(qr(code)?)[^\\n\\r\"]+", boost::regex::icase);
|
||||
|
||||
|
@ -85,7 +91,22 @@ std::string EmailParser::extract_qr_url_from_img(std::string &part) {
|
|||
}
|
||||
|
||||
}
|
||||
return url;
|
||||
return "";
|
||||
}
|
||||
|
||||
std::vector<std::string> EmailParser::extract_qr_embeddings_from_part(std::string &part) {
|
||||
std::vector<std::string> embeddings = {};
|
||||
boost::regex img_tag_regex("<img[^\\n\\r<]*>");
|
||||
boost::regex img_base64_str("data:image\\/(png|jpg);base64,[\\w+\\/=]+", boost::regex::icase);
|
||||
|
||||
for (boost::sregex_iterator it{part.begin(), part.end(), img_tag_regex}, end{}; it != end; it++) {
|
||||
std::string img_tag = it->str();
|
||||
for (boost::sregex_iterator it2{img_tag.begin(), img_tag.end(), img_base64_str}, end2{}; it2 != end2; it2++) {
|
||||
embeddings.push_back(split(it2->str(), ",")[1]);
|
||||
}
|
||||
|
||||
}
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
std::string EmailParser::search_in_images(std::string &content) {
|
||||
|
@ -106,24 +127,7 @@ std::string EmailParser::search_in_images(std::string &content) {
|
|||
part.erase(std::remove(part.begin(), part.end(), '\r'), part.end());
|
||||
part.erase(std::remove(part.begin(), part.end(), '\n'), part.end());
|
||||
std::string decoded = base64_decode(part);
|
||||
cv::Mat image;
|
||||
|
||||
if (decoded.substr(1, 3) == "PNG" || decoded.substr(1, 3) == "JPG") {
|
||||
std::vector<uchar> data(decoded.begin(), decoded.end());
|
||||
image = cv::imdecode(cv::Mat(data), 1);
|
||||
} else if (decoded.substr(0, 3) == "GIF") {
|
||||
std::string gif_file_path = get_application_home_path() + "/temp.gif";
|
||||
|
||||
std::ofstream gif_output(gif_file_path, std::ios::binary);
|
||||
gif_output << decoded;
|
||||
gif_output.close();
|
||||
cv::VideoCapture gif(gif_file_path, cv::CAP_FFMPEG);
|
||||
gif.read(image);
|
||||
}
|
||||
|
||||
cv::QRCodeDetector qrDecoder = cv::QRCodeDetector();
|
||||
std::string decoded_qr = qrDecoder.detectAndDecode(image);
|
||||
return find_check_parameters(decoded_qr);
|
||||
return handle_image(decoded);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
@ -158,22 +162,52 @@ std::string EmailParser::search_in_text(std::string &content) {
|
|||
|
||||
// If nothing was found, try searching for anything that looks like a link to a QR code.
|
||||
std::string url = extract_qr_url_from_img(part);
|
||||
Net n;
|
||||
std::string path = get_path_relative_to_home(".local/share/checks_parser/tmp");
|
||||
n.get_file(url, path);
|
||||
if (url != "") {
|
||||
Net n;
|
||||
std::string path = get_path_relative_to_home(".local/share/checks_parser/tmp");
|
||||
n.get_file(url, path);
|
||||
|
||||
std::string qr_code_contents = read_file(path);
|
||||
std::string qr_code_contents = read_file(path);
|
||||
|
||||
std::vector<uchar> data(qr_code_contents.begin(), qr_code_contents.end());
|
||||
cv::Mat image = cv::imdecode(cv::Mat(data), 1);
|
||||
|
||||
cv::QRCodeDetector qrDecoder = cv::QRCodeDetector();
|
||||
std::string decoded_qr = qrDecoder.detectAndDecode(image);
|
||||
parameters = find_check_parameters(decoded_qr);
|
||||
std::vector<uchar> data(qr_code_contents.begin(), qr_code_contents.end());
|
||||
cv::Mat image = cv::imdecode(cv::Mat(data), 1);
|
||||
|
||||
cv::QRCodeDetector qrDecoder = cv::QRCodeDetector();
|
||||
std::string decoded_qr = qrDecoder.detectAndDecode(image);
|
||||
parameters = find_check_parameters(decoded_qr);
|
||||
}
|
||||
if (parameters != "") return parameters;
|
||||
|
||||
// If there is no link that looks like a link to a QR code, maybe the QR code is encoded as base64 inside an img tag.
|
||||
|
||||
std::vector<std::string> embeddings = extract_qr_embeddings_from_part(part);
|
||||
for (std::string &embedding : embeddings) {
|
||||
std::string decoded = base64_decode(embedding);
|
||||
parameters = handle_image(decoded);
|
||||
if (parameters != "") return parameters;
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string EmailParser::handle_image(std::string &content) {
|
||||
cv::Mat image;
|
||||
|
||||
if (content.substr(1, 3) == "PNG" || content.substr(1, 3) == "JPG") {
|
||||
std::vector<uchar> data(content.begin(), content.end());
|
||||
image = cv::imdecode(cv::Mat(data), 1);
|
||||
} else if (content.substr(0, 3) == "GIF") {
|
||||
std::string gif_file_path = get_application_home_path() + "/temp.gif";
|
||||
|
||||
std::ofstream gif_output(gif_file_path, std::ios::binary);
|
||||
gif_output << content;
|
||||
gif_output.close();
|
||||
cv::VideoCapture gif(gif_file_path, cv::CAP_FFMPEG);
|
||||
gif.read(image);
|
||||
}
|
||||
if (image.empty()) return "";
|
||||
|
||||
cv::QRCodeDetector qrDecoder = cv::QRCodeDetector();
|
||||
std::string decoded_qr = qrDecoder.detectAndDecode(image);
|
||||
return find_check_parameters(decoded_qr);
|
||||
}
|
||||
|
|
|
@ -13,9 +13,12 @@ public:
|
|||
std::vector<std::pair<int, int>> find_parts(const boost::regex &start_regex, const boost::regex &end_regex, const std::string &content);
|
||||
std::string find_check_parameters(std::string &part);
|
||||
std::string extract_qr_url_from_img(std::string &part);
|
||||
std::vector<std::string> extract_qr_embeddings_from_part(std::string &part);
|
||||
|
||||
std::string search_in_images(std::string &content);
|
||||
std::string search_in_text(std::string &content);
|
||||
|
||||
std::string handle_image(std::string &content);
|
||||
};
|
||||
|
||||
#endif // CHECKS_PARSER_EMAIL_PARSER
|
||||
|
|
3
main.cpp
3
main.cpp
|
@ -34,12 +34,15 @@
|
|||
int main(int argc, char *argv[]) {
|
||||
|
||||
EmailParser p;
|
||||
p.parse_file("/home/leca/example_email_receipts/lamoda.eml");
|
||||
// p.parse_file("/home/leca/example_email_receipts/lamoda2.eml");
|
||||
p.parse_file("/home/leca/example_email_receipts/читай_город.eml");
|
||||
p.parse_file("/home/leca/example_email_receipts/lenta.eml");
|
||||
p.parse_file("/home/leca/example_email_receipts/magnit.eml");
|
||||
p.parse_file("/home/leca/example_email_receipts/pyaterochka.eml");
|
||||
p.parse_file("/home/leca/example_email_receipts/rzd.eml");
|
||||
p.parse_file("/home/leca/example_email_receipts/russteels.eml");
|
||||
p.parse_file("/home/leca/example_email_receipts/avtodor.eml");
|
||||
return 0;
|
||||
curl_global_init(CURL_GLOBAL_ALL);
|
||||
qRegisterMetaType<Check>("Check");
|
||||
|
|
|
@ -661,7 +661,7 @@
|
|||
<context>
|
||||
<name>QObject</name>
|
||||
<message>
|
||||
<location filename="../main.cpp" line="74"/>
|
||||
<location filename="../main.cpp" line="77"/>
|
||||
<source>Using locale: </source>
|
||||
<translation>Using locale: </translation>
|
||||
</message>
|
||||
|
|
|
@ -637,7 +637,7 @@
|
|||
<context>
|
||||
<name>QObject</name>
|
||||
<message>
|
||||
<location filename="../main.cpp" line="74"/>
|
||||
<location filename="../main.cpp" line="77"/>
|
||||
<source>Using locale: </source>
|
||||
<translation>Использую локаль: </translation>
|
||||
</message>
|
||||
|
|
Loading…
Reference in New Issue