Improved code: handle cases where product names are unknown

This commit is contained in:
leca 2025-06-13 18:55:36 +03:00
parent 94acf816ea
commit a3ecaeef55
4 changed files with 31 additions and 20 deletions

View File

@ -30,9 +30,15 @@
#include <QPushButton> #include <QPushButton>
#include <utils/base64.h> #include <utils/base64.h>
#include <utils/utils.h>
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
// std::string path = "/tmp/html";
// std::string content = read_file(path);
// Check c = parseOfdRuAnswer(content);
// std::cout <<
// return 0;
// EmailParser p; // EmailParser p;
// p.parse_file("/home/leca/example_email_receipts/avito.eml"); // p.parse_file("/home/leca/example_email_receipts/avito.eml");
// p.parse_file("/home/leca/example_email_receipts/читай_город.eml"); // p.parse_file("/home/leca/example_email_receipts/читай_город.eml");

View File

@ -671,7 +671,7 @@
<context> <context>
<name>QObject</name> <name>QObject</name>
<message> <message>
<location filename="../main.cpp" line="76"/> <location filename="../main.cpp" line="82"/>
<source>Using locale: </source> <source>Using locale: </source>
<translation>Using locale: </translation> <translation>Using locale: </translation>
</message> </message>

View File

@ -647,7 +647,7 @@
<context> <context>
<name>QObject</name> <name>QObject</name>
<message> <message>
<location filename="../main.cpp" line="76"/> <location filename="../main.cpp" line="82"/>
<source>Using locale: </source> <source>Using locale: </source>
<translation>Использую локаль: </translation> <translation>Использую локаль: </translation>
</message> </message>

View File

@ -25,6 +25,9 @@
#include <boost/regex.hpp> #include <boost/regex.hpp>
#include <net/net.h> #include <net/net.h>
#include <settings/settings.h> #include <settings/settings.h>
#include <boost/regex.hpp>
#include <boost/algorithm/string/regex.hpp>
#include <boost/algorithm/string.hpp>
#ifdef BUILD_OFD_BINARYEYE_SCAN #ifdef BUILD_OFD_BINARYEYE_SCAN
std::string get_local_ip_address() { std::string get_local_ip_address() {
@ -194,33 +197,40 @@ std::wstring trim_html_response(std::wstring& check) {
return trimmed; return trimmed;
} }
std::vector<std::wstring> find_in_html(std::string& html, std::string regex, std::string html_start, std::string html_end) { std::vector<std::wstring> find_in_html(std::string& html, std::string regex) {
boost::regex searching_regex(regex); boost::regex searching_regex(regex, boost::match_flag_type::match_single_line);
std::vector<std::wstring> parsed; std::vector<std::wstring> parsed;
for (boost::sregex_iterator it{html.begin(), html.end(), searching_regex}, end{}; for (boost::sregex_iterator it{html.begin(), html.end(), searching_regex}, end{};
it != end; it++) { it != end; it++) {
// std::wstring found_entry = from_utf8(it->str());
std::wstring found_entry = from_utf8(it->str()); parsed.push_back(from_utf8(it->str()));
// std::cout << "Found: " << to_utf8(found_entry) << std::endl; // std::cout << "Found: " << to_utf8(found_entry) << std::endl;
std::wstring extracted = substring_from_to(found_entry, from_utf8(html_start), from_utf8(html_end)); // std::wstring extracted = substring_from_to(found_entry, from_utf8(html_start), from_utf8(html_end));
// std::cout << "Extracted: " << to_utf8(extracted) << std::endl; // std::cout << "Extracted: " << to_utf8(extracted) << std::endl;
parsed.push_back(extracted); // parsed.push_back(extracted);
} }
return parsed; return parsed;
} }
std::vector<std::wstring> find_products_in_html(std::string html) { std::vector<std::wstring> find_products_in_html(std::string html) {
return find_in_html(html, "<div class=\"ifw-col ifw-col-1 text-left\"><b>.{2,100}<\\/b><\\/div>", "<div class=\"ifw-col ifw-col-1 text-left\"><b>", "<\\/b><\\/div>"); boost::regex search_regex("(?<=\\n\\s{20}<div class=\"ifw-col ifw-col-1 text-left\">).{0,100}(?=(<\\/b>)?<\\/div>)");
boost::regex b_regex("<b>");
std::vector<std::wstring> parsed;
for (boost::sregex_iterator it{html.begin(), html.end(), search_regex}, end{};
it != end; it++) {
std::string found = it->str();
boost::erase_regex(found, b_regex);
found = boost::regex_replace(found, boost::regex("&nbsp;"), "?");
parsed.push_back(from_utf8(found));
}
return parsed;
} }
std::vector<std::wstring> find_amounts_in_html(std::string html) { std::vector<std::wstring> find_amounts_in_html(std::string html) {
std::vector<std::wstring> founds = find_in_html(html, "<div><span>\\d+(\\.|\\,)?\\d{0,3}<\\/span>", "<span>", "<\\/span>"); return find_in_html(html, "(?<=X <\\/span><span>)\\d+(\\.|,)\\d{2}(?=<\\/span>)");
for (auto &found : founds) {
std::replace(found.begin(), found.end(), ',', '.');
}
return founds;
} }
std::vector<std::wstring> find_net_weights_in_names(std::vector<std::wstring> &names) { std::vector<std::wstring> find_net_weights_in_names(std::vector<std::wstring> &names) {
@ -247,12 +257,7 @@ std::vector<std::wstring> find_net_weights_in_names(std::vector<std::wstring> &n
} }
std::vector<std::wstring> find_prices_in_html(std::string html) { std::vector<std::wstring> find_prices_in_html(std::string html) {
std::vector<std::wstring> founds = find_in_html(html, "X <\\/span><span>\\d+(\\.|,)\\d{2}<\\/span>", "X <\\/span><span>", "<\\/span>"); return find_in_html(html, "(?<=<div><span>)\\d+(\\.|\\,)?\\d{0,3}(?=<\\/span>)");
for (auto &found : founds) {
std::replace(found.begin(), found.end(), ',', '.');
}
return founds;
} }
void dumpVectorsToStderr(std::vector<std::wstring> &products, std::vector<std::wstring> &amounts, std::vector<std::wstring> &net_weights, std::vector<std::wstring> &prices) { void dumpVectorsToStderr(std::vector<std::wstring> &products, std::vector<std::wstring> &amounts, std::vector<std::wstring> &net_weights, std::vector<std::wstring> &prices) {