improved code, handling situations where names of goods are not known
This commit is contained in:
parent
94acf816ea
commit
a3ecaeef55
6
main.cpp
6
main.cpp
|
@ -30,9 +30,15 @@
|
||||||
#include <QPushButton>
|
#include <QPushButton>
|
||||||
|
|
||||||
#include <utils/base64.h>
|
#include <utils/base64.h>
|
||||||
|
#include <utils/utils.h>
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
|
// std::string path = "/tmp/html";
|
||||||
|
// std::string content = read_file(path);
|
||||||
|
|
||||||
|
// Check c = parseOfdRuAnswer(content);
|
||||||
|
// std::cout <<
|
||||||
|
// return 0;
|
||||||
// EmailParser p;
|
// EmailParser p;
|
||||||
// p.parse_file("/home/leca/example_email_receipts/avito.eml");
|
// p.parse_file("/home/leca/example_email_receipts/avito.eml");
|
||||||
// p.parse_file("/home/leca/example_email_receipts/читай_город.eml");
|
// p.parse_file("/home/leca/example_email_receipts/читай_город.eml");
|
||||||
|
|
|
@ -671,7 +671,7 @@
|
||||||
<context>
|
<context>
|
||||||
<name>QObject</name>
|
<name>QObject</name>
|
||||||
<message>
|
<message>
|
||||||
<location filename="../main.cpp" line="76"/>
|
<location filename="../main.cpp" line="82"/>
|
||||||
<source>Using locale: </source>
|
<source>Using locale: </source>
|
||||||
<translation>Using locale: </translation>
|
<translation>Using locale: </translation>
|
||||||
</message>
|
</message>
|
||||||
|
|
|
@ -647,7 +647,7 @@
|
||||||
<context>
|
<context>
|
||||||
<name>QObject</name>
|
<name>QObject</name>
|
||||||
<message>
|
<message>
|
||||||
<location filename="../main.cpp" line="76"/>
|
<location filename="../main.cpp" line="82"/>
|
||||||
<source>Using locale: </source>
|
<source>Using locale: </source>
|
||||||
<translation>Использую локаль: </translation>
|
<translation>Использую локаль: </translation>
|
||||||
</message>
|
</message>
|
||||||
|
|
|
@ -25,6 +25,9 @@
|
||||||
#include <boost/regex.hpp>
|
#include <boost/regex.hpp>
|
||||||
#include <net/net.h>
|
#include <net/net.h>
|
||||||
#include <settings/settings.h>
|
#include <settings/settings.h>
|
||||||
|
#include <boost/regex.hpp>
|
||||||
|
#include <boost/algorithm/string/regex.hpp>
|
||||||
|
#include <boost/algorithm/string.hpp>
|
||||||
|
|
||||||
#ifdef BUILD_OFD_BINARYEYE_SCAN
|
#ifdef BUILD_OFD_BINARYEYE_SCAN
|
||||||
std::string get_local_ip_address() {
|
std::string get_local_ip_address() {
|
||||||
|
@ -194,33 +197,40 @@ std::wstring trim_html_response(std::wstring& check) {
|
||||||
return trimmed;
|
return trimmed;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::wstring> find_in_html(std::string& html, std::string regex, std::string html_start, std::string html_end) {
|
std::vector<std::wstring> find_in_html(std::string& html, std::string regex) {
|
||||||
boost::regex searching_regex(regex);
|
boost::regex searching_regex(regex, boost::match_flag_type::match_single_line);
|
||||||
|
|
||||||
std::vector<std::wstring> parsed;
|
std::vector<std::wstring> parsed;
|
||||||
for (boost::sregex_iterator it{html.begin(), html.end(), searching_regex}, end{};
|
for (boost::sregex_iterator it{html.begin(), html.end(), searching_regex}, end{};
|
||||||
it != end; it++) {
|
it != end; it++) {
|
||||||
|
// std::wstring found_entry = from_utf8(it->str());
|
||||||
std::wstring found_entry = from_utf8(it->str());
|
parsed.push_back(from_utf8(it->str()));
|
||||||
// std::cout << "Found: " << to_utf8(found_entry) << std::endl;
|
// std::cout << "Found: " << to_utf8(found_entry) << std::endl;
|
||||||
std::wstring extracted = substring_from_to(found_entry, from_utf8(html_start), from_utf8(html_end));
|
// std::wstring extracted = substring_from_to(found_entry, from_utf8(html_start), from_utf8(html_end));
|
||||||
// std::cout << "Extracted: " << to_utf8(extracted) << std::endl;
|
// std::cout << "Extracted: " << to_utf8(extracted) << std::endl;
|
||||||
parsed.push_back(extracted);
|
// parsed.push_back(extracted);
|
||||||
}
|
}
|
||||||
return parsed;
|
return parsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::wstring> find_products_in_html(std::string html) {
|
std::vector<std::wstring> find_products_in_html(std::string html) {
|
||||||
return find_in_html(html, "<div class=\"ifw-col ifw-col-1 text-left\"><b>.{2,100}<\\/b><\\/div>", "<div class=\"ifw-col ifw-col-1 text-left\"><b>", "<\\/b><\\/div>");
|
boost::regex search_regex("(?<=\\n\\s{20}<div class=\"ifw-col ifw-col-1 text-left\">).{0,100}(?=(<\\/b>)?<\\/div>)");
|
||||||
|
boost::regex b_regex("<b>");
|
||||||
|
|
||||||
|
std::vector<std::wstring> parsed;
|
||||||
|
for (boost::sregex_iterator it{html.begin(), html.end(), search_regex}, end{};
|
||||||
|
it != end; it++) {
|
||||||
|
std::string found = it->str();
|
||||||
|
boost::erase_regex(found, b_regex);
|
||||||
|
found = boost::regex_replace(found, boost::regex(" "), "?");
|
||||||
|
parsed.push_back(from_utf8(found));
|
||||||
|
}
|
||||||
|
return parsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::wstring> find_amounts_in_html(std::string html) {
|
std::vector<std::wstring> find_amounts_in_html(std::string html) {
|
||||||
std::vector<std::wstring> founds = find_in_html(html, "<div><span>\\d+(\\.|\\,)?\\d{0,3}<\\/span>", "<span>", "<\\/span>");
|
return find_in_html(html, "(?<=X <\\/span><span>)\\d+(\\.|,)\\d{2}(?=<\\/span>)");
|
||||||
for (auto &found : founds) {
|
|
||||||
std::replace(found.begin(), found.end(), ',', '.');
|
|
||||||
}
|
|
||||||
|
|
||||||
return founds;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::wstring> find_net_weights_in_names(std::vector<std::wstring> &names) {
|
std::vector<std::wstring> find_net_weights_in_names(std::vector<std::wstring> &names) {
|
||||||
|
@ -247,12 +257,7 @@ std::vector<std::wstring> find_net_weights_in_names(std::vector<std::wstring> &n
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::wstring> find_prices_in_html(std::string html) {
|
std::vector<std::wstring> find_prices_in_html(std::string html) {
|
||||||
std::vector<std::wstring> founds = find_in_html(html, "X <\\/span><span>\\d+(\\.|,)\\d{2}<\\/span>", "X <\\/span><span>", "<\\/span>");
|
return find_in_html(html, "(?<=<div><span>)\\d+(\\.|\\,)?\\d{0,3}(?=<\\/span>)");
|
||||||
for (auto &found : founds) {
|
|
||||||
std::replace(found.begin(), found.end(), ',', '.');
|
|
||||||
}
|
|
||||||
|
|
||||||
return founds;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void dumpVectorsToStderr(std::vector<std::wstring> &products, std::vector<std::wstring> &amounts, std::vector<std::wstring> &net_weights, std::vector<std::wstring> &prices) {
|
void dumpVectorsToStderr(std::vector<std::wstring> &products, std::vector<std::wstring> &amounts, std::vector<std::wstring> &net_weights, std::vector<std::wstring> &prices) {
|
||||||
|
|
Loading…
Reference in New Issue