Cleanup; fixed erasing of <b> and </b> tags

This commit is contained in:
leca 2025-06-14 12:32:51 +03:00
parent a3ecaeef55
commit 91ca01b255
4 changed files with 5 additions and 32 deletions

View File

@ -1,4 +1,3 @@
#include "email_parser/emailparser.h"
#include <mainwindow.h> #include <mainwindow.h>
#include <net/net.h> #include <net/net.h>
#include <settings/settings.h> #include <settings/settings.h>
@ -15,40 +14,14 @@
# include <filesystem> # include <filesystem>
using namespace std::filesystem; using namespace std::filesystem;
#endif #endif
#include <QDateTime>
#include <QFile>
#include <QStackedLayout>
#include <QTextStream>
#ifdef BUILD_TRANSLATIONS #ifdef BUILD_TRANSLATIONS
# include <QTranslator> # include <QTranslator>
#endif #endif
#include <settingsdialog.h>
#ifdef BUILD_EMAIL_MODE #ifdef BUILD_EMAIL_MODE
// #include <vmime/vmime.hpp>
# include <email_parser/emailparser.h> # include <email_parser/emailparser.h>
#endif #endif
#include <QPushButton>
#include <utils/base64.h>
#include <utils/utils.h>
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
// std::string path = "/tmp/html";
// std::string content = read_file(path);
// Check c = parseOfdRuAnswer(content);
// std::cout <<
// return 0;
// EmailParser p;
// p.parse_file("/home/leca/example_email_receipts/avito.eml");
// p.parse_file("/home/leca/example_email_receipts/читай_город.eml");
// p.parse_file("/home/leca/example_email_receipts/lenta.eml");
// p.parse_file("/home/leca/example_email_receipts/magnit.eml");
// p.parse_file("/home/leca/example_email_receipts/pyaterochka.eml");
// p.parse_file("/home/leca/example_email_receipts/rzd.eml");
// p.parse_file("/home/leca/example_email_receipts/russteels.eml");
// p.parse_file("/home/leca/example_email_receipts/avtodor.eml");
// return 0;
curl_global_init(CURL_GLOBAL_ALL); curl_global_init(CURL_GLOBAL_ALL);
qRegisterMetaType<Check>("Check"); qRegisterMetaType<Check>("Check");

View File

@ -671,7 +671,7 @@
<context> <context>
<name>QObject</name> <name>QObject</name>
<message> <message>
<location filename="../main.cpp" line="82"/> <location filename="../main.cpp" line="55"/>
<source>Using locale: </source> <source>Using locale: </source>
<translation>Using locale: </translation> <translation>Using locale: </translation>
</message> </message>

View File

@ -647,7 +647,7 @@
<context> <context>
<name>QObject</name> <name>QObject</name>
<message> <message>
<location filename="../main.cpp" line="82"/> <location filename="../main.cpp" line="55"/>
<source>Using locale: </source> <source>Using locale: </source>
<translation>Использую локаль: </translation> <translation>Использую локаль: </translation>
</message> </message>

View File

@ -215,13 +215,13 @@ std::vector<std::wstring> find_in_html(std::string& html, std::string regex) {
std::vector<std::wstring> find_products_in_html(std::string html) { std::vector<std::wstring> find_products_in_html(std::string html) {
boost::regex search_regex("(?<=\\n\\s{20}<div class=\"ifw-col ifw-col-1 text-left\">).{0,100}(?=(<\\/b>)?<\\/div>)"); boost::regex search_regex("(?<=\\n\\s{20}<div class=\"ifw-col ifw-col-1 text-left\">).{0,100}(?=(<\\/b>)?<\\/div>)");
boost::regex b_regex("<b>"); boost::regex b_regex("<\\/?b>");
std::vector<std::wstring> parsed; std::vector<std::wstring> parsed;
for (boost::sregex_iterator it{html.begin(), html.end(), search_regex}, end{}; for (boost::sregex_iterator it{html.begin(), html.end(), search_regex}, end{};
it != end; it++) { it != end; it++) {
std::string found = it->str(); std::string found = it->str();
boost::erase_regex(found, b_regex); boost::erase_regex(found, b_regex, boost::regex_constants::match_all);
found = boost::regex_replace(found, boost::regex("&nbsp;"), "?"); found = boost::regex_replace(found, boost::regex("&nbsp;"), "?");
parsed.push_back(from_utf8(found)); parsed.push_back(from_utf8(found));
} }