diff --git a/CMakeLists.txt b/CMakeLists.txt index b89d866..9fd739e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,12 +90,13 @@ if(QT_VERSION_MAJOR EQUAL 6) qt_finalize_executable(checks-parser) endif() -find_package(PkgConfig REQUIRED) +# find_package(PkgConfig REQUIRED) find_package(OpenCV REQUIRED) +# find_package(LibXml2 REQUIRED) -include_directories( ${OpenCV_INCLUDE_DIRS}) +include_directories( ${OpenCV_INCLUDE_DIRS} )#${LIBXML2_INCLUDE_DIR}) target_link_libraries(checks-parser PRIVATE -lzbar) target_link_libraries(checks-parser PRIVATE -ltesseract) target_link_libraries(checks-parser PRIVATE -lcurl) - -target_link_libraries( checks-parser PRIVATE ${OpenCV_LIBS} ) +# target_link_libraries(checks-parser PRIVATE -lxml2) +target_link_libraries(checks-parser PRIVATE ${OpenCV_LIBS} ) diff --git a/TODO b/TODO index d4fa7d2..dc3089b 100644 --- a/TODO +++ b/TODO @@ -1,7 +1,7 @@ -Complete module "export": +Complete module "export": [done] make UI; [done] make export to .csv [done] -Complete module "image-to-text": +Complete module "image-to-text": [done] make UI; [done] make use of tesseract/opencv (https://learnopencv.com/deep-learning-based-text-recognition-ocr-using-tesseract-and-opencv/); [done] Add features: @@ -9,7 +9,8 @@ Add features: auto download of stores modules [done] auto download of ofd modules [done] settings, a window for editing settings. [done] - add ability to control contrast and rotation of a check image before passing it to OCR + add ability to control contrast and rotation of a check image before passing it to OCR [done] + add ability to scan a qr code and request data from ofd.ru [done] Refactor: Get rid of CPR, use libcurl instead [done] Build: diff --git a/adjustpicturedialog.cpp b/adjustpicturedialog.cpp index 40d6634..c9f8e54 100644 --- a/adjustpicturedialog.cpp +++ b/adjustpicturedialog.cpp @@ -102,4 +102,3 @@ void AdjustPictureDialog::on_contrastSlider_sliderMoved(int position) { scene->clear(); scene->addPixmap(QPixmap::fromImage(image)); } - diff --git a/main.cpp b/main.cpp index ceb29cf..5774fb6 100644 --- a/main.cpp +++ b/main.cpp @@ -6,8 +6,21 @@ #include #include #include +#include int main(int argc, char *argv[]) { + + // std::ifstream test_file("/home/leca/projects/qt/checks-parser/test.html"); + + // std::string content; + + // std::string str; + // while(getline(test_file, str)) { + // content += str + "\n"; + // } + + // parseOfdRuAnswer(content); + // return 0; curl_global_init(CURL_GLOBAL_ALL); std::string program_data_path = get_path_relative_to_home(".local/share/checks_parser"); @@ -19,20 +32,8 @@ int main(int argc, char *argv[]) { Settings s(settings_file_path); Net n; - // std::vector ofd_updates = ofd.check_updates(); - // for (const std::string &update : ofd_updates) { - // std::cout << "Downloading " - // << s.get_setting("ofds_modules_url") + update << " to " - // << get_path_relative_to_home(s.get_setting("ofds_modules_dir") + - // "/" + update) - // << std::endl; - // n.get_file(s.get_setting("ofds_modules_url") + "/" + update, - // get_path_relative_to_home(s.get_setting("ofds_modules_dir") + - // "/" + update)); - // } - Parser p; - std::vector stores_updates = p.check_updates();\ + std::vector stores_updates = p.check_updates(); for (const std::string &update : stores_updates) { std::cout << "Downloading " << s.get_setting("stores_modules_url") + update << " to " diff --git a/mainwindow.cpp b/mainwindow.cpp index fa9fbd2..4beb0c8 100644 --- a/mainwindow.cpp +++ b/mainwindow.cpp @@ -65,8 +65,7 @@ void MainWindow::receiveSolvedCaptcha(std::string captcha) { std::string check_content = makeRequestToOfd(captcha); - std::cout << check_content << std::endl; - + check = parseOfdRuAnswer(check_content); } void MainWindow::on_parseButton_clicked() { @@ -83,6 +82,10 @@ void MainWindow::on_parseButton_clicked() { SolveCaptchaDialog dialog = SolveCaptchaDialog(this); connect(&dialog, &SolveCaptchaDialog::solvedCaptcha, this, &MainWindow::receiveSolvedCaptcha); dialog.exec(); + + OutputDialog d = OutputDialog(this, check); + d.show(); + d.exec(); return; } @@ -96,8 +99,6 @@ void MainWindow::on_parseButton_clicked() { return; } - Check check; - for (auto& g : c) { check.add_goods(g); } diff --git a/nano.42371.save b/nano.42371.save deleted file mode 100644 index f8de794..0000000 --- a/nano.42371.save +++ /dev/null @@ -1,6 +0,0 @@ -t=20240828T2033 -s=895.50 -fn=7380440700069236 -i=21386 -fp=1292383731 -n=1 diff --git a/ofd.desc b/ofd.desc deleted file mode 100644 index 8c903e2..0000000 --- a/ofd.desc +++ /dev/null @@ -1,2 +0,0 @@ -1. curl -X GET https://check.ofd.ru/api/captcha/common/img -2. curl 'https://check.ofd.ru/Document/FetchReceiptFromFns' -H 'content-type: application/json;charset=UTF-8' --data-raw '{"TotalSum":52344,"FnNumber":"7281440701327430","ReceiptOperationType":"1","DocNumber":"25955","DocFiscalSign":"2518183888","Captcha":"INSERT SOLVED CAPTCHA","DocDateTime":"2024-08-16T19:36:00.000Z"}' diff --git a/utils/utils.cpp b/utils/utils.cpp index 916d6da..28f8a1d 100644 --- a/utils/utils.cpp +++ b/utils/utils.cpp @@ -1,7 +1,10 @@ #include "utils.h" #include +#include +#include #include +#include #include std::string to_utf8(std::wstring wide_string) { @@ -42,3 +45,92 @@ std::vector split(std::string s, std::string delimiter) { return result; } + +std::wstring substring_from_to(std::wstring& text, std::wstring from, std::wstring to) { + unsigned int start_pos = 0; + unsigned int end_pos = 0; + + std::wstring substring; + + std::wregex start_regex(from); + std::wregex end_regex(to); + + for (std::wsregex_iterator it{text.begin(), text.end(), start_regex}, end{}; + it != end; it++) { + start_pos = it->position() + it->str().size(); + break; + } + + if(text == from_utf8("")) return text; + substring = text.substr(start_pos, text.size()); + + for (std::wsregex_iterator it{substring.begin(), substring.end(), end_regex}, end{}; + it != end; it++) { + end_pos = it->position(); + break; + } + + if (end_pos == 0) return substring; + + substring = substring.substr(0, end_pos); + + return substring; +} + +std::wstring trim_html_response(std::wstring& check) { + std::wstring begin_check_marker = from_utf8(""); + std::wstring end_check_marker = from_utf8(""); + std::wstring trimmed = substring_from_to(check, begin_check_marker, end_check_marker); + trimmed += from_utf8("\n"); + return trimmed; +} + +std::vector find_in_html(std::string& html, std::string regex, std::string html_start, std::string html_end) { + std::regex searching_regex(regex); + + std::vector parsed; + for (std::sregex_iterator it{html.begin(), html.end(), searching_regex}, end{}; + it != end; it++) { + + std::wstring found_entry = from_utf8(it->str()); + std::wstring extracted = substring_from_to(found_entry, from_utf8(html_start), from_utf8(html_end)); + + parsed.push_back(extracted); + } + return parsed; +} + +std::vector find_products_in_html(std::string html) { + return find_in_html(html, "
.*<\\/b><\\/div>", "
", "<\\/b><\\/div>"); +} + +std::vector find_amounts_in_html(std::string html) { + return find_in_html(html, "\\d+<\\/span>", "", "<\\/span>"); +} + +std::vector find_prices_in_html(std::string html) { + return find_in_html(html, "X <\\/span>\\d+\\.\\d{2}<\\/span>", "X <\\/span>", "<\\/span>"); +} + +Check parseOfdRuAnswer(std::string html) { + std::wstring wstr_html = from_utf8(html); + std::string trimmed = to_utf8(trim_html_response(wstr_html)); + + std::vector products = find_products_in_html(trimmed); + std::vector amounts = find_amounts_in_html(trimmed); + std::vector prices = find_prices_in_html(trimmed); + + if ((products.size() + amounts.size() + prices.size())/products.size() != 3) { + std::cerr << "An error has occured during the parsing of html. Please, contact the developer." << std::endl; + std::exit(-1); + } + + Check c; + + for (int i = 0; i < products.size(); i ++) { + Goods goods(to_utf8(products[i]), std::stod(prices[i]), std::stod(amounts[i])); + c.add_goods(goods); + } + + return c; +} diff --git a/utils/utils.h b/utils/utils.h index aa8d2b9..a235f28 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -3,6 +3,7 @@ #include #include +#include "../check/check.h" std::string to_utf8(std::wstring wide_string); std::wstring from_utf8(std::string string); @@ -14,4 +15,8 @@ bool vector_contains_element(const std::vector &vector, const T &to_find); std::vector split(std::string, std::string); +Check parseOfdRuAnswer(std::string); + +std::wstring trim_html_response(std::wstring& check); + #endif // UTILS_H