requests to ofd
This commit is contained in:
parent
1843479e6b
commit
47dfc19395
|
@ -90,12 +90,13 @@ if(QT_VERSION_MAJOR EQUAL 6)
|
||||||
qt_finalize_executable(checks-parser)
|
qt_finalize_executable(checks-parser)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
find_package(PkgConfig REQUIRED)
|
# find_package(PkgConfig REQUIRED)
|
||||||
find_package(OpenCV REQUIRED)
|
find_package(OpenCV REQUIRED)
|
||||||
|
# find_package(LibXml2 REQUIRED)
|
||||||
|
|
||||||
include_directories( ${OpenCV_INCLUDE_DIRS})
|
include_directories( ${OpenCV_INCLUDE_DIRS} )#${LIBXML2_INCLUDE_DIR})
|
||||||
target_link_libraries(checks-parser PRIVATE -lzbar)
|
target_link_libraries(checks-parser PRIVATE -lzbar)
|
||||||
target_link_libraries(checks-parser PRIVATE -ltesseract)
|
target_link_libraries(checks-parser PRIVATE -ltesseract)
|
||||||
target_link_libraries(checks-parser PRIVATE -lcurl)
|
target_link_libraries(checks-parser PRIVATE -lcurl)
|
||||||
|
# target_link_libraries(checks-parser PRIVATE -lxml2)
|
||||||
target_link_libraries( checks-parser PRIVATE ${OpenCV_LIBS} )
|
target_link_libraries(checks-parser PRIVATE ${OpenCV_LIBS} )
|
||||||
|
|
7
TODO
7
TODO
|
@ -1,7 +1,7 @@
|
||||||
Complete module "export":
|
Complete module "export": [done]
|
||||||
make UI; [done]
|
make UI; [done]
|
||||||
make export to .csv [done]
|
make export to .csv [done]
|
||||||
Complete module "image-to-text":
|
Complete module "image-to-text": [done]
|
||||||
make UI; [done]
|
make UI; [done]
|
||||||
make use of tesseract/opencv (https://learnopencv.com/deep-learning-based-text-recognition-ocr-using-tesseract-and-opencv/); [done]
|
make use of tesseract/opencv (https://learnopencv.com/deep-learning-based-text-recognition-ocr-using-tesseract-and-opencv/); [done]
|
||||||
Add features:
|
Add features:
|
||||||
|
@ -9,7 +9,8 @@ Add features:
|
||||||
auto download of stores modules [done]
|
auto download of stores modules [done]
|
||||||
auto download of ofd modules [done]
|
auto download of ofd modules [done]
|
||||||
settings, a window for editing settings. [done]
|
settings, a window for editing settings. [done]
|
||||||
add ability to control contrast and rotation of a check image before passing it to OCR
|
add ability to control contrast and rotation of a check image before passing it to OCR [done]
|
||||||
|
add ability to scan a qr code and request data from ofd.ru [done]
|
||||||
Refactor:
|
Refactor:
|
||||||
Get rid of CPR, use libcurl instead [done]
|
Get rid of CPR, use libcurl instead [done]
|
||||||
Build:
|
Build:
|
||||||
|
|
|
@ -102,4 +102,3 @@ void AdjustPictureDialog::on_contrastSlider_sliderMoved(int position) {
|
||||||
scene->clear();
|
scene->clear();
|
||||||
scene->addPixmap(QPixmap::fromImage(image));
|
scene->addPixmap(QPixmap::fromImage(image));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
27
main.cpp
27
main.cpp
|
@ -6,8 +6,21 @@
|
||||||
#include <curl/curl.h>
|
#include <curl/curl.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
|
// std::ifstream test_file("/home/leca/projects/qt/checks-parser/test.html");
|
||||||
|
|
||||||
|
// std::string content;
|
||||||
|
|
||||||
|
// std::string str;
|
||||||
|
// while(getline(test_file, str)) {
|
||||||
|
// content += str + "\n";
|
||||||
|
// }
|
||||||
|
|
||||||
|
// parseOfdRuAnswer(content);
|
||||||
|
// return 0;
|
||||||
curl_global_init(CURL_GLOBAL_ALL);
|
curl_global_init(CURL_GLOBAL_ALL);
|
||||||
|
|
||||||
std::string program_data_path = get_path_relative_to_home(".local/share/checks_parser");
|
std::string program_data_path = get_path_relative_to_home(".local/share/checks_parser");
|
||||||
|
@ -19,20 +32,8 @@ int main(int argc, char *argv[]) {
|
||||||
Settings s(settings_file_path);
|
Settings s(settings_file_path);
|
||||||
Net n;
|
Net n;
|
||||||
|
|
||||||
// std::vector<std::string> ofd_updates = ofd.check_updates();
|
|
||||||
// for (const std::string &update : ofd_updates) {
|
|
||||||
// std::cout << "Downloading "
|
|
||||||
// << s.get_setting("ofds_modules_url") + update << " to "
|
|
||||||
// << get_path_relative_to_home(s.get_setting("ofds_modules_dir") +
|
|
||||||
// "/" + update)
|
|
||||||
// << std::endl;
|
|
||||||
// n.get_file(s.get_setting("ofds_modules_url") + "/" + update,
|
|
||||||
// get_path_relative_to_home(s.get_setting("ofds_modules_dir") +
|
|
||||||
// "/" + update));
|
|
||||||
// }
|
|
||||||
|
|
||||||
Parser p;
|
Parser p;
|
||||||
std::vector<std::string> stores_updates = p.check_updates();\
|
std::vector<std::string> stores_updates = p.check_updates();
|
||||||
for (const std::string &update : stores_updates) {
|
for (const std::string &update : stores_updates) {
|
||||||
std::cout << "Downloading "
|
std::cout << "Downloading "
|
||||||
<< s.get_setting("stores_modules_url") + update << " to "
|
<< s.get_setting("stores_modules_url") + update << " to "
|
||||||
|
|
|
@ -65,8 +65,7 @@ void MainWindow::receiveSolvedCaptcha(std::string captcha) {
|
||||||
|
|
||||||
std::string check_content = makeRequestToOfd(captcha);
|
std::string check_content = makeRequestToOfd(captcha);
|
||||||
|
|
||||||
std::cout << check_content << std::endl;
|
check = parseOfdRuAnswer(check_content);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MainWindow::on_parseButton_clicked() {
|
void MainWindow::on_parseButton_clicked() {
|
||||||
|
@ -83,6 +82,10 @@ void MainWindow::on_parseButton_clicked() {
|
||||||
SolveCaptchaDialog dialog = SolveCaptchaDialog(this);
|
SolveCaptchaDialog dialog = SolveCaptchaDialog(this);
|
||||||
connect(&dialog, &SolveCaptchaDialog::solvedCaptcha, this, &MainWindow::receiveSolvedCaptcha);
|
connect(&dialog, &SolveCaptchaDialog::solvedCaptcha, this, &MainWindow::receiveSolvedCaptcha);
|
||||||
dialog.exec();
|
dialog.exec();
|
||||||
|
|
||||||
|
OutputDialog d = OutputDialog(this, check);
|
||||||
|
d.show();
|
||||||
|
d.exec();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -96,8 +99,6 @@ void MainWindow::on_parseButton_clicked() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Check check;
|
|
||||||
|
|
||||||
for (auto& g : c) {
|
for (auto& g : c) {
|
||||||
check.add_goods(g);
|
check.add_goods(g);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
t=20240828T2033
|
|
||||||
s=895.50
|
|
||||||
fn=7380440700069236
|
|
||||||
i=21386
|
|
||||||
fp=1292383731
|
|
||||||
n=1
|
|
2
ofd.desc
2
ofd.desc
|
@ -1,2 +0,0 @@
|
||||||
1. curl -X GET https://check.ofd.ru/api/captcha/common/img
|
|
||||||
2. curl 'https://check.ofd.ru/Document/FetchReceiptFromFns' -H 'content-type: application/json;charset=UTF-8' --data-raw '{"TotalSum":52344,"FnNumber":"7281440701327430","ReceiptOperationType":"1","DocNumber":"25955","DocFiscalSign":"2518183888","Captcha":"INSERT SOLVED CAPTCHA","DocDateTime":"2024-08-16T19:36:00.000Z"}'
|
|
|
@ -1,7 +1,10 @@
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
#include <codecvt>
|
#include <codecvt>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iostream>
|
||||||
#include <locale>
|
#include <locale>
|
||||||
|
#include <regex>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
std::string to_utf8(std::wstring wide_string) {
|
std::string to_utf8(std::wstring wide_string) {
|
||||||
|
@ -42,3 +45,92 @@ std::vector<std::string> split(std::string s, std::string delimiter) {
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the part of `text` located between the first match of `from` and
 * the first match of `to` that follows it.
 *
 * Both `from` and `to` are compiled as regular expressions (std::wregex),
 * so regex metacharacters in them must be escaped by the caller.
 *
 * - If `from` does not match, extraction starts at the beginning of `text`.
 * - If `to` does not match after the start position, everything up to the
 *   end of `text` is returned.
 * - If `to` matches immediately after `from`, the result is empty.
 *
 * @param text  Text to search in (not modified).
 * @param from  Regex marking where the wanted fragment begins (exclusive).
 * @param to    Regex marking where the wanted fragment ends (exclusive).
 * @return The extracted fragment.
 * @throws std::regex_error if `from` or `to` is not a valid regular expression.
 */
std::wstring substring_from_to(std::wstring& text, std::wstring from, std::wstring to) {
    const std::wregex start_regex(from);
    const std::wregex end_regex(to);

    if (text.empty()) return text;

    // Start right after the first `from` match, or at the very beginning
    // when there is no such match.
    std::wstring::const_iterator fragment_begin = text.cbegin();
    std::wsmatch start_match;
    if (std::regex_search(text.cbegin(), text.cend(), start_match, start_regex)) {
        fragment_begin = start_match[0].second;
    }

    // "Found at offset 0" and "not found" are distinct outcomes: the former
    // yields an empty fragment, the latter the whole remainder of the text.
    std::wsmatch end_match;
    if (std::regex_search(fragment_begin, text.cend(), end_match, end_regex)) {
        return std::wstring(fragment_begin, end_match[0].first);
    }

    return std::wstring(fragment_begin, text.cend());
}
|
||||||
|
|
||||||
|
std::wstring trim_html_response(std::wstring& check) {
|
||||||
|
std::wstring begin_check_marker = from_utf8("<!-- Products -->");
|
||||||
|
std::wstring end_check_marker = from_utf8("<!-- \\/Products -->");
|
||||||
|
std::wstring trimmed = substring_from_to(check, begin_check_marker, end_check_marker);
|
||||||
|
trimmed += from_utf8("\n</div>");
|
||||||
|
return trimmed;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::wstring> find_in_html(std::string& html, std::string regex, std::string html_start, std::string html_end) {
|
||||||
|
std::regex searching_regex(regex);
|
||||||
|
|
||||||
|
std::vector<std::wstring> parsed;
|
||||||
|
for (std::sregex_iterator it{html.begin(), html.end(), searching_regex}, end{};
|
||||||
|
it != end; it++) {
|
||||||
|
|
||||||
|
std::wstring found_entry = from_utf8(it->str());
|
||||||
|
std::wstring extracted = substring_from_to(found_entry, from_utf8(html_start), from_utf8(html_end));
|
||||||
|
|
||||||
|
parsed.push_back(extracted);
|
||||||
|
}
|
||||||
|
return parsed;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::wstring> find_products_in_html(std::string html) {
|
||||||
|
return find_in_html(html, "<div class=\"ifw-col ifw-col-1 text-left\"><b>.*<\\/b><\\/div>", "<div class=\"ifw-col ifw-col-1 text-left\"><b>", "<\\/b><\\/div>");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::wstring> find_amounts_in_html(std::string html) {
|
||||||
|
return find_in_html(html, "<span>\\d+<\\/span>", "<span>", "<\\/span>");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::wstring> find_prices_in_html(std::string html) {
|
||||||
|
return find_in_html(html, "X <\\/span><span>\\d+\\.\\d{2}<\\/span>", "X <\\/span><span>", "<\\/span>");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds a Check from the HTML text passed in.
 *
 * The HTML is trimmed to its products section, then product names, amounts
 * and prices are extracted as three parallel lists; the i-th entries of the
 * lists form one Goods item.
 *
 * If no products are found, or the three lists differ in length, the page
 * could not be parsed reliably: an error is printed to stderr and the process
 * exits with status -1.
 *
 * @param html  HTML text (UTF-8).
 * @return Check filled with one Goods entry per product found.
 */
Check parseOfdRuAnswer(std::string html) {
    std::wstring wstr_html = from_utf8(html);
    std::string trimmed = to_utf8(trim_html_response(wstr_html));

    std::vector<std::wstring> products = find_products_in_html(trimmed);
    std::vector<std::wstring> amounts = find_amounts_in_html(trimmed);
    std::vector<std::wstring> prices = find_prices_in_html(trimmed);

    // Compare the sizes directly. The lists are indexed in lock-step below,
    // so any length mismatch would read out of bounds, and dividing by
    // products.size() is undefined when nothing was found.
    const bool lists_consistent = !products.empty() &&
                                  amounts.size() == products.size() &&
                                  prices.size() == products.size();
    if (!lists_consistent) {
        std::cerr << "An error has occured during the parsing of html. Please, contact the developer." << std::endl;
        std::exit(-1);
    }

    Check c;

    for (std::size_t i = 0; i < products.size(); ++i) {
        Goods goods(to_utf8(products[i]), std::stod(prices[i]), std::stod(amounts[i]));
        c.add_goods(goods);
    }

    return c;
}
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include "../check/check.h"
|
||||||
|
|
||||||
std::string to_utf8(std::wstring wide_string);
|
std::string to_utf8(std::wstring wide_string);
|
||||||
std::wstring from_utf8(std::string string);
|
std::wstring from_utf8(std::string string);
|
||||||
|
@ -14,4 +15,8 @@ bool vector_contains_element(const std::vector<T> &vector, const T &to_find);
|
||||||
|
|
||||||
std::vector<std::string> split(std::string, std::string);
|
std::vector<std::string> split(std::string, std::string);
|
||||||
|
|
||||||
|
Check parseOfdRuAnswer(std::string);
|
||||||
|
|
||||||
|
std::wstring trim_html_response(std::wstring& check);
|
||||||
|
|
||||||
#endif // UTILS_H
|
#endif // UTILS_H
|
||||||
|
|
Loading…
Reference in New Issue