This commit is contained in:
2025-05-08 21:02:56 +03:00
parent 6706cbec45
commit 86a11faf70
24 changed files with 1022 additions and 460 deletions

View File

@@ -1,9 +1,12 @@
#include "utils/utils.h"
#include <email_parser/emailparser.h>
#include <utils/base64.h>
#include <utils/utils.h>
#include <check/check.h>
#include <boost/regex.hpp>
#include <sstream>
#include <iostream>
#include <bits/stdc++.h>
#include <fstream>
#if __GNUC__ < 8 && __clang_major__ < 17
# include <experimental/filesystem>
@@ -14,7 +17,7 @@
#endif
std::string EmailParser::get_payload_in_email(std::string &email_content) {
boost::regex content_type_and_transfer_encoding_regex("Content-Type")
boost::regex content_type_and_transfer_encoding_regex("Content-Type");
// boost::regex body_start_regex("\r\n\r\n"); //boost::regex_constants::egrep
// boost::smatch smatch;
// if (boost::regex_search(email_content, smatch, body_start_regex)) {
@@ -23,6 +26,40 @@ std::string EmailParser::get_payload_in_email(std::string &email_content) {
// return "";
}
/// Collects the Content-Type headers found in the file at `path`.
///
/// The file is read line by line. For every line whose text before the first
/// ':' is exactly "Content-Type", the text after that ':' is stored as the
/// value (untrimmed: a leading space and a trailing '\r' are kept as read).
/// Lines that start with a space or a tab directly after such a header are
/// treated as folded continuations of it and are stored whole, in file order,
/// under the same key.
///
/// @param path  Path of the file to scan.
/// @return Multimap keyed by "Content-Type" with one entry per captured line.
///         Empty when the file cannot be opened or contains no such header.
std::multimap<std::string, std::string> EmailParser::parse_email_content_types(std::string path) {
    static const std::string content_type_key = "Content-Type";

    std::ifstream input_file(path, std::ios::in);
    std::multimap<std::string, std::string> mail_options;

    // True while the most recent header line was a Content-Type header, so
    // that only its own continuation lines are attributed to it.
    bool in_content_type = false;

    std::string line;
    while (std::getline(input_file, line)) {
        // Continuation lines must be recognised before the key filter below:
        // their "key" starts with whitespace and can never equal the header name.
        const bool is_continuation =
            !line.empty() && (line.front() == '\t' || line.front() == ' ');
        if (is_continuation) {
            if (in_content_type) {
                mail_options.emplace(content_type_key, line);
            }
            continue;
        }

        // Split on the first ':' only, so colons inside the value survive.
        const std::string::size_type colon = line.find(':');
        const std::string key = line.substr(0, colon);

        in_content_type = (key == content_type_key);
        if (!in_content_type) {
            continue;
        }

        mail_options.emplace(
            key, colon == std::string::npos ? std::string() : line.substr(colon + 1));
    }
    return mail_options;
}
// std::vector<int> EmailParser::find_base64_blocks_in_email(std::string &email_content) {
// std::string glued_together;
// for (auto c : email_content) {
@@ -38,6 +75,18 @@ EmailParser::EmailParser() {
}
/// Locates every image Content-Type header in raw e-mail text.
///
/// Performs a case-sensitive search for the literal text
/// "Content-Type: image/" and records where each occurrence begins.
///
/// @param email_content  Raw e-mail text to scan; it is not modified.
/// @return Zero-based offsets of each occurrence, in ascending order.
///         Empty when there is none.
std::vector<int> search_content_types_in_email_content(std::string& email_content) {
    // A plain substring search is enough to locate the header start; it also
    // guarantees that one hit cannot consume the text of the following ones.
    static const std::string image_header_prefix = "Content-Type: image/";

    std::vector<int> content_type_positions;
    for (std::string::size_type pos = email_content.find(image_header_prefix);
         pos != std::string::npos;
         pos = email_content.find(image_header_prefix, pos + image_header_prefix.size())) {
        // The return type is fixed to int by the existing interface.
        content_type_positions.push_back(static_cast<int>(pos));
    }
    return content_type_positions;
}
// void find_and_decode_email_content()
Check EmailParser::parse(std::string &email_content) {
//1. Search "Content-Type: image/.*" in the .eml file.
// 1.1 If found 0, go to [2]
@@ -46,7 +95,15 @@ Check EmailParser::parse(std::string &email_content) {
//2. Try decoding content of the e-mail
//3. Search "t=\d{8}T\d{4,6}&s=\d{1,6}\.\d{1,2}&fn=\d{10,16}&i=\d{6}&fp=\d{10}&n=\d". Note that in some emails = and & signs could be replaced with its code in HTTP requests: %3D, %26
// 3.1 If not found, notify the user that we could not parse the .eml file
std::vector<int> content_type_positions = search_content_types_in_email_content(email_content);
if (content_type_positions.size() < 0) {
} else if (content_type_positions.size() == 1) {
} else {
}
// std::string payload = get_payload_in_email(email_content);
// Check c;
@@ -62,9 +119,62 @@ Check EmailParser::parse(std::string &email_content) {
}
// Reads the file at `path` into memory in binary mode and returns the result
// of parse() on that content.
//
// NOTE(review): the `return parse(content);` below is unconditional, so every
// statement after it in this body is unreachable as written. The body looks
// like two revisions interleaved (the first five statements reappear
// commented out near the end) - confirm which variant is intended.
Check EmailParser::parse_file(std::string path) {
std::ifstream ifile(path, std::ios::in | std::ios::binary);
// The value returned by file_size() is stored in an unsigned int here.
const unsigned int size = std::filesystem::file_size(path);
// Buffer of `size` NUL characters that read() fills below.
std::string content(size, '\0');
ifile.read(content.data(), size);
return parse(content);
// std::vector<std::string> contents = read_file(path);
// unsigned int body_start = -1;
// for (unsigned int i = 0; i < contents.size(); i ++) {
// std::string &line = contents[i];
// if (line == "\r") {
// body_start = i;
// break;
// }
// }
// if (body_start == (unsigned int) -1) throw "Not an E-Mail";
// for (unsigned int i = 0; i < contents.size(); i ++) {
// std::string &line = contents[i];
// if (line[0] == '\t') {
// contents[i - 1] += " " + contents[i];
// contents.erase(remove(contents.begin(), contents.end(), line), contents.end());
// i -= 2;
// }
// }
// for (auto &line : contents) {
// std::cout << line << std::endl;
// }
// std::cout << contents[body_start + 1] << std::endl;
// unsigned int body_start = contents.find("\r\n\r\n");
// if (body_start == (unsigned int)-1)
// throw "Not a E-Mail file";
// std::cout << contents.erase(0, body_start + 4);
// std::cout << contents << std::endl;
// std::vector<std::tuple<int, int>> message_parts_positions;
// while (contents.find("--") > 0) {
// }
// Unreachable as written (follows the return above).
return Check();
// Unreachable as written: fetches the entries from
// parse_email_content_types(path) and prints each one as "key: value".
std::multimap<std::string, std::string> content_types = parse_email_content_types(path);
// Declared but never read or written again in this body.
bool found_qr_image = false;
for (auto &content_type : content_types) {
boost::regex image_content_type_regex("image\\/(png|gif|jpg|jpeg)");
boost::cmatch cmatch;
// regex_match succeeds only when the entire value matches the pattern.
if (boost::regex_match(content_type.second.c_str(), cmatch, image_content_type_regex)) {
std::cout << cmatch << std::endl;
}
std::cout << content_type.first << ": " << content_type.second << std::endl;
}
// std::ifstream ifile(path, std::ios::in | std::ios::binary);
// const unsigned int size = std::filesystem::file_size(path);
// std::string content(size, '\0');
// ifile.read(content.data(), size);
// return parse(content);
return Check();
}

View File

@@ -2,14 +2,18 @@
#define CHECKS_PARSER_EMAIL_PARSER
#include <check/check.h>
#include <map>
/// Entry point for turning e-mail data into a Check.
class EmailParser {
public:
    EmailParser();

    /// Parses the given e-mail text and returns the resulting Check.
    Check parse(std::string &email_content);

    /// Same as parse(), but takes the path of a file instead of its text.
    Check parse_file(std::string path);

private:
    /// Internal helper operating on the e-mail text; returns a string.
    std::string get_payload_in_email(std::string &email_content);

    /// Returns the Content-Type header entries found in the file at `path`,
    /// keyed by header name.
    std::multimap<std::string, std::string> parse_email_content_types(std::string path);

    // std::vector<int> find_base64_blocks_in_email(std::string &email_content);
};
#endif // CHECKS_PARSER_EMAIL_PARSER