SyndicationDomination 0.0
An RSS/Atom parser, because there's nothing else out there.
Loading...
Searching...
No Matches
html.hpp
Go to the documentation of this file.
1#pragma once
2
3
4#include "utils.hpp"
5#include <stdexcept>
6#include <string>
7#include <cstring>
8
9#include <tidy.h>
10#include <tidybuffio.h>
11
12#include <pugixml.hpp>
13
14
15using namespace pugi;
16
/// Represents an HTML document.
///
/// Only the declaration lives in this header; the member functions are
/// defined out of line (html.cpp), so the notes below describe what the
/// signatures establish and flag everything else as something to confirm.
///
/// NOTE(review): xml_document / xml_node are spelled unqualified because of
/// the header-scope `using namespace pugi;` above, which leaks into every
/// translation unit that includes this file.
/// NOTE(review): std::vector is used below, but <vector> is not among the
/// includes visible in this header - presumably it arrives through
/// "utils.hpp"; confirm, or include it directly.
31class Html {
32private:
   /// pugixml document owned by this object.
   /// NOTE(review): presumably holds the tidied/converted markup - confirm in html.cpp.
33 xml_document doc;
   /// pugixml node handle.
   /// NOTE(review): by its name, presumably the <head> element of `doc` - confirm.
34 xml_node head;
35
   // String fields, each starting out empty. Every one has a public getter
   // of the matching name below.
   // NOTE(review): they look like per-field caches filled by those getters
   // (which would explain why the getters are non-const) - confirm in html.cpp.
36 std::string title{""};
37 std::string icon_url{""};
38 std::string img_url{""};
39 std::string rss_url{""};
40 std::string description{""};
41 std::string article{""};
42 std::string body{""};
43
   /// Static helper operating on the libtidy document handle passed by
   /// reference. Needs no Html instance.
   /// NOTE(review): the options it sets are not visible from this header.
47 static void configure_tidy_doc(TidyDoc &doc);
48
   /// Produces a TidyDoc handle for `path` (taken by value).
   /// NOTE(review): presumably reads and parses the file at `path`; error
   /// behaviour and ownership of the returned handle are not visible here.
52 TidyDoc tidy_doc_from_file(std::string path);
53
   /// Returns a std::string derived from the given TidyDoc.
   /// NOTE(review): by its name, the document serialised as XML/XHTML - confirm.
57 std::string convert_to_xml(TidyDoc doc);
58
   /// Element names treated as "useless" (script/form/navigation chrome).
   /// Declared `static inline`, so this in-class definition is the only one
   /// and the header requires C++17.
   /// NOTE(review): presumably consumed by remove_useless_children() - confirm.
59 static inline const std::vector<std::string> USELESS_CHILDREN = {
60 "script", "form", "input", "label", "nav", "footer", "header"
61 };
62
   /// Takes `root` by non-const reference.
   /// NOTE(review): presumably strips descendants whose names appear in
   /// USELESS_CHILDREN; whether it recurses is not visible here.
67 void remove_useless_children(xml_node &root);
68
   /// Private constructor from an existing TidyDoc handle.
   /// NOTE(review): presumably the common construction path shared by the
   /// public constructor and from_string() - confirm.
72 Html(TidyDoc &tdoc);
73
   /// Returns a pugixml node handle.
   /// NOTE(review): by its name, the <body> element of `doc`; what is
   /// returned when there is no body is not visible here.
77 xml_node get_body_node();
78
79public:
80
   /// Public constructor taking a path string by value.
   /// NOTE(review): presumably loads the HTML file at `path` - confirm.
   /// NOTE(review): not marked `explicit`, so a std::string converts to
   /// Html implicitly; confirm that is intended.
86 Html(std::string path);
87
   /// Constructs the Html object from a string containing valid HTML.
93 static Html from_string(std::string s);
94
   // Accessors. All return std::string by value and none is const-qualified,
   // so they cannot be called through a const Html.
   // NOTE(review): what each returns when the document lacks the
   // corresponding data is not visible from this header.
95 std::string get_title();
96 std::string get_icon_url();
97 std::string get_img_url();
98 std::string get_rss_url();
99 std::string get_body();
100 std::string get_article();
101 std::string get_description();
102
   /// Returns a std::string; `metadata_only` defaults to false.
   /// NOTE(review): presumably a JSON serialisation of the fields above,
   /// with `metadata_only` omitting the larger body/article content -
   /// confirm the exact keys in html.cpp.
103 std::string to_json(bool metadata_only=false);
104};
Represents an HTML document.
Definition html.hpp:31
std::string get_body()
Definition html.cpp:173
std::string get_description()
Definition html.cpp:204
std::string to_json(bool metadata_only=false)
Definition html.cpp:224
static Html from_string(std::string s)
Constructs the Html object from a string containing valid HTML.
Definition html.cpp:71
std::string get_article()
Definition html.cpp:182
std::string get_rss_url()
Definition html.cpp:152
std::string get_icon_url()
Definition html.cpp:86
std::string get_img_url()
Definition html.cpp:129
std::string get_title()
Definition html.cpp:79