|
| Rule (HtmlTag tag=HtmlTag::ANY, bool optional=false, bool greedy=false) noexcept |
| Constructs a Rule with a known HTML tag.
|
|
| Rule (std::string tag, bool optional=false, bool greedy=false) noexcept |
| Constructs a Rule with the HTML tag given as a string.
|
|
| ~Rule () noexcept=default |
|
| Rule (Rule &&) noexcept=default |
|
| Rule (const Rule &other) |
|
Rule & | operator= (Rule &&) noexcept=default |
|
Rule & | operator= (const Rule &other) |
|
const Rule * | child () const noexcept |
| Returns the child or nullptr if childless.
|
|
const Rule * | next () const noexcept |
| Returns the next rule or nullptr if no following rule.
|
|
const std::vector< Rule > & | nested () const noexcept |
| Returns the nested rules.
|
|
Rule * | child () noexcept |
| Returns the child or nullptr if childless.
|
|
Rule * | next () noexcept |
| Returns the next rule or nullptr if no following rule.
|
|
std::vector< Rule > & | nested () noexcept |
| Returns the nested rules.
|
|
Rule & | append_child (Rule new_child) |
| Appends a child.
|
|
Rule & | append_next (Rule sibling) |
| Appends a following Rule.
|
|
Rule & | append_nested (Rule nested) |
| Appends a nested Rule.
|
|
Rule & | append_match (std::unique_ptr< Match > match) |
| Appends a Match.
|
|
template<typename MatchType , typename... Args> |
Rule & | append_match (Args &&... arg) |
| Emplaces a Match.
|
|
Rule & | append_capture (std::unique_ptr< Capture > cap) |
| Appends a Capture.
|
|
template<typename CaptureType , typename... Args> |
Rule & | append_capture (Args &&... arg) |
| Emplaces a Capture.
|
|
HtmlTag | get_tag () const noexcept |
| Returns the HtmlTag this rule matches.
|
|
Rule & | set_tag (HtmlTag tag) noexcept |
| Sets the HtmlTag this rule matches.
|
|
bool | is_optional () const noexcept |
| Returns true if this rule is optional, i.e. if a match does not have to be found.
|
|
Rule & | set_optional (bool optional) noexcept |
| Sets whether this rule is optional, i.e. whether a match does not have to be found.
|
|
bool | is_greedy () const noexcept |
| Returns true if this rule is to be matched repeatedly.
|
|
Rule & | set_greedy (bool greedy) noexcept |
| Sets whether this rule is to be matched repeatedly.
|
|
std::optional< std::string > | get_tagname () const |
| Get custom HTML tag name.
|
|
Rule & | set_tagname (const std::string &tagname) |
| Set custom HTML tag name.
|
|
hext::Result | extract (const Html &html, std::uint64_t max_searches=0) const |
| Recursively extracts values from a hext::Html.
|
|
hext::Result | extract (const GumboNode *node, std::uint64_t max_searches=0) const |
| Recursively extracts values from a GumboNode.
|
|
bool | matches (const GumboNode *node) const |
| Returns true if this Rule matches node.
|
|
std::vector< ResultPair > | capture (const GumboNode *node) const |
| Returns the result of applying every Capture to node.
|
|
Extracts values from HTML.
A Rule defines how to match and capture HTML nodes. It can be applied to a GumboNode tree, where it recursively tries to find matches.
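- Example:
// Create a rule that matches anchor elements ...
Rule anchor(HtmlTag::A);
// ... which must have an attribute called "href";
anchor.append_match<AttributeMatch>("href")
      // capture the value of href and store it as "link".
      .append_capture<AttributeCapture>("href", "link");
{
  // Create a rule that matches image elements,
  Rule img(HtmlTag::IMG);
  // capture the value of src and store it as "img",
  img.append_capture<AttributeCapture>("src", "img");
  // and make the image rule a child of the anchor rule.
  anchor.append_child(std::move(img));
}
// Parse some HTML and apply the rule to it.
Html html(
"<div><a href='/bob'> <img src='bob.jpg'/> </a></div>"
"<div><a href='/alice'><img src='alice.jpg'/></a></div>"
"<div><a href='/carol'><img src='carol.jpg'/></a></div>");
// result holds one ResultMap per matched anchor, each with the keys "link" and "img".
Result result = anchor.extract(html);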
Captures an HTML Element's attribute.
Matches HTML elements having an HTML attribute with a certain name and, optionally, a certain value.
A RAII wrapper for Gumbo.
Extracts values from HTML.
hext::Result extract(const Html &html, std::uint64_t max_searches=0) const
Recursively extracts values from a hext::Html.
Rule & append_capture(std::unique_ptr< Capture > cap)
Appends a Capture.
Rule & append_child(Rule new_child)
Appends a child.
HtmlTag::A
https://developer.mozilla.org/en-US/docs/Web/HTML/Element/a
HtmlTag::IMG
https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img
std::vector< ResultMap > Result
A vector containing ResultMap.
Definition at line 89 of file Rule.h.