|
| Rule (HtmlTag tag=HtmlTag::ANY, bool optional=false, bool greedy=false) noexcept |
| Constructs a Rule with a known HTML tag. More...
|
|
| Rule (std::string tag, bool optional=false, bool greedy=false) noexcept |
| Constructs a Rule with the HTML tag given as a string. More...
|
|
| ~Rule () noexcept=default |
|
| Rule (Rule &&) noexcept=default |
|
| Rule (const Rule &other) |
|
Rule & | operator= (Rule &&) noexcept=default |
|
Rule & | operator= (const Rule &other) |
|
const Rule * | child () const noexcept |
| Returns the child or nullptr if childless. More...
|
|
const Rule * | next () const noexcept |
| Returns the next rule or nullptr if no following rule. More...
|
|
const std::vector< Rule > & | nested () const noexcept |
| Returns the nested rules. More...
|
|
Rule * | child () noexcept |
| Returns the child or nullptr if childless. More...
|
|
Rule * | next () noexcept |
| Returns the next rule or nullptr if no following rule. More...
|
|
std::vector< Rule > & | nested () noexcept |
| Returns the nested rules. More...
|
|
Rule & | append_child (Rule new_child) |
| Appends a child. More...
|
|
Rule & | append_next (Rule sibling) |
| Appends a following Rule. More...
|
|
Rule & | append_nested (Rule nested) |
| Appends a nested Rule. More...
|
|
Rule & | append_match (std::unique_ptr< Match > match) |
| Appends a Match. More...
|
|
template<typename MatchType , typename... Args> |
Rule & | append_match (Args &&... arg) |
| Emplaces a Match. More...
|
|
Rule & | append_capture (std::unique_ptr< Capture > cap) |
| Appends a Capture. More...
|
|
template<typename CaptureType , typename... Args> |
Rule & | append_capture (Args &&... arg) |
| Emplaces a Capture. More...
|
|
HtmlTag | get_tag () const noexcept |
| Returns the HtmlTag this rule matches. More...
|
|
Rule & | set_tag (HtmlTag tag) noexcept |
| Sets the HtmlTag this rule matches. More...
|
|
bool | is_optional () const noexcept |
| Returns true if this rule is optional, i.e. if a match does not have to be found. More...
|
|
Rule & | set_optional (bool optional) noexcept |
| Sets whether this rule is optional, i.e. whether a match does not have to be found. More...
|
|
bool | is_greedy () const noexcept |
| Returns true if this rule is to be matched repeatedly. More...
|
|
Rule & | set_greedy (bool greedy) noexcept |
| Sets whether this rule is to be matched repeatedly. More...
|
|
std::optional< std::string > | get_tagname () const |
| Get custom HTML tag name. More...
|
|
Rule & | set_tagname (const std::string &tagname) |
| Set custom HTML tag name. More...
|
|
hext::Result | extract (const Html &html, std::uint64_t max_searches=0) const |
| Recursively extracts values from a hext::Html. More...
|
|
hext::Result | extract (const GumboNode *node, std::uint64_t max_searches=0) const |
| Recursively extracts values from a GumboNode. More...
|
|
bool | matches (const GumboNode *node) const |
| Returns true if this Rule matches node. More...
|
|
std::vector< ResultPair > | capture (const GumboNode *node) const |
| Returns the result of applying every Capture to node. More...
|
|
Extracts values from HTML.
A Rule defines how to match and capture HTML nodes. It can be applied to a GumboNode tree, where it recursively tries to find matches.
- Example:
Rule anchor(HtmlTag::A);
anchor.append_match<AttributeMatch>("href")
.append_capture<AttributeCapture>("href", "link");
{
Rule img(HtmlTag::IMG);
img.append_capture<AttributeCapture>("src", "img");
anchor.append_child(std::move(img));
}
Html html(
"<div><a href='/bob'> <img src='bob.jpg'/> </a></div>"
"<div><a href='/alice'><img src='alice.jpg'/></a></div>"
"<div><a href='/carol'><img src='carol.jpg'/></a></div>");
Result result = anchor.extract(html);
Rule(HtmlTag tag=HtmlTag::ANY, bool optional=false, bool greedy=false) noexcept
Constructs a Rule with a known HTML tag.
@ A
https://developer.mozilla.org/en-US/docs/Web/HTML/Element/a
@ IMG
https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img
std::vector< ResultMap > Result
A vector containing ResultMap.
Definition at line 89 of file Rule.h.