All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Pages
Html.h
Go to the documentation of this file.
1 // Copyright 2015, 2016 Thomas Trapp
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef HEXT_HTML_H_INCLUDED
16 #define HEXT_HTML_H_INCLUDED
17 
18 /// @file
19 /// Declares hext::Html
20 
21 #include <cstddef>
22 
23 #include <gumbo.h>
24 
25 
26 namespace hext {
27 
28 
29 /// A RAII wrapper for Gumbo.
30 ///
31 /// All HTML is expected to be UTF-8 encoded.
32 /// Gumbo will parse anything you throw at it. When given invalid or incomplete
33 /// HTML it will even fix it for you.
34 ///
35 /// @par Example:
36 /// ~~~~~~~~~~~~~
37 /// Html page("<html><body>This is a string containing html</body></html>");
38 /// const GumboNode * root = page.root();
39 /// // root now points to the topmost HTML element (<html>).
40 /// assert(root);
41 /// Rule html_root(HtmlTag::HTML);
42 /// assert(html_root.matches(root));
43 /// ~~~~~~~~~~~~~
44 class Html
45 {
46 public:
47  /// Constructs an Html from a non-owning null-terminated string.
48  ///
49  /// @warning The buffer must stay alive until the destruction of this
50  /// instance.
51  ///
52  /// @param buffer: A null-terminated string containing HTML.
53  explicit Html(const char * buffer) noexcept;
54 
55  /// Constructs an Html from a non-owning pointer.
56  ///
57  /// @warning The buffer must stay alive until the destruction of this
58  /// instance.
59  ///
60  /// @param buffer: A string containing HTML.
61  /// @param size: The length of the given buffer.
62  Html(const char * buffer, std::size_t size) noexcept;
63  /// Destructor. NOTE(review): presumably releases g_outp_; its definition is not in this header.
64  ~Html() noexcept;
65  Html(Html&&) noexcept = default; // NOTE(review): defaulted move copies the raw g_outp_ and leaves the source's pointer unchanged; confirm ~Html() cannot release the same handle twice.
66  Html& operator=(Html&&) noexcept = default; // NOTE(review): defaulted move assignment overwrites this instance's g_outp_ without releasing it first; confirm this neither leaks nor double-releases.
67 
68  /// Returns a non-owning pointer to the root node of the HTML document.
69  ///
70  /// @warning The pointer may not be used after the destruction of this
71  /// instance.
72  const GumboNode * root() const noexcept;
73 
74 private: // Copying is disabled: both copy operations below are deleted.
75  Html(const Html&) = delete;
76  Html& operator=(const Html&) = delete;
77 
78  /// Gumbo's resource handle.
79  GumboOutput * g_outp_;
80 };
81 
82 
83 } // namespace hext
84 
85 
86 #endif // HEXT_HTML_H_INCLUDED
87 
~Html() noexcept
const GumboNode * root() const noexcept
Returns a non-owning pointer to the root node of the HTML document.
Html & operator=(Html &&) noexcept=default
Html(const char *buffer) noexcept
Constructs an Html from a non-owning null-terminated string.
A RAII wrapper for Gumbo.
Definition: Html.h:44