8 #ifndef ORCUS_SAX_PARSER_HPP 9 #define ORCUS_SAX_PARSER_HPP 11 #include "sax_parser_base.hpp" 29 template<
typename _Handler,
typename _Config = sax_parser_default_config>
33 typedef _Handler handler_type;
34 typedef _Config config_type;
36 sax_parser(
const char* content,
const size_t size, handler_type& handler);
50 void element_open(
const char* begin_pos);
51 void element_close(
const char* begin_pos);
53 void declaration(
const char* name_check);
60 handler_type& m_handler;
63 template<
typename _Handler,
typename _Config>
65 const char* content,
const size_t size, handler_type& handler) :
71 template<
typename _Handler,
typename _Config>
76 template<
typename _Handler,
typename _Config>
85 assert(m_buffer_pos == 0);
88 template<
typename _Handler,
typename _Config>
94 if (!has_char() || cur_char() !=
'<')
97 if (config_type::baseline_version >= 11)
101 if (next_char_checked() !=
'?')
108 template<
typename _Handler,
typename _Config>
113 if (cur_char() ==
'<')
116 if (!m_root_elem_open)
120 else if (m_nest_level)
128 template<
typename _Handler,
typename _Config>
131 assert(cur_char() ==
'<');
132 const char* pos = mp_char;
133 char c = next_char_checked();
143 declaration(
nullptr);
146 if (!is_alpha(c) && c !=
'_')
152 template<
typename _Handler,
typename _Config>
155 assert(is_alpha(cur_char()) || cur_char() ==
'_');
158 element_name(elem, begin_pos);
167 if (next_and_char() !=
'>')
170 elem.end_pos = mp_char;
171 m_handler.start_element(elem);
173 m_handler.end_element(elem);
174 #if ORCUS_DEBUG_SAX_PARSER 175 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
183 elem.end_pos = mp_char;
185 m_handler.start_element(elem);
187 #if ORCUS_DEBUG_SAX_PARSER 188 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
197 template<
typename _Handler,
typename _Config>
200 assert(cur_char() ==
'/');
204 element_name(elem, begin_pos);
206 if (cur_char() !=
'>')
209 elem.end_pos = mp_char;
211 m_handler.end_element(elem);
212 #if ORCUS_DEBUG_SAX_PARSER 213 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
216 m_root_elem_open =
false;
219 template<
typename _Handler,
typename _Config>
222 assert(cur_char() ==
'!');
224 size_t len = remains();
228 switch (next_and_char())
233 if (next_and_char() !=
'-')
247 expects_next(
"CDATA[", 6);
255 expects_next(
"OCTYPE", 6);
266 template<
typename _Handler,
typename _Config>
269 assert(cur_char() ==
'?');
275 #if ORCUS_DEBUG_SAX_PARSER 276 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
279 if (name_check && decl_name != name_check)
281 std::ostringstream os;
282 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
286 m_handler.start_declaration(decl_name);
290 while (cur_char_checked() !=
'?')
295 if (next_char_checked() !=
'>')
298 m_handler.end_declaration(decl_name);
301 #if ORCUS_DEBUG_SAX_PARSER 302 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
306 template<
typename _Handler,
typename _Config>
309 size_t len = remains();
313 const char* p0 = mp_char;
314 size_t i = 0, match = 0;
315 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
329 else if (c ==
'>' && match == 2)
332 size_t cdata_len = i - 2;
333 m_handler.characters(
pstring(p0, cdata_len),
false);
343 template<
typename _Handler,
typename _Config>
348 name(param.root_element);
352 size_t len = remains();
356 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
360 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
363 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
367 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
373 has_char_throw(
"DOCTYPE section too short.");
376 value(param.fpi,
false);
378 has_char_throw(
"DOCTYPE section too short.");
380 has_char_throw(
"DOCTYPE section too short.");
382 if (cur_char() ==
'>')
385 #if ORCUS_DEBUG_SAX_PARSER 386 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
388 m_handler.doctype(param);
394 value(param.uri,
false);
396 has_char_throw(
"DOCTYPE section too short.");
398 has_char_throw(
"DOCTYPE section too short.");
400 if (cur_char() !=
'>')
403 #if ORCUS_DEBUG_SAX_PARSER 404 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
406 m_handler.doctype(param);
410 template<
typename _Handler,
typename _Config>
413 const char* p0 = mp_char;
414 for (; has_char(); next())
416 if (cur_char() ==
'<')
419 if (cur_char() ==
'&')
424 buf.append(p0, mp_char-p0);
425 characters_with_encoded_char(buf);
427 m_handler.characters(
pstring(),
false);
429 m_handler.characters(
pstring(buf.get(), buf.size()),
true);
437 m_handler.characters(val,
false);
441 template<
typename _Handler,
typename _Config>
445 pstring attr_ns_name, attr_name, attr_value;
446 attribute_name(attr.ns, attr.name);
448 #if ORCUS_DEBUG_SAX_PARSER 449 std::ostringstream os;
450 os <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'";
456 std::ostringstream os;
457 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
462 attr.transient = value(attr.value,
true);
467 #if ORCUS_DEBUG_SAX_PARSER 468 os <<
" value='" << attr.value <<
"'" << endl;
472 m_handler.attribute(attr);
Definition: pstring.hpp:24
Definition: cell_buffer.hpp:21
Definition: sax_parser.hpp:15
static const uint8_t baseline_version
Definition: sax_parser.hpp:22
Definition: sax_parser_base.hpp:100
Definition: sax_parser_base.hpp:85
Definition: sax_parser_base.hpp:45
Definition: base64.hpp:15
Definition: sax_parser.hpp:30
Definition: sax_parser_base.hpp:108