3 * Copyright 2004-2006 Aaron Voisine <aaron@voisine.org>
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 * Modified for Palacios by Jack Lange <jarusl@cs.northwestern.edu>
31 #include <palacios/vmm_xml.h>
32 #include <palacios/vmm_sprintf.h>
34 #include <palacios/vmm.h>
// Parser tunables and flag bits shared by the routines in this file.
36 #define V3_XML_BUFSIZE 1024 // size of internal memory buffers
38 // Flags for struct v3_xml (OR-ed into xml->flags; NAMEM and TXTM are also stored per pair in an attribute list malloc map)
39 #define V3_XML_NAMEM 0x80 // name is malloced
40 #define V3_XML_TXTM 0x40 // txt is malloced
41 #define V3_XML_DUP 0x20 // attribute name and value are strduped
45 #define V3_XML_WS "\t\r\n " // whitespace characters, used as a strspn()/strcspn() set by the parser
46 #define V3_XML_ERRL 128 // maximum error string length (sizes root->err and the format buffer in v3_xml_err)
// Per-document state kept alongside the root tag. The struct v3_xml of the
// root tag is the first member, and this file casts a struct v3_xml * for the
// root tag to struct v3_xml_root * in order to reach the extra fields.
48 struct v3_xml_root { // additional data for the root tag
49 struct v3_xml xml; // is a super-struct built on top of v3_xml struct
50 struct v3_xml * cur; // current xml tree insertion point (moved down by open tag, back up by close tag)
51 char *str_ptr; // original xml string
52 char *tmp_start; // start of work area (parse_str sets it to buf)
53 char *tmp_end; // end of work area (parse_str sets it to buf + len)
54 char **ent; // general entities (ampersand sequences): null terminated array of alternating names and values
55 char ***attr; // default attributes: null terminated array of lists, each list starting with a tag name
56 short standalone; // non-zero if <?xml standalone="yes"?>
57 char err[V3_XML_ERRL]; // error string, written by v3_xml_err()
// Shared sentinel for tags without attributes: v3_xml_new() and
// v3_xml_add_child() point attr at it, and v3_xml_free_attr() special-cases
// this address (together with NULL) before it frees anything.
60 static char * empty_attrib_list[] = { NULL }; // empty, null terminated array of strings
// Stand-in for realloc(): obtains a fresh buffer of new_size bytes from
// V3_Malloc() and copies old_size bytes from old_ptr into it.
//   old_ptr  - buffer whose contents are carried over
//   old_size - number of bytes to copy out of old_ptr
//   new_size - size of the buffer to allocate
// NOTE(review): the failure handling, the release of old_ptr and the return
// statement are on lines not visible in this excerpt -- confirm them there.
63 static void * tmp_realloc(void * old_ptr, uint_t old_size, uint_t new_size) {
64 void * new_buf = V3_Malloc(new_size);
66 if (new_buf == NULL) {
// exactly old_size bytes are copied. Callers in this file pass strlen() of
// the old string, which excludes its terminator, so they must re-terminate
// the new buffer themselves -- TODO confirm each call site does.
70 memcpy(new_buf, old_ptr, old_size);
80 // Formats an error message into root->err, logs it and frees the xml tree.
//   root    - root of the document being parsed; root->err receives the message
//   xml_str - position in the work area where the problem was detected
//   err     - printf-style format string, followed by its arguments
// The function is declared void, so nothing is returned to the caller.
// NOTE(review): the tree is released through v3_xml_free() at the end, so
// callers presumably must not use root afterwards -- confirm in v3_xml_free().
81 static void v3_xml_err(struct v3_xml_root * root, char * xml_str, const char * err, ...) {
85 char fmt[V3_XML_ERRL];
// scan the work area from its start up to xml_str; the loop body is not
// visible here, presumably it counts newlines to obtain the line number
87 for (tmp = root->tmp_start; tmp < xml_str; tmp++) {
// build a new format string: line number prefix followed by the caller format
93 snprintf(fmt, V3_XML_ERRL, "[error near line %d]: %s", line, err);
// expand that format with the variadic arguments into root->err
96 vsnprintf(root->err, V3_XML_ERRL, fmt, ap);
99 PrintError("XML Error: %s\n", root->err);
// release the tree that starts at the root tag
102 v3_xml_free(&(root->xml));
109 // returns the first child tag with the given name or NULL if not found
//   xml  - parent tag whose children are searched
//   name - tag name to look for; compared with strcasecmp(), so case is ignored
110 struct v3_xml * v3_xml_child(struct v3_xml * xml, const char * name) {
111 struct v3_xml * child = NULL;
// the statement that seeds child is not visible here (presumably xml->child);
// the loop then follows the sibling links until the name matches or the chain ends
117 while ((child) && (strcasecmp(name, child->name) != 0)) {
118 child = child->sibling;
124 // returns the Nth tag with the same name in the same subsection or NULL if not found
//   xml - first tag of the run to index into
//   idx - zero-based position; idx 0 yields xml itself because the loop never runs
126 struct v3_xml * v3_xml_idx(struct v3_xml * xml, int idx) {
// count idx down to zero, stopping early once xml becomes NULL; the step that
// advances xml is in the loop body, which is not visible here
127 for (; xml && idx; idx--) {
134 // returns the value of the requested tag attribute or NULL if not found
// The name/value list of the tag itself is searched first; if the name is not
// there, the default attribute lists kept on the root tag are consulted.
// All name comparisons use strcasecmp() and are therefore case-insensitive.
//   xml  - tag to query (NULL is tolerated)
//   attr - attribute name to look up
135 const char * v3_xml_attr(struct v3_xml * xml, const char * attr) {
// only meaningful as a root pointer after the parent walk further down
138 struct v3_xml_root * root = (struct v3_xml_root *)xml;
140 if ((!xml) || (!xml->attr)) {
// xml->attr stores pairs: a name at index i and its value at index i + 1
144 while ((xml->attr[i]) && (strcasecmp(attr, xml->attr[i]) != 0)) {
148 if (xml->attr[i] != NULL) {
149 return xml->attr[i + 1]; // found attribute
// not set on the tag itself: climb the parent links up to the root tag
152 while (root->xml.parent != NULL) {
153 root = (struct v3_xml_root *)root->xml.parent; // root tag
// look for the default list whose first entry equals this tag name
157 ( (root->attr[i] != NULL) &&
158 (strcasecmp(xml->name, root->attr[i][0]) != 0) );
161 if (! root->attr[i]) {
162 return NULL; // no matching default attributes
// search that list for the attribute name; the default value follows the name
165 while ((root->attr[i][j] != NULL) && (strcasecmp(attr, root->attr[i][j]) != 0)) {
169 return (root->attr[i][j] != NULL) ? root->attr[i][j + 1] : NULL; // found default
172 // same as v3_xml_get but takes an already initialized va_list
// Consumes one (name, index) pair from ap per call and recurses for the rest
// of the path.
//   xml - tag at which the remaining path starts
//   ap  - remaining arguments: alternating char * tag names and int indexes
173 static struct v3_xml * v3_xml_vget(struct v3_xml * xml, va_list ap) {
174 char * name = va_arg(ap, char *);
// a NULL or empty name ends the path; idx is declared on a line not visible
// here and presumably starts out negative so that xml is returned in that case
177 if ((name != NULL) && (*name != 0)) {
178 idx = va_arg(ap, int);
179 xml = v3_xml_child(xml, name);
// a negative index ends the path at the current tag, otherwise pick the
// idx-th tag of that name and continue with the next pair
181 return (idx < 0) ? xml : v3_xml_vget(v3_xml_idx(xml, idx), ap);
184 // Traverses the xml tree to retrieve a specific subtag. Takes a variable
185 // length list of tag names and indexes. The argument list must be terminated
186 // by either an index of -1 or an empty string tag name. Example:
187 // title = v3_xml_get(library, "shelf", 0, "book", 2, "title", -1);
188 // This retrieves the title of the 3rd book on the 1st shelf of library.
189 // Returns NULL if not found.
// Tag names are matched by v3_xml_child(), i.e. without regard to case.
190 struct v3_xml * v3_xml_get(struct v3_xml * xml, ...) {
// hand the variadic arguments to the va_list variant; the va_list setup and
// teardown are on lines not visible in this excerpt
195 r = v3_xml_vget(xml, ap);
201 // sets a flag for the given tag and returns the tag
//   xml  - tag to modify; a NULL pointer is tolerated and nothing is written
//   flag - bit(s) to OR into xml->flags, e.g. V3_XML_TXTM
202 static struct v3_xml * v3_xml_set_flag(struct v3_xml * xml, short flag)
204 if (xml) xml->flags |= flag;
212 // Recursively decodes entity and character references and normalizes new lines
213 // ent is a null terminated array of alternating entity names and values. set t
214 // to '&' for general entity decoding, '%' for parameter entity decoding, 'c'
215 // for cdata sections, ' ' for attribute normalization, or '*' for non-cdata
216 // attribute normalization. Returns s, or if the decoded string is longer than
217 // s, returns a malloced string that must be freed.
// Decoding happens in place while the replacement fits; the buffer is only
// replaced by a larger allocation when a replacement is longer than the
// reference it substitutes.
// NOTE(review): declarations, some loop headers and some branch bodies of this
// function are on lines not visible in this excerpt.
218 static char * v3_xml_decode(char * s, char ** ent, char t) {
224 // normalize line endings
// drop the character at s: strlen(s) bytes starting at s + 1 are moved, which
// covers the rest of the string including its terminator
230 memmove(s, (s + 1), strlen(s));
249 if ((t != 'c') && (strncmp(s, "&#", 2) == 0)) { // character reference
// strtox() and strtoi() are not defined in this excerpt; presumably they
// parse a hexadecimal or decimal number and leave e at the first unparsed char
251 c = strtox(s + 3, &e); // base 16
253 c = strtoi(s + 2, &e); // base 10
// a zero value or a missing semicolon means this is not a valid reference
256 if ((!c) || (*e != ';')) {
257 // not a character ref
// close the gap: move everything after the semicolon (terminator included)
// down to s
264 memmove(s, strchr(s, ';') + 1, strlen(strchr(s, ';')));
265 } else if ( ( (*s == '&') &&
266 ((t == '&') || (t == ' ') || (t == '*'))) ||
267 ( (*s == '%') && (t == '%'))) {
271 (ent[b]) && (strncmp(s + 1, ent[b], strlen(ent[b])) != 0);
272 (b += 2)); // find entity in entity list
// ent[b] is the matching name; the post-increment moves b on to its value
274 if (ent[b++]) { // found a match
// c = length of the replacement text, e = semicolon ending the reference;
// grow the buffer only if the replacement is longer than the reference
275 if (((c = strlen(ent[b])) - 1) > ((e = strchr(s, ';')) - s)) {
276 l = (d = (s - r)) + c + strlen(e); // new length
// first growth: copy into a fresh allocation; later growths go through
// tmp_realloc (r == m presumably means the original buffer is still in use,
// m is declared on a line not visible here)
// NOTE(review): the V3_Malloc result is handed to strcpy unchecked, and
// tmp_realloc is given strlen(r) as old size, so the terminator is not
// carried over although strlen(e) is taken below -- confirm
277 r = ((r == m) ? strcpy(V3_Malloc(l), r) : tmp_realloc(r, strlen(r), l));
278 e = strchr((s = r + d), ';'); // fix up pointers
281 memmove(s + c, e + 1, strlen(e)); // shift rest of string
// strncpy with the exact length copies no terminator, so the shifted tail
// right behind the replacement stays intact
282 strncpy(s, ent[b], c); // copy in replacement text
284 // not a known entity
287 } else if ( ( (t == ' ') || (t == '*')) &&
291 // no decoding needed
297 // normalize spaces for non-cdata attributes
298 for (s = r; *s; s++) {
// remove a run of spaces found at s by pulling the remainder forward
299 if ((l = strspn(s, " "))) {
300 memmove(s, s + l, strlen(s + l) + 1);
// skip over the following word up to the next space or the end
303 while ((*s) && (*s != ' ')) {
// step back one character (staying inside the buffer) and test for a space
308 if ((--s >= r) && (*s == ' ')) {
309 // trim any trailing space
318 // called when parser finds character content between open and closing tag
//   root - document root; the text is attached to root->cur
//   s    - start of the text inside the work area (modified in place)
//   len  - number of bytes of text at s
//   t    - decoding mode handed on to v3_xml_decode()
319 static void v3_xml_char_content(struct v3_xml_root * root, char * s, size_t len, char t) {
320 struct v3_xml * xml = root->cur;
// nothing to do without an open, named tag or without any text
324 if ((xml == NULL) || (xml->name == NULL) || (len == 0)) {
329 s[len] = '\0'; // null terminate text (calling functions anticipate this)
// decode in place (or into a new buffer); len becomes the decoded length
// including the terminator
330 len = strlen(s = v3_xml_decode(s, root->ent, t)) + 1;
333 // initial character content
336 // allocate our own memory and make a copy
// l is set to the length of the text already stored. If txt is already heap
// memory it is regrown through tmp_realloc, otherwise it is copied into a new
// allocation. tmp_realloc copies no terminator; the strcpy at offset l below
// writes the new text together with a terminator.
// NOTE(review): the V3_Malloc result is passed to strcpy without a NULL check.
337 xml->txt = (xml->flags & V3_XML_TXTM) ?
338 (tmp_realloc(xml->txt, strlen(xml->txt), (l = strlen(xml->txt)) + len)) :
339 (strcpy(V3_Malloc((l = strlen(xml->txt)) + len), xml->txt));
341 strcpy(xml->txt + l, s); // add new char content
344 V3_Free(s); // free s if it was malloced by v3_xml_decode()
// record that txt is heap memory owned by the tag
349 v3_xml_set_flag(xml, V3_XML_TXTM);
353 // called when parser finds closing tag
//   root - document root; root->cur is the tag expected to be closed
//   name - tag name found in the closing tag
//   s    - current parse position, used only for error reporting
// The return statements are not visible here; parse_str() treats a result of
// -1 as failure.
354 static int v3_xml_close_tag(struct v3_xml_root * root, char * name, char * s) {
// the closing name must match the currently open tag (case is ignored)
355 if ( (root->cur == NULL) ||
356 (root->cur->name == NULL) ||
357 (strcasecmp(name, root->cur->name))) {
358 v3_xml_err(root, s, "unexpected closing tag </%s>", name);
// pop one level: the parent becomes the insertion point again
362 root->cur = root->cur->parent;
366 // checks for circular entity references, returns non-zero if no circular
367 // references are found, zero otherwise
//   name - name of the entity being checked
//   s    - replacement text to scan for ampersand references
//   ent  - null terminated array of alternating entity names and values
368 static int v3_xml_ent_ok(char * name, char * s, char ** ent) {
372 while ((*s != '\0') && (*s != '&')) {
373 // find next entity reference
// a reference back to name itself (the branch body is not visible here,
// presumably it reports the circular reference)
381 if (strncmp(s + 1, name, strlen(name)) == 0) {
// find the referenced entity in the table; names sit at the even indexes
386 for (i = 0; (ent[i]) && (strncmp(ent[i], s + 1, strlen(ent[i]))); i += 2);
// recurse into the replacement text of the referenced entity
388 if ((ent[i] != NULL) && (v3_xml_ent_ok(name, ent[i + 1], ent) == 0)) {
396 // frees a tag attribute list
// List layout used below: name/value pointer pairs, a NULL terminator, then
// one more slot pointing at a map string that holds one flag byte per pair.
397 static void v3_xml_free_attr(char **attr) {
// NULL and the shared empty list are special-cased before anything is freed
401 if ((attr == NULL) || (attr == empty_attrib_list)) {
407 // find end of attribute list
411 m = attr[i + 1]; // list of which names and values are malloced
// entry i of the map describes pair i: name at attr[i * 2], value right after;
// the loop stops at the first zero byte of the map
413 for (i = 0; m[i]; i++) {
414 if (m[i] & V3_XML_NAMEM) {
415 V3_Free(attr[i * 2]);
418 if (m[i] & V3_XML_TXTM) {
419 V3_Free(attr[(i * 2) + 1]);
432 // returns a new empty v3_xml structure with the given root tag name
// Allocates a struct v3_xml_root with V3_Malloc() and zero-fills it, stores
// name without copying it, makes the root tag the current insertion point,
// copies the table of predefined entities into a heap array (root->ent) and
// points both attribute pointers at the shared empty_attrib_list.
// The return statement is not visible here; parse_str() casts the result back
// to struct v3_xml_root *, so presumably the embedded root tag is returned.
// NOTE(review): both V3_Malloc results (root and root->ent) are written to by
// memset/memcpy on the very next source line without a NULL check.
// NOTE(review): in this copy the replacement value listed for the quot; entity
// appears as three consecutive double-quote characters, which is not a valid
// string literal; presumably an escaped double quote -- confirm upstream.
433 static struct v3_xml * v3_xml_new(const char * name) {
434 static char * ent[] = { "lt;", "<", "gt;", ">", "quot;", """,
435 "apos;", "'", "amp;", "&", NULL };
437 struct v3_xml_root * root = (struct v3_xml_root *)V3_Malloc(sizeof(struct v3_xml_root));
438 memset(root, 0, sizeof(struct v3_xml_root));
440 root->xml.name = (char *)name;
441 root->cur = &root->xml;
443 memset(root->err, 0, V3_XML_ERRL);
445 root->ent = V3_Malloc(sizeof(ent));
446 memcpy(root->ent, ent, sizeof(ent));
448 root->xml.attr = empty_attrib_list;
449 root->attr = (char ***)(empty_attrib_list);
454 // inserts an existing tag into a v3_xml structure
//   xml  - tag to insert
//   dest - tag that becomes the parent
//   off  - offset of xml within the character content of dest
// Three chains are read and updated in the visible code:
//   ordered - walked by comparing offsets only
//   sibling - walked while comparing tag names (case-insensitively)
//   next    - walked by offset among tags that share the name of xml
// NOTE(review): many statements of this function are not visible in this
// excerpt, including the handling of a dest without children and the return.
455 static struct v3_xml * v3_xml_insert(struct v3_xml * xml, struct v3_xml * dest, size_t off) {
456 struct v3_xml * cur, * prev, * head;
465 if ((head = dest->child)) {
466 // already have sub tags
// the first child does not come after off: advance along the ordered chain
// while the following entry is still at or before off, then link xml in
468 if (head->off <= off) {
472 ((cur->ordered) && (cur->ordered->off <= off));
475 xml->ordered = cur->ordered;
// walk the sibling chain for a tag with the same name as xml
484 for (cur = head, prev = NULL;
485 ((cur) && (strcasecmp(cur->name, xml->name) != 0));
486 prev = cur, cur = cur->sibling);
// a same-name tag exists at or before off: advance along its next chain to
// the last entry at or before off and link xml in behind it
489 if (cur && cur->off <= off) {
492 while (cur->next && cur->next->off <= off) {
496 xml->next = cur->next;
499 // first tag of this type
// unlink the previous first same-name tag from the sibling chain
503 prev->sibling = cur->sibling;
506 xml->next = cur; // old first tag is now next
508 // new sibling insert point
509 for (cur = head, prev = NULL;
510 ((cur) && (cur->off <= off));
511 prev = cur, cur = cur->sibling);
528 // Adds a child tag. off is the offset of the child tag relative to the start
529 // of the parent tag's character content. Returns the child tag.
//   xml  - parent tag
//   name - tag name; the pointer is stored as is, the string is not copied
//   off  - offset handed on to v3_xml_insert()
530 static struct v3_xml * v3_xml_add_child(struct v3_xml * xml, const char * name, size_t off) {
531 struct v3_xml * child;
// NOTE(review): the allocation is zero-filled on the very next source line
// without a NULL check in between
537 child = (struct v3_xml *)V3_Malloc(sizeof(struct v3_xml));
538 memset(child, 0, sizeof(struct v3_xml));
540 child->name = (char *)name;
// start out with the shared empty attribute list
541 child->attr = empty_attrib_list;
// link the new tag under its parent; the result of v3_xml_insert is returned
544 return v3_xml_insert(child, xml, off);
548 // called when parser finds start of new tag
//   root - document root; root->cur is the tag that is currently open
//   name - name of the new tag
//   attr - attribute list built by the parser for the new tag
// NOTE(review): the lines around the add_child call are not visible here;
// presumably they handle the first (root) tag and attach attr -- confirm.
549 static void v3_xml_open_tag(struct v3_xml_root * root, char * name, char ** attr) {
550 struct v3_xml * xml = root->cur;
// the new child is placed at the current end of the text of its parent
553 xml = v3_xml_add_child(xml, name, strlen(xml->txt));
560 root->cur = xml; // update tag insertion point
569 // parse the given xml string and return a v3_xml structure
//   buf - writable buffer holding the document; it is parsed in place: tag
//         names, attribute names and values are terminated by writing NUL
//         bytes into it, and attribute lists point into it
//   len - number of bytes of document text in buf
// NOTE(review): every error path calls v3_xml_err(), which frees the tree; the
// return statements that follow those calls are not visible in this excerpt.
570 static struct v3_xml * parse_str(char * buf, size_t len) {
571 struct v3_xml_root * root = (struct v3_xml_root *)v3_xml_new(NULL);
576 char ** tmp_attr = NULL; // initialize tmp_attr to avoid compile warning
// empty input (the guarding condition is on a line not visible here)
583 v3_xml_err(root, NULL, "Empty XML String\n");
587 root->tmp_start = buf;
588 root->tmp_end = buf + len; // record start and end of work area
// the last byte is overwritten with a terminator; last_char remembers it so
// that a closing bracket in that position is still recognised further down
590 last_char = buf[len - 1]; // save end char
591 buf[len - 1] = '\0'; // turn end char into null terminator
// skip everything in front of the first opening bracket
593 while ((*buf) && (*buf != '<')) {
599 v3_xml_err(root, buf, "root tag missing");
604 attr = (char **)empty_attrib_list;
605 tag_ptr = ++buf; // skip first '<'
// start of a tag name: a letter, underscore, colon, or a char value below zero
607 if (isalpha(*buf) || (*buf == '_') || (*buf == ':') || (*buf < '\0')) {
610 if (root->cur == NULL) {
611 v3_xml_err(root, tag_ptr, "markup outside of root element");
// move to the first character after the tag name
615 buf += strcspn(buf, V3_XML_WS "/>");
617 while (isspace(*buf)) {
618 // null terminate tag name,
619 // this writes '\0' to spaces after first tag
623 // check if attribute follows tag
624 if ((*buf) && (*buf != '/') && (*buf != '>')) {
625 // there is an attribute
626 // find attributes for correct tag
628 ((tmp_attr = root->attr[i]) &&
629 (strcasecmp(tmp_attr[0], tag_ptr) != 0));
632 // 'tmp_attr' now points to the attribute list associated with 'tag_ptr'
635 // attributes are name value pairs,
636 // 2nd to last entry is null (end of list)
637 // last entry points to a string map marking whether values have been malloced...
638 // loop through attributes until hitting the closing bracket
640 (*buf) && (*buf != '/') && (*buf != '>');
642 // buf is incremented later on
// slots used for the pair being added: [attr_idx] name, [val_idx] value,
// [term_idx] NULL terminator, [last_idx] pointer to the malloc map string
644 int attr_cnt = (attr_idx / 2) + 1;
645 int val_idx = attr_idx + 1;
646 int term_idx = attr_idx + 2;
647 int last_idx = attr_idx + 3;
649 // attr = allocated space
650 // attr[val_idx] = mem for list of malloced vals
// grow an existing list by one pair (the branch condition is not visible
// here); the old map pointer sits two slots below the new last slot
652 attr = tmp_realloc(attr,
653 (((attr_cnt - 1) * (2 * sizeof(char *))) +
654 (2 * sizeof(char *))),
655 ((attr_cnt * (2 * sizeof(char *))) +
656 (2 * sizeof(char *))));
658 attr[last_idx] = tmp_realloc(attr[last_idx - 2],
// first attribute of this tag: four pointer slots and a 2-byte map
// NOTE(review): attr is indexed on the next source line without a NULL check
662 attr = V3_Malloc(4 * sizeof(char *));
663 attr[last_idx] = V3_Malloc(2);
667 attr[attr_idx] = buf; // set attribute name
668 attr[val_idx] = ""; // temporary attribute value
669 attr[term_idx] = NULL; // null terminate list
// NOTE(review): this writes a space and a terminator starting at map offset
// attr_cnt, whereas the malloced flag below is stored at attr_cnt - 1. With
// attr_cnt == 1 and the 2-byte map allocated above that would touch offset 2
// and leave offset 0 unset -- confirm against the lines not visible here
670 strcpy(attr[last_idx] + attr_cnt, " "); // value is not malloc'd, offset into the stringmap
// move to the end of the attribute name
672 buf += strcspn(buf, V3_XML_WS "=/>");
674 if ((*buf == '=') || isspace(*buf)) {
676 *(buf++) = '\0'; // null terminate tag attribute name
678 // eat whitespace (and more multiple '=' ?)
679 buf += strspn(buf, V3_XML_WS "=");
// quote_char is assigned on a line not visible here
683 if ((quote_char == '"') || (quote_char == '\'')) { // attribute value
684 attr[val_idx] = ++buf;
// scan for the matching closing quote
686 while ((*buf) && (*buf != quote_char)) {
691 // null terminate attribute val
// error path for a value whose closing quote was not found
694 v3_xml_free_attr(attr);
695 v3_xml_err(root, tag_ptr, "missing %c", quote_char);
700 ( (tmp_attr) && (tmp_attr[j]) &&
701 (strcasecmp(tmp_attr[j], attr[attr_idx]) != 0));
// decode references in the value; the mode argument depends on whether a
// default attribute entry exists for this name (its operands are not visible)
704 attr[val_idx] = v3_xml_decode(attr[val_idx], root->ent,
705 ((tmp_attr && tmp_attr[j]) ?
// a value pointer outside the span from tag_ptr to buf means v3_xml_decode
// returned a new allocation; flag it so that v3_xml_free_attr releases it
709 if ( (attr[val_idx] < tag_ptr) ||
710 (attr[val_idx] > buf) ) {
711 attr[last_idx][attr_cnt - 1] = V3_XML_TXTM; // value malloced
716 while (isspace(*buf)) {
// a closing bracket must follow here, either in the buffer or as the saved
// last character
725 if ( ((*buf) && (*buf != '>')) ||
726 ((!*buf) && (last_char != '>'))) {
729 v3_xml_free_attr(attr);
731 v3_xml_err(root, tag_ptr, "missing >");
// the tag is opened and closed right away (presumably the self-closing form;
// the branch test is on a line not visible here)
734 v3_xml_open_tag(root, tag_ptr, attr);
735 v3_xml_close_tag(root, tag_ptr, buf);
736 } else if (((quote_char = *buf) == '>') ||
737 ((!*buf) && (last_char == '>'))) {
739 *buf = '\0'; // temporarily null terminate tag name
740 v3_xml_open_tag(root, tag_ptr, attr);
744 v3_xml_free_attr(attr);
746 v3_xml_err(root, tag_ptr, "missing >");
// closing tag
749 } else if (*buf == '/') {
// tag_ptr now points at the closing tag name; buf moves one past the
// character that ended the name
752 buf += strcspn(tag_ptr = buf + 1, V3_XML_WS ">") + 1;
755 if ((*buf == '\0') && (last_char != '>')) {
756 v3_xml_err(root, tag_ptr, "missing >");
760 *buf = '\0'; // temporarily null terminate tag name
762 if (v3_xml_close_tag(root, tag_ptr, buf) == -1) {
768 buf += strspn(buf, V3_XML_WS);
// comment: find the next double dash, which must be followed by a closing
// bracket (or by the end of the buffer when the saved last char is one)
770 } else if (strncmp(buf, "!--", 3) == 0) {
772 if ( ((buf = strstr(buf + 3, "--")) == 0) ||
773 ((*(buf += 2) != '>') && (*buf)) ||
774 ((!*buf) && (last_char != '>'))) {
775 v3_xml_err(root, tag_ptr, "unclosed <!--");
// cdata section
778 } else if (! strncmp(buf, "![CDATA[", 8)) {
780 if ((buf = strstr(buf, "]]>"))) {
// the text starts after the 8-byte marker and the length covers exactly the
// bytes between the marker and the terminating sequence; buf is left on the
// closing bracket
781 v3_xml_char_content(root, tag_ptr + 8, (buf += 2) - tag_ptr - 10, 'c');
783 v3_xml_err(root, tag_ptr, "unclosed <![CDATA[");
787 v3_xml_err(root, tag_ptr, "unexpected <");
791 if (! buf || ! *buf) {
798 if (*buf && (*buf != '<')) {
799 // tag character content
800 while (*buf && (*buf != '<')) {
// hand the text between two tags to the content handler with general entity
// decoding (mode ampersand)
805 v3_xml_char_content(root, tag_ptr, buf - tag_ptr, '&');
809 } else if (*buf == '\0') {
// end of input: cur is NULL once the root tag has been closed (the body of
// this branch is not visible, presumably the success return); a root without
// a name means no tag was ever opened; anything else is an unclosed tag
814 if (root->cur == NULL) {
816 } else if (root->cur->name == NULL) {
817 v3_xml_err(root, tag_ptr, "root tag missing");
820 v3_xml_err(root, tag_ptr, "unclosed tag <%s>", root->cur->name);
// Parses a NUL-terminated XML document. The text is first copied into a
// private heap buffer, so buf itself is not written to by the visible code.
//   buf - document text
// Returns whatever parse_str() returns for the copy.
826 struct v3_xml * v3_xml_parse(char * buf) {
828 char * xml_buf = NULL;
834 str_len = strlen(buf);
// one extra byte for the terminator
// NOTE(review): the allocation is written to by strcpy on the very next source
// line without a NULL check in between
835 xml_buf = (char *)V3_Malloc(str_len + 1);
836 strcpy(xml_buf, buf);
// the length passed excludes the terminator, so parse_str treats the last
// document character as the end char it saves and overwrites
838 return parse_str(xml_buf, str_len);
843 // free the memory allocated for the v3_xml structure
844 void v3_xml_free(struct v3_xml * xml) {
845 struct v3_xml_root * root = (struct v3_xml_root *)xml;
853 v3_xml_free(xml->child);
854 v3_xml_free(xml->ordered);
856 if (xml->parent == NULL) {
857 // free root tag allocations
859 for (i = 10; root->ent[i]; i += 2) {
860 // 0 - 9 are default entities (<>&"')
861 if ((s = root->ent[i + 1]) < root->tmp_start || s > root->tmp_end) {
866 V3_Free(root->ent); // free list of general entities
868 for (i = 0; (a = root->attr[i]); i++) {
869 for (j = 1; a[j++]; j += 2) {
870 // free malloced attribute values
871 if (a[j] && (a[j] < root->tmp_start || a[j] > root->tmp_end)) {
879 // free default attribute list
883 V3_Free(root->str_ptr); // malloced xml data
886 v3_xml_free_attr(xml->attr); // tag attributes
888 if ((xml->flags & V3_XML_TXTM)) {
893 if ((xml->flags & V3_XML_NAMEM)) {