3 * Copyright 2004-2006 Aaron Voisine <aaron@voisine.org>
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 * Modified for Palacios by Jack Lange <jarusl@cs.northwestern.edu>
31 #include <palacios/vmm_xml.h>
32 #include <palacios/vmm_sprintf.h>
34 #include <palacios/vmm.h>
// Parser-wide constants.
// V3_XML_ERRL sizes both the err field of the root structure and the scratch
// format buffer in v3_xml_err. V3_XML_WS is the character set handed to
// strspn/strcspn while scanning tags and attributes.
36 #define V3_XML_BUFSIZE 1024 // size of internal memory buffers
38 // Flags for struct v3_xml
// NAMEM and TXTM are also stored per attribute, in the flag-byte string that
// trails each attribute list (see v3_xml_free_attr).
39 #define V3_XML_NAMEM 0x80 // name is malloced
40 #define V3_XML_TXTM 0x40 // txt is malloced
41 #define V3_XML_DUP 0x20 // attribute name and value are strduped
45 #define V3_XML_WS "\t\r\n " // whitespace
46 #define V3_XML_ERRL 128 // maximum error string length
// Parser state that exists once per document. The embedded struct v3_xml comes
// first, and this file converts between struct v3_xml * and
// struct v3_xml_root * with plain casts, so xml must remain the first member.
48 struct v3_xml_root { // additional data for the root tag
49 struct v3_xml xml; // is a super-struct built on top of v3_xml struct
50 struct v3_xml * cur; // current xml tree insertion point
51 char *str_ptr; // original xml string
52 char *tmp_start; // start of work area (parse_str stores buf here)
53 char *tmp_end; // end of work area (parse_str stores buf + len here)
54 char **ent; // general entities (ampersand sequences), alternating name and value, NULL terminated
55 char ***attr; // default attributes: NULL-terminated array of lists, each list starting with a tag name
56 short standalone; // non-zero if <?xml standalone="yes"?>
57 char err[V3_XML_ERRL]; // error string, filled in by v3_xml_err
// Shared sentinel meaning no attributes. New tags point their attr field at
// it, and v3_xml_free_attr recognizes it by address.
60 static char * empty_attrib_list[] = { NULL }; // empty, null terminated array of strings
// Stand-in for realloc built on V3_Malloc: obtains a fresh buffer of new_size
// bytes and copies the first old_size bytes of old_ptr into it.
//   old_ptr  - buffer whose contents are carried over
//   old_size - number of bytes to copy; nothing past this is preserved, so a
//              string terminator survives only if it is counted here
//   new_size - size of the new allocation
63 static void * tmp_realloc(void * old_ptr, uint_t old_size, uint_t new_size) {
64 void * new_buf = V3_Malloc(new_size);
// the allocation result is tested before anything is copied into it
66 if (new_buf == NULL) {
70 memcpy(new_buf, old_ptr, old_size);
80 // format a message into root->err, log it, and release the whole tree
//   root    - parser state that receives the error string
//   xml_str - position in the work area where the problem was noticed; the
//             scan from root->tmp_start up to it feeds the line number shown
//             in the message (presumably by counting newlines -- the loop
//             body should be checked to confirm)
//   err     - printf-style format, expanded with the trailing arguments
// NOTE(review): the function returns void and calls v3_xml_free on the root
// tag, so callers should not touch root afterwards -- confirm that every call
// site bails out right after reporting.
81 static void v3_xml_err(struct v3_xml_root * root, char * xml_str, const char * err, ...) {
85 char fmt[V3_XML_ERRL];
87 for (tmp = root->tmp_start; tmp < xml_str; tmp++) {
// stage one: bake the line number and the caller format into fmt
93 snprintf(fmt, V3_XML_ERRL, "[error near line %d]: %s", line, err);
// stage two: expand the caller arguments into the stored error string
96 vsnprintf(root->err, V3_XML_ERRL, fmt, ap);
99 PrintError("XML Error: %s\n", root->err);
102 v3_xml_free(&(root->xml));
109 // returns the first child tag with the given name or NULL if not found
// Candidates are visited by following sibling links, and names are compared
// with strcasecmp, so the match ignores case.
110 struct v3_xml * v3_xml_child(struct v3_xml * xml, const char * name) {
111 struct v3_xml * child = NULL;
// stop at the first candidate whose name matches, or when the chain runs out
117 while ((child) && (strcasecmp(name, child->name) != 0)) {
118 child = child->sibling;
124 // returns the Nth tag with the same name in the same subsection or NULL if not
// The loop runs while xml is non-NULL and idx is non-zero, decrementing idx on
// each pass, so an idx of 0 performs no iterations.
126 struct v3_xml * v3_xml_idx(struct v3_xml * xml, int idx) {
127 for (; xml && idx; idx--) {
134 // returns the value of the requested tag attribute or NULL if not found
// Lookup happens in two stages, both case-insensitive (strcasecmp):
//   1. the attr array of the tag itself, laid out as name, value, name, ...
//   2. the default-attribute table kept on the root tag, where each entry is
//      a list whose element [0] is a tag name followed by name/value pairs
135 const char * v3_xml_attr(struct v3_xml * xml, const char * attr) {
138 struct v3_xml_root * root = (struct v3_xml_root *)xml;
140 if ((!xml) || (!xml->attr)) {
// stage 1: scan the attributes stored on this tag
144 while ((xml->attr[i]) && (strcasecmp(attr, xml->attr[i]) != 0)) {
148 if (xml->attr[i] != NULL) {
149 return xml->attr[i + 1]; // found attribute
// stage 2: climb parent links to the root tag, which carries the defaults
152 while (root->xml.parent != NULL) {
153 root = (struct v3_xml_root *)root->xml.parent; // root tag
// locate the default list registered under the name of this tag
157 ( (root->attr[i] != NULL) &&
158 (strcasecmp(xml->name, root->attr[i][0]) != 0) );
161 if (! root->attr[i]) {
162 return NULL; // no matching default attributes
// search that list for the requested attribute name
165 while ((root->attr[i][j] != NULL) && (strcasecmp(attr, root->attr[i][j]) != 0)) {
169 return (root->attr[i][j] != NULL) ? root->attr[i][j + 1] : NULL; // found default
172 // same as v3_xml_get but takes an already initialized va_list
// One (char * name, int idx) pair is consumed per level. When name is non-NULL
// and non-empty, idx is read and xml descends to the first child called name.
// A negative idx ends the walk and returns the current tag; otherwise the
// function recurses from the idx-th tag of that name.
// Arguments must have exactly these types, since they are fetched with
// va_arg(ap, char *) and va_arg(ap, int).
173 static struct v3_xml * v3_xml_vget(struct v3_xml * xml, va_list ap) {
174 char * name = va_arg(ap, char *);
177 if ((name != NULL) && (*name != 0)) {
178 idx = va_arg(ap, int);
179 xml = v3_xml_child(xml, name);
181 return (idx < 0) ? xml : v3_xml_vget(v3_xml_idx(xml, idx), ap);
184 // Traverses the xml tree to retrieve a specific subtag. Takes a variable
185 // length list of tag names and indexes. The argument list must be terminated
186 // by either an index of -1 or an empty string tag name. Example:
187 // title = v3_xml_get(library, "shelf", 0, "book", 2, "title", -1);
188 // This retrieves the title of the 3rd book on the 1st shelf of library.
189 // Returns NULL if not found.
// Variadic front end only: the traversal itself is done by v3_xml_vget.
190 struct v3_xml * v3_xml_get(struct v3_xml * xml, ...) {
195 r = v3_xml_vget(xml, ap);
201 // sets a flag for the given tag and returns the tag
// A NULL xml is tolerated: the flag is ORed into xml->flags only when xml is
// non-NULL.
202 static struct v3_xml * v3_xml_set_flag(struct v3_xml * xml, short flag)
204 if (xml) xml->flags |= flag;
212 // Recursively decodes entity and character references and normalizes new lines
213 // ent is a null terminated array of alternating entity names and values. set t
214 // to '&' for general entity decoding, '%' for parameter entity decoding, 'c'
215 // for cdata sections, ' ' for attribute normalization, or '*' for non-cdata
216 // attribute normalization. Returns s, or if the decoded string is longer than
217 // s, returns a malloced string that must be freed.
// Decoding happens in place on s wherever the result fits. Entity names in
// ent are matched with a strncmp prefix test against the text that follows
// the reference introducer.
218 static char * v3_xml_decode(char * s, char ** ent, char t) {
224 // normalize line endings
// drop one character by sliding the rest of the string, terminator included,
// one position to the left
230 memmove(s, (s + 1), strlen(s));
// numeric character references are skipped entirely in cdata mode
249 if ((t != 'c') && (strncmp(s, "&#", 2) == 0)) { // character reference
251 c = strtox(s + 3, &e); // base 16
253 c = strtoi(s + 2, &e); // base 10
// a zero value or a missing semicolon means this was not a reference
256 if ((!c) || (*e != ';')) {
257 // not a character ref
// close the gap left by the reference: everything after the semicolon,
// terminator included, moves down to s
264 memmove(s, strchr(s, ';') + 1, strlen(strchr(s, ';')));
265 } else if ( ( (*s == '&') &&
266 ((t == '&') || (t == ' ') || (t == '*'))) ||
267 ( (*s == '%') && (t == '%'))) {
271 (ent[b]) && (strncmp(s + 1, ent[b], strlen(ent[b])) != 0);
272 (b += 2)); // find entity in entity list
// b is advanced from the matching name to its replacement value
274 if (ent[b++]) { // found a match
// the replacement is longer than the reference text, so the string must grow
275 if (((c = strlen(ent[b])) - 1) > ((e = strchr(s, ';')) - s)) {
276 l = (d = (s - r)) + c + strlen(e); // new length
// NOTE(review): when r was already reallocated, tmp_realloc copies only
// strlen(r) bytes, which leaves out the terminator, while the later
// strlen(e) depends on one being present in the new buffer. Unless V3_Malloc
// returns zeroed memory, strlen(r) + 1 looks like the intended size -- confirm.
// NOTE(review): the V3_Malloc result is handed to strcpy without a NULL check.
277 r = ((r == m) ? strcpy(V3_Malloc(l), r) : tmp_realloc(r, strlen(r), l));
278 e = strchr((s = r + d), ';'); // fix up pointers
281 memmove(s + c, e + 1, strlen(e)); // shift rest of string
// exactly c bytes are copied, so no terminator lands inside the string
282 strncpy(s, ent[b], c); // copy in replacement text
284 // not a known entity
287 } else if ( ( (t == ' ') || (t == '*')) &&
291 // no decoding needed
297 // normalize spaces for non-cdata attributes
298 for (s = r; *s; s++) {
// remove a run of spaces by sliding the tail, terminator included, over it
299 if ((l = strspn(s, " "))) {
300 memmove(s, s + l, strlen(s + l) + 1);
// advance to the end of the current word
303 while ((*s) && (*s != ' ')) {
308 if ((--s >= r) && (*s == ' ')) {
309 // trim any trailing space
318 // called when parser finds character content between open and closing tag
//   root - parser state; the text is attached to root->cur
//   s    - start of the raw text inside the parse buffer
//   len  - number of bytes of raw text
//   t    - decode mode forwarded to v3_xml_decode
319 static void v3_xml_char_content(struct v3_xml_root * root, char * s, size_t len, char t) {
320 struct v3_xml * xml = root->cur;
// guard: requires a named current tag and non-empty content
324 if ((xml == NULL) || (xml->name == NULL) || (len == 0)) {
329 s[len] = '\0'; // null terminate text (calling functions anticipate this)
// from here on len counts the decoded text plus its terminator
330 len = strlen(s = v3_xml_decode(s, root->ent, t)) + 1;
333 // initial character content
336 // allocate our own memory and make a copy
// l records the length of the existing text. In the tmp_realloc branch the old
// bytes are carried over without their terminator; the strcpy that follows
// writes the new text, terminator included, at offset l.
// NOTE(review): the V3_Malloc result feeds strcpy directly with no NULL check.
337 xml->txt = (xml->flags & V3_XML_TXTM) ?
338 (tmp_realloc(xml->txt, strlen(xml->txt), (l = strlen(xml->txt)) + len)) :
339 (strcpy(V3_Malloc((l = strlen(xml->txt)) + len), xml->txt));
341 strcpy(xml->txt + l, s); // add new char content
344 V3_Free(s); // free s if it was malloced by v3_xml_decode()
// record that txt now lives in memory owned by the tag
349 v3_xml_set_flag(xml, V3_XML_TXTM);
353 // called when parser finds closing tag
//   root - parser state
//   name - tag name taken from the closing tag
//   s    - parse position, used only for error reporting
// The close is rejected when there is no current tag, the current tag has no
// name, or the names differ (compared without regard to case). Otherwise the
// insertion point moves up to the parent tag.
// A caller in this file treats a return value of -1 as failure.
354 static int v3_xml_close_tag(struct v3_xml_root * root, char * name, char * s) {
355 if ( (root->cur == NULL) ||
356 (root->cur->name == NULL) ||
357 (strcasecmp(name, root->cur->name))) {
358 v3_xml_err(root, s, "unexpected closing tag </%s>", name);
362 root->cur = root->cur->parent;
367 // checks for circular entity references, returns non-zero if no circular
368 // references are found, zero otherwise
//   name - entity name under test
//   s    - text scanned for entity references
//   ent  - alternating entity names and values, NULL terminated
// The check recurses into the value of every entity that s refers to.
369 static int v3_xml_ent_ok(char * name, char * s, char ** ent) {
373 while ((*s != '\0') && (*s != '&')) {
374 // find next entity reference
// the reference names the entity under test
382 if (strncmp(s + 1, name, strlen(name)) == 0) {
// look up the referenced entity in the list (names sit at even indexes)
387 for (i = 0; (ent[i]) && (strncmp(ent[i], s + 1, strlen(ent[i]))); i += 2);
// a known entity whose own value fails the check makes this one fail too
389 if ((ent[i] != NULL) && (v3_xml_ent_ok(name, ent[i + 1], ent) == 0)) {
398 // frees a tag attribute list
// Layout of attr: name/value pairs, a NULL terminator, then one extra slot
// holding a string of per-pair flag bytes. V3_XML_NAMEM in a flag byte means
// the name of that pair was malloced, V3_XML_TXTM means its value was.
399 static void v3_xml_free_attr(char **attr) {
// NULL and the shared empty list are special-cased
403 if ((attr == NULL) || (attr == empty_attrib_list)) {
409 // find end of attribute list
413 m = attr[i + 1]; // list of which names and values are malloced
// flag byte i describes the pair stored at attr[2i] and attr[2i + 1]
415 for (i = 0; m[i]; i++) {
416 if (m[i] & V3_XML_NAMEM) {
417 V3_Free(attr[i * 2]);
420 if (m[i] & V3_XML_TXTM) {
421 V3_Free(attr[(i * 2) + 1]);
434 // returns a new empty v3_xml structure with the given root tag name
// Allocates a zeroed struct v3_xml_root, makes the embedded root tag the
// current insertion point, and gives the root its own copy of the predefined
// entity pointer table (lt, gt, quot, apos, amp).
// name is stored as given; the string is not copied.
// Both attribute fields start out pointing at empty_attrib_list. The cast to
// char *** for root->attr relies on that array holding only NULL, so the
// default-attribute table reads as empty.
// The memset of root->err repeats work already done by clearing the struct.
// NOTE(review): neither V3_Malloc result is checked before memset/memcpy
// write through it, although tmp_realloc in this file does test V3_Malloc for
// NULL -- confirm whether a failure path is needed here.
435 static struct v3_xml * v3_xml_new(const char * name) {
436 static char * ent[] = { "lt;", "<", "gt;", ">", "quot;", """,
437 "apos;", "'", "amp;", "&", NULL };
439 struct v3_xml_root * root = (struct v3_xml_root *)V3_Malloc(sizeof(struct v3_xml_root));
440 memset(root, 0, sizeof(struct v3_xml_root));
442 root->xml.name = (char *)name;
443 root->cur = &root->xml;
445 memset(root->err, 0, V3_XML_ERRL);
447 root->ent = V3_Malloc(sizeof(ent));
448 memcpy(root->ent, ent, sizeof(ent));
450 root->xml.attr = empty_attrib_list;
451 root->attr = (char ***)(empty_attrib_list);
456 // inserts an existing tag into an v3_xml structure
// xml is placed among the children of dest at character offset off. Three
// chains are kept consistent, all positioned by comparing off values:
//   ordered - tags under dest in offset order
//   next    - tags that share one name (names compared with strcasecmp)
//   sibling - the first tag of each distinct name
457 static struct v3_xml * v3_xml_insert(struct v3_xml * xml, struct v3_xml * dest, size_t off) {
458 struct v3_xml * cur, * prev, * head;
467 if ((head = dest->child)) {
468 // already have sub tags
470 if (head->off <= off) {
// walk the ordered chain to the last tag located at or before off
474 ((cur->ordered) && (cur->ordered->off <= off));
477 xml->ordered = cur->ordered;
// look along the sibling chain for the first tag carrying the same name
486 for (cur = head, prev = NULL;
487 ((cur) && (strcasecmp(cur->name, xml->name) != 0));
488 prev = cur, cur = cur->sibling);
// a same-name tag at or before off exists: find the slot in its next chain
491 if (cur && cur->off <= off) {
494 while (cur->next && cur->next->off <= off) {
498 xml->next = cur->next;
501 // first tag of this type
// take the previous first tag of this name out of the sibling chain
505 prev->sibling = cur->sibling;
508 xml->next = cur; // old first tag is now next
510 // new sibling insert point
511 for (cur = head, prev = NULL;
512 ((cur) && (cur->off <= off));
513 prev = cur, cur = cur->sibling);
530 // Adds a child tag. off is the offset of the child tag relative to the start
531 // of the parent tag's character content. Returns the child tag.
// The new tag is zero-initialized, starts with the shared empty attribute
// list, and keeps the name pointer as given (the string is not copied).
// Linking into the tree is delegated to v3_xml_insert.
// NOTE(review): the V3_Malloc result goes straight into memset with no NULL
// check.
532 static struct v3_xml * v3_xml_add_child(struct v3_xml * xml, const char * name, size_t off) {
533 struct v3_xml * child;
539 child = (struct v3_xml *)V3_Malloc(sizeof(struct v3_xml));
540 memset(child, 0, sizeof(struct v3_xml));
542 child->name = (char *)name;
543 child->attr = empty_attrib_list;
546 return v3_xml_insert(child, xml, off);
550 // called when parser finds start of new tag
//   root - parser state; root->cur is the tag the new one is opened under
//   name - tag name, pointing into the parse buffer
//   attr - attribute list assembled by the caller
// When a child is added, its offset is the current length of the parent text.
551 static void v3_xml_open_tag(struct v3_xml_root * root, char * name, char ** attr) {
552 struct v3_xml * xml = root->cur;
555 xml = v3_xml_add_child(xml, name, strlen(xml->txt));
562 root->cur = xml; // update tag insertion point
571 // parse the given xml string and return an v3_xml structure
// buf is parsed destructively: its final byte is saved in last_char and
// overwritten with a terminator, and attribute names are terminated in place
// so that the tree can point straight into buf. len is the number of bytes in
// buf.
// The main loop dispatches on what follows each opening angle bracket: a
// start tag (with optional attributes), a closing tag, a comment, or a CDATA
// section; anything else is reported as an unexpected bracket. Text between
// tags is handed to v3_xml_char_content.
// NOTE(review): isalpha and isspace receive a plain char here; if these are
// the standard ctype routines, a negative value is outside their domain. The
// companion test for *buf below zero also only fires where char is signed --
// confirm the intended handling of bytes above 0x7f.
// NOTE(review): where a tag is opened and closed back to back (presumably the
// self-closing form), the result of v3_xml_close_tag is dropped, unlike the
// closing-tag branch that compares it against -1. Since v3_xml_err frees the
// tree, confirm that parsing cannot continue after a failure there.
572 static struct v3_xml * parse_str(char * buf, size_t len) {
573 struct v3_xml_root * root = (struct v3_xml_root *)v3_xml_new(NULL);
578 char ** tmp_attr = NULL; // initialized to avoid compile warning
585 v3_xml_err(root, NULL, "Empty XML String\n");
589 root->tmp_start = buf;
590 root->tmp_end = buf + len; // record start and end of work area
592 last_char = buf[len - 1]; // save end char
593 buf[len - 1] = '\0'; // turn end char into null terminator
// skip anything that precedes the first opening angle bracket
595 while ((*buf) && (*buf != '<')) {
601 v3_xml_err(root, buf, "root tag missing");
// every tag starts out with the shared empty attribute list
606 attr = (char **)empty_attrib_list;
607 tag_ptr = ++buf; // skip first '<'
609 if (isalpha(*buf) || (*buf == '_') || (*buf == ':') || (*buf < '\0')) {
612 if (root->cur == NULL) {
613 v3_xml_err(root, tag_ptr, "markup outside of root element");
// advance to the end of the tag name
617 buf += strcspn(buf, V3_XML_WS "/>");
619 while (isspace(*buf)) {
620 // null terminate tag name,
621 // this writes '\0' to spaces after first tag
625 // check if attribute follows tag
626 if ((*buf) && (*buf != '/') && (*buf != '>')) {
627 // there is an attribute
628 // find attributes for correct tag
630 ((tmp_attr = root->attr[i]) &&
631 (strcasecmp(tmp_attr[0], tag_ptr) != 0));
634 // 'tmp_attr' now points to the attribute list associated with 'tag_ptr'
637 // attributes are name value pairs,
638 // 2nd to last entry is null (end of list)
639 // last entry points to a string map marking whether values have been malloced...
640 // loop through attributes until hitting the closing bracket
642 (*buf) && (*buf != '/') && (*buf != '>');
644 // buf is incremented later on
// slot positions for the pair being added, all relative to attr_idx
646 int attr_cnt = (attr_idx / 2) + 1;
647 int val_idx = attr_idx + 1;
648 int term_idx = attr_idx + 2;
649 int last_idx = attr_idx + 3;
651 // attr = allocated space
652 // attr[val_idx] = mem for list of malloced vals
// grow the pointer array by one name/value pair while keeping room for the
// terminator and the malloc-map slot
654 attr = tmp_realloc(attr,
655 (((attr_cnt - 1) * (2 * sizeof(char *))) +
656 (2 * sizeof(char *))),
657 ((attr_cnt * (2 * sizeof(char *))) +
658 (2 * sizeof(char *))));
660 attr[last_idx] = tmp_realloc(attr[last_idx - 2],
// first attribute: four pointer slots (name, value, terminator, map) and a
// two byte map string
664 attr = V3_Malloc(4 * sizeof(char *));
665 attr[last_idx] = V3_Malloc(2);
669 attr[attr_idx] = buf; // set attribute name
670 attr[val_idx] = ""; // temporary attribute value
671 attr[term_idx] = NULL; // null terminate list
672 strcpy(attr[last_idx] + attr_cnt, " "); // value is not malloc'd, offset into the stringmap
// advance to the end of the attribute name
674 buf += strcspn(buf, V3_XML_WS "=/>");
676 if ((*buf == '=') || isspace(*buf)) {
678 *(buf++) = '\0'; // null terminate tag attribute name
680 // eat whitespace (and more multiple '=' ?)
681 buf += strspn(buf, V3_XML_WS "=");
685 if ((quote_char == '"') || (quote_char == '\'')) { // attribute value
686 attr[val_idx] = ++buf;
688 while ((*buf) && (*buf != quote_char)) {
693 // null terminate attribute val
696 v3_xml_free_attr(attr);
697 v3_xml_err(root, tag_ptr, "missing %c", quote_char);
702 ( (tmp_attr) && (tmp_attr[j]) &&
703 (strcasecmp(tmp_attr[j], attr[attr_idx]) != 0));
706 attr[val_idx] = v3_xml_decode(attr[val_idx], root->ent,
707 ((tmp_attr && tmp_attr[j]) ?
711 if ( (attr[val_idx] < tag_ptr) ||
712 (attr[val_idx] > buf) ) {
713 attr[last_idx][attr_cnt - 1] = V3_XML_TXTM; // value malloced
718 while (isspace(*buf)) {
727 if ( ((*buf) && (*buf != '>')) ||
728 ((!*buf) && (last_char != '>'))) {
731 v3_xml_free_attr(attr);
733 v3_xml_err(root, tag_ptr, "missing >");
736 v3_xml_open_tag(root, tag_ptr, attr);
737 v3_xml_close_tag(root, tag_ptr, buf);
738 } else if (((quote_char = *buf) == '>') ||
739 ((!*buf) && (last_char == '>'))) {
741 *buf = '\0'; // temporarily null terminate tag name
742 v3_xml_open_tag(root, tag_ptr, attr);
746 v3_xml_free_attr(attr);
748 v3_xml_err(root, tag_ptr, "missing >");
751 } else if (*buf == '/') {
754 buf += strcspn(tag_ptr = buf + 1, V3_XML_WS ">") + 1;
757 if ((*buf == '\0') && (last_char != '>')) {
758 v3_xml_err(root, tag_ptr, "missing >");
762 *buf = '\0'; // temporarily null terminate tag name
764 if (v3_xml_close_tag(root, tag_ptr, buf) == -1) {
770 buf += strspn(buf, V3_XML_WS);
772 } else if (strncmp(buf, "!--", 3) == 0) {
774 if ( ((buf = strstr(buf + 3, "--")) == 0) ||
775 ((*(buf += 2) != '>') && (*buf)) ||
776 ((!*buf) && (last_char != '>'))) {
777 v3_xml_err(root, tag_ptr, "unclosed <!--");
780 } else if (! strncmp(buf, "![CDATA[", 8)) {
782 if ((buf = strstr(buf, "]]>"))) {
783 v3_xml_char_content(root, tag_ptr + 8, (buf += 2) - tag_ptr - 10, 'c');
785 v3_xml_err(root, tag_ptr, "unclosed <![CDATA[");
789 v3_xml_err(root, tag_ptr, "unexpected <");
793 if (! buf || ! *buf) {
800 if (*buf && (*buf != '<')) {
801 // tag character content
802 while (*buf && (*buf != '<')) {
807 v3_xml_char_content(root, tag_ptr, buf - tag_ptr, '&');
811 } else if (*buf == '\0') {
816 if (root->cur == NULL) {
818 } else if (root->cur->name == NULL) {
819 v3_xml_err(root, tag_ptr, "root tag missing");
822 v3_xml_err(root, tag_ptr, "unclosed tag <%s>", root->cur->name);
// Public entry point: duplicates buf into a new V3_Malloc buffer of
// strlen(buf) + 1 bytes and hands the copy to parse_str, so the destructive
// parsing works on the copy. parse_str receives the length without the
// terminator.
// NOTE(review): the V3_Malloc result is passed to strcpy with no NULL check.
828 struct v3_xml * v3_xml_parse(char * buf) {
830 char * xml_buf = NULL;
836 str_len = strlen(buf);
837 xml_buf = (char *)V3_Malloc(str_len + 1);
838 strcpy(xml_buf, buf);
840 return parse_str(xml_buf, str_len);
845 // free the memory allocated for the v3_xml structure
846 void v3_xml_free(struct v3_xml * xml) {
847 struct v3_xml_root * root = (struct v3_xml_root *)xml;
855 v3_xml_free(xml->child);
856 v3_xml_free(xml->ordered);
858 if (xml->parent == NULL) {
859 // free root tag allocations
861 for (i = 10; root->ent[i]; i += 2) {
862 // 0 - 9 are default entities (<>&"')
863 if ((s = root->ent[i + 1]) < root->tmp_start || s > root->tmp_end) {
868 V3_Free(root->ent); // free list of general entities
870 for (i = 0; (a = root->attr[i]); i++) {
871 for (j = 1; a[j++]; j += 2) {
872 // free malloced attribute values
873 if (a[j] && (a[j] < root->tmp_start || a[j] > root->tmp_end)) {
881 // free default attribute list
885 V3_Free(root->str_ptr); // malloced xml data
888 v3_xml_free_attr(xml->attr); // tag attributes
890 if ((xml->flags & V3_XML_TXTM)) {
895 if ((xml->flags & V3_XML_NAMEM)) {