/*********************************************************************************************************
 *  ------------------------------------------------------------------------------------------------------
 *  file description
 *  ------------------------------------------------------------------------------------------------------
 *         \file  xml.h
 *         \unit  xml
 *        \brief  This is a C language version of xml parser
 *       \author  Lamdonn
 *      \version  v1.0.0
 *      \license  GPL-2.0
 *    \copyright  Copyright (C) 2023 Lamdonn.
 ********************************************************************************************************/
#include "xml.h"
#include <stdio.h>   /* FILE, fopen, fwrite, fclose */
#include <stdlib.h>  /* malloc, realloc, free */
#include <string.h>  /* strlen, memcpy, memset, strcmp, strncmp */
#include <ctype.h>   /* isalpha, isalnum, isprint */

/* dump buffer define */
typedef struct
{
    char* address;                      /**< buffer base address */
    unsigned int size;                  /**< size of buffer */
    unsigned int end;                   /**< end of buffer used */
} BUFFER;

/* attribute define */
typedef struct ATTR
{
    struct ATTR *next;                  /**< next attribute */
    char* name;                         /**< name of attribute */
    char* value;                        /**< value of attribute */
} ATTR;

/* xml define */
typedef struct XML
{
    struct XML *next;                   /**< next xml */
    char* name;                         /**< name of xml */
    ATTR* attrs;                        /**< attributes */
    char* text;                         /**< text of xml */
    struct XML *child;                  /**< child of xml */
} XML;

static const char* lbegin = NULL;       /**< beginning of line */
static int etype = XML_E_OK;            /**< type of error message */
static int eline = 0;                   /**< line of error message */
static int ecolumn = 0;                 /**< column of error message */

/* set error message and type; relies on a local variable named `text` pointing at the parse position */
#define E(type)                         (etype=(type),ecolumn=text-lbegin)

/**
 *  \brief for analysing failed parses
 *  \param[out] line: error line, only written when an error is recorded (may be NULL)
 *  \param[out] column: error column, only written when an error is recorded (may be NULL)
 *  \return error type, XML_E_OK when no error is recorded
 */
int xml_error_info(int* line, int* column)
{
    /* No error occurred, the outputs are left untouched */
    if (etype == XML_E_OK) return XML_E_OK;

    /* Output the line and column where the error is located */
    if (line) *line = eline;
    if (column) *column = ecolumn;

    /* Return error type */
    return etype;
}

/**
 *  \brief get the smallest power
of 2 not less than x.
 *  \param[in] x: positive integer
 *  \return the smallest power of 2 that is greater than or equal to x
 *          (0 when x <= 0 or when the result does not fit in 31 bits on typical targets)
 */
static int pow2gt(int x)
{
    /* Work in unsigned arithmetic: no signed overflow on the final +1 and no shifts of negative values */
    unsigned int v = (unsigned int)x - 1u;
    unsigned int bits = (unsigned int)sizeof(v) * 8u;
    unsigned int shift;

    /* Smear the highest set bit into every lower position */
    for (shift = 1; shift < bits; shift <<= 1)
    {
        v |= (v >> shift);
    }

    return (int)(v + 1u);
}

/**
 *  \brief Duplicate a given string.
 *
 *  \param[in] str String to be duplicated, must not be NULL.
 *  \return Pointer to the duplicated string if successful, NULL otherwise. The caller frees it.
 */
static char* xml_strdup(const char* str)
{
    int size = (int)strlen(str) + 1;    /* include the terminator */
    char* s;

    /* Allocate memory for the new string */
    s = (char*)malloc(size);
    if (!s) return NULL;

    /* Copy the given string, terminator included */
    memcpy(s, str, size);

    return s;
}

/**
 *  \brief Compare two strings, tolerating NULL.
 *
 *  \param[in] s1 The first string to compare.
 *  \param[in] s2 The second string to compare.
 *  \return 0 if the strings are equal, a negative value if s1 is less than s2,
 *          or a positive value if s1 is greater than s2. NULL sorts before any string.
 */
static int string_compare(const char* s1, const char* s2)
{
    if (s1 == s2) return 0;     /* same address (or both NULL): equal */
    if (!s1) return -1;         /* NULL is considered less than any string */
    if (!s2) return 1;

    /* Compare the strings character by character */
    while (*s1 == *s2)
    {
        if (*s1 == 0) return 0; /* both reached the terminator */
        s1++, s2++;
    }

    /* Difference of the first mismatching bytes, compared as unsigned */
    return *(const unsigned char*)s1 - *(const unsigned char*)s2;
}

/**
 *  \brief confirm whether buf still has the required capacity, otherwise add capacity.
* \param[in] buf: buf handle * \param[in] needed: required capacity * \return 1 success or 0 fail */ static int expansion(BUFFER *buf, unsigned int needed) { char* address; int size; if (!buf) return 0; needed += buf->end; if (needed <= buf->size) return 1; /* there is still enough space in the current buf */ size = pow2gt(needed); address = (char*)realloc(buf->address, size); if (!address) return 0; buf->size = size; buf->address = address; return 1; } #define buf_append(n) expansion(buf, (n)) /* append n size space for buf */ #define buf_putc(c) (buf->address[buf->end++]=(c)) /* put a non zero character into buf */ #define buf_puts(s, len) do{for(int i=0;iaddress[buf->end]) /* obtain the address at the tail of buf */ /** * \brief Skip leading whitespace characters in a text. * * \param[in] text The text to process. * \return A pointer to the first non-whitespace character in the text. */ static const char* skip(const char* text) { while (text && *text && (unsigned char)(*text) <= ' ') { /* when a newline character is encountered, record the current parsing line */ if (*text == '\n') { /* Increase eline and set lbegin to the current position */ eline++; lbegin = text; } /* Move to the next character */ text++; } /* Return a pointer to the first non-whitespace character */ return text; } /** * \brief Create a new ATTR object. * * \return A pointer to a newly allocated ATTR object with its memory initialized to zero. */ static ATTR* new_attr() { /* Allocate memory for a new ATTR object */ ATTR* attr = (ATTR*)malloc(sizeof(ATTR)); /* Initialize the memory of the ATTR object to zero */ memset(attr, 0, sizeof(ATTR)); return attr; } /** * \brief Delete a linked list of ATTR objects and free the memory. * * \param[in] attrs The head of the linked list of ATTR objects to delete. 
*/ static void attribute_delete(ATTR* attrs) { ATTR* next; while (attrs) { /* Store the next ATTR object */ next = attrs->next; /* Free the memory allocated for the name */ if (attrs->name) free(attrs->name); /* Free the memory allocated for the value */ if (attrs->value) free(attrs->value); /* Free the memory allocated for the current ATTR object */ free(attrs); /* Move to the next ATTR object in the linked list */ attrs = next; } } /** * \brief Create a new XML node. * * \return A pointer to a newly allocated XML node object with its memory initialized to zero, or NULL if memory allocation fails. */ static xml_t new_node(void) { /* Allocate memory for a new XML node object */ xml_t node = (xml_t)malloc(sizeof(XML)); /* Check if memory allocation fails, return NULL */ if (!node) return NULL; /* Initialize the memory of the XML node object to zero */ memset(node, 0, sizeof(XML)); /* Return a pointer to the newly created XML node object */ return node; } /** * \brief delete a xml object. * \param[in] xml: xml handle * \return none */ void xml_delete(xml_t xml) { xml_t next; if (xml) { /* Free the memory allocated for the name */ if (xml->name) free(xml->name); /* Free the memory allocated for the text */ if (xml->text) free(xml->text); /* Delete the attributes of the XML node */ if (xml->attrs) attribute_delete(xml->attrs); /* Recursively delete the child node */ while (xml->child) { /* Store the next child node */ next = xml->child->next; /* Delete the child node */ xml_delete(xml->child); /* Move to the next child node */ xml->child = next; } /* Free the memory allocated for the XML node itself */ free(xml); } } /** * \brief Check the validity of an XML node name. * * \param[in] name The name of the XML node to check. * \param[out] len The length of the XML node name. * \return 0:illegal, 1:end<0>, 2:delim. 
*/
static int check_name(const char* name, int *len)
{
    const char *s = name;
    int ret = 1;
    int l = 0;

    /* A 7-bit first character must be a letter; bytes outside 7-bit ASCII are let through */
    if (*s > 0 && !isalpha((unsigned char)*s))
    {
        if (len) *len = 0;  /* always define the output, even on early exit */
        return 0;
    }

    /* Check the name specification, and calculate the length */
    while (*s)
    {
        /* A delimiter ends the name */
        if (*s == '>' || *s == ' ' || *s == '\t') { ret = 2; break; }

        /* illegal character */
        if (*s > 0 && !(isalnum((unsigned char)*s) || *s=='_' || *s=='-' || *s=='.' || *s==':')) { ret = 0; break; }

        s++, l++;
    }

    /* Output length: number of characters accepted before stopping */
    if (len) *len = l;

    return ret;
}

/**
 *  \brief Duplicate an XML node name with validity check.
 *
 *  \param[in] name The name of the XML node to duplicate.
 *  \return A newly allocated string containing the duplicated name, or NULL if the name is empty,
 *          contains an illegal character or a delimiter, or memory allocation fails.
 */
static char *name_strdup(const char* name)
{
    char *s;
    int len = 0, ret = 0;

    /* The whole string must be a name: neither illegal characters nor delimiters are accepted */
    ret = check_name(name, &len);
    if (ret == 0 || ret == 2) return NULL;

    /* An empty name would be printed as `<>`, reject it */
    if (len == 0) return NULL;

    /* Allocating space */
    s = malloc(len + 1);
    if (!s) return NULL;

    /* Copy string */
    memcpy(s, name, len);
    s[len] = 0;

    return s;
}

/**
 *  \brief Duplicate an XML node value with validity check.
 *
 *  \param[in] value The value of the XML node to duplicate.
 *  \return A newly allocated string containing the duplicated value, or NULL if the value contains
 *          non-printable 7-bit characters or memory allocation fails.
 */
static char *value_strdup(const char* value)
{
    const char *p = value;
    char *s;
    int len = 0;

    /* Check the value specification, and calculate the length */
    while (*p)
    {
        if (*p > 0 && !isprint((unsigned char)*p)) return NULL;
        p++, len++;
    }

    /* Allocating space and assigning */
    s = malloc(len + 1);
    if (!s) return NULL;
    memcpy(s, value, len);
    s[len] = 0;

    return s;
}

/**
 *  \brief create a xml object.
* \param[in] name: name of xml node
 *  \return new xml object, or NULL when name is NULL, is rejected by the name check, or memory runs out
 */
xml_t xml_create(const char* name)
{
    xml_t node;

    if (name == NULL) return NULL;

    /* Create a new XML node */
    node = new_node();
    if (node == NULL) return NULL;

    /* Give the node its own validated copy of the name */
    node->name = name_strdup(name);
    if (node->name != NULL) return node;

    /* The name was rejected or could not be copied */
    free(node);
    return NULL;
}

/**
 *  \brief add an attribute to xml. The new attribute becomes the first one of the node.
 *  \param[in] xml: xml handle
 *  \param[in] name: attribute name
 *  \param[in] value: attribute value
 *  \return 1 success or 0 fail
 */
int xml_add_attribute(xml_t xml, const char *name, const char *value)
{
    ATTR* item;

    /* Input value validity check */
    if (xml == NULL || name == NULL || value == NULL) return 0;

    /* Create a new XML attribute */
    item = new_attr();
    if (item == NULL) return 0;

    /* Copy the name first, the value is only copied when the name was accepted */
    item->name = name_strdup(name);
    if (item->name != NULL)
    {
        item->value = value_strdup(value);
    }

    /* Either copy failed: release whatever was built so far */
    if (item->name == NULL || item->value == NULL)
    {
        if (item->name) free(item->name);
        if (item->value) free(item->value);
        free(item);
        return 0;
    }

    /* Attach attribute at the head of the list */
    item->next = xml->attrs;
    xml->attrs = item;

    return 1;
}

/**
 *  \brief remove an attribute from xml.
* \param[in] xml: xml handle * \param[in] name: attribute name or NULL matches all attributes * \param[in] index: index * \return 1 success or 0 fail */ int xml_remove_attribute(xml_t xml, const char *name, int index) { ATTR *attr, *prev; if (!xml) return 0; if (index < 0) return 0; attr = xml->attrs; while (attr) { /* Check if the attribute name matches */ if (!name || !string_compare(name, attr->name)) { index--; /* Break if the correct attribute is found */ if (index < 0) break; } prev = attr; attr = attr->next; } /* If the index is not found */ if (index >= 0) return 0; /* If the attribute is not found */ if (!attr) return 0; /* Adjust the pointers to remove the attribute */ if (prev) prev->next = attr->next; /* Update the head of the attribute list if necessary */ else xml->attrs = attr->next; /* Free the memory for the attribute name and value */ if (attr->name) free(attr->name); if (attr->value) free(attr->value); /* Free the memory for the attribute object */ free(attr); return 1; } /** * \brief get attribute of xml. * \param[in] xml: xml handle * \param[in] name: attribute name or NULL matches all attributes * \param[in] index: index * \return value of attribute */ const char* xml_get_attribute(xml_t xml, const char *name, int index) { ATTR *attr, *prev; if (!xml) return NULL; if (index < 0) return NULL; attr = xml->attrs; while (attr) { /* Check if the attribute name matches */ if (!name || !string_compare(name, attr->name)) { index--; /* Break if the correct attribute is found */ if (index < 0) break; } prev = attr; attr = attr->next; } /* If the index is not found */ if (index >= 0) return NULL; /* If the attribute is not found */ if (!attr) return NULL; return attr->value; } /** * \brief inserts the specified xml object into another xml object. 
* \param[in] xml: xml handle * \param[in] index: index * \param[in] ins: insert xml object * \return 1 success or 0 fail */ int xml_insert(xml_t xml, int index, xml_t ins) { xml_t prev; /* Input value validity check */ if (!xml) return 0; if (!ins) return 0; if (index < 0) return 0; /* Insert the node at the beginning */ if (index == 0) { ins->next = xml->child; xml->child = ins; } /* Move to the desired index */ prev = xml->child; while (prev && --index) { prev = prev->next; } /* If the index is not valid */ if (!prev) return 0; /* Attach xml object */ ins->next = prev->next; prev->next = ins; return 1; } /** * \brief remove an child xml from xml. * \param[in] xml: xml handle * \param[in] name: child xml name or NULL matches all child * \param[in] index: index * \return 1 success or 0 fail */ int xml_remove(xml_t xml, const char *name, int index) { xml_t prev = NULL, node; /* Input value validity check */ if (!xml) return 0; if (xml->text) return 0; if (index < 0) return 0; node = xml->child; while (node) { /* Check if the node name matches */ if (!name || !string_compare(name, node->name)) { index--; /* Break if the correct node is found */ if (index < 0) break; } prev = node; node = node->next; } /* If the index is not found */ if (index >= 0) return 0; /* If the node is not found */ if (!node) return 0; /* Adjust the pointers to remove the node */ if (prev) prev->next = node->next; /* Update the head of the child list if necessary */ else xml->child = node->next; /* Delete the node and free memory */ xml_delete(node); return 1; } /** * \brief goes to the specified child object. 
* \param[in] xml: xml handle * \param[in] name: children xml name or NULL matches all children * \param[in] index: index * \return specified child object */ xml_t xml_to(xml_t xml, const char *name, int index) { xml_t prev = NULL, node; /* Input value validity check */ if (!xml) return NULL; if (index < 0) return NULL; node = xml->child; while (node) { /* Check if the node name matches */ if (!name || !string_compare(name, node->name)) { index--; /* Break if the correct node is found */ if (index < 0) break; } prev = node; node = node->next; } /* If the index is not found */ if (index >= 0) return NULL; /* If the node is not found */ if (!node) return NULL; return node; } /** * \brief set text into xml. * \param[in] xml: xml handle * \param[in] text: text * \return 1 success or 0 fail */ int xml_set_text(xml_t xml, const char *text) { char *s; if (!xml) return 0; if (!text) return 0; /* Duplicate the text content */ s = xml_strdup(text); if (!s) return 0; /* Free the previous text content if it exists */ if (xml->text) free(xml->text); /* Set the text content of the XML node */ xml->text = s; return 1; } /** * \brief get text of xml. * \param[in] xml: xml handle * \return text of xml */ const char* xml_get_text(xml_t xml) { if (!xml) return NULL; return xml->text; } /** * \brief Print text content with special characters converted to XML entities. * * \param[in] text The text content to print. * \param[in] buf The buffer for output. * \return 1 if the text content is successfully printed, 0 otherwise. 
*/ static int print_text(char *text, BUFFER* buf) { int len = 0, ap = 0; char *s = text; /* Calculate the length of the text and the length of the escape character that needs to be appended */ while (*s) { if (*s == '<') ap += 3; else if (*s == '>') ap += 3; else if (*s == '&') ap += 4; else if (*s == '\'') ap += 5; else if (*s == '\"') ap += 5; len++; s++; } /* Append buffer space */ if (!buf_append(len + ap)) return 0; /* Print text into buffer */ s = text; while (len--) { if (*s == '<') buf_puts("<", 4); else if (*s == '>') buf_puts(">", 4); else if (*s == '&') buf_puts("&", 5); else if (*s == '\'') buf_puts("'", 6); else if (*s == '\"') buf_puts(""", 6); else buf_putc(*s); s++; } return 1; } /** * \brief Print attribute content with special characters converted to XML entities. * * \param[in] attrs The attribute content to print. * \param[in] buf The buffer for output. * \return 1 if the attribute content is successfully printed, 0 otherwise. */ static int print_attrs(ATTR* attrs, BUFFER* buf) { int len; /* Traverse and print each attribute */ while (attrs) { /* Get the length of attrubute name */ len = strlen(attrs->name); if (!buf_append(len + 3)) return 0; buf_putc(' '); buf_puts(attrs->name, len); /* Print attrubute name */ buf_putc('='); buf_putc('"'); /* Print attrubute value */ print_text(attrs->value, buf); if (!buf_append(1)) return 0; buf_putc('"'); attrs = attrs->next; } return 1; } /** * \brief convert xml to text, using a buffered strategy. * \param[in] xml: xml handle * \param[in] buf: The buffer for output. * \param[in] depth: The depth at which the current node is located * \param[in] format: Neat formatted printing * \return 1 if the xml content is successfully printed, 0 otherwise. 
*/ static int print_node(xml_t xml, BUFFER* buf, int depth, int format) { int len; int i; if (!xml) return 0; /* Traverse and print each xml */ while (xml) { /* print indent */ if (format) { if (format == 1) { if (!buf_append(depth)) return 0; for (i = 0; i < depth; i++) buf_putc('\t'); } else format = 1; } /* print name */ len = strlen(xml->name); if (!buf_append(len + 1)) return 0; buf_putc('<'); buf_puts(xml->name, len); /* print attributes */ if (xml->attrs) print_attrs(xml->attrs, buf); /* tag header closing */ if (!buf_append(1)) return 0; buf_putc('>'); /* print text */ if (xml->text) { print_text(xml->text, buf); if (format) format = 2; // After printing the text, it must follow the label } /* print child */ if (xml->child) { if (format == 1) { if (!buf_append(1)) return 0; buf_putc('\n'); } print_node(xml->child, buf, depth + 1, format); } /* print end label */ if (format) { if (format == 1) { if (!buf_append(depth)) return 0; for (i = 0; i < depth; i++) buf_putc('\t'); } else format = 1; } if (!buf_append(len + (format?4:3))) return 0; buf_putc('<'); buf_putc('/'); buf_puts(xml->name, len); buf_putc('>'); if (format) buf_putc('\n'); xml = xml->next; } return 1; } /** * \brief convert xml to text, using a buffered strategy. 
* \param[in] xml: xml handle * \param[in] preset: preset is a guess at the final size, guessing well reduces reallocation * \param[in] unformat: unformat=0 gives formatted, otherwise gives unformatted * \param[out] len: address that receives the length of printed characters * \return address of converted text, free the char* when finished */ char* xml_dumps(xml_t xml, int preset, int unformat, int* len) { BUFFER p = {NULL, 0, 0}; BUFFER *buf = &p; /* Initialize buf */ if (preset < 1) preset = 1; if (!buf_append(preset)) return NULL; /* Print root node */ if (!print_node(xml, buf, 0, unformat?0:1) || !buf_append(1)) { free(buf->address); return NULL; } /* Add string terminator */ buf_end() = '\0'; /* Output string length */ if (len) *len = buf->end; return buf->address; } /** * \brief according to the xml object, generate a file. * \param[in] xml: xml handle * \param[in] filename: file name * \return file len or negative fail */ int xml_file_dump(xml_t xml, char* filename) { FILE* f; char* out; int len; if (!xml) return -1; out = xml_dumps(xml, 0, 0, &len); if (!out) return -1; f = fopen(filename, "w"); if (!f) { free(out); return -1; } fwrite(out, 1, len, f); fclose(f); free(out); return len; } /** * \brief skip XML comments. * \param[in] text: The text content to skip. * \return The character address after skip */ static const char* skip_comment(const char *text) { if (*text == '<') { /* Matched to the front of the comment label */ if (!strncmp(text, "", skip characters that are not '-' first */ while (*text && *text != '-') { /* Record line breaks */ if (*text == '\n') { eline++, lbegin = text; } text++; } /* It ends at the end of the string, and the annotation does not form a closed interval */ if (!*text) { E(XML_E_COMMENT); return text; } /* Matched to the end of the annotation label, the annotation has formed a complete closed interval */ if (!strncmp(text, "-->", 3)) break; text++; } /* skip "-->" */ text += 3; } } return text; } /** * \brief Parsing name. 
* \param[in] text: The text content to parse. * \param[out] name: Output the name generated by parsing * \return The character address after parse */ static const char* parse_name(const char *text, char **name) { int len = 0, ret = 0; char *s; /* Check the validity of the name and obtain the length */ ret = check_name(text, &len); if (ret == 0 || ret == 1) { E(XML_E_ILLEGAL); return text + len; } /* Allocate space for name */ s = (char*)malloc(len + 1); if (!s) { E(XML_E_MEMORY); return text + len; } /* Assign a value to name and output it */ memcpy(s, text, len); s[len] = 0; *name = s; return text + len; } /** * \brief Parsing one attribute. * \param[in] text: The text content to parse. * \param[out] attr: Output the attributes generated by parsing * \return The character address after parse */ static const char* parse_one_attribute(const char *text, ATTR* attr) { char* s; int len = 0, ret = 0; char *name, *value; char q = 0; /* quote: single or double */ text = skip(text); if (*text == '>') return text; /* null attributes */ /* Check the validity of the name and obtain the length */ ret = check_name(text, &len); /* Encountered illegal characters */ if (ret == 0) { if (text[len] != '=') { E(XML_E_ILLEGAL); return text + len; } } /* Parsed to the end of the text */ else if (ret == 1) { E(XML_E_END); return text + len; } /* Allocate space for name */ name = (char*)malloc(len + 1); if (!name) { E(XML_E_MEMORY); return text + len; } /* Assign a value to name */ strncpy(name, text, len); name[len] = 0; /* Recorde name */ attr->name = name; /* Skip */ text += len; text = skip(text); /* No separator for name-value encountered */ if (*text != '=') { E(XML_E_VALUE); return text; } /* Record whether the attribute value is currently included in `'` or in `"` */ text = skip(++text); if (*text == '\'') q = '\''; else if (*text == '\"') q = '\"'; else { E(XML_E_QUOTE); return text; } /* Initialize the value used to parse the value variable */ text++; s = (char *)text; len = 0; 
/* How much space is needed for pre exploration and how many escape characters need to be parsed */ while (*s != q) { if (*s == '&') { if (!strncmp(s, "<", 4)) s += 3; else if (!strncmp(s, ">", 4)) s += 3; else if (!strncmp(s, "&", 5)) s += 4; else if (!strncmp(s, "'", 6)) s += 5; else if (!strncmp(s, """, 6)) s += 5; } len++, s++; } /* Allocate space to store parsed content */ value = (char*)malloc(len + 1); if (!value) { E(XML_E_MEMORY); return text; } /* Store character by character */ s = value; while (*text != q) { /* Record line breaks */ if (*text == '\n') { eline++, lbegin = text; } /* Record escape characters */ if (*text == '&') { if (!strncmp(text, "<", 4)) {text += 4; *s++ = '<'; continue;} else if (!strncmp(text, ">", 4)) {text += 4; *s++ = '>'; continue;} else if (!strncmp(text, "&", 5)) {text += 5; *s++ = '&'; continue;} else if (!strncmp(text, "'", 6)) {text += 6; *s++ = '\''; continue;} else if (!strncmp(text, """, 6)) {text += 6; *s++ = '\"'; continue;} } /* Normal character */ *s++ = *text++; } /* Add the end of the string and record it */ value[len] = 0; attr->value = value; return text + 1; /* skip quote */ } /** * \brief Parse all attributes of the entire label. * \param[in] text: The text content to parse. 
* \param[out] out: Output the attributes generated by parsing * \return The character address after parse */ static const char* parse_attributes(const char *text, ATTR **out) { ATTR *first = NULL, *prev = NULL, *attr = NULL; *out = NULL; /* Reset output */ /* Create new attributes to record the results of parsing */ attr = new_attr(); if (!attr) { E(XML_E_MEMORY); return text; } text = parse_one_attribute(text, attr); /* parse error, or no attribute, return directly */ if (etype != XML_E_OK || !attr->name) { attribute_delete(attr); return text; } /* Continue parsing other attributes */ first = attr, prev = first, *out = first; while (*text) { /* Create new attributes to record the results of parsing */ attr = new_attr(); if (!attr) { E(XML_E_MEMORY); return text; } /* Parse other attributes */ text = parse_one_attribute(text, attr); /* parse error */ if (etype != XML_E_OK) { attribute_delete(attr); return text; } /* end attribute, all attributes have been parsed */ if (!attr->name) { attribute_delete(attr); break; } /* Link */ prev->next = attr; } return text; } /** * \brief Parse text. * \param[in] text: The text content to parse. 
* \param[out] out: Output the attributes generated by parsing * \return The character address after parse */ static const char* parse_text(const char *text, char **out) { char *s, *t; int len = 0; s = (char *)skip(text); /* Not an escape, but a separate '<' character is not allowed */ if (*s == '<' && s[1] != '!') { *out = NULL; return s; } /* Predict the length of text to allocate space */ while (*s) { /* Text contains special escape */ if (*s == '<') { /* Skip comments */ if (!strncmp(s, "", 3)) break; s++; } s += 3; continue; } /* Get CDATA */ else if (!strncmp(s, "", 3)) break; s++, len++; } s += 3; continue; } else break; } /* Normal escape characters */ if (*s == '&') { if (!strncmp(s, "<", 4)) s += 3; else if (!strncmp(s, ">", 4)) s += 3; else if (!strncmp(s, "&", 5)) s += 4; else if (!strncmp(s, "'", 6)) s += 5; else if (!strncmp(s, """, 6)) s += 5; } s++, len++; } /* Allocate space */ t = (char *)malloc(len + 1); if (!t) { E(XML_E_MEMORY); return text; } /* Similar work when repeatedly predicting length to store characters in allocated space */ *out = t; while (text < s) { /* Skip comments */ text = skip_comment(text); if (etype != XML_E_OK) return text; if (*text == '<') { if (!strncmp(text, "", 3)) break; *t++ = *text++; } text += 3; continue; } } if (*text == '\n') {eline++, lbegin = text;} if (*text == '&') { if (!strncmp(text, "<", 4)) {text += 4; *t++ = '<'; continue;} else if (!strncmp(text, ">", 4)) {text += 4; *t++ = '>'; continue;} else if (!strncmp(text, "&", 5)) {text += 5; *t++ = '&'; continue;} else if (!strncmp(text, "'", 6)) {text += 6; *t++ = '\''; continue;} else if (!strncmp(text, """, 6)) {text += 6; *t++ = '\"'; continue;} } *t++ = *text++; } /* Add string terminator */ *t = 0; return text; } /** * \brief Parse xml node. * \param[in] text: The text content to parse. 
* \param[out] node: Output the node information * \return The character address after parse */ static const char* parse_node(const char* text, xml_t node) { xml_t tail = NULL, child; ATTR* attrs = NULL; int len = 0; char *out = NULL; const char *s = NULL; /* Get '<' */ while (*text) { /* Exploring Character `<` */ text = skip(text); if (*text != '<') { E(XML_E_LABEL); return text; } /* Check if this is a comment, if it is, skip it */ s = skip_comment(text); if (etype != XML_E_OK) return text; /* Indicates that the middle comment has been skipped */ if (s > text) { text = s; continue; } /* Normal `<` */ else { text = s; break; } } /* Skip `<` */ text++; /* parse name */ s = parse_name(text, &node->name); if (etype != XML_E_OK) return s; len = s - text; text = s; /* parsing the header tag is not over yet, and then parsing the attribute */ if (*text != '>') { text = parse_attributes(text, &attrs); node->attrs = attrs; if (etype != XML_E_OK) return text; } /* parse text */ text = parse_text(text + 1, &out); node->text = out; /* parse children and end tag */ while (*text) { /* Exploring Character `<` */ text = skip(text); if (*text != '<') { E(XML_E_ILLEGAL); return text; } /* label tail */ if (text[1] == '/') { /* Skip `name, text, len)) { E(XML_E_LABEL); return text; } /* Skip name */ text += len; /* Determine if aend tag has been formed */ if (*text != '>') { E(XML_E_LABEL); return text; } /* Reset `len` */ len = 0; /* Skip `>` */ text++; return text; } /* parsing child elements */ else { /* Check if this is a comment, if it is, skip it */ s = skip_comment(text); if (etype != XML_E_OK) return text; /* Indicates that the middle comment has been skipped */ if (s > text) { text = s; continue; } /* Normal `<` */ else text = s; /* Create a new node to store new parsing content */ child = new_node(); if (!child) { E(XML_E_MEMORY); return text; } /* Add to child node linked list */ if (!node->child) node->child = child; else tail->next = child; tail = child; /* Parse child 
node */ text = parse_node(text, child); if (etype != XML_E_OK) return text; /* Skip the extra characters */ while (*text && *text != '<') { if (*text == '\n') { eline++, lbegin = text; } text++; } } } /* No closing tag */ if (len > 0) E(XML_E_LABEL); return text; } /** * \brief Parse xml file head. * \param[in] text: The text content to parse. * \return The character address after parse */ static const char* parse_head(const char* text) { char *name = NULL, *version = NULL, *encoding = NULL; ATTR *attr_version = NULL, *attr_encoding = NULL; /* Skip `name, "version")) { E(XML_E_VERSION); goto END; } /* Create new attributes to store encoding information */ attr_encoding = new_attr(); if (!attr_encoding) { E(XML_E_MEMORY); goto END; } /* Parse attributes to determine if it is the expected `encoding` */ text = parse_one_attribute(text, attr_encoding); if (etype != XML_E_OK) goto END; /* parse error */ if (strcmp(attr_encoding->name, "encoding")) { E(XML_E_ENCODING); goto END; } } /* Determine whether the label forms a complete closed interval */ text = skip(text); if (strncmp(text, "?>", 2)) { E(XML_E_NOTES); goto END; } /* Skip `?>` */ text += 2; END: if (attr_version) attribute_delete(attr_version); if (attr_encoding) attribute_delete(attr_encoding); if (name) free(name); return text; } /** * \brief load xml from text string. * \param[in] text: text * \return xml object */ xml_t xml_loads(const char* text) { xml_t root; etype = XML_E_OK; eline = 1; lbegin = text; ecolumn = 0; /* parse head notes */ if (strncmp(text, "