1 year ago
#297106
Willyu
libxml2 in C: how to get table elements in HTML
I'm using libxml2 in C++. There are some tables in an HTML page. How can I get specific data given the name or index of rows and columns? I just want to get the 'No. Name Age' data.
Right now I can traverse the whole HTML file with the following code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/HTMLparser.h>
/*
 * Walk a sibling chain of DOM nodes depth-first and dump each node to stdout.
 *
 * For every node reachable from a_node through ->next, this prints the
 * node's line, type, name and content, then recurses into the node's
 * children before moving on to the next sibling.
 *
 * a_node: first node of the chain to dump; NULL is accepted and prints
 *         nothing.
 */
void traverse_dom_trees(xmlNode * a_node)
{
    xmlNode *cur_node = NULL;

    if (NULL == a_node) {
        return;
    }

    for (cur_node = a_node; cur_node; cur_node = cur_node->next) {
        /* line is an integer field, not a string: convert it to int so the
         * argument really matches %d (a char * argument here is undefined
         * behavior). */
        printf("line: %d\n", (int)cur_node->line);
        printf("type: %d\n", (int)cur_node->type);

        /* Passing NULL to %s is undefined behavior, and content can be NULL
         * for nodes that carry no text, so substitute a visible marker. */
        printf("name: %s\n",
               cur_node->name ? (const char *)cur_node->name : "(null)");
        printf("content: %s\n\n",
               cur_node->content ? (const char *)cur_node->content : "(null)");

        traverse_dom_trees(cur_node->children);
    }
}
/*
 * Parse the HTML file named by argv[1] and dump its DOM tree to stdout.
 *
 * Returns EXIT_SUCCESS when the document was parsed and dumped, and
 * EXIT_FAILURE on a usage error, a parse failure, or an empty document.
 */
int main(int argc, char **argv)
{
    htmlDocPtr doc;
    xmlNode *root_element = NULL;
    int status = EXIT_FAILURE;

    if (argc != 2) {
        /* Diagnostics go to stderr, like the other error paths below. */
        fprintf(stderr, "\nInvalid argument\n");
        return EXIT_FAILURE;
    }

    /* Macro to check API for match with the DLL we are using */
    LIBXML_TEST_VERSION

    doc = htmlReadFile(argv[1], NULL,
                       HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR |
                       HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
    if (doc == NULL) {
        fprintf(stderr, "Document not parsed successfully.\n");
        goto out_cleanup;
    }

    root_element = xmlDocGetRootElement(doc);
    if (root_element == NULL) {
        fprintf(stderr, "empty document\n");
        goto out_free_doc;
    }

    printf("Root Node is %s\n\n", root_element->name);
    traverse_dom_trees(root_element);
    status = EXIT_SUCCESS;

out_free_doc:
    xmlFreeDoc(doc);        /* free document */
out_cleanup:
    xmlCleanupParser();     /* free globals, on failure paths too */
    return status;
}
Output: Output
I want to store the name and age in variables. I have tried checking cur_node->name,
but the traversal does not stop.
html
c
libxml2
0 Answers
Your Answer