1 year ago

#297106

test-img

Willyu

libxml2 in C: how to get table elements in HTML

I'm using libxml2 in C++. There are some tables in an HTML page; how can I get specific data given the name or index of rows and columns? I just want to get the 'No. Name Age' data.

link contains tables

Table

Now I can traverse the whole HTML file with the following code:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/HTMLparser.h>

/*
 * Print every node reachable from a_node, depth-first.
 *
 * Starting at a_node, the function walks the sibling chain via ->next and,
 * for each node, prints its line number, node type, name and content to
 * stdout, then recurses into the node's ->children list.
 *
 * a_node may be NULL, in which case nothing is printed.
 */
void traverse_dom_trees(xmlNode * a_node)
{
    xmlNode *cur_node = NULL;

    for (cur_node = a_node; cur_node != NULL; cur_node = cur_node->next) {
        /* Passing NULL to %s is undefined behaviour, so substitute an
         * explicit placeholder when a node has no name or no content. */
        const char *name = cur_node->name != NULL
            ? (const char *)cur_node->name : "(null)";
        const char *content = cur_node->content != NULL
            ? (const char *)cur_node->content : "(null)";

        /* line is an integer field; it must be passed to %d as an int,
         * not as a pointer. */
        printf("line: %d\n", (int)cur_node->line);
        printf("type: %d\n", (int)cur_node->type);
        printf("name: %s\n", name);
        printf("content: %s\n\n", content);

        traverse_dom_trees(cur_node->children);
    }
}

int main(int argc, char **argv) 
{
    htmlDocPtr doc;
    xmlNode *roo_element = NULL;

    if (argc != 2)  
    {
        printf("\nInvalid argument\n");
        return(1);
    }

    /* Macro to check API for match with the DLL we are using */
    LIBXML_TEST_VERSION    

    doc = htmlReadFile(argv[1], NULL, HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
    if (doc == NULL) 
    {
        fprintf(stderr, "Document not parsed successfully.\n");
        return 0;
    }

    roo_element = xmlDocGetRootElement(doc);

    if (roo_element == NULL) 
    {
        fprintf(stderr, "empty document\n");
        xmlFreeDoc(doc);
        return 0;
    }

    printf("Root Node is %s\n\n", roo_element->name);

    traverse_dom_trees(roo_element);

    xmlFreeDoc(doc);       // free document
    xmlCleanupParser();    // Free globals
    return 0;
}

Output: Output

I want to store the name and age in variables. I have tried checking cur_node->name, but I cannot get the traversal to stop.

html

c

libxml2

0 Answers

Your Answer

Accepted video resources