/*
* html.c: a libFuzzer target to test several HTML parser interfaces.
*
* See Copyright for the status of this software.
*/
#include
#include
#include
#include "fuzz.h"
int
LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
char ***argv ATTRIBUTE_UNUSED) {
xmlInitParser();
#ifdef LIBXML_CATALOG_ENABLED
xmlInitializeCatalog();
#endif
xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
return 0;
}
int
LLVMFuzzerTestOneInput(const char *data, size_t size) {
static const size_t maxChunkSize = 128;
htmlDocPtr doc;
htmlParserCtxtPtr ctxt;
xmlOutputBufferPtr out;
const char *docBuffer;
size_t docSize, consumed, chunkSize;
int opts, outSize;
xmlFuzzDataInit(data, size);
opts = xmlFuzzReadInt();
docBuffer = xmlFuzzReadRemaining(&docSize);
if (docBuffer == NULL) {
xmlFuzzDataCleanup();
return(0);
}
/* Pull parser */
doc = htmlReadMemory(docBuffer, docSize, NULL, NULL, opts);
/*
* Also test the serializer. Call htmlDocContentDumpOutput with our
* own buffer to avoid encoding the output. The HTML encoding is
* excruciatingly slow (see htmlEntityValueLookup).
*/
out = xmlAllocOutputBuffer(NULL);
htmlDocContentDumpOutput(out, doc, NULL);
xmlOutputBufferClose(out);
xmlFreeDoc(doc);
/* Push parser */
ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
XML_CHAR_ENCODING_NONE);
htmlCtxtUseOptions(ctxt, opts);
for (consumed = 0; consumed < docSize; consumed += chunkSize) {
chunkSize = docSize - consumed;
if (chunkSize > maxChunkSize)
chunkSize = maxChunkSize;
htmlParseChunk(ctxt, docBuffer + consumed, chunkSize, 0);
}
htmlParseChunk(ctxt, NULL, 0, 1);
xmlFreeDoc(ctxt->myDoc);
htmlFreeParserCtxt(ctxt);
/* Cleanup */
xmlFuzzDataCleanup();
xmlResetLastError();
return(0);
}