// $Id$ #include #include #include #include #include #include #include #include #include #include #include using XERCES_CPP_NAMESPACE_QUALIFIER MemBufInputSource; #include using XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument; #include using XERCES_CPP_NAMESPACE_QUALIFIER DOMNodeList; #include using XERCES_CPP_NAMESPACE_QUALIFIER DOMAttr; #include using XERCES_CPP_NAMESPACE_QUALIFIER AbstractDOMParser; #include using XERCES_CPP_NAMESPACE_QUALIFIER XMLString; #include using XERCES_CPP_NAMESPACE_QUALIFIER XMLUni; #include "XML.h" #include "net_util.h" #include "util.h" #include "AnalyzerTags.h" #include "BroList.h" #include "EventHandler.h" #include "EventLauncher.h" #include "Var.h" bool XML_Analyzer::inited_ = false; unsigned int XML_Analyzer::doc_count_ = 0; int XML_Analyzer::doc_frag_size_ = 0; QueryStore * XML_Analyzer::queries = 0; std::map XML_Analyzer::val_types; XMLCh * XML_Analyzer::X_TYPE_NAME; XMLCh * XML_Analyzer::X_VALUE_NAME; QueryStore::QueryStore(const char * qf) { char* query_files = strdup(qf); char* query_file; while ( (query_file = strsep(&query_files, " \t")) ) { if ( *query_file ) { DBG_LOG(DBG_XML, "about to load query \"%s\"", query_file ); DBG_PUSH(DBG_XML); const char * qf_path = new char[1024]; FILE* dummy = search_for_file(query_file, "xq", &qf_path); if ( !dummy ) { xml_warn( fmt( "query file %s not found", query_file)); continue; } DBG_LOG(DBG_XML, "found at %s", qf_path); fclose(dummy); std::string uri( getURIForPath(qf_path) ); DBG_LOG(DBG_XML, "now parsing from URI %s", uri.c_str() ); loadQuery(uri.c_str(), query_file); delete [] qf_path; DBG_POP(DBG_XML); } } if ( !queries.size() ) xml_warn( "no queries parsed; disabling analyzer" ); delete query_files; } QueryStore::QueryStore(const char * query_dir, const char * qf) { std::string base_uri( getURIForPath(query_dir) ); base_uri += "/"; char* query_files = strdup(qf); char* query_file; while ( (query_file = strsep(&query_files, " \t")) ) { if ( *query_file ) { std::string uri(base_uri + query_file); loadQuery(uri.c_str(), query_file); } } if ( !queries.size() ) xml_warn( "no queries parsed; disabling analyzer" ); delete query_files; } QueryStore::~QueryStore() { for ( unsigned int i = 0; i < queries.size(); ++i ) delete queries[i]; } void QueryStore::loadQuery(const char * query, const char * name) { // first look whether the file exists and is a regular file, // otherwise we get a SIGSEGV in the call to parseFromURI below; // ignore the first five characters ("file:") struct stat fs; if ( stat(query + 5, &fs) ) { xml_warn( fmt( "query file %s does not exist", query + 5)); return; } if ( ! S_ISREG(fs.st_mode) ) { xml_warn( fmt( "query file %s is no regular file", query + 5) ); return; } try { AutoDelete xuri( XMLString::transcode(query) ); queries.push_back( xqilla_.parseFromURI(xuri.get()) ); names.push_back( std::string(name) ); DBG_LOG(DBG_XML, "query parsed and added to query store"); } catch(XQException e) { AutoDelete query_file( XMLString::transcode(e.getXQueryFile()) ); AutoDelete error( XMLString::transcode(e.getError()) ); xml_warn( fmt( "error parsing query file %s: %s (line %d, col %d)", query_file.get(), error.get(), e.getXQueryLine(), e.getXQueryColumn() ) ); } } std::string QueryStore::getURIForPath(const char * path) { std::string uri("file:"); if ( path[0] == '/' ) { // this is an absolute path uri += path; } else { // this is a relative path, we need to get the working dir char cwd[1024]; getcwd(cwd, 1024); if ( strlen(cwd) == 1023 ) xml_warn( "query_dir path probably too long" ); uri += cwd; uri += "/"; uri += path; } return uri; } void DocumentCacheHack::setParsingProperties() { _parser.setDoNamespaces (opt_internal_int("XML::parser_DoNamespaces")); _parser.setExitOnFirstFatalError (opt_internal_int("XML::parser_ExitOnFirstFatalError")); _parser.setLoadExternalDTD (opt_internal_int("XML::parser_LoadExternalDTD")); _parser.setDoSchema (opt_internal_int("XML::parser_DoSchema")); _parser.cacheGrammarFromParse (opt_internal_int("XML::parser_cacheGrammarFromParse")); _parser.useCachedGrammarInParse (opt_internal_int("XML::parser_useCachedGrammarInParse")); StringVal* scanner = opt_internal_string("XML::parser_useScanner"); _parser.useScanner(X(scanner ? scanner->CheckString() : "IGXMLScanner")); _parser.setValidationScheme (opt_internal_int("XML::parser_ValidationScheme") ? AbstractDOMParser::Val_Auto : AbstractDOMParser::Val_Never); } XML_AnalyzerEndpoint::XML_AnalyzerEndpoint (XML_Analyzer * a, QueryStore * q, bool is_orig) : analyzer(a), conn(a->Conn()), queries(q), orig(is_orig) { buffer = new std::vector; skip_data = false; data_tested = false; } XML_AnalyzerEndpoint::~XML_AnalyzerEndpoint() { delete buffer; } void XML_AnalyzerEndpoint::Deliver(int len, const u_char* data) { if ( skip_data ) return; // filter out non-xml data if ( !data_tested && buffer->size() > 4 ) { if ( strncasecmp((char *)&(*buffer)[0], "clear(); return; } data_tested = true; } if ( !data_tested && !buffer->size() && len > 4 ) { if ( strncasecmp((char *)data, "insert(buffer->end(), data, data + len); } void XML_AnalyzerEndpoint::EndOfData() { DoEndOfData(); // always reset the endpoint state data_tested = false; skip_data = false; buffer->clear(); } void XML_AnalyzerEndpoint::DoEndOfData() { // buffer->size() < 5 => this can be no XML data if ( skip_data || buffer->size() < 5 ) return; DBG_XML_ARGS(conn, "new XML document of size %d", buffer->size()); DBG_PUSH(DBG_XML); char sys_id[15]; sprintf(sys_id, "doc_%010d", XML_Analyzer::next_doc_nr()); AutoDelete xsys_id( XMLString::transcode(sys_id) ); DBG_LOG(DBG_XML, "system id for document: %s", sys_id); MemBufInputSource in( &(*buffer)[0], buffer->size(), xsys_id.get() ); AutoDelete ctx(queries->createContext()); DocumentCacheHack * cache = new DocumentCacheHack(ctx->getMemoryManager()); cache->setParsingProperties(); double tm = current_time(); try { // start by parsing the document DOMDocument * doc = cache->loadXMLDocument(in, ctx); DBG_LOG(DBG_XML, "document parsing done (%f secs)", current_time() - tm); ctx->incrementDocumentRefCount(doc); // generate an xml_document event if a handler is defined if ( xml_document ) { DOMElement * elt = doc->getDocumentElement(); AutoDelete elt_name( XMLString::transcode(elt->getLocalName()) ); AutoDelete nsuri( XMLString::transcode(elt->getNamespaceURI()) ); bro_event_xml_document ( analyzer, conn, new StringVal( elt_name.get() ), nsuri.get() ? new StringVal(nsuri.get()) : new StringVal(""), new StringVal( buffer->size(), (char *)&(*buffer)[0] ) ); } // now execute the queries for ( unsigned int i = 0; i < queries->size(); ++i ) { XQQuery * query = queries->item(i); ctx->clearDynamicContext(); DBG_LOG(DBG_XML, "about to execute query \"%s\"", queries->name(i) ); DBG_PUSH(DBG_XML); Sequence seq( ctx->getMemoryManager() ); NodeImpl * node = new NodeImpl(doc, ctx); seq.addItem( node ); ctx->setContextItem( seq.first() ); ctx->setContextPosition( 1 ); ctx->setContextSize( 1 ); tm = current_time(); try { Result result = query->execute(ctx); DBG_LOG(DBG_XML, "query execution done (%f secs)", current_time() - tm); tm = current_time(); analyzer->generateEvents(result, ctx); DBG_LOG(DBG_XML, "event generation done (%f secs)", current_time() - tm); } catch(XQException e) { AutoDelete error( XMLString::transcode(e.getError()) ); bro_event_xml_query_error( analyzer, conn, new StringVal(queries->name(i)), new StringVal(error.get()), e.getXQueryLine(), e.getXQueryColumn() ); } DBG_POP(DBG_XML); } ctx->decrementDocumentRefCount(doc); } catch(XQException e) { AutoDelete error( XMLString::transcode(e.getError()) ); StringVal * doc_frag = new StringVal((int)buffer->size() < XML_Analyzer::doc_frag_size() ? buffer->size() : XML_Analyzer::doc_frag_size(), (char *)&(*buffer)[0] ); bro_event_xml_non_wellformed_doc( analyzer, analyzer->Conn(), new StringVal(error.get()), doc_frag, orig); } DBG_POP(DBG_XML); } void XML_Analyzer::initialize() { DBG_LOG(DBG_XML, "XML_Analyzer initialization"); DBG_PUSH(DBG_XML); StringVal* query_dir = opt_internal_string("XML::query_dir"); StringVal* query_files = opt_internal_string("XML::query_files"); if ( query_dir && query_dir->Len() ) queries = new QueryStore( query_dir->CheckString(), query_files->CheckString() ); else queries = new QueryStore( query_files->CheckString() ); doc_frag_size_ = opt_internal_int("XML::error_doc_frag_size"); val_types[TAG_NAME_BOOL] = new TypeTag(TYPE_BOOL); val_types[TAG_NAME_COUNT] = new TypeTag(TYPE_COUNT); val_types[TAG_NAME_INT] = new TypeTag(TYPE_INT); val_types[TAG_NAME_DOUBLE] = new TypeTag(TYPE_DOUBLE); val_types[TAG_NAME_STRING] = new TypeTag(TYPE_STRING); val_types[TAG_NAME_RECORD] = new TypeTag(TYPE_RECORD); val_types[TAG_NAME_SET] = new TypeTag(TYPE_LIST); val_types[TAG_NAME_TABLE] = new TypeTag(TYPE_TABLE); X_TYPE_NAME = XMLString::transcode(ATTR_NAME_TYPE); X_VALUE_NAME = XMLString::transcode(ATTR_NAME_VALUE); inited_ = true; DBG_POP(DBG_XML); } XML_Analyzer::XML_Analyzer(Connection* c) : Analyzer(AnalyzerTag::XML, c), conn(c) { if ( ! inited_ ) initialize(); if ( queries && queries->size() ) { orig_endp = new XML_AnalyzerEndpoint(this, queries, true); resp_endp = new XML_AnalyzerEndpoint(this, queries, false); } else { orig_endp = resp_endp = 0; SetSkip(true); return; } } XML_Analyzer::~XML_Analyzer() { delete orig_endp; delete resp_endp; } void XML_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { if ( !orig_endp || !resp_endp ) return; (orig ? orig_endp : resp_endp)->Deliver(len, data); } void XML_Analyzer::EndOfData(bool orig) { if ( !orig_endp || !resp_endp ) return; (orig ? orig_endp : resp_endp)->EndOfData(); } void XML_Analyzer::generateEvents(Result& result, DynamicContext * ctx) { Item::Ptr item; while(item = result->next(ctx)) { if ( !item->isNode() ) { xml_ev_error( "result item is no node", conn ); continue; } const NodeImpl * node = (const NodeImpl *) item.get(); const DOMNode * dnode = node->getDOMNode(); if ( !dnode || dnode->getNodeType() != DOMNode::ELEMENT_NODE ) { xml_ev_error( "result node is no element", conn ); continue; } const DOMElement * elt = (const DOMElement *) dnode; AutoDelete tag( XMLString::transcode(elt->getTagName()) ); if ( strcmp( tag.get(), TAG_NAME_EVENT ) ) { xml_ev_error( fmt("result element '%s' is no 'event' element", tag.get()), conn ); continue; } DOMAttr * attr = elt->getAttributeNode(X_TYPE_NAME); if ( !attr ) { xml_ev_error( "type of event not specified", conn ); continue; } AutoDelete xml_ev_type( XMLString::transcode(attr->getValue()) ); std::string ev_str("XML::"); ev_str += xml_ev_type.get(); EventHandler * event = event_registry->Lookup(ev_str.c_str()); if ( !event ) { xml_ev_error( fmt( "event %s does not exist", xml_ev_type.get() ), conn); continue; } // ok, we've got the event, now construct the val_list val_list* vl = getEventValList(elt); if ( vl ) conn->ConnectionEvent(EventHandlerPtr(event), this, vl); } } val_list * XML_Analyzer::getEventValList(const DOMElement * elt) { val_list * vl = new val_list; vl->append(BuildConnVal()); DOMNodeList * kids = elt->getChildNodes(); unsigned int i; for ( i = 0; i < kids->getLength(); i++ ) { DOMNode * node = kids->item(i); if ( node->getNodeType() != DOMNode::ELEMENT_NODE ) { xml_ev_error( "child node of event element is no element", conn ); break; } Val * val = getVal( (DOMElement *) node ); if ( !val ) break; vl->append(val); } if ( i < kids->getLength() ) { // an error occured; cleanup loop_over_list(*vl, j) Unref((*vl)[j]); delete vl; return 0; } return vl; } BroType * XML_Analyzer::getType(DOMElement * elt) { DOMAttr * attr = elt->getAttributeNode( X_TYPE_NAME ); if ( !attr ) { xml_ev_error( "no type attribute on element", conn ); return 0; } AutoDelete type_name( XMLString::transcode(attr->getValue()) ); std::string type_str("XML::"); type_str += type_name.get(); BroType * type = opt_internal_type( type_str.c_str() ); if ( !type ) { xml_ev_error( fmt( "type %s not defined", type_name.get()), conn ); return 0; } return type; } Val * XML_Analyzer::getVal(DOMElement * elt) { AutoDelete tag( XMLString::transcode(elt->getTagName()) ); TypeTag * val_type = val_types[std::string(tag.get())]; if ( !val_type ) { xml_ev_error( fmt( "Val type %s is not defined", tag.get() ), conn ); return 0; } switch(*val_type) { case TYPE_BOOL: return getBoolVal(elt); case TYPE_COUNT: return getCountVal(elt); case TYPE_INT: return getIntVal(elt); case TYPE_DOUBLE: return getDoubleVal(elt); case TYPE_STRING: return getStringVal(elt); case TYPE_RECORD: return getRecordVal(elt); case TYPE_LIST: return getSetVal(elt); case TYPE_TABLE: return getTableVal(elt); default: xml_ev_error( fmt( "Val type %s not supported", tag.get() ), conn ); return 0; } } Val * XML_Analyzer::getBoolVal(DOMElement * elt) { DOMAttr * attr = elt->getAttributeNode( X_VALUE_NAME ); if ( !attr ) { xml_ev_error( "no value attribute on 'bool' element", conn ); return 0; } AutoDelete str_val( XMLString::transcode(attr->getValue()) ); bool val = false; if ( !strcmp( str_val.get(), "true" ) ) val = true; else if ( !strcmp( str_val.get(), "false" ) ) val = false; else val = atoi(str_val.get()); return new Val( val, TYPE_COUNT ); } Val * XML_Analyzer::getCountVal(DOMElement * elt) { DOMAttr * attr = elt->getAttributeNode( X_VALUE_NAME ); if ( !attr ) { xml_ev_error( "no value attribute on 'count' element", conn ); return 0; } AutoDelete val( XMLString::transcode(attr->getValue()) ); return new Val( atoi(val.get()), TYPE_COUNT ); } Val * XML_Analyzer::getIntVal(DOMElement * elt) { DOMAttr * attr = elt->getAttributeNode( X_VALUE_NAME ); if ( !attr ) { xml_ev_error( "no value attribute on 'int' element", conn ); return 0; } AutoDelete val( XMLString::transcode(attr->getValue()) ); return new Val( atoi(val.get()), TYPE_INT ); } Val * XML_Analyzer::getDoubleVal(DOMElement * elt) { DOMAttr * attr = elt->getAttributeNode( X_VALUE_NAME ); if ( !attr ) { xml_ev_error( "no value attribute on 'double' element", conn ); return 0; } AutoDelete val( XMLString::transcode(attr->getValue()) ); return new Val( atof(val.get()), TYPE_DOUBLE ); } StringVal * XML_Analyzer::getStringVal(DOMElement * elt) { DOMAttr * attr = elt->getAttributeNode( X_VALUE_NAME ); if ( !attr ) { xml_ev_error( "no value attribute on 'string' element", conn ); return 0; } AutoDelete val( XMLString::transcode(attr->getValue()) ); return new StringVal( val.get() ); } RecordVal * XML_Analyzer::getRecordVal(DOMElement * elt) { BroType * type = getType(elt); if ( !type ) return 0; if ( type->Tag() != TYPE_RECORD ) { xml_ev_error( fmt( "type %s is not a record type", type_name(type->Tag()) ), conn ); return 0; } RecordVal * record = new RecordVal((RecordType *) type); DOMNodeList * kids = elt->getChildNodes(); for ( unsigned int i = 0; i < kids->getLength(); i++ ) { DOMNode * node = kids->item(i); if ( node->getNodeType() != DOMNode::ELEMENT_NODE ) { xml_ev_error( "child node of record is no element", conn ); Unref(record); return 0; } Val * val = getVal( (DOMElement *) node ); if ( !val ) { Unref(record); return 0; } record->Assign(i, val); } return record; } TableVal * XML_Analyzer::getSetVal(DOMElement * elt) { BroType * type = getType(elt); if ( !type ) return 0; if ( type->Tag() != TYPE_TABLE ) { xml_ev_error( fmt( "type %s is not a table type", type_name(type->Tag()) ), conn ); return 0; } TableVal * set = new TableVal( (TableType *) type ); // TODO: check that all set elements are of the set type DOMNodeList * kids = elt->getChildNodes(); for ( unsigned int i = 0; i < kids->getLength(); i++ ) { DOMNode * node = kids->item(i); if ( node->getNodeType() != DOMNode::ELEMENT_NODE ) { xml_ev_error( "child node of list is no element", conn ); Unref(set); return 0; } Val * val = getVal( (DOMElement *) node ); if ( !val ) { Unref(set); return 0; } if ( ! set->Assign(val, 0) ) { xml_ev_error( "types of set element didn't match set type", conn ); Unref(val); Unref(set); return 0; } Unref(val); } return set; } int XML_Analyzer::addTableEntry(DOMNode * node, TableVal * table) { if ( node->getNodeType() != DOMNode::ELEMENT_NODE ) { xml_ev_error( "child node of table is no element", conn ); return 0; } DOMElement * elt = (DOMElement *) node; AutoDelete tag( XMLString::transcode(elt->getTagName()) ); if ( strcmp(tag, TAG_NAME_TABLE_ENTRY) ) { xml_ev_error( "child node of table elt is not a 'tableEntry' elt", conn ); return 0; } DOMNodeList * kids = elt->getChildNodes(); if ( kids->getLength() != 2 ) { xml_ev_error( fmt("%d elements in table entry (should be 2)", (int) kids->getLength()), conn ); return 0; } DOMNode * indexNode = kids->item(0); if ( indexNode->getNodeType() != DOMNode::ELEMENT_NODE ) { xml_ev_error( "index node of table entry is no element", conn ); return 0; } DOMNode * valueNode = kids->item(1); if ( valueNode->getNodeType() != DOMNode::ELEMENT_NODE ) { xml_ev_error( "value node of table entry is no element", conn ); return 0; } Val * index = getVal( (DOMElement *) indexNode ); if ( !index ) return 0; Val * value = getVal( (DOMElement *) valueNode ); if ( !value ) { Unref(index); return 0; } if ( ! table->Assign(index, value) ) { xml_ev_error( "types of index and/or value of table didn't match", conn ); Unref(index); Unref(value); return 0; } Unref(index); return 1; } TableVal * XML_Analyzer::getTableVal(DOMElement * elt) { BroType * type = getType(elt); if ( !type ) return 0; if ( type->Tag() != TYPE_TABLE ) { xml_ev_error( fmt( "type %s is not a table type", type_name(type->Tag()) ), conn ); return 0; } TableVal * table = new TableVal( (TableType *) type ); DOMNodeList * kids = elt->getChildNodes(); for ( unsigned int i = 0; i < kids->getLength(); i++ ) if ( ! addTableEntry(kids->item(i), table) ) { Unref(table); return 0; } return table; } void xml_warn(const char * msg) { warn( "XML_Analyzer:", msg ); } void xml_warn(const char * msg, Connection * conn) { std::string txt("XML_Analyzer on connection "); txt += fmt_conn_id(conn->OrigAddr(), ntohs(conn->OrigPort()), conn->RespAddr(), ntohs(conn->RespPort())); txt += ": "; txt += msg; warn( txt.c_str() ); } void xml_error(const char * msg) { run_time( "XML_Analyzer: %s", msg ); } void xml_error(const char * msg, Connection * conn) { const char * conn_id = fmt_conn_id(conn->OrigAddr(), ntohs(conn->OrigPort()), conn->RespAddr(), ntohs(conn->RespPort())); run_time( "XML_Analyzer on connection %s: %s", conn_id, msg ); } void xml_ev_error(const char * msg) { run_time( "XML_Analyzer: problem constructing event: %s", msg ); } void xml_ev_error(const char * msg, Connection * conn) { const char * conn_id = fmt_conn_id(conn->OrigAddr(), ntohs(conn->OrigPort()), conn->RespAddr(), ntohs(conn->RespPort())); run_time( "XML_Analyzer on connection %s: problem constructing event: %s", conn_id, msg ); }