pax_global_header00006660000000000000000000000064124262215140014511gustar00rootroot0000000000000052 comment=9c224be43a43bc54ebfc2d2e47ab3b9f97836cb2 domhandler-2.3.0/000077500000000000000000000000001242622151400136305ustar00rootroot00000000000000domhandler-2.3.0/.travis.yml000066400000000000000000000002431242622151400157400ustar00rootroot00000000000000before_install: - '[ "${TRAVIS_NODE_VERSION}" != "0.8" ] || npm install -g npm@1.4.28' - npm install -g npm@latest language: node_js node_js: - 0.8 - 0.10 domhandler-2.3.0/LICENSE000066400000000000000000000023541242622151400146410ustar00rootroot00000000000000Copyright (c) Felix Böhm All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. domhandler-2.3.0/index.js000066400000000000000000000105271242622151400153020ustar00rootroot00000000000000var ElementType = require("domelementtype"); var re_whitespace = /\s+/g; var NodePrototype = require("./lib/node"); var ElementPrototype = require("./lib/element"); function DomHandler(callback, options, elementCB){ if(typeof callback === "object"){ elementCB = options; options = callback; callback = null; } else if(typeof options === "function"){ elementCB = options; options = defaultOpts; } this._callback = callback; this._options = options || defaultOpts; this._elementCB = elementCB; this.dom = []; this._done = false; this._tagStack = []; this._parser = this._parser || null; } //default options var defaultOpts = { normalizeWhitespace: false, //Replace all whitespace with single spaces withStartIndices: false, //Add startIndex properties to nodes }; DomHandler.prototype.onparserinit = function(parser){ this._parser = parser; }; //Resets the handler back to starting state DomHandler.prototype.onreset = function(){ DomHandler.call(this, this._callback, this._options, this._elementCB); }; //Signals the handler that parsing is done DomHandler.prototype.onend = function(){ if(this._done) return; this._done = true; this._parser = null; this._handleCallback(null); }; DomHandler.prototype._handleCallback = DomHandler.prototype.onerror = function(error){ if(typeof this._callback === "function"){ this._callback(error, this.dom); } else { if(error) throw error; } }; DomHandler.prototype.onclosetag = function(){ //if(this._tagStack.pop().name !== name) this._handleCallback(Error("Tagname didn't match!")); var elem = this._tagStack.pop(); if(this._elementCB) this._elementCB(elem); }; DomHandler.prototype._addDomElement = function(element){ var parent = this._tagStack[this._tagStack.length - 1]; var siblings = parent ? parent.children : this.dom; var previousSibling = siblings[siblings.length - 1]; element.next = null; if(this._options.withStartIndices){ element.startIndex = this._parser.startIndex; } if (this._options.withDomLvl1) { element.__proto__ = element.type === "tag" ? ElementPrototype : NodePrototype; } if(previousSibling){ element.prev = previousSibling; previousSibling.next = element; } else { element.prev = null; } siblings.push(element); element.parent = parent || null; }; DomHandler.prototype.onopentag = function(name, attribs){ var element = { type: name === "script" ? ElementType.Script : name === "style" ? ElementType.Style : ElementType.Tag, name: name, attribs: attribs, children: [] }; this._addDomElement(element); this._tagStack.push(element); }; DomHandler.prototype.ontext = function(data){ //the ignoreWhitespace is officially dropped, but for now, //it's an alias for normalizeWhitespace var normalize = this._options.normalizeWhitespace || this._options.ignoreWhitespace; var lastTag; if(!this._tagStack.length && this.dom.length && (lastTag = this.dom[this.dom.length-1]).type === ElementType.Text){ if(normalize){ lastTag.data = (lastTag.data + data).replace(re_whitespace, " "); } else { lastTag.data += data; } } else { if( this._tagStack.length && (lastTag = this._tagStack[this._tagStack.length - 1]) && (lastTag = lastTag.children[lastTag.children.length - 1]) && lastTag.type === ElementType.Text ){ if(normalize){ lastTag.data = (lastTag.data + data).replace(re_whitespace, " "); } else { lastTag.data += data; } } else { if(normalize){ data = data.replace(re_whitespace, " "); } this._addDomElement({ data: data, type: ElementType.Text }); } } }; DomHandler.prototype.oncomment = function(data){ var lastTag = this._tagStack[this._tagStack.length - 1]; if(lastTag && lastTag.type === ElementType.Comment){ lastTag.data += data; return; } var element = { data: data, type: ElementType.Comment }; this._addDomElement(element); this._tagStack.push(element); }; DomHandler.prototype.oncdatastart = function(){ var element = { children: [{ data: "", type: ElementType.Text }], type: ElementType.CDATA }; this._addDomElement(element); this._tagStack.push(element); }; DomHandler.prototype.oncommentend = DomHandler.prototype.oncdataend = function(){ this._tagStack.pop(); }; DomHandler.prototype.onprocessinginstruction = function(name, data){ this._addDomElement({ name: name, data: data, type: ElementType.Directive }); }; module.exports = DomHandler; domhandler-2.3.0/lib/000077500000000000000000000000001242622151400143765ustar00rootroot00000000000000domhandler-2.3.0/lib/element.js000066400000000000000000000006731242622151400163730ustar00rootroot00000000000000// DOM-Level-1-compliant structure var NodePrototype = require('./node'); var ElementPrototype = module.exports = Object.create(NodePrototype); var domLvl1 = { tagName: "name" }; Object.keys(domLvl1).forEach(function(key) { var shorthand = domLvl1[key]; Object.defineProperty(ElementPrototype, key, { get: function() { return this[shorthand] || null; }, set: function(val) { this[shorthand] = val; return val; } }); }); domhandler-2.3.0/lib/node.js000066400000000000000000000016231242622151400156630ustar00rootroot00000000000000// This object will be used as the prototype for Nodes when creating a // DOM-Level-1-compliant structure. var NodePrototype = module.exports = { get firstChild() { var children = this.children; return children && children[0] || null; }, get lastChild() { var children = this.children; return children && children[children.length - 1] || null; }, get nodeType() { return nodeTypes[this.type] || nodeTypes.element; } }; var domLvl1 = { tagName: "name", childNodes: "children", parentNode: "parent", previousSibling: "prev", nextSibling: "next", nodeValue: "data" }; var nodeTypes = { element: 1, text: 3, cdata: 4, comment: 8 }; Object.keys(domLvl1).forEach(function(key) { var shorthand = domLvl1[key]; Object.defineProperty(NodePrototype, key, { get: function() { return this[shorthand] || null; }, set: function(val) { this[shorthand] = val; return val; } }); }); domhandler-2.3.0/package.json000066400000000000000000000014141242622151400161160ustar00rootroot00000000000000{ "name": "domhandler", "version": "2.3.0", "description": "handler for htmlparser2 that turns pages into a dom", "main": "index.js", "directories": { "test": "tests" }, "scripts": { "test": "mocha -R list && jshint index.js test/" }, "repository": { "type": "git", "url": "git://github.com/fb55/DomHandler.git" }, "keywords": [ "dom", "htmlparser2" ], "dependencies": { "domelementtype": "1" }, "devDependencies": { "htmlparser2": "3.8", "mocha": "1", "jshint": "~2.3.0" }, "author": "Felix Boehm ", "jshintConfig": { "quotmark": "double", "trailing": true, "unused": true, "undef": true, "node": true, "proto": true, "globals": { "it": true } } } domhandler-2.3.0/readme.md000066400000000000000000000043561242622151400154170ustar00rootroot00000000000000#DOMHandler [![Build Status](https://secure.travis-ci.org/fb55/DomHandler.png)](http://travis-ci.org/fb55/DomHandler) The DOM handler (formally known as DefaultHandler) creates a tree containing all nodes of a page. The tree may be manipulated using the DOMUtils library. ##Usage ```javascript var handler = new DomHandler([ callback(err, dom), ] [ options ]); // var parser = new Parser(handler[, options]); ``` ##Example ```javascript var htmlparser = require("htmlparser2"); var rawHtml = "Xyz ", "expected": [ { "type": "script", "name": "script", "attribs": {}, "children": [ { "data": "", "type": "text" } ] } ] }domhandler-2.3.0/test/cases/07-unescaped_in_style.json000066400000000000000000000006271242622151400227060ustar00rootroot00000000000000{ "name": "Unescaped chars in style", "options": {}, "html": "", "expected": [ { "type": "style", "name": "style", "attribs": { "type": "text/css" }, "children": [ { "data": "\n body > p\n\t{ font-weight: bold; }", "type": "text" } ] } ] }domhandler-2.3.0/test/cases/08-extra_spaces_in_tag.json000066400000000000000000000005251242622151400230310ustar00rootroot00000000000000{ "name": "Extra spaces in tag", "options": {}, "html": "the text", "expected": [ { "type": "tag", "name": "font", "attribs": { "size": "14" }, "children": [ { "data": "the text", "type": "text" } ] } ] }domhandler-2.3.0/test/cases/09-unquoted_attrib.json000066400000000000000000000005041242622151400222360ustar00rootroot00000000000000{ "name": "Unquoted attributes", "options": {}, "html": "the text", "expected": [ { "type": "tag", "name": "font", "attribs": { "size": "14" }, "children": [ { "data": "the text", "type": "text" } ] } ] }domhandler-2.3.0/test/cases/10-singular_attribute.json000066400000000000000000000003661242622151400227320ustar00rootroot00000000000000{ "name": "Singular attribute", "options": {}, "html": "