From 510dc42f67a737cad3ef208a616269dd170aa77b Mon Sep 17 00:00:00 2001 From: Adam Brunner Date: Tue, 17 Nov 2015 15:30:16 +0100 Subject: [PATCH 01/10] Add IDEA project folder to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 91dfed8..0aa35d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .DS_Store -node_modules \ No newline at end of file +node_modules +.idea From de64be5f8d5246e035567da498e073045c027789 Mon Sep 17 00:00:00 2001 From: Adam Brunner Date: Tue, 17 Nov 2015 15:30:26 +0100 Subject: [PATCH 02/10] Remove unused dependancies --- package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/package.json b/package.json index eca641c..3f9bf42 100644 --- a/package.json +++ b/package.json @@ -31,7 +31,6 @@ "homepage": "https://github.com/maxogden/commonjs-html-prettyprinter", "dependencies": { "concat-stream": "^1.4.7", - "glob": "^3.1.13" }, "devDependencies": {}, "scripts": { From 7a7b101166ea98af4f9c674107b52951879b7596 Mon Sep 17 00:00:00 2001 From: Adam Brunner Date: Tue, 17 Nov 2015 15:30:40 +0100 Subject: [PATCH 03/10] Code formatting --- bin/html.js | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/bin/html.js b/bin/html.js index 839cea6..31acf07 100755 --- a/bin/html.js +++ b/bin/html.js @@ -1,39 +1,43 @@ #!/usr/bin/env node -var html = require("../lib/html") -var fs = require('fs') -var concat = require('concat-stream') +var html = require('../lib/html'); +var fs = require('fs'); +var concat = require('concat-stream'); -var args = process.argv.slice(0) +var args = process.argv.slice(0); // shift off node and script name -args.shift() -args.shift() +args.shift(); +args.shift(); -if (args.length > 0) processFiles(args) -else readStdin() +if (args.length > 0) { + processFiles(args); +} else { + readStdin(); +} function readStdin() { - var stdin = process.openStdin() - stdin.pipe(concat(function concatted (buff) { - process.stdout.write(html.prettyPrint(buff.toString(), {indent_size: 2})) - })) + var stdin = process.openStdin(); + stdin.pipe(concat(function concatted(buff) { + var prettyHtml = html.prettyPrint(buff.toString(), { indent_size: 2 }); + process.stdout.write(prettyHtml); + })); } function processFiles(files) { if (files.length > 1) { files.map(function(filename) { prettifyFile(filename) - }) + }); return } - var str = fs.readFileSync(files[0]).toString() - process.stdout.write(prettify(str)) + var str = fs.readFileSync(files[0]).toString(); + process.stdout.write(prettify(str)); } function prettify(str) { - return html.prettyPrint(str, {indent_size: 2}) + return html.prettyPrint(str, { indent_size: 2 }); } function prettifyFile(filename) { - fs.writeFileSync(filename, prettify(fs.readFileSync(filename).toString())) + fs.writeFileSync(filename, prettify(fs.readFileSync(filename).toString())); } From f0cb558b6105efbbbf323bd6fdb97ba9a8c0ee8f Mon Sep 17 00:00:00 2001 From: Adam Brunner Date: Wed, 18 Nov 2015 10:28:49 +0100 Subject: [PATCH 04/10] Syntax error package.json --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 3f9bf42..f2ee4e0 100644 --- a/package.json +++ b/package.json @@ -30,7 +30,7 @@ }, "homepage": "https://github.com/maxogden/commonjs-html-prettyprinter", "dependencies": { - "concat-stream": "^1.4.7", + "concat-stream": "^1.4.7" }, "devDependencies": {}, "scripts": { From aff399e24ccc26c009c5cae8bb00c26de9f64de0 Mon Sep 17 00:00:00 2001 From: Adam Brunner Date: Wed, 18 Nov 2015 10:29:10 +0100 Subject: [PATCH 05/10] Extract Parser to external lib class --- lib/html.js | 537 ++++++++------------------------------------------ lib/parser.js | 415 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 500 insertions(+), 452 deletions(-) create mode 100644 lib/parser.js diff --git a/lib/html.js b/lib/html.js index 455c1ae..06fc1ff 100644 --- a/lib/html.js +++ b/lib/html.js @@ -1,31 +1,32 @@ /* - Style HTML +Style HTML --------------- - Written by Nochum Sossonko, (nsossonko@hotmail.com) +Written by Nochum Sossonko, (nsossonko@hotmail.com) - Based on code initially developed by: Einar Lielmanis, - http://jsbeautifier.org/ +Based on code initially developed by: Einar Lielmanis, + http://jsbeautifier.org/ - You are free to use this in any way you want, in case you find this useful or working for you. +You are free to use this in any way you want, in case you find this useful or working for you. - Usage: - style_html(html_source); +Usage: + style_html(html_source); - style_html(html_source, options); + style_html(html_source, options); - The options are: - indent_size (default 4) — indentation size, - indent_char (default space) — character to indent with, - max_char (default 70) - maximum amount of characters per line, - brace_style (default "collapse") - "collapse" | "expand" | "end-expand" - put braces on the same line as control statements (default), or put braces on own line (Allman / ANSI style), or just put end braces on own line. - unformatted (defaults to inline tags) - list of tags, that shouldn't be reformatted - indent_scripts (default normal) - "keep"|"separate"|"normal" +The options are: + indent_size (default 4) — indentation size, + indent_char (default space) — character to indent with, + max_char (default 70) - maximum amount of characters per line, + brace_style (default "collapse") - "collapse" | "expand" | "end-expand" + put braces on the same line as control statements (default), or put braces on own line (Allman / ANSI style), + or just put end braces on own line. + unformatted (defaults to inline tags) - list of tags, that shouldn't be reformatted + indent_scripts (default normal) - "keep"|"separate"|"normal" - e.g. + e.g. style_html(html_source, { 'indent_size': 2, @@ -36,472 +37,102 @@ }); */ -function style_html(html_source, options) { -//Wrapper function to invoke all the necessary constructors and deal with the output. - - var multi_parser, - indent_size, - indent_character, - max_char, - brace_style, - unformatted; +var Parser = require('./parser'); +function style_html(html_source, options) { options = options || {}; - indent_size = options.indent_size || 4; - indent_character = options.indent_char || ' '; - brace_style = options.brace_style || 'collapse'; - max_char = options.max_char == 0 ? Infinity : options.max_char || 70; - unformatted = options.unformatted || ['a', 'span', 'bdo', 'em', 'strong', 'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'q', 'sub', 'sup', 'tt', 'i', 'b', 'big', 'small', 'u', 's', 'strike', 'font', 'ins', 'del', 'pre', 'address', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']; - - function Parser() { - - this.pos = 0; //Parser position - this.token = ''; - this.current_mode = 'CONTENT'; //reflects the current Parser mode: TAG/CONTENT - this.tags = { //An object to hold tags, their position, and their parent-tags, initiated with default values - parent: 'parent1', - parentcount: 1, - parent1: '' - }; - this.tag_type = ''; - this.token_text = this.last_token = this.last_text = this.token_type = ''; - - this.Utils = { //Uilities made available to the various functions - whitespace: "\n\r\t ".split(''), - single_token: 'br,input,link,meta,!doctype,basefont,base,area,hr,wbr,param,img,isindex,?xml,embed,?php,?,?='.split(','), //all the single tags for HTML - extra_liners: 'head,body,/html'.split(','), //for tags that need a line of whitespace before them - in_array: function (what, arr) { - for (var i=0; i= this.input.length) { - return content.length?content.join(''):['', 'TK_EOF']; - } - - input_char = this.input.charAt(this.pos); - this.pos++; - this.line_char_count++; - - if (this.Utils.in_array(input_char, this.Utils.whitespace)) { - if (content.length) { - space = true; - } - this.line_char_count--; - continue; //don't want to insert unnecessary space - } - else if (space) { - if (this.line_char_count >= this.max_char) { //insert a line when the max_char is reached - content.push('\n'); - for (var i=0; i', 'igm'); - reg_match.lastIndex = this.pos; - var reg_array = reg_match.exec(this.input); - var end_script = reg_array?reg_array.index:this.input.length; //absolute end of script - if(this.pos < end_script) { //get everything in between the script tags - content = this.input.substring(this.pos, end_script); - this.pos = end_script; - } - return content; - } - - this.record_tag = function (tag){ //function to record a tag and its parent in this.tags Object - if (this.tags[tag + 'count']) { //check for the existence of this tag type - this.tags[tag + 'count']++; - this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level - } - else { //otherwise initialize this tag type - this.tags[tag + 'count'] = 1; - this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level - } - this.tags[tag + this.tags[tag + 'count'] + 'parent'] = this.tags.parent; //set the parent (i.e. in the case of a div this.tags.div1parent) - this.tags.parent = tag + this.tags[tag + 'count']; //and make this the current parent (i.e. in the case of a div 'div1') - } - - this.retrieve_tag = function (tag) { //function to retrieve the opening tag to the corresponding closer - if (this.tags[tag + 'count']) { //if the openener is not in the Object we ignore it - var temp_parent = this.tags.parent; //check to see if it's a closable tag. - while (temp_parent) { //till we reach '' (the initial value); - if (tag + this.tags[tag + 'count'] === temp_parent) { //if this is it use it - break; - } - temp_parent = this.tags[temp_parent + 'parent']; //otherwise keep on climbing up the DOM Tree - } - if (temp_parent) { //if we caught something - this.indent_level = this.tags[tag + this.tags[tag + 'count']]; //set the indent_level accordingly - this.tags.parent = this.tags[temp_parent + 'parent']; //and set the current parent - } - delete this.tags[tag + this.tags[tag + 'count'] + 'parent']; //delete the closed tags parent reference... - delete this.tags[tag + this.tags[tag + 'count']]; //...and the tag itself - if (this.tags[tag + 'count'] == 1) { - delete this.tags[tag + 'count']; - } - else { - this.tags[tag + 'count']--; - } - } - } - - this.get_tag = function () { //function to get a full tag and parse its type - var input_char = '', - content = [], - space = false, - tag_start, tag_end; - - do { - if (this.pos >= this.input.length) { - return content.length?content.join(''):['', 'TK_EOF']; - } - input_char = this.input.charAt(this.pos); - this.pos++; - this.line_char_count++; - - if (this.Utils.in_array(input_char, this.Utils.whitespace)) { //don't want to insert unnecessary space - space = true; - this.line_char_count--; - continue; - } - - if (input_char === "'" || input_char === '"') { - if (!content[1] || content[1] !== '!') { //if we're in a comment strings don't get treated specially - input_char += this.get_unformatted(input_char); - space = true; - } - } - - if (input_char === '=') { //no space before = - space = false; - } + var script_indent_level; + var indent_scripts = options.indent_scripts || 'normal'; + var indent_size = options.indent_size || 4; + var indent_character = options.indent_char || ' '; + var brace_style = options.brace_style || 'collapse'; + var max_char = options.max_char == 0 ? Infinity : options.max_char || 70; + var unformatted = options.unformatted || ['a', 'span', 'bdo', 'em', 'strong', 'dfn', 'code', 'samp', 'kbd', 'var', + 'cite', 'abbr', 'acronym', 'q', 'sub', 'sup', 'tt', 'i', 'b', 'big', 'small', 'u', 's', 'strike', 'font', 'ins', + 'del', 'pre', 'address', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']; - if (content.length && content[content.length-1] !== '=' && input_char !== '>' - && space) { //no space after = or before > - if (this.line_char_count >= this.max_char) { - this.print_newline(false, content); - this.line_char_count = 0; - } - else { - content.push(' '); - this.line_char_count++; - } - space = false; - } - if (input_char === '<') { - tag_start = this.pos - 1; - } - content.push(input_char); //inserts character at-a-time (or string) - } while (input_char !== '>'); - - var tag_complete = content.join(''); - var tag_index; - if (tag_complete.indexOf(' ') != -1) { //if there's whitespace, thats where the tag name ends - tag_index = tag_complete.indexOf(' '); - } - else { //otherwise go with the tag ending - tag_index = tag_complete.indexOf('>'); - } - var tag_check = tag_complete.substring(1, tag_index).toLowerCase(); - if (tag_complete.charAt(tag_complete.length-2) === '/' || - this.Utils.in_array(tag_check, this.Utils.single_token)) { //if this tag name is a single tag type (either in the list or has a closing /) - this.tag_type = 'SINGLE'; - } - else if (tag_check === 'script') { //for later script handling - this.record_tag(tag_check); - this.tag_type = 'SCRIPT'; - } - else if (tag_check === 'style') { //for future style handling (for now it justs uses get_content) - this.record_tag(tag_check); - this.tag_type = 'STYLE'; - } - else if (this.Utils.in_array(tag_check, unformatted)) { // do not reformat the "unformatted" tags - var comment = this.get_unformatted('', tag_complete); //...delegate to get_unformatted function - content.push(comment); - // Preserve collapsed whitespace either before or after this tag. - if (tag_start > 0 && this.Utils.in_array(this.input.charAt(tag_start - 1), this.Utils.whitespace)){ - content.splice(0, 0, this.input.charAt(tag_start - 1)); - } - tag_end = this.pos - 1; - if (this.Utils.in_array(this.input.charAt(tag_end + 1), this.Utils.whitespace)){ - content.push(this.input.charAt(tag_end + 1)); - } - this.tag_type = 'SINGLE'; - } - else if (tag_check.charAt(0) === '!') { //peek for so... - var comment = this.get_unformatted('-->', tag_complete); //...delegate to get_unformatted - content.push(comment); - } - this.tag_type = 'START'; - } - else if (tag_check.indexOf('[endif') != -1) {//peek for ', tag_complete); - content.push(comment); - this.tag_type = 'SINGLE'; - } - } - else { - if (tag_check.charAt(0) === '/') { //this tag is a double tag so check for tag-ending - this.retrieve_tag(tag_check.substring(1)); //remove it and all ancestors - this.tag_type = 'END'; - } - else { //otherwise it's a start-tag - this.record_tag(tag_check); //push it on the tag stack - this.tag_type = 'START'; - } - if (this.Utils.in_array(tag_check, this.Utils.extra_liners)) { //check if this double needs an extra line - this.print_newline(true, this.output); - } - } - return content.join(''); //returns fully formatted tag - } - - this.get_unformatted = function (delimiter, orig_tag) { //function to return unformatted content in its entirety - - if (orig_tag && orig_tag.toLowerCase().indexOf(delimiter) != -1) { - return ''; - } - var input_char = ''; - var content = ''; - var space = true; - do { - - if (this.pos >= this.input.length) { - return content; - } - - input_char = this.input.charAt(this.pos); - this.pos++ - - if (this.Utils.in_array(input_char, this.Utils.whitespace)) { - if (!space) { - this.line_char_count--; - continue; - } - if (input_char === '\n' || input_char === '\r') { - content += '\n'; - /* Don't change tab indention for unformatted blocks. If using code for html editing, this will greatly affect
 tags if they are specified in the 'unformatted array'
-            for (var i=0; i 0) {
-          this.indent_level--;
-        }
-      }
-    }
-    return this;
-  }
-
-  /*_____________________--------------------_____________________*/
-
-  multi_parser = new Parser(); //wrapping functions Parser
-  multi_parser.printer(html_source, indent_character, indent_size, max_char, brace_style); //initialize starting values
+  var multi_parser = new Parser();
+  multi_parser.printer(html_source, indent_character, indent_size, max_char, brace_style, unformatted);
 
   while (true) {
-      var t = multi_parser.get_token();
-      multi_parser.token_text = t[0];
-      multi_parser.token_type = t[1];
+    var t = multi_parser.get_token();
+    multi_parser.token_text = t[0];
+    multi_parser.token_type = t[1];
 
-    if (multi_parser.token_type === 'TK_EOF') {
+    if (multi_parser.token_type === Parser.TOKEN_TYPE_EOF) {
       break;
     }
 
     switch (multi_parser.token_type) {
-      case 'TK_TAG_START':
+      case Parser.TOKEN_TYPE_TAG_NAME_START:
         multi_parser.print_newline(false, multi_parser.output);
         multi_parser.print_token(multi_parser.token_text);
         multi_parser.indent();
-        multi_parser.current_mode = 'CONTENT';
+        multi_parser.current_mode = Parser.MODE_CONTENT;
         break;
-      case 'TK_TAG_STYLE':
-      case 'TK_TAG_SCRIPT':
+
+      case Parser.TOKEN_TYPE_TAG_NAME_STYLE:
+      case Parser.TOKEN_TYPE_TAG_NAME_SCRIPT:
         multi_parser.print_newline(false, multi_parser.output);
         multi_parser.print_token(multi_parser.token_text);
-        multi_parser.current_mode = 'CONTENT';
+        multi_parser.current_mode = Parser.MODE_CONTENT;
         break;
-      case 'TK_TAG_END':
+
+      case Parser.TOKEN_TYPE_TAG_NAME_END:
         //Print new line only if the tag has no content and has child
-        if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') {
-            var tag_name = multi_parser.token_text.match(/\w+/)[0];
-            var tag_extracted_from_last_output = multi_parser.output[multi_parser.output.length -1].match(/<\s*(\w+)/);
-            if (tag_extracted_from_last_output === null || tag_extracted_from_last_output[1] !== tag_name)
-                multi_parser.print_newline(true, multi_parser.output);
+        if (multi_parser.last_token === Parser.TOKEN_TYPE_CONTENT && multi_parser.last_text === '') {
+          var tag_name = multi_parser.token_text.match(/\w+/)[0];
+          var tag_extracted_from_last_output = multi_parser.output[multi_parser.output.length - 1].match(/<\s*(\w+)/);
+          if (tag_extracted_from_last_output === null || tag_extracted_from_last_output[1] !== tag_name) {
+            multi_parser.print_newline(true, multi_parser.output);
+          }
         }
         multi_parser.print_token(multi_parser.token_text);
-        multi_parser.current_mode = 'CONTENT';
+        multi_parser.current_mode = Parser.MODE_CONTENT;
         break;
-      case 'TK_TAG_SINGLE':
+
+      case Parser.TOKEN_TYPE_TAG_NAME_SINGLE:
         // Don't add a newline before elements that should remain unformatted.
         var tag_check = multi_parser.token_text.match(/^\s*<([a-z]+)/i);
-        if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)){
-            multi_parser.print_newline(false, multi_parser.output);
+        if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) {
+          multi_parser.print_newline(false, multi_parser.output);
         }
         multi_parser.print_token(multi_parser.token_text);
-        multi_parser.current_mode = 'CONTENT';
+        multi_parser.current_mode = Parser.MODE_CONTENT;
         break;
-      case 'TK_CONTENT':
+
+      case Parser.TOKEN_TYPE_CONTENT:
         if (multi_parser.token_text !== '') {
           multi_parser.print_token(multi_parser.token_text);
         }
-        multi_parser.current_mode = 'TAG';
+        multi_parser.current_mode = Parser.MODE_TAG;
         break;
-      case 'TK_STYLE':
-      case 'TK_SCRIPT':
+
+      case Parser.TOKEN_TYPE_STYLE:
+      case Parser.TOKEN_TYPE_SCRIPT:
         if (multi_parser.token_text !== '') {
           multi_parser.output.push('\n');
           var text = multi_parser.token_text;
-          if (multi_parser.token_type == 'TK_SCRIPT') {
-            var _beautifier = typeof js_beautify == 'function' && js_beautify;
-          } else if (multi_parser.token_type == 'TK_STYLE') {
-            var _beautifier = typeof css_beautify == 'function' && css_beautify;
+          var _beautifier = null;
+          if (multi_parser.token_type == Parser.TOKEN_TYPE_SCRIPT) {
+            _beautifier = typeof js_beautify == 'function' && js_beautify;
+          } else if (multi_parser.token_type == Parser.TOKEN_TYPE_STYLE) {
+            _beautifier = typeof css_beautify == 'function' && css_beautify;
           }
 
-          if (options.indent_scripts == "keep") {
-            var script_indent_level = 0;
-          } else if (options.indent_scripts == "separate") {
-            var script_indent_level = -multi_parser.indent_level;
-          } else {
-            var script_indent_level = 1;
+          switch (indent_scripts) {
+            case 'keep':
+              script_indent_level = 0;
+              break;
+
+            case 'separate':
+              script_indent_level = -multi_parser.indent_level;
+              break;
+
+            case 'normal':
+            default:
+              script_indent_level = 1;
           }
 
           var indentation = multi_parser.get_full_indent(script_indent_level);
@@ -512,25 +143,27 @@ function style_html(html_source, options) {
             // simply indent the string otherwise
             var white = text.match(/^\s*/)[0];
             var _level = white.match(/[^\n\r]*$/)[0].split(multi_parser.indent_string).length - 1;
-            var reindent = multi_parser.get_full_indent(script_indent_level -_level);
+            var reindent = multi_parser.get_full_indent(script_indent_level - _level);
             text = text.replace(/^\s*/, indentation)
-                   .replace(/\r\n|\r|\n/g, '\n' + reindent)
-                   .replace(/\s*$/, '');
+              .replace(/\r\n|\r|\n/g, '\n' + reindent)
+              .replace(/\s*$/, '');
           }
           if (text) {
             multi_parser.print_token(text);
             multi_parser.print_newline(true, multi_parser.output);
           }
         }
-        multi_parser.current_mode = 'TAG';
+        multi_parser.current_mode = Parser.MODE_TAG;
         break;
     }
+
     multi_parser.last_token = multi_parser.token_type;
     multi_parser.last_text = multi_parser.token_text;
   }
+
   return multi_parser.output.join('');
 }
 
 module.exports = {
   prettyPrint: style_html
-};
\ No newline at end of file
+};
diff --git a/lib/parser.js b/lib/parser.js
new file mode 100644
index 0000000..b762b54
--- /dev/null
+++ b/lib/parser.js
@@ -0,0 +1,415 @@
+function Parser() {
+
+  this.pos = 0; //Parser position
+  this.current_mode = Parser.MODE_CONTENT; //reflects the current Parser mode: TAG/CONTENT
+  this.tags = { //An object to hold tags, their position, and their parent-tags, initiated with default values
+    parent: 'parent1',
+    parentcount: 1,
+    parent1: ''
+  };
+  this.tag_type = '';
+  this.token_text = this.last_token = this.last_text = this.token_type = '';
+
+  return this;
+}
+
+Parser.prototype = {
+  Utils: { //Utilities made available to the various functions
+    whitespace: "\n\r\t ".split(''),
+    single_token: 'br,input,link,meta,!doctype,basefont,base,area,hr,wbr,param,img,isindex,?xml,embed,?php,?,?='.split(','), //all the single tags for HTML
+    extra_liners: 'head,body,/html'.split(','), //for tags that need a line of whitespace before them
+    in_array: function(what, arr) {
+      return arr.indexOf(what) !== -1;
+    }
+  },
+
+  get_content: function() { //function to capture regular content between tags
+    var input_char = '',
+      content = [],
+      space = false; //if a space is needed
+
+    while (this.input.charAt(this.pos) !== '<') {
+      if (this.pos >= this.input.length) {
+        return content.length ? content.join('') : ['', Parser.TOKEN_TYPE_EOF];
+      }
+
+      input_char = this.input.charAt(this.pos);
+      this.pos++;
+      this.line_char_count++;
+
+      if (this.Utils.in_array(input_char, this.Utils.whitespace)) {
+        if (content.length) {
+          space = true;
+        }
+        this.line_char_count--;
+        continue; //don't want to insert unnecessary space
+      }
+      else if (space) {
+        if (this.line_char_count >= this.max_char) { //insert a line when the max_char is reached
+          content.push('\n');
+          for (var i = 0; i < this.indent_level; i++) {
+            content.push(this.indent_string);
+          }
+          this.line_char_count = 0;
+        }
+        else {
+          content.push(' ');
+          this.line_char_count++;
+        }
+        space = false;
+      }
+      content.push(input_char); //letter at-a-time (or string) inserted to an array
+    }
+    return content.length ? content.join('') : '';
+  },
+
+  get_contents_to: function(name) { //get the full content of a script or style to pass to js_beautify
+    if (this.pos == this.input.length) {
+      return ['', Parser.TOKEN_TYPE_EOF];
+    }
+    var content = '';
+    var reg_match = new RegExp('\<\/' + name + '\\s*\>', 'igm');
+    reg_match.lastIndex = this.pos;
+    var reg_array = reg_match.exec(this.input);
+    var end_script = reg_array ? reg_array.index : this.input.length; //absolute end of script
+    if (this.pos < end_script) { //get everything in between the script tags
+      content = this.input.substring(this.pos, end_script);
+      this.pos = end_script;
+    }
+    return content;
+  },
+
+  record_tag: function(tag) { //function to record a tag and its parent in this.tags Object
+    var tag_count = tag + 'count';
+    if (this.tags[tag_count]) { //check for the existence of this tag type
+      this.tags[tag_count]++;
+      this.tags[tag + this.tags[tag_count]] = this.indent_level; //and record the present indent level
+    }
+    else { //otherwise initialize this tag type
+      this.tags[tag_count] = 1;
+      this.tags[tag + this.tags[tag_count]] = this.indent_level; //and record the present indent level
+    }
+    this.tags[tag + this.tags[tag_count] + 'parent'] = this.tags.parent; //set the parent (i.e. in the case of a div this.tags.div1parent)
+    this.tags.parent = tag + this.tags[tag_count]; //and make this the current parent (i.e. in the case of a div 'div1')
+  },
+
+  retrieve_tag: function(tag) { //function to retrieve the opening tag to the corresponding closer
+    var tag_count = tag + 'count';
+    if (this.tags[tag_count]) { //if the openener is not in the Object we ignore it
+      var temp_parent = this.tags.parent; //check to see if it's a closable tag.
+      while (temp_parent) { //till we reach '' (the initial value);
+        if (tag + this.tags[tag_count] === temp_parent) { //if this is it use it
+          break;
+        }
+        temp_parent = this.tags[temp_parent + 'parent']; //otherwise keep on climbing up the DOM Tree
+      }
+      if (temp_parent) { //if we caught something
+        this.indent_level = this.tags[tag + this.tags[tag_count]]; //set the indent_level accordingly
+        this.tags.parent = this.tags[temp_parent + 'parent']; //and set the current parent
+      }
+      delete this.tags[tag + this.tags[tag_count] + 'parent']; //delete the closed tags parent reference...
+      delete this.tags[tag + this.tags[tag_count]]; //...and the tag itself
+      if (this.tags[tag_count] == 1) {
+        delete this.tags[tag_count];
+      }
+      else {
+        this.tags[tag_count]--;
+      }
+    }
+  },
+
+  get_tag: function() { //function to get a full tag and parse its type
+    var input_char = '',
+      content = [],
+      space = false,
+      tag_start, tag_end;
+
+    do {
+      if (this.pos >= this.input.length) {
+        return content.length ? content.join('') : ['', Parser.TOKEN_TYPE_EOF];
+      }
+
+      input_char = this.input.charAt(this.pos);
+      this.pos++;
+      this.line_char_count++;
+
+      if (this.Utils.in_array(input_char, this.Utils.whitespace)) { //don't want to insert unnecessary space
+        space = true;
+        this.line_char_count--;
+        continue;
+      }
+
+      if (input_char === "'" || input_char === '"') {
+        if (!content[1] || content[1] !== '!') { //if we're in a comment strings don't get treated specially
+          input_char += this.get_unformatted(input_char);
+          space = true;
+        }
+      }
+
+      if (input_char === '=') { //no space before =
+        space = false;
+      }
+
+      if (content.length && content[content.length - 1] !== '=' && input_char !== '>'
+        && space) { //no space after = or before >
+        if (this.line_char_count >= this.max_char) {
+          this.print_newline(false, content);
+          this.line_char_count = 0;
+        }
+        else {
+          content.push(' ');
+          this.line_char_count++;
+        }
+        space = false;
+      }
+      if (input_char === '<') {
+        tag_start = this.pos - 1;
+      }
+      content.push(input_char); //inserts character at-a-time (or string)
+    }
+    while (input_char !== '>');
+
+    var tag_complete = content.join('');
+    var tag_index;
+    var comment;
+    if (tag_complete.indexOf(' ') != -1) { //if there's whitespace, thats where the tag name ends
+      tag_index = tag_complete.indexOf(' ');
+    }
+    else { //otherwise go with the tag ending
+      tag_index = tag_complete.indexOf('>');
+    }
+    var tag_check = tag_complete.substring(1, tag_index).toLowerCase();
+    if (tag_complete.charAt(tag_complete.length - 2) === '/' ||
+      this.Utils.in_array(tag_check, this.Utils.single_token)) { //if this tag name is a single tag type (either in the list or has a closing /)
+      this.tag_type = Parser.TOKEN_TYPE_TAG_SINGLE;
+    }
+    else if (tag_check === 'script') { //for later script handling
+      this.record_tag(tag_check);
+      this.tag_type = Parser.TOKEN_TYPE_TAG_SCRIPT;
+    }
+    else if (tag_check === 'style') { //for future style handling (for now it justs uses get_content)
+      this.record_tag(tag_check);
+      this.tag_type = Parser.TOKEN_TYPE_TAG_STYLE;
+    }
+    else if (this.Utils.in_array(tag_check, this.unformatted)) { // do not reformat the "unformatted" tags
+      comment = this.get_unformatted('', tag_complete); //...delegate to get_unformatted function
+      content.push(comment);
+      // Preserve collapsed whitespace either before or after this tag.
+      if (tag_start > 0 && this.Utils.in_array(this.input.charAt(tag_start - 1), this.Utils.whitespace)) {
+        content.splice(0, 0, this.input.charAt(tag_start - 1));
+      }
+      tag_end = this.pos - 1;
+      if (this.Utils.in_array(this.input.charAt(tag_end + 1), this.Utils.whitespace)) {
+        content.push(this.input.charAt(tag_end + 1));
+      }
+      this.tag_type = Parser.TOKEN_TYPE_TAG_SINGLE;
+    }
+    else if (tag_check.charAt(0) === '!') { //peek for  so...
+          comment = this.get_unformatted('-->', tag_complete); //...delegate to get_unformatted
+          content.push(comment);
+        }
+        this.tag_type = Parser.TOKEN_TYPE_TAG_START;
+      }
+      else if (tag_check.indexOf('[endif') != -1) {//peek for ', tag_complete);
+        content.push(comment);
+        this.tag_type = Parser.TOKEN_TYPE_TAG_SINGLE;
+      }
+    }
+    else {
+      if (tag_check.charAt(0) === '/') { //this tag is a double tag so check for tag-ending
+        this.retrieve_tag(tag_check.substring(1)); //remove it and all ancestors
+        this.tag_type = Parser.TOKEN_TYPE_TAG_END;
+      }
+      else { //otherwise it's a start-tag
+        this.record_tag(tag_check); //push it on the tag stack
+        this.tag_type = Parser.TOKEN_TYPE_TAG_START;
+      }
+      if (this.Utils.in_array(tag_check, this.Utils.extra_liners)) { //check if this double needs an extra line
+        this.print_newline(true, this.output);
+      }
+    }
+    return content.join(''); //returns fully formatted tag
+  },
+
+  get_unformatted: function(delimiter, orig_tag) { //function to return unformatted content in its entirety
+
+    if (orig_tag && orig_tag.toLowerCase().indexOf(delimiter) != -1) {
+      return '';
+    }
+    var input_char = '';
+    var content = '';
+    var space = true;
+    do {
+
+      if (this.pos >= this.input.length) {
+        return content;
+      }
+
+      input_char = this.input.charAt(this.pos);
+      this.pos++;
+
+      if (this.Utils.in_array(input_char, this.Utils.whitespace)) {
+        if (!space) {
+          this.line_char_count--;
+          continue;
+        }
+        if (input_char === '\n' || input_char === '\r') {
+          content += '\n';
+          /*  Don't change tab indention for unformatted blocks.  If using code for html editing, this will greatly affect 
 tags if they are specified in the 'unformatted array'
+           for (var i=0; i 0) {
+        this.indent_level--;
+      }
+    };
+  }
+};
+
+Parser.TOKEN_TYPE_TAG_SINGLE = 'SINGLE';
+Parser.TOKEN_TYPE_TAG_STYLE = 'STYLE';
+Parser.TOKEN_TYPE_TAG_SCRIPT = 'SCRIPT';
+Parser.TOKEN_TYPE_TAG_START = 'START';
+Parser.TOKEN_TYPE_TAG_END = 'END';
+
+Parser.TOKEN_TYPE_TAG_NAME_SINGLE = 'TK_TAG_SINGLE';
+Parser.TOKEN_TYPE_TAG_NAME_STYLE = 'TK_TAG_STYLE';
+Parser.TOKEN_TYPE_TAG_NAME_SCRIPT = 'TK_TAG_SCRIPT';
+Parser.TOKEN_TYPE_TAG_NAME_START = 'TK_TAG_START';
+Parser.TOKEN_TYPE_TAG_NAME_END = 'TK_TAG_END';
+
+Parser.TOKEN_TYPE_EOF = 'TK_EOF';
+Parser.TOKEN_TYPE_CONTENT = 'TK_CONTENT';
+Parser.TOKEN_TYPE_STYLE = 'TK_STYLE';
+Parser.TOKEN_TYPE_SCRIPT = 'TK_SCRIPT';
+
+Parser.MODE_CONTENT = 'CONTENT';
+Parser.MODE_TAG = 'TAG';
+
+module.exports = Parser;

From 789c2b7c42d475978b4529dfcbdb81cb49abfef2 Mon Sep 17 00:00:00 2001
From: Adam Brunner 
Date: Thu, 19 Nov 2015 09:21:58 +0100
Subject: [PATCH 06/10] Extracting more methods in Parser

---
 lib/parser.js | 51 ++++++++++++++++++++++++++-------------------------
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/lib/parser.js b/lib/parser.js
index b762b54..bb11725 100644
--- a/lib/parser.js
+++ b/lib/parser.js
@@ -360,35 +360,36 @@ Parser.prototype = {
       this.indent_string += this.indent_character;
     }
 
-    this.print_newline = function(ignore, arr) {
-      this.line_char_count = 0;
-      if (!arr || !arr.length) {
-        return;
-      }
-      if (!ignore) { //we might want the extra line
-        while (this.Utils.in_array(arr[arr.length - 1], this.Utils.whitespace)) {
-          arr.pop();
-        }
-      }
-      arr.push('\n');
-      for (var i = 0; i < this.indent_level; i++) {
-        arr.push(this.indent_string);
+  },
+
+  print_newline: function(ignore, arr) {
+    this.line_char_count = 0;
+    if (!arr || !arr.length) {
+      return;
+    }
+    if (!ignore) { //we might want the extra line
+      while (this.Utils.in_array(arr[arr.length - 1], this.Utils.whitespace)) {
+        arr.pop();
       }
-    };
+    }
+    arr.push('\n');
+    for (var i = 0; i < this.indent_level; i++) {
+      arr.push(this.indent_string);
+    }
+  },
 
-    this.print_token = function(text) {
-      this.output.push(text);
-    };
+  print_token: function(text) {
+    this.output.push(text);
+  },
 
-    this.indent = function() {
-      this.indent_level++;
-    };
+  indent: function() {
+    this.indent_level++;
+  },
 
-    this.unindent = function() {
-      if (this.indent_level > 0) {
-        this.indent_level--;
-      }
-    };
+  unindent: function() {
+    if (this.indent_level > 0) {
+      this.indent_level--;
+    }
   }
 };
 

From 93612637b2eed40edb0a9e0d592794d88ae65205 Mon Sep 17 00:00:00 2001
From: Adam Brunner 
Date: Thu, 19 Nov 2015 09:22:21 +0100
Subject: [PATCH 07/10] Refactoring option handling

---
 lib/html.js | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/lib/html.js b/lib/html.js
index 06fc1ff..7001dd2 100644
--- a/lib/html.js
+++ b/lib/html.js
@@ -1,3 +1,5 @@
+"use strict";
+
 /*
 
 Style HTML
@@ -40,17 +42,15 @@ The options are:
 var Parser = require('./parser');
 
 function style_html(html_source, options) {
-  options = options || {};
-
   var script_indent_level;
-  var indent_scripts = options.indent_scripts || 'normal';
-  var indent_size = options.indent_size || 4;
-  var indent_character = options.indent_char || ' ';
-  var brace_style = options.brace_style || 'collapse';
-  var max_char = options.max_char == 0 ? Infinity : options.max_char || 70;
-  var unformatted = options.unformatted || ['a', 'span', 'bdo', 'em', 'strong', 'dfn', 'code', 'samp', 'kbd', 'var',
-      'cite', 'abbr', 'acronym', 'q', 'sub', 'sup', 'tt', 'i', 'b', 'big', 'small', 'u', 's', 'strike', 'font', 'ins',
-      'del', 'pre', 'address', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
+
+  options = getOptions(options);
+  var indent_scripts = options.indent_scripts;
+  var indent_size = options.indent_size;
+  var indent_character = options.indent_char;
+  var brace_style = options.brace_style;
+  var max_char = options.max_char;
+  var unformatted = options.unformatted;
 
   var multi_parser = new Parser();
   multi_parser.printer(html_source, indent_character, indent_size, max_char, brace_style, unformatted);
@@ -164,6 +164,24 @@ function style_html(html_source, options) {
   return multi_parser.output.join('');
 }
 
+function getOptions(options) {
+  options = options || {};
+
+  options.indent_scripts = options.indent_scripts || 'normal';
+  options.indent_size = options.indent_size || 4;
+  options.indent_character = options.indent_char || ' ';
+  options.brace_style = options.brace_style || 'collapse';
+  options.max_char = options.max_char == 0 ? Infinity : options.max_char || 70;
+  options.unformatted = options.unformatted || ['a', 'span', 'bdo', 'em', 'strong', 'dfn', 'code', 'samp', 'kbd', 'var',
+      'cite', 'abbr', 'acronym', 'q', 'sub', 'sup', 'tt', 'i', 'b', 'big', 'small', 'u', 's', 'strike', 'font', 'ins',
+      'del', 'pre', 'address', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
+
+  return options;
+}
+
+
 module.exports = {
-  prettyPrint: style_html
+  Parser: Parser,
+  prettyPrint: style_html,
+  getOptions: getOptions
 };

From 90c43e392e0c5c2caf15014a58242d44fb55c9e5 Mon Sep 17 00:00:00 2001
From: Adam Brunner 
Date: Thu, 19 Nov 2015 10:06:01 +0100
Subject: [PATCH 08/10] Indent character is undefined

---
 lib/html.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/html.js b/lib/html.js
index 7001dd2..4a11f72 100644
--- a/lib/html.js
+++ b/lib/html.js
@@ -169,7 +169,7 @@ function getOptions(options) {
 
   options.indent_scripts = options.indent_scripts || 'normal';
   options.indent_size = options.indent_size || 4;
-  options.indent_character = options.indent_char || ' ';
+  options.indent_char = options.indent_char || ' ';
   options.brace_style = options.brace_style || 'collapse';
   options.max_char = options.max_char == 0 ? Infinity : options.max_char || 70;
   options.unformatted = options.unformatted || ['a', 'span', 'bdo', 'em', 'strong', 'dfn', 'code', 'samp', 'kbd', 'var',

From da0c973989a4e2869b1a45b07dddb08a134600a8 Mon Sep 17 00:00:00 2001
From: Adam Brunner 
Date: Thu, 19 Nov 2015 13:26:32 +0100
Subject: [PATCH 09/10] fixing token type check

---
 lib/parser.js | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/parser.js b/lib/parser.js
index bb11725..5e6f441 100644
--- a/lib/parser.js
+++ b/lib/parser.js
@@ -8,7 +8,10 @@ function Parser() {
     parent1: ''
   };
   this.tag_type = '';
-  this.token_text = this.last_token = this.last_text = this.token_type = '';
+  this.token_text = '';
+  this.last_token = '';
+  this.last_text = '';
+  this.token_type = '';
 
   return this;
 }
@@ -290,7 +293,7 @@ Parser.prototype = {
   get_token: function() { //initial handler for token-retrieval
     var token;
 
-    if (this.last_token === Parser.TOKEN_TYPE_TAG_SCRIPT || this.last_token === Parser.TOKEN_TYPE_TAG_STYLE) { //check if we need to format javascript
+    if (this.last_token === Parser.TOKEN_TYPE_TAG_NAME_SCRIPT || this.last_token === Parser.TOKEN_TYPE_TAG_NAME_STYLE) { //check if we need to format javascript
       var type = this.last_token.substr(7);
       token = this.get_contents_to(type);
       if (typeof token !== 'string') {

From 5f9a5eb7c36a82aa1afd20205f076167b38cf478 Mon Sep 17 00:00:00 2001
From: Adam Brunner 
Date: Thu, 19 Nov 2015 15:29:46 +0100
Subject: [PATCH 10/10] fix style and script content handling

---
 lib/parser.js | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/parser.js b/lib/parser.js
index 5e6f441..1ae290b 100644
--- a/lib/parser.js
+++ b/lib/parser.js
@@ -299,7 +299,7 @@ Parser.prototype = {
       if (typeof token !== 'string') {
         return token;
       }
-      return [token, this.get_tag_name_type(type)];
+      return [token, this.get_type(type)];
     }
     if (this.current_mode === Parser.MODE_CONTENT) {
       token = this.get_content();
@@ -337,6 +337,15 @@ Parser.prototype = {
     }
   },
 
+  get_type: function(tag_type) {
+    switch (tag_type) {
+      case Parser.TOKEN_TYPE_TAG_STYLE:
+        return Parser.TOKEN_TYPE_STYLE;
+      case Parser.TOKEN_TYPE_TAG_SCRIPT:
+        return Parser.TOKEN_TYPE_SCRIPT;
+    }
+  },
+
   get_full_indent: function(level) {
     level = this.indent_level + level || 0;
     if (level < 1) {