bitburner-src/src/NetscriptTokenizer.js

/* Tokenizer 
 * Acts on top of the InputStream class. Takes in a character input stream and parses it into tokens.
 * Tokens can be accessed with peek() and next().
 *
 *  Token types:
 *      {type: "punc", value: "(" }           	// punctuation: parens, comma, semicolon etc.
 *      {type: "num", value: 5 }              	// numbers (including floats)
 *      {type: "str", value: "Hello World!" } 	// strings
 *      {type: "kw", value: "for/if/" }        	// keywords, see defs below
 *      {type: "var", value: "a" }            	// identifiers/variables
 *      {type: "op", value: "!=" }            	// operator characters
 *		{type: "bool", value: "true" } 			// Booleans (note: this tokenizer lists true/false as keywords, so it emits them with type "kw")
 *
 */
 
/* Creates a tokenizer on top of a character input stream.
 *
 * input must provide next(), peek(), eof() and croak(msg). peek() and next()
 * are used one character at a time.
 *
 * Returns an object with:
 *      next()  - consumes and returns the next token, or null at end of input
 *      peek()  - returns the next token without consuming it
 *      eof()   - true when no more tokens are available
 *      croak   - the input stream's croak function, passed through as-is
 */
function Tokenizer(input) {
    //One-token lookahead buffer filled by peek() and drained by next()
    var current = null;
    //Space-delimited so is_keyword() can do a whole-word search
    var keywords = " if elif else true false while for ";
    
    return {
        next    : next,
        peek    : peek,
        eof     : eof,
        croak   : input.croak
    };
    
    function is_keyword(x) {
        return keywords.indexOf(" " + x + " ") >= 0;
    }
    
    function is_digit(ch) {
        return /[0-9]/.test(ch);
    }
    
    //An identifier can start with any letter or an underscore
    function is_id_start(ch) {
        return /[a-z_]/i.test(ch);
    }
    
    //NOTE(review): "?!-<>=" are accepted inside identifiers, so unspaced input
    //such as i<10 or a-b is read as a single identifier. Kept as-is because
    //changing it alters the accepted language - confirm whether this is intended.
    function is_id(ch) {
        return is_id_start(ch) || "?!-<>=0123456789".indexOf(ch) >= 0;
    }
    
    function is_op_char(ch) {
        return "+-*/%=&|<>!".indexOf(ch) >= 0;
    }
    
    function is_punc(ch) {
        return ",;(){}[]".indexOf(ch) >= 0;
    }
    
    //Includes "\r" so that sources with Windows (CRLF) line endings tokenize
    function is_whitespace(ch) {
        return " \t\r\n".indexOf(ch) >= 0;
    }
    
    //Consumes characters for as long as predicate(ch) holds and returns them
    function read_while(predicate) {
        var str = "";
        while (!input.eof() && predicate(input.peek()))
            str += input.next();
        return str;
    }
    
    function read_number() {
        var has_dot = false;
        //Reads the number from the input. Checks for only a single decimal point
        var number = read_while(function(ch){
            if (ch === ".") {
                if (has_dot) return false;
                has_dot = true;
                return true;
            }
            return is_digit(ch);
        });
        return { type: "num", value: parseFloat(number) };
    }
    
    //This function also checks the identifier against a list of known keywords (defined at the top)
    //and will return a kw object rather than identifier if it is one
    function read_ident() {
        //Identifier must start with a letter or underscore..and can contain anything from ?!-<>=0123456789
        var id = read_while(is_id);
        return {
            type  : is_keyword(id) ? "kw" : "var",
            value : id
        };
    }
    
    //Reads a quoted string body up to the closing 'end' character. A backslash
    //makes the following character literal (so \" does not end the string).
    //If the input ends first, whatever was read so far is returned.
    function read_escaped(end) {
        var escaped = false, str = "";
        input.next();   //Skip the quotation mark
        while (!input.eof()) {
            var ch = input.next();
            if (escaped) {
                str += ch;
                escaped = false;
            } else if (ch === "\\") {
                escaped = true;
            } else if (ch === end) {
                break;
            } else {
                str += ch;
            }
        }
        return str;
    }
    
    //ch is the opening quote (" or '); the string ends at the matching quote
    function read_string(ch) {
        return { type: "str", value: read_escaped(ch) };
    }
    
    //Only supports single-line comments right now.
    //Consumes the rest of the current line, including its newline if there is one
    function skip_comment() {
        read_while(function(ch){ return ch !== "\n"; });
        if (!input.eof()) input.next();
    }
    
    //Gets the next token. Returns null at end of input
    function read_next() {
        //Loop instead of recursing so that a long run of comment lines
        //cannot exhaust the call stack
        while (true) {
            //Skip over whitespace
            read_while(is_whitespace);
            
            if (input.eof()) return null;
            
            //Peek the next character and decide what to do based on what that
            //next character is
            var ch = input.peek();
            
            if (ch === "/") {
                //peek() only exposes a single character, so the first "/" has to
                //be consumed before a comment ("//") can be told apart from an
                //operator that starts with "/"
                input.next();
                if (!input.eof() && input.peek() === "/") {
                    skip_comment();
                    continue;
                }
                return {
                    type    : "op",
                    value   : "/" + read_while(is_op_char)
                };
            }
            
            if (ch === '"' || ch === '\'') return read_string(ch);
            if (is_digit(ch))       return read_number();
            if (is_id_start(ch))    return read_ident();
            if (is_punc(ch)) return {
                type    : "punc",
                value   : input.next()
            };
            if (is_op_char(ch)) return {
                type    : "op",
                value   : read_while(is_op_char)
            };
            
            //Unknown character: report it instead of silently ending the token
            //stream. croak is presumably expected to throw; if it returns,
            //fall back to null.
            input.croak("Can't handle character: " + ch);
            return null;
        }
    }
    
    function peek() {
        //Returns current token, unless its null in which case it grabs the next one
        //and returns it
        return current || (current = read_next());
    }
    
    function next() {
        //The token might have been peeked already, in which case read_next() was already
        //called so just return current
        var tok = current;
        current = null;
        return tok || read_next();
    }
    
    function eof() {
        return peek() == null;
    }
}
Added InputStream, Tokenizer, and Parser(unfinished) class. Changed Newerth to Aevum 2016-11-14 07:42:31 +01:00			`/* Tokenizer`
			`* Acts on top of the InputStream class. Takes in a character input stream and and parses it into tokens.`
			`* Tokens can be accessed with peek() and next().`
			`*`
			`* Token types:`
Finished implementing Netscript. Not completely tested yet. Find out how to make it multithreaded (Web Workers is the best way according to internet 2016-11-17 23:25:40 +01:00			`* {type: "punc", value: "(" } // punctuation: parens, comma, semicolon etc.`
			`* {type: "num", value: 5 } // numbers (including floats)`
			`* {type: "str", value: "Hello World!" } // strings`
			`* {type: "kw", value: "for/if/" } // keywords, see defs below`
			`* {type: "var", value: "a" } // identifiers/variables`
			`* {type: "op", value: "!=" } // operator characters`
			`* {type: "bool", value: "true" } // Booleans`
Added InputStream, Tokenizer, and Parser(unfinished) class. Changed Newerth to Aevum 2016-11-14 07:42:31 +01:00			`*`
			`*/`

			`function Tokenizer(input) {`
			`var current = null;`
Finished implementing Netscript. Not completely tested yet. Find out how to make it multithreaded (Web Workers is the best way according to internet 2016-11-17 23:25:40 +01:00			`var keywords = " if elif else true false while for ";`
Added InputStream, Tokenizer, and Parser(unfinished) class. Changed Newerth to Aevum 2016-11-14 07:42:31 +01:00
			`return {`
			`next : next,`
			`peek : peek,`
			`eof : eof,`
			`croak : input.croak`
			`}`

			`function is_keyword(x) {`
			`return keywords.indexOf(" " + x + " ") >= 0;`
			`}`

			`function is_digit(ch) {`
			`return /[0-9]/i.test(ch);`
			`}`

			`//An identifier can start with any letter or an underscore`
			`function is_id_start(ch) {`
			`return /[a-z_]/i.test(ch);`
			`}`

			`function is_id(ch) {`
			`return is_id_start(ch) \|\| "?!-<>=0123456789".indexOf(ch) >= 0;`
			`}`

			`function is_op_char(ch) {`
			`return "+-*/%=&\|<>!".indexOf(ch) >= 0;`
			`}`

			`function is_punc(ch) {`
			`return ",;(){}[]".indexOf(ch) >= 0;`
			`}`

			`function is_whitespace(ch) {`
			`return " \t\n".indexOf(ch) >= 0;`
			`}`

			`function read_while(predicate) {`
			`var str = "";`
			`while (!input.eof() && predicate(input.peek()))`
			`str += input.next();`
			`return str;`
			`}`

			`function read_number() {`
			`var has_dot = false;`
			`//Reads the number from the input. Checks for only a single decimal point`
			`var number = read_while(function(ch){`
			`if (ch == ".") {`
			`if (has_dot) return false;`
			`has_dot = true;`
			`return true;`
			`}`
			`return is_digit(ch);`
			`});`
			`return { type: "num", value: parseFloat(number) };`
			`}`

			`//This function also checks the identifier against a list of known keywords (defined at the top)`
			`//and will return a kw object rather than identifier if it is one`
			`function read_ident() {`
			`//Identifier must start with a letter or underscore..and can contain anything from ?!-<>=0123456789`
			`var id = read_while(is_id);`
			`return {`
			`type : is_keyword(id) ? "kw" : "var",`
			`value : id`
			`};`
			`}`

			`function read_escaped(end) {`
			`var escaped = false, str = "";`
			`input.next(); //Skip the quotation mark`
			`while (!input.eof()) {`
			`var ch = input.next();`
			`if (escaped) {`
			`str += ch;`
			`escaped = false;`
			`} else if (ch == "\\") {`
			`escaped = true;`
			`} else if (ch == end) {`
			`break;`
			`} else {`
			`str += ch;`
			`}`
			`}`
			`return str;`
			`}`

Fixed some bugs, began adding a Script class 2016-11-21 07:11:14 +01:00			`function read_string(ch) {`
			`if (ch == '"') {`
			`return { type: "str", value: read_escaped('"') };`
			`} else if (ch == '\'') {`
			`return { type: "str", value: read_escaped('\'') };`
			`}`
Added InputStream, Tokenizer, and Parser(unfinished) class. Changed Newerth to Aevum 2016-11-14 07:42:31 +01:00			`}`

			`//Only supports single-line comments right now`
			`function skip_comment() {`
			`read_while(function(ch){ return ch != "\n" });`
			`input.next();`
			`}`

			`//Gets the next token`
			`function read_next() {`
			`//Skip over whitespace`
			`read_while(is_whitespace);`

			`if (input.eof()) return null;`

			`//Peek the next character and decide what to do based on what that`
			`//next character is`
			`var ch = input.peek();`

			`if (ch == "//") {`
			`skip_comment();`
			`return read_next();`
			`}`

Fixed some bugs, began adding a Script class 2016-11-21 07:11:14 +01:00			`if (ch == '"' \|\| ch == '\'') return read_string(ch);`
Added InputStream, Tokenizer, and Parser(unfinished) class. Changed Newerth to Aevum 2016-11-14 07:42:31 +01:00			`if (is_digit(ch)) return read_number();`
			`if (is_id_start(ch)) return read_ident();`
			`if (is_punc(ch)) return {`
			`type : "punc",`
			`value : input.next()`
			`}`
			`if (is_op_char(ch)) return {`
			`type : "op",`
			`value : read_while(is_op_char)`
			`}`

			`}`

			`function peek() {`
			`//Returns current token, unless its null in which case it grabs the next one`
			`//and returns it`
			`return current \|\| (current = read_next());`
			`}`

			`function next() {`
			`//The token might have been peaked already, in which case read_next() was already`
			`//called so just return current`
			`var tok = current;`
			`current = null;`
			`return tok \|\| read_next();`
			`}`

			`function eof() {`
			`return peek() == null;`
			`}`
			`}`