bitburner-src/src/NetscriptTokenizer.js

169 lines
4.8 KiB
JavaScript
Raw Normal View History

/* Tokenizer
* Acts on top of the InputStream class. Takes in a character input stream and and parses it into tokens.
* Tokens can be accessed with peek() and next().
*
* Token types:
* {type: "punc", value: "(" } // punctuation: parens, comma, semicolon etc.
* {type: "num", value: 5 } // numbers (including floats)
* {type: "str", value: "Hello World!" } // strings
* {type: "kw", value: "for/if/" } // keywords, see defs below
* {type: "var", value: "a" } // identifiers/variables
* {type: "op", value: "!=" } // operator characters
* {type: "bool", value: "true" } // Booleans
*
*/
function Tokenizer(input) {
var current = null;
var keywords = " if elif else true false while for ";
return {
next : next,
peek : peek,
eof : eof,
croak : input.croak
}
function is_keyword(x) {
return keywords.indexOf(" " + x + " ") >= 0;
}
function is_digit(ch) {
return /[0-9]/i.test(ch);
}
//An identifier can start with any letter or an underscore
function is_id_start(ch) {
return /[a-z_]/i.test(ch);
}
function is_id(ch) {
return is_id_start(ch) || "?!-<>=0123456789".indexOf(ch) >= 0;
}
function is_op_char(ch) {
return "+-*/%=&|<>!".indexOf(ch) >= 0;
}
function is_punc(ch) {
return ",;(){}[]".indexOf(ch) >= 0;
}
function is_whitespace(ch) {
return " \t\n".indexOf(ch) >= 0;
}
function read_while(predicate) {
var str = "";
while (!input.eof() && predicate(input.peek()))
str += input.next();
return str;
}
function read_number() {
var has_dot = false;
//Reads the number from the input. Checks for only a single decimal point
var number = read_while(function(ch){
if (ch == ".") {
if (has_dot) return false;
has_dot = true;
return true;
}
return is_digit(ch);
});
return { type: "num", value: parseFloat(number) };
}
//This function also checks the identifier against a list of known keywords (defined at the top)
//and will return a kw object rather than identifier if it is one
function read_ident() {
//Identifier must start with a letter or underscore..and can contain anything from ?!-<>=0123456789
var id = read_while(is_id);
return {
type : is_keyword(id) ? "kw" : "var",
value : id
};
}
function read_escaped(end) {
var escaped = false, str = "";
input.next(); //Skip the quotation mark
while (!input.eof()) {
var ch = input.next();
if (escaped) {
str += ch;
escaped = false;
} else if (ch == "\\") {
escaped = true;
} else if (ch == end) {
break;
} else {
str += ch;
}
}
return str;
}
function read_string(ch) {
if (ch == '"') {
return { type: "str", value: read_escaped('"') };
} else if (ch == '\'') {
return { type: "str", value: read_escaped('\'') };
}
}
//Only supports single-line comments right now
function skip_comment() {
read_while(function(ch){ return ch != "\n" });
input.next();
}
//Gets the next token
function read_next() {
//Skip over whitespace
read_while(is_whitespace);
if (input.eof()) return null;
//Peek the next character and decide what to do based on what that
//next character is
var ch = input.peek();
if (ch == "//") {
skip_comment();
return read_next();
}
if (ch == '"' || ch == '\'') return read_string(ch);
if (is_digit(ch)) return read_number();
if (is_id_start(ch)) return read_ident();
if (is_punc(ch)) return {
type : "punc",
value : input.next()
}
if (is_op_char(ch)) return {
type : "op",
value : read_while(is_op_char)
}
}
function peek() {
//Returns current token, unless its null in which case it grabs the next one
//and returns it
return current || (current = read_next());
}
function next() {
//The token might have been peaked already, in which case read_next() was already
//called so just return current
var tok = current;
current = null;
return tok || read_next();
}
function eof() {
return peek() == null;
}
}