695 lines
14 KiB
JavaScript
695 lines
14 KiB
JavaScript
class Juicescript_lexer {
	/*
		CONSTRUCTOR: Return new juicescript lexer for SOURCE with OPTIONS
		
		SOURCE is the program text to tokenize.
		OPTIONS must be an object; only `options.io` is read. The io adapter is
		used for diagnostics through `io.stderr.debug/info/warning/error`.
		
		Token types are taken from `Juicescript.token_type`, which is defined
		outside of this class.
	*/
	constructor(source, options){
		// STORE ARGUMENTS //
		// source
		this.source = source;
		
		// io adapter
		this.io = options.io;
	}
	
	/*
		MAIN: Run lexical analysis
		
		Resets all counters, scans the whole source and returns the list of
		tokens, which always ends with an EOF token. Problems are reported
		through the io adapter and counted in `warning_count` / `error_count`;
		the offending input is skipped and scanning continues.
	*/
	scan(){
		// RESET //
		// counters (`start`/`end` delimit the lexeme currently being scanned)
		this.start = 0;
		this.end = 0;
		this.line = 1;
		
		// token list
		this.token_list = [];
		
		// warning and error counter
		this.warning_count = 0;
		this.error_count = 0;
		
		
		// SCAN WHOLE SOURCE //
		while(!this.is_at_end()){
			// start where last scan ended
			this.start = this.end;
			
			// consume next character
			this.next();
			
			// scan next token
			this.scan_one();
		}
		
		
		// ADD END-OF-FILE TOKEN //
		this.token_add({
			type: Juicescript.token_type.EOF,
			line: this.line,
			lexeme: ""
		});
		
		
		// RETURN LIST OF TOKENS //
		return this.token_list;
	}
	
	/*
		HELPER: Scan one token at current position
		
		Expects the first character of the token to be consumed already
		(available as `this.char`) and dispatches on it.
	*/
	scan_one(){
		switch(this.char){
			// WHITESPACE //
			case " ":
			case "\r":
			case "\t":
				break;
			
			
			// DELIMITER //
			case ";":
				this.token_add({type: Juicescript.token_type.DELIMITER});
				break;
			
			
			// NEWLINE //
			case "\n":
				// the delimiter token still belongs to the line it terminates
				this.token_add({type: Juicescript.token_type.DELIMITER});
				this.line++;
				break;
			
			
			// OPERATORS //
			case "!":
				if(this.match("=")) this.token_add({type: Juicescript.token_type.NOT_EQUAL});
				else this.token_add({type: Juicescript.token_type.NOT});
				break;
			
			case "=":
				// a single `=` is not a token on its own
				if(this.match("=")) this.token_add({type: Juicescript.token_type.EQUAL_EQUAL});
				else this.error("unexpected character '" + this.char + "'");
				break;
			
			case "<":
				if(this.match("=")) this.token_add({type: Juicescript.token_type.LESS_EQUAL});
				else this.token_add({type: Juicescript.token_type.LESS});
				break;
			
			case ">":
				if(this.match("=")) this.token_add({type: Juicescript.token_type.GREATER_EQUAL});
				else this.token_add({type: Juicescript.token_type.GREATER});
				break;
			
			
			// BRACKETS //
			case "[":
				this.token_add({type: Juicescript.token_type.BRACKET_SQUARE_OPEN});
				break;
			
			case "]":
				this.token_add({type: Juicescript.token_type.BRACKET_SQUARE_CLOSE});
				break;
			
			case "{":
				this.token_add({type: Juicescript.token_type.BRACKET_CURLY_OPEN});
				break;
			
			case "}":
				this.token_add({type: Juicescript.token_type.BRACKET_CURLY_CLOSE});
				break;
			
			
			// COMMENTS //
			case "#":
			case "/":
				// block comment
				if(this.char === "/" && this.match("*")){
					this.scan_block_comment();
					break;
				}
				
				// single slash
				if(this.char === "/" && !this.match("/")){
					this.error("unexpected character '" + this.char + "'");
					break;
				}
				
				// normal comment (`#` or `//`): skip up to, but not including, the newline
				while(this.peek() !== "\n" && !this.is_at_end()) this.next();
				break;
			
			
			// STRINGS //
			// handle escape sequences
			case "\"":
				this.scan_string(this.char, true);
				break;
			
			// ignore escape sequences
			case "'":
				this.scan_string(this.char, false);
				break;
			
			
			// VARIABLE //
			case "$":
				this.scan_variable();
				break;
			
			
			// PREFIXED FLAGS //
			case ":":
				this.scan_flag();
				break;
			
			
			// NEGATIVE NUMBERS //
			case "-":
				// only if there's a valid digit after it
				if(this.is_digit(this.peek())){
					// consume first digit, so the number scanner sees it as current character
					this.next();
					
					// scan like a normal number
					this.scan_number();
					break;
				}
				
				// ignore with error
				this.error("unexpected character '" + this.char + "'");
				break;
			
			
			// SPECIAL CHARS //
			// ampersand
			case "&":
				this.token_add({type: Juicescript.token_type.AMPERSAND});
				break;
			
			// question mark
			case "?":
				this.token_add({type: Juicescript.token_type.QUESTION_MARK});
				break;
			
			
			// EVERYTHING ELSE //
			default:
				// numbers
				if(this.is_digit(this.char)){
					this.scan_number();
					break;
				}
				
				// identifiers
				if(this.is_alpha(this.char)){
					this.scan_identifier();
					break;
				}
				
				// unexpected (ignore with error)
				this.error("unexpected character '" + this.char + "'");
				break;
		}
	}
	
	/*
		SCANNER: Handle string surrounded by MARKER and optionally convert ESCAPE-SEQUENCES
		
		Expects the opening MARKER to be consumed already. A MARKER preceded by
		an odd number of backslashes does not terminate the string.
		With ESCAPE_SEQUENCES enabled, `\n`, `\t`, `\0` and `\uXXXX` (exactly four
		hexadecimal digits) are converted and any other escaped character stands
		for itself. With ESCAPE_SEQUENCES disabled, only `\\` and `\'` are
		resolved; everything else is kept literally.
		Adds a STRING token, or reports "unterminated string" and adds nothing.
	*/
	scan_string(marker, escape_sequences){
		// TRY TO CONSUME UNTIL END OF SOURCE //
		while(!this.is_at_end()){
			// do we have a quote?
			if(this.peek() === marker){
				// count backslashes in front of quote
				let backslash_count = 0;
				while(this.peek(-(backslash_count + 1)) === "\\") backslash_count++;
				
				// terminate string if the quote itself is not escaped
				if(backslash_count % 2 === 0) break;
			}
			
			// take note of passed lines
			if(this.peek() === "\n") this.line++;
			
			// consume next character
			this.next();
		}
		
		
		// DID WE REACH THE END OF SOURCE WITHOUT TERMINATION? //
		if(this.is_at_end()){
			// ignore with error
			this.error("unterminated string");
			return;
		}
		
		
		// GET STRING VALUE //
		// consume closing quote
		this.next();
		
		// get consumed string (without the surrounding quotes)
		let string = this.source.substring(this.start + 1, this.end - 1);
		
		
		// RESOLVE ESCAPE SEQUENCES //
		// iterate over whole string
		let offset = 0;
		let pos = -1;
		while((pos = string.indexOf("\\", offset)) > -1){
			// defaults for escaping one character
			const char_escaped = string.charAt(pos + 1);
			let replace = char_escaped;
			let remove_length = char_escaped.length;
			
			// special escape sequences
			switch(char_escaped){
				// newline
				case "n":
					replace = "\n";
					break;
				
				// tab
				case "t":
					replace = "\t";
					break;
				
				// null
				case "0":
					replace = "\0";
					break;
				
				// unicode
				case "u": {
					// get four-letter codepoint string
					const codepoint_string = string.substring(pos + 2, pos + 6);
					
					// only exactly four hexadecimal digits form a valid sequence,
					// anything else falls back to a plain escaped `u`
					if(/^[0-9a-fA-F]{4}$/.test(codepoint_string)){
						// get corresponding unicode character
						replace = String.fromCharCode(parseInt(codepoint_string, 16));
						remove_length += 4;
					}
					break;
				}
			}
			
			// if all aren't allowed, only replace essential escape sequences
			// (decided by the escaped character, not by what it would resolve to)
			if(escape_sequences || char_escaped === "\\" || char_escaped === "'"){
				// replace in string
				string = string.slice(0, pos) + replace + string.slice(pos + 1 + remove_length);
				
				// continue behind the replacement
				offset = pos + replace.length;
			} else {
				// keep sequence as is: continue behind backslash and escaped character
				offset = pos + 2;
			}
		}
		
		
		// ADD TOKEN //
		this.token_add({type: Juicescript.token_type.STRING, value: string});
	}
	
	/*
		SCANNER: Handle block comment
		
		Expects the opening slash and asterisk to be consumed already and skips
		everything up to and including the closing asterisk-slash pair. Reports
		"unterminated block comment" if the source ends first.
	*/
	scan_block_comment(){
		// TRY TO CONSUME UNTIL END OF SOURCE //
		// look ahead only, so the asterisk of the opening marker can never
		// double as the asterisk of the closing marker
		while(!this.is_at_end()){
			// do we have a closing marker?
			if(this.peek() === "*" && this.peek(1) === "/"){
				// consume (=ignore) closing marker, block comment ends here
				this.next();
				this.next();
				return;
			}
			
			// take note of passed lines
			if(this.peek() === "\n") this.line++;
			
			// consume next character
			this.next();
		}
		
		
		// REACHED THE END OF SOURCE WITHOUT TERMINATION //
		// ignore with error
		this.error("unterminated block comment");
	}
	
	/*
		SCANNER: Handle variable
		
		Expects the `$` prefix to be consumed already. Adds a VARIABLE token
		whose value is the name without the prefix.
	*/
	scan_variable(){
		// GET VARIABLE NAME //
		// consume all valid characters
		while(this.is_alphanumeric(this.peek())) this.next();
		
		// get consumed string (without `$`)
		const variable = this.source.substring(this.start + 1, this.end);
		
		
		// ADD TOKEN //
		this.token_add({type: Juicescript.token_type.VARIABLE, value: variable});
	}
	
	/*
		SCANNER: Handle number
		
		Expects the first digit to be the current character; a leading `-` may
		already be part of the lexeme. Supports base 10 (with optional decimal
		fraction) and the prefixes `0b` (binary), `0o` (octal) and
		`0x` (hexadecimal), case-insensitive.
		Adds a NUMBER token, or reports an error if a base prefix is not
		followed by at least one valid digit.
	*/
	scan_number(){
		// DEFAULT VALUES FOR BASE 10 NUMBER //
		let base = null;
		let is_valid_char = (char) => this.is_digit(char);
		let prefix_length = 0;
		
		
		// HANDLE OTHER BASES //
		// check for '0' prefix
		if(this.char === "0"){
			switch(this.peek().toLowerCase()){
				case "b":
					// binary (base 2)
					base = 2;
					is_valid_char = (char) => this.is_binary(char);
					break;
				
				case "o":
					// octal (base 8)
					base = 8;
					is_valid_char = (char) => this.is_octal(char);
					break;
				
				case "x":
					// hexadecimal (base 16)
					base = 16;
					is_valid_char = (char) => this.is_hexadecimal(char);
					break;
			}
			
			// consume base-char if valid and remember to cut off the prefix
			if(base !== null){
				prefix_length = 2;
				this.next();
			}
		}
		
		
		// GET NUMBER'S VALUE AS STRING //
		// consume all valid chars
		while(is_valid_char(this.peek())) this.next();
		
		// allow decimal point on base 10 numbers (only if a digit follows it)
		if(base === null && this.peek() === "." && is_valid_char(this.peek(1))){
			// consume decimal point
			this.next();
			
			// consume all valid chars
			while(is_valid_char(this.peek())) this.next();
		}
		
		// get consumed string
		const number_string_full = this.source.substring(this.start, this.end);
		
		
		// HANDLE NEGATIVE NUMBERS //
		const negative = number_string_full.startsWith("-");
		
		
		// STORE NUMBER IN TOKEN //
		// get number string (without minus sign and base prefix)
		const number_string = number_string_full.substring(prefix_length + (negative ? 1 : 0));
		
		// a base prefix without any digits is not a number
		if(number_string.length === 0){
			// ignore with error
			this.error("invalid number '" + number_string_full + "'");
			return;
		}
		
		// parse number
		let number = (base !== null) ? parseInt(number_string, base) : parseFloat(number_string);
		
		// maybe negate
		if(negative) number *= -1;
		
		// add token
		this.token_add({type: Juicescript.token_type.NUMBER, value: number});
	}
	
	/*
		SCANNER: Handle identifier
		
		Expects the first letter to be consumed already. Depending on the input
		this adds a FLAG token (name directly followed by `:`), a keyword token
		(case-insensitive lookup) or an IDENTIFIER token.
	*/
	scan_identifier(){
		// GET IDENTIFIER NAME //
		// consume all valid chars
		while(this.is_alphanumeric(this.peek())) this.next();
		
		// get consumed string
		const identifier = this.source.substring(this.start, this.end);
		
		
		// CHECK IF THIS IS A SUFFIXED FLAG //
		// has `:` after it?
		if(this.match(":")){
			// add token
			this.token_add({type: Juicescript.token_type.FLAG, value: identifier});
			
			// ignore the rest
			return;
		}
		
		
		// MAYBE CONVERT IDENTIFIER TO KEYWORD //
		// try to load from lookup table
		const keyword = ({
			"DEF": Juicescript.token_type.DEF,
			
			"GLOB": Juicescript.token_type.GLOBAL,
			"GLOBAL": Juicescript.token_type.GLOBAL,
			"PUB": Juicescript.token_type.GLOBAL,
			"PUBLIC": Juicescript.token_type.GLOBAL,
			
			"END": Juicescript.token_type.END,
			
			"TRUE": Juicescript.token_type.TRUE,
			"FALSE": Juicescript.token_type.FALSE,
			
			"NULL": Juicescript.token_type.NULL,
		})[identifier.toUpperCase()] ?? null;
		
		// found something?
		if(keyword !== null){
			// found entry: add keyword token
			this.token_add({type: keyword});
		} else {
			// didn't find entry: add as identifier
			this.token_add({type: Juicescript.token_type.IDENTIFIER, value: identifier});
		}
	}
	
	/*
		SCANNER: Handle flag
		
		Expects the `:` prefix to be consumed already. Adds a FLAG token whose
		value is the name without prefix and without the optional `:` suffix.
		Reports an error if there is no name at all.
	*/
	scan_flag(){
		// GET FLAG NAME //
		// consume all valid characters
		while(this.is_alphanumeric(this.peek())) this.next();
		
		// get consumed string (without `:` prefix)
		const flag = this.source.substring(this.start + 1, this.end);
		
		// consume (=ignore) optional `:` suffix
		this.match(":");
		
		
		// CHECK IF THERE EVEN IS A NAME //
		if(flag.length <= 0){
			// ignore with error
			this.error("unexpected character '" + this.source.charAt(this.start) + "'");
			return;
		}
		
		
		// ADD TOKEN //
		this.token_add({type: Juicescript.token_type.FLAG, value: flag});
	}
	
	/*
		HELPER: Consume next character from source
		
		Stores it in `this.char` (empty string behind the end of source) and
		advances `this.end`.
	*/
	next(){
		this.char = this.source.charAt(this.end++);
	}
	
	/*
		HELPER: Return OFFSET next character from source
		
		Offset 0 is the next unconsumed character, -1 the one consumed last.
		Returns an empty string for positions outside of the source.
	*/
	peek(offset = 0){
		return this.source.charAt(this.end + offset);
	}
	
	/*
		HELPER: Consume (and return true) if OFFSET next character from source matches NEEDLE
		
		On a match, everything up to and including the matched character is
		consumed. Otherwise nothing is consumed and false is returned.
	*/
	match(needle, offset = 0){
		// ignore if it doesn't match
		if(this.peek(offset) !== needle) return false;
		
		// consume if it matches
		this.end += offset;
		this.next();
		return true;
	}
	
	/*
		HELPER: Return if we are at end of source
	*/
	is_at_end(){
		return this.end >= this.source.length;
	}
	
	/*
		HELPER: Is CHAR a digit?
	*/
	is_digit(char){
		return (char >= "0" && char <= "9");
	}
	
	/*
		HELPER: Is CHAR a binary digit?
	*/
	is_binary(char){
		return (char === "0" || char === "1");
	}
	
	/*
		HELPER: Is CHAR an octal digit?
	*/
	is_octal(char){
		return (char >= "0" && char <= "7");
	}
	
	/*
		HELPER: Is CHAR a hexadecimal digit?
	*/
	is_hexadecimal(char){
		return (char >= "0" && char <= "9") ||
		       (char >= "a" && char <= "f") ||
		       (char >= "A" && char <= "F");
	}
	
	/*
		HELPER: Is CHAR a char from a-z or A-Z?
	*/
	is_alpha(char){
		return (char >= "a" && char <= "z") ||
		       (char >= "A" && char <= "Z");
	}
	
	/*
		HELPER: Is CHAR valid inside a name (a-z, A-Z, 0-9, -, _)?
	*/
	is_alphanumeric(char){
		return (this.is_alpha(char) || this.is_digit(char) || char === "-" || char === "_");
	}
	
	/*
		HELPER: Add new token object with OPTIONS
		
		OPTIONS.type is required and must be accepted by
		`Juicescript.token_type.has()`, otherwise a string is thrown (kept as a
		plain string so the thrown value stays unchanged for callers).
		OPTIONS.line falls back to the current line, OPTIONS.lexeme to the
		currently consumed source text and OPTIONS.value to null.
	*/
	token_add(options){
		// NEW OBJECT //
		const token = {};
		
		
		// COLLECT REQUIRED ATTRIBUTES //
		// type
		token.type = options.type ?? null;
		if(!Juicescript.token_type.has(token.type)){
			throw "invalid token type '" + token.type + "'";
		}
		
		
		// COLLECT ATTRIBUTES WITH POSSIBLE FALLBACK VALUES //
		// line
		token.line = options.line ?? this.line;
		
		// lexeme
		token.lexeme = options.lexeme ?? this.source.substring(this.start, this.end);
		
		
		// OPTIONAL ATTRIBUTES //
		token.value = options.value ?? null;
		
		
		// ADD TO LIST //
		this.token_list.push(token);
	}
	
	/*
		HELPER: Automagically keep track of problems and add additional info to stderr
		
		All four methods forward TEXT and ADDITIONAL to the matching method of
		`io.stderr`; ADDITIONAL.line defaults to the current line. `warning` and
		`error` also increase their respective counter.
	*/
	debug(text, additional){
		// add defaults
		additional ??= {};
		additional.line ??= this.line;
		
		// forward
		this.io.stderr.debug(text, additional);
	}
	
	info(text, additional){
		// add defaults
		additional ??= {};
		additional.line ??= this.line;
		
		// forward
		this.io.stderr.info(text, additional);
	}
	
	warning(text, additional){
		// KEEP TRACK OF PROBLEM //
		this.warning_count++;
		
		
		// PRINT MESSAGE //
		// add defaults
		additional ??= {};
		additional.line ??= this.line;
		
		// forward
		this.io.stderr.warning(text, additional);
	}
	
	error(text, additional){
		// KEEP TRACK OF PROBLEM //
		this.error_count++;
		
		
		// PRINT MESSAGE //
		// add defaults
		additional ??= {};
		additional.line ??= this.line;
		
		// forward
		this.io.stderr.error(text, additional);
	}
}
|