diff --git a/crates/nxc-cli/Cargo.toml b/crates/nxc-cli/Cargo.toml
index 9472640..d420ab0 100644
--- a/crates/nxc-cli/Cargo.toml
+++ b/crates/nxc-cli/Cargo.toml
@@ -5,6 +5,10 @@ edition.workspace = true
 license.workspace = true
 authors.workspace = true
 
+[[bin]]
+name = "nxc"
+path = "src/main.rs"
+
 [[bin]]
 name = "nexacore"
 path = "src/main.rs"
diff --git a/crates/nxc-cli/src/main.rs b/crates/nxc-cli/src/main.rs
index c80751b..8382fd3 100644
--- a/crates/nxc-cli/src/main.rs
+++ b/crates/nxc-cli/src/main.rs
@@ -20,15 +20,28 @@ fn run() -> Result<(), String> {
     };
 
     match command.as_str() {
-        "build" => {
+        "check" | "build" => {
             let Some(path) = args.next() else {
-                return Err("usage: nexacore build <file>".to_string());
+                return Err(format!("usage: {} {command} <file>", executable_name()));
             };
-            let result = nxc_driver::compile_file(Path::new(&path))
-                .map_err(format_compile_error)?;
-            println!("compiled {path}");
-            println!("tokens: {}", result.tokens.len());
-            println!("items: {}", result.module.items.len());
+
+            let output =
+                nxc_driver::check_file(Path::new(&path)).map_err(format_driver_error)?;
+
+            if output.has_errors() {
+                eprintln!("{}", output.render_diagnostics());
+                return Err(format!(
+                    "check failed with {} diagnostic(s)",
+                    output.diagnostics.len()
+                ));
+            }
+
+            let summary = output.summary();
+            println!("checked {}", output.path.display());
+            println!("tokens: {}", output.tokens.len());
+            println!("items: {}", summary.items);
+            println!("functions: {}", summary.functions);
+            println!("structs: {}", summary.structs);
             Ok(())
         }
         "run" => Err("runtime execution is not implemented yet".to_string()),
@@ -41,25 +54,34 @@ fn run() -> Result<(), String> {
     }
 }
 
-fn format_compile_error(error: nxc_driver::CompileError) -> String {
+fn executable_name() -> String {
+    env::args()
+        .next()
+        .and_then(|path| {
+            Path::new(&path)
+                .file_name()
+                .map(|name| name.to_string_lossy().to_string())
+        })
+        .unwrap_or_else(|| "nxc".to_string())
+}
+
+fn format_driver_error(error: nxc_driver::DriverError) -> String {
     match error {
-        nxc_driver::CompileError::Io(io) => format!("io error: {io}"),
-        nxc_driver::CompileError::Parse(parse) => format!(
-            "parse error at line {}, column {}: {}",
-            parse.span.line, parse.span.column, parse.message
-        ),
+        nxc_driver::DriverError::Io(io) => format!("io error: {io}"),
     }
 }
 
 fn print_help() {
+    let name = executable_name();
     println!("NexaCore CLI");
     println!("usage:");
-    println!("  nexacore build <file>");
-    println!("  nexacore run <file>");
-    println!("  nexacore new <name>");
-    println!("  nexacore test");
-    println!("  nexacore fmt");
-    println!("  nexacore add <package>");
-    println!("  nexacore doc");
+    println!("  {name} check <file>");
+    println!("  {name} build <file>");
+    println!("  {name} run <file>");
+    println!("  {name} new <name>");
+    println!("  {name} test");
+    println!("  {name} fmt");
+    println!("  {name} add <package>");
+    println!("  {name} doc");
 }
 
diff --git a/crates/nxc-driver/src/lib.rs b/crates/nxc-driver/src/lib.rs
index 07c05c3..21ceec6 100644
--- a/crates/nxc-driver/src/lib.rs
+++ b/crates/nxc-driver/src/lib.rs
@@ -1,41 +1,98 @@
+use std::fmt::Write;
 use std::fs;
-use std::path::Path;
+use std::path::{Path, PathBuf};
 
-use nxc_frontend::{Lexer, Module, ParseError, Parser, Token};
+use nxc_frontend::{
+    has_errors, Diagnostic, Item, LexResult, Lexer, Module, ParseResult, Parser, Token,
+};
 
-#[derive(Debug)]
-pub struct CompileResult {
+#[derive(Debug, Clone)]
+pub struct FrontendOutput {
+    pub path: PathBuf,
+    pub source: String,
     pub tokens: Vec<Token>,
     pub module: Module,
+    pub diagnostics: Vec<Diagnostic>,
 }
 
 #[derive(Debug)]
-pub enum CompileError {
+pub enum DriverError {
     Io(std::io::Error),
-    Parse(ParseError),
 }
 
-impl From<std::io::Error> for CompileError {
+impl From<std::io::Error> for DriverError {
     fn from(value: std::io::Error) -> Self {
         Self::Io(value)
     }
 }
 
-impl From<ParseError> for CompileError {
-    fn from(value: ParseError) -> Self {
-        Self::Parse(value)
+impl FrontendOutput {
+    pub fn has_errors(&self) -> bool {
+        has_errors(&self.diagnostics)
+    }
+
+    pub fn summary(&self) -> AstSummary {
+        let mut summary = AstSummary {
+            items: self.module.items.len(),
+            ..AstSummary::default()
+        };
+
+        for item in &self.module.items {
+            match item {
+                Item::Function(_) => summary.functions += 1,
+                Item::Struct(_) => summary.structs += 1,
+            }
+        }
+
+        summary
+    }
+
+    pub fn render_diagnostics(&self) -> String {
+        let path = self.path.display().to_string();
+        let mut out = String::new();
+        for (index, diagnostic) in self.diagnostics.iter().enumerate() {
+            if index > 0 {
+                out.push('\n');
+                out.push('\n');
+            }
+            let _ = write!(out, "{}", diagnostic.render(&path, &self.source));
+        }
+        out
     }
 }
 
-pub fn compile_file(path: impl AsRef<Path>) -> Result<CompileResult, CompileError> {
-    let source = fs::read_to_string(path)?;
-    compile_source(&source)
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+pub struct AstSummary {
+    pub items: usize,
+    pub functions: usize,
+    pub structs: usize,
 }
 
-pub fn compile_source(source: &str) -> Result<CompileResult, CompileError> {
-    let tokens = Lexer::new(source).tokenize();
-    let mut parser = Parser::new(tokens.clone());
-    let module = parser.parse_module()?;
-    Ok(CompileResult { tokens, module })
+pub fn check_file(path: impl AsRef<Path>) -> Result<FrontendOutput, DriverError> {
+    let path = path.as_ref().to_path_buf();
+    let source = fs::read_to_string(&path)?;
+    Ok(check_source(path, source))
+}
+
+pub fn check_source(path: PathBuf, source: String) -> FrontendOutput {
+    let LexResult {
+        tokens,
+        diagnostics: mut lexer_diagnostics,
+    } = Lexer::new(&source).lex();
+
+    let ParseResult {
+        module,
+        diagnostics: parser_diagnostics,
+    } = Parser::new(tokens.clone()).parse_module();
+
+    lexer_diagnostics.extend(parser_diagnostics);
+
+    FrontendOutput {
+        path,
+        source,
+        tokens,
+        module,
+        diagnostics: lexer_diagnostics,
+    }
 }
 
diff --git a/crates/nxc-frontend/src/ast.rs b/crates/nxc-frontend/src/ast.rs
index e0506fd..4277905 100644
--- a/crates/nxc-frontend/src/ast.rs
+++ b/crates/nxc-frontend/src/ast.rs
@@ -3,25 +3,27 @@ use crate::token::Span;
 #[derive(Debug, Clone, Default)]
 pub struct Module {
     pub items: Vec<Item>,
+    pub span: Span,
 }
#[derive(Debug, Clone)] pub enum Item { - Use(UseDecl), Function(FunctionDecl), Struct(StructDecl), } -#[derive(Debug, Clone)] -pub struct UseDecl { - pub path: Vec, - pub span: Span, +impl Item { + pub fn span(&self) -> Span { + match self { + Item::Function(item) => item.span, + Item::Struct(item) => item.span, + } + } } #[derive(Debug, Clone)] pub struct FunctionDecl { pub is_public: bool, - pub is_async: bool, pub name: String, pub params: Vec, pub return_type: Option, @@ -54,33 +56,92 @@ pub struct FieldDecl { #[derive(Debug, Clone, Default)] pub struct Block { pub statements: Vec, + pub span: Span, } #[derive(Debug, Clone)] -pub enum Stmt { +pub struct Stmt { + pub kind: StmtKind, + pub span: Span, +} + +#[derive(Debug, Clone)] +pub enum StmtKind { Let { - mutable: bool, name: String, ty: Option, value: Expr, - span: Span, }, + Return(Option), + If(IfStmt), Expr(Expr), - Return(Option, Span), } #[derive(Debug, Clone)] -pub enum Expr { - Identifier(String, Span), - Integer(i64, Span), - String(String, Span), +pub struct IfStmt { + pub condition: Expr, + pub then_block: Block, + pub else_block: Option, + pub span: Span, +} + +#[derive(Debug, Clone)] +pub struct Expr { + pub kind: ExprKind, + pub span: Span, +} + +#[derive(Debug, Clone)] +pub enum ExprKind { + Literal(Literal), + Identifier(String), + Unary { + op: UnaryOp, + expr: Box, + }, + Binary { + left: Box, + op: BinaryOp, + right: Box, + }, + Group(Box), Call { callee: Box, args: Vec, - span: Span, }, } +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + Integer(i64), + Float(f64), + String(String), + Bool(bool), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UnaryOp { + Negate, + Not, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BinaryOp { + Multiply, + Divide, + Remainder, + Add, + Subtract, + Equal, + NotEqual, + Less, + LessEqual, + Greater, + GreaterEqual, + LogicalAnd, + LogicalOr, +} + #[derive(Debug, Clone)] pub struct TypeRef { pub name: String, diff --git 
a/crates/nxc-frontend/src/diagnostics.rs b/crates/nxc-frontend/src/diagnostics.rs new file mode 100644 index 0000000..d0d300d --- /dev/null +++ b/crates/nxc-frontend/src/diagnostics.rs @@ -0,0 +1,62 @@ +use crate::token::Span; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Severity { + Error, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Diagnostic { + pub severity: Severity, + pub message: String, + pub span: Span, +} + +impl Diagnostic { + pub fn error(message: impl Into, span: Span) -> Self { + Self { + severity: Severity::Error, + message: message.into(), + span, + } + } + + pub fn render(&self, path: &str, source: &str) -> String { + let line_number = self.span.start_line.max(1); + let column_number = self.span.start_column.max(1); + let line_text = source + .lines() + .nth(line_number.saturating_sub(1)) + .unwrap_or_default(); + + let gutter_width = line_number.to_string().len().max(1); + let underline_width = if self.span.start_line == self.span.end_line { + (self.span.end_column.saturating_sub(self.span.start_column)).max(1) + } else { + 1 + }; + + format!( + "{severity}: {message}\n --> {path}:{line}:{column}\n {space} |\n {line:>width$} | {line_text}\n {space} | {caret_pad}{carets}", + severity = match self.severity { + Severity::Error => "error", + }, + message = self.message, + path = path, + line = line_number, + column = column_number, + space = " ".repeat(gutter_width), + width = gutter_width, + line_text = line_text, + caret_pad = " ".repeat(column_number.saturating_sub(1)), + carets = "^".repeat(underline_width), + ) + } +} + +pub fn has_errors(diagnostics: &[Diagnostic]) -> bool { + diagnostics + .iter() + .any(|diagnostic| diagnostic.severity == Severity::Error) +} + diff --git a/crates/nxc-frontend/src/lexer.rs b/crates/nxc-frontend/src/lexer.rs index d372ffd..5b3df7a 100644 --- a/crates/nxc-frontend/src/lexer.rs +++ b/crates/nxc-frontend/src/lexer.rs @@ -1,5 +1,12 @@ +use crate::diagnostics::Diagnostic; use 
crate::token::{Keyword, Span, Token, TokenKind}; +#[derive(Debug, Clone)] +pub struct LexResult { + pub tokens: Vec, + pub diagnostics: Vec, +} + #[derive(Debug)] pub struct Lexer<'src> { chars: Vec, @@ -10,6 +17,7 @@ pub struct Lexer<'src> { pending_dedents: usize, at_line_start: bool, finished: bool, + diagnostics: Vec, _source: &'src str, } @@ -24,249 +32,294 @@ impl<'src> Lexer<'src> { pending_dedents: 0, at_line_start: true, finished: false, + diagnostics: Vec::new(), _source: source, } } - pub fn tokenize(mut self) -> Vec { + pub fn lex(mut self) -> LexResult { let mut tokens = Vec::new(); while let Some(token) = self.next_token() { - let is_eof = matches!(token.kind, TokenKind::Eof); + let eof = matches!(token.kind, TokenKind::Eof); tokens.push(token); - if is_eof { + if eof { break; } } - tokens + + LexResult { + tokens, + diagnostics: self.diagnostics, + } } fn next_token(&mut self) -> Option { - if self.finished { - return None; - } - - if self.pending_dedents > 0 { - self.pending_dedents -= 1; - return Some(self.make_token(TokenKind::Dedent, self.position, self.position)); - } - - if self.at_line_start { - let indent = self.consume_indentation(); - let current = *self.indent_stack.last().unwrap_or(&0); - if indent > current { - self.indent_stack.push(indent); - self.at_line_start = false; - return Some(self.make_token(TokenKind::Indent, self.position, self.position)); + while !self.finished { + if self.pending_dedents > 0 { + self.pending_dedents -= 1; + let span = Span::single(self.position, self.line, self.column); + return Some(Token::new(TokenKind::Dedent, span)); } - if indent < current { - while let Some(&last) = self.indent_stack.last() { - if indent < last { - self.indent_stack.pop(); - self.pending_dedents += 1; - } else { - break; - } - } - - self.at_line_start = false; - if self.pending_dedents > 0 { - self.pending_dedents -= 1; - return Some(self.make_token(TokenKind::Dedent, self.position, self.position)); + if self.at_line_start { + if let 
Some(token) = self.handle_line_start() { + return Some(token); } } - self.at_line_start = false; - } - self.skip_inline_whitespace(); + self.skip_inline_whitespace(); - let start = self.position; - let line = self.line; - let column = self.column; + let start = self.position; + let start_line = self.line; + let start_column = self.column; - let ch = match self.peek() { - Some(ch) => ch, - None => { + let Some(ch) = self.peek() else { if self.indent_stack.len() > 1 { self.indent_stack.pop(); - return Some(Token::new( - TokenKind::Dedent, - Span::new(start, start, line, column), - )); + let span = Span::single(self.position, self.line, self.column); + return Some(Token::new(TokenKind::Dedent, span)); } self.finished = true; - return Some(Token::new( - TokenKind::Eof, - Span::new(start, start, line, column), - )); - } - }; + let span = Span::single(self.position, self.line, self.column); + return Some(Token::new(TokenKind::Eof, span)); + }; - if ch == '\n' { - self.bump(); - self.at_line_start = true; - return Some(Token::new( - TokenKind::Newline, - Span::new(start, self.position, line, column), - )); - } - - if ch == '"' { - return Some(self.lex_string()); - } - - if ch.is_ascii_digit() { - return Some(self.lex_number()); - } - - if is_ident_start(ch) { - return Some(self.lex_identifier()); - } - - let token = match ch { - '(' => single(self, TokenKind::LeftParen), - ')' => single(self, TokenKind::RightParen), - '{' => single(self, TokenKind::LeftBrace), - '}' => single(self, TokenKind::RightBrace), - '[' => single(self, TokenKind::LeftBracket), - ']' => single(self, TokenKind::RightBracket), - ',' => single(self, TokenKind::Comma), - '.' => single(self, TokenKind::Dot), - ':' => single(self, TokenKind::Colon), - '+' => single(self, TokenKind::Plus), - '*' => single(self, TokenKind::Star), - '/' => single(self, TokenKind::Slash), - '%' => single(self, TokenKind::Percent), - '?' 
=> single(self, TokenKind::Question), - '-' => { + if ch == '\n' { self.bump(); - if self.peek() == Some('>') { + self.at_line_start = true; + let span = Span::new( + start, + self.position, + start_line, + start_column, + self.line, + self.column, + ); + return Some(Token::new(TokenKind::Newline, span)); + } + + if ch == '"' { + return Some(self.lex_string()); + } + + if ch.is_ascii_digit() { + return Some(self.lex_number()); + } + + if is_ident_start(ch) { + return Some(self.lex_identifier()); + } + + let token = match ch { + '(' => self.single(TokenKind::LeftParen), + ')' => self.single(TokenKind::RightParen), + ',' => self.single(TokenKind::Comma), + ':' => self.single(TokenKind::Colon), + '+' => self.single(TokenKind::Plus), + '-' => { self.bump(); - Token::new(TokenKind::Arrow, Span::new(start, self.position, line, column)) - } else { - Token::new(TokenKind::Minus, Span::new(start, self.position, line, column)) - } - } - '=' => { - self.bump(); - match self.peek() { - Some('=') => { + if self.peek() == Some('>') { self.bump(); - Token::new( - TokenKind::EqualEqual, - Span::new(start, self.position, line, column), - ) + self.token(TokenKind::Arrow, start, start_line, start_column) + } else { + self.token(TokenKind::Minus, start, start_line, start_column) } - Some('>') => { + } + '*' => self.single(TokenKind::Star), + '/' => self.single(TokenKind::Slash), + '%' => self.single(TokenKind::Percent), + '=' => { + self.bump(); + if self.peek() == Some('=') { self.bump(); - Token::new( - TokenKind::FatArrow, - Span::new(start, self.position, line, column), - ) + self.token(TokenKind::EqualEqual, start, start_line, start_column) + } else { + self.token(TokenKind::Equal, start, start_line, start_column) } - _ => Token::new(TokenKind::Equal, Span::new(start, self.position, line, column)), } - } - '!' => { - self.bump(); - if self.peek() == Some('=') { + '!' 
=> { self.bump(); - Token::new( - TokenKind::BangEqual, - Span::new(start, self.position, line, column), - ) - } else { - Token::new(TokenKind::Bang, Span::new(start, self.position, line, column)) + if self.peek() == Some('=') { + self.bump(); + self.token(TokenKind::BangEqual, start, start_line, start_column) + } else { + self.token(TokenKind::Bang, start, start_line, start_column) + } } - } - '<' => { - self.bump(); - if self.peek() == Some('=') { + '<' => { self.bump(); - Token::new( - TokenKind::LessEqual, - Span::new(start, self.position, line, column), - ) - } else { - Token::new(TokenKind::Less, Span::new(start, self.position, line, column)) + if self.peek() == Some('=') { + self.bump(); + self.token(TokenKind::LessEqual, start, start_line, start_column) + } else { + self.token(TokenKind::Less, start, start_line, start_column) + } } - } - '>' => { - self.bump(); - if self.peek() == Some('=') { + '>' => { self.bump(); - Token::new( - TokenKind::GreaterEqual, - Span::new(start, self.position, line, column), - ) - } else { - Token::new(TokenKind::Greater, Span::new(start, self.position, line, column)) + if self.peek() == Some('=') { + self.bump(); + self.token(TokenKind::GreaterEqual, start, start_line, start_column) + } else { + self.token(TokenKind::Greater, start, start_line, start_column) + } } - } - _ => { - self.bump(); - Token::new(TokenKind::Newline, Span::new(start, self.position, line, column)) - } - }; + '&' => { + self.bump(); + if self.peek() == Some('&') { + self.bump(); + self.token(TokenKind::AndAnd, start, start_line, start_column) + } else { + self.report("unexpected character '&'; did you mean '&&'?", start); + continue; + } + } + '|' => { + self.bump(); + if self.peek() == Some('|') { + self.bump(); + self.token(TokenKind::OrOr, start, start_line, start_column) + } else { + self.report("unexpected character '|'; did you mean '||'?", start); + continue; + } + } + _ => { + self.bump(); + self.report(format!("unexpected character '{ch}'"), 
start); + continue; + } + }; - Some(token) + return Some(token); + } + + None } - fn consume_indentation(&mut self) -> usize { - let mut indent = 0; + fn handle_line_start(&mut self) -> Option { loop { - match self.peek() { - Some(' ') => { - self.bump(); - indent += 1; - } - Some('\t') => { - self.bump(); - indent += 4; - } - Some('\n') => return 0, - Some('#') => { - while let Some(ch) = self.peek() { + if self.pending_dedents > 0 { + self.pending_dedents -= 1; + let span = Span::single(self.position, self.line, self.column); + return Some(Token::new(TokenKind::Dedent, span)); + } + + let mut indent = 0usize; + while let Some(ch) = self.peek() { + match ch { + ' ' => { self.bump(); - if ch == '\n' { - self.at_line_start = true; - break; + indent += 1; + } + '\t' => { + self.bump(); + indent += 4; + } + _ => break, + } + } + + match self.peek() { + Some('\n') => { + let start = self.position; + let line = self.line; + let column = self.column; + self.bump(); + let span = + Span::new(start, self.position, line, column, self.line, self.column); + self.at_line_start = true; + return Some(Token::new(TokenKind::Newline, span)); + } + Some('#') => { + self.consume_comment(); + if self.peek() == Some('\n') { + let start = self.position; + let line = self.line; + let column = self.column; + self.bump(); + let span = Span::new( + start, + self.position, + line, + column, + self.line, + self.column, + ); + self.at_line_start = true; + return Some(Token::new(TokenKind::Newline, span)); + } + } + Some(_) => { + let current = *self.indent_stack.last().unwrap_or(&0); + if indent > current { + self.indent_stack.push(indent); + self.at_line_start = false; + let span = Span::single(self.position, self.line, self.column); + return Some(Token::new(TokenKind::Indent, span)); + } + + if indent < current { + while let Some(&last) = self.indent_stack.last() { + if indent < last { + self.indent_stack.pop(); + self.pending_dedents += 1; + } else { + break; + } + } + + if 
*self.indent_stack.last().unwrap_or(&0) != indent { + self.report( + "inconsistent indentation level", + self.position.saturating_sub(1), + ); + } + + if self.pending_dedents > 0 { + self.pending_dedents -= 1; + self.at_line_start = false; + let span = Span::single(self.position, self.line, self.column); + return Some(Token::new(TokenKind::Dedent, span)); } } - return 0; + + self.at_line_start = false; + return None; + } + None => { + self.at_line_start = false; + return None; } - _ => break, } } - indent } fn skip_inline_whitespace(&mut self) { - while let Some(ch) = self.peek() { - if ch == ' ' || ch == '\t' || ch == '\r' { - self.bump(); - continue; - } - - if ch == '#' { - while let Some(comment) = self.peek() { + loop { + match self.peek() { + Some(' ') | Some('\t') | Some('\r') => { self.bump(); - if comment == '\n' { - self.at_line_start = true; - break; - } } - continue; + Some('#') => self.consume_comment(), + _ => break, } + } + } - break; + fn consume_comment(&mut self) { + while let Some(ch) = self.peek() { + if ch == '\n' { + break; + } + self.bump(); } } fn lex_identifier(&mut self) -> Token { let start = self.position; - let line = self.line; - let column = self.column; + let start_line = self.line; + let start_column = self.column; let mut value = String::new(); while let Some(ch) = self.peek() { if is_ident_continue(ch) { @@ -276,17 +329,23 @@ impl<'src> Lexer<'src> { break; } } - let kind = match Keyword::from_ident(&value) { - Some(keyword) => TokenKind::Keyword(keyword), - None => TokenKind::Identifier(value), + + let kind = match value.as_str() { + "true" => TokenKind::Bool(true), + "false" => TokenKind::Bool(false), + _ => match Keyword::from_ident(&value) { + Some(keyword) => TokenKind::Keyword(keyword), + None => TokenKind::Identifier(value), + }, }; - Token::new(kind, Span::new(start, self.position, line, column)) + + self.token(kind, start, start_line, start_column) } fn lex_number(&mut self) -> Token { let start = self.position; - let 
line = self.line; - let column = self.column; + let start_line = self.line; + let start_column = self.column; let mut value = String::new(); while let Some(ch) = self.peek() { if ch.is_ascii_digit() { @@ -296,35 +355,109 @@ impl<'src> Lexer<'src> { break; } } - Token::new( - TokenKind::Integer(value), - Span::new(start, self.position, line, column), - ) + + let kind = if self.peek() == Some('.') && self.peek_next().is_some_and(|next| next.is_ascii_digit()) { + value.push('.'); + self.bump(); + while let Some(ch) = self.peek() { + if ch.is_ascii_digit() { + value.push(ch); + self.bump(); + } else { + break; + } + } + TokenKind::Float(value) + } else { + TokenKind::Integer(value) + }; + + self.token(kind, start, start_line, start_column) } fn lex_string(&mut self) -> Token { let start = self.position; - let line = self.line; - let column = self.column; + let start_line = self.line; + let start_column = self.column; self.bump(); + let mut value = String::new(); + let mut terminated = false; while let Some(ch) = self.peek() { - self.bump(); if ch == '"' { + self.bump(); + terminated = true; break; } + + if ch == '\n' { + break; + } + value.push(ch); + self.bump(); } + + if !terminated { + self.diagnostics.push(Diagnostic::error( + "unterminated string literal", + Span::new( + start, + self.position, + start_line, + start_column, + self.line, + self.column, + ), + )); + } + + self.token(TokenKind::String(value), start, start_line, start_column) + } + + fn single(&mut self, kind: TokenKind) -> Token { + let start = self.position; + let start_line = self.line; + let start_column = self.column; + self.bump(); + self.token(kind, start, start_line, start_column) + } + + fn token( + &self, + kind: TokenKind, + start: usize, + start_line: usize, + start_column: usize, + ) -> Token { Token::new( - TokenKind::String(value), - Span::new(start, self.position, line, column), + kind, + Span::new( + start, + self.position, + start_line, + start_column, + self.line, + self.column, + 
), ) } + fn report(&mut self, message: impl Into, position: usize) { + self.diagnostics.push(Diagnostic::error( + message, + Span::single(position, self.line, self.column), + )); + } + fn peek(&self) -> Option { self.chars.get(self.position).copied() } + fn peek_next(&self) -> Option { + self.chars.get(self.position + 1).copied() + } + fn bump(&mut self) { if let Some(ch) = self.peek() { self.position += 1; @@ -336,18 +469,6 @@ impl<'src> Lexer<'src> { } } } - - fn make_token(&self, kind: TokenKind, start: usize, end: usize) -> Token { - Token::new(kind, Span::new(start, end, self.line, self.column)) - } -} - -fn single(lexer: &mut Lexer<'_>, kind: TokenKind) -> Token { - let start = lexer.position; - let line = lexer.line; - let column = lexer.column; - lexer.bump(); - Token::new(kind, Span::new(start, lexer.position, line, column)) } fn is_ident_start(ch: char) -> bool { @@ -357,4 +478,3 @@ fn is_ident_start(ch: char) -> bool { fn is_ident_continue(ch: char) -> bool { is_ident_start(ch) || ch.is_ascii_digit() } - diff --git a/crates/nxc-frontend/src/lib.rs b/crates/nxc-frontend/src/lib.rs index 4dbecc5..618dd69 100644 --- a/crates/nxc-frontend/src/lib.rs +++ b/crates/nxc-frontend/src/lib.rs @@ -1,10 +1,15 @@ pub mod ast; +pub mod diagnostics; pub mod lexer; pub mod parser; pub mod token; -pub use ast::Module; -pub use lexer::Lexer; -pub use parser::{ParseError, Parser}; +pub use ast::{ + BinaryOp, Block, Expr, ExprKind, FunctionDecl, IfStmt, Item, Literal, Module, Param, Stmt, + StmtKind, StructDecl, TypeRef, UnaryOp, +}; +pub use diagnostics::{has_errors, Diagnostic, Severity}; +pub use lexer::{LexResult, Lexer}; +pub use parser::{ParseResult, Parser}; pub use token::{Keyword, Span, Token, TokenKind}; diff --git a/crates/nxc-frontend/src/parser.rs b/crates/nxc-frontend/src/parser.rs index aa6803c..1c44e64 100644 --- a/crates/nxc-frontend/src/parser.rs +++ b/crates/nxc-frontend/src/parser.rs @@ -1,298 +1,535 @@ use crate::ast::{ - Block, Expr, FieldDecl, 
FunctionDecl, Item, Module, Param, Stmt, StructDecl, TypeRef, UseDecl, + BinaryOp, Block, Expr, ExprKind, FieldDecl, FunctionDecl, IfStmt, Item, Literal, Module, + Param, Stmt, StmtKind, StructDecl, TypeRef, UnaryOp, }; +use crate::diagnostics::Diagnostic; use crate::token::{Keyword, Span, Token, TokenKind}; #[derive(Debug, Clone)] -pub struct ParseError { - pub message: String, - pub span: Span, +pub struct ParseResult { + pub module: Module, + pub diagnostics: Vec, } pub struct Parser { tokens: Vec, current: usize, + diagnostics: Vec, } impl Parser { pub fn new(tokens: Vec) -> Self { - Self { tokens, current: 0 } + Self { + tokens, + current: 0, + diagnostics: Vec::new(), + } } - pub fn parse_module(&mut self) -> Result { + pub fn parse_module(mut self) -> ParseResult { + let start = self.peek().span; let mut items = Vec::new(); + + self.skip_newlines(); while !self.is_at_end() { - self.skip_newlines(); - if self.is_at_end() { - break; + if let Some(item) = self.parse_item() { + items.push(item); + } else { + self.synchronize_item(); } - items.push(self.parse_item()?); self.skip_newlines(); } - Ok(Module { items }) + + let end = self.peek().span; + ParseResult { + module: Module { + span: start.merge(end), + items, + }, + diagnostics: self.diagnostics, + } } - fn parse_item(&mut self) -> Result { - if self.matches_keyword(Keyword::Use) { - return self.parse_use().map(Item::Use); - } - + fn parse_item(&mut self) -> Option { let is_public = self.matches_keyword(Keyword::Pub); - let is_async = self.matches_keyword(Keyword::Async); if self.matches_keyword(Keyword::Fn) { - return self.parse_function(is_public, is_async).map(Item::Function); + return self.parse_function(is_public).map(Item::Function); } if self.matches_keyword(Keyword::Struct) { return self.parse_struct(is_public).map(Item::Struct); } - Err(self.error_here("expected module item")) + let token = self.peek().clone(); + self.error_here( + "expected top-level declaration (`fn` or `struct`)", + token.span, 
+ ); + None } - fn parse_use(&mut self) -> Result { - let start = self.previous_span(); - let mut path = Vec::new(); - path.push(self.expect_identifier()?); - while self.matches(TokenKind::Dot) { - path.push(self.expect_identifier()?); - } - Ok(UseDecl { path, span: start }) - } - - fn parse_function( - &mut self, - is_public: bool, - is_async: bool, - ) -> Result { - let start = self.previous_span(); - let name = self.expect_identifier()?; + fn parse_function(&mut self, is_public: bool) -> Option { + let start = self.previous().span; + let (name, _) = self.consume_identifier("expected function name")?; self.expect(TokenKind::LeftParen, "expected '(' after function name")?; + let mut params = Vec::new(); if !self.check(&TokenKind::RightParen) { loop { - let param_name = self.expect_identifier()?; + let (param_name, param_span) = + self.consume_identifier("expected parameter name")?; self.expect(TokenKind::Colon, "expected ':' after parameter name")?; let ty = self.parse_type()?; params.push(Param { name: param_name, - ty, - span: start, + ty: ty.clone(), + span: param_span.merge(ty.span), }); - if !self.matches(TokenKind::Comma) { + + if !self.matches(&TokenKind::Comma) { break; } } } - self.expect(TokenKind::RightParen, "expected ')' after parameters")?; - let return_type = if self.matches(TokenKind::Arrow) { + self.expect(TokenKind::RightParen, "expected ')' after parameters")?; + let return_type = if self.matches(&TokenKind::Arrow) { Some(self.parse_type()?) 
} else { None }; - - self.expect(TokenKind::Colon, "expected ':' before function body")?; - self.skip_newlines(); - let body = self.parse_block()?; - - Ok(FunctionDecl { + let body = self.parse_block_after_colon("expected function body")?; + Some(FunctionDecl { is_public, - is_async, name, params, return_type, + span: start.merge(body.span), body, - span: start, }) } - fn parse_struct(&mut self, is_public: bool) -> Result { - let start = self.previous_span(); - let name = self.expect_identifier()?; + fn parse_struct(&mut self, is_public: bool) -> Option { + let start = self.previous().span; + let (name, _) = self.consume_identifier("expected struct name")?; self.expect(TokenKind::Colon, "expected ':' after struct name")?; - self.skip_newlines(); + self.expect_newline("expected newline after struct declaration header")?; self.expect(TokenKind::Indent, "expected indented struct body")?; let mut fields = Vec::new(); + self.skip_newlines(); while !self.check(&TokenKind::Dedent) && !self.is_at_end() { - self.skip_newlines(); - if self.check(&TokenKind::Dedent) { - break; - } - let field_name = self.expect_identifier()?; + let (field_name, field_span) = match self.consume_identifier("expected field name") { + Some(value) => value, + None => { + self.synchronize_statement(); + self.skip_newlines(); + continue; + } + }; self.expect(TokenKind::Colon, "expected ':' after field name")?; - let ty = self.parse_type()?; + let ty = match self.parse_type() { + Some(ty) => ty, + None => { + self.synchronize_statement(); + self.skip_newlines(); + continue; + } + }; fields.push(FieldDecl { name: field_name, - ty, - span: start, + ty: ty.clone(), + span: field_span.merge(ty.span), }); self.skip_newlines(); } - self.expect(TokenKind::Dedent, "expected end of struct body")?; - - Ok(StructDecl { + let end = self.expect(TokenKind::Dedent, "expected end of struct body")?; + Some(StructDecl { is_public, name, fields, - span: start, + span: start.merge(end), }) } - fn parse_block(&mut self) 
-> Result { - self.expect(TokenKind::Indent, "expected indented block")?; + fn parse_block_after_colon(&mut self, context_message: &str) -> Option { + self.expect(TokenKind::Colon, "expected ':' before block")?; + self.expect_newline(context_message)?; + let indent_span = self.expect(TokenKind::Indent, "expected indented block")?; let mut statements = Vec::new(); + self.skip_newlines(); while !self.check(&TokenKind::Dedent) && !self.is_at_end() { - self.skip_newlines(); - if self.check(&TokenKind::Dedent) { - break; + match self.parse_statement() { + Some(statement) => statements.push(statement), + None => self.synchronize_statement(), } - statements.push(self.parse_statement()?); self.skip_newlines(); } - self.expect(TokenKind::Dedent, "expected end of block")?; - Ok(Block { statements }) + let end = self.expect(TokenKind::Dedent, "expected end of block")?; + Some(Block { + statements, + span: indent_span.merge(end), + }) } - fn parse_statement(&mut self) -> Result { + fn parse_statement(&mut self) -> Option { if self.matches_keyword(Keyword::Let) { - return self.parse_let(false); + return self.parse_let_statement(); } - if self.matches_keyword(Keyword::Var) { - return self.parse_let(true); - } - if self.matches_keyword(Keyword::Return) { - let span = self.previous_span(); - if self.check(&TokenKind::Newline) || self.check(&TokenKind::Dedent) { - return Ok(Stmt::Return(None, span)); - } - let expr = self.parse_expression()?; - return Ok(Stmt::Return(Some(expr), span)); - } - Ok(Stmt::Expr(self.parse_expression()?)) - } - fn parse_let(&mut self, mutable: bool) -> Result { - let span = self.previous_span(); - let name = self.expect_identifier()?; - let ty = if self.matches(TokenKind::Colon) { - Some(self.parse_type()?) 
- } else { - None - }; - self.expect(TokenKind::Equal, "expected '=' in variable declaration")?; - let value = self.parse_expression()?; - Ok(Stmt::Let { - mutable, - name, - ty, - value, + if self.matches_keyword(Keyword::Return) { + return Some(self.parse_return_statement()); + } + + if self.matches_keyword(Keyword::If) { + return self.parse_if_statement(); + } + + let expr = self.parse_expression(Precedence::Lowest)?; + let span = expr.span; + Some(Stmt { + kind: StmtKind::Expr(expr), span, }) } - fn parse_expression(&mut self) -> Result { - let mut expr = self.parse_primary()?; - while self.matches(TokenKind::LeftParen) { - let mut args = Vec::new(); - if !self.check(&TokenKind::RightParen) { - loop { - args.push(self.parse_expression()?); - if !self.matches(TokenKind::Comma) { - break; - } - } + fn parse_let_statement(&mut self) -> Option { + let start = self.previous().span; + let (name, name_span) = self.consume_identifier("expected variable name after `let`")?; + let ty = if self.matches(&TokenKind::Colon) { + Some(self.parse_type()?) 
+        } else {
+            None
+        };
+        self.expect(TokenKind::Equal, "expected '=' in let binding")?;
+        let value = self.parse_expression(Precedence::Lowest)?;
+
+        let mut span = start.merge(name_span).merge(value.span);
+        if let Some(ty) = &ty {
+            span = span.merge(ty.span);
+        }
+
+        Some(Stmt {
+            kind: StmtKind::Let { name, ty, value },
+            span,
+        })
+    }
+
+    fn parse_return_statement(&mut self) -> Stmt {
+        let start = self.previous().span;
+        if self.check(&TokenKind::Newline) || self.check(&TokenKind::Dedent) || self.is_at_end() {
+            return Stmt {
+                kind: StmtKind::Return(None),
+                span: start,
+            };
+        }
+
+        match self.parse_expression(Precedence::Lowest) {
+            Some(expr) => Stmt {
+                span: start.merge(expr.span),
+                kind: StmtKind::Return(Some(expr)),
+            },
+            None => Stmt {
+                kind: StmtKind::Return(None),
+                span: start,
+            },
+        }
+    }
+
+    fn parse_if_statement(&mut self) -> Option<Stmt> {
+        let start = self.previous().span;
+        let condition = self.parse_expression(Precedence::Lowest)?;
+        let then_block = self.parse_block_after_colon("expected newline after if condition")?;
+        self.skip_newlines();
+
+        let else_block = if self.matches_keyword(Keyword::Else) {
+            Some(self.parse_block_after_colon("expected newline after else")?)
+ } else { + None + }; + + let end = else_block + .as_ref() + .map(|block| block.span) + .unwrap_or(then_block.span); + + Some(Stmt { + span: start.merge(end), + kind: StmtKind::If(IfStmt { + condition, + then_block, + else_block, + span: start.merge(end), + }), + }) + } + + fn parse_expression(&mut self, min_precedence: Precedence) -> Option { + let mut left = self.parse_prefix()?; + + loop { + if self.check(&TokenKind::LeftParen) && min_precedence <= Precedence::Call { + left = self.finish_call(left)?; + continue; } - let span = self.previous_span(); - self.expect(TokenKind::RightParen, "expected ')' after arguments")?; - expr = Expr::Call { - callee: Box::new(expr), - args, + + let Some((op, precedence)) = self.current_binary_operator() else { + break; + }; + + if precedence < min_precedence { + break; + } + + self.advance(); + let right = self.parse_expression(precedence.next())?; + let span = left.span.merge(right.span); + left = Expr { + kind: ExprKind::Binary { + left: Box::new(left), + op, + right: Box::new(right), + }, span, }; } - Ok(expr) + + Some(left) } - fn parse_primary(&mut self) -> Result { + fn parse_prefix(&mut self) -> Option { + if self.matches(&TokenKind::Bang) { + let operator_span = self.previous().span; + let expr = self.parse_expression(Precedence::Unary)?; + return Some(Expr { + span: operator_span.merge(expr.span), + kind: ExprKind::Unary { + op: UnaryOp::Not, + expr: Box::new(expr), + }, + }); + } + + if self.matches(&TokenKind::Minus) { + let operator_span = self.previous().span; + let expr = self.parse_expression(Precedence::Unary)?; + return Some(Expr { + span: operator_span.merge(expr.span), + kind: ExprKind::Unary { + op: UnaryOp::Negate, + expr: Box::new(expr), + }, + }); + } + + self.parse_primary() + } + + fn parse_primary(&mut self) -> Option { let token = self.advance().clone(); match token.kind { - TokenKind::Identifier(value) => Ok(Expr::Identifier(value, token.span)), - TokenKind::Integer(value) => { - let parsed = 
value.parse::().unwrap_or_default(); - Ok(Expr::Integer(parsed, token.span)) - } - TokenKind::String(value) => Ok(Expr::String(value, token.span)), - _ => Err(ParseError { - message: "expected expression".to_string(), + TokenKind::Identifier(name) => Some(Expr { + kind: ExprKind::Identifier(name), span: token.span, }), + TokenKind::Integer(value) => { + let value = value.parse::().unwrap_or_default(); + Some(Expr { + kind: ExprKind::Literal(Literal::Integer(value)), + span: token.span, + }) + } + TokenKind::Float(value) => { + let value = value.parse::().unwrap_or_default(); + Some(Expr { + kind: ExprKind::Literal(Literal::Float(value)), + span: token.span, + }) + } + TokenKind::String(value) => Some(Expr { + kind: ExprKind::Literal(Literal::String(value)), + span: token.span, + }), + TokenKind::Bool(value) => Some(Expr { + kind: ExprKind::Literal(Literal::Bool(value)), + span: token.span, + }), + TokenKind::LeftParen => { + let start = token.span; + let expr = self.parse_expression(Precedence::Lowest)?; + let end = self.expect(TokenKind::RightParen, "expected ')' after expression")?; + Some(Expr { + kind: ExprKind::Group(Box::new(expr)), + span: start.merge(end), + }) + } + _ => { + self.error_here("expected expression", token.span); + None + } } } - fn parse_type(&mut self) -> Result { + fn finish_call(&mut self, callee: Expr) -> Option { + let start = callee.span; + self.expect(TokenKind::LeftParen, "expected '(' after callee")?; + let mut args = Vec::new(); + if !self.check(&TokenKind::RightParen) { + loop { + args.push(self.parse_expression(Precedence::Lowest)?); + if !self.matches(&TokenKind::Comma) { + break; + } + } + } + let end = self.expect(TokenKind::RightParen, "expected ')' after arguments")?; + Some(Expr { + span: start.merge(end), + kind: ExprKind::Call { + callee: Box::new(callee), + args, + }, + }) + } + + fn parse_type(&mut self) -> Option { let token = self.advance().clone(); match token.kind { - TokenKind::Identifier(name) => Ok(TypeRef { + 
TokenKind::Identifier(name) => Some(TypeRef { name, span: token.span, }), - _ => Err(ParseError { - message: "expected type name".to_string(), - span: token.span, - }), + _ => { + self.error_here("expected type name", token.span); + None + } } } - fn expect_identifier(&mut self) -> Result { + fn current_binary_operator(&self) -> Option<(BinaryOp, Precedence)> { + let token = &self.peek().kind; + match token { + TokenKind::OrOr => Some((BinaryOp::LogicalOr, Precedence::LogicalOr)), + TokenKind::AndAnd => Some((BinaryOp::LogicalAnd, Precedence::LogicalAnd)), + TokenKind::EqualEqual => Some((BinaryOp::Equal, Precedence::Equality)), + TokenKind::BangEqual => Some((BinaryOp::NotEqual, Precedence::Equality)), + TokenKind::Less => Some((BinaryOp::Less, Precedence::Comparison)), + TokenKind::LessEqual => Some((BinaryOp::LessEqual, Precedence::Comparison)), + TokenKind::Greater => Some((BinaryOp::Greater, Precedence::Comparison)), + TokenKind::GreaterEqual => Some((BinaryOp::GreaterEqual, Precedence::Comparison)), + TokenKind::Plus => Some((BinaryOp::Add, Precedence::Term)), + TokenKind::Minus => Some((BinaryOp::Subtract, Precedence::Term)), + TokenKind::Star => Some((BinaryOp::Multiply, Precedence::Factor)), + TokenKind::Slash => Some((BinaryOp::Divide, Precedence::Factor)), + TokenKind::Percent => Some((BinaryOp::Remainder, Precedence::Factor)), + _ => None, + } + } + + fn consume_identifier(&mut self, message: &str) -> Option<(String, Span)> { let token = self.advance().clone(); match token.kind { - TokenKind::Identifier(name) => Ok(name), - _ => Err(ParseError { - message: "expected identifier".to_string(), - span: token.span, - }), + TokenKind::Identifier(name) => Some((name, token.span)), + _ => { + self.error_here(message, token.span); + None + } } } - fn expect(&mut self, kind: TokenKind, message: &str) -> Result<(), ParseError> { - if self.matches(kind) { - Ok(()) + fn expect(&mut self, kind: TokenKind, message: &str) -> Option { + if self.matches(&kind) { + 
Some(self.previous().span) } else { - Err(self.error_here(message)) + self.error_here(message, self.peek().span); + None + } + } + + fn expect_newline(&mut self, message: &str) -> Option { + if self.matches(&TokenKind::Newline) { + while self.matches(&TokenKind::Newline) {} + Some(self.previous().span) + } else { + self.error_here(message, self.peek().span); + None } } fn matches_keyword(&mut self, keyword: Keyword) -> bool { - if matches!(self.peek().kind, TokenKind::Keyword(found) if found == keyword) { - self.advance(); - return true; + match &self.peek().kind { + TokenKind::Keyword(found) if *found == keyword => { + self.advance(); + true + } + _ => false, } - false } - fn matches(&mut self, kind: TokenKind) -> bool { - if self.check(&kind) { + fn matches(&mut self, kind: &TokenKind) -> bool { + if self.check(kind) { self.advance(); - return true; + true + } else { + false } - false } fn check(&self, kind: &TokenKind) -> bool { - if self.is_at_end() { - return matches!(kind, TokenKind::Eof); - } - same_variant(&self.peek().kind, kind) + self.peek().kind.same_variant(kind) } fn skip_newlines(&mut self) { - while self.matches(TokenKind::Newline) {} + while self.matches(&TokenKind::Newline) {} + } + + fn synchronize_item(&mut self) { + while !self.is_at_end() { + if self.check(&TokenKind::Newline) { + self.advance(); + if self.check_keyword(Keyword::Fn) + || self.check_keyword(Keyword::Struct) + || self.check_keyword(Keyword::Pub) + { + return; + } + continue; + } + + if self.check_keyword(Keyword::Fn) + || self.check_keyword(Keyword::Struct) + || self.check_keyword(Keyword::Pub) + { + return; + } + + self.advance(); + } + } + + fn synchronize_statement(&mut self) { + while !self.is_at_end() { + if self.check(&TokenKind::Newline) { + self.advance(); + return; + } + if self.check(&TokenKind::Dedent) { + return; + } + self.advance(); + } + } + + fn error_here(&mut self, message: impl Into, span: Span) { + self.diagnostics.push(Diagnostic::error(message, span)); + } 
+ + fn check_keyword(&self, keyword: Keyword) -> bool { + matches!(self.peek().kind, TokenKind::Keyword(found) if found == keyword) } fn is_at_end(&self) -> bool { @@ -303,30 +540,46 @@ impl Parser { &self.tokens[self.current] } - fn advance(&mut self) -> &Token { - if !self.is_at_end() { - self.current += 1; - } + fn previous(&self) -> &Token { &self.tokens[self.current.saturating_sub(1)] } - fn previous_span(&self) -> Span { - if self.current == 0 { - Span::default() + fn advance(&mut self) -> &Token { + if !self.is_at_end() { + let index = self.current; + self.current += 1; + &self.tokens[index] } else { - self.tokens[self.current - 1].span - } - } - - fn error_here(&self, message: &str) -> ParseError { - ParseError { - message: message.to_string(), - span: self.peek().span, + &self.tokens[self.current] } } } -fn same_variant(left: &TokenKind, right: &TokenKind) -> bool { - std::mem::discriminant(left) == std::mem::discriminant(right) +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum Precedence { + Lowest = 0, + LogicalOr = 1, + LogicalAnd = 2, + Equality = 3, + Comparison = 4, + Term = 5, + Factor = 6, + Unary = 7, + Call = 8, } +impl Precedence { + fn next(self) -> Self { + match self { + Precedence::Lowest => Precedence::LogicalOr, + Precedence::LogicalOr => Precedence::LogicalAnd, + Precedence::LogicalAnd => Precedence::Equality, + Precedence::Equality => Precedence::Comparison, + Precedence::Comparison => Precedence::Term, + Precedence::Term => Precedence::Factor, + Precedence::Factor => Precedence::Unary, + Precedence::Unary => Precedence::Call, + Precedence::Call => Precedence::Call, + } + } +} diff --git a/crates/nxc-frontend/src/token.rs b/crates/nxc-frontend/src/token.rs index 1aae286..45ed6da 100644 --- a/crates/nxc-frontend/src/token.rs +++ b/crates/nxc-frontend/src/token.rs @@ -4,22 +4,48 @@ use std::fmt; pub struct Span { pub start: usize, pub end: usize, - pub line: usize, - pub column: usize, + pub start_line: usize, + pub 
start_column: usize, + pub end_line: usize, + pub end_column: usize, } impl Span { - pub fn new(start: usize, end: usize, line: usize, column: usize) -> Self { + pub fn new( + start: usize, + end: usize, + start_line: usize, + start_column: usize, + end_line: usize, + end_column: usize, + ) -> Self { Self { start, end, - line, - column, + start_line, + start_column, + end_line, + end_column, + } + } + + pub fn single(position: usize, line: usize, column: usize) -> Self { + Self::new(position, position, line, column, line, column) + } + + pub fn merge(self, other: Self) -> Self { + Self { + start: self.start.min(other.start), + end: self.end.max(other.end), + start_line: self.start_line, + start_column: self.start_column, + end_line: other.end_line, + end_column: other.end_column, } } } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq)] pub struct Token { pub kind: TokenKind, pub span: Span, @@ -31,23 +57,19 @@ impl Token { } } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq)] pub enum TokenKind { Identifier(String), Integer(String), + Float(String), String(String), + Bool(bool), Keyword(Keyword), LeftParen, RightParen, - LeftBrace, - RightBrace, - LeftBracket, - RightBracket, Comma, - Dot, Colon, Arrow, - FatArrow, Plus, Minus, Star, @@ -61,52 +83,41 @@ pub enum TokenKind { LessEqual, Greater, GreaterEqual, - Question, + AndAnd, + OrOr, Newline, Indent, Dedent, Eof, } +impl TokenKind { + pub fn same_variant(&self, other: &Self) -> bool { + std::mem::discriminant(self) == std::mem::discriminant(other) + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Keyword { - Async, Else, Fn, - For, If, - Impl, - Import, - In, Let, - Match, Pub, Return, Struct, - Use, - Var, - While, } impl Keyword { pub fn from_ident(value: &str) -> Option { match value { - "async" => Some(Self::Async), "else" => Some(Self::Else), "fn" => Some(Self::Fn), - "for" => Some(Self::For), "if" => Some(Self::If), - "impl" => 
Some(Self::Impl), - "import" => Some(Self::Import), - "in" => Some(Self::In), "let" => Some(Self::Let), - "match" => Some(Self::Match), "pub" => Some(Self::Pub), "return" => Some(Self::Return), "struct" => Some(Self::Struct), - "use" => Some(Self::Use), - "var" => Some(Self::Var), - "while" => Some(Self::While), _ => None, } } @@ -117,7 +128,9 @@ impl fmt::Display for TokenKind { match self { TokenKind::Identifier(name) => write!(f, "identifier({name})"), TokenKind::Integer(value) => write!(f, "integer({value})"), + TokenKind::Float(value) => write!(f, "float({value})"), TokenKind::String(value) => write!(f, "string({value})"), + TokenKind::Bool(value) => write!(f, "bool({value})"), TokenKind::Keyword(keyword) => write!(f, "keyword({keyword:?})"), other => write!(f, "{other:?}"), } diff --git a/crates/nxc-frontend/tests/lexer_tests.rs b/crates/nxc-frontend/tests/lexer_tests.rs new file mode 100644 index 0000000..38cbaed --- /dev/null +++ b/crates/nxc-frontend/tests/lexer_tests.rs @@ -0,0 +1,48 @@ +use nxc_frontend::{Keyword, Lexer, TokenKind}; + +#[test] +fn lexes_literals_and_operators() { + let result = Lexer::new("let value = 12.5 + 3 * -2 && true\n").lex(); + assert!(result.diagnostics.is_empty()); + + let kinds: Vec = result.tokens.into_iter().map(|token| token.kind).collect(); + assert_eq!( + kinds, + vec![ + TokenKind::Keyword(Keyword::Let), + TokenKind::Identifier("value".into()), + TokenKind::Equal, + TokenKind::Float("12.5".into()), + TokenKind::Plus, + TokenKind::Integer("3".into()), + TokenKind::Star, + TokenKind::Minus, + TokenKind::Integer("2".into()), + TokenKind::AndAnd, + TokenKind::Bool(true), + TokenKind::Newline, + TokenKind::Eof, + ] + ); +} + +#[test] +fn lexes_indent_and_dedent_tokens() { + let source = "fn main() -> Int:\n let value = 1\n return value\n"; + let result = Lexer::new(source).lex(); + assert!(result.diagnostics.is_empty()); + + let kinds: Vec = result.tokens.into_iter().map(|token| token.kind).collect(); + 
assert!(kinds.iter().any(|kind| matches!(kind, TokenKind::Indent))); + assert!(kinds.iter().any(|kind| matches!(kind, TokenKind::Dedent))); +} + +#[test] +fn reports_unterminated_string() { + let result = Lexer::new("let value = \"oops\n").lex(); + assert_eq!(result.diagnostics.len(), 1); + assert!(result.diagnostics[0] + .message + .contains("unterminated string literal")); +} + diff --git a/crates/nxc-frontend/tests/parser_tests.rs b/crates/nxc-frontend/tests/parser_tests.rs new file mode 100644 index 0000000..b553bc2 --- /dev/null +++ b/crates/nxc-frontend/tests/parser_tests.rs @@ -0,0 +1,180 @@ +use nxc_frontend::{ + BinaryOp, ExprKind, Item, Lexer, Literal, Parser, StmtKind, UnaryOp, +}; + +fn parse(source: &str) -> nxc_frontend::ParseResult { + let lexed = Lexer::new(source).lex(); + assert!( + lexed.diagnostics.is_empty(), + "unexpected lexer diagnostics: {:?}", + lexed.diagnostics + ); + Parser::new(lexed.tokens).parse_module() +} + +#[test] +fn parses_function_with_if_else_and_returns() { + let source = "\ +fn classify(value: Int) -> Int: + if value > 10: + return 1 + else: + return 0 +"; + + let parsed = parse(source); + assert!(parsed.diagnostics.is_empty(), "{:?}", parsed.diagnostics); + assert_eq!(parsed.module.items.len(), 1); + + let Item::Function(function) = &parsed.module.items[0] else { + panic!("expected function item"); + }; + + assert_eq!(function.name, "classify"); + assert_eq!(function.params.len(), 1); + assert_eq!(function.body.statements.len(), 1); + + let StmtKind::If(if_stmt) = &function.body.statements[0].kind else { + panic!("expected if statement"); + }; + + match &if_stmt.condition.kind { + ExprKind::Binary { op, .. 
} => assert_eq!(*op, BinaryOp::Greater), + other => panic!("unexpected condition: {other:?}"), + } + assert!(if_stmt.else_block.is_some()); +} + +#[test] +fn respects_binary_operator_precedence() { + let source = "\ +fn main() -> Int: + return 1 + 2 * 3 == 7 || false +"; + + let parsed = parse(source); + assert!(parsed.diagnostics.is_empty(), "{:?}", parsed.diagnostics); + + let Item::Function(function) = &parsed.module.items[0] else { + panic!("expected function item"); + }; + + let StmtKind::Return(Some(expr)) = &function.body.statements[0].kind else { + panic!("expected return statement"); + }; + + let ExprKind::Binary { left, op, right } = &expr.kind else { + panic!("expected binary expression"); + }; + assert_eq!(*op, BinaryOp::LogicalOr); + + let ExprKind::Binary { + left: equality_left, + op: equality_op, + right: equality_right, + } = &left.kind + else { + panic!("expected equality expression"); + }; + assert_eq!(*equality_op, BinaryOp::Equal); + + let ExprKind::Binary { + left: add_left, + op: add_op, + right: add_right, + } = &equality_left.kind + else { + panic!("expected additive expression"); + }; + assert_eq!(*add_op, BinaryOp::Add); + + match &add_left.kind { + ExprKind::Literal(Literal::Integer(1)) => {} + other => panic!("unexpected left additive operand: {other:?}"), + } + + let ExprKind::Binary { + left: mul_left, + op: mul_op, + right: mul_right, + } = &add_right.kind + else { + panic!("expected multiplicative expression"); + }; + assert_eq!(*mul_op, BinaryOp::Multiply); + + match &mul_left.kind { + ExprKind::Literal(Literal::Integer(2)) => {} + other => panic!("unexpected left multiplicative operand: {other:?}"), + } + match &mul_right.kind { + ExprKind::Literal(Literal::Integer(3)) => {} + other => panic!("unexpected right multiplicative operand: {other:?}"), + } + match &equality_right.kind { + ExprKind::Literal(Literal::Integer(7)) => {} + other => panic!("unexpected equality right operand: {other:?}"), + } + match &right.kind { + 
ExprKind::Literal(Literal::Bool(false)) => {} + other => panic!("unexpected logical-or right operand: {other:?}"), + } +} + +#[test] +fn parses_grouping_unary_and_calls() { + let source = "\ +fn main() -> Int: + return -(compute(1, 2) + 3) +"; + + let parsed = parse(source); + assert!(parsed.diagnostics.is_empty(), "{:?}", parsed.diagnostics); + + let Item::Function(function) = &parsed.module.items[0] else { + panic!("expected function item"); + }; + let StmtKind::Return(Some(expr)) = &function.body.statements[0].kind else { + panic!("expected return statement"); + }; + + let ExprKind::Unary { op, expr: inner } = &expr.kind else { + panic!("expected unary expression"); + }; + assert_eq!(*op, UnaryOp::Negate); + + let ExprKind::Group(grouped) = &inner.kind else { + panic!("expected grouped expression"); + }; + + let ExprKind::Binary { left, op, .. } = &grouped.kind else { + panic!("expected additive expression"); + }; + assert_eq!(*op, BinaryOp::Add); + + let ExprKind::Call { args, .. } = &left.kind else { + panic!("expected call expression"); + }; + assert_eq!(args.len(), 2); +} + +#[test] +fn recovers_and_reports_syntax_errors() { + let source = "\ +fn broken(value: Int) -> Int + let x = 1 + +struct Config: + port: Int +"; + + let parsed = parse(source); + assert!(!parsed.diagnostics.is_empty()); + assert_eq!(parsed.module.items.len(), 1); + + let Item::Struct(struct_decl) = &parsed.module.items[0] else { + panic!("expected recovered struct declaration"); + }; + assert_eq!(struct_decl.name, "Config"); +} + diff --git a/examples/backend-api/main.nx b/examples/backend-api/main.nx index f2d1c93..e80404b 100644 --- a/examples/backend-api/main.nx +++ b/examples/backend-api/main.nx @@ -1,30 +1,18 @@ -use core.env -use db.postgres.Pool -use web.http.{App, Response} +struct AppConfig: + port: Int + service_name: String -struct AppState: - pool: Pool +fn build_message(name: String, port: Int) -> String: + if port > 0 && port < 65536: + return name + else: + return 
"invalid" -async fn health(state: AppState) -> Response: - let version = env.get("APP_VERSION").or("dev") - let row = await state.pool.query_one( - "select now() as now" - )? - - Response.json({ - "status": "ok", - "version": version, - "database_time": row["now"] - }) - -async fn main() -> Result: - let database_url = env.require("DATABASE_URL")? - let port = env.get("PORT").or("8080").to_int()? - let pool = Pool.connect(database_url, max: 16)? - - let app = App.new() - .state(AppState { pool: pool }) - .get("/health", health) - - await app.listen("0.0.0.0", port)? +fn main() -> Int: + let config = build_message("backend-api", 8080) + let enabled = true || false + if enabled: + return 0 + else: + return 1