From 04c3fdcd07faa25a3d18366402a3c6b3508fc8ae Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Sun, 14 May 2023 18:11:38 +0200 Subject: [PATCH] progress --- example.ed | 6 +- simple.ed | 5 ++ src/ast/mod.rs | 2 +- src/codegen.rs | 113 +++++++++++++++++--------------- src/grammar.lalrpop | 152 ++++++++++++++++++++++++++------------------ src/lexer.rs | 1 + src/main.rs | 38 +++++------ src/tokens.rs | 29 +++++++-- 8 files changed, 200 insertions(+), 146 deletions(-) create mode 100644 simple.ed diff --git a/example.ed b/example.ed index e80bb0e81..d863d7052 100644 --- a/example.ed +++ b/example.ed @@ -1,9 +1,9 @@ -fn add(a: i64, b: i64) -> i64 { +fn add(a: i32, b: i32) -> i32 { return a + b; } -fn main() { +fn main() -> i32 { let x = 2 + 3; let y = add(x, 4); - return; + return y; } diff --git a/simple.ed b/simple.ed new file mode 100644 index 000000000..d3131726c --- /dev/null +++ b/simple.ed @@ -0,0 +1,5 @@ + +fn main(x: i64) -> i64 { + let x = 2 + 3; + return x; +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7e1cf4815..41494b3af 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -23,7 +23,7 @@ impl OpCode { pub enum LiteralValue { String, Integer { - bits: usize, + bits: Option, signed: bool, value: String, }, diff --git a/src/codegen.rs b/src/codegen.rs index e050038f4..b37c99b63 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -1,4 +1,8 @@ -use std::collections::HashMap; +use std::{ + collections::HashMap, + path::{Path, PathBuf}, + todo, +}; use color_eyre::Result; use inkwell::{ @@ -10,21 +14,19 @@ use inkwell::{ values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum}, }; use itertools::{Either, Itertools}; -/* -use statement::Statement; -use crate::ast::{self, statement, Expression, Function, Identifier, OpCode, SpanValue, Term, LiteralValue}; +use crate::ast::{self, Expression, Function, LiteralValue, OpCode, Statement}; #[derive(Debug, Clone)] pub struct ProgramData { - pub filename: String, + pub filename: PathBuf, pub source: String, } impl ProgramData { - pub fn new(filename: &str, source: &str) -> Self { + pub fn new(filename: &Path, source: &str) -> Self { Self { - filename: filename.to_string(), + filename: filename.to_path_buf(), source: source.to_string(), } } @@ -65,9 +67,8 @@ impl<'ctx> CodeGen<'ctx> { // create the llvm functions first. for statement in &self.ast.statements { - match &statement.value { - Statement::Assignment(_) => unreachable!(), - Statement::Definition(_) => todo!(), + match &statement { + Statement::Variable { .. } => unreachable!(), Statement::Return(_) => unreachable!(), Statement::Function(function) => { functions.push(function); @@ -106,7 +107,7 @@ impl<'ctx> CodeGen<'ctx> { let args_types: Vec> = function .params .iter() - .map(|param| param.type_name.0.value.as_str()) + .map(|param| param.type_name.as_str()) .map(|t| self.get_llvm_type(t)) .try_collect()?; @@ -114,18 +115,17 @@ impl<'ctx> CodeGen<'ctx> { args_types.into_iter().map(|t| t.into()).collect_vec(); let fn_type = match &function.return_type { - Some(id) => self.get_llvm_type(&id.0.value)?.fn_type(&args_types, false), + Some(id) => self.get_llvm_type(id)?.fn_type(&args_types, false), None => self.context.void_type().fn_type(&args_types, false), }; - self.module - .add_function(&function.ident.0.value, fn_type, None); + self.module.add_function(&function.name, fn_type, None); Ok(()) } fn compile_function(&self, function: &Function) -> Result<()> { - let func = self.module.get_function(&function.ident.0.value).unwrap(); + let func = self.module.get_function(&function.name).unwrap(); let entry_block = self.context.append_basic_block(func, "entry"); self.builder.position_at_end(entry_block); @@ -135,7 +135,7 @@ impl<'ctx> CodeGen<'ctx> { for (i, param) in function.params.iter().enumerate() { let id = param.ident.clone(); variables.insert( - id.0.value.clone(), + id.clone(), func.get_nth_param(i.try_into().unwrap()) .expect("parameter"), ); @@ -145,7 +145,7 @@ impl<'ctx> CodeGen<'ctx> { let mut has_return = false; for statement in &function.body { - if let Statement::Return(_) = statement.value { + if let Statement::Return(_) = statement { has_return = true } self.compile_statement(&entry_block, statement, &mut variables)?; @@ -161,24 +161,17 @@ impl<'ctx> CodeGen<'ctx> { fn compile_statement( &self, block: &BasicBlock, - statement: &SpanValue, + statement: &Statement, variables: &mut HashMap>, ) -> Result<()> { - match &statement.value { + match statement { // Variable assignment - Statement::Assignment(body) => { + Statement::Variable { name, value } => { let result = self - .compile_expression(block, &body.expr, variables)? + .compile_expression(block, value, variables)? .expect("should have result"); - variables.insert(body.ident.0.value.clone(), result); - } - Statement::Definition(body) => { - let result = self - .compile_expression(block, &body.expr, variables)? - .expect("should have result"); - - variables.insert(body.ident.0.value.clone(), result); + variables.insert(name.clone(), result); } Statement::Return(ret) => { if let Some(ret) = ret { @@ -199,24 +192,28 @@ impl<'ctx> CodeGen<'ctx> { pub fn compile_expression( &self, block: &BasicBlock, - expr: &SpanValue>, + expr: &Expression, variables: &mut HashMap>, ) -> Result>> { - Ok(match &*expr.value { - Expression::Literal(term) => Some(self.compile_term(&term, variables)?), - Expression::Call(func_id, args) => self.compile_call(block, func_id, args, variables)?, - Expression::BinaryOp(lhs, op, rhs) => Some(self.compile_op(block, lhs, op, rhs, variables)?), + Ok(match expr { + Expression::Variable(term) => Some(self.compile_variable(term, variables)?), + Expression::Literal(term) => Some(self.compile_literal(term)?), + Expression::Call { function, args } => { + self.compile_call(block, function, args, variables)? + } + Expression::BinaryOp(lhs, op, rhs) => { + Some(self.compile_binary_op(block, lhs, op, rhs, variables)?) + } }) } pub fn compile_call( &self, block: &BasicBlock, - func_id: &Identifier, - args: &[SpanValue>], + func_name: &str, + args: &[Box], variables: &mut HashMap>, ) -> Result>> { - let func_name = &func_id.0.value; let function = self.module.get_function(func_name).expect("should exist"); let mut value_args: Vec = Vec::with_capacity(args.len()); @@ -239,12 +236,12 @@ impl<'ctx> CodeGen<'ctx> { }) } - pub fn compile_op( + pub fn compile_binary_op( &self, block: &BasicBlock, - lhs: &SpanValue>, + lhs: &Expression, op: &OpCode, - rhs: &SpanValue>, + rhs: &Expression, variables: &mut HashMap>, ) -> Result> { let lhs = self @@ -267,21 +264,33 @@ impl<'ctx> CodeGen<'ctx> { Ok(result.as_basic_value_enum()) } - pub fn compile_term( - &self, - term: &LiteralValue, - variables: &mut HashMap>, - ) -> Result> { + pub fn compile_literal(&self, term: &LiteralValue) -> Result> { let value = match term { - Term::Identifier(ident) => *variables.get(&ident.0.value).expect("value"), - Term::Number(num) => self - .context - .i64_type() - .const_int(num.0.value.try_into()?, true) - .as_basic_value_enum(), + LiteralValue::String => todo!(), + LiteralValue::Integer { + bits, + signed: _, + value, + } => { + // todo: type resolution for bit size? + let bits = bits.unwrap_or(32); + + self.context + .custom_width_int_type(bits) + .const_int(value.parse().unwrap(), false) + .as_basic_value_enum() + } }; Ok(value) } + + pub fn compile_variable( + &self, + variable: &str, + variables: &mut HashMap>, + ) -> Result> { + let var = *variables.get(variable).expect("value"); + Ok(var) + } } - */ diff --git a/src/grammar.lalrpop b/src/grammar.lalrpop index 8c9c125ce..7bd230317 100644 --- a/src/grammar.lalrpop +++ b/src/grammar.lalrpop @@ -1,8 +1,40 @@ use std::str::FromStr; -use crate::ast::*; +use crate::{ + ast, + tokens::Token, + lexer::LexicalError, +}; grammar; +extern { + type Location = usize; + type Error = LexicalError; + + enum Token { + "let" => Token::KeywordLet, + "print" => Token::KeywordPrint, + "identifier" => Token::Identifier(), + "int" => Token::Integer(), + "return" => Token::KeywordReturn, + "fn" => Token::KeywordFn, + "(" => Token::LeftParen, + ")" => Token::RightParen, + "{" => Token::LeftBracket, + "}" => Token::RightBracket, + "=" => Token::Assign, + ";" => Token::Semicolon, + ":" => Token::Colon, + "->" => Token::Arrow, + "," => Token::Coma, + "+" => Token::OperatorAdd, + "-" => Token::OperatorSub, + "*" => Token::OperatorMul, + "/" => Token::OperatorDiv, + "%" => Token::OperatorRem, + } +} + Comma: Vec = { ",")*> => match e { None => v, @@ -13,67 +45,11 @@ Comma: Vec = { } }; -ExprOp: OpCode = { - "+" => OpCode::Add, - "-" => OpCode::Sub, -}; - -FactorOp: OpCode = { - "*" => OpCode::Mul, - "/" => OpCode::Div, - "%" => OpCode::Rem, +pub Program: ast::Program = { + Statements => ast::Program::new(<>) } -Tier: Box = { - > => Box::new(Expression::BinaryOp(t, o, n)), - NextTier -}; - -Expr = Tier; -Factor = Tier; - -// Terms: variables, literals, calls -Term: Box = { - => Box::new(Expression::Variable(i)), - => Box::new(Expression::Literal(n)), - "(" > ")" => Box::new(Expression::Call { function: i, args: values}), - "(" ")" -}; - -Identifier: String = { - => i.to_string(), -}; - -Num: LiteralValue = => LiteralValue::Integer { bits: 32, signed: true, value: n.to_string()}; - -// Function handling -Param: Parameter = { - ":" => Parameter::new(<>) -}; - -Params = Comma; - -FunctionReturn: String = { - "->" => i.to_string(), -} - -Function: Function = { - "fn" "(" ")" "{" "}" => Function::new(i, a, s, r) -} - -// statements not including function definitions -BasicStatement: Statement = { - "let" "=" ";" => Statement::Variable { name: i, value: e}, - "=" ";" => Statement::Variable { name: i, value: e}, - "return" ";" => Statement::Return(e), -}; - -Statement: Statement = { - BasicStatement, - => Statement::Function(f), -}; - -Statements: Vec = { +Statements: Vec = { Statement => vec![<>], => { s.push(n); @@ -81,6 +57,58 @@ Statements: Vec = { }, }; -pub Program: Program = { - Statements => Program::new(<>) +Statement: ast::Statement = { + BasicStatement, + => ast::Statement::Function(f), +}; + +// statements not including function definitions +BasicStatement: ast::Statement = { + "let" "=" ";" => ast::Statement::Variable { name: i, value: e}, + "=" ";" => ast::Statement::Variable { name: i, value: e}, + "return" ";" => ast::Statement::Return(e), +}; + +ExprOp: ast::OpCode = { + "+" => ast::OpCode::Add, + "-" => ast::OpCode::Sub, +}; + +FactorOp: ast::OpCode = { + "*" => ast::OpCode::Mul, + "/" => ast::OpCode::Div, + "%" => ast::OpCode::Rem, +} + +Tier: Box = { + > => Box::new(ast::Expression::BinaryOp(t, o, n)), + NextTier +}; + +Expr = Tier; +Factor = Tier; + +// Terms: variables, literals, calls +Term: Box = { + => Box::new(ast::Expression::Variable(i)), + => Box::new(ast::Expression::Literal(n)), + "(" > ")" => Box::new(ast::Expression::Call { function: i, args: values}), + "(" ")" +}; + +Num: ast::LiteralValue = => ast::LiteralValue::Integer { bits: None, signed: true, value: n.to_string()}; + +// Function handling +Param: ast::Parameter = { + <"identifier"> ":" <"identifier"> => ast::Parameter::new(<>) +}; + +Params = Comma; + +FunctionReturn: String = { + "->" => i.to_string(), +} + +Function: ast::Function = { + "fn" "(" ")" "{" "}" => ast::Function::new(i, a, s, r) } diff --git a/src/lexer.rs b/src/lexer.rs index cbdb0ef0f..f56a76772 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -4,6 +4,7 @@ use crate::tokens::Token; pub type Spanned = Result<(Loc, Tok, Loc), Error>; +#[derive(Debug, Clone, Copy)] pub enum LexicalError { InvalidToken, } diff --git a/src/main.rs b/src/main.rs index 8ceee9275..bc19dcbfd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,11 +1,14 @@ #![allow(clippy::too_many_arguments)] use clap::{Parser, Subcommand}; +use codegen::ProgramData; use color_eyre::Result; use inkwell::{context::Context, execution_engine::JitFunction, OptimizationLevel}; use lalrpop_util::lalrpop_mod; use std::{fs, path::PathBuf, println}; +use crate::{ast::Program, lexer::Lexer}; + pub mod ast; pub mod check; pub mod codegen; @@ -90,12 +93,12 @@ fn main() -> Result<()> { match args.command { Commands::Check { input } => { - let code = fs::read_to_string(&input)?; - + let code = fs::read_to_string(input)?; + let lexer = Lexer::new(code.as_str()); let parser = grammar::ProgramParser::new(); - let ast = parser.parse(&code).unwrap(); + let ast = parser.parse(lexer).unwrap(); - let str_path = input.to_string_lossy(); + //let str_path = input.to_string_lossy(); //let program = ProgramData::new(&str_path, &code); //check_program(&program, &ast); } @@ -106,21 +109,17 @@ fn main() -> Result<()> { optimize: _, } => { let code = fs::read_to_string(&input)?; - + let lexer = Lexer::new(code.as_str()); let parser = grammar::ProgramParser::new(); - let ast = parser.parse(&code).unwrap(); + let ast: Program = parser.parse(lexer).unwrap(); - println!("{:#?}", ast); - - /* - let str_path = input.to_string_lossy(); - let program = ProgramData::new(&str_path, &code); + let program = ProgramData::new(&input, &code); let file_name = input.file_name().unwrap().to_string_lossy(); - if !check_program(&program, &ast) { - return Ok(()); - } + //if !check_program(&program, &ast) { + // return Ok(()); + //} let context = Context::create(); let codegen = codegen::CodeGen::new(&context, &file_name, program, ast)?; @@ -132,16 +131,14 @@ fn main() -> Result<()> { } else { println!("{generated_llvm_ir}"); } - */ } Commands::Run { input } => { let code = fs::read_to_string(&input)?; - + let lexer = Lexer::new(&code[..]); let parser = grammar::ProgramParser::new(); - let ast = parser.parse(&code).unwrap(); - /* - let str_path = input.to_string_lossy(); - let program = ProgramData::new(&str_path, &code); + let ast = parser.parse(lexer).unwrap(); + + let program = ProgramData::new(&input, &code); let file_name = input.file_name().unwrap().to_string_lossy(); @@ -158,7 +155,6 @@ fn main() -> Result<()> { execution_engine.get_function("main")?; main.call(); }; - */ } } diff --git a/src/tokens.rs b/src/tokens.rs index 0663293bc..ac8a7ab44 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,27 +1,42 @@ use logos::Logos; use std::fmt; -#[derive(Logos, Debug, PartialEq)] +// todo: https://github.com/maciejhirsz/logos/issues/133#issuecomment-619444615 +#[derive(Logos, Debug, PartialEq, Clone)] #[logos(skip r"[ \t\n\f]+", skip r"#.*\n?")] pub enum Token { - #[token("var")] - KeywordVar, + #[token("let")] + KeywordLet, #[token("print")] KeywordPrint, + #[token("fn")] + KeywordFn, + #[token("return")] + KeywordReturn, - #[regex("[_a-zA-Z][_0-9a-zA-Z]*", |lex| lex.slice().parse().ok())] + #[regex(r"_?\p{XID_Start}\p{XID_Continue}*", |lex| lex.slice().parse().ok())] Identifier(String), #[regex(r"\d+", |lex| lex.slice().parse().ok())] - Integer(i64), + Integer(String), #[token("(")] - LParen, + LeftParen, #[token(")")] - RParen, + RightParen, + #[token("{")] + LeftBracket, + #[token("}")] + RightBracket, #[token("=")] Assign, #[token(";")] Semicolon, + #[token(":")] + Colon, + #[token("->")] + Arrow, + #[token(",")] + Coma, #[token("+")] OperatorAdd,