From bab8eec51fbe759c9faf6244d241a30e2ae6daed Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Sat, 27 May 2023 17:14:48 +0200 Subject: [PATCH] type info improvements --- Cargo.lock | 36 ++-- Cargo.toml | 2 +- simple.ed | 29 +-- src/ast/mod.rs | 68 ++++-- src/check.rs | 100 +++------ src/codegen.rs | 492 ++++++++++++++++++++++++++++++-------------- src/grammar.lalrpop | 83 +++++++- src/lexer.rs | 16 +- src/main.rs | 36 ++-- src/tokens.rs | 38 ++++ 10 files changed, 606 insertions(+), 294 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b2a412d41..d618a1646 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -415,9 +415,9 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ "hermit-abi", "libc", @@ -461,7 +461,7 @@ dependencies = [ "petgraph", "pico-args", "regex", - "regex-syntax 0.7.1", + "regex-syntax 0.7.2", "string_cache", "term", "tiny-keccak", @@ -491,9 +491,9 @@ checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" [[package]] name = "linux-raw-sys" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "llvm-sys" @@ -688,18 +688,18 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" [[package]] name = "proc-macro2" -version = "1.0.58" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8" +checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" dependencies = [ "proc-macro2", ] @@ -726,13 +726,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.1" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" +checksum = "81ca098a9821bd52d6b24fd8b10bd081f47d39c22778cafaa75a2857a62c6390" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.7.1", + "regex-syntax 0.7.2", ] [[package]] @@ -752,9 +752,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" [[package]] name = "rustc-demangle" @@ -836,9 +836,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "2.0.16" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" +checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" dependencies = [ "proc-macro2", "quote", @@ -969,9 +969,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" [[package]] name = "unicode-width" diff --git a/Cargo.toml b/Cargo.toml index 44866ebf3..59fb864ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ clap = { version = "4.3.0", features = ["derive"] } color-eyre = "0.6.2" itertools = "0.10.5" lalrpop-util = { version = "0.20.0", features = ["lexer"] } -regex = "1.8.1" +regex = "1.8.3" tracing = "0.1.37" tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } inkwell = { version = "0.2.0", features = ["llvm16-0"] } diff --git a/simple.ed b/simple.ed index 6e4d63acb..d0bb99f6c 100644 --- a/simple.ed +++ b/simple.ed @@ -1,13 +1,18 @@ -fn main(x: i64, z: i64) -> i64 { - let y: i64 = 0; - if x == 5 { - if x == z { - y = 2 * x; - } else { - y = z; - } - } else { - y = 3 * x; - } - return y; +struct Hello { + x: i32, + y: i32, +} + +fn test(x: Hello) { + return; +} + +fn works(x: i32) -> i32 { + return x * 4; +} + +fn main() -> i32 { + let y = 2; + let z = y; + return works(z); } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a9e94d996..2c01e44b9 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1,3 +1,15 @@ +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Spanned { + pub span: (usize, usize), + pub value: T, +} + +impl Spanned { + pub fn new(value: T, span: (usize, usize)) -> Self { + Self { value, span } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum OpCode { Add, @@ -27,20 +39,26 @@ impl OpCode { } } +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum TypeExp { + Integer { bits: u32, signed: bool }, + Boolean, + Array { of: Box, len: Option }, + Pointer { target: Box }, + Other { id: String }, +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum LiteralValue { - String, - Integer { - bits: Option, - signed: bool, - value: String, - }, + String(String), + Integer(String), + Boolean(bool), } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Expression { Literal(LiteralValue), - Variable(String), + Variable(Spanned), Call { function: String, args: Vec>, @@ -51,12 +69,12 @@ pub enum Expression { #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Parameter { pub ident: String, - pub type_name: String, + pub type_exp: TypeExp, } impl Parameter { - pub const fn new(ident: String, type_name: String) -> Self { - Self { ident, type_name } + pub const fn new(ident: String, type_exp: TypeExp) -> Self { + Self { ident, type_exp } } } @@ -65,7 +83,7 @@ pub struct Function { pub name: String, pub params: Vec, pub body: Vec, - pub return_type: Option, + pub return_type: Option, } impl Function { @@ -73,7 +91,7 @@ impl Function { name: String, params: Vec, body: Vec, - return_type: Option, + return_type: Option, ) -> Self { Self { name, @@ -84,16 +102,39 @@ impl Function { } } +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct StructField { + pub ident: String, + pub type_exp: TypeExp, +} + +impl StructField { + pub const fn new(ident: String, type_name: TypeExp) -> Self { + Self { + ident, + type_exp: type_name, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Struct { + pub name: String, + pub fields: Vec, +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Statement { Let { name: String, value: Box, - type_name: Option, + type_name: Option, + span: (usize, usize), }, Mutate { name: String, value: Box, + span: (usize, usize), }, If { condition: Box, @@ -102,6 +143,7 @@ pub enum Statement { }, Return(Option>), Function(Function), + Struct(Struct), } #[derive(Debug, Clone)] diff --git a/src/check.rs b/src/check.rs index e40aff618..cba22f397 100644 --- a/src/check.rs +++ b/src/check.rs @@ -1,4 +1,3 @@ -/* use crate::{ ast::{self, Statement}, codegen::ProgramData, @@ -19,8 +18,37 @@ pub fn check<'a>(data: &'a ProgramData, ast: &ast::Program) -> Vec> { let mut errors = vec![]; for statement in &ast.statements { - match &statement.value { - Statement::Assignment(_x) => { + match &statement { + Statement::Let { name: _, span, .. } => { + // can't have a top level assignment yet. + let snippet = Snippet { + title: Some(Annotation { + id: None, + label: Some("unexpected let at top level"), + annotation_type: AnnotationType::Error, + }), + footer: vec![], + slices: vec![Slice { + source: &data.source, + line_start: 1, + fold: true, + origin: None, + annotations: vec![SourceAnnotation { + label: "unexpected statement", + annotation_type: AnnotationType::Error, + range: *span, + }], + }], + opt: FormatOptions { + color: true, + ..Default::default() + }, + }; + + let dl = DisplayList::from(snippet); + errors.push(Check::Error(dl)); + } + Statement::Mutate { span, .. } => { // can't have a top level assignment yet. let snippet = Snippet { title: Some(Annotation { @@ -33,11 +61,11 @@ pub fn check<'a>(data: &'a ProgramData, ast: &ast::Program) -> Vec> { source: &data.source, line_start: 1, fold: true, - origin: Some(&data.filename), + origin: None, annotations: vec![SourceAnnotation { label: "unexpected statement", annotation_type: AnnotationType::Error, - range: statement.span.into(), + range: *span, }], }], opt: FormatOptions { @@ -49,68 +77,8 @@ pub fn check<'a>(data: &'a ProgramData, ast: &ast::Program) -> Vec> { let dl = DisplayList::from(snippet); errors.push(Check::Error(dl)); } - Statement::Definition(_) => { - // can't have a top level assignment yet. - let snippet = Snippet { - title: Some(Annotation { - id: None, - label: Some("unexpected definition at top level"), - annotation_type: AnnotationType::Error, - }), - footer: vec![], - slices: vec![Slice { - source: &data.source, - line_start: 1, - fold: true, - origin: Some(&data.filename), - annotations: vec![SourceAnnotation { - label: "unexpected statement", - annotation_type: AnnotationType::Error, - range: statement.span.into(), - }], - }], - opt: FormatOptions { - color: true, - ..Default::default() - }, - }; - - let dl = DisplayList::from(snippet); - errors.push(Check::Error(dl)); - } - Statement::Return(_x) => { - // can't have a top level assignment yet. - let snippet = Snippet { - title: Some(Annotation { - id: None, - label: Some("unexpected return"), - annotation_type: AnnotationType::Error, - }), - footer: vec![], - slices: vec![Slice { - source: &data.source, - line_start: 1, - fold: true, - origin: Some(&data.filename), - annotations: vec![SourceAnnotation { - label: "unexpected return", - annotation_type: AnnotationType::Error, - range: statement.span.into(), - }], - }], - opt: FormatOptions { - color: true, - ..Default::default() - }, - }; - - let dl = DisplayList::from(snippet); - errors.push(Check::Error(dl)); - } - Statement::Function(_function) => {} + _ => {} } } - errors } - */ diff --git a/src/codegen.rs b/src/codegen.rs index 8f31da497..d0b3a175d 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -9,13 +9,14 @@ use inkwell::{ builder::Builder, context::Context, module::Module, - types::{BasicMetadataTypeEnum, BasicType, BasicTypeEnum}, - values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum}, + types::{BasicMetadataTypeEnum, BasicType, BasicTypeEnum, StructType}, + values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum, FunctionValue}, IntPredicate, }; use itertools::{Either, Itertools}; +use tracing::info; -use crate::ast::{self, Expression, Function, LiteralValue, OpCode, Statement}; +use crate::ast::{self, Expression, Function, LiteralValue, OpCode, Statement, TypeExp}; #[derive(Debug, Clone)] pub struct ProgramData { @@ -36,13 +37,32 @@ pub struct CodeGen<'ctx> { context: &'ctx Context, pub module: Module<'ctx>, builder: Builder<'ctx>, - fn_types: VariableTypes<'ctx>, + types: TypeStorage<'ctx>, + struct_types: StructTypeStorage<'ctx>, + // function to return type + functions: HashMap, Option)>, _program: ProgramData, ast: ast::Program, } -type Variables<'ctx> = HashMap, usize)>; -type VariableTypes<'ctx> = HashMap>; +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Variable<'ctx> { + pub value: BasicValueEnum<'ctx>, + pub phi_counter: usize, + pub type_exp: TypeExp, +} + +pub type Variables<'ctx> = HashMap>; +pub type TypeStorage<'ctx> = HashMap>; + +/// Holds the struct type and maps fields to types and the location within the struct. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StructTypeInfo<'ctx> { + ty: StructType<'ctx>, + fields: HashMap, +} + +type StructTypeStorage<'ctx> = HashMap>; impl<'ctx> CodeGen<'ctx> { pub fn new( @@ -59,7 +79,9 @@ impl<'ctx> CodeGen<'ctx> { builder: context.create_builder(), _program, ast, - fn_types: HashMap::new(), + types: HashMap::new(), + struct_types: HashMap::new(), + functions: HashMap::new(), }; Ok(codegen) @@ -67,29 +89,69 @@ impl<'ctx> CodeGen<'ctx> { pub fn compile_ast(&mut self) -> Result<()> { let mut functions = vec![]; - let mut types: VariableTypes<'ctx> = HashMap::new(); + let mut func_info = HashMap::new(); + let mut types: TypeStorage<'ctx> = HashMap::new(); + let mut struct_types: StructTypeStorage<'ctx> = HashMap::new(); // todo fix the grammar so top level statements are only functions and static vars. - // create the llvm functions first. - + // create types for statement in &self.ast.statements { - match &statement { - Statement::Let { .. } => unreachable!(), - Statement::Mutate { .. } => unreachable!(), - Statement::Return(_) => unreachable!(), - Statement::If { .. } => unreachable!(), - Statement::Function(function) => { - functions.push(function); - let ret_type = self.compile_function_signature(function)?; - if let Some(ret_type) = ret_type { - types.insert(function.name.clone(), ret_type); + if let Statement::Struct(s) = &statement { + let mut fields = HashMap::new(); + let mut field_types = vec![]; + + for (i, field) in s.fields.iter().enumerate() { + if !types.contains_key(&field.type_exp) { + types.insert(field.type_exp.clone(), self.get_llvm_type(&field.type_exp)?); } + let ty = self.get_llvm_type(&field.type_exp)?; + field_types.push(ty); + // todo: ensure alignment and padding here + fields.insert(field.ident.clone(), (i, field.type_exp.clone())); + } + + let ty = self.context.struct_type(&field_types, false); + + let struct_type = StructTypeInfo { fields, ty }; + struct_types.insert(s.name.clone(), struct_type); + } + } + + self.struct_types = struct_types; + + // create the llvm functions first. + for statement in &self.ast.statements { + if let Statement::Function(function) = &statement { + functions.push(function); + let (args, ret_type) = self.compile_function_signature(function)?; + let mut arg_types = vec![]; + for arg in args { + if !types.contains_key(&arg) { + let ty = self.get_llvm_type(&arg)?; + types.insert(arg.clone(), ty); + } + arg_types.push(arg); + } + if let Some(ret_type) = ret_type { + let ret_type = if !types.contains_key(&ret_type) { + let ty = self.get_llvm_type(&ret_type)?; + types.insert(ret_type.clone(), ty); + ret_type + } else { + ret_type + }; + func_info.insert(function.name.clone(), (arg_types, Some(ret_type))); + } else { + func_info.insert(function.name.clone(), (arg_types, None)); } } } - self.fn_types = types; + self.types = types; + self.functions = func_info; + + info!("functions:\n{:#?}", self.functions); // implement them. for function in functions { @@ -106,42 +168,67 @@ impl<'ctx> CodeGen<'ctx> { self.module.print_to_string().to_str().unwrap().to_string() } - fn get_llvm_type(&self, id: &str) -> Result> { - Ok(match id { - "i64" => self.context.i64_type().as_basic_type_enum(), - "i32" => self.context.i32_type().as_basic_type_enum(), - "i8" => self.context.i8_type().as_basic_type_enum(), - "u8" => self.context.i8_type().as_basic_type_enum(), - _ => todo!(), - }) + fn get_llvm_type(&self, id: &TypeExp) -> Result> { + if let Some(ty) = self.types.get(id) { + Ok(*ty) + } else { + Ok(match id { + TypeExp::Integer { bits, signed: _ } => self + .context + .custom_width_int_type(*bits) + .as_basic_type_enum(), + TypeExp::Boolean => self.context.bool_type().as_basic_type_enum(), + TypeExp::Array { of, len } => { + let ty = self.get_llvm_type(of)?; + ty.array_type(len.unwrap()).as_basic_type_enum() + } + TypeExp::Pointer { target } => { + let ty = self.get_llvm_type(target)?; + ty.ptr_type(Default::default()).as_basic_type_enum() + } + TypeExp::Other { id } => self + .struct_types + .get(id) + .expect("struct type not found") + .ty + .as_basic_type_enum(), + }) + } } /// creates the llvm function without the body, so other function bodies can call it. fn compile_function_signature( &self, function: &Function, - ) -> Result>> { + ) -> Result<(Vec, Option)> { let args_types: Vec> = function .params .iter() - .map(|param| param.type_name.as_str()) + .map(|param| ¶m.type_exp) .map(|t| self.get_llvm_type(t)) .try_collect()?; let args_types: Vec> = args_types.into_iter().map(|t| t.into()).collect_vec(); - let fn_type = match &function.return_type { + let (fn_type, ret_type) = match &function.return_type { Some(id) => { let return_type = self.get_llvm_type(id)?; - return_type.fn_type(&args_types, false) + (return_type.fn_type(&args_types, false), Some(id.clone())) } - None => self.context.void_type().fn_type(&args_types, false), + None => (self.context.void_type().fn_type(&args_types, false), None), }; self.module.add_function(&function.name, fn_type, None); - Ok(fn_type.get_return_type()) + Ok(( + function + .params + .iter() + .map(|param| param.type_exp.clone()) + .collect(), + ret_type, + )) } fn compile_function(&self, function: &Function) -> Result<()> { @@ -151,15 +238,21 @@ impl<'ctx> CodeGen<'ctx> { self.builder.position_at_end(entry_block); let mut variables: Variables = HashMap::new(); - let mut types: VariableTypes = HashMap::new(); + let mut types: TypeStorage = self.types.clone(); for (i, param) in function.params.iter().enumerate() { - let id = param.ident.clone(); - let param = func + let id = ¶m.ident; + let param_value = func .get_nth_param(i.try_into().unwrap()) .expect("parameter"); - variables.insert(id.clone(), (param, 0)); - types.insert(id.clone(), param.get_type()); + variables.insert( + id.clone(), + Variable { + value: param_value, + phi_counter: 0, + type_exp: param.type_exp.clone(), + }, + ); } let mut has_return = false; @@ -168,7 +261,7 @@ impl<'ctx> CodeGen<'ctx> { if let Statement::Return(_) = statement { has_return = true } - self.compile_statement(statement, &mut variables, &mut types)?; + self.compile_statement(function, func, statement, &mut variables, &mut types)?; } if !has_return { @@ -178,34 +271,46 @@ impl<'ctx> CodeGen<'ctx> { Ok(()) } - fn find_expr_type( - &self, - expr: &Expression, - types: &VariableTypes<'ctx>, - ) -> Option> { + fn find_expr_type(&self, expr: &Expression, variables: &Variables<'ctx>) -> Option { match expr { Expression::Literal(x) => match x { - LiteralValue::String => todo!(), - LiteralValue::Integer { - bits, - signed, - value, - } => bits.map(|bits| self.context.custom_width_int_type(bits).into()), + LiteralValue::String(_s) => { + todo!("make internal string struct") + /* todo: internal string structure here + Some( + self.context + .i8_type() + .array_type(s.bytes().len() as u32 + 1) + .as_basic_type_enum(), + ) */ + } + LiteralValue::Integer(_) => Some(TypeExp::Integer { + bits: 32, + signed: true, + }), + LiteralValue::Boolean(_) => Some(TypeExp::Boolean), + }, + Expression::Variable(x) => variables.get(&x.value).cloned().map(|x| x.type_exp), + Expression::Call { function, args: _ } => { + self.functions.get(function).unwrap().clone().1 + } + Expression::BinaryOp(lhs, op, rhs) => match op { + OpCode::Eq | OpCode::Ne => Some(TypeExp::Boolean), + _ => self + .find_expr_type(lhs, variables) + .or_else(|| self.find_expr_type(rhs, variables)), }, - Expression::Variable(x) => types.get(x).cloned(), - Expression::Call { function, args } => types.get(function).cloned(), - Expression::BinaryOp(lhs, op, rhs) => self - .find_expr_type(lhs, types) - .or_else(|| self.find_expr_type(rhs, types)), } } fn compile_statement( &self, + function: &Function, + function_value: FunctionValue, statement: &Statement, // value, assignments variables: &mut Variables<'ctx>, - types: &mut VariableTypes<'ctx>, + types: &mut TypeStorage<'ctx>, ) -> Result<()> { match statement { // Variable assignment @@ -213,37 +318,65 @@ impl<'ctx> CodeGen<'ctx> { name, value, type_name, + .. } => { let type_hint = if let Some(type_name) = type_name { - self.get_llvm_type(type_name)? + type_name.clone() } else { - self.find_expr_type(value, types) - .expect("type should be found") + let type_exp = self + .find_expr_type(value, variables) + .expect("type should be found"); + let ty = self.get_llvm_type(&type_exp)?; + types.insert(type_exp.clone(), ty); + type_exp }; - types.insert(name.clone(), type_hint); - let result = self + let (value, value_type) = self .compile_expression(value, variables, types, Some(type_hint))? .expect("should have result"); - variables.insert(name.clone(), (result, 0)); + if !types.contains_key(&value_type) { + let ty = self.get_llvm_type(&value_type)?; + types.insert(value_type.clone(), ty); + } + + info!("adding variable: name={}, ty={:?}", name, value_type); + + variables.insert( + name.clone(), + Variable { + value, + phi_counter: 0, + type_exp: value_type, + }, + ); } - Statement::Mutate { name, value } => { - let type_hint = *types.get(name).expect("should exist"); - let result = self + Statement::Mutate { name, value, .. } => { + let var = variables.get(name).cloned().expect("variable should exist"); + let type_hint = var.type_exp; + + let (value, value_type) = self .compile_expression(value, variables, types, Some(type_hint))? .expect("should have result"); - let (old_val, acc) = variables.get(name).expect("variable should exist"); - variables.insert(name.clone(), (result, acc + 1)); + let var = variables.get_mut(name).expect("variable should exist"); + var.phi_counter += 1; + var.value = value; + assert_eq!(var.type_exp, value_type, "variable type shouldn't change!"); + info!("mutated variable: name={}, ty={:?}", name, var.type_exp); } Statement::Return(ret) => { if let Some(ret) = ret { - let type_hint = self.find_expr_type(ret, types); - let result = self + let type_hint = self + .functions + .get(&function.name) + .expect("function should exist") + .clone() + .1; + let (value, _value_type) = self .compile_expression(ret, variables, types, type_hint)? .expect("should have result"); - self.builder.build_return(Some(&result)); + self.builder.build_return(Some(&value)); } else { self.builder.build_return(None); } @@ -253,21 +386,13 @@ impl<'ctx> CodeGen<'ctx> { body, else_body, } => { - let type_hint_cond = self.find_expr_type(condition, types); - let condition = self - .compile_expression(condition, variables, types, type_hint_cond)? + let (condition, _cond_type) = self + .compile_expression(condition, variables, types, Some(TypeExp::Boolean))? .expect("should produce a value"); - let func = self - .builder - .get_insert_block() - .unwrap() - .get_parent() - .expect("parent should exist"); - - let mut if_block = self.context.append_basic_block(func, "if"); - let mut else_block = self.context.append_basic_block(func, "else"); - let merge_block = self.context.append_basic_block(func, "merge"); + let mut if_block = self.context.append_basic_block(function_value, "if"); + let mut else_block = self.context.append_basic_block(function_value, "else"); + let merge_block = self.context.append_basic_block(function_value, "merge"); self.builder.build_conditional_branch( condition.into_int_value(), @@ -282,7 +407,7 @@ impl<'ctx> CodeGen<'ctx> { let mut variables_if = variables.clone(); self.builder.position_at_end(if_block); for s in body { - self.compile_statement(s, &mut variables_if, types)?; + self.compile_statement(function, function_value, s, &mut variables_if, types)?; } self.builder.build_unconditional_branch(merge_block); if_block = self.builder.get_insert_block().unwrap(); // update for phi @@ -292,7 +417,13 @@ impl<'ctx> CodeGen<'ctx> { self.builder.position_at_end(else_block); for s in else_body { - self.compile_statement(s, &mut variables_else, types)?; + self.compile_statement( + function, + function_value, + s, + &mut variables_else, + types, + )?; } self.builder.build_unconditional_branch(merge_block); else_block = self.builder.get_insert_block().unwrap(); // update for phi @@ -301,43 +432,52 @@ impl<'ctx> CodeGen<'ctx> { self.builder.position_at_end(merge_block); let mut processed_vars = HashMap::new(); - for (name, (value, acc)) in variables_if { + for (name, new_var) in variables_if { if variables.contains_key(&name) { - let (old_val, old_acc) = variables.get(&name).unwrap(); - if acc > *old_acc { + let old_var = variables.get(&name).unwrap(); + if new_var.phi_counter > old_var.phi_counter { let phi = self .builder - .build_phi(old_val.get_type(), &format!("{name}_phi")); - phi.add_incoming(&[(&value, if_block)]); - processed_vars.insert(name, (value, phi)); + .build_phi(old_var.value.get_type(), &format!("{name}_phi")); + phi.add_incoming(&[(&new_var.value, if_block)]); + processed_vars.insert(name, (phi, new_var.type_exp)); } } } if else_body.is_some() { - for (name, (value, acc)) in variables_else { + for (name, new_var) in variables_else { if variables.contains_key(&name) { - let (old_val, old_acc) = variables.get(&name).unwrap(); - if acc > *old_acc { - if let Some((_, phi)) = processed_vars.get(&name) { - phi.add_incoming(&[(&value, else_block)]); + let old_var = variables.get(&name).unwrap(); + if new_var.phi_counter > old_var.phi_counter { + if let Some((phi, _)) = processed_vars.get(&name) { + phi.add_incoming(&[(&new_var.value, else_block)]); } else { - let phi = self - .builder - .build_phi(old_val.get_type(), &format!("{name}_phi")); - phi.add_incoming(&[(&value, else_block)]); - processed_vars.insert(name, (value, phi)); + let phi = self.builder.build_phi( + old_var.value.get_type(), + &format!("{name}_phi"), + ); + phi.add_incoming(&[(&old_var.value, else_block)]); + processed_vars.insert(name, (phi, new_var.type_exp)); } } } } } - for (name, (_, phi)) in processed_vars { - variables.insert(name, (phi.as_basic_value(), 0)); + for (name, (phi, type_exp)) in processed_vars { + variables.insert( + name, + Variable { + value: phi.as_basic_value(), + phi_counter: 0, + type_exp, + }, + ); } } - Statement::Function(_function) => unreachable!(), + Statement::Function(_) => unreachable!(), + Statement::Struct(_) => unreachable!(), }; Ok(()) @@ -347,11 +487,11 @@ impl<'ctx> CodeGen<'ctx> { &self, expr: &Expression, variables: &mut Variables<'ctx>, - types: &mut VariableTypes<'ctx>, - type_hint: Option>, - ) -> Result>> { + types: &mut TypeStorage<'ctx>, + type_hint: Option, + ) -> Result, TypeExp)>> { Ok(match expr { - Expression::Variable(term) => Some(self.compile_variable(term, variables, types)?), + Expression::Variable(term) => Some(self.compile_variable(&term.value, variables)?), Expression::Literal(term) => Some(self.compile_literal(term, type_hint)?), Expression::Call { function, args } => { self.compile_call(function, args, variables, types)? @@ -367,16 +507,21 @@ impl<'ctx> CodeGen<'ctx> { func_name: &str, args: &[Box], variables: &mut Variables<'ctx>, - types: &mut VariableTypes<'ctx>, - ) -> Result>> { + types: &mut TypeStorage<'ctx>, + ) -> Result, TypeExp)>> { + info!("compiling fn call: func_name={}", func_name); let function = self.module.get_function(func_name).expect("should exist"); + let func_info = self + .functions + .get(func_name) + .cloned() + .expect("should exist"); let mut value_args: Vec = Vec::with_capacity(args.len()); - for arg in args { - let type_enum = self.find_expr_type(arg, types); - let res = self - .compile_expression(arg, variables, types, type_enum)? + for (arg, arg_type) in args.iter().zip(func_info.0.iter()) { + let (res, _res_type) = self + .compile_expression(arg, variables, types, Some(arg_type.clone()))? .expect("should have result"); value_args.push(res.into()); } @@ -387,7 +532,10 @@ impl<'ctx> CodeGen<'ctx> { .try_as_basic_value(); Ok(match result { - Either::Left(val) => Some(val), + Either::Left(val) => Some(( + val, + func_info.1.expect("should have ret type info if returns"), + )), Either::Right(_) => None, }) } @@ -398,18 +546,20 @@ impl<'ctx> CodeGen<'ctx> { op: &OpCode, rhs: &Expression, variables: &mut Variables<'ctx>, - types: &mut VariableTypes<'ctx>, - type_hint: Option>, - ) -> Result> { - let lhs = self - .compile_expression(lhs, variables, types, type_hint)? - .expect("should have result") - .into_int_value(); - let rhs = self + types: &mut TypeStorage<'ctx>, + type_hint: Option, + ) -> Result<(BasicValueEnum<'ctx>, TypeExp)> { + let (lhs, lhs_type) = self + .compile_expression(lhs, variables, types, type_hint.clone())? + .expect("should have result"); + let (rhs, _rhs_type) = self .compile_expression(rhs, variables, types, type_hint)? - .expect("should have result") - .into_int_value(); + .expect("should have result"); + let lhs = lhs.into_int_value(); + let rhs = rhs.into_int_value(); + + let mut bool_result = false; let result = match op { OpCode::Add => self.builder.build_int_add(lhs, rhs, "add"), OpCode::Sub => self.builder.build_int_sub(lhs, rhs, "sub"), @@ -418,41 +568,72 @@ impl<'ctx> CodeGen<'ctx> { OpCode::Rem => self.builder.build_int_signed_rem(lhs, rhs, "rem"), OpCode::And => self.builder.build_and(lhs, rhs, "and"), OpCode::Or => self.builder.build_or(lhs, rhs, "or"), - OpCode::Eq => self - .builder - .build_int_compare(IntPredicate::EQ, lhs, rhs, "eq"), - OpCode::Ne => self - .builder - .build_int_compare(IntPredicate::NE, lhs, rhs, "eq"), + OpCode::Eq => { + bool_result = true; + self.builder + .build_int_compare(IntPredicate::EQ, lhs, rhs, "eq") + } + OpCode::Ne => { + bool_result = true; + self.builder + .build_int_compare(IntPredicate::NE, lhs, rhs, "eq") + } }; - Ok(result.as_basic_value_enum()) + let mut res_type = lhs_type; + + if bool_result { + res_type = TypeExp::Integer { + bits: 1, + signed: false, + }; + } + + Ok((result.as_basic_value_enum(), res_type)) } pub fn compile_literal( &self, term: &LiteralValue, - type_hint: Option>, - ) -> Result> { + type_hint: Option, + ) -> Result<(BasicValueEnum<'ctx>, TypeExp)> { let value = match term { - LiteralValue::String => todo!(), - LiteralValue::Integer { - bits, - signed: _, - value, - } => { + LiteralValue::String(_s) => { + todo!() + /* + self + .context + .const_string(s.as_bytes(), true) + .as_basic_value_enum() */ + } + LiteralValue::Boolean(v) => ( + self.context + .bool_type() + .const_int((*v).into(), false) + .as_basic_value_enum(), + TypeExp::Boolean, + ), + LiteralValue::Integer(v) => { if let Some(type_hint) = type_hint { - type_hint - .into_int_type() - .const_int(value.parse().unwrap(), false) - .as_basic_value_enum() + ( + self.get_llvm_type(&type_hint)? + .into_int_type() + .const_int(v.parse().unwrap(), false) + .as_basic_value_enum(), + type_hint, + ) } else { - let bits = bits.unwrap_or(32); - - self.context - .custom_width_int_type(bits) - .const_int(value.parse().unwrap(), false) - .as_basic_value_enum() + let type_exp = TypeExp::Integer { + bits: 32, + signed: true, + }; + ( + self.get_llvm_type(&type_exp)? + .into_int_type() + .const_int(v.parse().unwrap(), false) + .as_basic_value_enum(), + type_exp, + ) } } }; @@ -464,9 +645,8 @@ impl<'ctx> CodeGen<'ctx> { &self, variable: &str, variables: &mut Variables<'ctx>, - types: &mut VariableTypes<'ctx>, - ) -> Result> { - let var = *variables.get(variable).expect("value"); - Ok(var.0) + ) -> Result<(BasicValueEnum<'ctx>, TypeExp)> { + let var = variables.get(variable).expect("value").clone(); + Ok((var.value, var.type_exp)) } } diff --git a/src/grammar.lalrpop b/src/grammar.lalrpop index 3edcc16bb..26bca94b3 100644 --- a/src/grammar.lalrpop +++ b/src/grammar.lalrpop @@ -1,6 +1,6 @@ use std::str::FromStr; use crate::{ - ast, + ast::{self, Spanned}, tokens::Token, lexer::LexicalError, }; @@ -18,13 +18,20 @@ extern { "if" => Token::KeywordIf, "else" => Token::KeywordElse, "identifier" => Token::Identifier(), - "int" => Token::Integer(), + "int literal" => Token::Integer(), + "string literal" => Token::String(), + "bool literal" => Token::Boolean(), "return" => Token::KeywordReturn, "fn" => Token::KeywordFn, + "ptr" => Token::KeywordPtr, "(" => Token::LeftParen, ")" => Token::RightParen, "{" => Token::LeftBracket, "}" => Token::RightBracket, + "[" => Token::LeftSquareBracket, + "]" => Token::RightSquareBracket, + "<" => Token::LessThanSign, + ">" => Token::MoreThanSign, "=" => Token::Assign, ";" => Token::Semicolon, ":" => Token::Colon, @@ -39,6 +46,17 @@ extern { "||" => Token::OperatorOr, "==" => Token::OperatorEq, "!=" => Token::OperatorNe, + + "bool" => Token::KeywordBool, + + "i8" => Token::Inti8, + "i16" => Token::Inti16, + "i32" => Token::Inti32, + "i64" => Token::Inti64, + "u8" => Token::Intu8, + "u16" => Token::Intu16, + "u32" => Token::Intu32, + "u64" => Token::Intu64, } } @@ -67,16 +85,17 @@ Statements: Vec = { Statement: ast::Statement = { BasicStatement, => ast::Statement::Function(f), + => ast::Statement::Struct(s), }; -TypeInfo: String = { - ":" => i +TypeInfo: ast::TypeExp = { + ":" => i }; // statements not including function definitions BasicStatement: ast::Statement = { - "let" "=" ";" => ast::Statement::Let { name: i, value: e, type_name: t}, - "=" ";" => ast::Statement::Mutate { name: i, value: e}, + "let" "=" ";" => ast::Statement::Let { name: i, value: e, type_name: t, span: (lo, hi) }, + "=" ";" => ast::Statement::Mutate { name: i, value: e, span: (lo, hi) }, "if" "{" "}" => ast::Statement::If { condition: cond, body: s, else_body: e}, "return" ";" => ast::Statement::Return(e), }; @@ -118,25 +137,67 @@ Expr4 = Tier; // Terms: variables, literals, calls Term: Box = { - => Box::new(ast::Expression::Variable(i)), + => Box::new(ast::Expression::Variable(Spanned::new(i, (lo, hi)))), => Box::new(ast::Expression::Literal(n)), + => Box::new(ast::Expression::Literal(n)), + => Box::new(ast::Expression::Literal(n)), "(" > ")" => Box::new(ast::Expression::Call { function: i, args: values}), "(" ")" }; -Number: ast::LiteralValue = => ast::LiteralValue::Integer { bits: None, signed: true, value: n.to_string()}; +Number: ast::LiteralValue = => ast::LiteralValue::Integer(n); + +StringLit: ast::LiteralValue = => ast::LiteralValue::String(n[1..(n.len()-1)].to_string()); + +BoolLiteral: ast::LiteralValue = => ast::LiteralValue::Boolean(n); + +ArrayLen: u32 = { + ";" => i.parse().unwrap(), +} + +LangType: ast::TypeExp = { + "ptr" "<" ">" => ast::TypeExp::Pointer { target: Box::new(target) }, + "[" "]" => ast::TypeExp::Array { of: Box::new(of), len }, + "i8" => ast::TypeExp::Integer { bits: 8, signed: true }, + "i16" => ast::TypeExp::Integer { bits: 16, signed: true }, + "i32" => ast::TypeExp::Integer { bits: 32, signed: true }, + "i64" => ast::TypeExp::Integer { bits: 64, signed: true }, + "u8" => ast::TypeExp::Integer { bits: 8, signed: false }, + "u16" => ast::TypeExp::Integer { bits: 16, signed: false }, + "u32" => ast::TypeExp::Integer { bits: 32, signed: false }, + "u64" => ast::TypeExp::Integer { bits: 64, signed: false }, + "bool" => ast::TypeExp::Boolean, + => ast::TypeExp::Other { id }, +}; // Function handling Param: ast::Parameter = { - <"identifier"> ":" <"identifier"> => ast::Parameter::new(<>) + <"identifier"> ":" => ast::Parameter::new(<>) }; Params = Comma; -FunctionReturn: String = { - "->" => i.to_string(), +FunctionReturn: ast::TypeExp = { + "->" => i, } Function: ast::Function = { "fn" "(" ")" "{" "}" => ast::Function::new(i, a, s, r) } + +// Structures + +StructField: ast::StructField = { + <"identifier"> ":" => ast::StructField::new(<>) +}; + +StructFields = Comma; + +Struct: ast::Struct = { + "struct" "{" "}" => { + ast::Struct { + name: i, + fields + } + } +} diff --git a/src/lexer.rs b/src/lexer.rs index f56a76772..2c6a5d61a 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,12 +1,22 @@ +use std::{fmt::Display, ops::Range}; + use logos::{Logos, SpannedIter}; use crate::tokens::Token; pub type Spanned = Result<(Loc, Tok, Loc), Error>; -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone)] pub enum LexicalError { - InvalidToken, + InvalidToken(Range), +} + +impl Display for LexicalError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LexicalError::InvalidToken(span) => write!(f, "lexical error at: {:?}", span), + } + } } pub struct Lexer<'input> { @@ -29,7 +39,7 @@ impl<'input> Iterator for Lexer<'input> { fn next(&mut self) -> Option { self.token_stream.next().map(|(token, span)| match token { Ok(token) => Ok((span.start, token, span.end)), - Err(()) => Err(LexicalError::InvalidToken), + Err(()) => Err(LexicalError::InvalidToken(span)), }) } } diff --git a/src/main.rs b/src/main.rs index c277715c2..a366bd52c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,7 +7,7 @@ use inkwell::{context::Context, execution_engine::JitFunction, OptimizationLevel use lalrpop_util::lalrpop_mod; use std::{fs, path::PathBuf, println}; -use crate::{ast::Program, lexer::Lexer}; +use crate::{ast::Program, check::Check, lexer::Lexer}; pub mod ast; pub mod check; @@ -36,6 +36,11 @@ enum Commands { /// The input file. input: PathBuf, }, + /// Prints the code AST. + Ast { + /// The input file. + input: PathBuf, + }, /// Compile the edlang source file. Compile { /// The input file. @@ -60,7 +65,6 @@ enum Commands { }, } -/* fn check_program(program: &ProgramData, ast: &ast::Program) -> bool { let errors = check::check(program, ast); @@ -84,7 +88,6 @@ fn check_program(program: &ProgramData, ast: &ast::Program) -> bool { error_count == 0 } -*/ fn main() -> Result<()> { color_eyre::install()?; @@ -93,14 +96,19 @@ fn main() -> Result<()> { match args.command { Commands::Check { input } => { - let code = fs::read_to_string(input)?; + let code = fs::read_to_string(&input)?; let lexer = Lexer::new(code.as_str()); let parser = grammar::ProgramParser::new(); - let ast = parser.parse(lexer).unwrap(); - - //let str_path = input.to_string_lossy(); - //let program = ProgramData::new(&str_path, &code); - //check_program(&program, &ast); + let ast = parser.parse(lexer)?; + let program = ProgramData::new(&input, &code); + check_program(&program, &ast); + } + Commands::Ast { input } => { + let code = fs::read_to_string(&input)?; + let lexer = Lexer::new(code.as_str()); + let parser = grammar::ProgramParser::new(); + let ast = parser.parse(lexer)?; + println!("{ast:#?}"); } Commands::Compile { input, @@ -111,17 +119,17 @@ fn main() -> Result<()> { let code = fs::read_to_string(&input)?; let lexer = Lexer::new(code.as_str()); let parser = grammar::ProgramParser::new(); - let ast: Program = parser.parse(lexer).unwrap(); + let ast: Program = parser.parse(lexer)?; let program = ProgramData::new(&input, &code); let file_name = input.file_name().unwrap().to_string_lossy(); - //if !check_program(&program, &ast) { - // return Ok(()); - //} + if !check_program(&program, &ast) { + return Ok(()); + } - println!("{:#?}", ast); + // println!("{:#?}", ast); let context = Context::create(); let mut codegen = codegen::CodeGen::new(&context, &file_name, program, ast)?; codegen.compile_ast()?; diff --git a/src/tokens.rs b/src/tokens.rs index 986fba6c4..21cc791b8 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -15,6 +15,8 @@ pub enum Token { KeywordReturn, #[token("struct")] KeywordStruct, + #[token("ptr")] + KeywordPtr, #[token("if")] KeywordIf, #[token("else")] @@ -24,6 +26,34 @@ pub enum Token { Identifier(String), #[regex(r"\d+", |lex| lex.slice().parse().ok())] Integer(String), + #[regex(r#""(?:[^"]|\\")*""#, |lex| lex.slice().to_string())] + String(String), + #[regex(r"(true|false)", |lex| lex.slice().parse().ok())] + Boolean(bool), + + #[token("bool")] + KeywordBool, + #[token("i8")] + Inti8, + #[token("i16")] + Inti16, + #[token("i32")] + Inti32, + #[token("i64")] + Inti64, + #[token("u8")] + Intu8, + #[token("u16")] + Intu16, + #[token("u32")] + Intu32, + #[token("u64")] + Intu64, + + #[token("f32")] + Float32, + #[token("f64")] + Float64, #[token("(")] LeftParen, @@ -33,6 +63,10 @@ pub enum Token { LeftBracket, #[token("}")] RightBracket, + #[token("[")] + LeftSquareBracket, + #[token("]")] + RightSquareBracket, #[token("=")] Assign, #[token(";")] @@ -43,6 +77,10 @@ pub enum Token { Arrow, #[token(",")] Coma, + #[token("<")] + LessThanSign, + #[token(">")] + MoreThanSign, #[token("+")] OperatorAdd,