type info improvements

This commit is contained in:
Edgar 2023-05-27 17:14:48 +02:00
parent 81b57d646d
commit bab8eec51f
No known key found for this signature in database
GPG key ID: 70ADAE8F35904387
10 changed files with 606 additions and 294 deletions

36
Cargo.lock generated
View file

@ -415,9 +415,9 @@ dependencies = [
[[package]]
name = "io-lifetimes"
version = "1.0.10"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220"
checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2"
dependencies = [
"hermit-abi",
"libc",
@ -461,7 +461,7 @@ dependencies = [
"petgraph",
"pico-args",
"regex",
"regex-syntax 0.7.1",
"regex-syntax 0.7.2",
"string_cache",
"term",
"tiny-keccak",
@ -491,9 +491,9 @@ checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
[[package]]
name = "linux-raw-sys"
version = "0.3.7"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
[[package]]
name = "llvm-sys"
@ -688,18 +688,18 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro2"
version = "1.0.58"
version = "1.0.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8"
checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.27"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
dependencies = [
"proc-macro2",
]
@ -726,13 +726,13 @@ dependencies = [
[[package]]
name = "regex"
version = "1.8.1"
version = "1.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370"
checksum = "81ca098a9821bd52d6b24fd8b10bd081f47d39c22778cafaa75a2857a62c6390"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.7.1",
"regex-syntax 0.7.2",
]
[[package]]
@ -752,9 +752,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.7.1"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c"
checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
[[package]]
name = "rustc-demangle"
@ -836,9 +836,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "2.0.16"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
dependencies = [
"proc-macro2",
"quote",
@ -969,9 +969,9 @@ dependencies = [
[[package]]
name = "unicode-ident"
version = "1.0.8"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
[[package]]
name = "unicode-width"

View file

@ -16,7 +16,7 @@ clap = { version = "4.3.0", features = ["derive"] }
color-eyre = "0.6.2"
itertools = "0.10.5"
lalrpop-util = { version = "0.20.0", features = ["lexer"] }
regex = "1.8.1"
regex = "1.8.3"
tracing = "0.1.37"
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
inkwell = { version = "0.2.0", features = ["llvm16-0"] }

View file

@ -1,13 +1,18 @@
fn main(x: i64, z: i64) -> i64 {
let y: i64 = 0;
if x == 5 {
if x == z {
y = 2 * x;
} else {
y = z;
}
} else {
y = 3 * x;
}
return y;
struct Hello {
x: i32,
y: i32,
}
fn test(x: Hello) {
return;
}
fn works(x: i32) -> i32 {
return x * 4;
}
fn main() -> i32 {
let y = 2;
let z = y;
return works(z);
}

View file

@ -1,3 +1,15 @@
/// A value annotated with the source span it was parsed from.
///
/// The grammar builds these from its `@L` / `@R` location markers, so `span`
/// is a `(start, end)` pair of parser locations.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Spanned<T> {
    /// `(start, end)` location of `value` in the source text.
    pub span: (usize, usize),
    /// The wrapped value.
    pub value: T,
}

impl<T> Spanned<T> {
    /// Wraps `value` together with the `span` it was parsed from.
    ///
    /// Declared `const` so it matches the other AST constructors in this file
    /// (`Parameter::new`, `StructField::new`) and can be used in const contexts.
    pub const fn new(value: T, span: (usize, usize)) -> Self {
        Self { value, span }
    }
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum OpCode {
Add,
@ -27,20 +39,26 @@ impl OpCode {
}
}
/// A type expression as written in the source language.
///
/// Produced by the `LangType` grammar rule and lowered to an LLVM type by
/// `CodeGen::get_llvm_type`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TypeExp {
/// Fixed-width integer (`i8`..`i64`, `u8`..`u64`).
/// `get_llvm_type` only uses `bits` when lowering; `signed` is ignored there.
Integer { bits: u32, signed: bool },
/// The `bool` type.
Boolean,
/// Array of `of`; `len` is `None` when the type was written without a length.
Array { of: Box<Self>, len: Option<u32> },
/// Pointer to `target` (`ptr<T>` in the grammar).
Pointer { target: Box<Self> },
/// Any other identifier used as a type; codegen looks it up as a struct name.
Other { id: String },
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum LiteralValue {
String,
Integer {
bits: Option<u32>,
signed: bool,
value: String,
},
String(String),
Integer(String),
Boolean(bool),
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Expression {
Literal(LiteralValue),
Variable(String),
Variable(Spanned<String>),
Call {
function: String,
args: Vec<Box<Self>>,
@ -51,12 +69,12 @@ pub enum Expression {
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Parameter {
pub ident: String,
pub type_name: String,
pub type_exp: TypeExp,
}
impl Parameter {
pub const fn new(ident: String, type_name: String) -> Self {
Self { ident, type_name }
pub const fn new(ident: String, type_exp: TypeExp) -> Self {
Self { ident, type_exp }
}
}
@ -65,7 +83,7 @@ pub struct Function {
pub name: String,
pub params: Vec<Parameter>,
pub body: Vec<Statement>,
pub return_type: Option<String>,
pub return_type: Option<TypeExp>,
}
impl Function {
@ -73,7 +91,7 @@ impl Function {
name: String,
params: Vec<Parameter>,
body: Vec<Statement>,
return_type: Option<String>,
return_type: Option<TypeExp>,
) -> Self {
Self {
name,
@ -84,16 +102,39 @@ impl Function {
}
}
/// A single named field inside a struct declaration.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StructField {
    /// Field name.
    pub ident: String,
    /// Declared type of the field.
    pub type_exp: TypeExp,
}

impl StructField {
    /// Creates a field from its name and declared type.
    ///
    /// The second parameter is named `type_exp` (it carries a [`TypeExp`], not a
    /// type name) so the signature mirrors `Parameter::new`.
    pub const fn new(ident: String, type_exp: TypeExp) -> Self {
        Self { ident, type_exp }
    }
}
/// A struct declaration (`Statement::Struct`).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Struct {
/// Name of the struct; codegen registers the struct's LLVM type under this key.
pub name: String,
/// Declared fields. Codegen uses each field's index in this vector as its
/// position inside the generated LLVM struct type.
pub fields: Vec<StructField>,
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Statement {
Let {
name: String,
value: Box<Expression>,
type_name: Option<String>,
type_name: Option<TypeExp>,
span: (usize, usize),
},
Mutate {
name: String,
value: Box<Expression>,
span: (usize, usize),
},
If {
condition: Box<Expression>,
@ -102,6 +143,7 @@ pub enum Statement {
},
Return(Option<Box<Expression>>),
Function(Function),
Struct(Struct),
}
#[derive(Debug, Clone)]

View file

@ -1,4 +1,3 @@
/*
use crate::{
ast::{self, Statement},
codegen::ProgramData,
@ -19,8 +18,37 @@ pub fn check<'a>(data: &'a ProgramData, ast: &ast::Program) -> Vec<Check<'a>> {
let mut errors = vec![];
for statement in &ast.statements {
match &statement.value {
Statement::Assignment(_x) => {
match &statement {
Statement::Let { name: _, span, .. } => {
// can't have a top level assignment yet.
let snippet = Snippet {
title: Some(Annotation {
id: None,
label: Some("unexpected let at top level"),
annotation_type: AnnotationType::Error,
}),
footer: vec![],
slices: vec![Slice {
source: &data.source,
line_start: 1,
fold: true,
origin: None,
annotations: vec![SourceAnnotation {
label: "unexpected statement",
annotation_type: AnnotationType::Error,
range: *span,
}],
}],
opt: FormatOptions {
color: true,
..Default::default()
},
};
let dl = DisplayList::from(snippet);
errors.push(Check::Error(dl));
}
Statement::Mutate { span, .. } => {
// can't have a top level assignment yet.
let snippet = Snippet {
title: Some(Annotation {
@ -33,11 +61,11 @@ pub fn check<'a>(data: &'a ProgramData, ast: &ast::Program) -> Vec<Check<'a>> {
source: &data.source,
line_start: 1,
fold: true,
origin: Some(&data.filename),
origin: None,
annotations: vec![SourceAnnotation {
label: "unexpected statement",
annotation_type: AnnotationType::Error,
range: statement.span.into(),
range: *span,
}],
}],
opt: FormatOptions {
@ -49,68 +77,8 @@ pub fn check<'a>(data: &'a ProgramData, ast: &ast::Program) -> Vec<Check<'a>> {
let dl = DisplayList::from(snippet);
errors.push(Check::Error(dl));
}
Statement::Definition(_) => {
// can't have a top level assignment yet.
let snippet = Snippet {
title: Some(Annotation {
id: None,
label: Some("unexpected definition at top level"),
annotation_type: AnnotationType::Error,
}),
footer: vec![],
slices: vec![Slice {
source: &data.source,
line_start: 1,
fold: true,
origin: Some(&data.filename),
annotations: vec![SourceAnnotation {
label: "unexpected statement",
annotation_type: AnnotationType::Error,
range: statement.span.into(),
}],
}],
opt: FormatOptions {
color: true,
..Default::default()
},
};
let dl = DisplayList::from(snippet);
errors.push(Check::Error(dl));
}
Statement::Return(_x) => {
// can't have a top level assignment yet.
let snippet = Snippet {
title: Some(Annotation {
id: None,
label: Some("unexpected return"),
annotation_type: AnnotationType::Error,
}),
footer: vec![],
slices: vec![Slice {
source: &data.source,
line_start: 1,
fold: true,
origin: Some(&data.filename),
annotations: vec![SourceAnnotation {
label: "unexpected return",
annotation_type: AnnotationType::Error,
range: statement.span.into(),
}],
}],
opt: FormatOptions {
color: true,
..Default::default()
},
};
let dl = DisplayList::from(snippet);
errors.push(Check::Error(dl));
}
Statement::Function(_function) => {}
_ => {}
}
}
errors
}
*/

View file

@ -9,13 +9,14 @@ use inkwell::{
builder::Builder,
context::Context,
module::Module,
types::{BasicMetadataTypeEnum, BasicType, BasicTypeEnum},
values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum},
types::{BasicMetadataTypeEnum, BasicType, BasicTypeEnum, StructType},
values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum, FunctionValue},
IntPredicate,
};
use itertools::{Either, Itertools};
use tracing::info;
use crate::ast::{self, Expression, Function, LiteralValue, OpCode, Statement};
use crate::ast::{self, Expression, Function, LiteralValue, OpCode, Statement, TypeExp};
#[derive(Debug, Clone)]
pub struct ProgramData {
@ -36,13 +37,32 @@ pub struct CodeGen<'ctx> {
context: &'ctx Context,
pub module: Module<'ctx>,
builder: Builder<'ctx>,
fn_types: VariableTypes<'ctx>,
types: TypeStorage<'ctx>,
struct_types: StructTypeStorage<'ctx>,
// function to return type
functions: HashMap<String, (Vec<TypeExp>, Option<TypeExp>)>,
_program: ProgramData,
ast: ast::Program,
}
type Variables<'ctx> = HashMap<String, (BasicValueEnum<'ctx>, usize)>;
type VariableTypes<'ctx> = HashMap<String, BasicTypeEnum<'ctx>>;
/// Codegen state tracked for one named variable inside a function body.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Variable<'ctx> {
/// The LLVM value currently bound to the variable.
pub value: BasicValueEnum<'ctx>,
/// Bumped on every `Statement::Mutate`. After an `if`/`else`, a branch copy
/// with a higher counter than the pre-branch variable gets a phi node, and
/// the merged variable is stored with the counter reset to 0.
pub phi_counter: usize,
/// Language-level type of the variable.
pub type_exp: TypeExp,
}
/// Variables visible at the current point of compilation, keyed by name.
pub type Variables<'ctx> = HashMap<String, Variable<'ctx>>;
/// LLVM types already resolved for a given `TypeExp`; `get_llvm_type`
/// consults the `CodeGen`-level instance of this map before building a type.
pub type TypeStorage<'ctx> = HashMap<TypeExp, BasicTypeEnum<'ctx>>;
/// Holds the struct type and maps fields to types and the location within the struct.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StructTypeInfo<'ctx> {
/// LLVM struct type built from the field types in declaration order.
ty: StructType<'ctx>,
/// Field name -> (index of the field within the struct, declared type).
fields: HashMap<String, (usize, TypeExp)>,
}
/// All declared structs, keyed by struct name.
type StructTypeStorage<'ctx> = HashMap<String, StructTypeInfo<'ctx>>;
impl<'ctx> CodeGen<'ctx> {
pub fn new(
@ -59,7 +79,9 @@ impl<'ctx> CodeGen<'ctx> {
builder: context.create_builder(),
_program,
ast,
fn_types: HashMap::new(),
types: HashMap::new(),
struct_types: HashMap::new(),
functions: HashMap::new(),
};
Ok(codegen)
@ -67,29 +89,69 @@ impl<'ctx> CodeGen<'ctx> {
pub fn compile_ast(&mut self) -> Result<()> {
let mut functions = vec![];
let mut types: VariableTypes<'ctx> = HashMap::new();
let mut func_info = HashMap::new();
let mut types: TypeStorage<'ctx> = HashMap::new();
let mut struct_types: StructTypeStorage<'ctx> = HashMap::new();
// todo fix the grammar so top level statements are only functions and static vars.
// create the llvm functions first.
// create types
for statement in &self.ast.statements {
match &statement {
Statement::Let { .. } => unreachable!(),
Statement::Mutate { .. } => unreachable!(),
Statement::Return(_) => unreachable!(),
Statement::If { .. } => unreachable!(),
Statement::Function(function) => {
functions.push(function);
let ret_type = self.compile_function_signature(function)?;
if let Some(ret_type) = ret_type {
types.insert(function.name.clone(), ret_type);
if let Statement::Struct(s) = &statement {
let mut fields = HashMap::new();
let mut field_types = vec![];
for (i, field) in s.fields.iter().enumerate() {
if !types.contains_key(&field.type_exp) {
types.insert(field.type_exp.clone(), self.get_llvm_type(&field.type_exp)?);
}
let ty = self.get_llvm_type(&field.type_exp)?;
field_types.push(ty);
// todo: ensure alignment and padding here
fields.insert(field.ident.clone(), (i, field.type_exp.clone()));
}
let ty = self.context.struct_type(&field_types, false);
let struct_type = StructTypeInfo { fields, ty };
struct_types.insert(s.name.clone(), struct_type);
}
}
self.struct_types = struct_types;
// create the llvm functions first.
for statement in &self.ast.statements {
if let Statement::Function(function) = &statement {
functions.push(function);
let (args, ret_type) = self.compile_function_signature(function)?;
let mut arg_types = vec![];
for arg in args {
if !types.contains_key(&arg) {
let ty = self.get_llvm_type(&arg)?;
types.insert(arg.clone(), ty);
}
arg_types.push(arg);
}
if let Some(ret_type) = ret_type {
let ret_type = if !types.contains_key(&ret_type) {
let ty = self.get_llvm_type(&ret_type)?;
types.insert(ret_type.clone(), ty);
ret_type
} else {
ret_type
};
func_info.insert(function.name.clone(), (arg_types, Some(ret_type)));
} else {
func_info.insert(function.name.clone(), (arg_types, None));
}
}
}
self.fn_types = types;
self.types = types;
self.functions = func_info;
info!("functions:\n{:#?}", self.functions);
// implement them.
for function in functions {
@ -106,42 +168,67 @@ impl<'ctx> CodeGen<'ctx> {
self.module.print_to_string().to_str().unwrap().to_string()
}
fn get_llvm_type(&self, id: &str) -> Result<BasicTypeEnum<'ctx>> {
Ok(match id {
"i64" => self.context.i64_type().as_basic_type_enum(),
"i32" => self.context.i32_type().as_basic_type_enum(),
"i8" => self.context.i8_type().as_basic_type_enum(),
"u8" => self.context.i8_type().as_basic_type_enum(),
_ => todo!(),
})
/// Lowers a language-level type expression to its LLVM type.
///
/// Types already present in `self.types` are returned directly; anything
/// else is built on the fly (recursively for arrays and pointers).
///
/// # Panics
///
/// * on an `Array` whose `len` is `None`,
/// * on an `Other` type whose name is not a registered struct.
fn get_llvm_type(&self, id: &TypeExp) -> Result<BasicTypeEnum<'ctx>> {
    // Cache hit: nothing to build.
    if let Some(cached) = self.types.get(id) {
        return Ok(*cached);
    }

    let llvm_type = match id {
        // Only the bit width is used here; signedness is not part of the LLVM type.
        TypeExp::Integer { bits, .. } => {
            let int_type = self.context.custom_width_int_type(*bits);
            int_type.as_basic_type_enum()
        }
        TypeExp::Boolean => {
            let bool_type = self.context.bool_type();
            bool_type.as_basic_type_enum()
        }
        TypeExp::Array { of, len } => self
            .get_llvm_type(of)?
            .array_type(len.unwrap())
            .as_basic_type_enum(),
        TypeExp::Pointer { target } => self
            .get_llvm_type(target)?
            .ptr_type(Default::default())
            .as_basic_type_enum(),
        // Any non-builtin type name must refer to a previously declared struct.
        TypeExp::Other { id: struct_name } => {
            let info = self
                .struct_types
                .get(struct_name)
                .expect("struct type not found");
            info.ty.as_basic_type_enum()
        }
    };

    Ok(llvm_type)
}
/// creates the llvm function without the body, so other function bodies can call it.
fn compile_function_signature(
&self,
function: &Function,
) -> Result<Option<BasicTypeEnum<'ctx>>> {
) -> Result<(Vec<TypeExp>, Option<TypeExp>)> {
let args_types: Vec<BasicTypeEnum<'ctx>> = function
.params
.iter()
.map(|param| param.type_name.as_str())
.map(|param| &param.type_exp)
.map(|t| self.get_llvm_type(t))
.try_collect()?;
let args_types: Vec<BasicMetadataTypeEnum<'ctx>> =
args_types.into_iter().map(|t| t.into()).collect_vec();
let fn_type = match &function.return_type {
let (fn_type, ret_type) = match &function.return_type {
Some(id) => {
let return_type = self.get_llvm_type(id)?;
return_type.fn_type(&args_types, false)
(return_type.fn_type(&args_types, false), Some(id.clone()))
}
None => self.context.void_type().fn_type(&args_types, false),
None => (self.context.void_type().fn_type(&args_types, false), None),
};
self.module.add_function(&function.name, fn_type, None);
Ok(fn_type.get_return_type())
Ok((
function
.params
.iter()
.map(|param| param.type_exp.clone())
.collect(),
ret_type,
))
}
fn compile_function(&self, function: &Function) -> Result<()> {
@ -151,15 +238,21 @@ impl<'ctx> CodeGen<'ctx> {
self.builder.position_at_end(entry_block);
let mut variables: Variables = HashMap::new();
let mut types: VariableTypes = HashMap::new();
let mut types: TypeStorage = self.types.clone();
for (i, param) in function.params.iter().enumerate() {
let id = param.ident.clone();
let param = func
let id = &param.ident;
let param_value = func
.get_nth_param(i.try_into().unwrap())
.expect("parameter");
variables.insert(id.clone(), (param, 0));
types.insert(id.clone(), param.get_type());
variables.insert(
id.clone(),
Variable {
value: param_value,
phi_counter: 0,
type_exp: param.type_exp.clone(),
},
);
}
let mut has_return = false;
@ -168,7 +261,7 @@ impl<'ctx> CodeGen<'ctx> {
if let Statement::Return(_) = statement {
has_return = true
}
self.compile_statement(statement, &mut variables, &mut types)?;
self.compile_statement(function, func, statement, &mut variables, &mut types)?;
}
if !has_return {
@ -178,34 +271,46 @@ impl<'ctx> CodeGen<'ctx> {
Ok(())
}
fn find_expr_type(
&self,
expr: &Expression,
types: &VariableTypes<'ctx>,
) -> Option<BasicTypeEnum<'ctx>> {
fn find_expr_type(&self, expr: &Expression, variables: &Variables<'ctx>) -> Option<TypeExp> {
match expr {
Expression::Literal(x) => match x {
LiteralValue::String => todo!(),
LiteralValue::Integer {
bits,
signed,
value,
} => bits.map(|bits| self.context.custom_width_int_type(bits).into()),
LiteralValue::String(_s) => {
todo!("make internal string struct")
/* todo: internal string structure here
Some(
self.context
.i8_type()
.array_type(s.bytes().len() as u32 + 1)
.as_basic_type_enum(),
) */
}
LiteralValue::Integer(_) => Some(TypeExp::Integer {
bits: 32,
signed: true,
}),
LiteralValue::Boolean(_) => Some(TypeExp::Boolean),
},
Expression::Variable(x) => variables.get(&x.value).cloned().map(|x| x.type_exp),
Expression::Call { function, args: _ } => {
self.functions.get(function).unwrap().clone().1
}
Expression::BinaryOp(lhs, op, rhs) => match op {
OpCode::Eq | OpCode::Ne => Some(TypeExp::Boolean),
_ => self
.find_expr_type(lhs, variables)
.or_else(|| self.find_expr_type(rhs, variables)),
},
Expression::Variable(x) => types.get(x).cloned(),
Expression::Call { function, args } => types.get(function).cloned(),
Expression::BinaryOp(lhs, op, rhs) => self
.find_expr_type(lhs, types)
.or_else(|| self.find_expr_type(rhs, types)),
}
}
fn compile_statement(
&self,
function: &Function,
function_value: FunctionValue,
statement: &Statement,
// value, assignments
variables: &mut Variables<'ctx>,
types: &mut VariableTypes<'ctx>,
types: &mut TypeStorage<'ctx>,
) -> Result<()> {
match statement {
// Variable assignment
@ -213,37 +318,65 @@ impl<'ctx> CodeGen<'ctx> {
name,
value,
type_name,
..
} => {
let type_hint = if let Some(type_name) = type_name {
self.get_llvm_type(type_name)?
type_name.clone()
} else {
self.find_expr_type(value, types)
.expect("type should be found")
let type_exp = self
.find_expr_type(value, variables)
.expect("type should be found");
let ty = self.get_llvm_type(&type_exp)?;
types.insert(type_exp.clone(), ty);
type_exp
};
types.insert(name.clone(), type_hint);
let result = self
let (value, value_type) = self
.compile_expression(value, variables, types, Some(type_hint))?
.expect("should have result");
variables.insert(name.clone(), (result, 0));
if !types.contains_key(&value_type) {
let ty = self.get_llvm_type(&value_type)?;
types.insert(value_type.clone(), ty);
}
info!("adding variable: name={}, ty={:?}", name, value_type);
variables.insert(
name.clone(),
Variable {
value,
phi_counter: 0,
type_exp: value_type,
},
);
}
Statement::Mutate { name, value } => {
let type_hint = *types.get(name).expect("should exist");
let result = self
Statement::Mutate { name, value, .. } => {
let var = variables.get(name).cloned().expect("variable should exist");
let type_hint = var.type_exp;
let (value, value_type) = self
.compile_expression(value, variables, types, Some(type_hint))?
.expect("should have result");
let (old_val, acc) = variables.get(name).expect("variable should exist");
variables.insert(name.clone(), (result, acc + 1));
let var = variables.get_mut(name).expect("variable should exist");
var.phi_counter += 1;
var.value = value;
assert_eq!(var.type_exp, value_type, "variable type shouldn't change!");
info!("mutated variable: name={}, ty={:?}", name, var.type_exp);
}
Statement::Return(ret) => {
if let Some(ret) = ret {
let type_hint = self.find_expr_type(ret, types);
let result = self
let type_hint = self
.functions
.get(&function.name)
.expect("function should exist")
.clone()
.1;
let (value, _value_type) = self
.compile_expression(ret, variables, types, type_hint)?
.expect("should have result");
self.builder.build_return(Some(&result));
self.builder.build_return(Some(&value));
} else {
self.builder.build_return(None);
}
@ -253,21 +386,13 @@ impl<'ctx> CodeGen<'ctx> {
body,
else_body,
} => {
let type_hint_cond = self.find_expr_type(condition, types);
let condition = self
.compile_expression(condition, variables, types, type_hint_cond)?
let (condition, _cond_type) = self
.compile_expression(condition, variables, types, Some(TypeExp::Boolean))?
.expect("should produce a value");
let func = self
.builder
.get_insert_block()
.unwrap()
.get_parent()
.expect("parent should exist");
let mut if_block = self.context.append_basic_block(func, "if");
let mut else_block = self.context.append_basic_block(func, "else");
let merge_block = self.context.append_basic_block(func, "merge");
let mut if_block = self.context.append_basic_block(function_value, "if");
let mut else_block = self.context.append_basic_block(function_value, "else");
let merge_block = self.context.append_basic_block(function_value, "merge");
self.builder.build_conditional_branch(
condition.into_int_value(),
@ -282,7 +407,7 @@ impl<'ctx> CodeGen<'ctx> {
let mut variables_if = variables.clone();
self.builder.position_at_end(if_block);
for s in body {
self.compile_statement(s, &mut variables_if, types)?;
self.compile_statement(function, function_value, s, &mut variables_if, types)?;
}
self.builder.build_unconditional_branch(merge_block);
if_block = self.builder.get_insert_block().unwrap(); // update for phi
@ -292,7 +417,13 @@ impl<'ctx> CodeGen<'ctx> {
self.builder.position_at_end(else_block);
for s in else_body {
self.compile_statement(s, &mut variables_else, types)?;
self.compile_statement(
function,
function_value,
s,
&mut variables_else,
types,
)?;
}
self.builder.build_unconditional_branch(merge_block);
else_block = self.builder.get_insert_block().unwrap(); // update for phi
@ -301,43 +432,52 @@ impl<'ctx> CodeGen<'ctx> {
self.builder.position_at_end(merge_block);
let mut processed_vars = HashMap::new();
for (name, (value, acc)) in variables_if {
for (name, new_var) in variables_if {
if variables.contains_key(&name) {
let (old_val, old_acc) = variables.get(&name).unwrap();
if acc > *old_acc {
let old_var = variables.get(&name).unwrap();
if new_var.phi_counter > old_var.phi_counter {
let phi = self
.builder
.build_phi(old_val.get_type(), &format!("{name}_phi"));
phi.add_incoming(&[(&value, if_block)]);
processed_vars.insert(name, (value, phi));
.build_phi(old_var.value.get_type(), &format!("{name}_phi"));
phi.add_incoming(&[(&new_var.value, if_block)]);
processed_vars.insert(name, (phi, new_var.type_exp));
}
}
}
if else_body.is_some() {
for (name, (value, acc)) in variables_else {
for (name, new_var) in variables_else {
if variables.contains_key(&name) {
let (old_val, old_acc) = variables.get(&name).unwrap();
if acc > *old_acc {
if let Some((_, phi)) = processed_vars.get(&name) {
phi.add_incoming(&[(&value, else_block)]);
let old_var = variables.get(&name).unwrap();
if new_var.phi_counter > old_var.phi_counter {
if let Some((phi, _)) = processed_vars.get(&name) {
phi.add_incoming(&[(&new_var.value, else_block)]);
} else {
let phi = self
.builder
.build_phi(old_val.get_type(), &format!("{name}_phi"));
phi.add_incoming(&[(&value, else_block)]);
processed_vars.insert(name, (value, phi));
let phi = self.builder.build_phi(
old_var.value.get_type(),
&format!("{name}_phi"),
);
phi.add_incoming(&[(&old_var.value, else_block)]);
processed_vars.insert(name, (phi, new_var.type_exp));
}
}
}
}
}
for (name, (_, phi)) in processed_vars {
variables.insert(name, (phi.as_basic_value(), 0));
for (name, (phi, type_exp)) in processed_vars {
variables.insert(
name,
Variable {
value: phi.as_basic_value(),
phi_counter: 0,
type_exp,
},
);
}
}
Statement::Function(_function) => unreachable!(),
Statement::Function(_) => unreachable!(),
Statement::Struct(_) => unreachable!(),
};
Ok(())
@ -347,11 +487,11 @@ impl<'ctx> CodeGen<'ctx> {
&self,
expr: &Expression,
variables: &mut Variables<'ctx>,
types: &mut VariableTypes<'ctx>,
type_hint: Option<BasicTypeEnum<'ctx>>,
) -> Result<Option<BasicValueEnum<'ctx>>> {
types: &mut TypeStorage<'ctx>,
type_hint: Option<TypeExp>,
) -> Result<Option<(BasicValueEnum<'ctx>, TypeExp)>> {
Ok(match expr {
Expression::Variable(term) => Some(self.compile_variable(term, variables, types)?),
Expression::Variable(term) => Some(self.compile_variable(&term.value, variables)?),
Expression::Literal(term) => Some(self.compile_literal(term, type_hint)?),
Expression::Call { function, args } => {
self.compile_call(function, args, variables, types)?
@ -367,16 +507,21 @@ impl<'ctx> CodeGen<'ctx> {
func_name: &str,
args: &[Box<Expression>],
variables: &mut Variables<'ctx>,
types: &mut VariableTypes<'ctx>,
) -> Result<Option<BasicValueEnum<'ctx>>> {
types: &mut TypeStorage<'ctx>,
) -> Result<Option<(BasicValueEnum<'ctx>, TypeExp)>> {
info!("compiling fn call: func_name={}", func_name);
let function = self.module.get_function(func_name).expect("should exist");
let func_info = self
.functions
.get(func_name)
.cloned()
.expect("should exist");
let mut value_args: Vec<BasicMetadataValueEnum> = Vec::with_capacity(args.len());
for arg in args {
let type_enum = self.find_expr_type(arg, types);
let res = self
.compile_expression(arg, variables, types, type_enum)?
for (arg, arg_type) in args.iter().zip(func_info.0.iter()) {
let (res, _res_type) = self
.compile_expression(arg, variables, types, Some(arg_type.clone()))?
.expect("should have result");
value_args.push(res.into());
}
@ -387,7 +532,10 @@ impl<'ctx> CodeGen<'ctx> {
.try_as_basic_value();
Ok(match result {
Either::Left(val) => Some(val),
Either::Left(val) => Some((
val,
func_info.1.expect("should have ret type info if returns"),
)),
Either::Right(_) => None,
})
}
@ -398,18 +546,20 @@ impl<'ctx> CodeGen<'ctx> {
op: &OpCode,
rhs: &Expression,
variables: &mut Variables<'ctx>,
types: &mut VariableTypes<'ctx>,
type_hint: Option<BasicTypeEnum<'ctx>>,
) -> Result<BasicValueEnum<'ctx>> {
let lhs = self
.compile_expression(lhs, variables, types, type_hint)?
.expect("should have result")
.into_int_value();
let rhs = self
types: &mut TypeStorage<'ctx>,
type_hint: Option<TypeExp>,
) -> Result<(BasicValueEnum<'ctx>, TypeExp)> {
let (lhs, lhs_type) = self
.compile_expression(lhs, variables, types, type_hint.clone())?
.expect("should have result");
let (rhs, _rhs_type) = self
.compile_expression(rhs, variables, types, type_hint)?
.expect("should have result")
.into_int_value();
.expect("should have result");
let lhs = lhs.into_int_value();
let rhs = rhs.into_int_value();
let mut bool_result = false;
let result = match op {
OpCode::Add => self.builder.build_int_add(lhs, rhs, "add"),
OpCode::Sub => self.builder.build_int_sub(lhs, rhs, "sub"),
@ -418,41 +568,72 @@ impl<'ctx> CodeGen<'ctx> {
OpCode::Rem => self.builder.build_int_signed_rem(lhs, rhs, "rem"),
OpCode::And => self.builder.build_and(lhs, rhs, "and"),
OpCode::Or => self.builder.build_or(lhs, rhs, "or"),
OpCode::Eq => self
.builder
.build_int_compare(IntPredicate::EQ, lhs, rhs, "eq"),
OpCode::Ne => self
.builder
.build_int_compare(IntPredicate::NE, lhs, rhs, "eq"),
OpCode::Eq => {
bool_result = true;
self.builder
.build_int_compare(IntPredicate::EQ, lhs, rhs, "eq")
}
OpCode::Ne => {
bool_result = true;
self.builder
.build_int_compare(IntPredicate::NE, lhs, rhs, "eq")
}
};
Ok(result.as_basic_value_enum())
let mut res_type = lhs_type;
if bool_result {
res_type = TypeExp::Integer {
bits: 1,
signed: false,
};
}
Ok((result.as_basic_value_enum(), res_type))
}
pub fn compile_literal(
&self,
term: &LiteralValue,
type_hint: Option<BasicTypeEnum<'ctx>>,
) -> Result<BasicValueEnum<'ctx>> {
type_hint: Option<TypeExp>,
) -> Result<(BasicValueEnum<'ctx>, TypeExp)> {
let value = match term {
LiteralValue::String => todo!(),
LiteralValue::Integer {
bits,
signed: _,
value,
} => {
LiteralValue::String(_s) => {
todo!()
/*
self
.context
.const_string(s.as_bytes(), true)
.as_basic_value_enum() */
}
LiteralValue::Boolean(v) => (
self.context
.bool_type()
.const_int((*v).into(), false)
.as_basic_value_enum(),
TypeExp::Boolean,
),
LiteralValue::Integer(v) => {
if let Some(type_hint) = type_hint {
type_hint
.into_int_type()
.const_int(value.parse().unwrap(), false)
.as_basic_value_enum()
(
self.get_llvm_type(&type_hint)?
.into_int_type()
.const_int(v.parse().unwrap(), false)
.as_basic_value_enum(),
type_hint,
)
} else {
let bits = bits.unwrap_or(32);
self.context
.custom_width_int_type(bits)
.const_int(value.parse().unwrap(), false)
.as_basic_value_enum()
let type_exp = TypeExp::Integer {
bits: 32,
signed: true,
};
(
self.get_llvm_type(&type_exp)?
.into_int_type()
.const_int(v.parse().unwrap(), false)
.as_basic_value_enum(),
type_exp,
)
}
}
};
@ -464,9 +645,8 @@ impl<'ctx> CodeGen<'ctx> {
&self,
variable: &str,
variables: &mut Variables<'ctx>,
types: &mut VariableTypes<'ctx>,
) -> Result<BasicValueEnum<'ctx>> {
let var = *variables.get(variable).expect("value");
Ok(var.0)
) -> Result<(BasicValueEnum<'ctx>, TypeExp)> {
let var = variables.get(variable).expect("value").clone();
Ok((var.value, var.type_exp))
}
}

View file

@ -1,6 +1,6 @@
use std::str::FromStr;
use crate::{
ast,
ast::{self, Spanned},
tokens::Token,
lexer::LexicalError,
};
@ -18,13 +18,20 @@ extern {
"if" => Token::KeywordIf,
"else" => Token::KeywordElse,
"identifier" => Token::Identifier(<String>),
"int" => Token::Integer(<String>),
"int literal" => Token::Integer(<String>),
"string literal" => Token::String(<String>),
"bool literal" => Token::Boolean(<bool>),
"return" => Token::KeywordReturn,
"fn" => Token::KeywordFn,
"ptr" => Token::KeywordPtr,
"(" => Token::LeftParen,
")" => Token::RightParen,
"{" => Token::LeftBracket,
"}" => Token::RightBracket,
"[" => Token::LeftSquareBracket,
"]" => Token::RightSquareBracket,
"<" => Token::LessThanSign,
">" => Token::MoreThanSign,
"=" => Token::Assign,
";" => Token::Semicolon,
":" => Token::Colon,
@ -39,6 +46,17 @@ extern {
"||" => Token::OperatorOr,
"==" => Token::OperatorEq,
"!=" => Token::OperatorNe,
"bool" => Token::KeywordBool,
"i8" => Token::Inti8,
"i16" => Token::Inti16,
"i32" => Token::Inti32,
"i64" => Token::Inti64,
"u8" => Token::Intu8,
"u16" => Token::Intu16,
"u32" => Token::Intu32,
"u64" => Token::Intu64,
}
}
@ -67,16 +85,17 @@ Statements: Vec<ast::Statement> = {
Statement: ast::Statement = {
BasicStatement,
<f:Function> => ast::Statement::Function(f),
<s:Struct> => ast::Statement::Struct(s),
};
TypeInfo: String = {
":" <i:"identifier"> => i
TypeInfo: ast::TypeExp = {
":" <i:LangType> => i
};
// statements not including function definitions
BasicStatement: ast::Statement = {
"let" <i:"identifier"> <t:TypeInfo?> "=" <e:Expr> ";" => ast::Statement::Let { name: i, value: e, type_name: t},
<i:"identifier"> "=" <e:Expr> ";" => ast::Statement::Mutate { name: i, value: e},
<lo:@L> "let" <i:"identifier"> <t:TypeInfo?> "=" <e:Expr> ";" <hi:@R> => ast::Statement::Let { name: i, value: e, type_name: t, span: (lo, hi) },
<lo:@L> <i:"identifier"> "=" <e:Expr> ";" <hi:@R> => ast::Statement::Mutate { name: i, value: e, span: (lo, hi) },
"if" <cond:Expr> "{" <s:Statements> "}" <e:ElseExpr?> => ast::Statement::If { condition: cond, body: s, else_body: e},
"return" <e:Expr?> ";" => ast::Statement::Return(e),
};
@ -118,25 +137,67 @@ Expr4 = Tier<Level3_Op, Term>;
// Terms: variables, literals, calls
Term: Box<ast::Expression> = {
<i:"identifier"> => Box::new(ast::Expression::Variable(i)),
<lo:@L> <i:"identifier"> <hi:@R> => Box::new(ast::Expression::Variable(Spanned::new(i, (lo, hi)))),
<n:Number> => Box::new(ast::Expression::Literal(n)),
<n:StringLit> => Box::new(ast::Expression::Literal(n)),
<n:BoolLiteral> => Box::new(ast::Expression::Literal(n)),
<i:"identifier"> "(" <values:Comma<Term>> ")" => Box::new(ast::Expression::Call { function: i, args: values}),
"(" <Term> ")"
};
Number: ast::LiteralValue = <n:"int"> => ast::LiteralValue::Integer { bits: None, signed: true, value: n.to_string()};
Number: ast::LiteralValue = <n:"int literal"> => ast::LiteralValue::Integer(n);
// String literal: the token carries the raw source slice including both
// surrounding double quotes, so the first and last byte are dropped here.
// Escape sequences are kept verbatim; nothing is unescaped at this stage.
StringLit: ast::LiteralValue = <n:"string literal"> => ast::LiteralValue::String(n[1..(n.len()-1)].to_string());
BoolLiteral: ast::LiteralValue = <n:"bool literal"> => ast::LiteralValue::Boolean(n);
// Length part of an array type: a `;` followed by an integer literal,
// converted from its textual form into a `u32`.
ArrayLen: u32 = {
// NOTE(review): `unwrap` panics when the literal does not fit in a `u32`
// (the conversion fails on overflow) instead of reporting a parse error.
";" <i:"int literal"> => i.parse().unwrap(),
}
// A type expression, producing an `ast::TypeExp`.
//
// Alternatives:
//   ptr<T>          -> Pointer to the (recursively parsed) target type
//   [T] / [T; N]    -> Array of T; `len` is `None` when the `; N` part is omitted
//   i8 .. i64       -> signed Integer with the given bit width
//   u8 .. u64       -> unsigned Integer with the given bit width
//   bool            -> Boolean
//   any identifier  -> Other, carrying the identifier as written
LangType: ast::TypeExp = {
// Pointer type: the target is itself a full type expression.
"ptr" "<" <target:LangType> ">" => ast::TypeExp::Pointer { target: Box::new(target) },
// Array type: element type plus an optional length.
"[" <of:LangType> <len:ArrayLen?> "]" => ast::TypeExp::Array { of: Box::new(of), len },
// Signed integers.
"i8" => ast::TypeExp::Integer { bits: 8, signed: true },
"i16" => ast::TypeExp::Integer { bits: 16, signed: true },
"i32" => ast::TypeExp::Integer { bits: 32, signed: true },
"i64" => ast::TypeExp::Integer { bits: 64, signed: true },
// Unsigned integers.
"u8" => ast::TypeExp::Integer { bits: 8, signed: false },
"u16" => ast::TypeExp::Integer { bits: 16, signed: false },
"u32" => ast::TypeExp::Integer { bits: 32, signed: false },
"u64" => ast::TypeExp::Integer { bits: 64, signed: false },
"bool" => ast::TypeExp::Boolean,
// Anything else that lexes as an identifier is kept by name.
<id:"identifier"> => ast::TypeExp::Other { id },
};
// Function handling
Param: ast::Parameter = {
<"identifier"> ":" <"identifier"> => ast::Parameter::new(<>)
<"identifier"> ":" <LangType> => ast::Parameter::new(<>)
};
Params = Comma<Param>;
FunctionReturn: String = {
"->" <i:"identifier"> => i.to_string(),
FunctionReturn: ast::TypeExp = {
"->" <i:LangType> => i,
}
// Function definition: `fn <name>(<params>) [-> <return type>] { <body> }`.
// The return type is optional and is passed last to `ast::Function::new`.
Function: ast::Function = {
    "fn" <name:"identifier"> "(" <params:Params> ")" <ret:FunctionReturn?> "{" <body:Statements> "}" =>
        ast::Function::new(name, params, body, ret)
}
// Structures
// One struct field: `<name>: <type>`, passed to `ast::StructField::new`
// in that order.
StructField: ast::StructField = {
    <name:"identifier"> ":" <ty:LangType> => ast::StructField::new(name, ty)
};
StructFields = Comma<StructField>;
// Struct definition: `struct <name> { <comma separated fields> }`.
Struct: ast::Struct = {
    "struct" <name:"identifier"> "{" <fields:StructFields> "}" => ast::Struct { name, fields }
}

View file

@ -1,12 +1,22 @@
use std::{fmt::Display, ops::Range};
use logos::{Logos, SpannedIter};
use crate::tokens::Token;
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone)]
pub enum LexicalError {
InvalidToken,
InvalidToken(Range<usize>),
}
/// Human-readable rendering of a lexer failure, including the offending
/// source range in its `Debug` form.
impl Display for LexicalError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `InvalidToken` is the only variant, so the pattern is irrefutable.
        let LexicalError::InvalidToken(range) = self;
        write!(f, "lexical error at: {:?}", range)
    }
}
pub struct Lexer<'input> {
@ -29,7 +39,7 @@ impl<'input> Iterator for Lexer<'input> {
fn next(&mut self) -> Option<Self::Item> {
self.token_stream.next().map(|(token, span)| match token {
Ok(token) => Ok((span.start, token, span.end)),
Err(()) => Err(LexicalError::InvalidToken),
Err(()) => Err(LexicalError::InvalidToken(span)),
})
}
}

View file

@ -7,7 +7,7 @@ use inkwell::{context::Context, execution_engine::JitFunction, OptimizationLevel
use lalrpop_util::lalrpop_mod;
use std::{fs, path::PathBuf, println};
use crate::{ast::Program, lexer::Lexer};
use crate::{ast::Program, check::Check, lexer::Lexer};
pub mod ast;
pub mod check;
@ -36,6 +36,11 @@ enum Commands {
/// The input file.
input: PathBuf,
},
/// Prints the code AST.
Ast {
/// The input file.
input: PathBuf,
},
/// Compile the edlang source file.
Compile {
/// The input file.
@ -60,7 +65,6 @@ enum Commands {
},
}
/*
fn check_program(program: &ProgramData, ast: &ast::Program) -> bool {
let errors = check::check(program, ast);
@ -84,7 +88,6 @@ fn check_program(program: &ProgramData, ast: &ast::Program) -> bool {
error_count == 0
}
*/
fn main() -> Result<()> {
color_eyre::install()?;
@ -93,14 +96,19 @@ fn main() -> Result<()> {
match args.command {
Commands::Check { input } => {
let code = fs::read_to_string(input)?;
let code = fs::read_to_string(&input)?;
let lexer = Lexer::new(code.as_str());
let parser = grammar::ProgramParser::new();
let ast = parser.parse(lexer).unwrap();
//let str_path = input.to_string_lossy();
//let program = ProgramData::new(&str_path, &code);
//check_program(&program, &ast);
let ast = parser.parse(lexer)?;
let program = ProgramData::new(&input, &code);
check_program(&program, &ast);
}
Commands::Ast { input } => {
let code = fs::read_to_string(&input)?;
let lexer = Lexer::new(code.as_str());
let parser = grammar::ProgramParser::new();
let ast = parser.parse(lexer)?;
println!("{ast:#?}");
}
Commands::Compile {
input,
@ -111,17 +119,17 @@ fn main() -> Result<()> {
let code = fs::read_to_string(&input)?;
let lexer = Lexer::new(code.as_str());
let parser = grammar::ProgramParser::new();
let ast: Program = parser.parse(lexer).unwrap();
let ast: Program = parser.parse(lexer)?;
let program = ProgramData::new(&input, &code);
let file_name = input.file_name().unwrap().to_string_lossy();
//if !check_program(&program, &ast) {
// return Ok(());
//}
if !check_program(&program, &ast) {
return Ok(());
}
println!("{:#?}", ast);
// println!("{:#?}", ast);
let context = Context::create();
let mut codegen = codegen::CodeGen::new(&context, &file_name, program, ast)?;
codegen.compile_ast()?;

View file

@ -15,6 +15,8 @@ pub enum Token {
KeywordReturn,
#[token("struct")]
KeywordStruct,
#[token("ptr")]
KeywordPtr,
#[token("if")]
KeywordIf,
#[token("else")]
@ -24,6 +26,34 @@ pub enum Token {
Identifier(String),
#[regex(r"\d+", |lex| lex.slice().parse().ok())]
Integer(String),
#[regex(r#""(?:[^"]|\\")*""#, |lex| lex.slice().to_string())]
String(String),
#[regex(r"(true|false)", |lex| lex.slice().parse().ok())]
Boolean(bool),
#[token("bool")]
KeywordBool,
#[token("i8")]
Inti8,
#[token("i16")]
Inti16,
#[token("i32")]
Inti32,
#[token("i64")]
Inti64,
#[token("u8")]
Intu8,
#[token("u16")]
Intu16,
#[token("u32")]
Intu32,
#[token("u64")]
Intu64,
#[token("f32")]
Float32,
#[token("f64")]
Float64,
#[token("(")]
LeftParen,
@ -33,6 +63,10 @@ pub enum Token {
LeftBracket,
#[token("}")]
RightBracket,
#[token("[")]
LeftSquareBracket,
#[token("]")]
RightSquareBracket,
#[token("=")]
Assign,
#[token(";")]
@ -43,6 +77,10 @@ pub enum Token {
Arrow,
#[token(",")]
Coma,
#[token("<")]
LessThanSign,
#[token(">")]
MoreThanSign,
#[token("+")]
OperatorAdd,