new start

This commit is contained in:
Edgar 2024-01-14 09:36:46 +01:00
parent df37905b1e
commit 57a5621961
No known key found for this signature in database
GPG key ID: 70ADAE8F35904387
24 changed files with 1235 additions and 2122 deletions

1287
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,27 +1,19 @@
[package]
name = "edlang"
version = "0.1.0"
authors = ["Edgar Luque <edgar@edgarluque.com>"]
description = "An experimental language using LLVM."
edition = "2021"
readme = "README.md"
keywords = ["llvm", "compiler"]
license = "AGPL-3.0-only"
categories = ["compilers"]
[workspace]
resolver = "2"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
members = [ "bin/edlang", "lib/edlang_ast", "lib/edlang_check", "lib/edlang_codegen_mlir", "lib/edlang_driver","lib/edlang_parser", "lib/edlang_session"]
[dependencies]
clap = { version = "4.3.3", features = ["derive"] }
color-eyre = "0.6.2"
itertools = "0.11"
lalrpop-util = { version = "0.20.0", features = ["lexer"] }
regex = "1.9"
tracing = "0.1.37"
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
inkwell = { version = "0.2.0", features = ["llvm16-0"] }
annotate-snippets = { version = "0.9.1", features = ["color"] }
logos = "0.13.0"
[profile.release]
lto = true
codegen-units = 1
[build-dependencies]
lalrpop = "0.20.0"
# Set the settings for build scripts and proc-macros.
[profile.dev.build-override]
opt-level = 3
# On dev optimize dependencies a bit so it's not as slow.
[profile.dev.package."*"]
opt-level = 1
[workspace.dependencies]
tracing = "0.1.40"

15
bin/edlang/Cargo.toml Normal file
View file

@ -0,0 +1,15 @@
[package]
name = "edlang"
version = "0.1.0"
authors = ["Edgar Luque <edgar@edgarluque.com>"]
description = "An experimental language using LLVM."
edition = "2021"
readme = "README.md"
keywords = ["llvm", "compiler"]
license = "AGPL-3.0-only"
categories = ["compilers"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
edlang_driver = { version = "0.1.0", path = "../../lib/edlang_driver" }

5
bin/edlang/src/main.rs Normal file
View file

@ -0,0 +1,5 @@
use std::error::Error;
/// Entry point of the `edlang` binary: hands control to `edlang_driver::main`
/// and returns whatever result it produces.
fn main() -> Result<(), Box<dyn Error>> {
edlang_driver::main()
}

13
lib/edlang_ast/Cargo.toml Normal file
View file

@ -0,0 +1,13 @@
[package]
name = "edlang_ast"
version = "0.1.0"
authors = ["Edgar Luque <edgar@edgarluque.com>"]
description = "edlang AST"
edition = "2021"
keywords = ["llvm", "compiler"]
license = "AGPL-3.0-only"
categories = ["compilers"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View file

@ -0,0 +1 @@

View file

@ -0,0 +1,16 @@
[package]
name = "edlang_check"
version = "0.1.0"
authors = ["Edgar Luque <edgar@edgarluque.com>"]
description = "edlang check"
edition = "2021"
keywords = ["llvm", "compiler"]
license = "AGPL-3.0-only"
categories = ["compilers"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
ariadne = { version = "0.4.0", features = ["auto-color"] }
edlang_ast = { version = "0.1.0", path = "../edlang_ast" }
tracing = { workspace = true }

View file

@ -0,0 +1 @@

View file

@ -0,0 +1,19 @@
[package]
name = "edlang_codegen_mlir"
version = "0.1.0"
authors = ["Edgar Luque <edgar@edgarluque.com>"]
description = "edlang MLIR codegen"
edition = "2021"
keywords = ["llvm", "compiler"]
license = "AGPL-3.0-only"
categories = ["compilers"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
edlang_ast = { version = "0.1.0", path = "../edlang_ast" }
edlang_parser = { version = "0.1.0", path = "../edlang_parser" }
edlang_session = { version = "0.1.0", path = "../edlang_session" }
llvm-sys = "170.0.1"
melior = { version = "0.15.0", features = ["ods-dialects"] }
mlir-sys = "0.2.1"

View file

@ -0,0 +1 @@

View file

@ -0,0 +1,22 @@
[package]
name = "edlang_driver"
version = "0.1.0"
authors = ["Edgar Luque <edgar@edgarluque.com>"]
description = "edlang compiler driver library"
edition = "2021"
keywords = ["llvm", "compiler"]
license = "AGPL-3.0-only"
categories = ["compilers"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = { version = "4.4.16", features = ["derive"] }
color-eyre = "0.6.2"
edlang_ast = { version = "0.1.0", path = "../edlang_ast" }
edlang_check = { version = "0.1.0", path = "../edlang_check" }
edlang_codegen_mlir = { version = "0.1.0", path = "../edlang_codegen_mlir" }
edlang_parser = { version = "0.1.0", path = "../edlang_parser" }
edlang_session = { version = "0.1.0", path = "../edlang_session" }
tracing = { workspace = true }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }

View file

@ -0,0 +1,5 @@
use std::error::Error;
/// Driver entry point called by the binary crate.
///
/// Currently a stub: it performs no work and always returns `Ok(())`.
pub fn main() -> Result<(), Box<dyn Error>> {
Ok(())
}

View file

@ -0,0 +1,20 @@
[package]
name = "edlang_parser"
version = "0.1.0"
authors = ["Edgar Luque <edgar@edgarluque.com>"]
description = "edlang parser"
edition = "2021"
keywords = ["llvm", "compiler"]
license = "AGPL-3.0-only"
categories = ["compilers"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
edlang_ast = { version = "0.1.0", path = "../edlang_ast" }
lalrpop-util = { version = "0.20.0", features = ["lexer"] }
logos = "0.13.0"
tracing = { workspace = true }
[build-dependencies]
lalrpop = "0.20.0"

View file

@ -0,0 +1 @@

View file

@ -0,0 +1,8 @@
[package]
name = "edlang_session"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View file

@ -0,0 +1 @@

View file

@ -1,178 +0,0 @@
use std::collections::HashMap;
/// A value paired with the source span it came from.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Spanned<T> {
    /// `(start, end)` locations of the value in the source text.
    pub span: (usize, usize),
    /// The wrapped value.
    pub value: T,
}

impl<T> Spanned<T> {
    /// Wraps `value` together with its `span`.
    ///
    /// Declared `const` so it can be used in constant contexts, in line with
    /// the other AST constructors (`Parameter::new`, `StructField::new`).
    pub const fn new(value: T, span: (usize, usize)) -> Self {
        Self { span, value }
    }
}
/// Binary operators that can appear in an expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum OpCode {
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
    /// `%`
    Rem,
    /// `&&`
    And,
    /// `||`
    Or,
    /// `==`
    Eq,
    /// `!=`
    Ne,
}

impl OpCode {
    /// Returns the fixed lowercase mnemonic for this operator.
    pub fn to_str(&self) -> &'static str {
        match *self {
            Self::Add => "addi",
            Self::Sub => "subi",
            Self::Mul => "muli",
            Self::Div => "divi",
            Self::Rem => "remi",
            Self::And => "and",
            Self::Or => "or",
            Self::Eq => "eq",
            Self::Ne => "ne",
        }
    }
}
/// A type expression as written in the source program.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TypeExp {
/// An integer type with the given bit width and signedness.
Integer {
bits: u32,
signed: bool,
},
/// The boolean type.
Boolean,
/// An array of `of` elements with an optional length.
Array {
of: Spanned<Box<Self>>,
len: Option<u32>,
},
/// A pointer to `target`.
Pointer {
target: Spanned<Box<Self>>,
},
/// Any other type, referred to by its name `id`.
Other {
id: String,
},
}
/// A literal value appearing in an expression.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum LiteralValue {
/// A string literal.
String(String),
/// An integer literal; `value` keeps the digits as text together with the
/// bit width and signedness of the literal's type.
Integer {
value: String,
bits: u32,
signed: bool,
},
/// A boolean literal.
Boolean(bool),
}
/// An expression node of the AST.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Expression {
/// A literal value.
Literal(LiteralValue),
/// A reference to a variable by name.
Variable {
name: String,
},
/// A call to `function` with the given argument expressions.
Call {
function: Spanned<String>,
args: Vec<Spanned<Box<Self>>>,
},
/// A binary operation: left operand, operator, right operand.
BinaryOp(Spanned<Box<Self>>, OpCode, Spanned<Box<Self>>),
}
/// A single function parameter: its name and its declared type.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Parameter {
    /// Name of the parameter.
    pub ident: Spanned<String>,
    /// Declared type of the parameter.
    pub type_exp: Spanned<TypeExp>,
}

impl Parameter {
    /// Builds a parameter from its name and type expression.
    pub const fn new(ident: Spanned<String>, type_exp: Spanned<TypeExp>) -> Self {
        Parameter { ident, type_exp }
    }
}
/// A function definition.
#[derive(Debug, Clone, PartialEq)]
pub struct Function {
    /// Name of the function.
    pub name: Spanned<String>,
    /// Parameters in declaration order.
    pub params: Vec<Parameter>,
    /// Statements making up the body.
    pub body: Vec<Spanned<Statement>>,
    /// Type information for the function scope; empty on construction.
    // NOTE(review): presumably keyed by variable name and filled in by a later
    // analysis pass — confirm against the code that populates it.
    pub scope_type_info: HashMap<String, Vec<TypeExp>>,
    /// Declared return type, if any.
    pub return_type: Option<Spanned<TypeExp>>,
}

impl Function {
    /// Creates a function with an empty `scope_type_info` table.
    pub fn new(
        name: Spanned<String>,
        params: Vec<Parameter>,
        body: Vec<Spanned<Statement>>,
        return_type: Option<Spanned<TypeExp>>,
    ) -> Self {
        let scope_type_info = HashMap::new();
        Function {
            name,
            params,
            body,
            scope_type_info,
            return_type,
        }
    }
}
/// A field of a struct declaration: its name and its type.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StructField {
    /// Name of the field.
    pub ident: Spanned<String>,
    /// Declared type of the field.
    pub field_type: Spanned<TypeExp>,
}

impl StructField {
    /// Builds a field from its name and the type expression given as `type_name`.
    pub const fn new(ident: Spanned<String>, type_name: Spanned<TypeExp>) -> Self {
        let field_type = type_name;
        StructField { ident, field_type }
    }
}
/// A struct declaration: its name and its fields in declaration order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Struct {
pub name: Spanned<String>,
pub fields: Vec<StructField>,
}
/// A statement of the language, including top-level items.
#[derive(Debug, Clone, PartialEq)]
pub enum Statement {
/// `let name [: type] = value;` — introduces a variable.
Let {
name: Spanned<String>,
value: Spanned<Box<Expression>>,
value_type: Option<Spanned<TypeExp>>,
},
/// `name = value;` — assigns to an existing variable.
Mutate {
name: Spanned<String>,
value: Spanned<Box<Expression>>,
},
/// `if condition { body } [else { else_body }]`, with one type-info table
/// per branch (both empty when produced by the parser).
If {
condition: Spanned<Box<Expression>>,
body: Vec<Spanned<Statement>>,
scope_type_info: HashMap<String, Vec<TypeExp>>,
else_body: Option<Vec<Spanned<Statement>>>,
else_body_scope_type_info: HashMap<String, Vec<TypeExp>>,
},
/// `return [value];`
Return(Option<Spanned<Box<Expression>>>),
/// A function definition.
Function(Function),
/// A struct declaration.
Struct(Struct),
}
/// A whole parsed program: the list of its top-level statements.
#[derive(Debug, Clone)]
pub struct Program {
    /// Top-level statements in source order.
    pub statements: Vec<Spanned<Statement>>,
}

impl Program {
    /// Wraps the parsed top-level statements into a program.
    pub fn new(statements: Vec<Spanned<Statement>>) -> Self {
        Program { statements }
    }
}

View file

@ -1,218 +0,0 @@
use crate::{
ast::{self, Statement},
codegen::ProgramData,
lexer::LexicalError,
tokens::Token,
type_analysis::TypeError,
};
use annotate_snippets::{
display_list::{DisplayList, FormatOptions},
snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation},
};
use lalrpop_util::ParseError;
/// A diagnostic produced while checking a program, carrying the display list
/// that renders it. The lifetime ties the diagnostic to the borrowed source text.
#[derive(Debug)]
pub enum Check<'a> {
/// A non-fatal diagnostic.
Warning(DisplayList<'a>),
/// An error diagnostic.
Error(DisplayList<'a>),
}
/// Checks this is a valid edlang program.
pub fn check<'a>(data: &'a ProgramData, ast: &ast::Program) -> Vec<Check<'a>> {
let mut errors = vec![];
for statement in &ast.statements {
match &statement.value {
Statement::Let { name, .. } => {
// can't have a top level assignment yet.
let snippet = Snippet {
title: Some(Annotation {
id: None,
label: Some("unexpected let at top level"),
annotation_type: AnnotationType::Error,
}),
footer: vec![],
slices: vec![Slice {
source: &data.source,
line_start: 1,
fold: true,
origin: None,
annotations: vec![SourceAnnotation {
label: "unexpected statement",
annotation_type: AnnotationType::Error,
range: name.span,
}],
}],
opt: FormatOptions {
color: true,
..Default::default()
},
};
let dl = DisplayList::from(snippet);
errors.push(Check::Error(dl));
}
Statement::Mutate { name, .. } => {
// can't have a top level assignment yet.
let snippet = Snippet {
title: Some(Annotation {
id: None,
label: Some("unexpected assignment at top level"),
annotation_type: AnnotationType::Error,
}),
footer: vec![],
slices: vec![Slice {
source: &data.source,
line_start: 1,
fold: true,
origin: None,
annotations: vec![SourceAnnotation {
label: "unexpected statement",
annotation_type: AnnotationType::Error,
range: name.span,
}],
}],
opt: FormatOptions {
color: true,
..Default::default()
},
};
let dl = DisplayList::from(snippet);
errors.push(Check::Error(dl));
}
_ => {}
}
}
errors
}
/// Prints a parse error as an annotated snippet of `source` on stdout.
///
/// Only `InvalidToken` and lexer (`User`) errors are rendered so far.
///
/// # Panics
/// Hits `todo!()` for `UnrecognizedEof`, `UnrecognizedToken` and `ExtraToken`.
pub fn print_error(source: &str, err: ParseError<usize, Token, LexicalError>) {
    /// Renders a single error annotation over `source` and prints it.
    fn emit(source: &str, title: Option<&str>, label: &str, range: (usize, usize), fold: bool) {
        let snippet = Snippet {
            title: title.map(|text| Annotation {
                id: None,
                label: Some(text),
                annotation_type: AnnotationType::Error,
            }),
            footer: vec![],
            slices: vec![Slice {
                source,
                line_start: 1,
                fold,
                origin: None,
                annotations: vec![SourceAnnotation {
                    label,
                    annotation_type: AnnotationType::Error,
                    range,
                }],
            }],
            opt: FormatOptions {
                color: true,
                ..Default::default()
            },
        };
        let dl = DisplayList::from(snippet);
        println!("{dl}");
    }

    match err {
        ParseError::InvalidToken { location } => {
            emit(source, None, "invalid token", (location, location), true)
        }
        ParseError::UnrecognizedEof { .. } => todo!(),
        ParseError::UnrecognizedToken { .. } => todo!(),
        ParseError::ExtraToken { .. } => todo!(),
        ParseError::User { error } => match error {
            LexicalError::InvalidToken(err, range) => {
                let title = format!("invalid token (lexical error): {:?}", err);
                emit(
                    source,
                    Some(&title),
                    "invalid token (lexical error)",
                    (range.start, range.end),
                    false,
                );
            }
        },
    }
}
/// Prints a type error as an annotated snippet of `source` on stdout.
///
/// The same text is used for the snippet title and for the annotation placed
/// on the offending span.
pub fn print_type_error(source: &str, err: TypeError) {
    // Both variants render identically apart from the message.
    let (message, span) = match err {
        TypeError::Mismatch { span, .. } => ("type mismatch", span),
        TypeError::UndeclaredVariable { span, .. } => ("undeclared variable", span),
    };

    let snippet = Snippet {
        title: Some(Annotation {
            id: None,
            label: Some(message),
            annotation_type: AnnotationType::Error,
        }),
        footer: vec![],
        slices: vec![Slice {
            source,
            line_start: 1,
            fold: false,
            origin: None,
            annotations: vec![SourceAnnotation {
                label: message,
                annotation_type: AnnotationType::Error,
                range: span,
            }],
        }],
        opt: FormatOptions {
            color: true,
            ..Default::default()
        },
    };
    println!("{}", DisplayList::from(snippet));
}

View file

@ -1,585 +0,0 @@
use std::{
collections::HashMap,
path::{Path, PathBuf},
todo,
};
use color_eyre::Result;
use inkwell::{
builder::Builder,
context::Context,
module::Module,
targets::{CodeModel, InitializationConfig, RelocMode, Target, TargetMachine},
types::{BasicMetadataTypeEnum, BasicType, BasicTypeEnum, StructType},
values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum, FunctionValue},
IntPredicate, OptimizationLevel,
};
use itertools::{Either, Itertools};
use tracing::info;
use crate::ast::{self, Expression, Function, LiteralValue, OpCode, Spanned, Statement, TypeExp};
/// The source file being compiled: where it lives and what it contains.
#[derive(Debug, Clone)]
pub struct ProgramData {
    /// Path of the source file.
    pub filename: PathBuf,
    /// Full text of the source file.
    pub source: String,
}

impl ProgramData {
    /// Creates program data holding owned copies of `filename` and `source`.
    pub fn new(filename: &Path, source: &str) -> Self {
        let filename = PathBuf::from(filename);
        let source = String::from(source);
        ProgramData { filename, source }
    }
}
/// Lowers an edlang AST into an LLVM module using inkwell.
pub struct CodeGen<'ctx> {
// LLVM context used to create types and basic blocks.
context: &'ctx Context,
/// The module that receives the generated functions.
pub module: Module<'ctx>,
// Instruction builder shared by all the compile methods.
builder: Builder<'ctx>,
//types: TypeStorage<'ctx>,
// Struct layouts keyed by struct name; filled in by `compile_ast`.
struct_types: StructTypeStorage<'ctx>,
// Function definitions keyed by function name; filled in by `compile_ast`.
functions: HashMap<String, Function>,
// Source file data handed to `new`; only stored, not read by the methods here.
_program: ProgramData,
// The program being compiled.
ast: ast::Program,
// Target machine; provides the data layout for the module and for struct sizing.
target_machine: TargetMachine,
}
/// The current SSA value bound to a variable name while compiling a function.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Variable<'ctx> {
/// The value the variable currently holds.
pub value: BasicValueEnum<'ctx>,
/// Always initialised to 0 by the code generator.
// NOTE(review): never read or updated in the visible code — confirm whether still needed.
pub type_counter: usize,
/// Bumped on every assignment; compared across `if` branches to decide
/// whether a phi node is needed after the branches merge.
pub phi_counter: usize,
}
/// Variables visible at a given point, keyed by name.
pub type Variables<'ctx> = HashMap<String, Variable<'ctx>>;
/// Holds the struct type and maps fields to types and the location within the struct.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StructTypeInfo<'ctx> {
// The LLVM struct type built for the declaration.
ty: StructType<'ctx>,
// Field name -> (index of the field inside `ty`, declared field type).
fields: HashMap<String, (usize, TypeExp)>,
}
// Struct layouts keyed by struct name.
type StructTypeStorage<'ctx> = HashMap<String, StructTypeInfo<'ctx>>;
impl<'ctx> CodeGen<'ctx> {
pub fn new(
context: &'ctx Context,
module_name: &str,
_program: ProgramData,
ast: ast::Program,
) -> Result<Self> {
let module = context.create_module(module_name);
Target::initialize_native(&InitializationConfig::default())
.expect("Failed to initialize native target");
let triple = TargetMachine::get_default_triple();
// https://thedan64.github.io/inkwell/inkwell/targets/struct.TargetMachine.html#method.write_to_memory_buffer
let opt = OptimizationLevel::Default;
let reloc = RelocMode::Default;
let model = CodeModel::Default;
let target = Target::from_name("x86-64").unwrap();
let target_machine = target
.create_target_machine(
&triple,
"x86-64",
TargetMachine::get_host_cpu_features().to_str()?,
opt,
reloc,
model,
)
.unwrap();
module.set_data_layout(&target_machine.get_target_data().get_data_layout());
module.set_triple(&triple);
let codegen = CodeGen {
context,
module,
builder: context.create_builder(),
_program,
ast,
struct_types: HashMap::new(),
functions: HashMap::new(),
target_machine,
};
Ok(codegen)
}
/// Compiles the whole program into `self.module`.
///
/// Works in three passes over the top-level statements:
/// 1. build an LLVM struct type for every struct declaration,
/// 2. declare every function so bodies can call each other,
/// 3. compile every function body.
///
/// Struct fields are laid out from the largest to the smallest type and the
/// struct is padded with bytes up to the next power-of-two size.
///
/// # Errors
/// Propagates any error from lowering a type or compiling a function.
pub fn compile_ast(&mut self) -> Result<()> {
    // todo fix the grammar so top level statements are only functions and static vars.
    let target_data = self.target_machine.get_target_data();

    // create struct types
    //
    // Each struct is registered as soon as it is built, so a later struct can
    // use an earlier one as a field type.
    // todo: this doesnt handle out of order structs well
    self.struct_types = HashMap::new();
    for statement in &self.ast.statements {
        if let Statement::Struct(s) = &statement.value {
            let mut fields = HashMap::new();
            // (LLVM type, declaration index); the index is `None` for padding.
            let mut field_types: Vec<(BasicTypeEnum<'_>, Option<usize>)> = vec![];

            for (i, field) in s.fields.iter().enumerate() {
                let ty = self.get_llvm_type(&field.field_type.value)?;
                field_types.push((ty, Some(i)));
                fields.insert(
                    field.ident.value.clone(),
                    (i, field.field_type.value.clone()),
                );
            }

            // Largest fields first.
            field_types.sort_by(|a, b| {
                target_data
                    .get_bit_size(&b.0)
                    .cmp(&target_data.get_bit_size(&a.0))
            });

            let total_byte_size: u32 = field_types
                .iter()
                .map(|x| (target_data.get_bit_size(&x.0) + 7) / 8)
                .sum::<u64>()
                .try_into()
                .unwrap();

            if !total_byte_size.is_power_of_two() {
                let next = total_byte_size.next_power_of_two();
                let diff = next - total_byte_size;
                let padding = self.context.i8_type().array_type(diff);
                field_types.push((padding.as_basic_type_enum(), None))
            }

            // Map every declaration index to its slot in the reordered layout.
            // The table is built from the untouched declaration indices first
            // and applied afterwards, so an already-remapped field can never be
            // mistaken for another field that originally had that index.
            let mut slot_of_field = vec![0usize; s.fields.len()];
            for (slot, (_, declared_index)) in field_types.iter().enumerate() {
                if let Some(declared_index) = *declared_index {
                    slot_of_field[declared_index] = slot;
                }
            }
            for field in fields.values_mut() {
                field.0 = slot_of_field[field.0];
            }

            let ty = self
                .context
                .struct_type(&field_types.into_iter().map(|x| x.0).collect_vec(), false);

            self.struct_types
                .insert(s.name.value.clone(), StructTypeInfo { fields, ty });
        }
    }

    // create the llvm functions first.
    let mut functions = HashMap::new();
    for statement in &self.ast.statements {
        if let Statement::Function(function) = &statement.value {
            functions.insert(function.name.value.clone(), function.clone());
            self.compile_function_signature(function)?;
        }
    }
    self.functions = functions;

    info!("functions:\n{:#?}", self.functions);

    // implement them.
    for function in self.functions.values() {
        self.compile_function(function)?;
    }

    Ok(())
}
/// Returns the textual LLVM IR of the module.
///
/// The module is verified first; a verification failure is only reported on
/// stderr and the IR is returned regardless.
///
/// # Panics
/// Panics if the printed IR is not valid UTF-8.
pub fn generated_code(&self) -> String {
    match self.module.verify() {
        Ok(()) => {}
        Err(err) => eprintln!("error:\n{}", err),
    }

    let ir = self.module.print_to_string();
    ir.to_str().unwrap().to_owned()
}
fn get_llvm_type(&self, id: &TypeExp) -> Result<BasicTypeEnum<'ctx>> {
Ok(match id {
TypeExp::Integer { bits, signed: _ } => self
.context
.custom_width_int_type(*bits)
.as_basic_type_enum(),
TypeExp::Boolean => self.context.bool_type().as_basic_type_enum(),
TypeExp::Array { of, len } => {
let ty = self.get_llvm_type(&of.value)?;
ty.array_type(len.unwrap()).as_basic_type_enum()
}
TypeExp::Pointer { target } => {
let ty = self.get_llvm_type(&target.value)?;
ty.ptr_type(Default::default()).as_basic_type_enum()
}
TypeExp::Other { id } => self
.struct_types
.get(id)
.expect("struct type not found")
.ty
.as_basic_type_enum(),
})
}
/// Declares the LLVM function for `function` without a body, so that other
/// function bodies can call it before it is compiled.
///
/// A function without a declared return type is declared as returning `void`.
///
/// # Errors
/// Fails if a parameter type or the return type cannot be lowered.
fn compile_function_signature(&self, function: &Function) -> Result<()> {
    let mut param_types: Vec<BasicMetadataTypeEnum<'ctx>> =
        Vec::with_capacity(function.params.len());
    for param in &function.params {
        let lowered = self.get_llvm_type(&param.type_exp.value)?;
        param_types.push(lowered.into());
    }

    let fn_type = if let Some(declared) = &function.return_type {
        self.get_llvm_type(&declared.value)?
            .fn_type(&param_types, false)
    } else {
        self.context.void_type().fn_type(&param_types, false)
    };

    self.module
        .add_function(&function.name.value, fn_type, None);
    Ok(())
}
fn compile_function(&self, function: &Function) -> Result<()> {
let func = self.module.get_function(&function.name.value).unwrap();
let entry_block = self.context.append_basic_block(func, "entry");
self.builder.position_at_end(entry_block);
let mut variables: Variables = HashMap::new();
for (i, param) in function.params.iter().enumerate() {
let id = &param.ident;
let param_value = func
.get_nth_param(i.try_into().unwrap())
.expect("parameter");
variables.insert(
id.value.clone(),
Variable {
value: param_value,
phi_counter: 0,
type_counter: 0,
},
);
}
let mut has_return = false;
for statement in &function.body {
if let Statement::Return(_) = statement.value {
has_return = true
}
self.compile_statement(func, statement, &mut variables, &function.scope_type_info)?;
}
if !has_return {
self.builder.build_return(None);
}
Ok(())
}
/// Compiles one statement of a function body.
///
/// `variables` maps each visible variable to its current SSA value and is
/// updated in place by `let`, by assignments and by merged `if` branches.
/// `scope_info` is the type information table of the enclosing scope.
///
/// For `if` statements each branch is compiled against its own copy of
/// `variables`. After the branches, every outer variable assigned in at least
/// one branch gets a phi node in the merge block with one incoming value per
/// predecessor of that block, and its `phi_counter` is bumped so that an
/// enclosing `if` sees the change too.
///
/// # Panics
/// Panics if an expression that must produce a value does not, if an assigned
/// variable does not exist, or if a function/struct definition is found inside
/// a function body.
fn compile_statement(
    &self,
    function_value: FunctionValue,
    statement: &Spanned<Statement>,
    // value, assignments
    variables: &mut Variables<'ctx>,
    scope_info: &HashMap<String, Vec<TypeExp>>,
) -> Result<()> {
    match &statement.value {
        // Variable assignment
        Statement::Let {
            name,
            value,
            value_type: _,
            ..
        } => {
            let value = self
                .compile_expression(value, variables, scope_info)?
                .expect("should have result");
            variables.insert(
                name.value.clone(),
                Variable {
                    value,
                    phi_counter: 0,
                    type_counter: 0,
                },
            );
        }
        Statement::Mutate { name, value, .. } => {
            let value = self
                .compile_expression(value, variables, scope_info)?
                .expect("should have result");
            let var = variables
                .get_mut(&name.value)
                .expect("variable should exist");
            var.phi_counter += 1;
            var.value = value;
        }
        Statement::Return(ret) => {
            if let Some(ret) = ret {
                let value = self
                    .compile_expression(ret, variables, scope_info)?
                    .expect("should have result");
                self.builder.build_return(Some(&value));
            } else {
                self.builder.build_return(None);
            }
        }
        Statement::If {
            condition,
            body,
            else_body,
            scope_type_info,
            else_body_scope_type_info,
        } => {
            let condition = self
                .compile_expression(condition, variables, scope_info)?
                .expect("should produce a value");

            // Block holding the conditional branch. Without an `else` it jumps
            // straight to `merge`, so phi nodes need it as a predecessor.
            let branch_block = self
                .builder
                .get_insert_block()
                .expect("builder should be positioned inside a block");

            let mut if_block = self.context.append_basic_block(function_value, "if");
            // Only create the `else` block when there is an else body; an unused
            // block would be left without a terminator and fail verification.
            let else_block = else_body
                .as_ref()
                .map(|_| self.context.append_basic_block(function_value, "else"));
            let merge_block = self.context.append_basic_block(function_value, "merge");

            self.builder.build_conditional_branch(
                condition.into_int_value(),
                if_block,
                else_block.unwrap_or(merge_block),
            );

            let mut variables_if = variables.clone();
            self.builder.position_at_end(if_block);
            for s in body {
                self.compile_statement(function_value, s, &mut variables_if, scope_type_info)?;
            }
            self.builder.build_unconditional_branch(merge_block);
            // Nested control flow may have moved the insertion point; the phi
            // must name the block that actually branches to `merge`.
            if_block = self.builder.get_insert_block().unwrap();

            // Without an else body this stays an untouched copy of the outer
            // variables, i.e. the values flowing in from `branch_block`.
            let mut variables_else = variables.clone();
            // Predecessor of `merge` on the path that skips the `if` body.
            let mut other_block = branch_block;
            if let Some(else_body) = else_body {
                let else_entry =
                    else_block.expect("else block is created whenever there is an else body");
                self.builder.position_at_end(else_entry);
                for s in else_body {
                    self.compile_statement(
                        function_value,
                        s,
                        &mut variables_else,
                        else_body_scope_type_info,
                    )?;
                }
                self.builder.build_unconditional_branch(merge_block);
                other_block = self.builder.get_insert_block().unwrap(); // update for phi
            }

            self.builder.position_at_end(merge_block);

            for (name, var) in variables.iter_mut() {
                if let (Some(if_var), Some(else_var)) =
                    (variables_if.get(name), variables_else.get(name))
                {
                    let assigned_in_a_branch = if_var.phi_counter > var.phi_counter
                        || else_var.phi_counter > var.phi_counter;
                    if assigned_in_a_branch {
                        let phi = self
                            .builder
                            .build_phi(var.value.get_type(), &format!("{name}_phi"));
                        // One incoming value per predecessor of `merge`; a branch
                        // that did not assign the variable contributes the value
                        // it had before the `if`.
                        phi.add_incoming(&[
                            (&if_var.value, if_block),
                            (&else_var.value, other_block),
                        ]);
                        var.value = phi.as_basic_value();
                        // Record the change so an enclosing `if` also merges it.
                        var.phi_counter += 1;
                    }
                }
            }
        }
        Statement::Function(_) => unreachable!(),
        Statement::Struct(_) => unreachable!(),
    };

    Ok(())
}
/// Compiles an expression and returns the value it produces.
///
/// The result is `None` only for a call that yields no basic value; every
/// other kind of expression always produces `Some` value.
pub fn compile_expression(
    &self,
    expr: &Spanned<Box<Expression>>,
    variables: &mut Variables<'ctx>,
    scope_info: &HashMap<String, Vec<TypeExp>>,
) -> Result<Option<BasicValueEnum<'ctx>>> {
    let compiled = match expr.value.as_ref() {
        Expression::Variable { name } => {
            let value = self.compile_variable(name, variables)?;
            Some(value)
        }
        Expression::Literal(term) => {
            let value = self.compile_literal(term)?;
            Some(value)
        }
        Expression::Call { function, args } => {
            self.compile_call(function, args, variables, scope_info)?
        }
        Expression::BinaryOp(lhs, op, rhs) => {
            let value = self.compile_binary_op(lhs, op, rhs, variables, scope_info)?;
            Some(value)
        }
    };
    Ok(compiled)
}
pub fn compile_call(
&self,
func_name: &Spanned<String>,
args: &[Spanned<Box<Expression>>],
variables: &mut Variables<'ctx>,
scope_info: &HashMap<String, Vec<TypeExp>>,
) -> Result<Option<BasicValueEnum<'ctx>>> {
info!("compiling fn call: func_name={}", func_name.value);
let function = self
.module
.get_function(&func_name.value)
.expect("should exist");
let mut value_args: Vec<BasicMetadataValueEnum> = Vec::with_capacity(args.len());
for arg in args.iter() {
let res = self
.compile_expression(arg, variables, scope_info)?
.expect("should have result");
value_args.push(res.into());
}
let result = self
.builder
.build_call(function, &value_args, &format!("{}_call", func_name.value))
.try_as_basic_value();
Ok(match result {
Either::Left(val) => Some(val),
Either::Right(_) => None,
})
}
/// Compiles a binary operation on two integer operands.
///
/// The left operand is compiled before the right one. Division and remainder
/// are always emitted as signed operations.
///
/// # Panics
/// Panics if either operand produces no value or if the operand types differ.
pub fn compile_binary_op(
    &self,
    lhs: &Spanned<Box<Expression>>,
    op: &OpCode,
    rhs: &Spanned<Box<Expression>>,
    variables: &mut Variables<'ctx>,
    scope_info: &HashMap<String, Vec<TypeExp>>,
) -> Result<BasicValueEnum<'ctx>> {
    let lhs = self
        .compile_expression(lhs, variables, scope_info)?
        .expect("should have result");
    let rhs = self
        .compile_expression(rhs, variables, scope_info)?
        .expect("should have result");

    assert_eq!(lhs.get_type(), rhs.get_type(), "type mismatch");
    let lhs = lhs.into_int_value();
    let rhs = rhs.into_int_value();

    let result = match op {
        OpCode::Add => self.builder.build_int_add(lhs, rhs, "add"),
        OpCode::Sub => self.builder.build_int_sub(lhs, rhs, "sub"),
        OpCode::Mul => self.builder.build_int_mul(lhs, rhs, "mul"),
        OpCode::Div => self.builder.build_int_signed_div(lhs, rhs, "div"),
        OpCode::Rem => self.builder.build_int_signed_rem(lhs, rhs, "rem"),
        OpCode::And => self.builder.build_and(lhs, rhs, "and"),
        OpCode::Or => self.builder.build_or(lhs, rhs, "or"),
        OpCode::Eq => self
            .builder
            .build_int_compare(IntPredicate::EQ, lhs, rhs, "eq"),
        // Named "ne" so the emitted IR does not label an inequality as "eq".
        OpCode::Ne => self
            .builder
            .build_int_compare(IntPredicate::NE, lhs, rhs, "ne"),
    };

    Ok(result.as_basic_value_enum())
}
/// Compiles a literal into an LLVM constant.
///
/// Integer literals are parsed from their textual digits as an unsigned
/// 64-bit number and emitted with the literal's bit width.
///
/// # Errors
/// Returns an error if the digits of an integer literal cannot be parsed.
///
/// # Panics
/// String literals are not implemented yet and hit `todo!()`.
pub fn compile_literal(&self, term: &LiteralValue) -> Result<BasicValueEnum<'ctx>> {
    let value = match term {
        LiteralValue::String(_) => {
            // todo: search string folding and interning.
            // self.builder.build_global_string_ptr(s, "literal_str");
            todo!()
        }
        LiteralValue::Boolean(v) => self
            .context
            .bool_type()
            .const_int((*v).into(), false)
            .as_basic_value_enum(),
        LiteralValue::Integer {
            value,
            bits,
            signed: _,
        } => {
            // Report a malformed or out-of-range literal instead of panicking.
            let parsed: u64 = value.parse()?;
            self.context
                .custom_width_int_type(*bits)
                .const_int(parsed, false)
                .as_basic_value_enum()
        }
    };

    Ok(value)
}
/// Returns the current value bound to the variable named `variable`.
///
/// # Panics
/// Panics if no variable with that name is in `variables`.
pub fn compile_variable(
    &self,
    variable: &str,
    variables: &mut Variables<'ctx>,
) -> Result<BasicValueEnum<'ctx>> {
    let binding = variables.get(variable).expect("value");
    Ok(binding.value)
}
}

View file

@ -1,265 +0,0 @@
use crate::{
ast::{self, Spanned},
tokens::Token,
lexer::LexicalError,
};
grammar;
// Connects the grammar to the external lexer: locations are `usize` values,
// lexer failures are `LexicalError`s, and each quoted terminal used by the
// rules below stands for the `Token` variant on its right.
extern {
type Location = usize;
type Error = LexicalError;
enum Token {
"let" => Token::KeywordLet,
"print" => Token::KeywordPrint,
"struct" => Token::KeywordStruct,
"if" => Token::KeywordIf,
"else" => Token::KeywordElse,
"identifier" => Token::Identifier(<String>),
"int literal" => Token::Integer(<String>),
"string literal" => Token::String(<String>),
"bool literal" => Token::Boolean(<bool>),
"return" => Token::KeywordReturn,
"fn" => Token::KeywordFn,
"ptr" => Token::KeywordPtr,
"_" => Token::KeywordUnderscore,
"(" => Token::LeftParen,
")" => Token::RightParen,
"{" => Token::LeftBracket,
"}" => Token::RightBracket,
"[" => Token::LeftSquareBracket,
"]" => Token::RightSquareBracket,
"<" => Token::LessThanSign,
">" => Token::MoreThanSign,
"=" => Token::Assign,
";" => Token::Semicolon,
":" => Token::Colon,
"->" => Token::Arrow,
"," => Token::Coma,
"+" => Token::OperatorAdd,
"-" => Token::OperatorSub,
"*" => Token::OperatorMul,
"/" => Token::OperatorDiv,
"%" => Token::OperatorRem,
"&&" => Token::OperatorAnd,
"||" => Token::OperatorOr,
"==" => Token::OperatorEq,
"!=" => Token::OperatorNe,
"bool" => Token::KeywordBool,
"i8" => Token::Inti8,
"i16" => Token::Inti16,
"i32" => Token::Inti32,
"i64" => Token::Inti64,
"u8" => Token::Intu8,
"u16" => Token::Intu16,
"u32" => Token::Intu32,
"u64" => Token::Intu64,
}
}
// A possibly empty, comma-separated list of `T`; a trailing comma is allowed.
Comma<T>: Vec<T> = {
<mut v:(<T> ",")*> <e:T?> => match e {
None => v,
Some(e) => {
v.push(e);
v
}
}
};
// Entry point of the grammar: a program is a non-empty list of statements.
pub Program: ast::Program = {
Statements => ast::Program::new(<>)
}
// One or more statements, collected in source order (left-recursive).
Statements: Vec<Spanned<ast::Statement>> = {
<Statement> => vec![<>],
<mut s:Statements> <n:Statement> => {
s.push(n);
s
},
};
// Any statement: a basic statement, a function definition or a struct
// declaration. The latter two are wrapped with their source span here.
Statement: Spanned<ast::Statement> = {
BasicStatement,
<lo:@L> <f:Function> <hi:@R> => Spanned::new(ast::Statement::Function(f), (lo, hi)),
<lo:@L> <s:Struct> <hi:@R> => Spanned::new(ast::Statement::Struct(s), (lo, hi)),
};
// A type annotation: `:` followed by a type.
TypeInfo: Spanned<ast::TypeExp> = {
":" <i:LangType> => i
};
// An identifier together with its source span.
Identifier: Spanned<String> = {
<lo:@L> <i:"identifier"> <hi:@R> => Spanned::new(i, (lo, hi))
}
// statements not including function definitions
// In order: `let` with an optional type annotation, assignment, `if` with an
// optional `else`, and `return` with an optional value.
BasicStatement: Spanned<ast::Statement> = {
<lo:@L> "let" <i:Identifier> <t:TypeInfo?> "=" <e:Expr> ";" <hi:@R> =>
Spanned::new(ast::Statement::Let { name: i, value: e, value_type: t }, (lo, hi)),
<lo:@L> <i:Identifier> "=" <e:Expr> ";" <hi:@R> =>
Spanned::new(ast::Statement::Mutate { name: i, value: e }, (lo, hi)),
// The scope type-info tables start out empty at parse time.
<lo:@L> "if" <cond:Expr> "{" <s:Statements> "}" <e:ElseExpr?> <hi:@R> =>
Spanned::new(ast::Statement::If {
condition: cond,
body: s,
else_body: e,
scope_type_info: Default::default(),
else_body_scope_type_info: Default::default(),
}, (lo, hi)),
<lo:@L> "return" <e:Expr?> ";" <hi:@R> => Spanned::new(ast::Statement::Return(e), (lo, hi)),
};
// The `else { ... }` part of an `if`; yields the statements of the else body.
ElseExpr: Vec<Spanned<ast::Statement>> = {
"else" "{" <s:Statements> "}" => s
}
// Operator groups, from the loosest binding (level 0) to the tightest (level 3).
// Logical operators; `&&` and `||` share the same precedence level.
Level0_Op: ast::OpCode = {
"&&" => ast::OpCode::And,
"||" => ast::OpCode::Or,
}
// Equality operators.
Level1_Op: ast::OpCode = {
"==" => ast::OpCode::Eq,
"!=" => ast::OpCode::Ne,
}
// Additive operators.
Level2_Op: ast::OpCode = {
"+" => ast::OpCode::Add,
"-" => ast::OpCode::Sub,
};
// Multiplicative operators.
Level3_Op: ast::OpCode = {
"*" => ast::OpCode::Mul,
"/" => ast::OpCode::Div,
"%" => ast::OpCode::Rem,
}
// One precedence tier: a left-recursive (hence left-associative) chain of
// `NextTier` operands joined by `Op`, or a single `NextTier`.
Tier<Op,NextTier>: Spanned<Box<ast::Expression>> = {
<lo:@L> <t:Tier<Op,NextTier>> <o:Op> <n:NextTier> <hi:@R> => Spanned::new(Box::new(ast::Expression::BinaryOp(t, o, n)), (lo, hi)),
NextTier
};
// Expression tiers, each one delegating to the next, tighter-binding tier.
Expr = Tier<Level0_Op, Expr2>;
Expr2 = Tier<Level1_Op, Expr3>;
Expr3 = Tier<Level2_Op, Expr4>;
Expr4 = Tier<Level3_Op, Term>;
// Terms: the atoms of an expression - variables, literals, calls and
// parenthesised sub-expressions.
Term: Spanned<Box<ast::Expression>> = {
    // A bare identifier is a variable reference.
    <lo:@L> <i:"identifier"> <hi:@R> => Spanned::new(Box::new(ast::Expression::Variable {
        name: i
    }), (lo, hi)),
    <lo:@L> <n:Number> <hi:@R> => Spanned::new(Box::new(ast::Expression::Literal(n)), (lo, hi)),
    <lo:@L> <n:StringLit> <hi:@R> => Spanned::new(Box::new(ast::Expression::Literal(n)), (lo, hi)),
    <lo:@L> <n:BoolLiteral> <hi:@R> => Spanned::new(Box::new(ast::Expression::Literal(n)), (lo, hi)),
    // Function call. Arguments are full expressions so that `f(a + 1, g(b))`
    // parses; restricting them to terms rejected any argument with an operator.
    <lo:@L> <i:Identifier> "(" <values:Comma<Expr>> ")" <hi:@R> => Spanned::new(Box::new(ast::Expression::Call { function: i, args: values }), (lo, hi)),
    // Parentheses restart at the loosest precedence level so they can group an
    // arbitrary expression, e.g. `(a + b) * c`.
    "(" <Expr> ")"
};
// Integer literals. A literal may carry a width/signedness suffix (i8..i64,
// u8..u64), optionally separated from the digits by a "_" token; a literal
// without a suffix is typed as a signed 32-bit integer. The literal text is
// stored as-is in `value`.
// NOTE(review): the lexer's identifier pattern also accepts a leading
// underscore, so `1_i8` written without spaces may not lex as
// int / "_" / "i8" - confirm against the lexer.
Number: ast::LiteralValue = {
<n:"int literal"> "_"? "i8" => ast::LiteralValue::Integer {
value: n,
bits: 8,
signed: true,
},
<n:"int literal"> "_"? "i16" => ast::LiteralValue::Integer {
value: n,
bits: 16,
signed: true,
},
<n:"int literal"> "_"? "i32" => ast::LiteralValue::Integer {
value: n,
bits: 32,
signed: true,
},
<n:"int literal"> "_"? "i64" => ast::LiteralValue::Integer {
value: n,
bits: 64,
signed: true,
},
<n:"int literal"> "_"? "u8" => ast::LiteralValue::Integer {
value: n,
bits: 8,
signed: false,
},
<n:"int literal"> "_"? "u16" => ast::LiteralValue::Integer {
value: n,
bits: 16,
signed: false,
},
<n:"int literal"> "_"? "u32" => ast::LiteralValue::Integer {
value: n,
bits: 32,
signed: false,
},
<n:"int literal"> "_"? "u64" => ast::LiteralValue::Integer {
value: n,
bits: 64,
signed: false,
},
// No suffix: default to i32.
<n:"int literal"> => ast::LiteralValue::Integer {
value: n,
bits: 32,
signed: true,
},
};
// String literal: drops the first and last character of the token text (the
// surrounding quotes). Escape sequences are not processed here.
StringLit: ast::LiteralValue = <n:"string literal"> => ast::LiteralValue::String(n[1..(n.len()-1)].to_string());
// Boolean literal; the token already carries the parsed value.
BoolLiteral: ast::LiteralValue = <n:"bool literal"> => ast::LiteralValue::Boolean(n);
// The `; N` length suffix inside an array type.
// NOTE(review): `unwrap` panics when the literal does not fit in a u32;
// presumably this should become a fallible (`=>?`) action reporting a parse error.
ArrayLen: u32 = {
";" <i:"int literal"> => i.parse().unwrap(),
}
// Type expressions:
//   ptr<T>             pointer to T
//   [T] or [T; N]      array of T with an optional length
//   i8..i64, u8..u64   fixed-width integers
//   bool               boolean
//   NAME               any other identifier, kept as TypeExp::Other
LangType: Spanned<ast::TypeExp> = {
<lo:@L> "ptr" "<" <target:LangType> ">" <hi:@R> => Spanned::new(ast::TypeExp::Pointer { target: Spanned::new(Box::new(target.value), target.span) }, (lo, hi)),
<lo:@L> "[" <of:LangType> <len:ArrayLen?> "]" <hi:@R> => Spanned::new(ast::TypeExp::Array { of: Spanned::new(Box::new(of.value), of.span), len }, (lo, hi)),
<lo:@L> "i8" <hi:@R> => Spanned::new(ast::TypeExp::Integer { bits: 8, signed: true }, (lo, hi)),
<lo:@L> "i16" <hi:@R> => Spanned::new(ast::TypeExp::Integer { bits: 16, signed: true }, (lo, hi)),
<lo:@L> "i32" <hi:@R> => Spanned::new(ast::TypeExp::Integer { bits: 32, signed: true }, (lo, hi)),
<lo:@L> "i64" <hi:@R> => Spanned::new(ast::TypeExp::Integer { bits: 64, signed: true }, (lo, hi)),
<lo:@L> "u8" <hi:@R> => Spanned::new(ast::TypeExp::Integer { bits: 8, signed: false }, (lo, hi)),
<lo:@L> "u16" <hi:@R> => Spanned::new(ast::TypeExp::Integer { bits: 16, signed: false }, (lo, hi)),
<lo:@L> "u32" <hi:@R> => Spanned::new(ast::TypeExp::Integer { bits: 32, signed: false }, (lo, hi)),
<lo:@L> "u64" <hi:@R> => Spanned::new(ast::TypeExp::Integer { bits: 64, signed: false }, (lo, hi)),
<lo:@L> "bool" <hi:@R> => Spanned::new(ast::TypeExp::Boolean, (lo, hi)),
<lo:@L> <id:"identifier"> <hi:@R> => Spanned::new(ast::TypeExp::Other { id }, (lo, hi)),
};
// Function handling
// A single parameter `name: type`; both parts are forwarded to Parameter::new.
Param: ast::Parameter = {
<Identifier> ":" <LangType> => ast::Parameter::new(<>)
};
// Comma-separated parameter list.
Params = Comma<Param>;
// Return type annotation of a function: `-> type`; yields the type.
FunctionReturn: Spanned<ast::TypeExp> = {
    "->" <LangType>,
}
// Function definition: `fn name(params) [-> type] { statements }`.
// The body uses `Statements`, so the grammar itself also accepts nested
// function and struct definitions inside a body.
Function: ast::Function = {
"fn" <i:Identifier> "(" <a:Params> ")" <r:FunctionReturn?> "{" <s:Statements> "}" => ast::Function::new(i, a, s, r)
}
// Structures
// A single field `name: type`; both parts are forwarded to StructField::new.
StructField: ast::StructField = {
<Identifier> ":" <LangType> => ast::StructField::new(<>)
};
// Comma-separated field list.
StructFields = Comma<StructField>;
// Struct definition: `struct Name { field: type, ... }`.
Struct: ast::Struct = {
    "struct" <name:Identifier> "{" <fields:StructFields> "}" => ast::Struct { name, fields }
}

View file

@ -1,47 +0,0 @@
use std::{fmt::Display, ops::Range};
use logos::{Logos, SpannedIter};
use crate::tokens::{LexingError, Token};
/// Item type produced by the lexer: on success a `(start, token, end)` triple,
/// otherwise a lexing error.
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
/// Error reported by [`Lexer`] when the input cannot be tokenized.
#[derive(Debug, Clone)]
pub enum LexicalError {
/// The underlying lexing error together with the byte range it was raised for.
InvalidToken(LexingError, Range<usize>),
}
impl Display for LexicalError {
    /// Formats the error as `lexical error at (<span>): <error>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            LexicalError::InvalidToken(err, span) => {
                // The span fills the "at (...)" slot and the error follows the
                // colon; the two arguments used to be passed in the opposite order.
                write!(f, "lexical error at ({:?}): {:?}", span, err)
            }
        }
    }
}
/// Adapter around the logos token stream that yields items in the
/// `(start, token, end)` shape of [`Spanned`].
pub struct Lexer<'input> {
// instead of an iterator over characters, we have a token iterator
token_stream: SpannedIter<'input, Token>,
}
impl<'input> Lexer<'input> {
    /// Creates a lexer over `input`.
    pub fn new(input: &'input str) -> Self {
        // `Token::lexer` comes from the `Logos` trait; `spanned` pairs every
        // token with the byte range it was read from.
        let token_stream = Token::lexer(input).spanned();
        Self { token_stream }
    }
}
impl<'input> Iterator for Lexer<'input> {
    type Item = Spanned<Token, usize, LexicalError>;

    /// Returns the next token with its start and end offsets, or a
    /// [`LexicalError`] carrying the offending span; `None` once the input is
    /// exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let (token, span) = self.token_stream.next()?;
        let item = match token {
            Ok(token) => Ok((span.start, token, span.end)),
            Err(err) => Err(LexicalError::InvalidToken(err, span)),
        };
        Some(item)
    }
}

View file

@ -1,198 +0,0 @@
#![allow(clippy::too_many_arguments)]
use check::print_error;
use clap::{Parser, Subcommand};
use codegen::ProgramData;
use inkwell::{context::Context, execution_engine::JitFunction, OptimizationLevel};
use lalrpop_util::lalrpop_mod;
use std::{fs, path::PathBuf, println};
use crate::{
ast::Program,
check::{print_type_error, Check},
lexer::Lexer,
};
pub mod ast;
pub mod check;
pub mod codegen;
pub mod lexer;
pub mod tokens;
pub mod type_analysis;
lalrpop_mod!(pub grammar);
// Top-level command-line arguments; the action to run is selected by a subcommand.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into user-visible help text.
#[derive(Parser)]
#[command(
author,
version,
about,
long_about = r#"A experimental language using LLVM."#
)]
struct Args {
#[command(subcommand)]
command: Commands,
}
// Subcommands of the compiler binary. The `///` comments below are part of the
// CLI: clap shows them as help text, so editing them changes program output.
#[derive(Subcommand)]
enum Commands {
/// Check if the code is valid.
Check {
/// The input file.
input: PathBuf,
},
/// Prints the code AST.
Ast {
/// The input file.
input: PathBuf,
},
/// Compile the edlang source file.
Compile {
/// The input file.
input: PathBuf,
/// Output optimized llvm ir.
#[arg(long)]
optimize: bool,
/// The output file. If not specified its output will be stdout.
#[arg(short, long)]
output: Option<PathBuf>,
/// Turn on debug info.
#[arg(short, long)]
debug: bool,
},
/// Compile and run a program. Main needs to return nothing.
Run {
/// The input file.
input: PathBuf,
},
}
fn check_program(program: &ProgramData, ast: &ast::Program) -> bool {
let errors = check::check(program, ast);
let mut error_count = 0;
let mut warn_count = 0;
for error in errors {
match error {
Check::Warning(x) => {
warn_count += 1;
println!("{}", x)
}
Check::Error(x) => {
error_count += 1;
println!("{}", x)
}
}
}
println!("Warnings: {warn_count}");
println!("Errors: {error_count}");
error_count == 0
}
fn main() -> color_eyre::Result<()> {
color_eyre::install()?;
tracing_subscriber::fmt::init();
let args = Args::parse();
match args.command {
Commands::Check { input } => {
let code = fs::read_to_string(&input)?;
let lexer = Lexer::new(code.as_str());
let parser = grammar::ProgramParser::new();
let mut ast = parser.parse(lexer)?;
if let Err(e) = type_analysis::type_check(&mut ast) {
print_type_error(&code, e);
return Ok(());
}
let program = ProgramData::new(&input, &code);
check_program(&program, &ast);
}
Commands::Ast { input } => {
let code = fs::read_to_string(input)?;
let lexer = Lexer::new(code.as_str());
let parser = grammar::ProgramParser::new();
match parser.parse(lexer) {
Ok(mut ast) => {
if let Err(e) = type_analysis::type_check(&mut ast) {
print_type_error(&code, e);
return Ok(());
}
println!("{ast:#?}");
}
Err(e) => {
print_error(&code, e);
}
}
}
Commands::Compile {
input,
output,
debug: _,
optimize: _,
} => {
let code = fs::read_to_string(&input)?;
let lexer = Lexer::new(code.as_str());
let parser = grammar::ProgramParser::new();
let mut ast: Program = parser.parse(lexer)?;
if let Err(e) = type_analysis::type_check(&mut ast) {
print_type_error(&code, e);
return Ok(());
}
let program = ProgramData::new(&input, &code);
let file_name = input.file_name().unwrap().to_string_lossy();
if !check_program(&program, &ast) {
return Ok(());
}
// println!("{:#?}", ast);
let context = Context::create();
let mut codegen = codegen::CodeGen::new(&context, &file_name, program, ast)?;
codegen.compile_ast()?;
let generated_llvm_ir = codegen.generated_code();
if let Some(output) = output {
fs::write(output, generated_llvm_ir)?;
} else {
println!("{generated_llvm_ir}");
}
}
Commands::Run { input } => {
let code = fs::read_to_string(&input)?;
let lexer = Lexer::new(&code[..]);
let parser = grammar::ProgramParser::new();
let mut ast: Program = parser.parse(lexer)?;
if let Err(e) = type_analysis::type_check(&mut ast) {
print_type_error(&code, e);
return Ok(());
}
let program = ProgramData::new(&input, &code);
let file_name = input.file_name().unwrap().to_string_lossy();
let context = Context::create();
let mut codegen = codegen::CodeGen::new(&context, &file_name, program, ast)?;
codegen.compile_ast()?;
let execution_engine = codegen
.module
.create_jit_execution_engine(OptimizationLevel::Aggressive)
.unwrap();
unsafe {
let main: JitFunction<unsafe extern "C" fn() -> ()> =
execution_engine.get_function("main")?;
main.call();
};
}
}
Ok(())
}

View file

@ -1,132 +0,0 @@
use logos::Logos;
use std::{convert::Infallible, fmt};
// https://github.com/maciejhirsz/logos/issues/133
/// Error type used by the token lexer.
#[derive(Debug, PartialEq, Clone, Default)]
pub enum LexingError {
    /// An integer could not be parsed.
    NumberParseError,
    /// Any other lexing failure; this is the default value.
    #[default]
    Other,
}

impl From<std::num::ParseIntError> for LexingError {
    /// Every integer parse failure maps to [`LexingError::NumberParseError`].
    fn from(_parse_error: std::num::ParseIntError) -> Self {
        Self::NumberParseError
    }
}

impl From<Infallible> for LexingError {
    /// `Infallible` has no values, so this conversion can never actually run.
    fn from(never: Infallible) -> Self {
        match never {}
    }
}
// todo: https://github.com/maciejhirsz/logos/issues/133#issuecomment-619444615
/// Tokens of the language, generated by `logos`.
///
/// Runs of spaces, tabs, newlines and form feeds are skipped, as is everything
/// from a `#` to the end of the line. Lexing failures are reported as
/// [`LexingError`].
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(error = LexingError, skip r"[ \t\n\f]+", skip r"#.*\n?")]
pub enum Token {
// Keywords.
#[token("let")]
KeywordLet,
#[token("print")]
KeywordPrint,
#[token("fn")]
KeywordFn,
#[token("return")]
KeywordReturn,
#[token("struct")]
KeywordStruct,
#[token("ptr")]
KeywordPtr,
#[token("if")]
KeywordIf,
#[token("else")]
KeywordElse,
#[token("_")]
KeywordUnderscore,
// Identifiers and literals; each carries the matched text, except booleans,
// which carry the parsed value.
#[regex(r"_?\p{XID_Start}\p{XID_Continue}*", |lex| lex.slice().to_string())]
Identifier(String),
#[regex(r"\d+", |lex| lex.slice().to_string())]
Integer(String),
// The string token keeps its surrounding quotes.
#[regex(r#""(?:[^"]|\\")*""#, |lex| lex.slice().to_string())]
String(String),
// The pattern only matches `true` or `false`, so the parse cannot fail.
#[regex(r"(true|false)", |lex| lex.slice().parse::<bool>().unwrap())]
Boolean(bool),
// Built-in type names.
#[token("bool")]
KeywordBool,
#[token("i8")]
Inti8,
#[token("i16")]
Inti16,
#[token("i32")]
Inti32,
#[token("i64")]
Inti64,
#[token("u8")]
Intu8,
#[token("u16")]
Intu16,
#[token("u32")]
Intu32,
#[token("u64")]
Intu64,
#[token("f32")]
Float32,
#[token("f64")]
Float64,
// Delimiters and punctuation.
#[token("(")]
LeftParen,
#[token(")")]
RightParen,
#[token("{")]
LeftBracket,
#[token("}")]
RightBracket,
#[token("[")]
LeftSquareBracket,
#[token("]")]
RightSquareBracket,
#[token("=")]
Assign,
#[token(";")]
Semicolon,
#[token(":")]
Colon,
#[token("->")]
Arrow,
#[token(",")]
Coma,
#[token("<")]
LessThanSign,
#[token(">")]
MoreThanSign,
// Operators.
#[token("+")]
OperatorAdd,
#[token("-")]
OperatorSub,
#[token("*")]
OperatorMul,
#[token("/")]
OperatorDiv,
#[token("%")]
OperatorRem,
#[token("&&")]
OperatorAnd,
#[token("||")]
OperatorOr,
#[token("==")]
OperatorEq,
#[token("!=")]
OperatorNe,
}
impl fmt::Display for Token {
    /// Displays a token using its `Debug` representation.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("{:?}", self))
    }
}

View file

@ -1,279 +0,0 @@
use std::collections::{HashMap, HashSet};
use crate::ast::{self, Expression, Function, Spanned, Statement, TypeExp};
/// Errors reported by type analysis.
#[derive(Debug, Clone)]
pub enum TypeError {
/// Two types that have to agree do not; `span` locates the offending code.
Mismatch {
found: TypeExp,
expected: TypeExp,
span: (usize, usize),
},
/// A name was used without having been declared.
UndeclaredVariable {
name: String,
span: (usize, usize),
},
}
/// Global definitions gathered before function bodies are analysed.
#[derive(Debug, Clone, Default)]
struct Storage {
// struct name -> (field name -> field type)
structs: HashMap<String, HashMap<String, TypeExp>>,
// function name -> a copy of the function definition
functions: HashMap<String, Function>,
}
/// Maps a variable name to the types of its successive declarations; each
/// `let` pushes a new entry and the last one is the one in effect. `None`
/// means the type has not been inferred yet.
type ScopeMap = HashMap<String, Vec<Option<TypeExp>>>;
// this works, but need to find a way to store the found info + handle literal integer types (or not?)
// maybe use scope ids
/// Type-checks a whole program and stores the inferred variable types of every
/// function in its `scope_type_info`.
///
/// # Errors
///
/// Returns the first [`TypeError`] found in a function body.
///
/// # Panics
///
/// Panics when a variable's type is still unknown after inference (for
/// instance a variable initialised from a string literal, whose type is not
/// inferred yet).
pub fn type_check(ast: &mut ast::Program) -> Result<(), TypeError> {
    let mut storage = Storage::default();

    // First pass: record the global structs and functions so that bodies can
    // refer to definitions that appear later in the file.
    for statement in ast.statements.iter() {
        match &statement.value {
            Statement::Struct(st) => {
                let mut fields = HashMap::new();
                for field in st.fields.iter() {
                    fields.insert(field.ident.value.clone(), field.field_type.value.clone());
                }
                storage.structs.insert(st.name.value.clone(), fields);
            }
            Statement::Function(function) => {
                storage
                    .functions
                    .insert(function.name.value.clone(), function.clone());
            }
            // todo: find globals here too
            _ => {}
        }
    }

    // Second pass: infer the types inside each function body.
    for statement in ast.statements.iter_mut() {
        let function = match &mut statement.value {
            Statement::Function(function) => function,
            _ => continue,
        };

        // Parameters are the initial, fully typed variables of the scope.
        let scope_vars: ScopeMap = function
            .params
            .iter()
            .map(|arg| {
                (
                    arg.ident.value.clone(),
                    vec![Some(arg.type_exp.value.clone())],
                )
            })
            .collect();

        let func_info = function.clone();
        let (inferred, _) =
            type_inference_scope(&mut function.body, &scope_vars, &func_info, &storage)?;

        // todo: check all vars have type info?
        function.scope_type_info = inferred
            .into_iter()
            .map(|(name, types)| (name, types.into_iter().map(Option::unwrap).collect()))
            .collect();
    }

    Ok(())
}
/// Finds variable types in the scope, returns newly created variables to handle shadowing.
///
/// Works on a private copy of `scope_vars`. Returns that copy, updated with
/// everything inferred in this scope, together with the names introduced by
/// `let` statements in this scope (so a caller can tell updates to outer
/// variables apart from shadowing declarations).
///
/// # Errors
///
/// Returns a [`TypeError`] for a type mismatch or for an assignment to an
/// undeclared variable.
///
/// # Panics
///
/// Panics when a variable of an `if`/`else` body still has no type after
/// inference, and on a function or struct definition inside a body.
fn type_inference_scope(
    statements: &mut [Spanned<ast::Statement>],
    scope_vars: &ScopeMap,
    func: &Function,
    storage: &Storage,
) -> Result<(ScopeMap, HashSet<String>), TypeError> {
    let mut scope_vars = scope_vars.clone();
    let mut new_vars: HashSet<String> = HashSet::new();

    for statement in statements {
        match &mut statement.value {
            Statement::Let {
                name,
                value,
                value_type,
            } => {
                new_vars.insert(name.value.clone());
                let exp_type = type_inference_expression(value, &mut scope_vars, storage, None)?;
                let declared = value_type.clone().map(|x| x.value);

                // An explicit annotation must agree with the inferred type,
                // whenever one could be inferred.
                if let (Some(found), Some(expected)) = (&exp_type, &declared) {
                    if found != expected {
                        return Err(TypeError::Mismatch {
                            found: found.clone(),
                            expected: expected.clone(),
                            span: statement.span,
                        });
                    }
                }

                // The annotation wins; without one, use the inferred type.
                scope_vars
                    .entry(name.value.clone())
                    .or_default()
                    .push(declared.or(exp_type));
            }
            Statement::Mutate { name, value } => {
                if !scope_vars.contains_key(&name.value) {
                    return Err(TypeError::UndeclaredVariable {
                        name: name.value.clone(),
                        span: name.span,
                    });
                }
                let exp_type = type_inference_expression(value, &mut scope_vars, storage, None)?;
                let var = scope_vars
                    .get_mut(&name.value)
                    .and_then(|versions| versions.last_mut())
                    .expect("a declared variable always has at least one type slot");

                match (var.clone(), exp_type) {
                    // First time the variable gets a type.
                    (None, inferred) => *var = inferred,
                    (Some(expected), Some(found)) if found != expected => {
                        return Err(TypeError::Mismatch {
                            found,
                            expected,
                            span: statement.span,
                        });
                    }
                    _ => {}
                }
            }
            Statement::If {
                condition,
                body,
                else_body,
                scope_type_info,
                else_body_scope_type_info,
            } => {
                type_inference_expression(
                    condition,
                    &mut scope_vars,
                    storage,
                    Some(TypeExp::Boolean),
                )?;

                let branch_scope = infer_branch_scope(body, &mut scope_vars, func, storage)?;
                *scope_type_info = branch_scope
                    .into_iter()
                    .map(|(name, types)| (name, types.into_iter().map(Option::unwrap).collect()))
                    .collect();

                if let Some(else_statements) = else_body {
                    let branch_scope =
                        infer_branch_scope(else_statements, &mut scope_vars, func, storage)?;
                    *else_body_scope_type_info = branch_scope
                        .into_iter()
                        .map(|(name, types)| {
                            (name, types.into_iter().map(Option::unwrap).collect())
                        })
                        .collect();
                }
            }
            Statement::Return(exp) => {
                if let Some(exp) = exp {
                    type_inference_expression(
                        exp,
                        &mut scope_vars,
                        storage,
                        func.return_type.clone().map(|x| x.value),
                    )?;
                }
            }
            // NOTE(review): the grammar appears to accept function and struct
            // definitions inside a body, which would make these arms
            // reachable; confirm and report a proper error instead.
            Statement::Function(_) | Statement::Struct(_) => unreachable!(),
        }
    }

    Ok((scope_vars, new_vars))
}

/// Runs inference on the body of one `if`/`else` branch and copies back into
/// `outer_scope` the types of the outer variables the branch did not shadow.
/// Returns the complete scope map of the branch.
fn infer_branch_scope(
    body: &mut [Spanned<ast::Statement>],
    outer_scope: &mut ScopeMap,
    func: &Function,
    storage: &Storage,
) -> Result<ScopeMap, TypeError> {
    let (branch_scope, branch_new_vars) = type_inference_scope(body, outer_scope, func, storage)?;

    for (name, types) in &branch_scope {
        // A variable declared inside the branch shadows the outer one, so its
        // type says nothing about the outer variable.
        if branch_new_vars.contains(name) {
            continue;
        }
        if let Some(slot) = outer_scope.get_mut(name) {
            slot.clone_from(types);
        }
    }

    Ok(branch_scope)
}
/// Infers the type of `exp`, propagating `expected_type` down to variables
/// whose type is not known yet.
///
/// Returns `Ok(None)` when the type cannot be determined (string literals are
/// not typed yet, and calls to functions without a return type yield `None`).
///
/// # Errors
///
/// * [`TypeError::UndeclaredVariable`] when a variable is not in scope. The
///   same variant is used for a call to an unknown function, since no
///   dedicated variant exists.
/// * [`TypeError::Mismatch`] when a variable's type differs from the expected
///   one, or when the two operands of a binary operator disagree.
fn type_inference_expression(
    exp: &Spanned<Box<Expression>>,
    scope_vars: &mut ScopeMap,
    storage: &Storage,
    expected_type: Option<TypeExp>,
) -> Result<Option<TypeExp>, TypeError> {
    Ok(match &*exp.value {
        Expression::Literal(lit) => match lit {
            ast::LiteralValue::String(_) => None, // todo
            ast::LiteralValue::Integer { bits, signed, .. } => Some(TypeExp::Integer {
                bits: *bits,
                signed: *signed,
            }),
            ast::LiteralValue::Boolean(_) => Some(TypeExp::Boolean),
        },
        Expression::Variable { name } => {
            // Report an unknown variable as a type error instead of panicking.
            let var = match scope_vars
                .get_mut(name)
                .and_then(|versions| versions.last_mut())
            {
                Some(var) => var,
                None => {
                    return Err(TypeError::UndeclaredVariable {
                        name: name.clone(),
                        span: exp.span,
                    })
                }
            };

            match expected_type {
                // Nothing is expected: report whatever is known so far.
                None => var.clone(),
                Some(expected) => {
                    match var.clone() {
                        // The context decides the type of an untyped variable.
                        None => *var = Some(expected.clone()),
                        // `found` is the variable's own type and `expected` is
                        // what the context asks for.
                        Some(actual) if actual != expected => {
                            return Err(TypeError::Mismatch {
                                found: actual,
                                expected,
                                span: exp.span,
                            });
                        }
                        Some(_) => {}
                    }
                    Some(expected)
                }
            }
        }
        Expression::Call { function, args } => {
            // Borrow the definition rather than cloning the whole function,
            // and report an unknown callee instead of panicking.
            let func = match storage.functions.get(&function.value) {
                Some(func) => func,
                None => {
                    return Err(TypeError::UndeclaredVariable {
                        name: function.value.clone(),
                        span: function.span,
                    })
                }
            };
            for (i, arg) in args.iter().enumerate() {
                // Arguments beyond the declared parameters have no expected
                // type; they are still analysed rather than indexing out of
                // bounds.
                let arg_type = func.params.get(i).map(|param| param.type_exp.value.clone());
                // result is ignored, but need these to infer call arg types
                type_inference_expression(arg, scope_vars, storage, arg_type)?;
            }
            func.return_type.clone().map(|x| x.value)
        }
        Expression::BinaryOp(lhs, op, rhs) => match op {
            // NOTE(review): the operands of a comparison are not analysed at
            // all, so errors inside them go unnoticed here.
            ast::OpCode::Eq | ast::OpCode::Ne => Some(TypeExp::Boolean),
            _ => {
                let lhs_type =
                    type_inference_expression(lhs, scope_vars, storage, expected_type.clone())?;
                let rhs_type = type_inference_expression(rhs, scope_vars, storage, expected_type)?;
                if let (Some(expected), Some(found)) = (&lhs_type, &rhs_type) {
                    if expected != found {
                        return Err(TypeError::Mismatch {
                            found: found.clone(),
                            expected: expected.clone(),
                            // Point at the whole binary expression.
                            span: exp.span,
                        });
                    }
                }
                lhs_type.or(rhs_type)
            }
        },
    })
}