From 2cd380e931442445a7f55ca0fe7882ba48777fc6 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Tue, 19 Dec 2023 10:41:50 +0100 Subject: [PATCH] progreess --- Cargo.lock | 45 ++++++-------- Cargo.toml | 2 +- src/ast.rs | 45 +++++++++++--- src/grammar.lalrpop | 138 ++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 144 ++++++++++++++++++++++++++++++++++++++++++++ src/tokens.rs | 18 +++++- 6 files changed, 353 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3c0fa56..1e041d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,12 +28,12 @@ dependencies = [ [[package]] name = "annotate-snippets" -version = "0.9.2" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccaf7e9dfbb6ab22c82e473cd1a8a7bd313c19a5b7e40970f3d89ef5a5c9e81e" +checksum = "140de53162eac850c5a6711763157ee2ceffbeb0514f6c83511728052e880b17" dependencies = [ + "anstyle", "unicode-width", - "yansi-term", ] [[package]] @@ -154,7 +154,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.40", + "syn 2.0.41", "which", ] @@ -177,7 +177,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.40", + "syn 2.0.41", "which", ] @@ -281,7 +281,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.40", + "syn 2.0.41", ] [[package]] @@ -527,9 +527,9 @@ dependencies = [ [[package]] name = "eyre" -version = "0.6.10" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bbb8258be8305fb0237d7b295f47bb24ff1b136a535f473baf40e70468515aa" +checksum = "b6267a1fa6f59179ea4afc8e50fd8612a3cc60bc858f786ff877a4a8cb042799" dependencies = [ "indenter", "once_cell", @@ -802,7 +802,7 @@ dependencies = [ "proc-macro2", "quote", "regex-syntax 0.6.29", - "syn 2.0.40", + "syn 2.0.41", ] [[package]] @@ -847,7 +847,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.40", + "syn 2.0.41", "tblgen", "unindent", ] @@ -1079,7 +1079,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.40", + "syn 2.0.41", ] [[package]] @@ -1254,7 +1254,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.40", + "syn 2.0.41", ] [[package]] @@ -1343,9 +1343,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.40" +version = "2.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13fa70a4ee923979ffb522cacce59d34421ebdea5625e1073c4326ef9d2dd42e" +checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" dependencies = [ "proc-macro2", "quote", @@ -1424,7 +1424,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.40", + "syn 2.0.41", ] [[package]] @@ -1494,7 +1494,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.40", + "syn 2.0.41", ] [[package]] @@ -1637,7 +1637,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.40", + "syn 2.0.41", "wasm-bindgen-shared", ] @@ -1659,7 +1659,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 2.0.40", + "syn 2.0.41", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1859,12 +1859,3 @@ checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" dependencies = [ "linked-hash-map", ] - -[[package]] -name = "yansi-term" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe5c30ade05e61656247b2e334a031dfd0cc466fadef865bdcdea8d537951bf1" -dependencies = [ - "winapi", -] diff --git a/Cargo.toml b/Cargo.toml index c609397..86eb76e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ lalrpop-util = { version = "0.20.0", features = ["lexer"] } regex = "1.9" tracing = "0.1.37" tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } -annotate-snippets = { version = "0.9.1", features = ["color"] } +annotate-snippets = { version = "0.10.0" } logos = "0.13.0" [build-dependencies] diff --git a/src/ast.rs b/src/ast.rs index 2abcea8..aed10d4 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -26,14 +26,38 @@ pub enum SimpleType { } #[derive(Debug, Clone, PartialEq, Eq)] -pub enum RecordField<'input> { - Fixed { - identifier_list: Vec<&'input str>, - type_denoter: Type<'input>, - }, - Case { - // TODO: 6.4.3.3 Record-types - }, +pub struct VariantSelector<'input> { + pub tag_field: Option<&'input str>, + pub tag_type: Box>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CaseVariant<'input> { + pub case_constants: Vec>, + pub field_list: Option>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RecordSection<'input> { + pub identifier_list: Vec<&'input str>, + pub type_denoter: Box>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RecordFixedPart<'input> { + pub records: Vec>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RecordVariantPart<'input> { + pub variant_selector: VariantSelector<'input>, + pub variants: Vec>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RecordFieldList<'input> { + pub fixed_part: Option>, + pub variant_part: Option>, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -41,7 +65,7 @@ pub enum Type<'input> { Identifier(&'input str), Simple(SimpleType), Enumerated(Vec<&'input str>), - Subrange { + SubRange { start: Constant<'input>, end: Constant<'input>, }, @@ -51,7 +75,8 @@ pub enum Type<'input> { packed: bool, }, Record { - fields: Vec>, + field_list: Option>, + packed: bool, }, } diff --git a/src/grammar.lalrpop b/src/grammar.lalrpop index 9e2b498..87b46a0 100644 --- a/src/grammar.lalrpop +++ b/src/grammar.lalrpop @@ -19,6 +19,24 @@ extern { "-" => Token::SpecialMinus, "+" => Token::SpecialPlus, "=" => Token::SpecialEqual, + "(" => Token::SpecialOpenParen, + ")" => Token::SpecialCloseParen, + "[" => Token::SpecialOpenBracket, + "]" => Token::SpecialCloseBracket, + "," => Token::SpecialComma, + ":" => Token::SpecialColon, + ";" => Token::SpecialSemiColon, + ".." => Token::SpecialRange, + "array" => Token::WordArray, + "of" => Token::WordOf, + "packed" => Token::WordPacked, + "record" => Token::WordRecord, + "end" => Token::WordEnd, + "case" => Token::WordCase, + "type_integer" => Token::TypeInteger, + "type_real" => Token::TypeReal, + "type_boolean" => Token::TypeBoolean, + "type_char" => Token::TypeChar, } } @@ -32,6 +50,26 @@ Comma: Vec = { } }; +CommaNoTrailing: Vec = { + )*> )?> => match e { + None => v, + Some(e) => { + v.push(e); + v + } + } +}; + +SemiColonNoTrailing: Vec = { + )*> )?> => match e { + None => v, + Some(e) => { + v.push(e); + v + } + } +}; + pub Number: ast::Number<'input> = { <"integer"> => ast::Number::Integer(<>), <"real"> => ast::Number::Real(<>), @@ -50,3 +88,103 @@ pub ConstantDef: ast::ConstantDef<'input> = { value } } + +pub SimpleType: ast::SimpleType = { + "type_integer" => ast::SimpleType::Integer, + "type_real" => ast::SimpleType::Real, + "type_boolean" => ast::SimpleType::Boolean, + "type_char" => ast::SimpleType::Char, +} + +pub EnumeratedType: Vec<&'input str> = { + "(" > ")" => <> +} + +pub SubRangeType: ast::Type<'input> = { + ".." => ast::Type::SubRange { + start, + end + } +} + +ArrayIndexCompatibleType: ast::Type<'input> = { + => ast::Type::Simple(<>), + => <>, +} + +RecordSection: ast::RecordSection<'input> = { + > ":" => ast::RecordSection { + identifier_list, + type_denoter: Box::new(type_denoter) + } +} + +RecordFixedPart: ast::RecordFixedPart<'input> = { + > => ast::RecordFixedPart { + records + } +} + +VariantSelector: ast::VariantSelector<'input> = { + ":" => ast::VariantSelector { + tag_field: Some(tag_field), + tag_type: Box::new(tag_type) + }, + => ast::VariantSelector { + tag_field: None, + tag_type: Box::new(tag_type) + }, +} + +CaseVariant: ast::CaseVariant<'input> = { + > ":" "(" ")" => ast::CaseVariant { + case_constants, + field_list + }, +} + +RecordVariantPart: ast::RecordVariantPart<'input> = { + "case" "of" > => ast::RecordVariantPart { + variant_selector, + variants + }, +} + +RecordFieldList: ast::RecordFieldList<'input> = { + ";"? => ast::RecordFieldList { + fixed_part: Some(fixed_part), + variant_part: None + }, + ";" ";"? => ast::RecordFieldList { + fixed_part: Some(fixed_part), + variant_part: Some(variant_part) + }, + ";"? => ast::RecordFieldList { + fixed_part: None, + variant_part: Some(variant_part) + }, +} + +pub Type: ast::Type<'input> = { + "array" "[" > "]" "of" => ast::Type::Array { + index, + component: Box::new(c), + packed: p.is_some(), + }, + => ast::Type::Simple(<>), + <"identifier"> => ast::Type::Identifier(<>), + => ast::Type::Enumerated(<>), + => <>, + // fixme + "record" "end" => ast::Type::Record { + field_list, + packed: p.is_some(), + } +} + +pub TypeDef: ast::TypeDef<'input> = { + "=" => ast::TypeDef { + ident, + value, + } +} diff --git a/src/main.rs b/src/main.rs index 318f92d..1d06412 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,7 @@ use lalrpop_util::lalrpop_mod; +use crate::lexer::Lexer; + lalrpop_mod!(pub grammar); pub mod ast; @@ -8,6 +10,19 @@ pub mod tokens; fn main() { println!("Hello, world!"); + + let input = r#" +record + year : 0..2000; + month : 1..12; + day : 1..31 +end + "#; + + let lexer = Lexer::new(input); + let parser = grammar::TypeParser::new(); + let parsed = parser.parse("", lexer).unwrap(); + println!("{:#?}", parsed) } #[cfg(test)] @@ -90,4 +105,133 @@ mod test { }, ); } + + #[test] + fn parse_simple_type() { + #[track_caller] + fn check(input: &str, value: ast::SimpleType) { + let lexer = Lexer::new(input); + let parser = grammar::SimpleTypeParser::new(); + assert_eq!(parser.parse("", lexer).unwrap(), value) + } + + check("Boolean", ast::SimpleType::Boolean); + check("char", ast::SimpleType::Char); + check("integer", ast::SimpleType::Integer); + check("real", ast::SimpleType::Real); + } + + #[test] + fn parse_enumerated_type() { + #[track_caller] + fn check(input: &str, value: Vec<&str>) { + let lexer = Lexer::new(input); + let parser = grammar::EnumeratedTypeParser::new(); + assert_eq!(parser.parse("", lexer).unwrap(), value) + } + + check("(hello, world)", vec!["hello", "world"]); + check("(hello,world)", vec!["hello", "world"]); + check("(hello)", vec!["hello"]); + } + + #[test] + #[should_panic] + fn parse_enumerated_type_panic() { + #[track_caller] + fn check(input: &str, value: Vec<&str>) { + let lexer = Lexer::new(input); + let parser = grammar::EnumeratedTypeParser::new(); + assert_eq!(parser.parse("", lexer).unwrap(), value) + } + + check("(hello,world,)", vec!["hello", "world"]); + } + + #[test] + fn parse_type() { + #[track_caller] + fn check(input: &str, value: ast::Type) { + let lexer = Lexer::new(input); + let parser = grammar::TypeParser::new(); + assert_eq!(parser.parse("", lexer).unwrap(), value) + } + + check("Boolean", ast::Type::Simple(ast::SimpleType::Boolean)); + check("mytype", ast::Type::Identifier("mytype")); + check( + "1..100", + ast::Type::SubRange { + start: ast::Constant::Number(ast::Number::Integer("1")), + end: ast::Constant::Number(ast::Number::Integer("100")), + }, + ); + check( + "array [1..100] of real", + ast::Type::Array { + index: vec![ast::Type::SubRange { + start: ast::Constant::Number(ast::Number::Integer("1")), + end: ast::Constant::Number(ast::Number::Integer("100")), + }], + component: Box::new(ast::Type::Simple(ast::SimpleType::Real)), + packed: false, + }, + ); + check( + "array [1..100, Boolean] of real", + ast::Type::Array { + index: vec![ + ast::Type::SubRange { + start: ast::Constant::Number(ast::Number::Integer("1")), + end: ast::Constant::Number(ast::Number::Integer("100")), + }, + ast::Type::Simple(ast::SimpleType::Boolean), + ], + component: Box::new(ast::Type::Simple(ast::SimpleType::Real)), + packed: false, + }, + ); + + check( + "packed array [Boolean] of packed array [0..10] of real", + ast::Type::Array { + index: vec![ast::Type::Simple(ast::SimpleType::Boolean)], + component: Box::new(ast::Type::Array { + index: vec![ast::Type::SubRange { + start: ast::Constant::Number(ast::Number::Integer("0")), + end: ast::Constant::Number(ast::Number::Integer("10")), + }], + component: Box::new(ast::Type::Simple(ast::SimpleType::Real)), + packed: true, + }), + packed: true, + }, + ); + } + + #[test] + fn parse_type_def() { + #[track_caller] + fn check(input: &str, value: ast::TypeDef) { + let lexer = Lexer::new(input); + let parser = grammar::TypeDefParser::new(); + assert_eq!(parser.parse("", lexer).unwrap(), value) + } + + check( + "mytype = Boolean", + ast::TypeDef { + ident: "mytype", + value: ast::Type::Simple(ast::SimpleType::Boolean), + }, + ); + + check( + "mytype = myident2", + ast::TypeDef { + ident: "mytype", + value: ast::Type::Identifier("myident2"), + }, + ); + } } diff --git a/src/tokens.rs b/src/tokens.rs index 4c88df3..cae5682 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -59,7 +59,7 @@ pub enum Token<'input> { #[token(",")] SpecialComma, #[token(";")] - SpecialDotComma, + SpecialSemiColon, #[token("\"")] SpecialQuotation, #[token("(")] @@ -74,6 +74,8 @@ pub enum Token<'input> { SpecialGreaterEqual, #[token(":=")] SpecialAssign, + #[token(":")] + SpecialColon, #[token("..")] SpecialRange, @@ -148,4 +150,18 @@ pub enum Token<'input> { WordWhile, #[token("with")] WordWith, + + #[token("integer")] + TypeInteger, + #[token("real")] + TypeReal, + #[token("Boolean")] + TypeBoolean, + #[token("char")] + TypeChar, + + #[token("true")] + WordTrue, + #[token("false")] + WordFalse, }