From 2cd380e931442445a7f55ca0fe7882ba48777fc6 Mon Sep 17 00:00:00 2001
From: Edgar Luque <git@edgarluque.com>
Date: Tue, 19 Dec 2023 10:41:50 +0100
Subject: [PATCH] progress

---
 Cargo.lock          |  45 ++++++--------
 Cargo.toml          |   2 +-
 src/ast.rs          |  45 +++++++++++---
 src/grammar.lalrpop | 138 ++++++++++++++++++++++++++++++++++++++++++
 src/main.rs         | 144 ++++++++++++++++++++++++++++++++++++++++++++
 src/tokens.rs       |  18 +++++-
 6 files changed, 353 insertions(+), 39 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 3c0fa56..1e041d8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -28,12 +28,12 @@ dependencies = [
 
 [[package]]
 name = "annotate-snippets"
-version = "0.9.2"
+version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ccaf7e9dfbb6ab22c82e473cd1a8a7bd313c19a5b7e40970f3d89ef5a5c9e81e"
+checksum = "140de53162eac850c5a6711763157ee2ceffbeb0514f6c83511728052e880b17"
 dependencies = [
+ "anstyle",
  "unicode-width",
- "yansi-term",
 ]
 
 [[package]]
@@ -154,7 +154,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.40",
+ "syn 2.0.41",
  "which",
 ]
 
@@ -177,7 +177,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.40",
+ "syn 2.0.41",
  "which",
 ]
 
@@ -281,7 +281,7 @@ dependencies = [
  "heck",
  "proc-macro2",
  "quote",
- "syn 2.0.40",
+ "syn 2.0.41",
 ]
 
 [[package]]
@@ -527,9 +527,9 @@ dependencies = [
 
 [[package]]
 name = "eyre"
-version = "0.6.10"
+version = "0.6.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8bbb8258be8305fb0237d7b295f47bb24ff1b136a535f473baf40e70468515aa"
+checksum = "b6267a1fa6f59179ea4afc8e50fd8612a3cc60bc858f786ff877a4a8cb042799"
 dependencies = [
  "indenter",
  "once_cell",
@@ -802,7 +802,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex-syntax 0.6.29",
- "syn 2.0.40",
+ "syn 2.0.41",
 ]
 
 [[package]]
@@ -847,7 +847,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex",
- "syn 2.0.40",
+ "syn 2.0.41",
  "tblgen",
  "unindent",
 ]
@@ -1079,7 +1079,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
 dependencies = [
  "proc-macro2",
- "syn 2.0.40",
+ "syn 2.0.41",
 ]
 
 [[package]]
@@ -1254,7 +1254,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.40",
+ "syn 2.0.41",
 ]
 
 [[package]]
@@ -1343,9 +1343,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.40"
+version = "2.0.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13fa70a4ee923979ffb522cacce59d34421ebdea5625e1073c4326ef9d2dd42e"
+checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1424,7 +1424,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.40",
+ "syn 2.0.41",
 ]
 
 [[package]]
@@ -1494,7 +1494,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.40",
+ "syn 2.0.41",
 ]
 
 [[package]]
@@ -1637,7 +1637,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.40",
+ "syn 2.0.41",
  "wasm-bindgen-shared",
 ]
 
@@ -1659,7 +1659,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.40",
+ "syn 2.0.41",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -1859,12 +1859,3 @@ checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
 dependencies = [
  "linked-hash-map",
 ]
-
-[[package]]
-name = "yansi-term"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe5c30ade05e61656247b2e334a031dfd0cc466fadef865bdcdea8d537951bf1"
-dependencies = [
- "winapi",
-]
diff --git a/Cargo.toml b/Cargo.toml
index c609397..86eb76e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@ lalrpop-util = { version = "0.20.0", features = ["lexer"] }
 regex = "1.9"
 tracing = "0.1.37"
 tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
-annotate-snippets = { version = "0.9.1", features = ["color"] }
+annotate-snippets = { version = "0.10.0" }
 logos = "0.13.0"
 
 [build-dependencies]
diff --git a/src/ast.rs b/src/ast.rs
index 2abcea8..aed10d4 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -26,14 +26,38 @@ pub enum SimpleType {
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
-pub enum RecordField<'input> {
-    Fixed {
-        identifier_list: Vec<&'input str>,
-        type_denoter: Type<'input>,
-    },
-    Case {
-        // TODO: 6.4.3.3 Record-types
-    },
+pub struct VariantSelector<'input> {
+    pub tag_field: Option<&'input str>,
+    pub tag_type: Box<Type<'input>>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct CaseVariant<'input> {
+    pub case_constants: Vec<Constant<'input>>,
+    pub field_list: Option<RecordFieldList<'input>>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RecordSection<'input> {
+    pub identifier_list: Vec<&'input str>,
+    pub type_denoter: Box<Type<'input>>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RecordFixedPart<'input> {
+    pub records: Vec<RecordSection<'input>>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RecordVariantPart<'input> {
+    pub variant_selector: VariantSelector<'input>,
+    pub variants: Vec<CaseVariant<'input>>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RecordFieldList<'input> {
+    pub fixed_part: Option<RecordFixedPart<'input>>,
+    pub variant_part: Option<RecordVariantPart<'input>>,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -41,7 +65,7 @@ pub enum Type<'input> {
     Identifier(&'input str),
     Simple(SimpleType),
     Enumerated(Vec<&'input str>),
-    Subrange {
+    SubRange {
         start: Constant<'input>,
         end: Constant<'input>,
     },
@@ -51,7 +75,8 @@ pub enum Type<'input> {
         packed: bool,
     },
     Record {
-        fields: Vec<RecordField<'input>>,
+        field_list: Option<RecordFieldList<'input>>,
+        packed: bool,
     },
 }
 
diff --git a/src/grammar.lalrpop b/src/grammar.lalrpop
index 9e2b498..87b46a0 100644
--- a/src/grammar.lalrpop
+++ b/src/grammar.lalrpop
@@ -19,6 +19,24 @@ extern {
         "-" => Token::SpecialMinus,
         "+" => Token::SpecialPlus,
         "=" => Token::SpecialEqual,
+        "(" => Token::SpecialOpenParen,
+        ")" => Token::SpecialCloseParen,
+        "[" => Token::SpecialOpenBracket,
+        "]" => Token::SpecialCloseBracket,
+        "," => Token::SpecialComma,
+        ":" => Token::SpecialColon,
+        ";" => Token::SpecialSemiColon,
+        ".." => Token::SpecialRange,
+        "array" => Token::WordArray,
+        "of" => Token::WordOf,
+        "packed" => Token::WordPacked,
+        "record" => Token::WordRecord,
+        "end" => Token::WordEnd,
+        "case" => Token::WordCase,
+        "type_integer" => Token::TypeInteger,
+        "type_real" => Token::TypeReal,
+        "type_boolean" => Token::TypeBoolean,
+        "type_char" => Token::TypeChar,
     }
 }
 
@@ -32,6 +50,26 @@ Comma<T>: Vec<T> = {
     }
 };
 
+// Non-empty list of `T` separated by ","; a trailing "," is not part of the list.
+CommaNoTrailing<T>: Vec<T> = {
+    // Every element after the first must be preceded by its own ",".
+    <first:T> <rest:("," <T>)*> => {
+        let mut items = vec![first];
+        items.extend(rest);
+        items
+    }
+};
+
+// Non-empty list of `T` separated by ";"; a trailing ";" is not consumed here.
+// NOTE(review): rules that follow this list with `";"?` may conflict in LR(1) — confirm.
+SemiColonNoTrailing<T>: Vec<T> = {
+    <first:T> <rest:(";" <T>)*> => {
+        let mut items = vec![first];
+        items.extend(rest);
+        items
+    }
+};
+
 pub Number: ast::Number<'input> = {
     <"integer"> => ast::Number::Integer(<>),
     <"real"> => ast::Number::Real(<>),
@@ -50,3 +88,103 @@ pub ConstantDef: ast::ConstantDef<'input> = {
         value
     }
 }
+
+pub SimpleType: ast::SimpleType = {
+    "type_integer" => ast::SimpleType::Integer,
+    "type_real" => ast::SimpleType::Real,
+    "type_boolean" => ast::SimpleType::Boolean,
+    "type_char" => ast::SimpleType::Char,
+}
+
+pub EnumeratedType: Vec<&'input str> = {
+    "(" <CommaNoTrailing<"identifier">> ")" => <>
+}
+
+pub SubRangeType: ast::Type<'input> = {
+    <start:Constant> ".." <end:Constant> => ast::Type::SubRange {
+        start,
+        end
+    }
+}
+
+ArrayIndexCompatibleType: ast::Type<'input> = {
+    <SimpleType> => ast::Type::Simple(<>),
+    <SubRangeType> => <>,
+}
+
+RecordSection: ast::RecordSection<'input> = {
+    <identifier_list:CommaNoTrailing<"identifier">> ":" <type_denoter:Type> => ast::RecordSection {
+        identifier_list,
+        type_denoter: Box::new(type_denoter)
+    }
+}
+
+RecordFixedPart: ast::RecordFixedPart<'input> = {
+    <records:SemiColonNoTrailing<RecordSection>> => ast::RecordFixedPart {
+        records
+    }
+}
+
+VariantSelector: ast::VariantSelector<'input> = {
+    <tag_field:"identifier"> ":" <tag_type:Type> => ast::VariantSelector {
+        tag_field: Some(tag_field),
+        tag_type: Box::new(tag_type)
+    },
+    <tag_type:Type> => ast::VariantSelector {
+        tag_field: None,
+        tag_type: Box::new(tag_type)
+    },
+}
+
+CaseVariant: ast::CaseVariant<'input> = {
+    <case_constants:CommaNoTrailing<Constant>> ":" "(" <field_list:RecordFieldList?> ")" => ast::CaseVariant {
+        case_constants,
+        field_list
+    },
+}
+
+RecordVariantPart: ast::RecordVariantPart<'input> = {
+    "case" <variant_selector:VariantSelector> "of" <variants:SemiColonNoTrailing<CaseVariant>> => ast::RecordVariantPart {
+        variant_selector,
+        variants
+    },
+}
+
+RecordFieldList: ast::RecordFieldList<'input> = {
+    <fixed_part:RecordFixedPart> ";"? => ast::RecordFieldList {
+        fixed_part: Some(fixed_part),
+        variant_part: None
+    },
+    <fixed_part:RecordFixedPart> ";" <variant_part:RecordVariantPart> ";"? => ast::RecordFieldList {
+        fixed_part: Some(fixed_part),
+        variant_part: Some(variant_part)
+    },
+    <variant_part:RecordVariantPart> ";"? => ast::RecordFieldList {
+        fixed_part: None,
+        variant_part: Some(variant_part)
+    },
+}
+
+pub Type: ast::Type<'input> = {
+    <p:"packed"?> "array" "[" <index:CommaNoTrailing<ArrayIndexCompatibleType>> "]" "of" <c:Type> => ast::Type::Array {
+        index,
+        component: Box::new(c),
+        packed: p.is_some(),
+    },
+    <SimpleType> => ast::Type::Simple(<>),
+    <"identifier"> => ast::Type::Identifier(<>),
+    <EnumeratedType> => ast::Type::Enumerated(<>),
+    <SubRangeType> => <>,
+    // fixme
+    <p:"packed"?> "record" <field_list:RecordFieldList?> "end" => ast::Type::Record {
+        field_list,
+        packed: p.is_some(),
+    }
+}
+
+pub TypeDef: ast::TypeDef<'input> = {
+    <ident:"identifier"> "=" <value:Type> => ast::TypeDef {
+        ident,
+        value,
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 318f92d..1d06412 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,7 @@
 use lalrpop_util::lalrpop_mod;
 
+use crate::lexer::Lexer;
+
 lalrpop_mod!(pub grammar);
 
 pub mod ast;
@@ -8,6 +10,19 @@ pub mod tokens;
 
 fn main() {
     println!("Hello, world!");
+
+    let input = r#"
+record
+    year : 0..2000;
+    month : 1..12;
+    day : 1..31
+end
+    "#;
+
+    let lexer = Lexer::new(input);
+    let parser = grammar::TypeParser::new();
+    let parsed = parser.parse("", lexer).unwrap();
+    println!("{:#?}", parsed)
 }
 
 #[cfg(test)]
@@ -90,4 +105,133 @@ mod test {
             },
         );
     }
+
+    #[test]
+    fn parse_simple_type() {
+        #[track_caller]
+        fn check(input: &str, value: ast::SimpleType) {
+            let lexer = Lexer::new(input);
+            let parser = grammar::SimpleTypeParser::new();
+            assert_eq!(parser.parse("", lexer).unwrap(), value)
+        }
+
+        check("Boolean", ast::SimpleType::Boolean);
+        check("char", ast::SimpleType::Char);
+        check("integer", ast::SimpleType::Integer);
+        check("real", ast::SimpleType::Real);
+    }
+
+    #[test]
+    fn parse_enumerated_type() {
+        #[track_caller]
+        fn check(input: &str, value: Vec<&str>) {
+            let lexer = Lexer::new(input);
+            let parser = grammar::EnumeratedTypeParser::new();
+            assert_eq!(parser.parse("", lexer).unwrap(), value)
+        }
+
+        check("(hello, world)", vec!["hello", "world"]);
+        check("(hello,world)", vec!["hello", "world"]);
+        check("(hello)", vec!["hello"]);
+    }
+
+    /// A trailing comma inside an enumerated type must be rejected by the parser.
+    #[test]
+    fn parse_enumerated_type_panic() {
+        // Assert on the parse result directly instead of using `#[should_panic]`,
+        // so the test cannot pass because of an unrelated panic.
+        let lexer = Lexer::new("(hello,world,)");
+        let parser = grammar::EnumeratedTypeParser::new();
+        assert!(
+            parser.parse("", lexer).is_err(),
+            "a trailing comma must be a parse error"
+        );
+    }
+
+    #[test]
+    fn parse_type() {
+        #[track_caller]
+        fn check(input: &str, value: ast::Type) {
+            let lexer = Lexer::new(input);
+            let parser = grammar::TypeParser::new();
+            assert_eq!(parser.parse("", lexer).unwrap(), value)
+        }
+
+        check("Boolean", ast::Type::Simple(ast::SimpleType::Boolean));
+        check("mytype", ast::Type::Identifier("mytype"));
+        check(
+            "1..100",
+            ast::Type::SubRange {
+                start: ast::Constant::Number(ast::Number::Integer("1")),
+                end: ast::Constant::Number(ast::Number::Integer("100")),
+            },
+        );
+        check(
+            "array [1..100] of real",
+            ast::Type::Array {
+                index: vec![ast::Type::SubRange {
+                    start: ast::Constant::Number(ast::Number::Integer("1")),
+                    end: ast::Constant::Number(ast::Number::Integer("100")),
+                }],
+                component: Box::new(ast::Type::Simple(ast::SimpleType::Real)),
+                packed: false,
+            },
+        );
+        check(
+            "array [1..100, Boolean] of real",
+            ast::Type::Array {
+                index: vec![
+                    ast::Type::SubRange {
+                        start: ast::Constant::Number(ast::Number::Integer("1")),
+                        end: ast::Constant::Number(ast::Number::Integer("100")),
+                    },
+                    ast::Type::Simple(ast::SimpleType::Boolean),
+                ],
+                component: Box::new(ast::Type::Simple(ast::SimpleType::Real)),
+                packed: false,
+            },
+        );
+
+        check(
+            "packed array [Boolean] of packed array [0..10] of real",
+            ast::Type::Array {
+                index: vec![ast::Type::Simple(ast::SimpleType::Boolean)],
+                component: Box::new(ast::Type::Array {
+                    index: vec![ast::Type::SubRange {
+                        start: ast::Constant::Number(ast::Number::Integer("0")),
+                        end: ast::Constant::Number(ast::Number::Integer("10")),
+                    }],
+                    component: Box::new(ast::Type::Simple(ast::SimpleType::Real)),
+                    packed: true,
+                }),
+                packed: true,
+            },
+        );
+    }
+
+    #[test]
+    fn parse_type_def() {
+        #[track_caller]
+        fn check(input: &str, value: ast::TypeDef) {
+            let lexer = Lexer::new(input);
+            let parser = grammar::TypeDefParser::new();
+            assert_eq!(parser.parse("", lexer).unwrap(), value)
+        }
+
+        check(
+            "mytype = Boolean",
+            ast::TypeDef {
+                ident: "mytype",
+                value: ast::Type::Simple(ast::SimpleType::Boolean),
+            },
+        );
+
+        check(
+            "mytype = myident2",
+            ast::TypeDef {
+                ident: "mytype",
+                value: ast::Type::Identifier("myident2"),
+            },
+        );
+    }
 }
diff --git a/src/tokens.rs b/src/tokens.rs
index 4c88df3..cae5682 100644
--- a/src/tokens.rs
+++ b/src/tokens.rs
@@ -59,7 +59,7 @@ pub enum Token<'input> {
     #[token(",")]
     SpecialComma,
     #[token(";")]
-    SpecialDotComma,
+    SpecialSemiColon,
     #[token("\"")]
     SpecialQuotation,
     #[token("(")]
@@ -74,6 +74,8 @@ pub enum Token<'input> {
     SpecialGreaterEqual,
     #[token(":=")]
     SpecialAssign,
+    #[token(":")]
+    SpecialColon,
     #[token("..")]
     SpecialRange,
 
@@ -148,4 +150,18 @@ pub enum Token<'input> {
     WordWhile,
     #[token("with")]
     WordWith,
+
+    #[token("integer")]
+    TypeInteger,
+    #[token("real")]
+    TypeReal,
+    #[token("Boolean")]
+    TypeBoolean,
+    #[token("char")]
+    TypeChar,
+
+    #[token("true")]
+    WordTrue,
+    #[token("false")]
+    WordFalse,
 }