From 74c04a0e4ebbf7ef4f108fd4c2acb8291d69e01f Mon Sep 17 00:00:00 2001 From: Arnaud DABY-SEESARAM Date: Fri, 9 Dec 2022 14:26:28 +0100 Subject: [PATCH] [parser] new parser, no more pp (for now) --- src/ast.ml | 161 +++++++++++++++++++++++++++++++------------ src/lexer.mll | 24 +++---- src/main.ml | 12 ++-- src/parser.mly | 180 ++++++++++++++++++++++++++++++++----------------- src/pp.ml | 8 ++- 5 files changed, 264 insertions(+), 121 deletions(-) diff --git a/src/ast.ml b/src/ast.ml index af0ba62..5a31f22 100644 --- a/src/ast.ml +++ b/src/ast.ml @@ -1,65 +1,142 @@ -type location = Lexing.position * Lexing.position +exception MyTypeError of string -type ident = string +type location = Lexing.position * Lexing.position type real = float -type _ const = - | CReal: real -> real const - | CBool: bool -> bool const - | CInt: int -> int const +type ident = string + +type base_ty = + | TBool + | TInt + | TReal + +type const = + | CReal of real + | CBool of bool + | CInt of int type monop = | MOp_not | MOp_minus | MOp_pre type binop = | BOp_add | BOp_sub | BOp_mul | BOp_div | BOp_mod - | BOp_and | BOp_or | BOp_when + | BOp_and | BOp_or | BOp_arrow type compop = - | BOp_eq | BOp_neq - | BOp_le | BOp_lt | BOp_ge | BOp_gt + | COp_eq | COp_neq + | COp_le | COp_lt | COp_ge | COp_gt type triop = | TOp_if | TOp_merge -type _ t_var = - | BVar: ident -> bool t_var - | IVar: ident -> int t_var - | RVar: ident -> real t_var +type t_var = + | BVar of ident + | IVar of ident + | RVar of ident -type _ t_expression = - | EVar: 'a t_var -> 'a t_expression - | EMonOp: monop * 'a t_expression -> 'a t_expression - | EBinOp: binop * 'a t_expression * 'a t_expression -> 'a t_expression - | ETriOp: triop * bool t_expression * 'a t_expression * 'a t_expression -> 'a t_expression - | EComp: compop * 'a t_expression * 'a t_expression -> bool t_expression - | EConst: 'a const -> 'a t_expression - | ETuple: 'a t_expression * 'b t_expression -> ('a * 'b) t_expression - | EApp: (('a -> 'b) t_node) * 'a t_expression -> 'b t_expression +type t_expression = + | EVar of t_var + | EMonOp of monop * t_expression + | EBinOp of binop * t_expression * t_expression + | ETriOp of triop * t_expression * t_expression * t_expression + | EComp of compop * t_expression * t_expression + | EWhen of t_expression * t_expression + | EConst of const + | ETuple of t_expression list + | EApp of t_node * t_expression -and _ t_varlist = - | NVar: 'a t_varlist - | CVar: 'a t_var * 'b t_varlist -> ('a * 'b) t_varlist +and t_varlist = t_var list -and 'a t_equation = 'a t_varlist * 'a t_expression +and t_equation = t_varlist * t_expression -and _ t_eqlist = - | NEql: unit t_eqlist - | CEql: 'a t_equation * 'b t_eqlist -> ('a * 'b) t_eqlist +and t_eqlist = t_equation list -and _ t_node = - | MakeNode: - ident - * 'i t_varlist * 'o t_varlist - * 'l t_varlist * 'e t_eqlist - -> ('i -> 'o) t_node +and t_node = + { + n_name : ident; + n_inputs: t_varlist; + n_outputs: t_varlist; + n_local_vars: t_varlist; + n_equations: t_eqlist; + } -type _ t_nodelist = - | NNode: unit t_nodelist - | CNode: ('a -> 'b) t_node * 'c t_nodelist -> (('a -> 'b) * 'c) t_nodelist +type t_nodelist = t_node list + + +type full_ty = + | FTArr of full_ty * full_ty + | FTList of full_ty list + | FTBase of base_ty + +let varlist_get_type (vl: t_varlist): full_ty = + FTList + (List.map (function + | BVar _ -> FTBase TBool + | IVar _ -> FTBase TInt + | RVar _ -> FTBase TReal) vl) + + +let rec expression_get_type : t_expression -> full_ty = function + | EVar (BVar s) -> FTBase TBool + | EVar (IVar s) -> FTBase TInt + | EVar (RVar s) -> FTBase TReal + | EMonOp (_, e) -> expression_get_type e + | EBinOp (_, e1, e2) | EComp (_, e1, e2) -> + begin + let t1 = expression_get_type e1 in + let t2 = expression_get_type e2 in + if t1 = t2 + then t1 + else raise (MyTypeError "A binary operator only works on pairs of \ + expressions of the same type.") + end + | ETriOp (_, e1, e2, e3) -> + begin + let t1 = expression_get_type e1 in + let t2 = expression_get_type e2 in + let t3 = expression_get_type e3 in + if t1 = FTBase TBool && t2 = t3 + then t2 + else raise (MyTypeError "A tertiary operator only works when its \ + first argument is a boolean expressions, and its other expressions \ + have the same type.") + end + | EWhen (e1, e2) -> + begin + let t1 = expression_get_type e1 in + let t2 = expression_get_type e2 in + if t2 = FTBase TBool + then t1 + else raise (MyTypeError "The [when] keywork can only be used if its \ + second argument is a boolean expression") + end + | EConst (CInt _) -> FTBase TInt + | EConst (CReal _) -> FTBase TReal + | EConst (CBool _) -> FTBase TBool + | ETuple l -> + begin + FTList ( + List.fold_left (fun acc (expr: t_expression) -> + let t = expression_get_type expr in + match t with + | FTList lt -> lt @ acc + | _ -> t :: acc) [] l) + end + | EApp (n, e) -> + begin + let tn = node_get_type n in + let te = expression_get_type e in + match tn with + | FTArr (targs, tout) -> + if te = targs + then tout + else raise (MyTypeError "When applying another node [n], the \ + the type of your arguments should match the type of the inputs \ + of [n].") + | _ -> raise (MyTypeError "You cannot apply something that is not a \ + node, it does not make sense.") + end +and node_get_type n = + FTArr (varlist_get_type n.n_inputs, varlist_get_type n.n_outputs) -type base_ty = - | TBool - | TInt - | TReal diff --git a/src/lexer.mll b/src/lexer.mll index 8c9e539..5acc6ea 100644 --- a/src/lexer.mll +++ b/src/lexer.mll @@ -13,24 +13,23 @@ ("node", NODE); ("returns", RETURNS); ("var", VAR); - ("int", INT); - ("bool", BOOL); - ("<=", BO_le); - (">=", BO_ge); + ("int", TYP(Ast.TInt)); + ("bool", TYP(Ast.TBool)); + ("<=", CMP_le); + (">=", CMP_ge); ("not", MO_not); ("mod", BO_mod); ("&&", BO_and); ("and", BO_and); ("||", BO_or); ("or", BO_or); - ("<>", BO_neq); + ("<>", CMP_neq); ("if", IF); ("then", THEN); ("else", ELSE); - ("≤", BO_le); - ("≥", BO_ge ); - ("¬", MO_not); - ("pre", PRE); + ("merge", TO_merge); + ("when", WHEN); + ("pre", MO_pre); ("true", CONST_BOOL(true)); ("false", CONST_BOOL(false)); ]; @@ -46,20 +45,21 @@ rule token = parse ['\n' ' ' '\t'] { token lexbuf } (* skip blanks and newlines *) | ident { id_or_keywork (lexeme lexbuf) } | digit+ { CONST_INT(int_of_string (lexeme lexbuf)) } + | digit*'.'digit+ { CONST_REAL(float_of_string (lexeme lexbuf)) } | ',' { COMMA } | '=' { EQUAL } | '(' { LPAREN } | ')' { RPAREN } | ';' { SEMICOL } | ':' { COLON } - | '<' { BO_lt } - | '>' { BO_gt } + | '<' { CMP_lt } + | '>' { CMP_gt } | '+' { PLUS } | '-' { MINUS } | '*' { BO_mul } | '/' { BO_div } | '%' { BO_mod } - | "->" { ARROW } + | "->" { BO_arrow } | eof { EOF } | _ { raise (Lexing_error (Format.sprintf "Erruer à la vue de %s" (lexeme lexbuf)))} diff --git a/src/main.ml b/src/main.ml index 6c1f250..0dd1951 100644 --- a/src/main.ml +++ b/src/main.ml @@ -19,11 +19,11 @@ let print_verbose v s = * - The equations are well typed * - The output is set *) -let check_well_formedness (a: p_prog) = Some a -let check_dependencies (a: p_prog) = Some a -let simplify_prog (a: p_prog) = Some a +let check_well_formedness (a: t_nodelist) = Some a +let check_dependencies (a: t_nodelist) = Some a +let simplify_prog (a: t_nodelist) = Some a -let run verbose debug (passes: (p_prog -> p_prog option) list) +let run verbose debug (passes: (t_nodelist -> t_nodelist option) list) = verbose "kjlksjf" let _ = @@ -53,7 +53,7 @@ let _ = let print_debug = print_debug !verbose in (** Definition of the passes table *) - let passes_table : (string, p_prog -> p_prog option) Hashtbl.t = Hashtbl.create 100 in + let passes_table : (string, t_nodelist -> t_nodelist option) Hashtbl.t = Hashtbl.create 100 in List.iter (fun (s, k) -> Hashtbl.add passes_table s k) [ ("check_form", check_well_formedness); @@ -73,7 +73,7 @@ let _ = with Lexer.Lexing_error s -> exit_error (Format.sprintf "Code d'erreur:\n\t%s\n\n" s); exit 0 in - if !ppast then Format.printf "%a" Pp.pp_prog ast + if !ppast then Format.printf "%a" Pp.pp_ast ast else let passes = List.map (fun (pass: string) -> match Hashtbl.find_opt passes_table pass with diff --git a/src/parser.mly b/src/parser.mly index 6ff2047..c6d6fa5 100644 --- a/src/parser.mly +++ b/src/parser.mly @@ -1,5 +1,26 @@ %{ + exception MyParsingError of string + let current_location () = symbol_start_pos (), symbol_end_pos () + + let defined_nodes : (Ast.ident, Ast.t_node) Hashtbl.t = Hashtbl.create 100 + + let defined_vars : (Ast.ident, Ast.t_var) Hashtbl.t = Hashtbl.create 100 + + let fetch_node (n: Ast.ident) = + match Hashtbl.find_opt defined_nodes n with + | None -> + raise (MyParsingError + ("The node %s does not exist.")) + | Some node -> node + + let fetch_var (n: Ast.ident) = + match Hashtbl.find_opt defined_vars n with + | None -> + raise (MyParsingError + ("The var %s does not exist.")) + | Some var -> var + %} %token EOF @@ -17,23 +38,26 @@ %token VAR %token EQUAL %token COMMA -%token PRE -%token ARROW +%token TYP %token MO_not -%token BO_le -%token BO_lt -%token BO_ge -%token BO_gt -%token BO_mod +%token MO_pre +%token PLUS +%token MINUS %token BO_and %token BO_or %token BO_mul -%token BO_neq %token BO_div +%token BO_mod +%token BO_arrow +%token CMP_le +%token CMP_lt +%token CMP_ge +%token CMP_gt +%token CMP_neq +%token TO_merge -%token PLUS -%token MINUS +%token WHEN %token IF %token THEN @@ -41,10 +65,11 @@ %token CONST_INT %token CONST_BOOL +%token CONST_REAL /* The Entry Point */ %start main -%type main +%type main %% @@ -55,22 +80,24 @@ nodes: | node nodes { $1 :: $2 }; node: - NODE IDENT LPAREN in_params RPAREN + NODE node_content { (* Flush known variables *) Hashtbl.clear defined_vars; $2 } + +node_content: + IDENT LPAREN in_params RPAREN RETURNS LPAREN out_params RPAREN SEMICOL local_params LET equations TEL - { { pn_name = $2; - pn_input = $4; - pn_output = $8; - pn_local_vars = $11; - pn_equations = $13; - pn_loc = current_location (); } + { { n_name = $1; + n_inputs = $3; + n_outputs = $7; + n_local_vars = $10; + n_equations = $12; } } ; in_params: | /* empty */ { [] } | param_list { $1 } - ; +; out_params: param_list { $1 } ; @@ -80,8 +107,7 @@ local_params: ; param_list_semicol: - | param SEMICOL { $1 } - | param SEMICOL param_list_semicol { $1 @ $3 } + | param_list SEMICOL { $1 } param_list: | param { $1 } @@ -89,10 +115,23 @@ param_list: ; param: - ident_comma_list COLON typ - { let typ = $3 in List.map (fun i -> (i, typ)) $1 } + ident_comma_list COLON TYP + { let typ = $3 in + let idents = $1 in + Ast.( + match typ with + | TBool -> + List.map (fun s -> Hashtbl.add defined_vars s (BVar s); BVar s) idents + | TReal -> + List.map (fun s -> Hashtbl.add defined_vars s (RVar s); RVar s) idents + | TInt -> + List.map (fun s -> Hashtbl.add defined_vars s (IVar s); IVar s) idents) } ; +ident_comma_list: + | IDENT { [$1] } + | IDENT COMMA ident_comma_list { $1 :: $3 } + equations: | /* empty */ { [] } | equation SEMICOL equations @@ -101,53 +140,74 @@ equations: equation: pattern EQUAL expr - { { peq_patt = $1; peq_expr = $3; } } + { ($1, $3) } ; pattern: - | IDENT { PP_var ($1) } - | LPAREN IDENT COMMA indent_comma_list RPAREN { PP_tuple ($2 :: $4) }; + | IDENT { [fetch_var $1] } + | LPAREN ident_comma_list_patt RPAREN { $2 }; -indent_comma_list: - | IDENT { [$1] } - | IDENT COMMA indent_comma_list { $1 :: $3 } +ident_comma_list_patt: + | IDENT { [fetch_var $1] } + | IDENT COMMA ident_comma_list_patt { (fetch_var $1) :: $3 } expr: - /* Note: PLUS, MINUS and EQUAL do not follow the nomenclature BO_ MO_, ... */ + /* Note: EQUAL does not follow the nomenclature CMP_, ... */ | LPAREN expr RPAREN { $2 } - | IDENT { PE_Var $1 } - | MO_not expr { PE_MonOp(MOp_not, $2) } - | PLUS expr { $2 } /* +e = e for all e integer expression. */ - | MINUS expr { PE_MonOp(MOp_minus, $2) } - | expr PLUS expr { PE_BinOp(BOp_add, $1, $3) } - | expr MINUS expr { PE_BinOp(BOp_sub, $1, $3) } - | expr BO_mul expr { PE_BinOp(BOp_mul, $1, $3) } - | expr BO_div expr { PE_BinOp(BOp_div, $1, $3) } - | expr BO_mod expr { PE_BinOp(BOp_mod, $1, $3) } - | expr BO_and expr { PE_BinOp(BOp_and, $1, $3) } - | expr BO_or expr { PE_BinOp(BOp_or, $1, $3) } - | expr EQUAL expr { PE_BinOp(BOp_eq, $1, $3) } - | expr BO_neq expr { PE_BinOp(BOp_neq, $1, $3) } - | expr BO_le expr { PE_BinOp(BOp_le, $1, $3) } - | expr BO_lt expr { PE_BinOp(BOp_lt, $1, $3) } - | expr BO_ge expr { PE_BinOp(BOp_ge, $1, $3) } - | expr BO_gt expr { PE_BinOp(BOp_gt, $1, $3) } - | IF expr THEN expr ELSE expr { PE_TriOp(TOp_if, $2, $4, $6) } - | IDENT LPAREN expr_comma_list RPAREN{ PE_app ($1, $3) } - | LPAREN expr_comma_list RPAREN { PE_tuple($2) } - | CONST_INT { PE_Const(CInt $1 ) } - | CONST_BOOL { PE_Const(CBool $1 ) } - | PRE expr { PE_pre $2 } - | expr ARROW expr { PE_arrow ($1, $3) } + | IDENT { EVar (fetch_var $1) } + /* Unary operators */ + | MO_not expr { EMonOp (MOp_not, $2) } + | MO_pre expr { EMonOp (MOp_pre, $2) } + | MINUS expr { EMonOp (MOp_minus, $2) } + | PLUS expr { $2 } + /* Binary operators */ + | expr PLUS expr { EBinOp (BOp_add, $1, $3) } + | expr MINUS expr { EBinOp (BOp_sub, $1, $3) } + | expr BO_mul expr { EBinOp (BOp_mul, $1, $3) } + | expr BO_div expr { EBinOp (BOp_div, $1, $3) } + | expr BO_mod expr { EBinOp (BOp_mod, $1, $3) } + | expr BO_and expr { EBinOp (BOp_and, $1, $3) } + | expr BO_or expr { EBinOp (BOp_or, $1, $3) } + | expr BO_arrow expr { EBinOp (BOp_arrow, $1, $3) } + /* Comparison operators */ + | expr EQUAL expr { EComp (COp_eq, $1, $3) } + | expr CMP_neq expr { EComp (COp_neq, $1, $3) } + | expr CMP_le expr { EComp (COp_le, $1, $3) } + | expr CMP_lt expr { EComp (COp_lt, $1, $3) } + | expr CMP_ge expr { EComp (COp_ge, $1, $3) } + | expr CMP_gt expr { EComp (COp_gt, $1, $3) } + /* Tertiary operators */ + | IF expr THEN expr ELSE expr { ETriOp (TOp_if, $2, $4, $6) } + | TO_merge expr expr expr { ETriOp (TOp_merge, $2, $3, $4) } + /* When is neither a binop (a * 'a -> 'a) or a comp ('a * 'a -> bool) */ + | WHEN expr expr { EWhen ($2, $3) } + /* Constants */ + | CONST_INT { EConst (CInt $1) } + | CONST_BOOL { EConst (CBool $1) } + | CONST_REAL { EConst (CReal $1) } + /* Tuples */ + | LPAREN expr_comma_list RPAREN { $2 } + /* Applications */ + | IDENT LPAREN expr_comma_list RPAREN + { let name = $1 in + let args = $3 in + EApp (fetch_node name, args) } ; expr_comma_list: - | expr { [$1] } - | expr COMMA expr_comma_list { $1 :: $3 } - -typ: - | BOOL { Tbool } - | INT { Tint } + | expr + { let e = $1 in + match e with + | ETuple _ -> e + | _ -> ETuple [e] } + | expr COMMA expr_comma_list + { let e = $1 in + let le = $3 in + match e, le with + | ETuple t, ETuple t' -> ETuple (t @ t') + | _, ETuple t' -> ETuple (e :: t') + | _, _ -> raise (MyParsingError "This exception should not have been \ + raised.") } ; ident_comma_list: diff --git a/src/pp.ml b/src/pp.ml index 264150d..60f79e9 100644 --- a/src/pp.ml +++ b/src/pp.ml @@ -1,5 +1,11 @@ open Ast +let pp_ident fmt (s: ident): unit = Format.fprintf fmt "%s" s + +let pp_ast fmt (nodes: t_nodelist) = () + +(*open Ast + let pp_loc fmt (start, stop) = Lexing.( Format.fprintf fmt "%s: -- " @@ -127,4 +133,4 @@ let pp_prog fmt prog = (List.length prog) pp_nodes prog - +*)