From 8894e08c90c4a21ef057c07722af6523dd272bac Mon Sep 17 00:00:00 2001 From: peshwar9 Date: Sun, 14 Jun 2020 12:35:11 +0530 Subject: [PATCH] Added fixes and documentation --- chapter2/ast.rs | 39 ++++------------------ chapter2/mod.rs | 1 + chapter2/parser.rs | 75 ++++++++++++++++++++++++++++++++----------- chapter2/token.rs | 8 +++++ chapter2/tokenizer.rs | 47 ++++++++------------------- 5 files changed, 84 insertions(+), 86 deletions(-) diff --git a/chapter2/ast.rs b/chapter2/ast.rs index 2a91bce..523e52a 100644 --- a/chapter2/ast.rs +++ b/chapter2/ast.rs @@ -1,14 +1,11 @@ +/// This program contains list of valid AST nodes that can be constructed and also evaluates an AST to compute a value // Standard lib use std::error; -use std::fmt; -//Primary external libraries - -//utility externa libraries - -//internal modules //structs +// List of allowed AST nodes that can be constructed by Parser +// Tokens can be arithmetic operators or a Number #[derive(Debug, Clone, PartialEq)] pub enum Node { Add(Box, Box), @@ -16,14 +13,12 @@ pub enum Node { Multiply(Box, Box), Divide(Box, Box), Caret(Box, Box), - LeftParen(Box, Box), - RightParen(Box, Box), Negative(Box), - Absolute(Box), Number(f64), } -pub fn eval(expr: Node) -> Result { +// Given an AST, calculate the numeric value. +pub fn eval(expr: Node) -> Result> { use self::Node::*; match expr { Number(i) => Ok(i), @@ -33,32 +28,10 @@ pub fn eval(expr: Node) -> Result { Divide(expr1, expr2) => Ok(eval(*expr1)? / eval(*expr2)?), Negative(expr1) => Ok(-(eval(*expr1)?)), Caret(expr1, expr2) => Ok(eval(*expr1)?.powf(eval(*expr2)?)), - Absolute(expr1) => Ok(eval(*expr1)?.abs()), - _ => Err(EvaluationError::UnableToEvaluate( - "No clue, sorry".to_string(), - )), - } -} -#[derive(Debug)] -pub enum EvaluationError { - UnableToEvaluate(String), -} - -impl fmt::Display for EvaluationError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match &self { - self::EvaluationError::UnableToEvaluate(e) => write!(f, "Error in evaluating {}", e), - } - } -} -impl error::Error for EvaluationError { - fn description(&self) -> &str { - match &self { - self::EvaluationError::UnableToEvaluate(e) => &e, - } } } +//Unit tests #[cfg(test)] mod tests { use super::*; diff --git a/chapter2/mod.rs b/chapter2/mod.rs index b9b222d..dba3699 100644 --- a/chapter2/mod.rs +++ b/chapter2/mod.rs @@ -1,3 +1,4 @@ +/// Module Parsemath pub mod ast; pub mod parser; pub mod token; diff --git a/chapter2/parser.rs b/chapter2/parser.rs index 55fe780..96ac299 100644 --- a/chapter2/parser.rs +++ b/chapter2/parser.rs @@ -1,34 +1,39 @@ +/// This program reads tokens returned by Tokenizer and converts them into AST. // Standard lib use std::error; use std::fmt; -// Primary external libs - -// Secondary internal libs // Internal modules use super::ast::Node; use super::token::{OperPrec, Token}; use super::tokenizer::Tokenizer; + //Structs and constants +// Parser struct pub struct Parser<'a> { tokenizer: Tokenizer<'a>, current_token: Token, } +// Public methods of Parser + impl<'a> Parser<'a> { + // Create a new instance of Parser pub fn new(expr: &'a str) -> Result { let mut lexer = Tokenizer::new(expr); - let cur_token = lexer.next().unwrap(); + let cur_token = match lexer.next() { + Some(token) => token, + None => return Err(ParseError::InvalidOperator("Invalid character".into())), + }; Ok(Parser { tokenizer: lexer, current_token: cur_token, }) } - fn next_token(&mut self) -> Result<(), ParseError> { - self.current_token = self.tokenizer.next().unwrap(); - Ok(()) - } + + // Take an arithmetic expression as input and return an AST + pub fn parse(&mut self) -> Result { let ast = self.generate_ast(OperPrec::DefaultZero); match ast { @@ -36,6 +41,22 @@ impl<'a> Parser<'a> { Err(e) => Err(e), } } +} + +// Private methods of Parser + +impl<'a> Parser<'a> { + // Retrieve the next token from arithmetic expression and set it to current_token field in Parser struct + fn get_next_token(&mut self) -> Result<(), ParseError> { + let next_token = match self.tokenizer.next() { + Some(token) => token, + None => return Err(ParseError::InvalidOperator("Invalid character".into())), + }; + self.current_token = next_token; + Ok(()) + } + + // Main workhorse method that is called recursively fn generate_ast(&mut self, oper_prec: OperPrec) -> Result { let mut left_expr = self.parse_number()?; @@ -50,22 +71,24 @@ impl<'a> Parser<'a> { Ok(left_expr) } + // Construct AST node for numbers, taking into account negative prefixes while handling parenthesis + fn parse_number(&mut self) -> Result { let token = self.current_token.clone(); match token { Token::Subtract => { - self.next_token()?; + self.get_next_token()?; let expr = self.generate_ast(OperPrec::Negative)?; Ok(Node::Negative(Box::new(expr))) } Token::Num(i) => { - self.next_token()?; + self.get_next_token()?; Ok(Node::Number(i)) } Token::LeftParen => { - self.next_token()?; + self.get_next_token()?; let expr = self.generate_ast(OperPrec::DefaultZero)?; - self.expect(Token::RightParen)?; + self.check_paren(Token::RightParen)?; if self.current_token == Token::LeftParen { let right = self.generate_ast(OperPrec::MulDiv)?; return Ok(Node::Multiply(Box::new(expr), Box::new(right))); @@ -77,9 +100,11 @@ impl<'a> Parser<'a> { } } - fn expect(&mut self, expected: Token) -> Result<(), ParseError> { + // Check for balancing parenthesis + + fn check_paren(&mut self, expected: Token) -> Result<(), ParseError> { if expected == self.current_token { - self.next_token()?; + self.get_next_token()?; Ok(()) } else { Err(ParseError::InvalidOperator(format!( @@ -89,34 +114,36 @@ impl<'a> Parser<'a> { } } + // Construct Operator AST nodes + fn convert_token_to_node(&mut self, left_expr: Node) -> Result { match self.current_token { Token::Add => { - self.next_token()?; + self.get_next_token()?; //Get right-side expression let right_expr = self.generate_ast(OperPrec::AddSub)?; Ok(Node::Add(Box::new(left_expr), Box::new(right_expr))) } Token::Subtract => { - self.next_token()?; + self.get_next_token()?; //Get right-side expression let right_expr = self.generate_ast(OperPrec::AddSub)?; Ok(Node::Subtract(Box::new(left_expr), Box::new(right_expr))) } Token::Multiply => { - self.next_token()?; + self.get_next_token()?; //Get right-side expression let right_expr = self.generate_ast(OperPrec::MulDiv)?; Ok(Node::Multiply(Box::new(left_expr), Box::new(right_expr))) } Token::Divide => { - self.next_token()?; + self.get_next_token()?; //Get right-side expression let right_expr = self.generate_ast(OperPrec::MulDiv)?; Ok(Node::Divide(Box::new(left_expr), Box::new(right_expr))) } Token::Caret => { - self.next_token()?; + self.get_next_token()?; //Get right-side expression let right_expr = self.generate_ast(OperPrec::Power)?; Ok(Node::Caret(Box::new(left_expr), Box::new(right_expr))) @@ -129,6 +156,7 @@ impl<'a> Parser<'a> { } } +// Custom error handler for Parser #[derive(Debug)] pub enum ParseError { UnableToParse(String), @@ -143,6 +171,7 @@ impl fmt::Display for ParseError { } } } + impl error::Error for ParseError { fn description(&self) -> &str { match &self { @@ -152,6 +181,14 @@ impl error::Error for ParseError { } } +// Handle error thrown from Tokenizer + +impl std::convert::From> for ParseError { + fn from(_evalerr: std::boxed::Box) -> Self { + return ParseError::UnableToParse("Unable to parse".into()); + } +} + // Unit tests #[cfg(test)] diff --git a/chapter2/token.rs b/chapter2/token.rs index 1d77762..8bcdf3d 100644 --- a/chapter2/token.rs +++ b/chapter2/token.rs @@ -1,3 +1,7 @@ +/// This contains enum for list of Tokens, and handles Operator precedence rules. + +// List of valid tokens that can be constructed from arithmetic expression by Tokenizer + #[derive(Debug, PartialEq, Clone)] pub enum Token { Add, @@ -11,6 +15,8 @@ pub enum Token { EOF, } +// Order of operators as per operator precedence rules (low to high) + #[derive(Debug, PartialEq, PartialOrd)] /// Defines all the OperPrec levels, from lowest to highest. pub enum OperPrec { @@ -21,6 +27,8 @@ pub enum OperPrec { Negative, } +// This contains methods to retrieve operator precedence for a given arithmetic operator + impl Token { pub fn get_oper_prec(&self) -> OperPrec { use self::OperPrec::*; diff --git a/chapter2/tokenizer.rs b/chapter2/tokenizer.rs index 63b93ad..716c664 100644 --- a/chapter2/tokenizer.rs +++ b/chapter2/tokenizer.rs @@ -1,22 +1,20 @@ +/// This module reads characters in arithmetic expression and converts them to tokens. +/// The allowed tokens are defined in ast module. // Standard lib -use std::error; -use std::fmt; use std::iter::Peekable; use std::str::Chars; -// Primary external libraries - -// Utility external libraries - //Other internal modules use super::token::Token; // Other structs +// Tokenizer struct contains a Peekable iterator on the arithmetic expression pub struct Tokenizer<'a> { expr: Peekable>, } +// Constructs a new instance of Tokenizer impl<'a> Tokenizer<'a> { pub fn new(new_expr: &'a str) -> Self { Tokenizer { @@ -25,6 +23,9 @@ impl<'a> Tokenizer<'a> { } } +// Implement Iterator trait for Tokenizer struct. +// With this, we can use next() method on tokenier to retrieve the next token from arithmetic expression + impl<'a> Iterator for Tokenizer<'a> { type Item = Token; @@ -33,14 +34,18 @@ impl<'a> Iterator for Tokenizer<'a> { match next_char { Some('0'..='9') => { - let mut number = next_char.unwrap().to_string(); + let mut number = next_char?.to_string(); + while let Some(next_char) = self.expr.peek() { if next_char.is_numeric() || next_char == &'.' { - number.push(self.expr.next().unwrap()); + number.push(self.expr.next()?); + } else if next_char == &'(' { + return None; } else { break; } } + Some(Token::Num(number.parse::().unwrap())) } Some('+') => Some(Token::Add), @@ -56,32 +61,6 @@ impl<'a> Iterator for Tokenizer<'a> { } } -#[derive(Debug)] -/// Defines the various errors that can occur during evaluation. -pub enum TokenizerError { - CharacterIsInvalid(String), -} - -impl fmt::Display for TokenizerError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use self::TokenizerError::*; - - match *self { - CharacterIsInvalid(ref e) => write!(f, "Lexing error: {}", e), - } - } -} - -impl error::Error for TokenizerError { - fn description(&self) -> &str { - use self::TokenizerError::*; - - match *self { - CharacterIsInvalid(ref e) => e, - } - } -} - // Unit tests #[cfg(test)] mod tests {