Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save arockwell/5ce76c4f464f8c2c299ef4a62b72ef56 to your computer and use it in GitHub Desktop.

Select an option

Save arockwell/5ce76c4f464f8c2c299ef4a62b72ef56 to your computer and use it in GitHub Desktop.
Complete refactor of number parsing for lexer - mega-mind edition
// BEFORE: Complex, nested conditionals with inconsistent flow
// AFTER: State machine approach with clear phases
/// Lexes the remainder of a numeric literal and pushes one token for it.
///
/// The literal's first grapheme is read from `graphemes[self.column - 2]`
/// and scanning continues from index `self.column - 1` (presumably the
/// caller has already consumed the leading digit — confirm at call site).
/// `self.column` is advanced past every grapheme that belongs to the number.
///
/// Accepted shape: integer digits, an optional `.` fraction, and an optional
/// `e`/`E` exponent with an optional `+`/`-` sign. `_` is accepted as a
/// separator among integer, fraction and exponent digits and is dropped from
/// the token text.
///
/// On success a `TokenType::Float` token is pushed when a decimal point or
/// exponent was seen, otherwise a `TokenType::Integer` token. On malformed
/// input an error is recorded through `report_error` and no token is pushed.
fn number(&mut self, graphemes: &[&str]) {
    /// Phase of the literal currently being scanned.
    #[derive(Clone, Copy)]
    enum NumberState {
        /// Consuming integer digits.
        Digits,
        /// After the decimal point, consuming fraction digits.
        AfterDecimal,
        /// After `e`/`E`; a sign or a digit must follow.
        AfterE,
        /// After `+`/`-` in the exponent; a digit must follow.
        AfterSign,
        /// Consuming exponent digits.
        ExponentDigits,
    }

    let token_col = self.column - 2;
    let mut number = graphemes[token_col].to_string();
    let mut state = NumberState::Digits;
    let mut is_float = false;

    // `get` ends the loop cleanly once the cursor runs past the input.
    while let Some(&grapheme) = graphemes.get(self.column - 1) {
        // Require the whole grapheme to be one ASCII digit. Checking only the
        // first `char` would let a cluster such as "1\u{301}" (digit plus
        // combining mark) through and into the token text.
        let is_digit = matches!(grapheme.as_bytes(), [b'0'..=b'9']);

        match state {
            NumberState::Digits => match grapheme {
                _ if is_digit => number.push_str(grapheme),
                "." => {
                    is_float = true;
                    number.push_str(grapheme);
                    state = NumberState::AfterDecimal;
                }
                "e" | "E" => {
                    is_float = true;
                    number.push_str(grapheme);
                    state = NumberState::AfterE;
                }
                // Separator: consumed but not stored.
                "_" => {}
                // Not part of the number.
                _ => break,
            },
            NumberState::AfterDecimal => match grapheme {
                _ if is_digit => number.push_str(grapheme),
                // NOTE(review): this also accepts an exponent directly after
                // the point ("1.e5"), which skips the "digit after decimal
                // point" validation below — confirm that is intended.
                "e" | "E" => {
                    number.push_str(grapheme);
                    state = NumberState::AfterE;
                }
                "_" => {}
                "." => {
                    self.report_error(
                        graphemes,
                        "number may not contain multiple decimal points",
                    );
                    return;
                }
                _ => break,
            },
            NumberState::AfterE => match grapheme {
                _ if is_digit => {
                    number.push_str(grapheme);
                    state = NumberState::ExponentDigits;
                }
                "+" | "-" => {
                    number.push_str(grapheme);
                    state = NumberState::AfterSign;
                }
                _ => {
                    self.report_error(
                        graphemes,
                        "expected sign or digit after exponent marker",
                    );
                    return;
                }
            },
            NumberState::AfterSign => {
                if is_digit {
                    number.push_str(grapheme);
                    state = NumberState::ExponentDigits;
                } else {
                    self.report_error(graphemes, "expected digit after exponent sign");
                    return;
                }
            }
            NumberState::ExponentDigits => match grapheme {
                _ if is_digit => number.push_str(grapheme),
                "_" => {}
                "e" | "E" => {
                    self.report_error(
                        graphemes,
                        "number may not contain multiple exponents",
                    );
                    return;
                }
                _ => break,
            },
        }

        // Every arm that reaches this point consumed the current grapheme;
        // the `break` and `return` paths leave the cursor where it was.
        self.column += 1;
    }

    // Validate the state the scan stopped in. The match is exhaustive on
    // purpose so a newly added state has to be classified here.
    match state {
        // The text still ends with '.', so no fraction digit was stored.
        NumberState::AfterDecimal if number.ends_with('.') => {
            self.report_error(
                graphemes,
                "expected at least one digit after decimal point",
            );
            return;
        }
        // Input ended right after the exponent marker or its sign.
        NumberState::AfterE | NumberState::AfterSign => {
            self.report_error(graphemes, "incomplete exponent in number");
            return;
        }
        NumberState::Digits | NumberState::AfterDecimal | NumberState::ExponentDigits => {}
    }

    self.tokens.push(Token::new(
        if is_float { TokenType::Float } else { TokenType::Integer },
        number,
        self.line,
        token_col,
    ));
}
// Helper for consistent error reporting
fn report_error(&mut self, graphemes: &[&str], message: &str) {
self.errors.push(LexError::new(
graphemes.concat(),
self.line,
self.column - 1,
message.to_owned(),
));
}
EOF < /dev/null
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment