Skip to content

Instantly share code, notes, and snippets.

@johnrcui
Created February 21, 2025 00:50
Show Gist options
  • Select an option

  • Save johnrcui/a1d06daa80904ec13b300441847598b3 to your computer and use it in GitHub Desktop.

Select an option

Save johnrcui/a1d06daa80904ec13b300441847598b3 to your computer and use it in GitHub Desktop.
Levenshtein distance in MySQL with short circuit logic
DELIMITER //
-- Compute the Levenshtein distance between two strings within a given max distance
-- If any point it becomes certain max distance threshold will be crossed, the function
-- short circuits and returns immedately with a value of `max_distance + 1`
--
-- NOTE: This code was generated and optimized using DeepSeek R1
--
CREATE FUNCTION MAX_LEVENSHTEIN(s1 VARCHAR(255), s2 VARCHAR(255), max_distance INT) RETURNS INT
BEGIN
DECLARE s1_len, s2_len, i, j, val, cost, current_min, diag, up, left_val INT;
DECLARE prev_row, curr_row TEXT;
SET s1_len = CHAR_LENGTH(s1), s2_len = CHAR_LENGTH(s2);
-- Early return if strings are identical
IF s1 = s2 THEN
RETURN 0;
END IF;
-- Check if the difference in lengths exceeds max_distance
IF ABS(s1_len - s2_len) > max_distance THEN
RETURN max_distance + 1;
END IF;
-- Handle edge cases where either string is empty
IF s1_len = 0 THEN
RETURN IF(s2_len <= max_distance, s2_len, max_distance + 1);
END IF;
IF s2_len = 0 THEN
RETURN IF(s1_len <= max_distance, s1_len, max_distance + 1);
END IF;
-- Initialize the previous row of distances
SET prev_row = '';
SET j = 0;
WHILE j <= s2_len DO
SET prev_row = CONCAT(prev_row, ',', j);
SET j = j + 1;
END WHILE;
SET prev_row = SUBSTRING(prev_row FROM 2);
-- Iterate over each character in s1
SET i = 1;
WHILE i <= s1_len DO
SET curr_row = '';
SET current_min = 2147483647; -- Initialize to a large value
SET j = 0;
WHILE j <= s2_len DO
IF j = 0 THEN
-- First column represents transforming to empty string
SET val = i;
ELSE
-- Calculate cost based on current characters
IF SUBSTRING(s1, i, 1) = SUBSTRING(s2, j, 1) THEN
SET cost = 0;
ELSE
SET cost = 1;
END IF;
-- Retrieve previous row values for diagonal, up, and left
SET diag = CAST(SUBSTRING_INDEX(SUBSTRING_INDEX(prev_row, ',', j), ',', -1) AS UNSIGNED);
IF j > 0 THEN
SET up = CAST(SUBSTRING_INDEX(SUBSTRING_INDEX(prev_row, ',', j + 1), ',', -1) AS UNSIGNED);
ELSE
SET up = i;
END IF;
IF LENGTH(curr_row) > 0 THEN
SET left_val = CAST(SUBSTRING_INDEX(curr_row, ',', -1) AS UNSIGNED);
ELSE
SET left_val = i;
END IF;
-- Compute the current cell value
SET val = LEAST(up + 1, left_val + 1, diag + cost);
END IF;
-- Update current row and track the minimum value
SET curr_row = CONCAT(curr_row, ',', val);
IF val < current_min THEN
SET current_min = val;
END IF;
SET j = j + 1;
END WHILE;
SET curr_row = SUBSTRING(curr_row FROM 2);
-- Short-circuit if current minimum exceeds max_distance
IF current_min > max_distance THEN
RETURN max_distance + 1;
END IF;
-- Prepare for next iteration
SET prev_row = curr_row;
SET i = i + 1;
END WHILE;
-- Extract the final Levenshtein distance from the last row
SET val = CAST(SUBSTRING_INDEX(prev_row, ',', -1) AS UNSIGNED);
-- Return the result if within max_distance, else max_distance + 1
RETURN IF(val <= max_distance, val, max_distance + 1);
END//
DELIMITER ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment