Created
December 3, 2025 15:21
-
-
Save bohwaz/2a650bac72fc411e76f7110ee783ceed to your computer and use it in GitHub Desktop.
rank function for SQLite3 FTS3/FTS4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
/**
 * SQLite FTS3/FTS4 search-ranking user-defined function.
 *
 * PHP port of the C rank() example from the SQLite manual:
 * https://www.sqlite.org/fts3.html#appendix_a
 *
 * The relevancy score is the sum, over every phrase of the query and every
 * column of the table, of:
 *
 *     (<hit count> / <global hit count>) * <column weight>
 *
 * Expected blob layout (32-bit unsigned integers):
 *
 *     [0]                nPhrase - number of phrases in the query
 *     [1]                nCol    - number of columns in the table
 *     [2 + 3*(p*nCol+c)] hit count of phrase p in column c for this row
 *     [.. + 1]           hit count of phrase p in column c over all rows
 *     [.. + 2]           third per-column value, not used by this function
 *
 * @param string $aMatchinfo Binary blob as produced by the FTS matchinfo()
 *                           function, laid out as described above.
 * @param mixed  ...$weights Exactly one numeric weight per table column, in
 *                           column order. Each one is converted with floatval().
 *
 * @return float The relevancy score (0.0 when nothing matched).
 *
 * @throws \BadMethodCallException If the blob size does not match the phrase
 *                                 and column counts it declares, or if the
 *                                 number of weights differs from the number
 *                                 of columns.
 */
function sqlite_rank(string $aMatchinfo, ...$weights): float
{
    // matchinfo() emits its integers in the machine's native byte order, so
    // decode with 'L' (32-bit unsigned, machine order) rather than forcing
    // little-endian with 'V'. The empty string is short-circuited, and a
    // failed unpack() is treated as "no integers" so that the size check
    // below reports it as an invalid blob instead of count() raising a
    // TypeError.
    $unpacked = $aMatchinfo === '' ? [] : unpack('L*', $aMatchinfo);

    // unpack() numbers its results from 1; reindex from 0 to mirror the
    // offsets used by the C implementation.
    $matchInfo = is_array($unpacked) ? array_values($unpacked) : [];
    $nMatchinfo = count($matchInfo);

    // Read the header: number of reportable phrases, then number of columns.
    $nPhrase = 0;
    $nCol = 0;
    if ($nMatchinfo >= 2) {
        $nPhrase = $matchInfo[0];
        $nCol = $matchInfo[1];
    }

    // The blob must hold the 2 header integers plus 3 integers per
    // (phrase, column) pair; this also guarantees every index read in the
    // loops below exists.
    if ($nMatchinfo !== (2 + 3 * $nCol * $nPhrase)) {
        throw new \BadMethodCallException('invalid matchinfo blob passed to function rank()');
    }

    $nWeights = count($weights);
    if ($nWeights !== $nCol) {
        throw new \BadMethodCallException(sprintf(
            'wrong number of arguments to function rank(): expected %d column weight(s), got %d',
            $nCol,
            $nWeights
        ));
    }

    // Reindex so that weights are addressed by column position even if the
    // caller spread an array with non-sequential keys.
    $weights = array_values(array_map('floatval', $weights));

    $score = 0.0;

    // Iterate through each phrase in the user's query.
    for ($iPhrase = 0; $iPhrase < $nPhrase; $iPhrase++) {
        // Start of the data for phrase $iPhrase inside the blob.
        $phraseOffset = 2 + $iPhrase * $nCol * 3;

        // Iterate through each column of the table.
        for ($iCol = 0; $iCol < $nCol; $iCol++) {
            $idxHit = $phraseOffset + $iCol * 3;
            $nHitCount = $matchInfo[$idxHit];
            $nGlobalHitCount = $matchInfo[$idxHit + 1];

            // Skip columns without hits; the global-count test additionally
            // guards against a division by zero on inconsistent input.
            if ($nHitCount > 0 && $nGlobalHitCount !== 0) {
                $score += ($nHitCount / $nGlobalHitCount) * $weights[$iCol];
            }
        }
    }

    return $score;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment