Created
January 16, 2026 18:48
-
-
Save nagataka/efde63affcbbdf6f0fdd7b5605d423e9 to your computer and use it in GitHub Desktop.
A naive script to construct an inverted index
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def construct_index(docs):
    """Build a naive inverted index from a mapping of document IDs to text.

    Each document is lower-cased and split on whitespace. A term is counted
    at most once per document, no matter how often it occurs in that
    document.

    Args:
        docs: Mapping of document ID -> document text (a string).

    Returns:
        A ``(term_dict, postings)`` tuple. ``term_dict`` maps each term to
        the number of documents containing it. ``postings`` maps each term
        to the list of IDs of those documents, in the iteration order of
        ``docs``.
    """
    term_dict = {}
    postings = {}
    for doc_id, doc in docs.items():
        # dict.fromkeys de-duplicates while keeping first-occurrence order,
        # so the key order of both results is reproducible across runs.
        # Iterating a set of strings is not, because of hash randomisation.
        for term in dict.fromkeys(doc.lower().split()):
            # update the dictionary (document frequency)
            term_dict[term] = term_dict.get(term, 0) + 1
            # update the postings
            postings.setdefault(term, []).append(doc_id)
    return term_dict, postings
def main():
    """Index the four sample documents and print both resulting tables."""
    # Sample corpus from the IR book, Ch. 1-2, Exercise 1.1.
    sample_docs = {
        1: "new home sales top forecasts",
        2: "home sales rise in july",
        3: "increase in home sales in july",
        4: "july new home sales rise",
    }
    # construct_index returns (term dictionary, postings); print each in turn.
    for table in construct_index(sample_docs):
        print(table)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment