Skip to content

Instantly share code, notes, and snippets.

@nagataka
Created January 16, 2026 18:48
Show Gist options
  • Select an option

  • Save nagataka/efde63affcbbdf6f0fdd7b5605d423e9 to your computer and use it in GitHub Desktop.

Select an option

Save nagataka/efde63affcbbdf6f0fdd7b5605d423e9 to your computer and use it in GitHub Desktop.
A naive script to construct an inverted index
def construct_index(docs):
    """Build a simple inverted index from a mapping of documents.

    Each document is lower-cased and split on whitespace; no other
    normalization (punctuation stripping, stemming, ...) is applied.

    Args:
        docs: Mapping of document id -> document text.

    Returns:
        A ``(term_dict, postings)`` tuple of plain dicts:

        * ``term_dict`` maps each term to the number of documents that
          contain it (a term repeated inside one document counts once).
        * ``postings`` maps each term to the list of ids of the documents
          containing it, in the order ``docs`` is iterated (the list is
          not sorted here).

        Keys appear in first-seen order, so the result is reproducible
        from run to run.
    """
    term_dict = {}
    postings = {}
    for doc_id, doc in docs.items():
        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would make the key order depend on per-process string hashing.
        for term in dict.fromkeys(doc.lower().split()):
            term_dict[term] = term_dict.get(term, 0) + 1
            postings.setdefault(term, []).append(doc_id)
    return term_dict, postings
def main():
    """Index a small sample corpus and print the resulting structures."""
    # Sample documents from IR book Ch. 1-2 Exercise 1.1
    sample_docs = {
        1: "new home sales top forecasts",
        2: "home sales rise in july",
        3: "increase in home sales in july",
        4: "july new home sales rise",
    }
    doc_freqs, posting_lists = construct_index(sample_docs)
    # Print the term dictionary first, then the postings.
    for table in (doc_freqs, posting_lists):
        print(table)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment