Skip to content

Instantly share code, notes, and snippets.

@vjcitn
Created November 6, 2025 10:10
Show Gist options
  • Select an option

  • Save vjcitn/8dea42432f511117a5a7d65dd46a7c06 to your computer and use it in GitHub Desktop.

Select an option

Save vjcitn/8dea42432f511117a5a7d65dd46a7c06 to your computer and use it in GitHub Desktop.
For a combination of a string and an ontology (importable via owlready2), compute term specificity.
from owlready2 import get_ontology, onto_path
import numpy as np
def term_to_class(user_term, onto):
    """Resolve a user-supplied string to a non-excluded class of ``onto``.

    Classes are examined in the order ``onto.classes()`` yields them. A class
    matches when its first label equals ``user_term`` case-insensitively, or
    when its ``name`` equals ``user_term`` exactly. Classes in the set
    returned by ``get_excluded_terms(onto)`` are never considered.

    Args:
        user_term: Label or class name to look up.
        onto: Ontology object exposing ``classes()``.

    Returns:
        The first matching class.

    Raises:
        SystemExit: With status 1, after printing a message, when no class
            matches.
    """
    # Imported locally because the module's import block does not provide
    # ``sys``; without it the not-found path died with NameError instead of
    # exiting cleanly.
    import sys

    all_terms = list(onto.classes())
    excluded_terms = get_excluded_terms(onto)
    wanted_label = user_term.lower()  # invariant across the loop

    for cls in all_terms:
        if cls in excluded_terms:
            continue
        # Try the label first, then fall back to the class name.
        label = cls.label.first() if hasattr(cls, "label") and cls.label else None
        if label and wanted_label == label.lower():
            return cls
        if user_term == cls.name:
            return cls

    print(f"Term '{user_term}' not found in ontology (or is deprecated).")
    sys.exit(1)
def get_excluded_terms(onto):
    """Return the set of classes to ignore when scoring terms.

    The ontology is searched for the first class whose ``name`` (not label)
    is ``"DeprecatedClass"``. When such a class exists, the result holds that
    class together with everything its ``descendants()`` reports; otherwise
    the result is an empty set.
    """
    deprecated_root = next(
        (candidate for candidate in onto.classes()
         if candidate.name == "DeprecatedClass"),
        None,
    )
    if not deprecated_root:
        return set()

    # Seed with the root itself so it is excluded whether or not
    # descendants() already reports it.
    to_skip = {deprecated_root}
    to_skip.update(deprecated_root.descendants())
    return to_skip
def term_specificity(term, onto):
    """Score how specific ``term`` is within ``onto``.

    The score is::

        (log(A + 1) / log(A_max + 1)) / (D + 1)

    where ``A`` and ``D`` are the numbers of ancestors and descendants of
    ``term`` (not counting ``term`` itself, and ignoring everything in
    ``get_excluded_terms(onto)``), and ``A_max`` is the largest such ancestor
    count over all non-excluded classes of the ontology.

    Args:
        term: Ontology class exposing ``ancestors()`` and ``descendants()``.
        onto: Ontology object exposing ``classes()``.

    Returns:
        The specificity score as a NumPy float.
    """
    excluded_terms = get_excluded_terms(onto)

    def count_related(cls, related):
        # ancestors()/descendants() are documented by owlready2 as sets that
        # include the class itself, so slicing a list built from them drops
        # an arbitrary member; remove ``cls`` by identity instead.
        return sum(
            1 for other in related
            if other is not cls and other not in excluded_terms
        )

    num_ancestors = count_related(term, term.ancestors())
    num_descendants = count_related(term, term.descendants())

    max_num_ancestors = max(
        (
            count_related(cls, cls.ancestors())
            for cls in onto.classes()
            if cls not in excluded_terms
        ),
        default=1,
    )
    # A maximum of 0 would make the denominator log(1) == 0 and yield NaN;
    # clamp to 1, the same fallback used when there are no valid classes.
    max_num_ancestors = max(max_num_ancestors, 1)

    specificity = (np.log(num_ancestors + 1) /
                   np.log(max_num_ancestors + 1)) / (num_descendants + 1)
    return specificity
@vjcitn
Copy link
Author

vjcitn commented Nov 6, 2025

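Here is a main that tests the above, assuming the code is saved as term_specificity.py and EDAM_1.25.owl is available in the current directory (note that the script loads it as "edam_1.25.owl").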

from term_specificity import term_specificity, term_to_class
from owlready2 import onto_path, get_ontology

def main():
    """Load the ontology, look up one term, and print its specificity."""
    # Add the current directory to owlready2's ontology search path.
    onto_path.append(".")
    # NOTE(review): the accompanying text names the file EDAM_1.25.owl while
    # this loads "edam_1.25.owl" -- confirm on case-sensitive filesystems.
    ontology = get_ontology("edam_1.25.owl").load()
    topic = term_to_class("Molecular genetics", ontology)
    score = term_specificity(topic, ontology)
    print(score)


if __name__ == "__main__":
    main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment