Created
November 6, 2025 10:10
-
-
Save vjcitn/8dea42432f511117a5a7d65dd46a7c06 to your computer and use it in GitHub Desktop.
For a combination of a string and an ontology (importable via owlready2), compute term specificity.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from owlready2 import get_ontology, onto_path | |
| import numpy as np | |
def term_to_class(user_term, onto):
    """Look up the ontology class that matches ``user_term``.

    Classes returned by ``get_excluded_terms(onto)`` are skipped. Each
    remaining class is visited in the order yielded by ``onto.classes()``;
    its first label is compared case-insensitively with ``user_term`` and
    its name is compared exactly. The first class passing either test wins,
    so a name match on an earlier class takes precedence over a label match
    on a later one.

    Args:
        user_term: Label (any letter case) or exact class name to find.
        onto: Ontology object exposing ``classes()``.

    Returns:
        The first matching, non-excluded class.

    Raises:
        SystemExit: With status 1, after printing a message, when no
            non-excluded class matches.
    """
    excluded_terms = get_excluded_terms(onto)
    wanted_label = user_term.lower()  # loop-invariant, so computed once
    for cls in onto.classes():
        if cls in excluded_terms:
            continue
        label = cls.label.first() if hasattr(cls, "label") and cls.label else None
        if (label and wanted_label == label.lower()) or user_term == cls.name:
            return cls
    print(f"Term '{user_term}' not found in ontology (or is deprecated).")
    # Same effect as sys.exit(1), but does not depend on ``sys`` having been
    # imported by the surrounding module.
    raise SystemExit(1)
def get_excluded_terms(onto):
    """Return the set of classes to ignore because they are deprecated.

    The first class yielded by ``onto.classes()`` whose ``name`` is exactly
    ``"DeprecatedClass"`` is taken as the root of the deprecated branch.

    Args:
        onto: Ontology object exposing ``classes()``.

    Returns:
        A set holding that root class and everything its ``descendants()``
        reports, or an empty set when no such class is found.
    """
    # The lookup is by class name, not by label.
    deprecated_root = next(
        (
            candidate
            for candidate in onto.classes()
            if candidate.name == "DeprecatedClass"
        ),
        None,
    )
    if not deprecated_root:
        return set()
    # The root is added explicitly in case descendants() does not report it.
    return {deprecated_root, *deprecated_root.descendants()}
def term_specificity(term, onto):
    """Score how specific ``term`` is within ``onto``.

    The score is ``log(a + 1) / log(A + 1) / (d + 1)`` where

    * ``a`` is the number of ancestors of ``term``,
    * ``A`` is the largest ancestor count over all classes of ``onto``,
    * ``d`` is the number of descendants of ``term``,

    each count ignoring the class itself and every class returned by
    ``get_excluded_terms(onto)``.

    Args:
        term: Class exposing ``ancestors()`` and ``descendants()``.
        onto: Ontology object exposing ``classes()``.

    Returns:
        The specificity score as a NumPy float.
    """
    excluded_terms = get_excluded_terms(onto)

    def count_related(cls, related):
        # Owlready2 returns ancestors()/descendants() as sets, whose
        # iteration order is undefined, so "self" cannot be dropped by
        # slicing off the first or last element; filter it by identity.
        return sum(
            1
            for other in related
            if other is not cls and other not in excluded_terms
        )

    num_ancestors = count_related(term, term.ancestors())
    num_descendants = count_related(term, term.descendants())

    max_num_ancestors = max(
        (
            count_related(cls, cls.ancestors())
            for cls in onto.classes()
            if cls not in excluded_terms
        ),
        default=1,  # no usable classes: fall back to 1, as the divisor must be > 0
    )
    # A maximum of 0 would make the normaliser log(1) == 0 and the score
    # NaN/inf; clamp to the same fallback used for an empty ontology.
    max_num_ancestors = max(max_num_ancestors, 1)

    specificity = (
        np.log(num_ancestors + 1) / np.log(max_num_ancestors + 1)
    ) / (num_descendants + 1)
    return specificity
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here is a main that tests the above, assuming EDAM_1.25.owl is available