# Source code for kgx.utils.graph_utils

from typing import List, Set, Dict, Optional
import stringcase
from cachetools import cached

from kgx.config import get_logger
from kgx.graph.base_graph import BaseGraph
from kgx.utils.kgx_utils import get_toolkit, get_cache, get_curie_lookup_service
from kgx.prefix_manager import PrefixManager

# Module-level dictionaries, both initially empty. Neither is read or written
# by the functions visible in this section -- presumably they are populated and
# consumed elsewhere; verify against callers before removing.
ONTOLOGY_PREFIX_MAP: Dict = {}
ONTOLOGY_GRAPH_CACHE: Dict = {}

# Logger used by the helpers below, obtained via kgx.config.get_logger().
log = get_logger()


def get_parents(
    graph: BaseGraph, node: str, relations: Optional[List[str]] = None
) -> List[str]:
    """
    Return all direct `parents` of a specified node, filtered by ``relations``.

    A parent is the target of an edge going out of ``node``.

    Parameters
    ----------
    graph: kgx.graph.base_graph.BaseGraph
        Graph to traverse
    node: str
        node identifier
    relations: Optional[List[str]]
        Predicates to keep. When ``None`` (the default) every outgoing
        edge is followed, regardless of its predicate.

    Returns
    -------
    List[str]
        A list of parent node(s); empty when ``node`` is not in the graph

    """
    if not graph.has_node(node):
        return []

    out_edges = graph.out_edges(node, keys=False, data=True)
    if relations is None:
        return [edge[1] for edge in out_edges]

    # An edge without a "predicate" attribute cannot match any requested
    # relation, so it is skipped instead of raising KeyError.
    return [
        edge[1]
        for edge in out_edges
        if "predicate" in edge[2] and edge[2]["predicate"] in relations
    ]
def get_ancestors(
    graph: BaseGraph, node: str, relations: Optional[List[str]] = None
) -> List[str]:
    """
    Return all `ancestors` of specified node, filtered by ``relations``.

    The graph is walked upwards from ``node`` by repeatedly calling
    ``get_parents``; each node is expanded at most once, so cycles terminate.

    Parameters
    ----------
    graph: kgx.graph.base_graph.BaseGraph
        Graph to traverse
    node: str
        node identifier
    relations: Optional[List[str]]
        Predicates to follow; passed through unchanged to ``get_parents``

    Returns
    -------
    List[str]
        A list of ancestor nodes in the order they were first visited,
        excluding ``node`` itself

    """
    visit_order: List[str] = []
    # Set mirror of ``visit_order`` so the "already seen" test is O(1) instead
    # of a linear scan of the list on every popped node.
    visited = set()
    pending = [node]
    while pending:
        current = pending.pop()
        if current in visited:
            continue
        visited.add(current)
        visit_order.append(current)
        pending.extend(get_parents(graph, current, relations=relations))

    # The start node is always the first one visited; it is not its own ancestor.
    return visit_order[1:]
@cached(get_cache())
def get_category_via_superclass(
    graph: BaseGraph, curie: str, load_ontology: bool = True
) -> Set[str]:
    """
    Derive categories for a CURIE from its ``subclass_of`` ancestors.

    Ancestors are walked in the order returned by ``get_ancestors``. At the
    first ancestor for which the toolkit's ``get_by_mapping`` returns a truthy
    value, the walk stops and the result is built from the ``name`` of every
    ancestor visited so far (when the graph node has one) plus whatever the
    toolkit's ``ancestors`` yields for that mapping.

    Parameters
    ----------
    graph: kgx.graph.base_graph.BaseGraph
        Graph to traverse
    curie: str
        Input CURIE
    load_ontology: bool
        When ``True`` and ``graph`` yields no ancestors for the CURIE, retry
        once against the ontology graph of the CURIE lookup service

    Returns
    -------
    Set[str]
        A set containing one (or more) category for the given CURIE; empty
        when the input is not a CURIE or no ancestor maps

    """
    log.debug("curie: {}".format(curie))
    categories: List[str] = []
    bmt = get_toolkit()
    if not PrefixManager.is_curie(curie):
        return set(categories)

    superclasses = get_ancestors(graph, curie, relations=["subclass_of"])
    if not superclasses and load_ontology:
        # Nothing in the supplied graph: fall back to the ontology graph,
        # with ``load_ontology`` off so the fallback happens only once.
        lookup_service = get_curie_lookup_service()
        fallback_graph = lookup_service.ontology_graph
        categories.extend(get_category_via_superclass(fallback_graph, curie, False))
    log.debug("Ancestors for CURIE {} via subClassOf: {}".format(curie, superclasses))

    visited = []
    for superclass in superclasses:
        element = bmt.get_by_mapping(superclass)
        visited.append(superclass)
        if not element:
            continue
        # there is direct mapping to BioLink Model
        log.debug("Ancestor {} mapped to {}".format(superclass, element))
        for node_id in visited:
            if "name" in graph.nodes()[node_id]:
                categories.append(graph.nodes()[node_id]["name"])
        categories.extend(bmt.ancestors(element))
        break
    return set(categories)
def curie_lookup(curie: str) -> Optional[str]:
    """
    Given a CURIE, find its label.

    Resolution order:

    1. If the CURIE prefix is one of ``OIO``, ``OWL``, ``owl``, ``OBO`` or
       ``rdfs``, the label is the snake-cased part after the first ``:``.
    2. Otherwise the CURIE is looked up in the lookup service's ``curie_map``.
    3. Otherwise the ``name`` of the matching node in the lookup service's
       ontology graph is used.

    Parameters
    ----------
    curie: str
        A CURIE

    Returns
    -------
    Optional[str]
        The label corresponding to the given CURIE, or ``None`` when no
        source knows the CURIE or the ontology node carries no ``name``

    """
    cls = get_curie_lookup_service()
    prefix = PrefixManager.get_prefix(curie)
    if prefix in ["OIO", "OWL", "owl", "OBO", "rdfs"]:
        return stringcase.snakecase(curie.split(":", 1)[1])
    if curie in cls.curie_map:
        return cls.curie_map[curie]
    if curie in cls.ontology_graph:
        node_data = cls.ontology_graph.nodes()[curie]
        # A node without a "name" yields None rather than a KeyError,
        # in line with the Optional[str] return type.
        if "name" in node_data:
            return node_data["name"]
    return None