Source code for kgx.prefix_manager

import re
from typing import Dict, Optional, Any

import prefixcommons.curie_util as cu
from cachetools import LRUCache, cached

from kgx.config import get_jsonld_context, get_logger
from kgx.utils.kgx_utils import contract, expand

# Module-level logger used by this module, obtained via kgx.config.get_logger.
log = get_logger()


class PrefixManager(object):
    """
    Manages prefix mappings.

    These include mappings for CURIEs such as GO:0008150, as well as
    shortforms such as biolink types such as Disease.

    Results of :meth:`expand` and :meth:`contract` are memoised in
    per-instance LRU caches. The caches are cleared whenever the mappings
    are changed through :meth:`set_prefix_map`, :meth:`update_prefix_map`
    or :meth:`update_reverse_prefix_map`, so lookups never return results
    computed from an outdated mapping. Mutating ``prefix_map`` or
    ``reverse_prefix_map`` directly bypasses that invalidation.
    """

    DEFAULT_NAMESPACE = "https://www.example.org/UNKNOWN/"

    # Upper bound on the number of entries kept by each LRU cache.
    _CACHE_SIZE = 1024

    prefix_map: Dict[str, str]
    reverse_prefix_map: Dict[str, str]

    def __init__(self, url: Optional[str] = None):
        """
        Initialize an instance of PrefixManager.

        Parameters
        ----------
        url: Optional[str]
            The URL from which to read a JSON-LD context for prefix mappings.
            When omitted (or empty) the context returned by
            ``get_jsonld_context()`` is used instead.

        """
        if url:
            context = cu.read_remote_jsonld_context(url)
        else:
            context = get_jsonld_context()
        self.set_prefix_map(context)

    def _cache_for(self, attr: str) -> LRUCache:
        """
        Return the per-instance LRU cache stored under ``attr``.

        The cache is created lazily so that instances built without going
        through ``__init__`` still work.
        """
        cache = getattr(self, attr, None)
        if cache is None:
            cache = LRUCache(maxsize=self._CACHE_SIZE)
            setattr(self, attr, cache)
        return cache

    def _invalidate_caches(self) -> None:
        """
        Drop every memoised ``expand``/``contract`` result of this instance.
        """
        for attr in ("_expand_cache", "_contract_cache"):
            cache = getattr(self, attr, None)
            if cache is not None:
                cache.clear()

    def set_prefix_map(self, m: Dict) -> None:
        """
        Populate `prefix_map` (and `reverse_prefix_map`) from a JSON-LD
        context.

        Parameters
        ----------
        m: dict
            Dictionary of prefix to URI mappings. Values are either plain
            strings or objects exposing ``get`` from which the ``"@id"``
            entry is taken.

        """
        # Anything memoised so far was derived from the previous mappings.
        self._invalidate_caches()

        self.prefix_map = {}
        for k, v in m.items():
            if isinstance(v, str):
                self.prefix_map[k] = v
            else:
                # Expanded term definition: only its "@id" is kept.
                self.prefix_map[k] = v.get("@id")

        if "biolink" not in self.prefix_map:
            # Fall back to the context's "@vocab" when it is declared.
            self.prefix_map["biolink"] = (
                self.prefix_map["@vocab"]
                if "@vocab" in self.prefix_map
                else "https://w3id.org/biolink/vocab/"
            )
        if "owlstar" not in self.prefix_map:
            self.prefix_map["owlstar"] = "http://w3id.org/owlstar/"
        if "@vocab" in self.prefix_map:
            # "@vocab" is a JSON-LD keyword, not a usable prefix.
            del self.prefix_map["@vocab"]
        if "MONARCH" not in self.prefix_map:
            self.prefix_map["MONARCH"] = "https://monarchinitiative.org/"
            # NOTE(review): assumed to belong to the same guard as "MONARCH"
            # -- confirm against the upstream source.
            self.prefix_map["MONARCH_NODE"] = "https://monarchinitiative.org/MONARCH_"
        if "" in self.prefix_map:
            # NOTE(review): the message announces a replacement, but this
            # branch leaves the existing "" mapping untouched -- confirm
            # which of the two is intended.
            log.info(
                f"Replacing default prefix mapping from {self.prefix_map['']} to 'www.example.org/UNKNOWN/'"
            )
        else:
            self.prefix_map[""] = self.DEFAULT_NAMESPACE

        self.reverse_prefix_map = {y: x for x, y in self.prefix_map.items()}

    def update_prefix_map(self, m: Dict[str, str]) -> None:
        """
        Update `prefix_map` with new mappings.

        Only the forward (prefix to IRI) map is modified; use
        :meth:`update_reverse_prefix_map` for the reverse direction.

        Parameters
        ----------
        m: Dict
            New prefix to IRI mappings

        """
        self.prefix_map.update(m)
        self._invalidate_caches()

    def update_reverse_prefix_map(self, m: Dict[str, str]) -> None:
        """
        Update `reverse_prefix_map` with new mappings.

        Parameters
        ----------
        m: Dict
            New IRI to prefix mappings

        """
        self.reverse_prefix_map.update(m)
        self._invalidate_caches()

    def expand(self, curie: str, fallback: bool = True) -> str:
        """
        Expand a given CURIE to an URI, based on mappings from `prefix_map`.

        Results are memoised per instance and per ``(curie, fallback)`` pair.

        Parameters
        ----------
        curie: str
            A CURIE
        fallback: bool
            Determines whether to fallback to default prefix mappings, as
            determined by `prefixcommons.curie_util`, when CURIE prefix is
            not found in `prefix_map`.

        Returns
        -------
        str
            A URI corresponding to the CURIE

        """
        cache = self._cache_for("_expand_cache")
        key = (curie, fallback)
        try:
            return cache[key]
        except KeyError:
            pass
        # Inside a method the bare name resolves to the module-level helper.
        uri = expand(curie, [self.prefix_map], fallback)
        cache[key] = uri
        return uri

    def contract(self, uri: str, fallback: bool = True) -> Optional[str]:
        """
        Contract a given URI to a CURIE, based on mappings from `prefix_map`.

        Results are memoised per instance and per ``(uri, fallback)`` pair.

        Parameters
        ----------
        uri: str
            A URI
        fallback: bool
            Determines whether to fallback to default prefix mappings, as
            determined by `prefixcommons.curie_util`, when URI prefix is not
            found in `reverse_prefix_map`.

        Returns
        -------
        Optional[str]
            A CURIE corresponding to the URI. The value is always passed
            through ``str()`` before being returned.

        """
        cache = self._cache_for("_contract_cache")
        key = (uri, fallback)
        try:
            return cache[key]
        except KeyError:
            pass
        # always prioritize non-CURIE shortform
        if self.reverse_prefix_map and uri in self.reverse_prefix_map:
            curie = self.reverse_prefix_map[uri]
        else:
            curie = contract(uri, [self.prefix_map], fallback)
        result = str(curie)
        cache[key] = result
        return result

    @staticmethod
    @cached(LRUCache(maxsize=_CACHE_SIZE))
    def is_curie(s: str) -> bool:
        """
        Check if a given string is a CURIE.

        Parameters
        ----------
        s: str
            A string

        Returns
        -------
        bool
            Whether or not the given string is a CURIE

        """
        return isinstance(s, str) and re.match(r"^[^ <()>:]*:[^/ :]+$", s) is not None

    @staticmethod
    @cached(LRUCache(maxsize=_CACHE_SIZE))
    def is_iri(s: str) -> bool:
        """
        Check if a given string is an IRI.

        Parameters
        ----------
        s: str
            A string

        Returns
        -------
        bool
            Whether or not the given string is an IRI, i.e. whether it
            starts with ``http``.

        """
        # A single "http" prefix test also covers "https".
        return isinstance(s, str) and s.startswith("http")

    @staticmethod
    @cached(LRUCache(maxsize=_CACHE_SIZE))
    def has_urlfragment(s: str) -> bool:
        """
        Check if a given string contains a URL fragment marker (``#``).

        Parameters
        ----------
        s: str
            A string

        Returns
        -------
        bool
            Whether or not the given string contains ``#``. Non-string
            input yields ``False``, in line with `is_curie` and `is_iri`.

        """
        return isinstance(s, str) and "#" in s

    @staticmethod
    @cached(LRUCache(maxsize=_CACHE_SIZE))
    def get_prefix(curie: str) -> Optional[str]:
        """
        Get the prefix from a given CURIE.

        Parameters
        ----------
        curie: str
            The CURIE

        Returns
        -------
        Optional[str]
            The CURIE prefix, or ``None`` when the input is not a CURIE

        """
        if not PrefixManager.is_curie(curie):
            return None
        # is_curie guarantees a ":" is present; split on the first one.
        return curie.partition(":")[0]

    @staticmethod
    @cached(LRUCache(maxsize=_CACHE_SIZE))
    def get_reference(curie: str) -> Optional[str]:
        """
        Get the reference of a given CURIE.

        Parameters
        ----------
        curie: str
            The CURIE

        Returns
        -------
        Optional[str]
            The reference of a CURIE, or ``None`` when the input is not a
            CURIE

        """
        if not PrefixManager.is_curie(curie):
            return None
        # is_curie guarantees a ":" is present; split on the first one.
        return curie.partition(":")[2]