Source code for kgx.source.json_source

import gzip
from typing import Optional, Generator, Any

import ijson
from itertools import chain

from kgx.source.tsv_source import TsvSource


class JsonSource(TsvSource):
    """
    JsonSource is responsible for reading data as records
    from a JSON.
    """

    def __init__(self):
        super().__init__()
        # Compression type of the input; assigned on every call to ``parse``.
        self.compression = None

    def parse(
        self,
        filename: str,
        format: str = "json",
        compression: Optional[str] = None,
        **kwargs: Any
    ) -> Generator:
        """
        This method reads from a JSON and yields records.

        The file is opened twice: once to stream the ``nodes`` array and,
        after that is exhausted, once more to stream the ``edges`` array.

        Parameters
        ----------
        filename: str
            The filename to parse
        format: str
            The format (``json``)
        compression: Optional[str]
            The compression type (``gz``)
        kwargs: Any
            Any additional arguments

        Returns
        -------
        Generator
            A generator for node and edge records read from the file

        """
        # ``set_provenance_map`` is not defined in this class; presumably it
        # is inherited through TsvSource -- verify against the base class.
        self.set_provenance_map(kwargs)
        self.compression = compression

        node_records = self.read_nodes(filename)
        edge_records = self.read_edges(filename)
        # Both generators are lazy, so the edge pass does not open the file
        # until every node record has been consumed.
        yield from chain(node_records, edge_records)

    def read_nodes(self, filename: str) -> Generator:
        """
        Read node records from a JSON.

        Parameters
        ----------
        filename: str
            The filename to read from

        Returns
        -------
        Generator
            A generator for node records

        """
        opener = gzip.open if self.compression == "gz" else open
        # The ``with`` block closes the handle when the generator is
        # exhausted, closed early, or fails; previously it was never closed.
        with opener(filename, "rb") as stream:
            for node in ijson.items(stream, "nodes.item"):
                yield self.read_node(node)

    def read_edges(self, filename: str) -> Generator:
        """
        Read edge records from a JSON.

        Parameters
        ----------
        filename: str
            The filename to read from

        Returns
        -------
        Generator
            A generator for edge records

        """
        opener = gzip.open if self.compression == "gz" else open
        # The ``with`` block closes the handle when the generator is
        # exhausted, closed early, or fails; previously it was never closed.
        with opener(filename, "rb") as stream:
            for edge in ijson.items(stream, "edges.item"):
                yield self.read_edge(edge)