import gzip
from typing import Optional, Generator, Any
import ijson
from itertools import chain
from kgx.source.tsv_source import TsvSource
class JsonSource(TsvSource):
    """
    JsonSource is responsible for reading data as records
    from a JSON.

    Node records are read from the top-level ``nodes`` array and edge
    records from the top-level ``edges`` array of the document.
    """

    def __init__(self):
        super().__init__()
        # Compression type of the file being read; set on each call to ``parse``.
        self.compression = None

    def parse(
        self,
        filename: str,
        format: str = "json",
        compression: Optional[str] = None,
        **kwargs: Any
    ) -> Generator:
        """
        This method reads from a JSON and yields records.

        All node records are yielded first, followed by all edge records.
        The file is opened once for each of the two passes.

        Parameters
        ----------
        filename: str
            The filename to parse
        format: str
            The format (``json``)
        compression: Optional[str]
            The compression type (``gz``)
        kwargs: Any
            Any additional arguments

        Returns
        -------
        Generator
            A generator for node and edge records read from the file

        """
        # The extra keyword arguments are handed over as a single dict.
        self.set_provenance_map(kwargs)
        # Remembered on the instance because read_nodes/read_edges consult it
        # when deciding how to open the file.
        self.compression = compression
        n = self.read_nodes(filename)
        e = self.read_edges(filename)
        # Both generators are lazy: no file is opened until iteration starts.
        yield from chain(n, e)

    def _open_file(self, filename: str):
        """
        Open ``filename`` for binary reading, honouring ``self.compression``.

        Parameters
        ----------
        filename: str
            The filename to open

        Returns
        -------
        A binary file object; a gzip reader when ``self.compression`` is
        ``gz``, a plain file object otherwise.

        """
        if self.compression == "gz":
            return gzip.open(filename, "rb")
        return open(filename, "rb")

    def read_nodes(self, filename: str) -> Generator:
        """
        Read node records from a JSON.

        Parameters
        ----------
        filename: str
            The filename to read from

        Returns
        -------
        Generator
            A generator for node records

        """
        # The ``with`` block guarantees the handle is closed once the generator
        # is exhausted, explicitly closed, or interrupted by an exception.
        with self._open_file(filename) as fh:
            # Stream the elements of the ``nodes`` array one at a time.
            for n in ijson.items(fh, "nodes.item"):
                yield self.read_node(n)

    def read_edges(self, filename: str) -> Generator:
        """
        Read edge records from a JSON.

        Parameters
        ----------
        filename: str
            The filename to read from

        Returns
        -------
        Generator
            A generator for edge records

        """
        # The ``with`` block guarantees the handle is closed once the generator
        # is exhausted, explicitly closed, or interrupted by an exception.
        with self._open_file(filename) as fh:
            # Stream the elements of the ``edges`` array one at a time.
            for e in ijson.items(fh, "edges.item"):
                yield self.read_edge(e)