import shutil, tempfile, random
diff
Get ipynb diffs by cell
random.seed(42)
td = Path(tempfile.mkdtemp(prefix='nbdiff_test_'))
g = Git(td)
g.init(b='main')
nb_path = td/'test.ipynb'
nb = new_nb(['x=1', 'y=2'])
write_nb(nb, nb_path)
g.add('test.ipynb')
g.commit(m='initial notebook')
nb.cells[0].source = 'x = 100'
nb.cells.append(mk_cell('z=3'))
write_nb(nb, nb_path)
read_nb_from_git
def read_nb_from_git(
g:Git, # The git object
path, # The path to the notebook (absolute or relative to git root)
ref:NoneType=None, # The git ref to read from (e.g. HEAD); None for working dir
)->AttrDict: # The notebook
Read notebook from git ref (e.g. HEAD) at path, or working dir if ref is None
read_nb_from_git(g, 'test.ipynb', 'HEAD').cells
[{'cell_type': 'code',
'execution_count': 0,
'id': '390c8c7d',
'metadata': {},
'outputs': [],
'source': 'x=1',
'idx_': 0},
{'cell_type': 'code',
'execution_count': 0,
'id': '7247342c',
'metadata': {},
'outputs': [],
'source': 'y=2',
'idx_': 1}]
nbs_pair
def nbs_pair(
nb_path, # Path to the notebook
ref_a:str='HEAD', # First git ref (None for working dir)
ref_b:NoneType=None, # Second git ref (None for working dir)
f:function=noop, # Function to call on contents
):
NBs at two refs; None means working dir. By default provides HEAD and working dir
a,b = nbs_pair(nb_path)
a
{'390c8c7d': {'cell_type': 'code',
'execution_count': 0,
'id': '390c8c7d',
'metadata': {},
'outputs': [],
'source': 'x=1',
'idx_': 0},
'7247342c': {'cell_type': 'code',
'execution_count': 0,
'id': '7247342c',
'metadata': {},
'outputs': [],
'source': 'y=2',
'idx_': 1}}
changed_cells
def changed_cells(
nb_path, ref_a:str='HEAD', # First git ref (None for working dir)
ref_b:NoneType=None, # Second git ref (None for working dir)
adds:bool=True, # Include cells in b but not in a
changes:bool=True, # Include cells with different content
dels:bool=False, # Include cells in a but not in b
metadata:bool=False, # Consider cell metadata when comparing
outputs:bool=False, # Consider cell outputs when comparing
):
Return set of cell IDs for changed/added/deleted cells between two refs
changed_cells(td/'test.ipynb')
{'390c8c7d', 'd8100f2f'}
source_diff
def source_diff(
old_source, # Original source string
new_source, # New source string
):
Return unified diff string for source change
print(source_diff('x = 1\ny=2', 'x = 100\ny=2'))
---
+++
@@ -1,2 +1,2 @@
-x = 1
+x = 100
y=2
cell_diffs
def cell_diffs(
nb_path, ref_a:str='HEAD', # First git ref (None for working dir)
ref_b:NoneType=None, # Second git ref (None for working dir)
adds:bool=True, # Include cells in b but not in a
changes:bool=True, # Include cells with different content
dels:bool=False, # Include cells in a but not in b
metadata:bool=False, # Consider cell metadata when comparing
outputs:bool=False, # Consider cell outputs when comparing
):
{cell_id:diff} for changed/added/deleted cells between two refs
d = cell_diffs(td/'test.ipynb')
d
{'d8100f2f': '--- \n+++ \n@@ -0,0 +1 @@\n+z=3',
'390c8c7d': '--- \n+++ \n@@ -1 +1 @@\n-x=1\n+x = 100'}
g.add('test.ipynb')
g.commit(m='update notebook')
assert not changed_cells(td/'test.ipynb')
assert not cell_diffs(td/'test.ipynb')
shutil.rmtree(td)