diff

Get ipynb diffs by cell
import shutil, tempfile, random
# Fixed seed so the example is reproducible (presumably keeps the generated cell ids stable — confirm)
random.seed(42)
# Throwaway git repo in a temp dir, holding a two-cell notebook committed on `main`
td = Path(tempfile.mkdtemp(prefix='nbdiff_test_'))
g = Git(td)
g.init(b='main')
nb_path = td/'test.ipynb'
nb = new_nb(['x=1', 'y=2'])
write_nb(nb, nb_path)
g.add('test.ipynb')
g.commit(m='initial notebook')
# Uncommitted working-dir edits: change the first cell's source and append a third cell
nb.cells[0].source = 'x = 100'
nb.cells.append(mk_cell('z=3'))
write_nb(nb, nb_path)

source

read_nb_from_git


def read_nb_from_git(
    g:Git, # The git object
    path, # The path to the notebook (absolute or relative to git root)
    ref:str=None, # The git ref to read from (e.g. HEAD); None for working dir
)->AttrDict: # The notebook

Read notebook from git ref (e.g. HEAD) at path, or working dir if ref is None

read_nb_from_git(g, 'test.ipynb', 'HEAD').cells
[{'cell_type': 'code',
  'execution_count': 0,
  'id': '390c8c7d',
  'metadata': {},
  'outputs': [],
  'source': 'x=1',
  'idx_': 0},
 {'cell_type': 'code',
  'execution_count': 0,
  'id': '7247342c',
  'metadata': {},
  'outputs': [],
  'source': 'y=2',
  'idx_': 1}]

source

nbs_pair


def nbs_pair(
    nb_path, # Path to the notebook
    ref_a:str='HEAD', # First git ref (None for working dir)
    ref_b:str=None, # Second git ref (None for working dir)
    f:function=noop, # Function to call on contents
):

Return the notebooks at two git refs, where `None` means the working directory. By default, returns the notebook at `HEAD` and the one in the working directory.

a,b = nbs_pair(nb_path)
a
{'390c8c7d': {'cell_type': 'code',
  'execution_count': 0,
  'id': '390c8c7d',
  'metadata': {},
  'outputs': [],
  'source': 'x=1',
  'idx_': 0},
 '7247342c': {'cell_type': 'code',
  'execution_count': 0,
  'id': '7247342c',
  'metadata': {},
  'outputs': [],
  'source': 'y=2',
  'idx_': 1}}

source

changed_cells


def changed_cells(
    nb_path, # Path to the notebook
    ref_a:str='HEAD', # First git ref (None for working dir)
    ref_b:str=None, # Second git ref (None for working dir)
    adds:bool=True, # Include cells in b but not in a
    changes:bool=True, # Include cells with different content
    dels:bool=False, # Include cells in a but not in b
    metadata:bool=False, # Consider cell metadata when comparing
    outputs:bool=False, # Consider cell outputs when comparing
):

Return set of cell IDs for changed/added/deleted cells between two refs

changed_cells(td/'test.ipynb')
{'390c8c7d', 'd8100f2f'}

source

source_diff


def source_diff(
    old_source, # Original source string
    new_source, # New source string
):

Return unified diff string for source change

print(source_diff('x = 1\ny=2', 'x = 100\ny=2'))
--- 
+++ 
@@ -1,2 +1,2 @@
-x = 1
+x = 100
 y=2

source

cell_diffs


def cell_diffs(
    nb_path, # Path to the notebook
    ref_a:str='HEAD', # First git ref (None for working dir)
    ref_b:str=None, # Second git ref (None for working dir)
    adds:bool=True, # Include cells in b but not in a
    changes:bool=True, # Include cells with different content
    dels:bool=False, # Include cells in a but not in b
    metadata:bool=False, # Consider cell metadata when comparing
    outputs:bool=False, # Consider cell outputs when comparing
):

Return a `{cell_id: diff}` dict of unified diff strings for changed/added/deleted cells between two refs

d = cell_diffs(td/'test.ipynb')
d
{'d8100f2f': '--- \n+++ \n@@ -0,0 +1 @@\n+z=3',
 '390c8c7d': '--- \n+++ \n@@ -1 +1 @@\n-x=1\n+x = 100'}
# Commit the working-dir edits; comparing HEAD with the working dir should then report nothing
g.add('test.ipynb')
g.commit(m='update notebook')
assert not changed_cells(td/'test.ipynb')
assert not cell_diffs(td/'test.ipynb')
# Remove the temporary repo created for the example
shutil.rmtree(td)