From 9ed78b17d5dd41a9525bd6a57b2dc321b806f265 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 14:03:17 +0200 Subject: [PATCH 01/28] Graph.rg: remove member It wasn't used. --- sizes/graph.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index 7e923f6..9187907 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -69,7 +69,7 @@ func ScanRepositoryUsingGraph( ctx, cancel := context.WithCancel(context.TODO()) defer cancel() - graph := NewGraph(rg, nameStyle) + graph := NewGraph(nameStyle) refIter, err := repo.NewReferenceIter(ctx) if err != nil { @@ -337,8 +337,6 @@ func ScanRepositoryUsingGraph( // Graph is an object graph that is being built up. type Graph struct { - rg RefGrouper - blobLock sync.Mutex blobSizes map[git.OID]BlobSize @@ -361,10 +359,8 @@ type Graph struct { } // NewGraph creates and returns a new `*Graph` instance. -func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { +func NewGraph(nameStyle NameStyle) *Graph { return &Graph{ - rg: rg, - blobSizes: make(map[git.OID]BlobSize), treeRecords: make(map[git.OID]*treeRecord), From 559b030c9aa7b8fbc8803863e20aae4a720cbb18 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 16:57:25 +0200 Subject: [PATCH 02/28] Collect references before starting the object traversal This provides a better separation of concerns, which will be taken advantage of shortly. --- sizes/graph.go | 79 +++++----------------------------------------- sizes/grouper.go | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 71 deletions(-) create mode 100644 sizes/grouper.go diff --git a/sizes/graph.go b/sizes/graph.go index 9187907..a56cbc2 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -11,50 +11,6 @@ import ( "github.com/github/git-sizer/meter" ) -// RefGroupSymbol is the string "identifier" that is used to refer to -// a refgroup, for example in the gitconfig. Nesting of refgroups is -// inferred from their names, using "." as separator between -// components. For example, if there are three refgroups with symbols -// "tags", "tags.releases", and "foo.bar", then "tags.releases" is -// considered to be nested within "tags", and "foo.bar" is considered -// to be nested within "foo", the latter being created automatically -// if it was not configured explicitly. -type RefGroupSymbol string - -// RefGroup is a group of references, for example "branches" or -// "tags". Reference groups might overlap. -type RefGroup struct { - // Symbol is the unique string by which this `RefGroup` is - // identified and configured. It consists of dot-separated - // components, which implicitly makes a nested tree-like - // structure. - Symbol RefGroupSymbol - - // Name is the name for this `ReferenceGroup` to be presented - // in user-readable output. - Name string -} - -// RefGrouper describes a type that can collate reference names into -// groups and decide which ones to walk. -type RefGrouper interface { - // Categorize tells whether `refname` should be walked at all, - // and if so, the symbols of the reference groups to which it - // belongs. - Categorize(refname string) (bool, []RefGroupSymbol) - - // Groups returns the list of `ReferenceGroup`s, in the order - // that they should be presented. The return value might - // depend on which references have been seen so far. - Groups() []RefGroup -} - -type refSeen struct { - git.Reference - walked bool - groups []RefGroupSymbol -} - // ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which // references to scan and how to group them. `nameStyle` specifies // whether the output should include full names, hashes only, or @@ -71,9 +27,9 @@ func ScanRepositoryUsingGraph( graph := NewGraph(nameStyle) - refIter, err := repo.NewReferenceIter(ctx) + refsSeen, err := CollectReferences(ctx, repo, rg) if err != nil { - return HistorySize{}, err + return HistorySize{}, fmt.Errorf("reading references: %w", err) } objIter, err := repo.NewObjectIter(context.TODO()) @@ -82,41 +38,22 @@ func ScanRepositoryUsingGraph( } errChan := make(chan error, 1) - var refsSeen []refSeen - // Feed the references that we want into the stdin of the object - // iterator: + // Feed the references that we want to walk into the stdin of the + // object iterator: go func() { defer objIter.Close() errChan <- func() error { - for { - ref, ok, err := refIter.Next() - if err != nil { - return err - } - if !ok { - return nil - } - - walk, groups := rg.Categorize(ref.Refname) - - refsSeen = append( - refsSeen, - refSeen{ - Reference: ref, - walked: walk, - groups: groups, - }, - ) - - if !walk { + for _, refSeen := range refsSeen { + if !refSeen.walked { continue } - if err := objIter.AddRoot(ref.OID); err != nil { + if err := objIter.AddRoot(refSeen.OID); err != nil { return err } } + return nil }() }() diff --git a/sizes/grouper.go b/sizes/grouper.go new file mode 100644 index 0000000..a5b8a26 --- /dev/null +++ b/sizes/grouper.go @@ -0,0 +1,82 @@ +package sizes + +import ( + "context" + + "github.com/github/git-sizer/git" +) + +// RefGroupSymbol is the string "identifier" that is used to refer to +// a refgroup, for example in the gitconfig. Nesting of refgroups is +// inferred from their names, using "." as separator between +// components. For example, if there are three refgroups with symbols +// "tags", "tags.releases", and "foo.bar", then "tags.releases" is +// considered to be nested within "tags", and "foo.bar" is considered +// to be nested within "foo", the latter being created automatically +// if it was not configured explicitly. +type RefGroupSymbol string + +// RefGroup is a group of references, for example "branches" or +// "tags". Reference groups might overlap. +type RefGroup struct { + // Symbol is the unique string by which this `RefGroup` is + // identified and configured. It consists of dot-separated + // components, which implicitly makes a nested tree-like + // structure. + Symbol RefGroupSymbol + + // Name is the name for this `ReferenceGroup` to be presented + // in user-readable output. + Name string +} + +// RefGrouper describes a type that can collate reference names into +// groups and decide which ones to walk. +type RefGrouper interface { + // Categorize tells whether `refname` should be walked at all, + // and if so, the symbols of the reference groups to which it + // belongs. + Categorize(refname string) (bool, []RefGroupSymbol) + + // Groups returns the list of `ReferenceGroup`s, in the order + // that they should be presented. The return value might + // depend on which references have been seen so far. + Groups() []RefGroup +} + +type refSeen struct { + git.Reference + walked bool + groups []RefGroupSymbol +} + +func CollectReferences( + ctx context.Context, repo *git.Repository, rg RefGrouper, +) ([]refSeen, error) { + refIter, err := repo.NewReferenceIter(ctx) + if err != nil { + return nil, err + } + + var refsSeen []refSeen + for { + ref, ok, err := refIter.Next() + if err != nil { + return nil, err + } + if !ok { + return refsSeen, nil + } + + walk, groups := rg.Categorize(ref.Refname) + + refsSeen = append( + refsSeen, + refSeen{ + Reference: ref, + walked: walk, + groups: groups, + }, + ) + } +} From fdfa791791c392324ec0cde0e42d070f6c9b96c3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 17:57:31 +0200 Subject: [PATCH 03/28] ScanRepositoryUsingGraph(): take a context argument --- git-sizer.go | 10 +++++++--- git_sizer_test.go | 11 ++++++----- sizes/graph.go | 6 ++---- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index d1e075c..6c9e7a3 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -1,6 +1,7 @@ package main import ( + "context" "encoding/json" "errors" "fmt" @@ -93,14 +94,17 @@ var ReleaseVersion string var BuildVersion string func main() { - err := mainImplementation(os.Stdout, os.Stderr, os.Args[1:]) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := mainImplementation(ctx, os.Stdout, os.Stderr, os.Args[1:]) if err != nil { fmt.Fprintf(os.Stderr, "error: %s\n", err) os.Exit(1) } } -func mainImplementation(stdout, stderr io.Writer, args []string) error { +func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []string) error { var nameStyle sizes.NameStyle = sizes.NameStyleFull var cpuprofile string var jsonOutput bool @@ -288,7 +292,7 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } - historySize, err := sizes.ScanRepositoryUsingGraph(repo, rg, nameStyle, progressMeter) + historySize, err := sizes.ScanRepositoryUsingGraph(ctx, repo, rg, nameStyle, progressMeter) if err != nil { return fmt.Errorf("error scanning repository: %w", err) } diff --git a/git_sizer_test.go b/git_sizer_test.go index 6ab132f..b08985b 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -2,6 +2,7 @@ package main_test import ( "bytes" + "context" "encoding/json" "fmt" "io" @@ -563,7 +564,7 @@ func TestBomb(t *testing.T) { newGitBomb(t, repo, 10, 10, "boom!\n") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleFull, meter.NoProgressMeter, ) require.NoError(t, err) @@ -636,7 +637,7 @@ func TestTaggedTags(t *testing.T) { require.NoError(t, cmd.Run(), "creating tag 3") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -658,7 +659,7 @@ func TestFromSubdir(t *testing.T) { require.NoError(t, cmd.Run(), "creating commit") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -711,7 +712,7 @@ func TestSubmodule(t *testing.T) { // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - mainRepo.Repository(t), + context.Background(), mainRepo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -724,7 +725,7 @@ func TestSubmodule(t *testing.T) { Path: filepath.Join(mainRepo.Path, "sub"), } h, err = sizes.ScanRepositoryUsingGraph( - submRepo2.Repository(t), + context.Background(), submRepo2.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") diff --git a/sizes/graph.go b/sizes/graph.go index a56cbc2..1b908cc 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -19,12 +19,10 @@ import ( // // It returns the size data for the repository. func ScanRepositoryUsingGraph( + ctx context.Context, repo *git.Repository, rg RefGrouper, nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { - ctx, cancel := context.WithCancel(context.TODO()) - defer cancel() - graph := NewGraph(nameStyle) refsSeen, err := CollectReferences(ctx, repo, rg) @@ -32,7 +30,7 @@ func ScanRepositoryUsingGraph( return HistorySize{}, fmt.Errorf("reading references: %w", err) } - objIter, err := repo.NewObjectIter(context.TODO()) + objIter, err := repo.NewObjectIter(ctx) if err != nil { return HistorySize{}, err } From 1a2c0b51069b8eedecac2fccf532b7e6da11a1d3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 18:00:39 +0200 Subject: [PATCH 04/28] refSeen: make type and its members public and rename it to `RefRoot` --- sizes/graph.go | 12 ++++++------ sizes/grouper.go | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index 1b908cc..59a6365 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -25,7 +25,7 @@ func ScanRepositoryUsingGraph( ) (HistorySize, error) { graph := NewGraph(nameStyle) - refsSeen, err := CollectReferences(ctx, repo, rg) + refRoots, err := CollectReferences(ctx, repo, rg) if err != nil { return HistorySize{}, fmt.Errorf("reading references: %w", err) } @@ -42,12 +42,12 @@ func ScanRepositoryUsingGraph( defer objIter.Close() errChan <- func() error { - for _, refSeen := range refsSeen { - if !refSeen.walked { + for _, refRoot := range refRoots { + if !refRoot.Walk { continue } - if err := objIter.AddRoot(refSeen.OID); err != nil { + if err := objIter.AddRoot(refRoot.OID); err != nil { return err } } @@ -261,9 +261,9 @@ func ScanRepositoryUsingGraph( } progressMeter.Start("Processing references: %d") - for _, refSeen := range refsSeen { + for _, refRoot := range refRoots { progressMeter.Inc() - graph.RegisterReference(refSeen.Reference, refSeen.walked, refSeen.groups) + graph.RegisterReference(refRoot.Reference, refRoot.Walk, refRoot.Groups) } progressMeter.Done() diff --git a/sizes/grouper.go b/sizes/grouper.go index a5b8a26..3807b0e 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -44,21 +44,21 @@ type RefGrouper interface { Groups() []RefGroup } -type refSeen struct { +type RefRoot struct { git.Reference - walked bool - groups []RefGroupSymbol + Walk bool + Groups []RefGroupSymbol } func CollectReferences( ctx context.Context, repo *git.Repository, rg RefGrouper, -) ([]refSeen, error) { +) ([]RefRoot, error) { refIter, err := repo.NewReferenceIter(ctx) if err != nil { return nil, err } - var refsSeen []refSeen + var refsSeen []RefRoot for { ref, ok, err := refIter.Next() if err != nil { @@ -72,10 +72,10 @@ func CollectReferences( refsSeen = append( refsSeen, - refSeen{ + RefRoot{ Reference: ref, - walked: walk, - groups: groups, + Walk: walk, + Groups: groups, }, ) } From 757866b5adda4d0cff52d917d48eab0dc92275ae Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 18:13:34 +0200 Subject: [PATCH 05/28] ScanRepositoryUsingGraph(): take a list of `RefRoot`s as argument --- git-sizer.go | 9 +++- git_sizer_test.go | 110 ++++++++++++++++++++++++++++++---------------- sizes/graph.go | 7 +-- 3 files changed, 81 insertions(+), 45 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 6c9e7a3..0336d13 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -292,7 +292,14 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } - historySize, err := sizes.ScanRepositoryUsingGraph(ctx, repo, rg, nameStyle, progressMeter) + refRoots, err := sizes.CollectReferences(ctx, repo, rg) + if err != nil { + return fmt.Errorf("determining which reference to scan: %w", err) + } + + historySize, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, refRoots, nameStyle, progressMeter, + ) if err != nil { return fmt.Errorf("error scanning repository: %w", err) } diff --git a/git_sizer_test.go b/git_sizer_test.go index b08985b..54d90d5 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -558,14 +558,21 @@ func (rg refGrouper) Groups() []sizes.RefGroup { func TestBomb(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, true, "bomb") - t.Cleanup(func() { repo.Remove(t) }) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, true, "bomb") + t.Cleanup(func() { testRepo.Remove(t) }) + + newGitBomb(t, testRepo, 10, 10, "boom!\n") - newGitBomb(t, repo, 10, 10, "boom!\n") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), - refGrouper{}, sizes.NameStyleFull, meter.NoProgressMeter, + ctx, repo, + refRoots, sizes.NameStyleFull, meter.NoProgressMeter, ) require.NoError(t, err) @@ -613,32 +620,39 @@ func TestBomb(t *testing.T) { func TestTaggedTags(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "tagged-tags") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "tagged-tags") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - cmd := repo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating commit") // The lexicographical order of these tags is important, hence // their strange names. - cmd = repo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 1") - cmd = repo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 2") - cmd = repo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 3") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), repo, + refRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -647,20 +661,27 @@ func TestTaggedTags(t *testing.T) { func TestFromSubdir(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "subdir") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "subdir") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - repo.AddFile(t, "subdir/file.txt", "Hello, world!\n") + testRepo.AddFile(t, "subdir/file.txt", "Hello, world!\n") - cmd := repo.GitCommand(t, "commit", "-m", "initial") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating commit") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), testRepo.Repository(t), + refRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -669,6 +690,8 @@ func TestFromSubdir(t *testing.T) { func TestSubmodule(t *testing.T) { t.Parallel() + ctx := context.Background() + tmp, err := ioutil.TempDir("", "submodule") require.NoError(t, err, "creating temporary directory") @@ -678,42 +701,47 @@ func TestSubmodule(t *testing.T) { timestamp := time.Unix(1112911993, 0) - submRepo := testutils.TestRepo{ + submTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "subm"), } - submRepo.Init(t, false) - submRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") - submRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") - submRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") + submTestRepo.Init(t, false) + submTestRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") + submTestRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") + submTestRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") - cmd := submRepo.GitCommand(t, "commit", "-m", "subm initial") + cmd := submTestRepo.GitCommand(t, "commit", "-m", "subm initial") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating subm commit") - mainRepo := testutils.TestRepo{ + mainTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "main"), } - mainRepo.Init(t, false) + mainTestRepo.Init(t, false) - mainRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") + mainTestRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") - cmd = mainRepo.GitCommand(t, "commit", "-m", "main initial") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "main initial") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = mainRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submRepo.Path, "sub") - cmd.Dir = mainRepo.Path + cmd = mainTestRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submTestRepo.Path, "sub") + cmd.Dir = mainTestRepo.Path require.NoError(t, cmd.Run(), "adding submodule") - cmd = mainRepo.GitCommand(t, "commit", "-m", "add submodule") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "add submodule") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "committing submodule to main") + mainRepo := mainTestRepo.Repository(t) + + mainRefRoots, err := sizes.CollectReferences(ctx, mainRepo, refGrouper{}) + require.NoError(t, err) + // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), mainRepo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), mainTestRepo.Repository(t), + mainRefRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -721,12 +749,18 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(1), h.MaxExpandedSubmoduleCount, "max expanded submodule count") // Analyze the submodule: - submRepo2 := testutils.TestRepo{ - Path: filepath.Join(mainRepo.Path, "sub"), + submTestRepo2 := testutils.TestRepo{ + Path: filepath.Join(mainTestRepo.Path, "sub"), } + + submRepo2 := submTestRepo2.Repository(t) + + submRefRoots2, err := sizes.CollectReferences(ctx, submRepo2, refGrouper{}) + require.NoError(t, err) + h, err = sizes.ScanRepositoryUsingGraph( - context.Background(), submRepo2.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), submRepo2, + submRefRoots2, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") diff --git a/sizes/graph.go b/sizes/graph.go index 59a6365..e9033ef 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -20,16 +20,11 @@ import ( // It returns the size data for the repository. func ScanRepositoryUsingGraph( ctx context.Context, - repo *git.Repository, rg RefGrouper, nameStyle NameStyle, + repo *git.Repository, refRoots []RefRoot, nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { graph := NewGraph(nameStyle) - refRoots, err := CollectReferences(ctx, repo, rg) - if err != nil { - return HistorySize{}, fmt.Errorf("reading references: %w", err) - } - objIter, err := repo.NewObjectIter(ctx) if err != nil { return HistorySize{}, err From 897baa1a96585fbc44238d0a536c92bf8a11f3ec Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 18:28:54 +0200 Subject: [PATCH 06/28] RefRoot: add some methods We want to add another type of root, so start the virtualization process. --- sizes/graph.go | 6 +++--- sizes/grouper.go | 17 +++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index e9033ef..660f682 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -38,11 +38,11 @@ func ScanRepositoryUsingGraph( errChan <- func() error { for _, refRoot := range refRoots { - if !refRoot.Walk { + if !refRoot.Walk() { continue } - if err := objIter.AddRoot(refRoot.OID); err != nil { + if err := objIter.AddRoot(refRoot.OID()); err != nil { return err } } @@ -258,7 +258,7 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing references: %d") for _, refRoot := range refRoots { progressMeter.Inc() - graph.RegisterReference(refRoot.Reference, refRoot.Walk, refRoot.Groups) + graph.RegisterReference(refRoot.Reference(), refRoot.Walk(), refRoot.Groups()) } progressMeter.Done() diff --git a/sizes/grouper.go b/sizes/grouper.go index 3807b0e..32d63ca 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -45,11 +45,16 @@ type RefGrouper interface { } type RefRoot struct { - git.Reference - Walk bool - Groups []RefGroupSymbol + ref git.Reference + walk bool + groups []RefGroupSymbol } +func (rr RefRoot) OID() git.OID { return rr.ref.OID } +func (rr RefRoot) Reference() git.Reference { return rr.ref } +func (rr RefRoot) Walk() bool { return rr.walk } +func (rr RefRoot) Groups() []RefGroupSymbol { return rr.groups } + func CollectReferences( ctx context.Context, repo *git.Repository, rg RefGrouper, ) ([]RefRoot, error) { @@ -73,9 +78,9 @@ func CollectReferences( refsSeen = append( refsSeen, RefRoot{ - Reference: ref, - Walk: walk, - Groups: groups, + ref: ref, + walk: walk, + groups: groups, }, ) } From 9e8b14fe3012f05c163ffdf79a32bcb2b48ea422 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 20:14:59 +0200 Subject: [PATCH 07/28] Allow arbitrary reachability roots to be fed in Instead of only traversing objects starting at references, allow the user to specify explicit Git objects via the command line. In that case, the traversal includes objects reachable from those objects. --- git-sizer.go | 50 ++++++-- git/obj_resolver.go | 20 +++ git/ref_filter.go | 16 ++- git_sizer_test.go | 178 ++++++++++++++++++-------- internal/refopts/ref_group_builder.go | 9 +- sizes/explicit_root.go | 19 +++ sizes/graph.go | 41 ++++-- sizes/grouper.go | 1 + sizes/path_resolver.go | 60 +++++---- 9 files changed, 290 insertions(+), 104 deletions(-) create mode 100644 git/obj_resolver.go create mode 100644 sizes/explicit_root.go diff --git a/git-sizer.go b/git-sizer.go index 0336d13..7cfd6ff 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -20,7 +20,9 @@ import ( "github.com/github/git-sizer/sizes" ) -const usage = `usage: git-sizer [OPTS] +const usage = `usage: git-sizer [OPTS] [ROOT...] + + Scan objects in your Git repository and emit statistics about them. --threshold THRESHOLD minimum level of concern (i.e., number of stars) that should be reported. Default: @@ -46,12 +48,29 @@ const usage = `usage: git-sizer [OPTS] be set via gitconfig: 'sizer.progress'. --version only report the git-sizer version number + Object selection: + + git-sizer traverses through your Git history to find objects to + process. By default, it processes all objects that are reachable from + any reference. You can tell it to process only some of your + references; see "Reference selection" below. + + If explicit ROOTs are specified on the command line, each one should + be a string that 'git rev-parse' can convert into a single Git object + ID, like 'main', 'main~:src', or an abbreviated SHA-1. See + git-rev-parse(1) for details. In that case, git-sizer also treats + those objects as starting points for its traversal, and also includes + the Git objects that are reachable from those roots in the analysis. + + As a special case, if one or more ROOTs are specified on the command + line but _no_ reference selection options, then _only_ the specified + ROOTs are traversed, and no references. + Reference selection: - By default, git-sizer processes all Git objects that are reachable - from any reference. The following options can be used to limit which - references to process. The last rule matching a reference determines - whether that reference is processed. + The following options can be used to limit which references to + process. The last rule matching a reference determines whether that + reference is processed. --[no-]branches process [don't process] branches --[no-]tags process [don't process] tags @@ -220,10 +239,6 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st return nil } - if len(flags.Args()) != 0 { - return errors.New("excess arguments") - } - if repoErr != nil { return fmt.Errorf("couldn't open Git repository: %w", repoErr) } @@ -277,7 +292,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st progress = v } - rg, err := rgb.Finish() + rg, err := rgb.Finish(len(flags.Args()) == 0) if err != nil { return err } @@ -297,8 +312,21 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st return fmt.Errorf("determining which reference to scan: %w", err) } + roots := make([]sizes.Root, 0, len(refRoots)+len(flags.Args())) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + for _, arg := range flags.Args() { + oid, err := repo.ResolveObject(arg) + if err != nil { + return fmt.Errorf("resolving command-line argument %q: %w", arg, err) + } + roots = append(roots, sizes.NewExplicitRoot(arg, oid)) + } + historySize, err := sizes.ScanRepositoryUsingGraph( - ctx, repo, refRoots, nameStyle, progressMeter, + ctx, repo, roots, nameStyle, progressMeter, ) if err != nil { return fmt.Errorf("error scanning repository: %w", err) diff --git a/git/obj_resolver.go b/git/obj_resolver.go new file mode 100644 index 0000000..418e293 --- /dev/null +++ b/git/obj_resolver.go @@ -0,0 +1,20 @@ +package git + +import ( + "bytes" + "fmt" +) + +func (repo *Repository) ResolveObject(name string) (OID, error) { + cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name) + output, err := cmd.Output() + if err != nil { + return NullOID, fmt.Errorf("resolving object %q: %w", name, err) + } + oidString := string(bytes.TrimSpace(output)) + oid, err := NewOID(oidString) + if err != nil { + return NullOID, fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) + } + return oid, nil +} diff --git a/git/ref_filter.go b/git/ref_filter.go index 8eb8a9b..46aff66 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -83,15 +83,23 @@ func (_ allReferencesFilter) Filter(_ string) bool { var AllReferencesFilter allReferencesFilter +type noReferencesFilter struct{} + +func (_ noReferencesFilter) Filter(_ string) bool { + return false +} + +var NoReferencesFilter noReferencesFilter + // PrefixFilter returns a `ReferenceFilter` that matches references // whose names start with the specified `prefix`, which must match at // a component boundary. For example, // -// * Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not -// "refs/foobar". +// - Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not +// "refs/foobar". // -// * Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or -// "refs/foobar". +// - Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or +// "refs/foobar". func PrefixFilter(prefix string) ReferenceFilter { if prefix == "" { return AllReferencesFilter diff --git a/git_sizer_test.go b/git_sizer_test.go index 54d90d5..16d58c9 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -567,54 +567,112 @@ func TestBomb(t *testing.T) { repo := testRepo.Repository(t) - refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) - require.NoError(t, err) + t.Run("full", func(t *testing.T) { + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) - h, err := sizes.ScanRepositoryUsingGraph( - ctx, repo, - refRoots, sizes.NameStyleFull, meter.NoProgressMeter, - ) - require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") + assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.BestPath(), "max commit size commit") + assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.BestPath(), "max parent count commit") + + assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) + + t.Run("partial", func(t *testing.T) { + name := "master:d0/d0" + oid, err := repo.ResolveObject(name) + require.NoError(t, err) + roots := []sizes.Root{sizes.NewExplicitRoot(name, oid)} - assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") - assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") - assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") - assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.Path(), "max commit size commit") - assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") - assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") - assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.Path(), "max parent count commit") - - assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") - assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") - assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") - assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") - assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.Path(), "max tree entries tree") - - assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") - assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") - assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") - assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.Path(), "max blob size blob") - - assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") - assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") - - assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") - - assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.Path(), "max path depth tree") - assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.Path(), "max path length tree") - - assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.Path(), "max expanded tree count tree") - assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.Path(), "max expanded blob count tree") - assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.Path(), "max expanded blob size tree") - assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") - assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") - assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") - assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(0), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(0), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(0), h.MaxCommitSize, "max commit size") + assert.Nil(t, h.MaxCommitSizeCommit) + assert.Equal(t, counts.Count32(0), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Nil(t, h.MaxParentCountCommit, "max parent count commit") + + assert.Equal(t, counts.Count32(8), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2330), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(80), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(0), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(8), h.MaxPathDepth, "max path depth") + assert.Equal(t, "master:d0/d0", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(23), h.MaxPathLength, "max path length") + assert.Equal(t, "master:d0/d0", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 8)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(pow(10, 8)), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 8)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) } func TestTaggedTags(t *testing.T) { @@ -650,9 +708,14 @@ func TestTaggedTags(t *testing.T) { refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( context.Background(), repo, - refRoots, sizes.NameStyleNone, meter.NoProgressMeter, + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -679,9 +742,14 @@ func TestFromSubdir(t *testing.T) { refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( context.Background(), testRepo.Repository(t), - refRoots, sizes.NameStyleNone, meter.NoProgressMeter, + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -738,10 +806,15 @@ func TestSubmodule(t *testing.T) { mainRefRoots, err := sizes.CollectReferences(ctx, mainRepo, refGrouper{}) require.NoError(t, err) + mainRoots := make([]sizes.Root, 0, len(mainRefRoots)) + for _, refRoot := range mainRefRoots { + mainRoots = append(mainRoots, refRoot) + } + // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( context.Background(), mainTestRepo.Repository(t), - mainRefRoots, sizes.NameStyleNone, meter.NoProgressMeter, + mainRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -758,9 +831,14 @@ func TestSubmodule(t *testing.T) { submRefRoots2, err := sizes.CollectReferences(ctx, submRepo2, refGrouper{}) require.NoError(t, err) + submRoots2 := make([]sizes.Root, 0, len(submRefRoots2)) + for _, refRoot := range submRefRoots2 { + submRoots2 = append(submRoots2, refRoot) + } + h, err = sizes.ScanRepositoryUsingGraph( context.Background(), submRepo2, - submRefRoots2, sizes.NameStyleNone, meter.NoProgressMeter, + submRoots2, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 3c3179e..48f1190 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -254,9 +254,14 @@ func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { // Finish collects the information gained from processing the options // and returns a `sizes.RefGrouper`. -func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { +func (rgb *RefGroupBuilder) Finish(defaultAll bool) (sizes.RefGrouper, error) { if rgb.topLevelGroup.filter == nil { - rgb.topLevelGroup.filter = git.AllReferencesFilter + // User didn't specify any reference options. + if defaultAll { + rgb.topLevelGroup.filter = git.AllReferencesFilter + } else { + rgb.topLevelGroup.filter = git.NoReferencesFilter + } } refGrouper := refGrouper{ diff --git a/sizes/explicit_root.go b/sizes/explicit_root.go new file mode 100644 index 0000000..09348db --- /dev/null +++ b/sizes/explicit_root.go @@ -0,0 +1,19 @@ +package sizes + +import "github.com/github/git-sizer/git" + +type ExplicitRoot struct { + name string + oid git.OID +} + +func NewExplicitRoot(name string, oid git.OID) ExplicitRoot { + return ExplicitRoot{ + name: name, + oid: oid, + } +} + +func (er ExplicitRoot) Name() string { return er.name } +func (er ExplicitRoot) OID() git.OID { return er.oid } +func (er ExplicitRoot) Walk() bool { return true } diff --git a/sizes/graph.go b/sizes/graph.go index 660f682..0fb1c8a 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -11,6 +11,18 @@ import ( "github.com/github/git-sizer/meter" ) +type Root interface { + Name() string + OID() git.OID + Walk() bool +} + +type ReferenceRoot interface { + Root + Reference() git.Reference + Groups() []RefGroupSymbol +} + // ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which // references to scan and how to group them. `nameStyle` specifies // whether the output should include full names, hashes only, or @@ -20,7 +32,9 @@ import ( // It returns the size data for the repository. func ScanRepositoryUsingGraph( ctx context.Context, - repo *git.Repository, refRoots []RefRoot, nameStyle NameStyle, + repo *git.Repository, + roots []Root, + nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { graph := NewGraph(nameStyle) @@ -37,12 +51,12 @@ func ScanRepositoryUsingGraph( defer objIter.Close() errChan <- func() error { - for _, refRoot := range refRoots { - if !refRoot.Walk() { + for _, root := range roots { + if !root.Walk() { continue } - if err := objIter.AddRoot(refRoot.OID()); err != nil { + if err := objIter.AddRoot(root.OID()); err != nil { return err } } @@ -256,9 +270,15 @@ func ScanRepositoryUsingGraph( } progressMeter.Start("Processing references: %d") - for _, refRoot := range refRoots { + for _, root := range roots { progressMeter.Inc() - graph.RegisterReference(refRoot.Reference(), refRoot.Walk(), refRoot.Groups()) + if refRoot, ok := root.(ReferenceRoot); ok { + graph.RegisterReference(refRoot.Reference(), refRoot.Groups()) + } + + if root.Walk() { + graph.pathResolver.RecordName(root.Name(), root.OID()) + } } progressMeter.Done() @@ -310,17 +330,18 @@ func NewGraph(nameStyle NameStyle) *Graph { } // RegisterReference records the specified reference in `g`. -func (g *Graph) RegisterReference(ref git.Reference, walked bool, groups []RefGroupSymbol) { +func (g *Graph) RegisterReference(ref git.Reference, groups []RefGroupSymbol) { g.historyLock.Lock() g.historySize.recordReference(g, ref) for _, group := range groups { g.historySize.recordReferenceGroup(g, group) } g.historyLock.Unlock() +} - if walked { - g.pathResolver.RecordReference(ref) - } +// Register a name that can be used for the specified OID. +func (g *Graph) RegisterName(name string, oid git.OID) { + g.pathResolver.RecordName(name, oid) } // HistorySize returns the size data that have been collected. diff --git a/sizes/grouper.go b/sizes/grouper.go index 32d63ca..fdaa927 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -50,6 +50,7 @@ type RefRoot struct { groups []RefGroupSymbol } +func (rr RefRoot) Name() string { return rr.ref.Refname } func (rr RefRoot) OID() git.OID { return rr.ref.OID } func (rr RefRoot) Reference() git.Reference { return rr.ref } func (rr RefRoot) Walk() bool { return rr.walk } diff --git a/sizes/path_resolver.go b/sizes/path_resolver.go index 2a3bb1c..275d19a 100644 --- a/sizes/path_resolver.go +++ b/sizes/path_resolver.go @@ -12,15 +12,15 @@ import ( // `rev-parse` input, including commit and/or file path) by which // specified objects are reachable. It is used as follows: // -// * Request an object's path using `RequestPath()`. The returned -// `Path` object is a placeholder for the object's path. +// - Request an object's path using `RequestPath()`. The returned +// `Path` object is a placeholder for the object's path. // -// * Tell the `PathResolver` about objects that might be along the -// object's reachability path, *in depth-first* order (i.e., -// referents before referers) by calling `RecordTree()`, -// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. +// - Tell the `PathResolver` about objects that might be along the +// object's reachability path, *in depth-first* order (i.e., +// referents before referers) by calling `RecordTree()`, +// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. // -// * Read the path out of the `Path` object using `Path.Path()`. +// - Read the path out of the `Path` object using `Path.Path()`. // // Multiple objects can be processed at once. // @@ -34,7 +34,7 @@ import ( type PathResolver interface { RequestPath(oid git.OID, objectType string) *Path ForgetPath(p *Path) - RecordReference(ref git.Reference) + RecordName(name string, oid git.OID) RecordTreeEntry(oid git.OID, name string, childOID git.OID) RecordCommit(oid, tree git.OID) RecordTag(oid git.OID, tag *git.Tag) @@ -60,7 +60,7 @@ func (n NullPathResolver) RequestPath(oid git.OID, objectType string) *Path { func (_ NullPathResolver) ForgetPath(p *Path) {} -func (_ NullPathResolver) RecordReference(ref git.Reference) {} +func (_ NullPathResolver) RecordName(name string, oid git.OID) {} func (_ NullPathResolver) RecordTreeEntry(oid git.OID, name string, childOID git.OID) {} @@ -77,19 +77,19 @@ type InOrderPathResolver struct { // (e.g., the biggest blob, or a tree containing the biggest blob, or // a commit whose tree contains the biggest blob). Valid states: // -// * `parent == nil && relativePath == ""`—we have not yet found -// anything that refers to this object. +// - `parent == nil && relativePath == ""`—we have not yet found +// anything that refers to this object. // -// * `parent != nil && relativePath == ""`—this object is a tree, and -// we have found a commit that refers to it. +// - `parent != nil && relativePath == ""`—this object is a tree, and +// we have found a commit that refers to it. // -// * `parent == nil && relativePath != ""`—we have found a reference -// that points directly at this object; `relativePath` is the full -// name of the reference. +// - `parent == nil && relativePath != ""`—we have found a reference +// that points directly at this object; `relativePath` is the full +// name of the reference. // -// * `parent != nil && relativePath != ""`—this object is a blob or -// tree, and we have found another tree that refers to it; -// `relativePath` is the corresponding tree entry name. +// - `parent != nil && relativePath != ""`—this object is a blob or +// tree, and we have found another tree that refers to it; +// `relativePath` is the corresponding tree entry name. type Path struct { // The OID of the object whose path we seek. This member is always // set. @@ -122,7 +122,8 @@ type Path struct { func (p *Path) TreePrefix() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return p.parent.TreePrefix() @@ -130,7 +131,9 @@ func (p *Path) TreePrefix() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath + "/" } - } else { + case p.relativePath != "": + return p.relativePath + "/" + default: return "???" } case "commit", "tag": @@ -153,7 +156,8 @@ func (p *Path) TreePrefix() string { func (p *Path) Path() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return fmt.Sprintf("%s^{%s}", p.parent.BestPath(), p.objectType) @@ -161,7 +165,9 @@ func (p *Path) Path() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath } - } else { + case p.relativePath != "": + return p.relativePath + default: return "" } case "commit", "tag": @@ -274,18 +280,18 @@ func (pr *InOrderPathResolver) forgetPathLocked(p *Path) { } } -func (pr *InOrderPathResolver) RecordReference(ref git.Reference) { +func (pr *InOrderPathResolver) RecordName(name string, oid git.OID) { pr.lock.Lock() defer pr.lock.Unlock() - p, ok := pr.soughtPaths[ref.OID] + p, ok := pr.soughtPaths[oid] if !ok { // Nobody is looking for the path to the referent. return } - p.relativePath = ref.Refname - delete(pr.soughtPaths, ref.OID) + p.relativePath = name + delete(pr.soughtPaths, oid) } // Record that the tree with OID `oid` has an entry with the specified From 5d339ec292a3cc126f802efa98de90ea6a804626 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 19 Aug 2023 15:25:51 +0200 Subject: [PATCH 08/28] There's no reason to make this context cancelable --- git-sizer.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 7cfd6ff..0888d78 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -113,8 +113,7 @@ var ReleaseVersion string var BuildVersion string func main() { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() + ctx := context.Background() err := mainImplementation(ctx, os.Stdout, os.Stderr, os.Args[1:]) if err != nil { From e8d9c2eebde3389f80ec8a67a9d45f907d57298a Mon Sep 17 00:00:00 2001 From: rajhawaldar Date: Sat, 23 Sep 2023 10:34:59 +0530 Subject: [PATCH 09/28] Update the installation steps to use 'go install' Signed-off-by: rajhawaldar --- docs/BUILDING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/BUILDING.md b/docs/BUILDING.md index a977a2c..7f9fdef 100644 --- a/docs/BUILDING.md +++ b/docs/BUILDING.md @@ -7,11 +7,11 @@ Most people can just install a released version of `git-sizer`, [as described in 1. Make sure that you have a recent version of the [Go language toolchain](https://golang.org/doc/install) installed and that you have set `GOPATH`. -2. Get `git-sizer` using `go get`: +2. Get `git-sizer` using `go install`: - go get github.com/github/git-sizer + go install github.com/github/git-sizer@latest - This should fetch and compile the source code and write the executable file to `$GOPATH/bin/`. + This should install the executable file to `$GOPATH/bin/`. 3. Either add `$GOPATH/bin` to your `PATH`, or copy the executable file (`git-sizer` or `git-sizer.exe`) to a directory that is already in your `PATH`. From 1b0ecde670f17563805ee2f297155f9faf2c1f24 Mon Sep 17 00:00:00 2001 From: elhmn Date: Fri, 3 Nov 2023 11:26:36 +0100 Subject: [PATCH 10/28] Upgrade build scripts to go1.21 --- script/ensure-go-installed.sh | 6 +++--- script/install-vendored-go | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/script/ensure-go-installed.sh b/script/ensure-go-installed.sh index 3111b9e..1e301fd 100644 --- a/script/ensure-go-installed.sh +++ b/script/ensure-go-installed.sh @@ -4,17 +4,17 @@ if [ -z "$ROOTDIR" ]; then echo 1>&2 'ensure-go-installed.sh invoked without ROOTDIR set!' fi -# Is go installed, and at least 1.16? +# Is go installed, and at least 1.21? go_ok() { set -- $(go version 2>/dev/null | sed -n 's/.*go\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\1 \2/p' | head -n 1) - [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 16 ] + [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 21 ] } # If a local go is installed, use it. set_up_vendored_go() { - GO_VERSION=go1.16.3 + GO_VERSION=go1.21.3 VENDORED_GOROOT="$ROOTDIR/vendor/$GO_VERSION/go" if [ -x "$VENDORED_GOROOT/bin/go" ]; then export GOROOT="$VENDORED_GOROOT" diff --git a/script/install-vendored-go b/script/install-vendored-go index 2407618..45ace01 100755 --- a/script/install-vendored-go +++ b/script/install-vendored-go @@ -1,20 +1,21 @@ #!/bin/sh # The checksums below must correspond to the downloads for this version. -GO_VERSION=go1.16.3 +# The checksums can be found on https://go.dev/dl +GO_VERSION=go1.21.3 case "$(uname -s):$(uname -m)" in Linux:x86_64) GO_PKG=${GO_VERSION}.linux-amd64.tar.gz - GO_PKG_SHA=951a3c7c6ce4e56ad883f97d9db74d3d6d80d5fec77455c6ada6c1f7ac4776d2 + GO_PKG_SHA=1241381b2843fae5a9707eec1f8fb2ef94d827990582c7c7c32f5bdfbfd420c8 ;; Darwin:x86_64) GO_PKG=${GO_VERSION}.darwin-amd64.tar.gz - GO_PKG_SHA=6bb1cf421f8abc2a9a4e39140b7397cdae6aca3e8d36dcff39a1a77f4f1170ac + GO_PKG_SHA=27014fc69e301d7588a169ca239b3cc609f0aa1abf38528bf0d20d3b259211eb ;; Darwin:arm64) GO_PKG=${GO_VERSION}.darwin-arm64.tar.gz - GO_PKG_SHA=f4e96bbcd5d2d1942f5b55d9e4ab19564da4fad192012f6d7b0b9b055ba4208f + GO_PKG_SHA=65302a7a9f7a4834932b3a7a14cb8be51beddda757b567a2f9e0cbd0d7b5a6ab ;; *) echo 1>&2 "I don't know how to install Go on your platform." From b1712756e47dd4f761b26fa326afab2e9b47f252 Mon Sep 17 00:00:00 2001 From: elhmn Date: Thu, 9 Nov 2023 16:15:51 +0100 Subject: [PATCH 11/28] Generate automatic draft release We needed a way to generate draft releases for git-sizer binaries. This commit adds a new `.github/workflows/release.yml` github action that will generate a draft release when a new tag version is pushed. the action will be triggered After the tag is created and pushed using: ``` git tag -as v$VERSION git push origin v$VERSION ``` --- .github/workflows/release.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..58af3d6 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,34 @@ +name: Release + +on: + push: + tags: + - "v*" + +permissions: + contents: write + +jobs: + lint: + name: Release + runs-on: ubuntu-latest + steps: + - name: Setup + uses: + actions/setup-go@v4 + with: + go-version: 1.21 + + - name: Checkout + uses: actions/checkout@v4 + + - name: Build releases + run: | + make releases VERSION=$GITHUB_REF_NAME + + - name: Release + uses: softprops/action-gh-release@v1 + with: + draft: true + files: | + releases/git-sizer-* From 2ed1053ff9d440ec0e405e2f25157e25926633dd Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 13:45:06 +0100 Subject: [PATCH 12/28] Stop using deprecated function `ioutil.TempDir()` --- git_sizer_test.go | 3 +-- internal/testutils/repoutils.go | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 16d58c9..fbf470d 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -6,7 +6,6 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -760,7 +759,7 @@ func TestSubmodule(t *testing.T) { ctx := context.Background() - tmp, err := ioutil.TempDir("", "submodule") + tmp, err := os.MkdirTemp("", "submodule") require.NoError(t, err, "creating temporary directory") defer func() { diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 60a2f9b..954cff4 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -4,7 +4,6 @@ import ( "bytes" "fmt" "io" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -29,7 +28,7 @@ type TestRepo struct { func NewTestRepo(t *testing.T, bare bool, pattern string) *TestRepo { t.Helper() - path, err := ioutil.TempDir("", pattern) + path, err := os.MkdirTemp("", pattern) require.NoError(t, err) repo := TestRepo{Path: path} @@ -73,7 +72,7 @@ func (repo *TestRepo) Remove(t *testing.T) { func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { t.Helper() - path, err := ioutil.TempDir("", pattern) + path, err := os.MkdirTemp("", pattern) require.NoError(t, err) err = repo.GitCommand( From c20cbb8693f82594b73d4a279390a1c7aa2b7644 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 12:45:42 +0100 Subject: [PATCH 13/28] Repository.gitDir: rename member from `path` The name `gitDir` is less ambiguous. Also rename method `Path()` to `GitDir()`. --- git/git.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/git/git.go b/git/git.go index f451c54..5531a6f 100644 --- a/git/git.go +++ b/git/git.go @@ -15,7 +15,10 @@ type ObjectType string // Repository represents a Git repository on disk. type Repository struct { - path string + // gitDir is the path to the `GIT_DIR` for this repository. It + // might be absolute or it might be relative to the current + // directory. + gitDir string // gitBin is the path of the `git` executable that should be used // when running commands in this repository. @@ -79,7 +82,7 @@ func NewRepository(path string) (*Repository, error) { } return &Repository{ - path: gitDir, + gitDir: gitDir, gitBin: gitBin, }, nil } @@ -103,7 +106,7 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { cmd.Env = append( os.Environ(), - "GIT_DIR="+repo.path, + "GIT_DIR="+repo.gitDir, // Disable grafts when running our commands: "GIT_GRAFT_FILE="+os.DevNull, ) @@ -111,7 +114,8 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { return cmd } -// Path returns the path to `repo`. -func (repo *Repository) Path() string { - return repo.path +// GitDir returns the path to `repo`'s `GIT_DIR`. It might be absolute +// or it might be relative to the current directory. +func (repo *Repository) GitDir() string { + return repo.gitDir } From 29fc88208a3a38f54fda3e7e555469bd6c8fff29 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 12:58:23 +0100 Subject: [PATCH 14/28] Repository.GitPath(): new method, extracted from `NewRepository()` Add a method `Repository.GitPath(relPath)`, which invokes `git rev-parse --git-path $relPath` to find the path to a file within the Git repository. In `NewRepository()`, instantiate the `Repository` object earlier so that the new method can be used to find the path to `shallow`. --- git/git.go | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/git/git.go b/git/git.go index 5531a6f..cba262d 100644 --- a/git/git.go +++ b/git/git.go @@ -66,25 +66,22 @@ func NewRepository(path string) (*Repository, error) { } gitDir := smartJoin(path, string(bytes.TrimSpace(out))) - //nolint:gosec // `gitBin` is chosen carefully. - cmd = exec.Command(gitBin, "rev-parse", "--git-path", "shallow") - cmd.Dir = gitDir - out, err = cmd.Output() + repo := Repository{ + gitDir: gitDir, + gitBin: gitBin, + } + + shallow, err := repo.GitPath("shallow") if err != nil { - return nil, fmt.Errorf( - "could not run 'git rev-parse --git-path shallow': %w", err, - ) + return nil, err } - shallow := smartJoin(gitDir, string(bytes.TrimSpace(out))) + _, err = os.Lstat(shallow) if err == nil { return nil, errors.New("this appears to be a shallow clone; full clone required") } - return &Repository{ - gitDir: gitDir, - gitBin: gitBin, - }, nil + return &repo, nil } func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { @@ -119,3 +116,20 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { func (repo *Repository) GitDir() string { return repo.gitDir } + +// GitPath returns that path of a file within the git repository, by +// calling `git rev-parse --git-path $relPath`. The returned path is +// relative to the current directory. +func (repo *Repository) GitPath(relPath string) (string, error) { + cmd := repo.GitCommand("rev-parse", "--git-path", relPath) + out, err := cmd.Output() + if err != nil { + return "", fmt.Errorf( + "running 'git rev-parse --git-path %s': %w", relPath, err, + ) + } + // `git rev-parse --git-path` is documented to return the path + // relative to the current directory. Since we haven't changed the + // current directory, we can use it as-is: + return string(bytes.TrimSpace(out)), nil +} From 1d75c744e2ed1ad45f469a356897b0e07ba9b7a2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 13:15:08 +0100 Subject: [PATCH 15/28] Repository.IsFull(): new method Extract a method to determine whether the repository seems to be a full clone. Call it from `NewRepository()`. --- git/git.go | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/git/git.go b/git/git.go index cba262d..a82d14c 100644 --- a/git/git.go +++ b/git/git.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "io/fs" "os" "os/exec" "path/filepath" @@ -71,17 +72,36 @@ func NewRepository(path string) (*Repository, error) { gitBin: gitBin, } + full, err := repo.IsFull() + if err != nil { + return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err) + } + if !full { + return nil, errors.New("this appears to be a shallow clone; full clone required") + } + + return &repo, nil +} + +// IsFull returns `true` iff `repo` appears to be a full clone. +func (repo *Repository) IsFull() (bool, error) { shallow, err := repo.GitPath("shallow") if err != nil { - return nil, err + return false, err } _, err = os.Lstat(shallow) if err == nil { - return nil, errors.New("this appears to be a shallow clone; full clone required") + return false, nil } - return &repo, nil + if !errors.Is(err, fs.ErrNotExist) { + return false, err + } + + // The `shallow` file is absent, which is what we expect + // for a full clone. + return true, nil } func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { From 39102dfaa3c2fc57e53c9a909042bee382af1d11 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 13:27:01 +0100 Subject: [PATCH 16/28] findGitBin(): memoize the result --- git/git_bin.go | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/git/git_bin.go b/git/git_bin.go index fc03435..526e9bb 100644 --- a/git/git_bin.go +++ b/git/git_bin.go @@ -2,10 +2,20 @@ package git import ( "path/filepath" + "sync" "github.com/cli/safeexec" ) +// This variable will be used to memoize the result of `findGitBin()`, +// since its return value only depends on the environment. +var gitBinMemo struct { + once sync.Once + + gitBin string + err error +} + // findGitBin finds the `git` binary in PATH that should be used by // the rest of `git-sizer`. It uses `safeexec` to find the executable, // because on Windows, `exec.Cmd` looks not only in PATH, but also in @@ -13,15 +23,20 @@ import ( // being scanned is hostile and non-bare because it might possibly // contain an executable file named `git`. func findGitBin() (string, error) { - gitBin, err := safeexec.LookPath("git") - if err != nil { - return "", err - } + gitBinMemo.once.Do(func() { + p, err := safeexec.LookPath("git") + if err != nil { + gitBinMemo.err = err + return + } - gitBin, err = filepath.Abs(gitBin) - if err != nil { - return "", err - } + p, err = filepath.Abs(p) + if err != nil { + gitBinMemo.err = err + return + } - return gitBin, nil + gitBinMemo.gitBin = p + }) + return gitBinMemo.gitBin, gitBinMemo.err } From 51cf26bdfd5f80d278cc274427d91d593b585235 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 17:53:15 +0100 Subject: [PATCH 17/28] smartJoin(): improve docstring --- git/git.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/git/git.go b/git/git.go index a82d14c..3f51c53 100644 --- a/git/git.go +++ b/git/git.go @@ -26,9 +26,10 @@ type Repository struct { gitBin string } -// smartJoin returns the path that can be described as `relPath` -// relative to `path`, given that `path` is either absolute or is -// relative to the current directory. +// smartJoin returns `relPath` if it is an absolute path. If not, it +// assumes that `relPath` is relative to `path`, so it joins them +// together and returns the result. In that case, if `path` itself is +// relative, then the return value is also relative. func smartJoin(path, relPath string) string { if filepath.IsAbs(relPath) { return relPath From 02928f10bf9a42654333abc5d288c3e36b405477 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 13:35:19 +0100 Subject: [PATCH 18/28] NewRepositoryFromGitDir(): new function If you already have the desired `GIT_DIR`, there's no need to determine it from the current path. --- git/git.go | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/git/git.go b/git/git.go index 3f51c53..60a468a 100644 --- a/git/git.go +++ b/git/git.go @@ -37,8 +37,36 @@ func smartJoin(path, relPath string) string { return filepath.Join(path, relPath) } -// NewRepository creates a new repository object that can be used for -// running `git` commands within that repository. +// NewRepositoryFromGitDir creates a new `Repository` object that can +// be used for running `git` commands, given the value of `GIT_DIR` +// for the repository. +func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { + // Find the `git` executable to be used: + gitBin, err := findGitBin() + if err != nil { + return nil, fmt.Errorf( + "could not find 'git' executable (is it in your PATH?): %w", err, + ) + } + + repo := Repository{ + gitDir: gitDir, + gitBin: gitBin, + } + + full, err := repo.IsFull() + if err != nil { + return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err) + } + if !full { + return nil, errors.New("this appears to be a shallow clone; full clone required") + } + + return &repo, nil +} + +// NewRepository creates a new `Repository` object that can be used +// for running `git` commands within `path`. func NewRepository(path string) (*Repository, error) { // Find the `git` executable to be used: gitBin, err := findGitBin() @@ -68,20 +96,7 @@ func NewRepository(path string) (*Repository, error) { } gitDir := smartJoin(path, string(bytes.TrimSpace(out))) - repo := Repository{ - gitDir: gitDir, - gitBin: gitBin, - } - - full, err := repo.IsFull() - if err != nil { - return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err) - } - if !full { - return nil, errors.New("this appears to be a shallow clone; full clone required") - } - - return &repo, nil + return NewRepositoryFromGitDir(gitDir) } // IsFull returns `true` iff `repo` appears to be a full clone. From f9aec5023a77e9336b6ec2f29bad7804caca57a6 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 17:56:00 +0100 Subject: [PATCH 19/28] NewRepositoryFromPath(): function renamed from `NewRepository()` --- git-sizer.go | 2 +- git/git.go | 9 +++++---- internal/testutils/repoutils.go | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 0888d78..1ef9812 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -134,7 +134,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st // Try to open the repository, but it's not an error yet if this // fails, because the user might only be asking for `--help`. - repo, repoErr := git.NewRepository(".") + repo, repoErr := git.NewRepositoryFromPath(".") flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) flags.Usage = func() { diff --git a/git/git.go b/git/git.go index 60a468a..096ce81 100644 --- a/git/git.go +++ b/git/git.go @@ -65,10 +65,11 @@ func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { return &repo, nil } -// NewRepository creates a new `Repository` object that can be used -// for running `git` commands within `path`. -func NewRepository(path string) (*Repository, error) { - // Find the `git` executable to be used: +// NewRepositoryFromPath creates a new `Repository` object that can be +// used for running `git` commands within `path`. It does so by asking +// `git` what `GIT_DIR` to use. Git, in turn, bases its decision on +// the path and the environment. +func NewRepositoryFromPath(path string) (*Repository, error) { gitBin, err := findGitBin() if err != nil { return nil, fmt.Errorf( diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 954cff4..e530925 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -89,7 +89,7 @@ func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { func (repo *TestRepo) Repository(t *testing.T) *git.Repository { t.Helper() - r, err := git.NewRepository(repo.Path) + r, err := git.NewRepositoryFromPath(repo.Path) require.NoError(t, err) return r } From d605cdb7c5e61f2d24cc29445f30255488a046c0 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 17:56:31 +0100 Subject: [PATCH 20/28] TestRepo: for bare repositories, use `NewRepositoryFromGitDir()` There's no need to deduce the `GIT_DIR` for a bare repository. --- internal/testutils/repoutils.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index e530925..48a8759 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -20,6 +20,7 @@ import ( // TestRepo represents a git repository used for tests. type TestRepo struct { Path string + bare bool } // NewTestRepo creates and initializes a test repository in a @@ -37,6 +38,7 @@ func NewTestRepo(t *testing.T, bare bool, pattern string) *TestRepo { return &TestRepo{ Path: path, + bare: bare, } } @@ -89,9 +91,15 @@ func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { func (repo *TestRepo) Repository(t *testing.T) *git.Repository { t.Helper() - r, err := git.NewRepositoryFromPath(repo.Path) - require.NoError(t, err) - return r + if repo.bare { + r, err := git.NewRepositoryFromGitDir(repo.Path) + require.NoError(t, err) + return r + } else { + r, err := git.NewRepositoryFromPath(repo.Path) + require.NoError(t, err) + return r + } } // localEnvVars is a list of the variable names that should be cleared From fb78b414e22c5c95dfb4c4847b6e7cee58b1b1af Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 12 Dec 2023 11:27:10 +0100 Subject: [PATCH 21/28] Be mindful of `safe.bareRepository` in the tests As of Git v2.38.0, there is an option to prevent Git from accessing bare repositories unless asked for explicitly (via `--git-dir` or `GIT_DIR`): `safe.bareRepository`. The tests of `git sizer`, however, assume that Git will access a bare repository when the current directory points inside that repository. This only works if `safe.bareRepository` indicates that this is safe. If that is not the case, i.e. if `safe.bareRepository` is set to `explicit`, Git demands that the environment variable `GIT_DIR` is set (either explicitly, or via `--git-dir`) when accessing bare repositories. So let's set `GIT_DIR` for the test cases that work on bare repositories. Signed-off-by: Johannes Schindelin --- git_sizer_test.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index fbf470d..8a7a2d2 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -272,7 +272,10 @@ func TestRefSelections(t *testing.T) { args := []string{"--show-refs", "--no-progress", "--json", "--json-version=2"} args = append(args, p.args...) cmd := exec.Command(executable, args...) - cmd.Dir = repo.Path + cmd.Env = append( + os.Environ(), + "GIT_DIR="+repo.Path, + ) var stdout bytes.Buffer cmd.Stdout = &stdout var stderr bytes.Buffer @@ -519,7 +522,10 @@ References (included references marked with '+'): args := append([]string{"--show-refs", "-v", "--no-progress"}, p.args...) cmd := exec.Command(executable, args...) - cmd.Dir = repo.Path + cmd.Env = append( + os.Environ(), + "GIT_DIR="+repo.Path, + ) var stdout bytes.Buffer cmd.Stdout = &stdout var stderr bytes.Buffer From 10cb5b3e54f41bf4d8d323226fb7895cfc98ab7d Mon Sep 17 00:00:00 2001 From: J23 Date: Sat, 23 Aug 2025 13:52:23 +0800 Subject: [PATCH 22/28] Introduced sha256 support for git-sizer --- git/git.go | 27 ++++++++++++-- git/obj_iter.go | 6 ++-- git/obj_resolver.go | 4 +-- git/oid.go | 63 +++++++++++++++++++++++++++------ git/tree.go | 19 +++++----- git_sizer_test.go | 37 +++++++++++++++++++ internal/testutils/repoutils.go | 2 +- sizes/graph.go | 2 +- sizes/output.go | 4 +-- 9 files changed, 135 insertions(+), 29 deletions(-) diff --git a/git/git.go b/git/git.go index 096ce81..ef3cbc6 100644 --- a/git/git.go +++ b/git/git.go @@ -24,6 +24,8 @@ type Repository struct { // gitBin is the path of the `git` executable that should be used // when running commands in this repository. gitBin string + // hashAgo is repository hash algo + hashAlgo HashAlgo } // smartJoin returns `relPath` if it is an absolute path. If not, it @@ -49,9 +51,18 @@ func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { ) } + hashAlgo := HashSHA1 + cmd := exec.Command(gitBin, "--git-dir", gitDir, "rev-parse", "--show-object-format") //nolint:gosec + if out, err := cmd.Output(); err == nil { + if string(bytes.TrimSpace(out)) == "sha256" { + hashAlgo = HashSHA256 + } + } + repo := Repository{ - gitDir: gitDir, - gitBin: gitBin, + gitDir: gitDir, + gitBin: gitBin, + hashAlgo: hashAlgo, } full, err := repo.IsFull() @@ -170,3 +181,15 @@ func (repo *Repository) GitPath(relPath string) (string, error) { // current directory, we can use it as-is: return string(bytes.TrimSpace(out)), nil } + +func (repo *Repository) HashAlgo() HashAlgo { + return repo.hashAlgo +} + +func (repo *Repository) HashSize() int { + return repo.hashAlgo.HashSize() +} + +func (repo *Repository) NullOID() OID { + return repo.hashAlgo.NullOID() +} diff --git a/git/obj_iter.go b/git/obj_iter.go index cecdc2a..c367f11 100644 --- a/git/obj_iter.go +++ b/git/obj_iter.go @@ -30,7 +30,7 @@ func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error) errCh: make(chan error), headerCh: make(chan BatchHeader), } - + hashHexSize := repo.HashSize() * 2 iter.p.Add( // Read OIDs from `iter.oidCh` and write them to `git // rev-list`: @@ -68,10 +68,10 @@ func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error) pipe.LinewiseFunction( "copy-oids", func(_ context.Context, _ pipe.Env, line []byte, stdout *bufio.Writer) error { - if len(line) < 40 { + if len(line) < hashHexSize { return fmt.Errorf("line too short: '%s'", line) } - if _, err := stdout.Write(line[:40]); err != nil { + if _, err := stdout.Write(line[:hashHexSize]); err != nil { return fmt.Errorf("writing OID to 'git cat-file': %w", err) } if err := stdout.WriteByte('\n'); err != nil { diff --git a/git/obj_resolver.go b/git/obj_resolver.go index 418e293..fbeb246 100644 --- a/git/obj_resolver.go +++ b/git/obj_resolver.go @@ -9,12 +9,12 @@ func (repo *Repository) ResolveObject(name string) (OID, error) { cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name) output, err := cmd.Output() if err != nil { - return NullOID, fmt.Errorf("resolving object %q: %w", name, err) + return repo.NullOID(), fmt.Errorf("resolving object %q: %w", name, err) } oidString := string(bytes.TrimSpace(output)) oid, err := NewOID(oidString) if err != nil { - return NullOID, fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) + return repo.NullOID(), fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) } return oid, nil } diff --git a/git/oid.go b/git/oid.go index 2aefbcb..2a2bdfc 100644 --- a/git/oid.go +++ b/git/oid.go @@ -1,32 +1,75 @@ package git import ( + "bytes" + "crypto/sha1" //nolint:gosec + "crypto/sha256" "encoding/hex" "errors" ) +const ( + HashSizeSHA256 = sha256.Size + HashSizeSHA1 = sha1.Size + HashSizeMax = HashSizeSHA256 +) + +type HashAlgo int + +const ( + HashUnknown HashAlgo = iota + HashSHA1 + HashSHA256 +) + // OID represents the SHA-1 object ID of a Git object, in binary // format. type OID struct { - v [20]byte + v [HashSizeMax]byte + hashSize int } -// NullOID is the null object ID; i.e., all zeros. -var NullOID OID +func (h HashAlgo) NullOID() OID { + switch h { + case HashSHA1: + return OID{hashSize: HashSizeSHA1} + case HashSHA256: + return OID{hashSize: HashSizeSHA256} + } + return OID{} +} + +func (h HashAlgo) HashSize() int { + switch h { + case HashSHA1: + return HashSizeSHA1 + case HashSHA256: + return HashSizeSHA256 + } + return 0 +} + +// defaultNullOID is the null object ID; i.e., all zeros. +var defaultNullOID OID + +func IsNullOID(o OID) bool { + return bytes.Equal(o.v[:], defaultNullOID.v[:]) +} // OIDFromBytes converts a byte slice containing an object ID in // binary format into an `OID`. func OIDFromBytes(oidBytes []byte) (OID, error) { var oid OID - if len(oidBytes) != len(oid.v) { + oidSize := len(oidBytes) + if oidSize != HashSizeSHA1 && oidSize != HashSizeSHA256 { return OID{}, errors.New("bytes oid has the wrong length") } - copy(oid.v[0:20], oidBytes) + oid.hashSize = oidSize + copy(oid.v[0:oidSize], oidBytes) return oid, nil } -// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40}`) -// into an `OID`. +// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40,64}`) into an `OID`. func NewOID(s string) (OID, error) { oidBytes, err := hex.DecodeString(s) if err != nil { @@ -37,18 +80,18 @@ func NewOID(s string) (OID, error) { // String formats `oid` as a string in hex format. func (oid OID) String() string { - return hex.EncodeToString(oid.v[:]) + return hex.EncodeToString(oid.v[:oid.hashSize]) } // Bytes returns a byte slice view of `oid`, in binary format. func (oid OID) Bytes() []byte { - return oid.v[:] + return oid.v[:oid.hashSize] } // MarshalJSON expresses `oid` as a JSON string with its enclosing // quotation marks. func (oid OID) MarshalJSON() ([]byte, error) { - src := oid.v[:] + src := oid.v[:oid.hashSize] dst := make([]byte, hex.EncodedLen(len(src))+2) dst[0] = '"' dst[len(dst)-1] = '"' diff --git a/git/tree.go b/git/tree.go index c31fa78..18cb3ee 100644 --- a/git/tree.go +++ b/git/tree.go @@ -10,13 +10,14 @@ import ( // Tree represents a Git tree object. type Tree struct { - data string + data string + hashSize int } // ParseTree parses the tree object whose contents are contained in // `data`. `oid` is currently unused. func ParseTree(oid OID, data []byte) (*Tree, error) { - return &Tree{string(data)}, nil + return &Tree{string(data), oid.hashSize}, nil } // Size returns the size of the tree object. @@ -36,13 +37,15 @@ type TreeEntry struct { // TreeIter is an iterator over the entries in a Git tree object. type TreeIter struct { // The as-yet-unread part of the tree's data. - data string + data string + hashSize int } // Iter returns an iterator over the entries in `tree`. func (tree *Tree) Iter() *TreeIter { return &TreeIter{ - data: tree.data, + data: tree.data, + hashSize: tree.hashSize, } } @@ -74,12 +77,12 @@ func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) { entry.Name = iter.data[:nulAt] iter.data = iter.data[nulAt+1:] - if len(iter.data) < 20 { + if len(iter.data) < iter.hashSize { return TreeEntry{}, false, errors.New("tree entry ends unexpectedly") } - - copy(entry.OID.v[0:20], iter.data[0:20]) - iter.data = iter.data[20:] + entry.OID.hashSize = iter.hashSize + copy(entry.OID.v[0:iter.hashSize], iter.data[0:iter.hashSize]) + iter.data = iter.data[iter.hashSize:] return entry, true, nil } diff --git a/git_sizer_test.go b/git_sizer_test.go index 8a7a2d2..c74b459 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -849,3 +849,40 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") assert.Equal(t, counts.Count32(3), h.MaxExpandedBlobCount, "max expanded blob count") } + +func TestSHA256(t *testing.T) { + t.Parallel() + + ctx := context.Background() + + t.Helper() + + path, err := os.MkdirTemp("", "sha256") + require.NoError(t, err) + + testRepo := testutils.TestRepo{Path: path} + defer testRepo.Remove(t) + + // Don't use `GitCommand()` because the directory might not + // exist yet: + cmd := exec.Command("git", "init", "--object-format", "sha256", testRepo.Path) + cmd.Env = testutils.CleanGitEnv() + err = cmd.Run() + require.NoError(t, err) + + timestamp := time.Unix(1112911993, 0) + + testRepo.AddFile(t, "hello.txt", "Hello, world!\n") + cmd = testRepo.GitCommand(t, "commit", "-m", "initial") + testutils.AddAuthorInfo(cmd, ×tamp) + require.NoError(t, cmd.Run(), "creating initial commit") + + cmd = testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + testutils.AddAuthorInfo(cmd, ×tamp) + require.NoError(t, cmd.Run(), "creating commit") + + repo := testRepo.Repository(t) + + _, err = sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) +} diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 48a8759..e14e487 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -165,7 +165,7 @@ func (repo *TestRepo) UpdateRef(t *testing.T, refname string, oid git.OID) { var cmd *exec.Cmd - if oid == git.NullOID { + if git.IsNullOID(oid) { cmd = repo.GitCommand(t, "update-ref", "-d", refname) } else { cmd = repo.GitCommand(t, "update-ref", refname, oid.String()) diff --git a/sizes/graph.go b/sizes/graph.go index 0fb1c8a..2101a00 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -134,7 +134,7 @@ func ScanRepositoryUsingGraph( case "tree": trees = append(trees, ObjectHeader{obj.OID, obj.ObjectSize}) case "commit": - commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, git.NullOID}) + commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, repo.NullOID()}) case "tag": tags = append(tags, ObjectHeader{obj.OID, obj.ObjectSize}) default: diff --git a/sizes/output.go b/sizes/output.go index 933cc05..037f905 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -155,7 +155,7 @@ func (i *item) Emit(t *table) { } func (i *item) Footnote(nameStyle NameStyle) string { - if i.path == nil || i.path.OID == git.NullOID { + if i.path == nil || git.IsNullOID(i.path.OID) { return "" } switch nameStyle { @@ -214,7 +214,7 @@ func (i *item) MarshalJSON() ([]byte, error) { LevelOfConcern: float64(value) / i.scale, } - if i.path != nil && i.path.OID != git.NullOID { + if i.path != nil && !git.IsNullOID(i.path.OID) { stat.ObjectName = i.path.OID.String() stat.ObjectDescription = i.path.Path() } From cf4ba45f9251b113a46f6636da087cb3a9d126a0 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 21 Nov 2025 18:47:53 +0000 Subject: [PATCH 23/28] workflows: add document header This is a best practice and yamllint warns about omitting it. --- .github/workflows/lint.yml | 1 + .github/workflows/release.yml | 1 + .github/workflows/test.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 52a9f07..0b08cfe 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,3 +1,4 @@ +--- name: Lint on: push: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 58af3d6..b35a733 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,3 +1,4 @@ +--- name: Release on: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f658b81..9340467 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,3 +1,4 @@ +--- on: [push, pull_request] name: Test jobs: From 3ca5f0e3dcf46c416dbea72976aa225575ee650a Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 21 Nov 2025 18:50:42 +0000 Subject: [PATCH 24/28] workflows: add permissions block We'd like to run GitHub Actions with the least possible permissions assigned to the token for security reasons. To make this possible, let's add a permissions block to each workflow that lacks one. --- .github/workflows/lint.yml | 3 +++ .github/workflows/test.yml | 2 ++ 2 files changed, 5 insertions(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 0b08cfe..f8cfb4b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -12,6 +12,9 @@ on: - go.mod - go.sum +permissions: + contents: read + jobs: lint: runs-on: ubuntu-latest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9340467..8efc5ea 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,8 @@ --- on: [push, pull_request] name: Test +permissions: + contents: read jobs: test: strategy: From 9d29e5a1b5bdf415f0ba81b711a42fa28b470be0 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Mon, 1 Dec 2025 15:24:57 -0800 Subject: [PATCH 25/28] install-vendored-go: update download link The Google storage account appears to no longer be valid, so let's use the official download link from https://go.dev. --- script/install-vendored-go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/install-vendored-go b/script/install-vendored-go index 45ace01..76d2195 100755 --- a/script/install-vendored-go +++ b/script/install-vendored-go @@ -39,7 +39,7 @@ fi ROOTDIR="$( cd "$( dirname "$0" )/.." && pwd )" VENDORDIR="$ROOTDIR/vendor" -DOWNLOAD_URL=https://storage.googleapis.com/golang/$GO_PKG +DOWNLOAD_URL=https://go.dev/dl/$GO_PKG ARCHIVE="$VENDORDIR/$GO_PKG" INSTALLDIR="$VENDORDIR/$GO_VERSION" export GOROOT="$INSTALLDIR/go" From dba52c5e298c0d9966af9aa87969bde1dc481cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 16 Jan 2026 12:41:00 +0100 Subject: [PATCH 26/28] Skip the SHA256 test if git has not support for it If you are building and running the tests in an environment with an older version of git, it might not have SHA256 support. This should not cause the git-sizer test suite to fail as it's not an issue with git-sizer. Detect this situation and skip the test. --- git_sizer_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index c74b459..09f088f 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -867,7 +867,11 @@ func TestSHA256(t *testing.T) { // exist yet: cmd := exec.Command("git", "init", "--object-format", "sha256", testRepo.Path) cmd.Env = testutils.CleanGitEnv() - err = cmd.Run() + output, err := cmd.CombinedOutput() + + if err != nil && strings.HasPrefix(string(output), "error: unknown option `object-format'") { + t.Skip("skipping due to lack of SHA256 support") + } require.NoError(t, err) timestamp := time.Unix(1112911993, 0) From 0579f1812beaf09679e0651fbd0b36047759f5e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 16 Jan 2026 12:48:24 +0100 Subject: [PATCH 27/28] ci: update the setup-go version Version 2 wants to use the old URL so that also fails to run. The latest is version 6 so let's update to that and at the same time update to the same Go version that we want to download in the build script. --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8efc5ea..542f410 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,9 +12,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - name: Set up Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v6 with: - go-version: '1.17' + go-version: '1.21.3' - name: Check out code uses: actions/checkout@v2 From 37ca70f5f033785298587bb642b83fce66616322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 16 Jan 2026 15:23:00 +0100 Subject: [PATCH 28/28] test: loosen the object-format error matching As pointed out by the robot, this can be an issue with different locales. It is enough for our purposes to know that the error message includes "object-format" so we know it's unhappy with it. --- git_sizer_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 09f088f..f5c8006 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -869,7 +869,7 @@ func TestSHA256(t *testing.T) { cmd.Env = testutils.CleanGitEnv() output, err := cmd.CombinedOutput() - if err != nil && strings.HasPrefix(string(output), "error: unknown option `object-format'") { + if err != nil && strings.Contains(string(output), "object-format") { t.Skip("skipping due to lack of SHA256 support") } require.NoError(t, err)