diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 52a9f07..f8cfb4b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,3 +1,4 @@ +--- name: Lint on: push: @@ -11,6 +12,9 @@ on: - go.mod - go.sum +permissions: + contents: read + jobs: lint: runs-on: ubuntu-latest diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..b35a733 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,35 @@ +--- +name: Release + +on: + push: + tags: + - "v*" + +permissions: + contents: write + +jobs: + lint: + name: Release + runs-on: ubuntu-latest + steps: + - name: Setup + uses: + actions/setup-go@v4 + with: + go-version: 1.21 + + - name: Checkout + uses: actions/checkout@v4 + + - name: Build releases + run: | + make releases VERSION=$GITHUB_REF_NAME + + - name: Release + uses: softprops/action-gh-release@v1 + with: + draft: true + files: | + releases/git-sizer-* diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f658b81..542f410 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,8 @@ +--- on: [push, pull_request] name: Test +permissions: + contents: read jobs: test: strategy: @@ -9,9 +12,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - name: Set up Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v6 with: - go-version: '1.17' + go-version: '1.21.3' - name: Check out code uses: actions/checkout@v2 diff --git a/docs/BUILDING.md b/docs/BUILDING.md index a977a2c..7f9fdef 100644 --- a/docs/BUILDING.md +++ b/docs/BUILDING.md @@ -7,11 +7,11 @@ Most people can just install a released version of `git-sizer`, [as described in 1. Make sure that you have a recent version of the [Go language toolchain](https://golang.org/doc/install) installed and that you have set `GOPATH`. -2. Get `git-sizer` using `go get`: +2. 
Get `git-sizer` using `go install`: - go get github.com/github/git-sizer + go install github.com/github/git-sizer@latest - This should fetch and compile the source code and write the executable file to `$GOPATH/bin/`. + This should install the executable file to `$GOPATH/bin/`. 3. Either add `$GOPATH/bin` to your `PATH`, or copy the executable file (`git-sizer` or `git-sizer.exe`) to a directory that is already in your `PATH`. diff --git a/git-sizer.go b/git-sizer.go index d1e075c..1ef9812 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -1,6 +1,7 @@ package main import ( + "context" "encoding/json" "errors" "fmt" @@ -19,7 +20,9 @@ import ( "github.com/github/git-sizer/sizes" ) -const usage = `usage: git-sizer [OPTS] +const usage = `usage: git-sizer [OPTS] [ROOT...] + + Scan objects in your Git repository and emit statistics about them. --threshold THRESHOLD minimum level of concern (i.e., number of stars) that should be reported. Default: @@ -45,12 +48,29 @@ const usage = `usage: git-sizer [OPTS] be set via gitconfig: 'sizer.progress'. --version only report the git-sizer version number + Object selection: + + git-sizer traverses through your Git history to find objects to + process. By default, it processes all objects that are reachable from + any reference. You can tell it to process only some of your + references; see "Reference selection" below. + + If explicit ROOTs are specified on the command line, each one should + be a string that 'git rev-parse' can convert into a single Git object + ID, like 'main', 'main~:src', or an abbreviated SHA-1. See + git-rev-parse(1) for details. In that case, git-sizer also treats + those objects as starting points for its traversal, and also includes + the Git objects that are reachable from those roots in the analysis. + + As a special case, if one or more ROOTs are specified on the command + line but _no_ reference selection options, then _only_ the specified + ROOTs are traversed, and no references. 
+ Reference selection: - By default, git-sizer processes all Git objects that are reachable - from any reference. The following options can be used to limit which - references to process. The last rule matching a reference determines - whether that reference is processed. + The following options can be used to limit which references to + process. The last rule matching a reference determines whether that + reference is processed. --[no-]branches process [don't process] branches --[no-]tags process [don't process] tags @@ -93,14 +113,16 @@ var ReleaseVersion string var BuildVersion string func main() { - err := mainImplementation(os.Stdout, os.Stderr, os.Args[1:]) + ctx := context.Background() + + err := mainImplementation(ctx, os.Stdout, os.Stderr, os.Args[1:]) if err != nil { fmt.Fprintf(os.Stderr, "error: %s\n", err) os.Exit(1) } } -func mainImplementation(stdout, stderr io.Writer, args []string) error { +func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []string) error { var nameStyle sizes.NameStyle = sizes.NameStyleFull var cpuprofile string var jsonOutput bool @@ -112,7 +134,7 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { // Try to open the repository, but it's not an error yet if this // fails, because the user might only be asking for `--help`. 
- repo, repoErr := git.NewRepository(".") + repo, repoErr := git.NewRepositoryFromPath(".") flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) flags.Usage = func() { @@ -216,10 +238,6 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { return nil } - if len(flags.Args()) != 0 { - return errors.New("excess arguments") - } - if repoErr != nil { return fmt.Errorf("couldn't open Git repository: %w", repoErr) } @@ -273,7 +291,7 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { progress = v } - rg, err := rgb.Finish() + rg, err := rgb.Finish(len(flags.Args()) == 0) if err != nil { return err } @@ -288,7 +306,27 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } - historySize, err := sizes.ScanRepositoryUsingGraph(repo, rg, nameStyle, progressMeter) + refRoots, err := sizes.CollectReferences(ctx, repo, rg) + if err != nil { + return fmt.Errorf("determining which reference to scan: %w", err) + } + + roots := make([]sizes.Root, 0, len(refRoots)+len(flags.Args())) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + for _, arg := range flags.Args() { + oid, err := repo.ResolveObject(arg) + if err != nil { + return fmt.Errorf("resolving command-line argument %q: %w", arg, err) + } + roots = append(roots, sizes.NewExplicitRoot(arg, oid)) + } + + historySize, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, nameStyle, progressMeter, + ) if err != nil { return fmt.Errorf("error scanning repository: %w", err) } diff --git a/git/git.go b/git/git.go index f451c54..ef3cbc6 100644 --- a/git/git.go +++ b/git/git.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "io/fs" "os" "os/exec" "path/filepath" @@ -15,16 +16,22 @@ type ObjectType string // Repository represents a Git repository on disk. type Repository struct { - path string + // gitDir is the path to the `GIT_DIR` for this repository. 
It + // might be absolute or it might be relative to the current + // directory. + gitDir string // gitBin is the path of the `git` executable that should be used // when running commands in this repository. gitBin string + // hashAlgo is the repository's hash algorithm. + hashAlgo HashAlgo } -// smartJoin returns the path that can be described as `relPath` -// relative to `path`, given that `path` is either absolute or is -// relative to the current directory. +// smartJoin returns `relPath` if it is an absolute path. If not, it +// assumes that `relPath` is relative to `path`, so it joins them +// together and returns the result. In that case, if `path` itself is +// relative, then the return value is also relative. func smartJoin(path, relPath string) string { if filepath.IsAbs(relPath) { return relPath @@ -32,9 +39,10 @@ func smartJoin(path, relPath string) string { return filepath.Join(path, relPath) } -// NewRepository creates a new repository object that can be used for -// running `git` commands within that repository. -func NewRepository(path string) (*Repository, error) { +// NewRepositoryFromGitDir creates a new `Repository` object that can +// be used for running `git` commands, given the value of `GIT_DIR` +// for the repository. 
+func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { // Find the `git` executable to be used: gitBin, err := findGitBin() if err != nil { @@ -43,6 +51,43 @@ func NewRepository(path string) (*Repository, error) { ) } + hashAlgo := HashSHA1 + cmd := exec.Command(gitBin, "--git-dir", gitDir, "rev-parse", "--show-object-format") //nolint:gosec + if out, err := cmd.Output(); err == nil { + if string(bytes.TrimSpace(out)) == "sha256" { + hashAlgo = HashSHA256 + } + } + + repo := Repository{ + gitDir: gitDir, + gitBin: gitBin, + hashAlgo: hashAlgo, + } + + full, err := repo.IsFull() + if err != nil { + return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err) + } + if !full { + return nil, errors.New("this appears to be a shallow clone; full clone required") + } + + return &repo, nil +} + +// NewRepositoryFromPath creates a new `Repository` object that can be +// used for running `git` commands within `path`. It does so by asking +// `git` what `GIT_DIR` to use. Git, in turn, bases its decision on +// the path and the environment. +func NewRepositoryFromPath(path string) (*Repository, error) { + gitBin, err := findGitBin() + if err != nil { + return nil, fmt.Errorf( + "could not find 'git' executable (is it in your PATH?): %w", err, + ) + } + //nolint:gosec // `gitBin` is chosen carefully, and `path` is the // path to the repository. cmd := exec.Command(gitBin, "-C", path, "rev-parse", "--git-dir") @@ -63,25 +108,28 @@ func NewRepository(path string) (*Repository, error) { } gitDir := smartJoin(path, string(bytes.TrimSpace(out))) - //nolint:gosec // `gitBin` is chosen carefully. - cmd = exec.Command(gitBin, "rev-parse", "--git-path", "shallow") - cmd.Dir = gitDir - out, err = cmd.Output() + return NewRepositoryFromGitDir(gitDir) +} + +// IsFull returns `true` iff `repo` appears to be a full clone. 
+func (repo *Repository) IsFull() (bool, error) { + shallow, err := repo.GitPath("shallow") if err != nil { - return nil, fmt.Errorf( - "could not run 'git rev-parse --git-path shallow': %w", err, - ) + return false, err } - shallow := smartJoin(gitDir, string(bytes.TrimSpace(out))) + _, err = os.Lstat(shallow) if err == nil { - return nil, errors.New("this appears to be a shallow clone; full clone required") + return false, nil } - return &Repository{ - path: gitDir, - gitBin: gitBin, - }, nil + if !errors.Is(err, fs.ErrNotExist) { + return false, err + } + + // The `shallow` file is absent, which is what we expect + // for a full clone. + return true, nil } func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { @@ -103,7 +151,7 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { cmd.Env = append( os.Environ(), - "GIT_DIR="+repo.path, + "GIT_DIR="+repo.gitDir, // Disable grafts when running our commands: "GIT_GRAFT_FILE="+os.DevNull, ) @@ -111,7 +159,37 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { return cmd } -// Path returns the path to `repo`. -func (repo *Repository) Path() string { - return repo.path +// GitDir returns the path to `repo`'s `GIT_DIR`. It might be absolute +// or it might be relative to the current directory. +func (repo *Repository) GitDir() string { + return repo.gitDir +} + +// GitPath returns the path of a file within the git repository, by +// calling `git rev-parse --git-path $relPath`. The returned path is +// relative to the current directory. +func (repo *Repository) GitPath(relPath string) (string, error) { + cmd := repo.GitCommand("rev-parse", "--git-path", relPath) + out, err := cmd.Output() + if err != nil { + return "", fmt.Errorf( + "running 'git rev-parse --git-path %s': %w", relPath, err, + ) + } + // `git rev-parse --git-path` is documented to return the path + // relative to the current directory. 
Since we haven't changed the + // current directory, we can use it as-is: + return string(bytes.TrimSpace(out)), nil +} + +func (repo *Repository) HashAlgo() HashAlgo { + return repo.hashAlgo +} + +func (repo *Repository) HashSize() int { + return repo.hashAlgo.HashSize() +} + +func (repo *Repository) NullOID() OID { + return repo.hashAlgo.NullOID() } diff --git a/git/git_bin.go b/git/git_bin.go index fc03435..526e9bb 100644 --- a/git/git_bin.go +++ b/git/git_bin.go @@ -2,10 +2,20 @@ package git import ( "path/filepath" + "sync" "github.com/cli/safeexec" ) +// This variable will be used to memoize the result of `findGitBin()`, +// since its return value only depends on the environment. +var gitBinMemo struct { + once sync.Once + + gitBin string + err error +} + // findGitBin finds the `git` binary in PATH that should be used by // the rest of `git-sizer`. It uses `safeexec` to find the executable, // because on Windows, `exec.Cmd` looks not only in PATH, but also in @@ -13,15 +23,20 @@ import ( // being scanned is hostile and non-bare because it might possibly // contain an executable file named `git`. 
func findGitBin() (string, error) { - gitBin, err := safeexec.LookPath("git") - if err != nil { - return "", err - } + gitBinMemo.once.Do(func() { + p, err := safeexec.LookPath("git") + if err != nil { + gitBinMemo.err = err + return + } - gitBin, err = filepath.Abs(gitBin) - if err != nil { - return "", err - } + p, err = filepath.Abs(p) + if err != nil { + gitBinMemo.err = err + return + } - return gitBin, nil + gitBinMemo.gitBin = p + }) + return gitBinMemo.gitBin, gitBinMemo.err } diff --git a/git/obj_iter.go b/git/obj_iter.go index cecdc2a..c367f11 100644 --- a/git/obj_iter.go +++ b/git/obj_iter.go @@ -30,7 +30,7 @@ func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error) errCh: make(chan error), headerCh: make(chan BatchHeader), } - + hashHexSize := repo.HashSize() * 2 iter.p.Add( // Read OIDs from `iter.oidCh` and write them to `git // rev-list`: @@ -68,10 +68,10 @@ func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error) pipe.LinewiseFunction( "copy-oids", func(_ context.Context, _ pipe.Env, line []byte, stdout *bufio.Writer) error { - if len(line) < 40 { + if len(line) < hashHexSize { return fmt.Errorf("line too short: '%s'", line) } - if _, err := stdout.Write(line[:40]); err != nil { + if _, err := stdout.Write(line[:hashHexSize]); err != nil { return fmt.Errorf("writing OID to 'git cat-file': %w", err) } if err := stdout.WriteByte('\n'); err != nil { diff --git a/git/obj_resolver.go b/git/obj_resolver.go new file mode 100644 index 0000000..fbeb246 --- /dev/null +++ b/git/obj_resolver.go @@ -0,0 +1,20 @@ +package git + +import ( + "bytes" + "fmt" +) + +func (repo *Repository) ResolveObject(name string) (OID, error) { + cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name) + output, err := cmd.Output() + if err != nil { + return repo.NullOID(), fmt.Errorf("resolving object %q: %w", name, err) + } + oidString := string(bytes.TrimSpace(output)) + oid, err := NewOID(oidString) + if err != nil 
{ + return repo.NullOID(), fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) + } + return oid, nil +} diff --git a/git/oid.go b/git/oid.go index 2aefbcb..2a2bdfc 100644 --- a/git/oid.go +++ b/git/oid.go @@ -1,32 +1,75 @@ package git import ( + "bytes" + "crypto/sha1" //nolint:gosec + "crypto/sha256" "encoding/hex" "errors" ) +const ( + HashSizeSHA256 = sha256.Size + HashSizeSHA1 = sha1.Size + HashSizeMax = HashSizeSHA256 +) + +type HashAlgo int + +const ( + HashUnknown HashAlgo = iota + HashSHA1 + HashSHA256 +) + // OID represents the SHA-1 object ID of a Git object, in binary // format. type OID struct { - v [20]byte + v [HashSizeMax]byte + hashSize int } -// NullOID is the null object ID; i.e., all zeros. -var NullOID OID +func (h HashAlgo) NullOID() OID { + switch h { + case HashSHA1: + return OID{hashSize: HashSizeSHA1} + case HashSHA256: + return OID{hashSize: HashSizeSHA256} + } + return OID{} +} + +func (h HashAlgo) HashSize() int { + switch h { + case HashSHA1: + return HashSizeSHA1 + case HashSHA256: + return HashSizeSHA256 + } + return 0 +} + +// defaultNullOID is the null object ID; i.e., all zeros. +var defaultNullOID OID + +func IsNullOID(o OID) bool { + return bytes.Equal(o.v[:], defaultNullOID.v[:]) +} // OIDFromBytes converts a byte slice containing an object ID in // binary format into an `OID`. func OIDFromBytes(oidBytes []byte) (OID, error) { var oid OID - if len(oidBytes) != len(oid.v) { + oidSize := len(oidBytes) + if oidSize != HashSizeSHA1 && oidSize != HashSizeSHA256 { return OID{}, errors.New("bytes oid has the wrong length") } - copy(oid.v[0:20], oidBytes) + oid.hashSize = oidSize + copy(oid.v[0:oidSize], oidBytes) return oid, nil } -// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40}`) -// into an `OID`. +// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40,64}`) into an `OID`. 
func NewOID(s string) (OID, error) { oidBytes, err := hex.DecodeString(s) if err != nil { @@ -37,18 +80,18 @@ func NewOID(s string) (OID, error) { // String formats `oid` as a string in hex format. func (oid OID) String() string { - return hex.EncodeToString(oid.v[:]) + return hex.EncodeToString(oid.v[:oid.hashSize]) } // Bytes returns a byte slice view of `oid`, in binary format. func (oid OID) Bytes() []byte { - return oid.v[:] + return oid.v[:oid.hashSize] } // MarshalJSON expresses `oid` as a JSON string with its enclosing // quotation marks. func (oid OID) MarshalJSON() ([]byte, error) { - src := oid.v[:] + src := oid.v[:oid.hashSize] dst := make([]byte, hex.EncodedLen(len(src))+2) dst[0] = '"' dst[len(dst)-1] = '"' diff --git a/git/ref_filter.go b/git/ref_filter.go index 8eb8a9b..46aff66 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -83,15 +83,23 @@ func (_ allReferencesFilter) Filter(_ string) bool { var AllReferencesFilter allReferencesFilter +type noReferencesFilter struct{} + +func (_ noReferencesFilter) Filter(_ string) bool { + return false +} + +var NoReferencesFilter noReferencesFilter + // PrefixFilter returns a `ReferenceFilter` that matches references // whose names start with the specified `prefix`, which must match at // a component boundary. For example, // -// * Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not -// "refs/foobar". +// - Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not +// "refs/foobar". // -// * Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or -// "refs/foobar". +// - Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or +// "refs/foobar". func PrefixFilter(prefix string) ReferenceFilter { if prefix == "" { return AllReferencesFilter diff --git a/git/tree.go b/git/tree.go index c31fa78..18cb3ee 100644 --- a/git/tree.go +++ b/git/tree.go @@ -10,13 +10,14 @@ import ( // Tree represents a Git tree object. 
type Tree struct { - data string + data string + hashSize int } // ParseTree parses the tree object whose contents are contained in // `data`. `oid` is currently unused. func ParseTree(oid OID, data []byte) (*Tree, error) { - return &Tree{string(data)}, nil + return &Tree{string(data), oid.hashSize}, nil } // Size returns the size of the tree object. @@ -36,13 +37,15 @@ type TreeEntry struct { // TreeIter is an iterator over the entries in a Git tree object. type TreeIter struct { // The as-yet-unread part of the tree's data. - data string + data string + hashSize int } // Iter returns an iterator over the entries in `tree`. func (tree *Tree) Iter() *TreeIter { return &TreeIter{ - data: tree.data, + data: tree.data, + hashSize: tree.hashSize, } } @@ -74,12 +77,12 @@ func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) { entry.Name = iter.data[:nulAt] iter.data = iter.data[nulAt+1:] - if len(iter.data) < 20 { + if len(iter.data) < iter.hashSize { return TreeEntry{}, false, errors.New("tree entry ends unexpectedly") } - - copy(entry.OID.v[0:20], iter.data[0:20]) - iter.data = iter.data[20:] + entry.OID.hashSize = iter.hashSize + copy(entry.OID.v[0:iter.hashSize], iter.data[0:iter.hashSize]) + iter.data = iter.data[iter.hashSize:] return entry, true, nil } diff --git a/git_sizer_test.go b/git_sizer_test.go index 6ab132f..f5c8006 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -2,10 +2,10 @@ package main_test import ( "bytes" + "context" "encoding/json" "fmt" "io" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -272,7 +272,10 @@ func TestRefSelections(t *testing.T) { args := []string{"--show-refs", "--no-progress", "--json", "--json-version=2"} args = append(args, p.args...) cmd := exec.Command(executable, args...) 
- cmd.Dir = repo.Path + cmd.Env = append( + os.Environ(), + "GIT_DIR="+repo.Path, + ) var stdout bytes.Buffer cmd.Stdout = &stdout var stderr bytes.Buffer @@ -519,7 +522,10 @@ References (included references marked with '+'): args := append([]string{"--show-refs", "-v", "--no-progress"}, p.args...) cmd := exec.Command(executable, args...) - cmd.Dir = repo.Path + cmd.Env = append( + os.Environ(), + "GIT_DIR="+repo.Path, + ) var stdout bytes.Buffer cmd.Stdout = &stdout var stderr bytes.Buffer @@ -557,87 +563,164 @@ func (rg refGrouper) Groups() []sizes.RefGroup { func TestBomb(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, true, "bomb") - t.Cleanup(func() { repo.Remove(t) }) + ctx := context.Background() - newGitBomb(t, repo, 10, 10, "boom!\n") + testRepo := testutils.NewTestRepo(t, true, "bomb") + t.Cleanup(func() { testRepo.Remove(t) }) - h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), - refGrouper{}, sizes.NameStyleFull, meter.NoProgressMeter, - ) - require.NoError(t, err) + newGitBomb(t, testRepo, 10, 10, "boom!\n") + + repo := testRepo.Repository(t) + + t.Run("full", func(t *testing.T) { + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) - assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") - assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") - assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") - assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.Path(), "max commit size commit") - assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") - assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") - assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.Path(), "max parent count commit") - - assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") - assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") - assert.Equal(t, 
counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") - assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") - assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.Path(), "max tree entries tree") - - assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") - assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") - assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") - assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.Path(), "max blob size blob") - - assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") - assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") - - assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") - - assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.Path(), "max path depth tree") - assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.Path(), "max path length tree") - - assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.Path(), "max expanded tree count tree") - assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.Path(), "max expanded blob count tree") - assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.Path(), "max expanded blob size tree") - assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") - assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") - assert.Equal(t, 
counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") - assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") + assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.BestPath(), "max commit size commit") + assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.BestPath(), "max parent count commit") + + assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(1), 
h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) + + t.Run("partial", func(t *testing.T) { + name := "master:d0/d0" + oid, err := repo.ResolveObject(name) + require.NoError(t, err) + roots := []sizes.Root{sizes.NewExplicitRoot(name, oid)} + + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(0), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(0), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(0), h.MaxCommitSize, "max commit size") + assert.Nil(t, h.MaxCommitSizeCommit) 
+ assert.Equal(t, counts.Count32(0), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Nil(t, h.MaxParentCountCommit, "max parent count commit") + + assert.Equal(t, counts.Count32(8), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2330), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(80), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(0), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(8), h.MaxPathDepth, "max path depth") + assert.Equal(t, "master:d0/d0", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(23), h.MaxPathLength, "max path length") + assert.Equal(t, "master:d0/d0", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 8)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(pow(10, 8)), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 8)), 
h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) } func TestTaggedTags(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "tagged-tags") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "tagged-tags") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - cmd := repo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") // The lexicographical order of these tags is important, hence // their strange names. 
- cmd = repo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 1") - cmd = repo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 2") - cmd = repo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 3") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), repo, + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -646,20 +729,32 @@ func TestTaggedTags(t *testing.T) { func TestFromSubdir(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "subdir") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "subdir") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - repo.AddFile(t, "subdir/file.txt", "Hello, world!\n") + testRepo.AddFile(t, "subdir/file.txt", "Hello, world!\n") - cmd := repo.GitCommand(t, "commit", "-m", "initial") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating commit") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, 
err) + + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), testRepo.Repository(t), + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -668,7 +763,9 @@ func TestFromSubdir(t *testing.T) { func TestSubmodule(t *testing.T) { t.Parallel() - tmp, err := ioutil.TempDir("", "submodule") + ctx := context.Background() + + tmp, err := os.MkdirTemp("", "submodule") require.NoError(t, err, "creating temporary directory") defer func() { @@ -677,42 +774,52 @@ func TestSubmodule(t *testing.T) { timestamp := time.Unix(1112911993, 0) - submRepo := testutils.TestRepo{ + submTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "subm"), } - submRepo.Init(t, false) - submRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") - submRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") - submRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") + submTestRepo.Init(t, false) + submTestRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") + submTestRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") + submTestRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") - cmd := submRepo.GitCommand(t, "commit", "-m", "subm initial") + cmd := submTestRepo.GitCommand(t, "commit", "-m", "subm initial") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating subm commit") - mainRepo := testutils.TestRepo{ + mainTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "main"), } - mainRepo.Init(t, false) + mainTestRepo.Init(t, false) - mainRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") + mainTestRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") - cmd = mainRepo.GitCommand(t, "commit", "-m", "main 
initial") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "main initial") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = mainRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submRepo.Path, "sub") - cmd.Dir = mainRepo.Path + cmd = mainTestRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submTestRepo.Path, "sub") + cmd.Dir = mainTestRepo.Path require.NoError(t, cmd.Run(), "adding submodule") - cmd = mainRepo.GitCommand(t, "commit", "-m", "add submodule") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "add submodule") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "committing submodule to main") + mainRepo := mainTestRepo.Repository(t) + + mainRefRoots, err := sizes.CollectReferences(ctx, mainRepo, refGrouper{}) + require.NoError(t, err) + + mainRoots := make([]sizes.Root, 0, len(mainRefRoots)) + for _, refRoot := range mainRefRoots { + mainRoots = append(mainRoots, refRoot) + } + // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - mainRepo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), mainTestRepo.Repository(t), + mainRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -720,14 +827,66 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(1), h.MaxExpandedSubmoduleCount, "max expanded submodule count") // Analyze the submodule: - submRepo2 := testutils.TestRepo{ - Path: filepath.Join(mainRepo.Path, "sub"), + submTestRepo2 := testutils.TestRepo{ + Path: filepath.Join(mainTestRepo.Path, "sub"), + } + + submRepo2 := submTestRepo2.Repository(t) + + submRefRoots2, err := sizes.CollectReferences(ctx, submRepo2, refGrouper{}) + require.NoError(t, err) + + submRoots2 := make([]sizes.Root, 0, 
len(submRefRoots2)) + for _, refRoot := range submRefRoots2 { + submRoots2 = append(submRoots2, refRoot) } + h, err = sizes.ScanRepositoryUsingGraph( - submRepo2.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), submRepo2, + submRoots2, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") assert.Equal(t, counts.Count32(3), h.MaxExpandedBlobCount, "max expanded blob count") } + +func TestSHA256(t *testing.T) { + t.Parallel() + + ctx := context.Background() + + t.Helper() + + path, err := os.MkdirTemp("", "sha256") + require.NoError(t, err) + + testRepo := testutils.TestRepo{Path: path} + defer testRepo.Remove(t) + + // Don't use `GitCommand()` because the directory might not + // exist yet: + cmd := exec.Command("git", "init", "--object-format", "sha256", testRepo.Path) + cmd.Env = testutils.CleanGitEnv() + output, err := cmd.CombinedOutput() + + if err != nil && strings.Contains(string(output), "object-format") { + t.Skip("skipping due to lack of SHA256 support") + } + require.NoError(t, err) + + timestamp := time.Unix(1112911993, 0) + + testRepo.AddFile(t, "hello.txt", "Hello, world!\n") + cmd = testRepo.GitCommand(t, "commit", "-m", "initial") + testutils.AddAuthorInfo(cmd, ×tamp) + require.NoError(t, cmd.Run(), "creating initial commit") + + cmd = testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + testutils.AddAuthorInfo(cmd, ×tamp) + require.NoError(t, cmd.Run(), "creating commit") + + repo := testRepo.Repository(t) + + _, err = sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) +} diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 3c3179e..48f1190 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -254,9 +254,14 @@ func (rgb *RefGroupBuilder) AddRefopts(flags 
*pflag.FlagSet) { // Finish collects the information gained from processing the options // and returns a `sizes.RefGrouper`. -func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { +func (rgb *RefGroupBuilder) Finish(defaultAll bool) (sizes.RefGrouper, error) { if rgb.topLevelGroup.filter == nil { - rgb.topLevelGroup.filter = git.AllReferencesFilter + // User didn't specify any reference options. + if defaultAll { + rgb.topLevelGroup.filter = git.AllReferencesFilter + } else { + rgb.topLevelGroup.filter = git.NoReferencesFilter + } } refGrouper := refGrouper{ diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 60a2f9b..e14e487 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -4,7 +4,6 @@ import ( "bytes" "fmt" "io" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -21,6 +20,7 @@ import ( // TestRepo represents a git repository used for tests. type TestRepo struct { Path string + bare bool } // NewTestRepo creates and initializes a test repository in a @@ -29,7 +29,7 @@ type TestRepo struct { func NewTestRepo(t *testing.T, bare bool, pattern string) *TestRepo { t.Helper() - path, err := ioutil.TempDir("", pattern) + path, err := os.MkdirTemp("", pattern) require.NoError(t, err) repo := TestRepo{Path: path} @@ -38,6 +38,7 @@ func NewTestRepo(t *testing.T, bare bool, pattern string) *TestRepo { return &TestRepo{ Path: path, + bare: bare, } } @@ -73,7 +74,7 @@ func (repo *TestRepo) Remove(t *testing.T) { func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { t.Helper() - path, err := ioutil.TempDir("", pattern) + path, err := os.MkdirTemp("", pattern) require.NoError(t, err) err = repo.GitCommand( @@ -90,9 +91,15 @@ func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { func (repo *TestRepo) Repository(t *testing.T) *git.Repository { t.Helper() - r, err := git.NewRepository(repo.Path) - require.NoError(t, err) - return r + if repo.bare { + r, err := 
git.NewRepositoryFromGitDir(repo.Path) + require.NoError(t, err) + return r + } else { + r, err := git.NewRepositoryFromPath(repo.Path) + require.NoError(t, err) + return r + } } // localEnvVars is a list of the variable names that should be cleared @@ -158,7 +165,7 @@ func (repo *TestRepo) UpdateRef(t *testing.T, refname string, oid git.OID) { var cmd *exec.Cmd - if oid == git.NullOID { + if git.IsNullOID(oid) { cmd = repo.GitCommand(t, "update-ref", "-d", refname) } else { cmd = repo.GitCommand(t, "update-ref", refname, oid.String()) diff --git a/script/ensure-go-installed.sh b/script/ensure-go-installed.sh index 3111b9e..1e301fd 100644 --- a/script/ensure-go-installed.sh +++ b/script/ensure-go-installed.sh @@ -4,17 +4,17 @@ if [ -z "$ROOTDIR" ]; then echo 1>&2 'ensure-go-installed.sh invoked without ROOTDIR set!' fi -# Is go installed, and at least 1.16? +# Is go installed, and at least 1.21? go_ok() { set -- $(go version 2>/dev/null | sed -n 's/.*go\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\1 \2/p' | head -n 1) - [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 16 ] + [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 21 ] } # If a local go is installed, use it. set_up_vendored_go() { - GO_VERSION=go1.16.3 + GO_VERSION=go1.21.3 VENDORED_GOROOT="$ROOTDIR/vendor/$GO_VERSION/go" if [ -x "$VENDORED_GOROOT/bin/go" ]; then export GOROOT="$VENDORED_GOROOT" diff --git a/script/install-vendored-go b/script/install-vendored-go index 2407618..76d2195 100755 --- a/script/install-vendored-go +++ b/script/install-vendored-go @@ -1,20 +1,21 @@ #!/bin/sh # The checksums below must correspond to the downloads for this version. 
-GO_VERSION=go1.16.3 +# The checksums can be found on https://go.dev/dl +GO_VERSION=go1.21.3 case "$(uname -s):$(uname -m)" in Linux:x86_64) GO_PKG=${GO_VERSION}.linux-amd64.tar.gz - GO_PKG_SHA=951a3c7c6ce4e56ad883f97d9db74d3d6d80d5fec77455c6ada6c1f7ac4776d2 + GO_PKG_SHA=1241381b2843fae5a9707eec1f8fb2ef94d827990582c7c7c32f5bdfbfd420c8 ;; Darwin:x86_64) GO_PKG=${GO_VERSION}.darwin-amd64.tar.gz - GO_PKG_SHA=6bb1cf421f8abc2a9a4e39140b7397cdae6aca3e8d36dcff39a1a77f4f1170ac + GO_PKG_SHA=27014fc69e301d7588a169ca239b3cc609f0aa1abf38528bf0d20d3b259211eb ;; Darwin:arm64) GO_PKG=${GO_VERSION}.darwin-arm64.tar.gz - GO_PKG_SHA=f4e96bbcd5d2d1942f5b55d9e4ab19564da4fad192012f6d7b0b9b055ba4208f + GO_PKG_SHA=65302a7a9f7a4834932b3a7a14cb8be51beddda757b567a2f9e0cbd0d7b5a6ab ;; *) echo 1>&2 "I don't know how to install Go on your platform." @@ -38,7 +39,7 @@ fi ROOTDIR="$( cd "$( dirname "$0" )/.." && pwd )" VENDORDIR="$ROOTDIR/vendor" -DOWNLOAD_URL=https://storage.googleapis.com/golang/$GO_PKG +DOWNLOAD_URL=https://go.dev/dl/$GO_PKG ARCHIVE="$VENDORDIR/$GO_PKG" INSTALLDIR="$VENDORDIR/$GO_VERSION" export GOROOT="$INSTALLDIR/go" diff --git a/sizes/explicit_root.go b/sizes/explicit_root.go new file mode 100644 index 0000000..09348db --- /dev/null +++ b/sizes/explicit_root.go @@ -0,0 +1,19 @@ +package sizes + +import "github.com/github/git-sizer/git" + +type ExplicitRoot struct { + name string + oid git.OID +} + +func NewExplicitRoot(name string, oid git.OID) ExplicitRoot { + return ExplicitRoot{ + name: name, + oid: oid, + } +} + +func (er ExplicitRoot) Name() string { return er.name } +func (er ExplicitRoot) OID() git.OID { return er.oid } +func (er ExplicitRoot) Walk() bool { return true } diff --git a/sizes/graph.go b/sizes/graph.go index 7e923f6..2101a00 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -11,48 +11,16 @@ import ( "github.com/github/git-sizer/meter" ) -// RefGroupSymbol is the string "identifier" that is used to refer to -// a refgroup, for example in the gitconfig. 
Nesting of refgroups is -// inferred from their names, using "." as separator between -// components. For example, if there are three refgroups with symbols -// "tags", "tags.releases", and "foo.bar", then "tags.releases" is -// considered to be nested within "tags", and "foo.bar" is considered -// to be nested within "foo", the latter being created automatically -// if it was not configured explicitly. -type RefGroupSymbol string - -// RefGroup is a group of references, for example "branches" or -// "tags". Reference groups might overlap. -type RefGroup struct { - // Symbol is the unique string by which this `RefGroup` is - // identified and configured. It consists of dot-separated - // components, which implicitly makes a nested tree-like - // structure. - Symbol RefGroupSymbol - - // Name is the name for this `ReferenceGroup` to be presented - // in user-readable output. - Name string +type Root interface { + Name() string + OID() git.OID + Walk() bool } -// RefGrouper describes a type that can collate reference names into -// groups and decide which ones to walk. -type RefGrouper interface { - // Categorize tells whether `refname` should be walked at all, - // and if so, the symbols of the reference groups to which it - // belongs. - Categorize(refname string) (bool, []RefGroupSymbol) - - // Groups returns the list of `ReferenceGroup`s, in the order - // that they should be presented. The return value might - // depend on which references have been seen so far. - Groups() []RefGroup -} - -type refSeen struct { - git.Reference - walked bool - groups []RefGroupSymbol +type ReferenceRoot interface { + Root + Reference() git.Reference + Groups() []RefGroupSymbol } // ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which @@ -63,60 +31,36 @@ type refSeen struct { // // It returns the size data for the repository. 
func ScanRepositoryUsingGraph( - repo *git.Repository, rg RefGrouper, nameStyle NameStyle, + ctx context.Context, + repo *git.Repository, + roots []Root, + nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { - ctx, cancel := context.WithCancel(context.TODO()) - defer cancel() - - graph := NewGraph(rg, nameStyle) + graph := NewGraph(nameStyle) - refIter, err := repo.NewReferenceIter(ctx) - if err != nil { - return HistorySize{}, err - } - - objIter, err := repo.NewObjectIter(context.TODO()) + objIter, err := repo.NewObjectIter(ctx) if err != nil { return HistorySize{}, err } errChan := make(chan error, 1) - var refsSeen []refSeen - // Feed the references that we want into the stdin of the object - // iterator: + // Feed the references that we want to walk into the stdin of the + // object iterator: go func() { defer objIter.Close() errChan <- func() error { - for { - ref, ok, err := refIter.Next() - if err != nil { - return err - } - if !ok { - return nil - } - - walk, groups := rg.Categorize(ref.Refname) - - refsSeen = append( - refsSeen, - refSeen{ - Reference: ref, - walked: walk, - groups: groups, - }, - ) - - if !walk { + for _, root := range roots { + if !root.Walk() { continue } - if err := objIter.AddRoot(ref.OID); err != nil { + if err := objIter.AddRoot(root.OID()); err != nil { return err } } + return nil }() }() @@ -190,7 +134,7 @@ func ScanRepositoryUsingGraph( case "tree": trees = append(trees, ObjectHeader{obj.OID, obj.ObjectSize}) case "commit": - commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, git.NullOID}) + commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, repo.NullOID()}) case "tag": tags = append(tags, ObjectHeader{obj.OID, obj.ObjectSize}) default: @@ -326,9 +270,15 @@ func ScanRepositoryUsingGraph( } progressMeter.Start("Processing references: %d") - for _, refSeen := range refsSeen { + for _, root := range roots { progressMeter.Inc() - 
graph.RegisterReference(refSeen.Reference, refSeen.walked, refSeen.groups) + if refRoot, ok := root.(ReferenceRoot); ok { + graph.RegisterReference(refRoot.Reference(), refRoot.Groups()) + } + + if root.Walk() { + graph.pathResolver.RecordName(root.Name(), root.OID()) + } } progressMeter.Done() @@ -337,8 +287,6 @@ func ScanRepositoryUsingGraph( // Graph is an object graph that is being built up. type Graph struct { - rg RefGrouper - blobLock sync.Mutex blobSizes map[git.OID]BlobSize @@ -361,10 +309,8 @@ type Graph struct { } // NewGraph creates and returns a new `*Graph` instance. -func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { +func NewGraph(nameStyle NameStyle) *Graph { return &Graph{ - rg: rg, - blobSizes: make(map[git.OID]BlobSize), treeRecords: make(map[git.OID]*treeRecord), @@ -384,17 +330,18 @@ func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { } // RegisterReference records the specified reference in `g`. -func (g *Graph) RegisterReference(ref git.Reference, walked bool, groups []RefGroupSymbol) { +func (g *Graph) RegisterReference(ref git.Reference, groups []RefGroupSymbol) { g.historyLock.Lock() g.historySize.recordReference(g, ref) for _, group := range groups { g.historySize.recordReferenceGroup(g, group) } g.historyLock.Unlock() +} - if walked { - g.pathResolver.RecordReference(ref) - } +// Register a name that can be used for the specified OID. +func (g *Graph) RegisterName(name string, oid git.OID) { + g.pathResolver.RecordName(name, oid) } // HistorySize returns the size data that have been collected. diff --git a/sizes/grouper.go b/sizes/grouper.go new file mode 100644 index 0000000..fdaa927 --- /dev/null +++ b/sizes/grouper.go @@ -0,0 +1,88 @@ +package sizes + +import ( + "context" + + "github.com/github/git-sizer/git" +) + +// RefGroupSymbol is the string "identifier" that is used to refer to +// a refgroup, for example in the gitconfig. Nesting of refgroups is +// inferred from their names, using "." 
as separator between +// components. For example, if there are three refgroups with symbols +// "tags", "tags.releases", and "foo.bar", then "tags.releases" is +// considered to be nested within "tags", and "foo.bar" is considered +// to be nested within "foo", the latter being created automatically +// if it was not configured explicitly. +type RefGroupSymbol string + +// RefGroup is a group of references, for example "branches" or +// "tags". Reference groups might overlap. +type RefGroup struct { + // Symbol is the unique string by which this `RefGroup` is + // identified and configured. It consists of dot-separated + // components, which implicitly makes a nested tree-like + // structure. + Symbol RefGroupSymbol + + // Name is the name for this `ReferenceGroup` to be presented + // in user-readable output. + Name string +} + +// RefGrouper describes a type that can collate reference names into +// groups and decide which ones to walk. +type RefGrouper interface { + // Categorize tells whether `refname` should be walked at all, + // and if so, the symbols of the reference groups to which it + // belongs. + Categorize(refname string) (bool, []RefGroupSymbol) + + // Groups returns the list of `ReferenceGroup`s, in the order + // that they should be presented. The return value might + // depend on which references have been seen so far. 
+ Groups() []RefGroup +} + +type RefRoot struct { + ref git.Reference + walk bool + groups []RefGroupSymbol +} + +func (rr RefRoot) Name() string { return rr.ref.Refname } +func (rr RefRoot) OID() git.OID { return rr.ref.OID } +func (rr RefRoot) Reference() git.Reference { return rr.ref } +func (rr RefRoot) Walk() bool { return rr.walk } +func (rr RefRoot) Groups() []RefGroupSymbol { return rr.groups } + +func CollectReferences( + ctx context.Context, repo *git.Repository, rg RefGrouper, +) ([]RefRoot, error) { + refIter, err := repo.NewReferenceIter(ctx) + if err != nil { + return nil, err + } + + var refsSeen []RefRoot + for { + ref, ok, err := refIter.Next() + if err != nil { + return nil, err + } + if !ok { + return refsSeen, nil + } + + walk, groups := rg.Categorize(ref.Refname) + + refsSeen = append( + refsSeen, + RefRoot{ + ref: ref, + walk: walk, + groups: groups, + }, + ) + } +} diff --git a/sizes/output.go b/sizes/output.go index 933cc05..037f905 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -155,7 +155,7 @@ func (i *item) Emit(t *table) { } func (i *item) Footnote(nameStyle NameStyle) string { - if i.path == nil || i.path.OID == git.NullOID { + if i.path == nil || git.IsNullOID(i.path.OID) { return "" } switch nameStyle { @@ -214,7 +214,7 @@ func (i *item) MarshalJSON() ([]byte, error) { LevelOfConcern: float64(value) / i.scale, } - if i.path != nil && i.path.OID != git.NullOID { + if i.path != nil && !git.IsNullOID(i.path.OID) { stat.ObjectName = i.path.OID.String() stat.ObjectDescription = i.path.Path() } diff --git a/sizes/path_resolver.go b/sizes/path_resolver.go index 2a3bb1c..275d19a 100644 --- a/sizes/path_resolver.go +++ b/sizes/path_resolver.go @@ -12,15 +12,15 @@ import ( // `rev-parse` input, including commit and/or file path) by which // specified objects are reachable. It is used as follows: // -// * Request an object's path using `RequestPath()`. The returned -// `Path` object is a placeholder for the object's path. 
+// - Request an object's path using `RequestPath()`. The returned +// `Path` object is a placeholder for the object's path. // -// * Tell the `PathResolver` about objects that might be along the -// object's reachability path, *in depth-first* order (i.e., -// referents before referers) by calling `RecordTree()`, -// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. +// - Tell the `PathResolver` about objects that might be along the +// object's reachability path, *in depth-first* order (i.e., +// referents before referers) by calling `RecordTree()`, +// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. // -// * Read the path out of the `Path` object using `Path.Path()`. +// - Read the path out of the `Path` object using `Path.Path()`. // // Multiple objects can be processed at once. // @@ -34,7 +34,7 @@ import ( type PathResolver interface { RequestPath(oid git.OID, objectType string) *Path ForgetPath(p *Path) - RecordReference(ref git.Reference) + RecordName(name string, oid git.OID) RecordTreeEntry(oid git.OID, name string, childOID git.OID) RecordCommit(oid, tree git.OID) RecordTag(oid git.OID, tag *git.Tag) @@ -60,7 +60,7 @@ func (n NullPathResolver) RequestPath(oid git.OID, objectType string) *Path { func (_ NullPathResolver) ForgetPath(p *Path) {} -func (_ NullPathResolver) RecordReference(ref git.Reference) {} +func (_ NullPathResolver) RecordName(name string, oid git.OID) {} func (_ NullPathResolver) RecordTreeEntry(oid git.OID, name string, childOID git.OID) {} @@ -77,19 +77,19 @@ type InOrderPathResolver struct { // (e.g., the biggest blob, or a tree containing the biggest blob, or // a commit whose tree contains the biggest blob). Valid states: // -// * `parent == nil && relativePath == ""`—we have not yet found -// anything that refers to this object. +// - `parent == nil && relativePath == ""`—we have not yet found +// anything that refers to this object. 
// -// * `parent != nil && relativePath == ""`—this object is a tree, and -// we have found a commit that refers to it. +// - `parent != nil && relativePath == ""`—this object is a tree, and +// we have found a commit that refers to it. // -// * `parent == nil && relativePath != ""`—we have found a reference -// that points directly at this object; `relativePath` is the full -// name of the reference. +// - `parent == nil && relativePath != ""`—we have found a reference +// that points directly at this object; `relativePath` is the full +// name of the reference. // -// * `parent != nil && relativePath != ""`—this object is a blob or -// tree, and we have found another tree that refers to it; -// `relativePath` is the corresponding tree entry name. +// - `parent != nil && relativePath != ""`—this object is a blob or +// tree, and we have found another tree that refers to it; +// `relativePath` is the corresponding tree entry name. type Path struct { // The OID of the object whose path we seek. This member is always // set. @@ -122,7 +122,8 @@ type Path struct { func (p *Path) TreePrefix() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return p.parent.TreePrefix() @@ -130,7 +131,9 @@ func (p *Path) TreePrefix() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath + "/" } - } else { + case p.relativePath != "": + return p.relativePath + "/" + default: return "???" } case "commit", "tag": @@ -153,7 +156,8 @@ func (p *Path) TreePrefix() string { func (p *Path) Path() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return fmt.Sprintf("%s^{%s}", p.parent.BestPath(), p.objectType) @@ -161,7 +165,9 @@ func (p *Path) Path() string { // The parent is also a tree. 
return p.parent.TreePrefix() + p.relativePath } - } else { + case p.relativePath != "": + return p.relativePath + default: return "" } case "commit", "tag": @@ -274,18 +280,18 @@ func (pr *InOrderPathResolver) forgetPathLocked(p *Path) { } } -func (pr *InOrderPathResolver) RecordReference(ref git.Reference) { +func (pr *InOrderPathResolver) RecordName(name string, oid git.OID) { pr.lock.Lock() defer pr.lock.Unlock() - p, ok := pr.soughtPaths[ref.OID] + p, ok := pr.soughtPaths[oid] if !ok { // Nobody is looking for the path to the referent. return } - p.relativePath = ref.Refname - delete(pr.soughtPaths, ref.OID) + p.relativePath = name + delete(pr.soughtPaths, oid) } // Record that the tree with OID `oid` has an entry with the specified