From c1d1946924b97dda18bae8b937bd7e61ec6db3a7 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 19 Apr 2021 12:20:07 +0200 Subject: [PATCH 001/176] Update to go1.16.3 --- go.mod | 2 +- script/ensure-go-installed.sh | 6 +++--- script/install-vendored-go | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/go.mod b/go.mod index 20bf4b8..59bdcf6 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/github/git-sizer -go 1.13 +go 1.16 require ( github.com/davecgh/go-spew v1.1.1 // indirect diff --git a/script/ensure-go-installed.sh b/script/ensure-go-installed.sh index 43ba12d..3111b9e 100644 --- a/script/ensure-go-installed.sh +++ b/script/ensure-go-installed.sh @@ -4,17 +4,17 @@ if [ -z "$ROOTDIR" ]; then echo 1>&2 'ensure-go-installed.sh invoked without ROOTDIR set!' fi -# Is go installed, and at least 1.13? +# Is go installed, and at least 1.16? go_ok() { set -- $(go version 2>/dev/null | sed -n 's/.*go\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\1 \2/p' | head -n 1) - [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 13 ] + [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 16 ] } # If a local go is installed, use it. set_up_vendored_go() { - GO_VERSION=go1.13.4 + GO_VERSION=go1.16.3 VENDORED_GOROOT="$ROOTDIR/vendor/$GO_VERSION/go" if [ -x "$VENDORED_GOROOT/bin/go" ]; then export GOROOT="$VENDORED_GOROOT" diff --git a/script/install-vendored-go b/script/install-vendored-go index 34bdf88..0fcf15c 100755 --- a/script/install-vendored-go +++ b/script/install-vendored-go @@ -1,14 +1,14 @@ #!/bin/sh # The checksums below must correspond to the downloads for this version. 
-GO_VERSION=go1.13.4 +GO_VERSION=go1.16.3 if [ $(uname -s) = "Darwin" ]; then GO_PKG=${GO_VERSION}.darwin-amd64.tar.gz - GO_PKG_SHA=9f0721551a24a1eb43d2005cd58bd7b17574e50384b8da8896b0754259790752 + GO_PKG_SHA=f4e96bbcd5d2d1942f5b55d9e4ab19564da4fad192012f6d7b0b9b055ba4208f elif [ $(uname -s) = "Linux" ]; then GO_PKG=${GO_VERSION}.linux-amd64.tar.gz - GO_PKG_SHA=692d17071736f74be04a72a06dab9cac1cd759377bd85316e52b2227604c004c + GO_PKG_SHA=951a3c7c6ce4e56ad883f97d9db74d3d6d80d5fec77455c6ada6c1f7ac4776d2 else echo 1>&2 "I don't know how to install Go on your platform." echo 1>&2 "Please install $GO_VERSION or later and add it to your PATH." From d099842b4b0aa1a436ec0b19f563d8582538e9ba Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 19 Apr 2021 16:35:18 +0200 Subject: [PATCH 002/176] Return success on `git-sizer --help` If the user explicitly asked for help, and that's what they got, then everybody's happy. Fixes: #71 --- git-sizer.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/git-sizer.go b/git-sizer.go index f84c42a..b5f8c78 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -116,6 +116,9 @@ func mainImplementation() error { err = flags.Parse(os.Args[1:]) if err != nil { + if err == pflag.ErrHelp { + return nil + } return err } From 649faaea22593b1c2c22ee484bc9f67968dfb09b Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 19 Apr 2021 16:29:01 +0200 Subject: [PATCH 003/176] install-vendored-go: also consider the architecture Make sure to download the Go toolchain that matches the architecture of the host where the `Makefile` is being run. --- script/install-vendored-go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/script/install-vendored-go b/script/install-vendored-go index 0fcf15c..aaa1efe 100755 --- a/script/install-vendored-go +++ b/script/install-vendored-go @@ -3,17 +3,21 @@ # The checksums below must correspond to the downloads for this version. 
GO_VERSION=go1.16.3 -if [ $(uname -s) = "Darwin" ]; then - GO_PKG=${GO_VERSION}.darwin-amd64.tar.gz - GO_PKG_SHA=f4e96bbcd5d2d1942f5b55d9e4ab19564da4fad192012f6d7b0b9b055ba4208f -elif [ $(uname -s) = "Linux" ]; then +case "$(uname -s):$(uname -m)" in +Linux:x86_64) GO_PKG=${GO_VERSION}.linux-amd64.tar.gz GO_PKG_SHA=951a3c7c6ce4e56ad883f97d9db74d3d6d80d5fec77455c6ada6c1f7ac4776d2 -else + ;; +Darwin:x86_64) + GO_PKG=${GO_VERSION}.darwin-amd64.tar.gz + GO_PKG_SHA=6bb1cf421f8abc2a9a4e39140b7397cdae6aca3e8d36dcff39a1a77f4f1170ac + ;; +*) echo 1>&2 "I don't know how to install Go on your platform." echo 1>&2 "Please install $GO_VERSION or later and add it to your PATH." exit 1 -fi + ;; +esac archivesum() { shasum -a256 "$ARCHIVE" From 0ee2accf1a50ef89df7da1297af15a0828d73c03 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 19 Apr 2021 16:30:14 +0200 Subject: [PATCH 004/176] Add support for building automatically on Darwin:arm64 --- Makefile | 1 + script/install-vendored-go | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/Makefile b/Makefile index cf910f1..3ca1079 100644 --- a/Makefile +++ b/Makefile @@ -74,6 +74,7 @@ $(eval $(call PLATFORM_template,linux,386)) $(eval $(call PLATFORM_template,darwin,386)) $(eval $(call PLATFORM_template,darwin,amd64)) +$(eval $(call PLATFORM_template,darwin,arm64)) $(eval $(call PLATFORM_template,windows,amd64,.exe)) $(eval $(call PLATFORM_template,windows,386,.exe)) diff --git a/script/install-vendored-go b/script/install-vendored-go index aaa1efe..2407618 100755 --- a/script/install-vendored-go +++ b/script/install-vendored-go @@ -12,6 +12,10 @@ Darwin:x86_64) GO_PKG=${GO_VERSION}.darwin-amd64.tar.gz GO_PKG_SHA=6bb1cf421f8abc2a9a4e39140b7397cdae6aca3e8d36dcff39a1a77f4f1170ac ;; +Darwin:arm64) + GO_PKG=${GO_VERSION}.darwin-arm64.tar.gz + GO_PKG_SHA=f4e96bbcd5d2d1942f5b55d9e4ab19564da4fad192012f6d7b0b9b055ba4208f + ;; *) echo 1>&2 "I don't know how to install Go on your platform." 
echo 1>&2 "Please install $GO_VERSION or later and add it to your PATH." From 57e3d3ec17934abceb7e03862257261af348dda6 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 19 Apr 2021 16:48:42 +0200 Subject: [PATCH 005/176] Remove support for building Darwin:386 releases It's no longer supported by the Go toolchain. --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index 3ca1079..4fa64e6 100644 --- a/Makefile +++ b/Makefile @@ -72,7 +72,6 @@ endef $(eval $(call PLATFORM_template,linux,amd64)) $(eval $(call PLATFORM_template,linux,386)) -$(eval $(call PLATFORM_template,darwin,386)) $(eval $(call PLATFORM_template,darwin,amd64)) $(eval $(call PLATFORM_template,darwin,arm64)) From bdbc487183e870eedbb73584b05defd945700c0d Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 18:30:42 +0200 Subject: [PATCH 006/176] Makefile: simplify the build command --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4fa64e6..155ccc8 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ all: bin/git-sizer .PHONY: bin/git-sizer bin/git-sizer: mkdir -p bin - $(GO) build $(GOFLAGS) -o $@ $(PACKAGE) + $(GO) build $(GOFLAGS) -o $@ . # Cross-compile for a bunch of common platforms. Note that this # doesn't work with USE_ISATTY: @@ -50,7 +50,7 @@ define PLATFORM_template = .PHONY: bin/git-sizer-$(1)-$(2)$(3) bin/git-sizer-$(1)-$(2)$(3): mkdir -p bin - GOOS=$(1) GOARCH=$(2) $$(GO) build $$(GOFLAGS) -ldflags "-X main.ReleaseVersion=$$(VERSION)" -o $$@ $$(PACKAGE) + GOOS=$(1) GOARCH=$(2) $$(GO) build $$(GOFLAGS) -ldflags "-X main.ReleaseVersion=$$(VERSION)" -o $$@ . 
common-platforms: bin/git-sizer-$(1)-$(2)$(3) # Note that releases don't include code from vendor (they're only used From 3e9c2d839c374d5e71fbb044321c936eaffca8a8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 18:43:06 +0200 Subject: [PATCH 007/176] Simplify some old build cruft Things are a lot easier nowadays! --- CONTRIBUTING.md | 2 +- Makefile | 26 -------------------------- docs/BUILDING.md | 2 +- 3 files changed, 2 insertions(+), 28 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bce8fc2..f4427e8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,7 +20,7 @@ Please note that this project is released with a [Contributor Code of Conduct][c Here are a few things you can do that will increase the likelihood of your pull request being accepted: -- Make sure that your code is formatted correctly: `make gofmt`. +- Make sure that your code is formatted correctly according to `go fmt`: `go fmt .`. - Write tests. - Keep your change as focused as possible. If there are multiple changes you would like to make that are not dependent upon each other, consider submitting them as separate pull requests. - Write a [good commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html). 
diff --git a/Makefile b/Makefile index 155ccc8..f74882a 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,6 @@ GO111MODULES := 1 export GO111MODULES GO := $(CURDIR)/script/go -GOFMT := $(CURDIR)/script/gofmt GO_LDFLAGS := -X main.BuildVersion=$(shell git describe --tags --always --dirty || echo unknown) GOFLAGS := -ldflags "$(GO_LDFLAGS)" @@ -12,14 +11,6 @@ ifdef USE_ISATTY GOFLAGS := $(GOFLAGS) --tags isatty endif -GO_SRCS := $(sort $(shell $(GO) list -f ' \ - {{$$ip := .Dir}} \ - {{range .GoFiles }}{{printf "%s/%s\n" $$ip .}}{{end}} \ - {{range .CgoFiles }}{{printf "%s/%s\n" $$ip .}}{{end}} \ - {{range .TestGoFiles }}{{printf "%s/%s\n" $$ip .}}{{end}} \ - {{range .XTestGoFiles}}{{printf "%s/%s\n" $$ip .}}{{end}} \ - ' ./...)) - .PHONY: all all: bin/git-sizer @@ -85,23 +76,6 @@ test: bin/git-sizer gotest gotest: $(GO) test -timeout 60s $(GOFLAGS) ./... -.PHONY: gofmt -gofmt: - $(GOFMT) -l -w $(GO_SRCS) | sed -e 's/^/Fixing /' - -.PHONY: goimports -goimports: - goimports -l -w -e $(GO_SRCS) - -.PHONY: govet -govet: - $(GO) vet ./... - .PHONY: clean clean: rm -rf bin - -# List all of this project's Go sources: -.PHONY: srcs -srcs: - @printf "%s\n" $(GO_SRCS) diff --git a/docs/BUILDING.md b/docs/BUILDING.md index d215c80..a977a2c 100644 --- a/docs/BUILDING.md +++ b/docs/BUILDING.md @@ -25,7 +25,7 @@ This procedure is intended for experts and people who want to help develop `git- git clone https://github.com/github/git-sizer.git cd git-sizer -2. Install Go if necessary and create and prepare a project-local `GOPATH`: +2. 
Install Go if necessary: script/bootstrap From 811c2acd42b6f56bd7fc45e0557a11eed33e2224 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 19:00:50 +0200 Subject: [PATCH 008/176] .gitignore: add the `vendor` directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d66fcf8..9fb1b1b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /bin /releases +/vendor From 449077fc5d478f61391e45d51dcb3df0c3cf48f1 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 19:01:19 +0200 Subject: [PATCH 009/176] Makefile: build and test using `--mod=readonly` This keeps `go test` from getting upset if there is a `vendor` directory with a vendored Go toolchain in it. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f74882a..ff80eaf 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ export GO111MODULES GO := $(CURDIR)/script/go GO_LDFLAGS := -X main.BuildVersion=$(shell git describe --tags --always --dirty || echo unknown) -GOFLAGS := -ldflags "$(GO_LDFLAGS)" +GOFLAGS := -mod=readonly -ldflags "$(GO_LDFLAGS)" ifdef USE_ISATTY GOFLAGS := $(GOFLAGS) --tags isatty From deef63c1cf369d4fa1a8946ab455aa1b2988a408 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 21 Apr 2021 09:36:23 +0200 Subject: [PATCH 010/176] Use a safer method to locate the `git` executable `exec.LookPath()` from the Go standard library, which is used by `exec.Cmd`, implicitly searches for executables in the current directory before searching `PATH`. This could be awkward if the Git repository being analyzed contains a file like `git.exe` that could be run instead of the standard system `git` binary. So introduce a way to look for the "correct" `git` binary, record it in the `Repository` instance, and use that binary whenever we need to run `git`. Don't bother to make the same change in the test code, since tests are not run inside of a potentially hostile repository. 
--- git/git.go | 31 +++++++++++++++++++++++-------- git/git_bin.go | 27 +++++++++++++++++++++++++++ go.mod | 1 + go.sum | 2 ++ 4 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 git/git_bin.go diff --git a/git/git.go b/git/git.go index 7883cf9..34c7c53 100644 --- a/git/git.go +++ b/git/git.go @@ -62,6 +62,10 @@ func (oid OID) MarshalJSON() ([]byte, error) { type Repository struct { path string + + // gitBin is the path of the `git` executable that should be used + // when running commands in this repository. + gitBin string } // smartJoin returns the path that can be described as `relPath` @@ -74,20 +78,28 @@ func smartJoin(path, relPath string) string { return filepath.Join(path, relPath) } +// NewRepository creates a new repository object that can be used for +// running `git` commands within that repository. func NewRepository(path string) (*Repository, error) { - cmd := exec.Command("git", "-C", path, "rev-parse", "--git-dir") + // Find the `git` executable to be used: + gitBin, err := findGitBin() + if err != nil { + return nil, fmt.Errorf( + "could not find 'git' executable (is it in your PATH?): %v", err, + ) + } + + cmd := exec.Command(gitBin, "-C", path, "rev-parse", "--git-dir") out, err := cmd.Output() if err != nil { switch err := err.(type) { case *exec.Error: return nil, fmt.Errorf( - "could not run git (is it in your PATH?): %s", - err.Err, + "could not run '%s': %v", gitBin, err.Err, ) case *exec.ExitError: return nil, fmt.Errorf( - "git rev-parse failed: %s", - err.Stderr, + "git rev-parse failed: %s", err.Stderr, ) default: return nil, err @@ -95,7 +107,7 @@ func NewRepository(path string) (*Repository, error) { } gitDir := smartJoin(path, string(bytes.TrimSpace(out))) - cmd = exec.Command("git", "rev-parse", "--git-path", "shallow") + cmd = exec.Command(gitBin, "rev-parse", "--git-path", "shallow") cmd.Dir = gitDir out, err = cmd.Output() if err != nil { @@ -109,7 +121,10 @@ func NewRepository(path string) (*Repository, error) { 
return nil, errors.New("this appears to be a shallow clone; full clone required") } - return &Repository{path: gitDir}, nil + return &Repository{ + path: gitDir, + gitBin: gitBin, + }, nil } func (repo *Repository) gitCommand(callerArgs ...string) *exec.Cmd { @@ -125,7 +140,7 @@ func (repo *Repository) gitCommand(callerArgs ...string) *exec.Cmd { args = append(args, callerArgs...) - cmd := exec.Command("git", args...) + cmd := exec.Command(repo.gitBin, args...) cmd.Env = append( os.Environ(), diff --git a/git/git_bin.go b/git/git_bin.go new file mode 100644 index 0000000..fc03435 --- /dev/null +++ b/git/git_bin.go @@ -0,0 +1,27 @@ +package git + +import ( + "path/filepath" + + "github.com/cli/safeexec" +) + +// findGitBin finds the `git` binary in PATH that should be used by +// the rest of `git-sizer`. It uses `safeexec` to find the executable, +// because on Windows, `exec.Cmd` looks not only in PATH, but also in +// the current directory. This is a potential risk if the repository +// being scanned is hostile and non-bare because it might possibly +// contain an executable file named `git`. 
+func findGitBin() (string, error) { + gitBin, err := safeexec.LookPath("git") + if err != nil { + return "", err + } + + gitBin, err = filepath.Abs(gitBin) + if err != nil { + return "", err + } + + return gitBin, nil +} diff --git a/go.mod b/go.mod index 20bf4b8..293770e 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/github/git-sizer go 1.13 require ( + github.com/cli/safeexec v1.0.0 github.com/davecgh/go-spew v1.1.1 // indirect github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.4.0 diff --git a/go.sum b/go.sum index dff9970..590e4f5 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/cli/safeexec v1.0.0 h1:0VngyaIyqACHdcMNWfo6+KdUYnqEr2Sg+bSP1pdF+dI= +github.com/cli/safeexec v1.0.0/go.mod h1:Z/D4tTN8Vs5gXYHDCbaM1S/anmEDnJb1iW0+EJ5zx3Q= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= From cbc8706611dab631d9ea0f3150dcf6f14e389083 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 13 Mar 2021 18:20:42 +0100 Subject: [PATCH 011/176] NegatedBoolValue: rename some local variables for consistency --- git-sizer.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index f84c42a..21b1df7 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -23,21 +23,21 @@ type NegatedBoolValue struct { value *bool } -func (b *NegatedBoolValue) Set(s string) error { - v, err := strconv.ParseBool(s) - *b.value = !v +func (v *NegatedBoolValue) Set(s string) error { + b, err := strconv.ParseBool(s) + *v.value = !b return err } -func (b *NegatedBoolValue) Get() interface{} { - return !*b.value +func (v *NegatedBoolValue) Get() interface{} { + return !*v.value } -func (b *NegatedBoolValue) String() string { - if b == nil || b.value == nil { +func (v *NegatedBoolValue) String() string { + if 
v == nil || v.value == nil { return "true" } else { - return strconv.FormatBool(!*b.value) + return strconv.FormatBool(!*v.value) } } From 65f247d9d4e04c49b4536fe8247572703f3111c0 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 13 Mar 2021 18:14:11 +0100 Subject: [PATCH 012/176] Split the reference-filtering code into a separate file --- git/git.go | 70 ------------------------------------------- git/ref_filter.go | 75 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 70 deletions(-) create mode 100644 git/ref_filter.go diff --git a/git/git.go b/git/git.go index 7883cf9..5d3cf7e 100644 --- a/git/git.go +++ b/git/git.go @@ -290,76 +290,6 @@ func (l *BatchObjectIter) Close() error { return err } -type ReferenceFilter func(Reference) bool - -func AllReferencesFilter(_ Reference) bool { - return true -} - -func PrefixFilter(prefix string) ReferenceFilter { - return func(r Reference) bool { - return strings.HasPrefix(r.Refname, prefix) - } -} - -var ( - BranchesFilter ReferenceFilter = PrefixFilter("refs/heads/") - TagsFilter ReferenceFilter = PrefixFilter("refs/tags/") - RemotesFilter ReferenceFilter = PrefixFilter("refs/remotes/") -) - -func notNilFilters(filters ...ReferenceFilter) []ReferenceFilter { - var ret []ReferenceFilter - for _, filter := range filters { - if filter != nil { - ret = append(ret, filter) - } - } - return ret -} - -func OrFilter(filters ...ReferenceFilter) ReferenceFilter { - filters = notNilFilters(filters...) - if len(filters) == 0 { - return AllReferencesFilter - } else if len(filters) == 1 { - return filters[0] - } else { - return func(r Reference) bool { - for _, filter := range filters { - if filter(r) { - return true - } - } - return false - } - } -} - -func AndFilter(filters ...ReferenceFilter) ReferenceFilter { - filters = notNilFilters(filters...) 
- if len(filters) == 0 { - return AllReferencesFilter - } else if len(filters) == 1 { - return filters[0] - } else { - return func(r Reference) bool { - for _, filter := range filters { - if !filter(r) { - return false - } - } - return true - } - } -} - -func NotFilter(filter ReferenceFilter) ReferenceFilter { - return func(r Reference) bool { - return !filter(r) - } -} - // Parse a `cat-file --batch[-check]` output header line (including // the trailing LF). `spec`, if not "", is used in error messages. func parseBatchHeader(spec string, header string) (OID, ObjectType, counts.Count32, error) { diff --git a/git/ref_filter.go b/git/ref_filter.go new file mode 100644 index 0000000..67dc0d0 --- /dev/null +++ b/git/ref_filter.go @@ -0,0 +1,75 @@ +package git + +import ( + "strings" +) + +type ReferenceFilter func(Reference) bool + +func AllReferencesFilter(_ Reference) bool { + return true +} + +func PrefixFilter(prefix string) ReferenceFilter { + return func(r Reference) bool { + return strings.HasPrefix(r.Refname, prefix) + } +} + +var ( + BranchesFilter ReferenceFilter = PrefixFilter("refs/heads/") + TagsFilter ReferenceFilter = PrefixFilter("refs/tags/") + RemotesFilter ReferenceFilter = PrefixFilter("refs/remotes/") +) + +func notNilFilters(filters ...ReferenceFilter) []ReferenceFilter { + var ret []ReferenceFilter + for _, filter := range filters { + if filter != nil { + ret = append(ret, filter) + } + } + return ret +} + +func OrFilter(filters ...ReferenceFilter) ReferenceFilter { + filters = notNilFilters(filters...) + if len(filters) == 0 { + return AllReferencesFilter + } else if len(filters) == 1 { + return filters[0] + } else { + return func(r Reference) bool { + for _, filter := range filters { + if filter(r) { + return true + } + } + return false + } + } +} + +func AndFilter(filters ...ReferenceFilter) ReferenceFilter { + filters = notNilFilters(filters...) 
+ if len(filters) == 0 { + return AllReferencesFilter + } else if len(filters) == 1 { + return filters[0] + } else { + return func(r Reference) bool { + for _, filter := range filters { + if !filter(r) { + return false + } + } + return true + } + } +} + +func NotFilter(filter ReferenceFilter) ReferenceFilter { + return func(r Reference) bool { + return !filter(r) + } +} From 09554d3e43e5249ac309a1cb28c3adbd670082b3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 13 Mar 2021 18:14:35 +0100 Subject: [PATCH 013/176] Make `PrefixFilter` a little more useful (and add a docstring) --- git/ref_filter.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/git/ref_filter.go b/git/ref_filter.go index 67dc0d0..2e18490 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -10,9 +10,25 @@ func AllReferencesFilter(_ Reference) bool { return true } +// PrefixFilter returns a `ReferenceFilter` that matches references +// whose names start with the specified `prefix`, which must match at +// a component boundary. For example, +// +// * Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not +// "refs/foobar". +// +// * Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or +// "refs/foobar". 
func PrefixFilter(prefix string) ReferenceFilter { + if strings.HasSuffix(prefix, "/") { + return func(r Reference) bool { + return strings.HasPrefix(r.Refname, prefix) + } + } + return func(r Reference) bool { - return strings.HasPrefix(r.Refname, prefix) + return strings.HasPrefix(r.Refname, prefix) && + (len(r.Refname) == len(prefix) || r.Refname[len(prefix)] == '/') } } From 9183d9884057e374e2f6d52f55feacb013af815a Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 13 Mar 2021 18:16:52 +0100 Subject: [PATCH 014/176] IncludeExcludeFilter: new way of selecting references --- git-sizer.go | 14 +++---- git/ref_filter.go | 102 +++++++++++++++++++++------------------------- 2 files changed, 52 insertions(+), 64 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 21b1df7..dba5dbc 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -155,24 +155,20 @@ func mainImplementation() error { var historySize sizes.HistorySize - var filter git.ReferenceFilter + var filter git.IncludeExcludeFilter if processBranches || processTags || processRemotes { - var filters []git.ReferenceFilter if processBranches { - filters = append(filters, git.BranchesFilter) + filter.Include(git.BranchesFilter) } if processTags { - filters = append(filters, git.TagsFilter) + filter.Include(git.TagsFilter) } if processRemotes { - filters = append(filters, git.RemotesFilter) + filter.Include(git.RemotesFilter) } - filter = git.OrFilter(filters...) 
- } else { - filter = git.AllReferencesFilter } - historySize, err = sizes.ScanRepositoryUsingGraph(repo, filter, nameStyle, progress) + historySize, err = sizes.ScanRepositoryUsingGraph(repo, filter.Filter, nameStyle, progress) if err != nil { return fmt.Errorf("error scanning repository: %s", err) } diff --git a/git/ref_filter.go b/git/ref_filter.go index 2e18490..2dd8996 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -10,6 +10,50 @@ func AllReferencesFilter(_ Reference) bool { return true } +type Polarity uint8 + +const ( + Include Polarity = iota + Exclude +) + +// polarizedFilter is a filter that might match, in which case it +// includes or excludes the reference (according to its polarity). If +// it doesn't match, then it doesn't say anything about the reference. +type polarizedFilter struct { + polarity Polarity + filter ReferenceFilter +} + +// IncludeExcludeFilter is a filter based on a bunch of +// `polarizedFilter`s. The last one that matches a reference wins. If +// none match, then the result is based on the polarity of the first +// polarizedFilter: if it is `Include`, then return `false`; if it is +// `Exclude`, then return `true`. +type IncludeExcludeFilter struct { + filters []polarizedFilter +} + +func (ief *IncludeExcludeFilter) Include(f ReferenceFilter) { + ief.filters = append(ief.filters, polarizedFilter{Include, f}) +} + +func (ief *IncludeExcludeFilter) Exclude(f ReferenceFilter) { + ief.filters = append(ief.filters, polarizedFilter{Exclude, f}) +} + +func (ief *IncludeExcludeFilter) Filter(r Reference) bool { + for i := len(ief.filters); i > 0; i-- { + f := ief.filters[i-1] + if !f.filter(r) { + continue + } + return f.polarity == Include + } + + return len(ief.filters) == 0 || ief.filters[0].polarity == Exclude +} + // PrefixFilter returns a `ReferenceFilter` that matches references // whose names start with the specified `prefix`, which must match at // a component boundary. 
For example, @@ -33,59 +77,7 @@ func PrefixFilter(prefix string) ReferenceFilter { } var ( - BranchesFilter ReferenceFilter = PrefixFilter("refs/heads/") - TagsFilter ReferenceFilter = PrefixFilter("refs/tags/") - RemotesFilter ReferenceFilter = PrefixFilter("refs/remotes/") + BranchesFilter = PrefixFilter("refs/heads/") + TagsFilter = PrefixFilter("refs/tags/") + RemotesFilter = PrefixFilter("refs/remotes/") ) - -func notNilFilters(filters ...ReferenceFilter) []ReferenceFilter { - var ret []ReferenceFilter - for _, filter := range filters { - if filter != nil { - ret = append(ret, filter) - } - } - return ret -} - -func OrFilter(filters ...ReferenceFilter) ReferenceFilter { - filters = notNilFilters(filters...) - if len(filters) == 0 { - return AllReferencesFilter - } else if len(filters) == 1 { - return filters[0] - } else { - return func(r Reference) bool { - for _, filter := range filters { - if filter(r) { - return true - } - } - return false - } - } -} - -func AndFilter(filters ...ReferenceFilter) ReferenceFilter { - filters = notNilFilters(filters...) 
- if len(filters) == 0 { - return AllReferencesFilter - } else if len(filters) == 1 { - return filters[0] - } else { - return func(r Reference) bool { - for _, filter := range filters { - if !filter(r) { - return false - } - } - return true - } - } -} - -func NotFilter(filter ReferenceFilter) ReferenceFilter { - return func(r Reference) bool { - return !filter(r) - } -} From 4f6bfb5a41a48b09c31adc8556bdd2f81e6f3689 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 13 Mar 2021 17:56:53 +0100 Subject: [PATCH 015/176] Use a hand-written usage message --- git-sizer.go | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/git-sizer.go b/git-sizer.go index dba5dbc..a893351 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -16,6 +16,26 @@ import ( "github.com/spf13/pflag" ) +const Usage = `usage: git-sizer [OPTS] + --branches process all branches + --tags process all tags + --remotes process all remotes + -v, --verbose report all statistics, whether concerning or not + --threshold threshold minimum level of concern (i.e., number of stars) + that should be reported. Default: + '--threshold=1'. + --critical only report critical statistics + --names=[none|hash|full] display names of large objects in the specified + style: 'none' (omit footnotes entirely), 'hash' + (show only the SHA-1s of objects), or 'full' + (show full names). Default is '--names=full'. + -j, --json output results in JSON format + --json-version=[1|2] choose which JSON format version to output. + Default: --json-version=1. + --[no-]progress report (don't report) progress to stderr. 
+ --version only report the git-sizer version number +` + var ReleaseVersion string var BuildVersion string @@ -65,7 +85,10 @@ func mainImplementation() error { var progress bool var version bool - flags := pflag.NewFlagSet("", pflag.ContinueOnError) + flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) + flags.Usage = func() { + fmt.Print(Usage) + } flags.BoolVar(&processBranches, "branches", false, "process all branches") flags.BoolVar(&processTags, "tags", false, "process all tags") @@ -116,6 +139,9 @@ func mainImplementation() error { err = flags.Parse(os.Args[1:]) if err != nil { + if err == pflag.ErrHelp { + return nil + } return err } From d8b2507484f05ea13663f697e8798b8347ea0db3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 13 Mar 2021 18:24:12 +0100 Subject: [PATCH 016/176] Allow arbitrary references to be included/excluded by prefix --- git-sizer.go | 113 +++++++++++++++++++++++++++++++++++++--------- git/ref_filter.go | 6 --- 2 files changed, 91 insertions(+), 28 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index a893351..59f9a92 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -17,9 +17,7 @@ import ( ) const Usage = `usage: git-sizer [OPTS] - --branches process all branches - --tags process all tags - --remotes process all remotes + -v, --verbose report all statistics, whether concerning or not --threshold threshold minimum level of concern (i.e., number of stars) that should be reported. Default: @@ -34,6 +32,22 @@ const Usage = `usage: git-sizer [OPTS] Default: --json-version=1. --[no-]progress report (don't report) progress to stderr. --version only report the git-sizer version number + + Reference selection: + + By default, git-sizer processes all Git objects that are reachable from any + reference. The following options can be used to limit which references to + include. 
The last rule matching a reference determines whether that reference + is processed: + + --branches process branches + --tags process tags + --remotes process remote refs + --include prefix process references with the specified prefix + (e.g., '--include=refs/remotes/origin') + --exclude prefix don't process references with the specified + prefix (e.g., '--exclude=refs/notes') + ` var ReleaseVersion string @@ -65,6 +79,59 @@ func (v *NegatedBoolValue) Type() string { return "bool" } +type filterValue struct { + filter *git.IncludeExcludeFilter + polarity git.Polarity + prefix string +} + +func (v *filterValue) Set(s string) error { + var prefix string + var polarity git.Polarity + + if v.prefix == "" { + prefix = s + polarity = v.polarity + } else { + prefix = v.prefix + // Allow a boolean value to alter the polarity: + b, err := strconv.ParseBool(s) + if err != nil { + return err + } + if b { + polarity = git.Include + } else { + polarity = git.Exclude + } + } + + switch polarity { + case git.Include: + v.filter.Include(git.PrefixFilter(prefix)) + case git.Exclude: + v.filter.Exclude(git.PrefixFilter(prefix)) + } + + return nil +} + +func (v *filterValue) Get() interface{} { + return nil +} + +func (v *filterValue) String() string { + return "" +} + +func (v *filterValue) Type() string { + if v.prefix == "" { + return "prefix" + } else { + return "" + } +} + func main() { err := mainImplementation() if err != nil { @@ -74,9 +141,6 @@ func main() { } func mainImplementation() error { - var processBranches bool - var processTags bool - var processRemotes bool var nameStyle sizes.NameStyle = sizes.NameStyleFull var cpuprofile string var jsonOutput bool @@ -84,15 +148,33 @@ func mainImplementation() error { var threshold sizes.Threshold = 1 var progress bool var version bool + var filter git.IncludeExcludeFilter flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) flags.Usage = func() { fmt.Print(Usage) } - flags.BoolVar(&processBranches, "branches", false, 
"process all branches") - flags.BoolVar(&processTags, "tags", false, "process all tags") - flags.BoolVar(&processRemotes, "remotes", false, "process all remote-tracking branches") + flags.Var(&filterValue{&filter, git.Include, ""}, "include", "include specified references") + flags.Var(&filterValue{&filter, git.Exclude, ""}, "exclude", "exclude specified references") + + flag := flags.VarPF( + &filterValue{&filter, git.Include, "refs/heads/"}, "branches", "", + "process all branches", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&filter, git.Include, "refs/tags/"}, "tags", "", + "process all tags", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&filter, git.Include, "refs/remotes/"}, "remotes", "", + "process all remotes", + ) + flag.NoOptDefVal = "true" flags.VarP( sizes.NewThresholdFlagValue(&threshold, 0), @@ -181,19 +263,6 @@ func mainImplementation() error { var historySize sizes.HistorySize - var filter git.IncludeExcludeFilter - if processBranches || processTags || processRemotes { - if processBranches { - filter.Include(git.BranchesFilter) - } - if processTags { - filter.Include(git.TagsFilter) - } - if processRemotes { - filter.Include(git.RemotesFilter) - } - } - historySize, err = sizes.ScanRepositoryUsingGraph(repo, filter.Filter, nameStyle, progress) if err != nil { return fmt.Errorf("error scanning repository: %s", err) diff --git a/git/ref_filter.go b/git/ref_filter.go index 2dd8996..f9ea650 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -75,9 +75,3 @@ func PrefixFilter(prefix string) ReferenceFilter { (len(r.Refname) == len(prefix) || r.Refname[len(prefix)] == '/') } } - -var ( - BranchesFilter = PrefixFilter("refs/heads/") - TagsFilter = PrefixFilter("refs/tags/") - RemotesFilter = PrefixFilter("refs/remotes/") -) From 9ee524178beb3053e51b030507e7e7fc23fc2355 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 13 Mar 2021 18:49:31 +0100 Subject: [PATCH 017/176] Allow arbitrary 
references to be included/excluded by regexp --- git-sizer.go | 57 ++++++++++++++++++++++++++++++++++++----------- git/ref_filter.go | 16 +++++++++++++ 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 59f9a92..a06887c 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -45,8 +45,15 @@ const Usage = `usage: git-sizer [OPTS] --remotes process remote refs --include prefix process references with the specified prefix (e.g., '--include=refs/remotes/origin') + --include-regexp pattern process references matching the specified + regular expression (e.g., + '--include-regexp=refs/tags/release-.*') --exclude prefix don't process references with the specified prefix (e.g., '--exclude=refs/notes') + --exclude-regexp pattern don't process references matching the specified + regular expression + + Regular expression patterns must match the full reference name. ` @@ -82,18 +89,25 @@ func (v *NegatedBoolValue) Type() string { type filterValue struct { filter *git.IncludeExcludeFilter polarity git.Polarity - prefix string + pattern string + regexp bool } func (v *filterValue) Set(s string) error { - var prefix string var polarity git.Polarity + var filter git.ReferenceFilter - if v.prefix == "" { - prefix = s + if v.regexp { polarity = v.polarity + var err error + filter, err = git.RegexpFilter(s) + if err != nil { + return fmt.Errorf("invalid regexp: %q", s) + } + } else if v.pattern == "" { + polarity = v.polarity + filter = git.PrefixFilter(s) } else { - prefix = v.prefix // Allow a boolean value to alter the polarity: b, err := strconv.ParseBool(s) if err != nil { @@ -104,13 +118,14 @@ func (v *filterValue) Set(s string) error { } else { polarity = git.Exclude } + filter = git.PrefixFilter(v.pattern) } switch polarity { case git.Include: - v.filter.Include(git.PrefixFilter(prefix)) + v.filter.Include(filter) case git.Exclude: - v.filter.Exclude(git.PrefixFilter(prefix)) + v.filter.Exclude(filter) } return nil @@ -125,7 +140,9 @@ 
func (v *filterValue) String() string { } func (v *filterValue) Type() string { - if v.prefix == "" { + if v.regexp { + return "regexp" + } else if v.pattern == "" { return "prefix" } else { return "" @@ -155,23 +172,37 @@ func mainImplementation() error { fmt.Print(Usage) } - flags.Var(&filterValue{&filter, git.Include, ""}, "include", "include specified references") - flags.Var(&filterValue{&filter, git.Exclude, ""}, "exclude", "exclude specified references") + flags.Var( + &filterValue{&filter, git.Include, "", false}, "include", + "include specified references", + ) + flags.Var( + &filterValue{&filter, git.Include, "", true}, "include-regexp", + "include references matching the specified regular expression", + ) + flags.Var( + &filterValue{&filter, git.Exclude, "", false}, "exclude", + "exclude specified references", + ) + flags.Var( + &filterValue{&filter, git.Exclude, "", true}, "exclude-regexp", + "exclude references matching the specified regular expression", + ) flag := flags.VarPF( - &filterValue{&filter, git.Include, "refs/heads/"}, "branches", "", + &filterValue{&filter, git.Include, "refs/heads/", false}, "branches", "", "process all branches", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&filter, git.Include, "refs/tags/"}, "tags", "", + &filterValue{&filter, git.Include, "refs/tags/", false}, "tags", "", "process all tags", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&filter, git.Include, "refs/remotes/"}, "remotes", "", + &filterValue{&filter, git.Include, "refs/remotes/", false}, "remotes", "", "process all remotes", ) flag.NoOptDefVal = "true" diff --git a/git/ref_filter.go b/git/ref_filter.go index f9ea650..ba11ce8 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -1,6 +1,7 @@ package git import ( + "regexp" "strings" ) @@ -75,3 +76,18 @@ func PrefixFilter(prefix string) ReferenceFilter { (len(r.Refname) == len(prefix) || r.Refname[len(prefix)] == '/') } } + +// RegexpFilter returns a `ReferenceFilter` 
that matches references +// whose names match the specified `prefix`, which must match the +// whole reference name. +func RegexpFilter(pattern string) (ReferenceFilter, error) { + pattern = "^" + pattern + "$" + re, err := regexp.Compile(pattern) + if err != nil { + return nil, err + } + + return func(r Reference) bool { + return re.MatchString(r.Refname) + }, nil +} From 99c5f75fbb0dda7c13a3d90847a730e706dbcada Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 13 Mar 2021 19:17:04 +0100 Subject: [PATCH 018/176] mainImplementation(): take the command-line arguments as args --- git-sizer.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index a06887c..2c84d42 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -150,14 +150,14 @@ func (v *filterValue) Type() string { } func main() { - err := mainImplementation() + err := mainImplementation(os.Args[1:]) if err != nil { fmt.Fprintf(os.Stderr, "error: %s\n", err) os.Exit(1) } } -func mainImplementation() error { +func mainImplementation(args []string) error { var nameStyle sizes.NameStyle = sizes.NameStyleFull var cpuprofile string var jsonOutput bool @@ -250,7 +250,7 @@ func mainImplementation() error { flags.SortFlags = false - err = flags.Parse(os.Args[1:]) + err = flags.Parse(args) if err != nil { if err == pflag.ErrHelp { return nil @@ -280,9 +280,7 @@ func mainImplementation() error { return nil } - args := flags.Args() - - if len(args) != 0 { + if len(flags.Args()) != 0 { return errors.New("excess arguments") } From 1865f3345af7c503e7efbde7c8ad815388c2d1a2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 13 Mar 2021 19:18:10 +0100 Subject: [PATCH 019/176] Add a `--show-refs` option, to show what refs are being processed This will help people figure out the right reference selection options. 
--- git-sizer.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/git-sizer.go b/git-sizer.go index 2c84d42..d8d941b 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -52,6 +52,7 @@ const Usage = `usage: git-sizer [OPTS] prefix (e.g., '--exclude=refs/notes') --exclude-regexp pattern don't process references matching the specified regular expression + --show-refs show which refs are being included/excluded Regular expression patterns must match the full reference name. @@ -166,6 +167,7 @@ func mainImplementation(args []string) error { var progress bool var version bool var filter git.IncludeExcludeFilter + var showRefs bool flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) flags.Usage = func() { @@ -241,6 +243,7 @@ func mainImplementation(args []string) error { atty = false } flags.BoolVar(&progress, "progress", atty, "report progress to stderr") + flags.BoolVar(&showRefs, "show-refs", false, "list the references being processed") flags.BoolVar(&version, "version", false, "report the git-sizer version number") flags.Var(&NegatedBoolValue{&progress}, "no-progress", "suppress progress output") flags.Lookup("no-progress").NoOptDefVal = "true" @@ -292,7 +295,23 @@ func mainImplementation(args []string) error { var historySize sizes.HistorySize - historySize, err = sizes.ScanRepositoryUsingGraph(repo, filter.Filter, nameStyle, progress) + var refFilter git.ReferenceFilter = filter.Filter + + if showRefs { + oldRefFilter := refFilter + fmt.Fprintf(os.Stderr, "References (included references marked with '+'):\n") + refFilter = func(ref git.Reference) bool { + b := oldRefFilter(ref) + if b { + fmt.Fprintf(os.Stderr, "+ %s\n", ref.Refname) + } else { + fmt.Fprintf(os.Stderr, " %s\n", ref.Refname) + } + return b + } + } + + historySize, err = sizes.ScanRepositoryUsingGraph(repo, refFilter, nameStyle, progress) if err != nil { return fmt.Errorf("error scanning repository: %s", err) } From 2b546af1bcec95750be2489ad8197928fc5e5a85 
Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 15:27:44 +0200 Subject: [PATCH 020/176] newGitBomb(): turn function into a test helper --- git_sizer_test.go | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 721d391..35e9a29 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -62,12 +62,12 @@ func addAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { } func newGitBomb( - repoName string, depth, breadth int, body string, -) (repo *git.Repository, err error) { + t *testing.T, repoName string, depth, breadth int, body string, +) (repo *git.Repository) { + t.Helper() + path, err := ioutil.TempDir("", repoName) - if err != nil { - return nil, err - } + require.NoError(t, err) defer func() { if err != nil { @@ -77,19 +77,16 @@ func newGitBomb( cmd := exec.Command("git", "init", "--bare", path) err = cmd.Run() - if err != nil { - return nil, err - } + require.NoError(t, err) repo, err = git.NewRepository(path) - if err != nil { - return nil, err - } + require.NoError(t, err) oid, err := repo.CreateObject("blob", func(w io.Writer) error { _, err := io.WriteString(w, body) return err }) + require.NoError(t, err) digits := len(fmt.Sprintf("%d", breadth-1)) @@ -109,9 +106,7 @@ func newGitBomb( } return nil }) - if err != nil { - return nil, err - } + require.NoError(t, err) mode = "40000" prefix = "d" @@ -129,16 +124,12 @@ func newGitBomb( ) return err }) - if err != nil { - return nil, err - } + require.NoError(t, err) err = repo.UpdateRef("refs/heads/master", oid) - if err != nil { - return nil, err - } + require.NoError(t, err) - return repo, nil + return repo } func pow(x uint64, n int) uint64 { @@ -153,10 +144,7 @@ func TestBomb(t *testing.T) { t.Parallel() assert := assert.New(t) - repo, err := newGitBomb("bomb", 10, 10, "boom!\n") - if err != nil { - t.Errorf("failed to create bomb: %s", err) - } + repo := newGitBomb(t, "bomb", 10, 10, 
"boom!\n") defer os.RemoveAll(repo.Path()) h, err := sizes.ScanRepositoryUsingGraph( From 092051954ce719a4eca4e9157c78fa9fa2abab9c Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 15:29:41 +0200 Subject: [PATCH 021/176] newGitBomb(): let the caller create the temporary directory This is simpler, because the caller is also the one that has to clean it up. --- git_sizer_test.go | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 35e9a29..3ac1bbe 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -62,21 +62,12 @@ func addAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { } func newGitBomb( - t *testing.T, repoName string, depth, breadth int, body string, + t *testing.T, path string, depth, breadth int, body string, ) (repo *git.Repository) { t.Helper() - path, err := ioutil.TempDir("", repoName) - require.NoError(t, err) - - defer func() { - if err != nil { - os.RemoveAll(path) - } - }() - cmd := exec.Command("git", "init", "--bare", path) - err = cmd.Run() + err := cmd.Run() require.NoError(t, err) repo, err = git.NewRepository(path) @@ -144,8 +135,14 @@ func TestBomb(t *testing.T) { t.Parallel() assert := assert.New(t) - repo := newGitBomb(t, "bomb", 10, 10, "boom!\n") - defer os.RemoveAll(repo.Path()) + path, err := ioutil.TempDir("", "bomb") + require.NoError(t, err) + + defer func() { + os.RemoveAll(path) + }() + + repo := newGitBomb(t, path, 10, 10, "boom!\n") h, err := sizes.ScanRepositoryUsingGraph( repo, git.AllReferencesFilter, sizes.NameStyleFull, false, From 7155294279d6340809aade5da82594e7f7d101eb Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 15:42:13 +0200 Subject: [PATCH 022/176] Make commit message in test repo more informative and less scary --- git_sizer_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 3ac1bbe..6a24666 100644 --- 
a/git_sizer_test.go +++ b/git_sizer_test.go @@ -110,7 +110,7 @@ func newGitBomb( "author Example 1112911993 -0700\n"+ "committer Example 1112911993 -0700\n"+ "\n"+ - "Mwahahaha!\n", + "Test git bomb\n", oid, ) return err @@ -152,8 +152,8 @@ func TestBomb(t *testing.T) { } assert.Equal(counts.Count32(1), h.UniqueCommitCount, "unique commit count") - assert.Equal(counts.Count64(169), h.UniqueCommitSize, "unique commit size") - assert.Equal(counts.Count32(169), h.MaxCommitSize, "max commit size") + assert.Equal(counts.Count64(172), h.UniqueCommitSize, "unique commit size") + assert.Equal(counts.Count32(172), h.MaxCommitSize, "max commit size") assert.Equal("refs/heads/master", h.MaxCommitSizeCommit.Path(), "max commit size commit") assert.Equal(counts.Count32(1), h.MaxHistoryDepth, "max history depth") assert.Equal(counts.Count32(0), h.MaxParentCount, "max parent count") From cd9e0c3d480138d069a1ddb556ad6f781696dfc2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 16:38:10 +0200 Subject: [PATCH 023/176] TestBomb(): don't instantiate an `Assertions` object The other test functions don't, so doing it in this one function was confusing. 
--- git_sizer_test.go | 83 +++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 43 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 6a24666..c782c3d 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -133,7 +133,6 @@ func pow(x uint64, n int) uint64 { func TestBomb(t *testing.T) { t.Parallel() - assert := assert.New(t) path, err := ioutil.TempDir("", "bomb") require.NoError(t, err) @@ -147,49 +146,47 @@ func TestBomb(t *testing.T) { h, err := sizes.ScanRepositoryUsingGraph( repo, git.AllReferencesFilter, sizes.NameStyleFull, false, ) - if !assert.NoError(err) { - return - } + require.NoError(t, err) - assert.Equal(counts.Count32(1), h.UniqueCommitCount, "unique commit count") - assert.Equal(counts.Count64(172), h.UniqueCommitSize, "unique commit size") - assert.Equal(counts.Count32(172), h.MaxCommitSize, "max commit size") - assert.Equal("refs/heads/master", h.MaxCommitSizeCommit.Path(), "max commit size commit") - assert.Equal(counts.Count32(1), h.MaxHistoryDepth, "max history depth") - assert.Equal(counts.Count32(0), h.MaxParentCount, "max parent count") - assert.Equal("refs/heads/master", h.MaxParentCountCommit.Path(), "max parent count commit") - - assert.Equal(counts.Count32(10), h.UniqueTreeCount, "unique tree count") - assert.Equal(counts.Count64(2910), h.UniqueTreeSize, "unique tree size") - assert.Equal(counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") - assert.Equal(counts.Count32(10), h.MaxTreeEntries, "max tree entries") - assert.Equal("refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.Path(), "max tree entries tree") - - assert.Equal(counts.Count32(1), h.UniqueBlobCount, "unique blob count") - assert.Equal(counts.Count64(6), h.UniqueBlobSize, "unique blob size") - assert.Equal(counts.Count32(6), h.MaxBlobSize, "max blob size") - assert.Equal("refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.Path(), "max blob size blob") - - 
assert.Equal(counts.Count32(0), h.UniqueTagCount, "unique tag count") - assert.Equal(counts.Count32(0), h.MaxTagDepth, "max tag depth") - - assert.Equal(counts.Count32(1), h.ReferenceCount, "reference count") - - assert.Equal(counts.Count32(10), h.MaxPathDepth, "max path depth") - assert.Equal("refs/heads/master^{tree}", h.MaxPathDepthTree.Path(), "max path depth tree") - assert.Equal(counts.Count32(29), h.MaxPathLength, "max path length") - assert.Equal("refs/heads/master^{tree}", h.MaxPathLengthTree.Path(), "max path length tree") - - assert.Equal(counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") - assert.Equal("refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.Path(), "max expanded tree count tree") - assert.Equal(counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") - assert.Equal("refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.Path(), "max expanded blob count tree") - assert.Equal(counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") - assert.Equal("refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.Path(), "max expanded blob size tree") - assert.Equal(counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") - assert.Nil(h.MaxExpandedLinkCountTree, "max expanded link count tree") - assert.Equal(counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") - assert.Nil(h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") + assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.Path(), "max commit size commit") + assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + 
assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.Path(), "max parent count commit") + + assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.Path(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.Path(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.Path(), "max path depth tree") + assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.Path(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.Path(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.Path(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, 
"refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.Path(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") } func TestTaggedTags(t *testing.T) { From ebb8ecee56078ddfac5c26f34d4c72c875c8eaf3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 18:11:41 +0200 Subject: [PATCH 024/176] main_test.gitCommand(): label it a test helper --- git_sizer_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/git_sizer_test.go b/git_sizer_test.go index c782c3d..b8d5d5a 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -26,6 +26,8 @@ func TestExec(t *testing.T) { } func gitCommand(t *testing.T, repo *git.Repository, args ...string) *exec.Cmd { + t.Helper() + cmd := exec.Command("git", args...) cmd.Env = append(os.Environ(), "GIT_DIR="+repo.Path()) return cmd From 7924a9efe2319f3f6a58aa5d0b16860bfba027d8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 16:23:01 +0200 Subject: [PATCH 025/176] main_test.gitCommand(): take a path rather than `Repository` arg Let's decouple the tests from the `git` package. --- git_sizer_test.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index b8d5d5a..75f886b 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -25,11 +25,11 @@ func TestExec(t *testing.T) { assert.NoErrorf(t, err, "command failed; output: %#v", string(output)) } -func gitCommand(t *testing.T, repo *git.Repository, args ...string) *exec.Cmd { +func gitCommand(t *testing.T, repoPath string, args ...string) *exec.Cmd { t.Helper() cmd := exec.Command("git", args...) 
- cmd.Env = append(os.Environ(), "GIT_DIR="+repo.Path()) + cmd.Env = append(os.Environ(), "GIT_DIR="+repoPath) return cmd } @@ -46,7 +46,7 @@ func addFile(t *testing.T, repoPath string, repo *git.Repository, relativePath, require.NoErrorf(t, err, "writing to file %q", filename) require.NoErrorf(t, f.Close(), "closing file %q", filename) - cmd := gitCommand(t, repo, "add", relativePath) + cmd := gitCommand(t, repo.Path(), "add", relativePath) cmd.Dir = repoPath require.NoErrorf(t, cmd.Run(), "adding file %q", relativePath) } @@ -207,21 +207,21 @@ func TestTaggedTags(t *testing.T) { timestamp := time.Unix(1112911993, 0) - cmd = gitCommand(t, repo, "commit", "-m", "initial", "--allow-empty") + cmd = gitCommand(t, repo.Path(), "commit", "-m", "initial", "--allow-empty") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") // The lexicographical order of these tags is important, hence // their strange names. - cmd = gitCommand(t, repo, "tag", "-m", "tag 1", "tag", "master") + cmd = gitCommand(t, repo.Path(), "tag", "-m", "tag 1", "tag", "master") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 1") - cmd = gitCommand(t, repo, "tag", "-m", "tag 2", "bag", "tag") + cmd = gitCommand(t, repo.Path(), "tag", "-m", "tag 2", "bag", "tag") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 2") - cmd = gitCommand(t, repo, "tag", "-m", "tag 3", "wag", "bag") + cmd = gitCommand(t, repo.Path(), "tag", "-m", "tag 3", "wag", "bag") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 3") @@ -250,7 +250,7 @@ func TestFromSubdir(t *testing.T) { addFile(t, path, repo, "subdir/file.txt", "Hello, world!\n") - cmd = gitCommand(t, repo, "commit", "-m", "initial") + cmd = gitCommand(t, repo.Path(), "commit", "-m", "initial") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") @@ -283,7 +283,7 @@ func TestSubmodule(t *testing.T) { addFile(t, submPath, submRepo, "submfile2.txt", "Hello again, 
submodule!\n") addFile(t, submPath, submRepo, "submfile3.txt", "Hello again, submodule!\n") - cmd = gitCommand(t, submRepo, "commit", "-m", "subm initial") + cmd = gitCommand(t, submRepo.Path(), "commit", "-m", "subm initial") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating subm commit") @@ -294,16 +294,16 @@ func TestSubmodule(t *testing.T) { require.NoError(t, err, "initializing main Repository object") addFile(t, mainPath, mainRepo, "mainfile.txt", "Hello, main!\n") - cmd = gitCommand(t, mainRepo, "commit", "-m", "main initial") + cmd = gitCommand(t, mainRepo.Path(), "commit", "-m", "main initial") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = gitCommand(t, mainRepo, "submodule", "add", submPath, "sub") + cmd = gitCommand(t, mainRepo.Path(), "submodule", "add", submPath, "sub") cmd.Dir = mainPath require.NoError(t, cmd.Run(), "adding submodule") - cmd = gitCommand(t, mainRepo, "commit", "-m", "add submodule") + cmd = gitCommand(t, mainRepo.Path(), "commit", "-m", "add submodule") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "committing submodule to main") From b4db7cd19a3fc3ca7787aefbe1377ba286a81c5b Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 16:25:36 +0200 Subject: [PATCH 026/176] updateRef(): make into a test helper function It's only needed for testing. 
--- git/git.go | 11 ----------- git_sizer_test.go | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/git/git.go b/git/git.go index 7883cf9..44eae15 100644 --- a/git/git.go +++ b/git/git.go @@ -522,17 +522,6 @@ func (repo *Repository) CreateObject(t ObjectType, writer func(io.Writer) error) return NewOID(string(bytes.TrimSpace(output))) } -func (repo *Repository) UpdateRef(refname string, oid OID) error { - var cmd *exec.Cmd - - if oid == NullOID { - cmd = repo.gitCommand("update-ref", "-d", refname) - } else { - cmd = repo.gitCommand("update-ref", refname, oid.String()) - } - return cmd.Run() -} - // Next returns the next object, or EOF when done. func (l *ObjectIter) Next() (OID, ObjectType, counts.Count32, error) { line, err := l.f.ReadString('\n') diff --git a/git_sizer_test.go b/git_sizer_test.go index 75f886b..32f9b97 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -33,6 +33,19 @@ func gitCommand(t *testing.T, repoPath string, args ...string) *exec.Cmd { return cmd } +func updateRef(t *testing.T, repoPath string, refname string, oid git.OID) error { + t.Helper() + + var cmd *exec.Cmd + + if oid == git.NullOID { + cmd = gitCommand(t, repoPath, "update-ref", "-d", refname) + } else { + cmd = gitCommand(t, repoPath, "update-ref", refname, oid.String()) + } + return cmd.Run() +} + func addFile(t *testing.T, repoPath string, repo *git.Repository, relativePath, contents string) { dirPath := filepath.Dir(relativePath) if dirPath != "." { @@ -119,7 +132,7 @@ func newGitBomb( }) require.NoError(t, err) - err = repo.UpdateRef("refs/heads/master", oid) + err = updateRef(t, repo.Path(), "refs/heads/master", oid) require.NoError(t, err) return repo From 72f7a67506d051a4ff2e6a6a1dc53301f985a127 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 20 Apr 2021 16:32:46 +0200 Subject: [PATCH 027/176] createObject(): make into a test helper function It's only needed for testing. 
--- git/git.go | 46 --------------------------------------- git_sizer_test.go | 55 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 55 deletions(-) diff --git a/git/git.go b/git/git.go index 44eae15..a9e3c76 100644 --- a/git/git.go +++ b/git/git.go @@ -7,7 +7,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -477,51 +476,6 @@ func (repo *Repository) NewObjectIter(args ...string) ( }, in1, nil } -// CreateObject creates a new Git object, of the specified type, in -// `Repository`. `writer` is a function that writes the object in `git -// hash-object` input format. This is used for testing only. -func (repo *Repository) CreateObject(t ObjectType, writer func(io.Writer) error) (OID, error) { - cmd := repo.gitCommand("hash-object", "-w", "-t", string(t), "--stdin") - in, err := cmd.StdinPipe() - if err != nil { - return OID{}, err - } - - out, err := cmd.StdoutPipe() - if err != nil { - return OID{}, err - } - - cmd.Stderr = os.Stderr - - err = cmd.Start() - if err != nil { - return OID{}, err - } - - err = writer(in) - err2 := in.Close() - if err != nil { - cmd.Wait() - return OID{}, err - } - if err2 != nil { - cmd.Wait() - return OID{}, err2 - } - - output, err := ioutil.ReadAll(out) - err2 = cmd.Wait() - if err != nil { - return OID{}, err - } - if err2 != nil { - return OID{}, err2 - } - - return NewOID(string(bytes.TrimSpace(output))) -} - // Next returns the next object, or EOF when done. 
func (l *ObjectIter) Next() (OID, ObjectType, counts.Count32, error) { line, err := l.f.ReadString('\n') diff --git a/git_sizer_test.go b/git_sizer_test.go index 32f9b97..9e182fb 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -1,6 +1,7 @@ package main_test import ( + "bytes" "fmt" "io" "io/ioutil" @@ -10,12 +11,12 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/github/git-sizer/counts" "github.com/github/git-sizer/git" "github.com/github/git-sizer/sizes" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) // Smoke test that the program runs. @@ -46,6 +47,45 @@ func updateRef(t *testing.T, repoPath string, refname string, oid git.OID) error return cmd.Run() } +// CreateObject creates a new Git object, of the specified type, in +// `Repository`. `writer` is a function that writes the object in `git +// hash-object` input format. This is used for testing only. +func createObject( + t *testing.T, repoPath string, otype git.ObjectType, writer func(io.Writer) error, +) git.OID { + t.Helper() + + cmd := gitCommand(t, repoPath, "hash-object", "-w", "-t", string(otype), "--stdin") + in, err := cmd.StdinPipe() + require.NoError(t, err) + + out, err := cmd.StdoutPipe() + cmd.Stderr = os.Stderr + + err = cmd.Start() + require.NoError(t, err) + + err = writer(in) + err2 := in.Close() + if err != nil { + cmd.Wait() + require.NoError(t, err) + } + if err2 != nil { + cmd.Wait() + require.NoError(t, err2) + } + + output, err := ioutil.ReadAll(out) + err2 = cmd.Wait() + require.NoError(t, err) + require.NoError(t, err2) + + oid, err := git.NewOID(string(bytes.TrimSpace(output))) + require.NoError(t, err) + return oid +} + func addFile(t *testing.T, repoPath string, repo *git.Repository, relativePath, contents string) { dirPath := filepath.Dir(relativePath) if dirPath != "." 
{ @@ -88,11 +128,10 @@ func newGitBomb( repo, err = git.NewRepository(path) require.NoError(t, err) - oid, err := repo.CreateObject("blob", func(w io.Writer) error { + oid := createObject(t, repo.Path(), "blob", func(w io.Writer) error { _, err := io.WriteString(w, body) return err }) - require.NoError(t, err) digits := len(fmt.Sprintf("%d", breadth-1)) @@ -100,7 +139,7 @@ func newGitBomb( prefix := "f" for ; depth > 0; depth-- { - oid, err = repo.CreateObject("tree", func(w io.Writer) error { + oid = createObject(t, repo.Path(), "tree", func(w io.Writer) error { for i := 0; i < breadth; i++ { _, err = fmt.Fprintf( w, "%s %s%0*d\x00%s", @@ -112,13 +151,12 @@ func newGitBomb( } return nil }) - require.NoError(t, err) mode = "40000" prefix = "d" } - oid, err = repo.CreateObject("commit", func(w io.Writer) error { + oid = createObject(t, repo.Path(), "commit", func(w io.Writer) error { _, err := fmt.Fprintf( w, "tree %s\n"+ @@ -130,7 +168,6 @@ func newGitBomb( ) return err }) - require.NoError(t, err) err = updateRef(t, repo.Path(), "refs/heads/master", oid) require.NoError(t, err) From 5dbc258acf49720aa3b11caf5bbf3a9bdc96fd75 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 21 Apr 2021 10:50:07 +0200 Subject: [PATCH 028/176] gitCommand: take the path of the repo, not the `GIT_DIR`, as arg --- git_sizer_test.go | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 9e182fb..a3cbb38 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -29,9 +29,9 @@ func TestExec(t *testing.T) { func gitCommand(t *testing.T, repoPath string, args ...string) *exec.Cmd { t.Helper() - cmd := exec.Command("git", args...) - cmd.Env = append(os.Environ(), "GIT_DIR="+repoPath) - return cmd + gitArgs := []string{"-C", repoPath} + gitArgs = append(gitArgs, args...) + return exec.Command("git", gitArgs...) 
} func updateRef(t *testing.T, repoPath string, refname string, oid git.OID) error { @@ -99,8 +99,7 @@ func addFile(t *testing.T, repoPath string, repo *git.Repository, relativePath, require.NoErrorf(t, err, "writing to file %q", filename) require.NoErrorf(t, f.Close(), "closing file %q", filename) - cmd := gitCommand(t, repo.Path(), "add", relativePath) - cmd.Dir = repoPath + cmd := gitCommand(t, repoPath, "add", relativePath) require.NoErrorf(t, cmd.Run(), "adding file %q", relativePath) } @@ -257,21 +256,21 @@ func TestTaggedTags(t *testing.T) { timestamp := time.Unix(1112911993, 0) - cmd = gitCommand(t, repo.Path(), "commit", "-m", "initial", "--allow-empty") + cmd = gitCommand(t, path, "commit", "-m", "initial", "--allow-empty") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") // The lexicographical order of these tags is important, hence // their strange names. - cmd = gitCommand(t, repo.Path(), "tag", "-m", "tag 1", "tag", "master") + cmd = gitCommand(t, path, "tag", "-m", "tag 1", "tag", "master") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 1") - cmd = gitCommand(t, repo.Path(), "tag", "-m", "tag 2", "bag", "tag") + cmd = gitCommand(t, path, "tag", "-m", "tag 2", "bag", "tag") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 2") - cmd = gitCommand(t, repo.Path(), "tag", "-m", "tag 3", "wag", "bag") + cmd = gitCommand(t, path, "tag", "-m", "tag 3", "wag", "bag") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 3") @@ -300,7 +299,7 @@ func TestFromSubdir(t *testing.T) { addFile(t, path, repo, "subdir/file.txt", "Hello, world!\n") - cmd = gitCommand(t, repo.Path(), "commit", "-m", "initial") + cmd = gitCommand(t, path, "commit", "-m", "initial") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") @@ -333,7 +332,7 @@ func TestSubmodule(t *testing.T) { addFile(t, submPath, submRepo, "submfile2.txt", "Hello again, 
submodule!\n") addFile(t, 
submPath, submRepo, "submfile3.txt", "Hello again, submodule!\n") - cmd = gitCommand(t, submRepo.Path(), "commit", "-m", "subm initial") + cmd = gitCommand(t, submPath, "commit", "-m", "subm initial") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating subm commit") @@ -344,16 +343,16 @@ func TestSubmodule(t *testing.T) { require.NoError(t, err, "initializing main Repository object") addFile(t, mainPath, mainRepo, "mainfile.txt", "Hello, main!\n") - cmd = gitCommand(t, mainRepo.Path(), "commit", "-m", "main initial") + cmd = gitCommand(t, mainPath, "commit", "-m", "main initial") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = gitCommand(t, mainRepo.Path(), "submodule", "add", submPath, "sub") + cmd = gitCommand(t, mainPath, "submodule", "add", submPath, "sub") cmd.Dir = mainPath require.NoError(t, cmd.Run(), "adding submodule") - cmd = gitCommand(t, mainRepo.Path(), "commit", "-m", "add submodule") + cmd = gitCommand(t, mainPath, "commit", "-m", "add submodule") addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "committing submodule to main") From 1d63286396b9b9dd07c683ff9e496431a3131b4c Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 21 Apr 2021 10:51:46 +0200 Subject: [PATCH 029/176] createObject(): remove unused argument --- git_sizer_test.go | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index a3cbb38..77488ae 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -48,8 +48,9 @@ func updateRef(t *testing.T, repoPath string, refname string, oid git.OID) error } // CreateObject creates a new Git object, of the specified type, in -// `Repository`. `writer` is a function that writes the object in `git -// hash-object` input format. This is used for testing only. +// the repository at `repoPath`. `writer` is a function that writes +// the object in `git hash-object` input format. 
This is used for +// testing only. func createObject( t *testing.T, repoPath string, otype git.ObjectType, writer func(io.Writer) error, ) git.OID { @@ -86,7 +87,7 @@ func createObject( return oid } -func addFile(t *testing.T, repoPath string, repo *git.Repository, relativePath, contents string) { +func addFile(t *testing.T, repoPath string, relativePath, contents string) { dirPath := filepath.Dir(relativePath) if dirPath != "." { require.NoError(t, os.MkdirAll(filepath.Join(repoPath, dirPath), 0777), "creating subdir") @@ -292,12 +293,10 @@ func TestFromSubdir(t *testing.T) { cmd := exec.Command("git", "init", path) require.NoError(t, cmd.Run(), "initializing repo") - repo, err := git.NewRepository(path) - require.NoError(t, err, "initializing Repository object") timestamp := time.Unix(1112911993, 0) - addFile(t, path, repo, "subdir/file.txt", "Hello, world!\n") + addFile(t, path, "subdir/file.txt", "Hello, world!\n") cmd = gitCommand(t, path, "commit", "-m", "initial") addAuthorInfo(cmd, &timestamp) @@ -326,11 +325,9 @@ func TestSubmodule(t *testing.T) { submPath := filepath.Join(path, "subm") cmd := exec.Command("git", "init", submPath) require.NoError(t, cmd.Run(), "initializing subm repo") - submRepo, err := git.NewRepository(submPath) - require.NoError(t, err, "initializing subm Repository object") - addFile(t, submPath, submRepo, "submfile1.txt", "Hello, submodule!\n") - addFile(t, submPath, submRepo, "submfile2.txt", "Hello again, submodule!\n") - addFile(t, submPath, submRepo, "submfile3.txt", "Hello again, submodule!\n") + addFile(t, submPath, "submfile1.txt", "Hello, submodule!\n") + addFile(t, submPath, "submfile2.txt", "Hello again, submodule!\n") + addFile(t, submPath, "submfile3.txt", "Hello again, submodule!\n") cmd = gitCommand(t, submPath, "commit", "-m", "subm initial") addAuthorInfo(cmd, &timestamp) @@ -341,7 +338,7 @@ func TestSubmodule(t *testing.T) { require.NoError(t, cmd.Run(), "initializing main repo") mainRepo, err := 
git.NewRepository(mainPath) 
require.NoError(t, err, "initializing main Repository object") - addFile(t, mainPath, mainRepo, "mainfile.txt", "Hello, main!\n") + addFile(t, mainPath, "mainfile.txt", "Hello, main!\n") cmd = gitCommand(t, mainPath, "commit", "-m", "main initial") addAuthorInfo(cmd, &timestamp) From 74d7c16885600f44ba83ab39403fa26ee5a04510 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 21 Apr 2021 11:02:22 +0200 Subject: [PATCH 030/176] tests: stop using `Repository.Path()` --- git_sizer_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 77488ae..5d07d78 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -128,7 +128,7 @@ func newGitBomb( repo, err = git.NewRepository(path) require.NoError(t, err) - oid := createObject(t, repo.Path(), "blob", func(w io.Writer) error { + oid := createObject(t, path, "blob", func(w io.Writer) error { _, err := io.WriteString(w, body) return err }) @@ -139,7 +139,7 @@ func newGitBomb( prefix := "f" for ; depth > 0; depth-- { - oid = createObject(t, repo.Path(), "tree", func(w io.Writer) error { + oid = createObject(t, path, "tree", func(w io.Writer) error { for i := 0; i < breadth; i++ { _, err = fmt.Fprintf( w, "%s %s%0*d\x00%s", @@ -156,7 +156,7 @@ func newGitBomb( prefix = "d" } - oid = createObject(t, repo.Path(), "commit", func(w io.Writer) error { + oid = createObject(t, path, "commit", func(w io.Writer) error { _, err := fmt.Fprintf( w, "tree %s\n"+ @@ -169,7 +169,7 @@ func newGitBomb( return err }) - err = updateRef(t, repo.Path(), "refs/heads/master", oid) + err = updateRef(t, path, "refs/heads/master", oid) require.NoError(t, err) return repo From bb0bde6022f47cd3060844e75562c18356bcbdb9 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 21 Apr 2021 11:07:59 +0200 Subject: [PATCH 031/176] tests: stop using `git.Repository` objects internally Only create them when needed for calling `ScanRepositoryUsingGraph()`. 
--- git_sizer_test.go | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 5d07d78..effc30d 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -26,6 +26,14 @@ func TestExec(t *testing.T) { assert.NoErrorf(t, err, "command failed; output: %#v", string(output)) } +func newRepository(t *testing.T, repoPath string) *git.Repository { + t.Helper() + + repo, err := git.NewRepository(repoPath) + require.NoError(t, err) + return repo +} + func gitCommand(t *testing.T, repoPath string, args ...string) *exec.Cmd { t.Helper() @@ -118,16 +126,13 @@ func addAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { func newGitBomb( t *testing.T, path string, depth, breadth int, body string, -) (repo *git.Repository) { +) { t.Helper() cmd := exec.Command("git", "init", "--bare", path) err := cmd.Run() require.NoError(t, err) - repo, err = git.NewRepository(path) - require.NoError(t, err) - oid := createObject(t, path, "blob", func(w io.Writer) error { _, err := io.WriteString(w, body) return err @@ -171,8 +176,6 @@ func newGitBomb( err = updateRef(t, path, "refs/heads/master", oid) require.NoError(t, err) - - return repo } func pow(x uint64, n int) uint64 { @@ -193,10 +196,10 @@ func TestBomb(t *testing.T) { os.RemoveAll(path) }() - repo := newGitBomb(t, path, 10, 10, "boom!\n") + newGitBomb(t, path, 10, 10, "boom!\n") h, err := sizes.ScanRepositoryUsingGraph( - repo, git.AllReferencesFilter, sizes.NameStyleFull, false, + newRepository(t, path), git.AllReferencesFilter, sizes.NameStyleFull, false, ) require.NoError(t, err) @@ -252,8 +255,6 @@ func TestTaggedTags(t *testing.T) { cmd := exec.Command("git", "init", path) require.NoError(t, cmd.Run(), "initializing repo") - repo, err := git.NewRepository(path) - require.NoError(t, err, "initializing Repository object") timestamp := time.Unix(1112911993, 0) @@ -276,7 +277,7 @@ func TestTaggedTags(t *testing.T) { require.NoError(t, 
cmd.Run(), "creating tag 3") h, err := sizes.ScanRepositoryUsingGraph( - repo, git.AllReferencesFilter, sizes.NameStyleNone, false, + newRepository(t, path), git.AllReferencesFilter, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -302,10 +303,9 @@ func TestFromSubdir(t *testing.T) { addAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") - repo2, err := git.NewRepository(filepath.Join(path, "subdir")) - require.NoError(t, err, "creating Repository object in subdirectory") h, err := sizes.ScanRepositoryUsingGraph( - repo2, git.AllReferencesFilter, sizes.NameStyleNone, false, + newRepository(t, filepath.Join(path, "subdir")), + git.AllReferencesFilter, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -336,8 +336,7 @@ func TestSubmodule(t *testing.T) { mainPath := filepath.Join(path, "main") cmd = exec.Command("git", "init", mainPath) require.NoError(t, cmd.Run(), "initializing main repo") - mainRepo, err := git.NewRepository(mainPath) - require.NoError(t, err, "initializing main Repository object") + addFile(t, mainPath, "mainfile.txt", "Hello, main!\n") cmd = gitCommand(t, mainPath, "commit", "-m", "main initial") @@ -355,7 +354,7 @@ func TestSubmodule(t *testing.T) { // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - mainRepo, git.AllReferencesFilter, sizes.NameStyleNone, false, + newRepository(t, mainPath), git.AllReferencesFilter, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -363,10 +362,9 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(1), h.MaxExpandedSubmoduleCount, "max expanded submodule count") // Analyze the submodule: - submRepo2, err := git.NewRepository(filepath.Join(mainPath, "sub")) - 
require.NoError(t, err, "creating Repository object in submodule") h, err = sizes.ScanRepositoryUsingGraph( - submRepo2, git.AllReferencesFilter, sizes.NameStyleNone, false, + newRepository(t, filepath.Join(mainPath, "sub")), + git.AllReferencesFilter, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") From 0e7961fff494601e464895c705ba77181780bca3 Mon Sep 17 00:00:00 2001 From: Daniel Bast <2790401+dbast@users.noreply.github.com> Date: Thu, 22 Apr 2021 12:06:18 +0200 Subject: [PATCH 032/176] Add test workflow --- .github/workflows/test.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..956332e --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,15 @@ +on: [push, pull_request] +name: Test +jobs: + test: + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout code + uses: actions/checkout@v2 + - name: Get full repo histroy + run: git fetch --prune --unshallow --tags + - name: Test + run: make test From 55a860f77068429628f25996ed1a41290b3606b4 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 22 Apr 2021 15:27:22 +0200 Subject: [PATCH 033/176] ReferenceFilter: consider only the reference name That's all we need right now, and it will make the thing easier to test. 
--- git-sizer.go | 8 ++++---- git/ref_filter.go | 22 +++++++++++----------- sizes/graph.go | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index d8d941b..f8ae892 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -300,12 +300,12 @@ func mainImplementation(args []string) error { if showRefs { oldRefFilter := refFilter fmt.Fprintf(os.Stderr, "References (included references marked with '+'):\n") - refFilter = func(ref git.Reference) bool { - b := oldRefFilter(ref) + refFilter = func(refname string) bool { + b := oldRefFilter(refname) if b { - fmt.Fprintf(os.Stderr, "+ %s\n", ref.Refname) + fmt.Fprintf(os.Stderr, "+ %s\n", refname) } else { - fmt.Fprintf(os.Stderr, " %s\n", ref.Refname) + fmt.Fprintf(os.Stderr, " %s\n", refname) } return b } diff --git a/git/ref_filter.go b/git/ref_filter.go index ba11ce8..6f2140c 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -5,9 +5,9 @@ import ( "strings" ) -type ReferenceFilter func(Reference) bool +type ReferenceFilter func(refname string) bool -func AllReferencesFilter(_ Reference) bool { +func AllReferencesFilter(_ string) bool { return true } @@ -43,10 +43,10 @@ func (ief *IncludeExcludeFilter) Exclude(f ReferenceFilter) { ief.filters = append(ief.filters, polarizedFilter{Exclude, f}) } -func (ief *IncludeExcludeFilter) Filter(r Reference) bool { +func (ief *IncludeExcludeFilter) Filter(refname string) bool { for i := len(ief.filters); i > 0; i-- { f := ief.filters[i-1] - if !f.filter(r) { + if !f.filter(refname) { continue } return f.polarity == Include @@ -66,14 +66,14 @@ func (ief *IncludeExcludeFilter) Filter(r Reference) bool { // "refs/foobar". 
func PrefixFilter(prefix string) ReferenceFilter { if strings.HasSuffix(prefix, "/") { - return func(r Reference) bool { - return strings.HasPrefix(r.Refname, prefix) + return func(refname string) bool { + return strings.HasPrefix(refname, prefix) } } - return func(r Reference) bool { - return strings.HasPrefix(r.Refname, prefix) && - (len(r.Refname) == len(prefix) || r.Refname[len(prefix)] == '/') + return func(refname string) bool { + return strings.HasPrefix(refname, prefix) && + (len(refname) == len(prefix) || refname[len(prefix)] == '/') } } @@ -87,7 +87,7 @@ func RegexpFilter(pattern string) (ReferenceFilter, error) { return nil, err } - return func(r Reference) bool { - return re.MatchString(r.Refname) + return func(refname string) bool { + return re.MatchString(refname) }, nil } diff --git a/sizes/graph.go b/sizes/graph.go index d358938..ffe5aea 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -62,7 +62,7 @@ func ScanRepositoryUsingGraph( if !ok { break } - if !filter(ref) { + if !filter(ref.Refname) { continue } refs = append(refs, ref) From 197eb5a945a5a460151e908be27cdddb655fe6a5 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 22 Apr 2021 15:49:41 +0200 Subject: [PATCH 034/176] Add some tests of `ReferenceFilter`s and `IncludeExcludeFilter` --- git/ref_filter_test.go | 101 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 git/ref_filter_test.go diff --git a/git/ref_filter_test.go b/git/ref_filter_test.go new file mode 100644 index 0000000..013e082 --- /dev/null +++ b/git/ref_filter_test.go @@ -0,0 +1,101 @@ +package git_test + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/github/git-sizer/git" +) + +func TestPrefixFilter(t *testing.T) { + t.Parallel() + + for _, p := range []struct { + prefix string + refname string + expected bool + }{ + {"refs/heads", "refs/heads/master", true}, + {"refs/heads", "refs/tags/master", 
false}, + {"refs/heads", "refs/he", false}, + {"refs/heads", "refs/headstrong", false}, + {"refs/heads", "refs/tags/refs/heads", false}, + {"refs/stash", "refs/stash", true}, + {"refs/stash", "refs/stashy", false}, + {"refs/remotes", "refs/remotes/origin/master", true}, + } { + t.Run( + fmt.Sprintf("prefix '%s', refname '%s'", p.prefix, p.refname), + func(t *testing.T) { + assert.Equal(t, p.expected, git.PrefixFilter(p.prefix)(p.refname)) + }, + ) + } +} + +func regexpFilter(t *testing.T, pattern string) git.ReferenceFilter { + t.Helper() + + f, err := git.RegexpFilter(pattern) + require.NoError(t, err) + return f +} + +func TestRegexpFilter(t *testing.T) { + t.Parallel() + + for _, p := range []struct { + pattern string + refname string + expected bool + }{ + {`refs/heads/master`, "refs/heads/master", true}, + {`refs/heads/.*`, "refs/heads/master", true}, + {`.*/heads/.*`, "refs/heads/master", true}, + {`.*/heads/`, "refs/heads/master", false}, + {`.*/heads`, "refs/heads/master", false}, + {`/heads/.*`, "refs/heads/master", false}, + {`heads/.*`, "refs/heads/master", false}, + {`refs/tags/release-\d+\.\d+\.\d+`, "refs/tags/release-1.22.333", true}, + {`refs/tags/release-\d+\.\d+\.\d+`, "refs/tags/release-1.2.3rc1", false}, + } { + t.Run( + fmt.Sprintf("pattern '%s', refname '%s'", p.pattern, p.refname), + func(t *testing.T) { + assert.Equal(t, p.expected, regexpFilter(t, p.pattern)(p.refname)) + }, + ) + } +} + +func TestIncludeExcludeFilter(t *testing.T) { + t.Parallel() + + var filter git.IncludeExcludeFilter + filter.Include(git.PrefixFilter("refs/heads")) + filter.Exclude(regexpFilter(t, "refs/heads/.*foo.*")) + filter.Include(git.PrefixFilter("refs/remotes")) + filter.Exclude(git.PrefixFilter("refs/remotes/foo")) + + for _, p := range []struct { + refname string + expected bool + }{ + {"refs/heads/master", true}, + {"refs/heads/buffoon", false}, + {"refs/remotes/origin/master", true}, + {"refs/remotes/foo/master", false}, + {"refs/not-mentioned", false}, + } 
{ + t.Run( + fmt.Sprintf("include-exclude '%s'", p.refname), + func(t *testing.T) { + assert.Equal(t, p.expected, filter.Filter(p.refname)) + }, + ) + } + +} From d01bfce0722237204f1cf2a0ef222509423eef95 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 23 Apr 2021 13:09:31 +0200 Subject: [PATCH 035/176] Add an integration test of reference selection --- git_sizer_test.go | 114 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/git_sizer_test.go b/git_sizer_test.go index 721d391..5761648 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -1,6 +1,8 @@ package main_test import ( + "bytes" + "encoding/json" "fmt" "io" "io/ioutil" @@ -141,6 +143,118 @@ func newGitBomb( return repo, nil } +func TestRefSelection(t *testing.T) { + t.Parallel() + + allRefs := []string{ + "refs/barfoo", + "refs/foo", + "refs/foobar", + "refs/heads/foo", + "refs/heads/master", + "refs/remotes/origin/master", + "refs/remotes/upstream/foo", + "refs/remotes/upstream/master", + "refs/tags/foolish", + "refs/tags/other", + "refs/tags/release-1", + "refs/tags/release-2", + } + + expectedStderr := "References (included references marked with '+'):\n" + + "+ refs/barfoo\n" + + " refs/foo\n" + + "+ refs/foobar\n" + + "+ refs/heads/foo\n" + + "+ refs/heads/master\n" + + " refs/remotes/origin/master\n" + + "+ refs/remotes/upstream/foo\n" + + " refs/remotes/upstream/master\n" + + "+ refs/tags/foolish\n" + + "+ refs/tags/other\n" + + " refs/tags/release-1\n" + + " refs/tags/release-2\n" + + // Create a test repo with one orphan commit per refname: + path, err := ioutil.TempDir("", "ref-selection") + require.NoError(t, err) + + defer os.RemoveAll(path) + + err = exec.Command("git", "init", "--bare", path).Run() + require.NoError(t, err) + + repo, err := git.NewRepository(path) + require.NoError(t, err) + + for _, refname := range allRefs { + oid, err := repo.CreateObject("blob", func(w io.Writer) error { + _, err := fmt.Fprintf(w, "%s\n", refname) 
+ return err + }) + require.NoError(t, err) + + oid, err = repo.CreateObject("tree", func(w io.Writer) error { + _, err = fmt.Fprintf(w, "100644 a.txt\x00%s", oid.Bytes()) + return err + }) + require.NoError(t, err) + + oid, err = repo.CreateObject("commit", func(w io.Writer) error { + _, err := fmt.Fprintf( + w, + "tree %s\n"+ + "author Example 1112911993 -0700\n"+ + "committer Example 1112911993 -0700\n"+ + "\n"+ + "Commit for reference %s\n", + oid, refname, + ) + return err + }) + require.NoError(t, err) + + err = repo.UpdateRef(refname, oid) + require.NoError(t, err) + } + + executable, err := exec.LookPath("bin/git-sizer") + require.NoError(t, err) + executable, err = filepath.Abs(executable) + require.NoError(t, err) + + cmd := exec.Command( + executable, "--show-refs", "--no-progress", "--json", "--json-version=2", + "--include=refs/heads", + "--tags", + "--exclude", "refs/heads/foo", + "--include-regexp", ".*foo.*", + "--exclude", "refs/foo", + "--exclude-regexp", "refs/tags/release-.*", + ) + cmd.Dir = path + var stdout bytes.Buffer + cmd.Stdout = &stdout + var stderr bytes.Buffer + cmd.Stderr = &stderr + err = cmd.Run() + require.NoError(t, err) + + // Make sure that the right number of commits was scanned: + var v struct { + UniqueCommitCount struct { + Value int + } + } + err = json.Unmarshal(stdout.Bytes(), &v) + if assert.NoError(t, err) { + assert.EqualValues(t, 7, v.UniqueCommitCount.Value) + } + + // Make sure that the right references were reported scanned: + assert.Equal(t, expectedStderr, stderr.String()) +} + func pow(x uint64, n int) uint64 { p := uint64(1) for ; n > 0; n-- { From c7c0d038661bd44759dcc8ce045040245b163724 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 23 Apr 2021 13:19:06 +0200 Subject: [PATCH 036/176] updateRef(): check for errors internally That's a big part of the point of being a test helper :-) --- git_sizer_test.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/git_sizer_test.go 
b/git_sizer_test.go index 2ab7554..6c82a7a 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -43,7 +43,7 @@ func gitCommand(t *testing.T, repoPath string, args ...string) *exec.Cmd { return exec.Command("git", gitArgs...) } -func updateRef(t *testing.T, repoPath string, refname string, oid git.OID) error { +func updateRef(t *testing.T, repoPath string, refname string, oid git.OID) { t.Helper() var cmd *exec.Cmd @@ -53,7 +53,7 @@ func updateRef(t *testing.T, repoPath string, refname string, oid git.OID) error } else { cmd = gitCommand(t, repoPath, "update-ref", refname, oid.String()) } - return cmd.Run() + require.NoError(t, cmd.Run()) } // createObject creates a new Git object, of the specified type, in @@ -175,8 +175,7 @@ func newGitBomb( return err }) - err = updateRef(t, path, "refs/heads/master", oid) - require.NoError(t, err) + updateRef(t, path, "refs/heads/master", oid) } func TestRefSelection(t *testing.T) { @@ -244,8 +243,7 @@ func TestRefSelection(t *testing.T) { return err }) - err = updateRef(t, path, refname, oid) - require.NoError(t, err) + updateRef(t, path, refname, oid) } executable, err := exec.LookPath("bin/git-sizer") From f9bab8396844797cfcc5517cddac6af66d8d1bce Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 23 Apr 2021 14:14:25 +0200 Subject: [PATCH 037/176] TestPrefixFilter(): add tests of prefixes ending in `/` --- git/ref_filter_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/git/ref_filter_test.go b/git/ref_filter_test.go index 013e082..928de81 100644 --- a/git/ref_filter_test.go +++ b/git/ref_filter_test.go @@ -23,6 +23,8 @@ func TestPrefixFilter(t *testing.T) { {"refs/heads", "refs/he", false}, {"refs/heads", "refs/headstrong", false}, {"refs/heads", "refs/tags/refs/heads", false}, + {"refs/heads/", "refs/heads", false}, + {"refs/heads/", "refs/heads/foo/bar", true}, {"refs/stash", "refs/stash", true}, {"refs/stash", "refs/stashy", false}, {"refs/remotes", "refs/remotes/origin/master", true}, From 
3bcb0cd9714ae8315c6c5984abe5adfe2221eda8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 23 Apr 2021 14:27:22 +0200 Subject: [PATCH 038/176] Reference filtering: improve the usage note And make the tests more resemble the examples from the usage note. --- git-sizer.go | 16 +++++++++------- git/ref_filter_test.go | 12 +++++++++--- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index f8ae892..278bde1 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -19,7 +19,7 @@ import ( const Usage = `usage: git-sizer [OPTS] -v, --verbose report all statistics, whether concerning or not - --threshold threshold minimum level of concern (i.e., number of stars) + --threshold THRESHOLD minimum level of concern (i.e., number of stars) that should be reported. Default: '--threshold=1'. --critical only report critical statistics @@ -43,18 +43,20 @@ const Usage = `usage: git-sizer [OPTS] --branches process branches --tags process tags --remotes process remote refs - --include prefix process references with the specified prefix + --include PREFIX process references with the specified PREFIX (e.g., '--include=refs/remotes/origin') - --include-regexp pattern process references matching the specified + --include-regexp REGEXP process references matching the specified regular expression (e.g., '--include-regexp=refs/tags/release-.*') - --exclude prefix don't process references with the specified - prefix (e.g., '--exclude=refs/notes') - --exclude-regexp pattern don't process references matching the specified + --exclude PREFIX don't process references with the specified + PREFIX (e.g., '--exclude=refs/notes') + --exclude-regexp REGEXP don't process references matching the specified regular expression --show-refs show which refs are being included/excluded - Regular expression patterns must match the full reference name. + Prefixes must match at a boundary; for example 'refs/foo' matches + 'refs/foo' and 'refs/foo/bar' but not 'refs/foobar'. 
Regular + expression patterns must match the full reference name. ` diff --git a/git/ref_filter_test.go b/git/ref_filter_test.go index 928de81..b03c588 100644 --- a/git/ref_filter_test.go +++ b/git/ref_filter_test.go @@ -23,10 +23,16 @@ func TestPrefixFilter(t *testing.T) { {"refs/heads", "refs/he", false}, {"refs/heads", "refs/headstrong", false}, {"refs/heads", "refs/tags/refs/heads", false}, - {"refs/heads/", "refs/heads", false}, - {"refs/heads/", "refs/heads/foo/bar", true}, + + {"refs/foo", "refs/foo/bar", true}, + {"refs/foo", "refs/foo", true}, + {"refs/foo", "refs/foobar", false}, + + {"refs/foo/", "refs/foo/bar", true}, + {"refs/foo/", "refs/foo", false}, + {"refs/foo/", "refs/foobar", false}, + {"refs/stash", "refs/stash", true}, - {"refs/stash", "refs/stashy", false}, {"refs/remotes", "refs/remotes/origin/master", true}, } { t.Run( From 320b27a449205f0ab946a95684226565e0eec9c8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 8 Jul 2021 12:18:33 +0200 Subject: [PATCH 039/176] test.yml: fix typo --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 956332e..1fa5ffb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,7 +9,7 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v2 - - name: Get full repo histroy + - name: Get full repo history run: git fetch --prune --unshallow --tags - name: Test run: make test From f25ea5396df8c82c1195cb713e0d7f026b77e372 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 18 Sep 2021 11:43:08 +0200 Subject: [PATCH 040/176] Allow several options to be set via gitconfig: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * `sizer.jsonVersion` — default JSON version to output * `sizer.threshold` — default `--threshold` value * `sizer.names` — default `--names` value * `sizer.progress` — whether to show progress --- git-sizer.go | 90 
++++++++++++++++++++++++++++++++++++++++-------- git/gitconfig.go | 70 +++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 15 deletions(-) create mode 100644 git/gitconfig.go diff --git a/git-sizer.go b/git-sizer.go index 278bde1..c2c6796 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -18,19 +18,28 @@ import ( const Usage = `usage: git-sizer [OPTS] - -v, --verbose report all statistics, whether concerning or not --threshold THRESHOLD minimum level of concern (i.e., number of stars) that should be reported. Default: - '--threshold=1'. - --critical only report critical statistics + '--threshold=1'. Can be set via gitconfig: + 'sizer.threshold'. + -v, --verbose report all statistics, whether concerning or + not; equivalent to '--threshold=0 + --no-verbose equivalent to '--threshold=1' + --critical only report critical statistics; equivalent + to '--threshold=30' --names=[none|hash|full] display names of large objects in the specified - style: 'none' (omit footnotes entirely), 'hash' - (show only the SHA-1s of objects), or 'full' - (show full names). Default is '--names=full'. + style. Values: + * 'none' - omit footnotes entirely + * 'hash' - show only the SHA-1s of objects + * 'full' - show full names + Default is '--names=full'. Can be set via + gitconfig: 'sizer.names'. -j, --json output results in JSON format --json-version=[1|2] choose which JSON format version to output. - Default: --json-version=1. - --[no-]progress report (don't report) progress to stderr. + Default: --json-version=1. Can be set via + gitconfig: 'sizer.jsonVersion'. + --[no-]progress report (don't report) progress to stderr. Can + be set via gitconfig: 'sizer.progress'. 
--version only report the git-sizer version number Reference selection: @@ -164,8 +173,8 @@ func mainImplementation(args []string) error { var nameStyle sizes.NameStyle = sizes.NameStyleFull var cpuprofile string var jsonOutput bool - var jsonVersion uint - var threshold sizes.Threshold = 1 + var jsonVersion int + var threshold sizes.Threshold var progress bool var version bool var filter git.IncludeExcludeFilter @@ -217,6 +226,12 @@ func mainImplementation(args []string) error { ) flags.Lookup("verbose").NoOptDefVal = "true" + flags.Var( + sizes.NewThresholdFlagValue(&threshold, 1), + "no-verbose", "report statistics that are at all concerning", + ) + flags.Lookup("no-verbose").NoOptDefVal = "true" + flags.Var( &threshold, "threshold", "minimum level of concern (i.e., number of stars) that should be\n"+ @@ -238,7 +253,7 @@ func mainImplementation(args []string) error { ) flags.BoolVarP(&jsonOutput, "json", "j", false, "output results in JSON format") - flags.UintVar(&jsonVersion, "json-version", 1, "JSON format version to output (1 or 2)") + flags.IntVar(&jsonVersion, "json-version", 1, "JSON format version to output (1 or 2)") atty, err := isatty.Isatty(os.Stderr.Fd()) if err != nil { @@ -263,10 +278,6 @@ func mainImplementation(args []string) error { return err } - if jsonOutput && !(jsonVersion == 1 || jsonVersion == 2) { - return fmt.Errorf("JSON version must be 1 or 2") - } - if cpuprofile != "" { f, err := os.Create(cpuprofile) if err != nil { @@ -295,6 +306,55 @@ func mainImplementation(args []string) error { } defer repo.Close() + if jsonOutput { + if !flags.Changed("json-version") { + v, err := repo.ConfigIntDefault("sizer.jsonVersion", jsonVersion) + if err != nil { + return err + } + jsonVersion = v + if !(jsonVersion == 1 || jsonVersion == 2) { + return fmt.Errorf("JSON version (read from gitconfig) must be 1 or 2") + } + } else if !(jsonVersion == 1 || jsonVersion == 2) { + return fmt.Errorf("JSON version must be 1 or 2") + } + } + + if 
!flags.Changed("threshold") && + !flags.Changed("verbose") && + !flags.Changed("no-verbose") && + !flags.Changed("critical") { + s, err := repo.ConfigStringDefault("sizer.threshold", fmt.Sprintf("%g", threshold)) + if err != nil { + return err + } + v, err := strconv.ParseFloat(s, 64) + if err != nil { + return fmt.Errorf("parsing gitconfig value for 'sizer.threshold': %w", err) + } + threshold = sizes.Threshold(v) + } + + if !flags.Changed("names") { + s, err := repo.ConfigStringDefault("sizer.names", "full") + if err != nil { + return err + } + err = nameStyle.Set(s) + if err != nil { + return fmt.Errorf("parsing gitconfig value for 'sizer.names': %w", err) + } + } + + if !flags.Changed("progress") && !flags.Changed("no-progress") { + v, err := repo.ConfigBoolDefault("sizer.progress", progress) + if err != nil { + return fmt.Errorf("parsing gitconfig value for 'sizer.progress': %w", err) + } + progress = v + } + var historySize sizes.HistorySize var refFilter git.ReferenceFilter = filter.Filter diff --git a/git/gitconfig.go b/git/gitconfig.go new file mode 100644 index 0000000..4519af8 --- /dev/null +++ b/git/gitconfig.go @@ -0,0 +1,70 @@ +package git + +import ( + "bytes" + "fmt" + "strconv" +) + +func (repo *Repository) ConfigStringDefault(key string, defaultValue string) (string, error) { + cmd := repo.gitCommand( + "config", + "--default", defaultValue, + key, + ) + + out, err := cmd.Output() + if err != nil { + return defaultValue, fmt.Errorf("running 'git config': %w", err) + } + + if len(out) > 0 && out[len(out)-1] == '\n' { + out = out[:len(out)-1] + } + + return string(out), nil +} + +func (repo *Repository) ConfigBoolDefault(key string, defaultValue bool) (bool, error) { + cmd := repo.gitCommand( + "config", + "--type", "bool", + "--default", strconv.FormatBool(defaultValue), + key, + ) + + out, err := cmd.Output() + if err != nil { + return defaultValue, fmt.Errorf("running 'git config': %w", err) + } + + s := string(bytes.TrimSpace(out)) + value, err 
:= strconv.ParseBool(s) + if err != nil { + return defaultValue, fmt.Errorf("unexpected bool value from 'git config': %q", s) + } + + return value, nil +} + +func (repo *Repository) ConfigIntDefault(key string, defaultValue int) (int, error) { + cmd := repo.gitCommand( + "config", + "--type", "int", + "--default", strconv.Itoa(defaultValue), + key, + ) + + out, err := cmd.Output() + if err != nil { + return defaultValue, fmt.Errorf("running 'git config': %w", err) + } + + s := string(bytes.TrimSpace(out)) + value, err := strconv.Atoi(s) + if err != nil { + return defaultValue, fmt.Errorf("unexpected int value from 'git config': %q", s) + } + + return value, nil +} From 45cf9e24b36632c3ab453499a025012ee66d6120 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 8 Jul 2021 18:33:33 +0200 Subject: [PATCH 041/176] Add a bunch more reference selection options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add some more reference selection options. The same functionality could be achieved using the existing options, but it's more convenient to have these dedicated options as shorthand: * `--no-branches`, `--no-tags`, `--no-remotes` — the opposite of the existing `--branches`, `--tags`, and `--remotes` options. * `--notes` and `--no-notes` — include or exclude `refs/notes/*`. * `--stash` and `--no-stash` — include or exclude `refs/stash`. --- git-sizer.go | 123 ++++++++++++++++++++++++--------- git/ref_filter.go | 12 ++++ git_sizer_test.go | 169 ++++++++++++++++++++++++++++++---------------- 3 files changed, 213 insertions(+), 91 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index c2c6796..bcba7f5 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -49,16 +49,18 @@ const Usage = `usage: git-sizer [OPTS] include. 
The last rule matching a reference determines whether that reference is processed: - --branches process branches - --tags process tags - --remotes process remote refs + --[no-]branches process [don't process] branches + --[no-]tags process [don't process] tags + --[no-]remotes process [don't process] remote-tracking references + --[no-]notes process [don't process] git-notes references + --[no-]stash process [don't process] refs/stash --include PREFIX process references with the specified PREFIX (e.g., '--include=refs/remotes/origin') --include-regexp REGEXP process references matching the specified regular expression (e.g., '--include-regexp=refs/tags/release-.*') --exclude PREFIX don't process references with the specified - PREFIX (e.g., '--exclude=refs/notes') + PREFIX (e.g., '--exclude=refs/changes') --exclude-regexp REGEXP don't process references matching the specified regular expression --show-refs show which refs are being included/excluded @@ -99,38 +101,53 @@ func (v *NegatedBoolValue) Type() string { } type filterValue struct { - filter *git.IncludeExcludeFilter + // The filter to which values will be appended: + filter *git.IncludeExcludeFilter + + // The polarity of this option (i.e., does it cause the things + // that it references to be included or excluded?): polarity git.Polarity - pattern string - regexp bool + + // If this is set, then it is used as the pattern. If not, then + // the user should supply the pattern. + pattern string + + // Should `pattern` be interpreted as a regexp (as opposed to a + // prefix)? 
+ regexp bool } func (v *filterValue) Set(s string) error { - var polarity git.Polarity var filter git.ReferenceFilter + polarity := v.polarity + + var pattern string + if v.pattern != "" { + // The pattern is fixed for this option: + pattern = v.pattern + + // It's not really expected, but if the user supplied a + // `false` boolean value, invert the polarity: + b, err := strconv.ParseBool(s) + if err != nil { + return err + } + if !b { + polarity = polarity.Inverted() + } + } else { + // The user must supply the pattern. + pattern = s + } if v.regexp { - polarity = v.polarity var err error - filter, err = git.RegexpFilter(s) + filter, err = git.RegexpFilter(pattern) if err != nil { return fmt.Errorf("invalid regexp: %q", s) } - } else if v.pattern == "" { - polarity = v.polarity - filter = git.PrefixFilter(s) } else { - // Allow a boolean value to alter the polarity: - b, err := strconv.ParseBool(s) - if err != nil { - return err - } - if b { - polarity = git.Include - } else { - polarity = git.Exclude - } - filter = git.PrefixFilter(v.pattern) + filter = git.PrefixFilter(pattern) } switch polarity { @@ -152,12 +169,12 @@ func (v *filterValue) String() string { } func (v *filterValue) Type() string { - if v.regexp { + if v.pattern != "" { + return "bool" + } else if v.regexp { return "regexp" - } else if v.pattern == "" { - return "prefix" } else { - return "" + return "prefix" } } @@ -203,20 +220,62 @@ func mainImplementation(args []string) error { ) flag := flags.VarPF( - &filterValue{&filter, git.Include, "refs/heads/", false}, "branches", "", + &filterValue{&filter, git.Include, "refs/heads", false}, "branches", "", "process all branches", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&filter, git.Include, "refs/tags/", false}, "tags", "", + &filterValue{&filter, git.Exclude, "refs/heads", false}, "no-branches", "", + "exclude all branches", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&filter, git.Include, 
"refs/tags", false}, "tags", "", "process all tags", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&filter, git.Include, "refs/remotes/", false}, "remotes", "", - "process all remotes", + &filterValue{&filter, git.Exclude, "refs/tags", false}, "no-tags", "", + "exclude all tags", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&filter, git.Include, "refs/remotes", false}, "remotes", "", + "process all remote-tracking references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&filter, git.Exclude, "refs/remotes", false}, "no-remotes", "", + "exclude all remote-tracking references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&filter, git.Include, "refs/notes", false}, "notes", "", + "process all git-notes references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&filter, git.Exclude, "refs/notes", false}, "no-notes", "", + "exclude all git-notes references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&filter, git.Include, "refs/stash", true}, "stash", "", + "process refs/stash", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&filter, git.Exclude, "refs/stash", true}, "no-stash", "", + "exclude refs/stash", ) flag.NoOptDefVal = "true" diff --git a/git/ref_filter.go b/git/ref_filter.go index 6f2140c..f71c418 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -18,6 +18,18 @@ const ( Exclude ) +func (p Polarity) Inverted() Polarity { + switch p { + case Include: + return Exclude + case Exclude: + return Include + default: + // This shouldn't happen: + return Exclude + } +} + // polarizedFilter is a filter that might match, in which case it // includes or excludes the reference (according to its polarity). If // it doesn't match, then it doesn't say anything about the reference. 
diff --git a/git_sizer_test.go b/git_sizer_test.go index 6c82a7a..897b801 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" "testing" "time" @@ -178,37 +179,53 @@ func newGitBomb( updateRef(t, path, "refs/heads/master", oid) } -func TestRefSelection(t *testing.T) { +// TestRefSelections tests various combinations of reference selection +// options. +func TestRefSelections(t *testing.T) { t.Parallel() - allRefs := []string{ - "refs/barfoo", - "refs/foo", - "refs/foobar", - "refs/heads/foo", - "refs/heads/master", - "refs/remotes/origin/master", - "refs/remotes/upstream/foo", - "refs/remotes/upstream/master", - "refs/tags/foolish", - "refs/tags/other", - "refs/tags/release-1", - "refs/tags/release-2", + references := []struct { + // The plusses and spaces in the `results` string correspond + // to the expected results for one of the tests: `results[i]` + // tells whether we expect `refname` to be included ('+') or + // excluded (' ') in test case number `i`. 
+ results string + + refname string + }{ + // 1111111 + //01234567890123456 + {"+ + + + + + + +", "refs/barfoo"}, + {"+ + + + + + +++ ", "refs/foo"}, + {"+ + + + + + + +", "refs/foobar"}, + {"++ + + + +++ +", "refs/heads/foo"}, + {"++ + + + ++ +", "refs/heads/master"}, + {"+ + + ++ + ", "refs/notes/discussion"}, + {"+ + ++ + + ", "refs/remotes/origin/master"}, + {"+ + ++ + + + +", "refs/remotes/upstream/foo"}, + {"+ + ++ + + ", "refs/remotes/upstream/master"}, + {"+ + + + ++ ", "refs/stash"}, + {"+ ++ + + +++ +", "refs/tags/foolish"}, + {"+ ++ + + ++ +", "refs/tags/other"}, + {"+ ++ + + ++ + ", "refs/tags/release-1"}, + {"+ ++ + + ++ + ", "refs/tags/release-2"}, } - expectedStderr := "References (included references marked with '+'):\n" + - "+ refs/barfoo\n" + - " refs/foo\n" + - "+ refs/foobar\n" + - "+ refs/heads/foo\n" + - "+ refs/heads/master\n" + - " refs/remotes/origin/master\n" + - "+ refs/remotes/upstream/foo\n" + - " refs/remotes/upstream/master\n" + - "+ refs/tags/foolish\n" + - "+ refs/tags/other\n" + - " refs/tags/release-1\n" + - " refs/tags/release-2\n" + // computeExpectations assembles and returns the results expected + // for test `i` from the `references` slice. 
+ computeExpectations := func(i int) (string, int) { + var sb strings.Builder + fmt.Fprintln(&sb, "References (included references marked with '+'):") + count := 0 + for _, p := range references { + present := p.results[i] + fmt.Fprintf(&sb, "%c %s\n", present, p.refname) + if present == '+' { + count++ + } + } + return sb.String(), count + } // Create a test repo with one orphan commit per refname: path, err := ioutil.TempDir("", "ref-selection") @@ -219,9 +236,9 @@ func TestRefSelection(t *testing.T) { err = exec.Command("git", "init", "--bare", path).Run() require.NoError(t, err) - for _, refname := range allRefs { + for _, p := range references { oid := createObject(t, path, "blob", func(w io.Writer) error { - _, err := fmt.Fprintf(w, "%s\n", refname) + _, err := fmt.Fprintf(w, "%s\n", p.refname) return err }) @@ -238,12 +255,12 @@ func TestRefSelection(t *testing.T) { "committer Example 1112911993 -0700\n"+ "\n"+ "Commit for reference %s\n", - oid, refname, + oid, p.refname, ) return err }) - updateRef(t, path, refname, oid) + updateRef(t, path, p.refname, oid) } executable, err := exec.LookPath("bin/git-sizer") @@ -251,36 +268,70 @@ func TestRefSelection(t *testing.T) { executable, err = filepath.Abs(executable) require.NoError(t, err) - cmd := exec.Command( - executable, "--show-refs", "--no-progress", "--json", "--json-version=2", - "--include=refs/heads", - "--tags", - "--exclude", "refs/heads/foo", - "--include-regexp", ".*foo.*", - "--exclude", "refs/foo", - "--exclude-regexp", "refs/tags/release-.*", - ) - cmd.Dir = path - var stdout bytes.Buffer - cmd.Stdout = &stdout - var stderr bytes.Buffer - cmd.Stderr = &stderr - err = cmd.Run() - require.NoError(t, err) + for i, p := range []struct { + name string + args []string + }{ + {"no arguments", nil}, // 0 + {"branches", []string{"--branches"}}, // 1 + {"no branches", []string{"--no-branches"}}, // 2 + {"tags", []string{"--tags"}}, // 3 + {"no tags", []string{"--no-tags"}}, // 4 + {"remotes", 
[]string{"--remotes"}}, // 5 + {"no remotes", []string{"--no-remotes"}}, // 6 + {"notes", []string{"--notes"}}, // 7 + {"no notes", []string{"--no-notes"}}, // 8 + {"stash", []string{"--stash"}}, // 9 + {"no stash", []string{"--no-stash"}}, // 10 + {"branches and tags", []string{"--branches", "--tags"}}, // 11 + {"foo", []string{"--include-regexp", ".*foo.*"}}, // 12 + {"refs/foo as prefix", []string{"--include", "refs/foo"}}, // 13 + {"refs/foo as regexp", []string{"--include-regexp", "refs/foo"}}, // 14 + {"release tags", []string{"--include-regexp", "refs/tags/release-.*"}}, // 15 + { + name: "combination", + args: []string{ + "--include=refs/heads", + "--tags", + "--exclude", "refs/heads/foo", + "--include-regexp", ".*foo.*", + "--exclude", "refs/foo", + "--exclude-regexp", "refs/tags/release-.*", + }, + }, // 16 + } { + t.Run( + p.name, + func(t *testing.T) { + args := []string{"--show-refs", "--no-progress", "--json", "--json-version=2"} + args = append(args, p.args...) + cmd := exec.Command(executable, args...) 
+ cmd.Dir = path + var stdout bytes.Buffer + cmd.Stdout = &stdout + var stderr bytes.Buffer + cmd.Stderr = &stderr + err = cmd.Run() + assert.NoError(t, err) + + expectedStderr, expectedUniqueCommitCount := computeExpectations(i) + + // Make sure that the right number of commits was scanned: + var v struct { + UniqueCommitCount struct { + Value int + } + } + err = json.Unmarshal(stdout.Bytes(), &v) + if assert.NoError(t, err) { + assert.EqualValues(t, expectedUniqueCommitCount, v.UniqueCommitCount.Value) + } - // Make sure that the right number of commits was scanned: - var v struct { - UniqueCommitCount struct { - Value int - } - } - err = json.Unmarshal(stdout.Bytes(), &v) - if assert.NoError(t, err) { - assert.EqualValues(t, 7, v.UniqueCommitCount.Value) + // Make sure that the right references were reported scanned: + assert.Equal(t, expectedStderr, stderr.String()) + }, + ) } - - // Make sure that the right references were reported scanned: - assert.Equal(t, expectedStderr, stderr.String()) } func pow(x uint64, n int) uint64 { From 6b1b17d84c583009c04d875252d11a67344277e4 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 20 Sep 2021 10:36:51 +0200 Subject: [PATCH 042/176] TestRefSelections: space out the test case table I'm going to reuse this infrastructure for some more tests, so make the formatting less cramped. 
--- git_sizer_test.go | 83 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 18 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 897b801..ab0ac8d 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -272,23 +272,70 @@ func TestRefSelections(t *testing.T) { name string args []string }{ - {"no arguments", nil}, // 0 - {"branches", []string{"--branches"}}, // 1 - {"no branches", []string{"--no-branches"}}, // 2 - {"tags", []string{"--tags"}}, // 3 - {"no tags", []string{"--no-tags"}}, // 4 - {"remotes", []string{"--remotes"}}, // 5 - {"no remotes", []string{"--no-remotes"}}, // 6 - {"notes", []string{"--notes"}}, // 7 - {"no notes", []string{"--no-notes"}}, // 8 - {"stash", []string{"--stash"}}, // 9 - {"no stash", []string{"--no-stash"}}, // 10 - {"branches and tags", []string{"--branches", "--tags"}}, // 11 - {"foo", []string{"--include-regexp", ".*foo.*"}}, // 12 - {"refs/foo as prefix", []string{"--include", "refs/foo"}}, // 13 - {"refs/foo as regexp", []string{"--include-regexp", "refs/foo"}}, // 14 - {"release tags", []string{"--include-regexp", "refs/tags/release-.*"}}, // 15 - { + { // 0 + name: "no arguments", + }, + { // 1 + name: "branches", + args: []string{"--branches"}, + }, + { // 2 + name: "no branches", + args: []string{"--no-branches"}, + }, + { // 3 + name: "tags", + args: []string{"--tags"}, + }, + { // 4 + name: "no tags", + args: []string{"--no-tags"}, + }, + { // 5 + name: "remotes", + args: []string{"--remotes"}, + }, + { // 6 + name: "no remotes", + args: []string{"--no-remotes"}, + }, + { // 7 + name: "notes", + args: []string{"--notes"}, + }, + { // 8 + name: "no notes", + args: []string{"--no-notes"}, + }, + { // 9 + name: "stash", + args: []string{"--stash"}, + }, + { // 10 + name: "no stash", + args: []string{"--no-stash"}, + }, + { // 11 + name: "branches and tags", + args: []string{"--branches", "--tags"}, + }, + { // 12 + name: "foo", + args: []string{"--include-regexp", 
".*foo.*"}, + }, + { // 13 + name: "refs/foo as prefix", + args: []string{"--include", "refs/foo"}, + }, + { // 14 + name: "refs/foo as regexp", + args: []string{"--include-regexp", "refs/foo"}, + }, + { // 15 + name: "release tags", + args: []string{"--include-regexp", "refs/tags/release-.*"}, + }, + { // 16 name: "combination", args: []string{ "--include=refs/heads", @@ -298,7 +345,7 @@ func TestRefSelections(t *testing.T) { "--exclude", "refs/foo", "--exclude-regexp", "refs/tags/release-.*", }, - }, // 16 + }, } { t.Run( p.name, From a48ad8b59fbb4fc89a8355a8cb7ba85dcada85b8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 18 Sep 2021 16:00:24 +0200 Subject: [PATCH 043/176] Make it possible to define reference groups via gitconfig Add a way to define groups of reference names via gitconfig, using include/exclude rules like those supported on the command line. This allows gitconfig like [refgroup "normal"] include = refs/heads include = refs/tags excludeRegexp = refs/tags/release-.* and then `git sizer --refgroup=normal`, to include "normal" branches and tags but not release tags in the analysis. --- git-sizer.go | 87 +++++++++++++++++++++++++++++++++++++++++-- git/gitconfig.go | 82 ++++++++++++++++++++++++++++++++++++++++ git/gitconfig_test.go | 36 ++++++++++++++++++ git_sizer_test.go | 73 +++++++++++++++++++++++++++--------- 4 files changed, 256 insertions(+), 22 deletions(-) create mode 100644 git/gitconfig_test.go diff --git a/git-sizer.go b/git-sizer.go index bcba7f5..63925e4 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -63,6 +63,11 @@ const Usage = `usage: git-sizer [OPTS] PREFIX (e.g., '--exclude=refs/changes') --exclude-regexp REGEXP don't process references matching the specified regular expression + --refgroup=NAME process reference in group defined by gitconfig: + 'refgroup.NAME.include', + 'refgroup.NAME.includeRegexp', + 'refgroup.NAME.exclude', and + 'refgroup.NAME.excludeRegexp' as above. 
--show-refs show which refs are being included/excluded Prefixes must match at a boundary; for example 'refs/foo' matches @@ -178,6 +183,70 @@ func (v *filterValue) Type() string { } } +type filterGroupValue struct { + filter *git.IncludeExcludeFilter + repo *git.Repository +} + +func (v *filterGroupValue) Set(name string) error { + // At this point, it is not yet certain that the command was run + // inside a Git repository. If not, ignore this option (the + // command will error out anyway). + if v.repo == nil { + fmt.Fprintf( + os.Stderr, + "warning: not in Git repository; ignoring '--refgroup' option.\n", + ) + return nil + } + + config, err := v.repo.Config(fmt.Sprintf("refgroup.%s", name)) + if err != nil { + return err + } + for _, entry := range config.Entries { + switch entry.Key { + case "include": + v.filter.Include(git.PrefixFilter(entry.Value)) + case "includeregexp": + filter, err := git.RegexpFilter(entry.Value) + if err != nil { + return fmt.Errorf( + "invalid regular expression for 'refgroup.%s.%s': %w", + name, entry.Key, err, + ) + } + v.filter.Include(filter) + case "exclude": + v.filter.Exclude(git.PrefixFilter(entry.Value)) + case "excluderegexp": + filter, err := git.RegexpFilter(entry.Value) + if err != nil { + return fmt.Errorf( + "invalid regular expression for 'refgroup.%s.%s': %w", + name, entry.Key, err, + ) + } + v.filter.Exclude(filter) + default: + // Ignore unrecognized keys. + } + } + return nil +} + +func (v *filterGroupValue) Get() interface{} { + return nil +} + +func (v *filterGroupValue) String() string { + return "" +} + +func (v *filterGroupValue) Type() string { + return "name" +} + func main() { err := mainImplementation(os.Args[1:]) if err != nil { @@ -197,6 +266,13 @@ func mainImplementation(args []string) error { var filter git.IncludeExcludeFilter var showRefs bool + // Try to open the repository, but it's not an error yet if this + // fails, because the user might only be asking for `--help`. 
+ repo, repoErr := git.NewRepository(".") + if repoErr == nil { + defer repo.Close() + } + flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) flags.Usage = func() { fmt.Print(Usage) @@ -279,6 +355,11 @@ func mainImplementation(args []string) error { ) flag.NoOptDefVal = "true" + flag = flags.VarPF( + &filterGroupValue{&filter, repo}, "refgroup", "", + "process references in refgroup defined by gitconfig", + ) + flags.VarP( sizes.NewThresholdFlagValue(&threshold, 0), "verbose", "v", "report all statistics, whether concerning or not", @@ -359,11 +440,9 @@ func mainImplementation(args []string) error { return errors.New("excess arguments") } - repo, err := git.NewRepository(".") - if err != nil { - return fmt.Errorf("couldn't open Git repository: %s", err) + if repoErr != nil { + return fmt.Errorf("couldn't open Git repository: %s", repoErr) } - defer repo.Close() if jsonOutput { if !flags.Changed("json-version") { diff --git a/git/gitconfig.go b/git/gitconfig.go index 4519af8..4bf32d1 100644 --- a/git/gitconfig.go +++ b/git/gitconfig.go @@ -2,10 +2,92 @@ package git import ( "bytes" + "errors" "fmt" "strconv" + "strings" ) +type ConfigEntry struct { + Key string + Value string +} + +type Config struct { + Entries []ConfigEntry +} + +// Config returns the entries from gitconfig. If `prefix` is provided, +// then only include entries in that section, which must match at +// a component boundary (as defined by `configKeyMatchesPrefix()`), +// and strip off the prefix in the keys that are returned. 
+func (repo *Repository) Config(prefix string) (*Config, error) { + cmd := repo.gitCommand("config", "--list", "-z") + + out, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("reading git configuration: %w", err) + } + + var config Config + + for len(out) > 0 { + keyEnd := bytes.IndexByte(out, '\n') + if keyEnd == -1 { + return nil, errors.New("invalid output from 'git config'") + } + key := string(out[:keyEnd]) + out = out[keyEnd+1:] + valueEnd := bytes.IndexByte(out, 0) + if valueEnd == -1 { + return nil, errors.New("invalid output from 'git config'") + } + value := string(out[:valueEnd]) + out = out[valueEnd+1:] + + ok, rest := configKeyMatchesPrefix(key, prefix) + if !ok { + continue + } + + entry := ConfigEntry{ + Key: rest, + Value: value, + } + config.Entries = append(config.Entries, entry) + } + + return &config, nil +} + +// configKeyMatchesPrefix checks whether `key` starts with `prefix` at +// a component boundary (i.e., at a '.'). If yes, it returns `true` +// and the part of the key after the prefix; e.g.: +// +// configKeyMatchesPrefix("foo.bar", "foo") → true, "bar" +// configKeyMatchesPrefix("foo.bar", "foo.") → true, "bar" +// configKeyMatchesPrefix("foo.bar", "foo.bar") → true, "" +// configKeyMatchesPrefix("foo.bar", "foo.bar.") → false, "" +func configKeyMatchesPrefix(key, prefix string) (bool, string) { + if prefix == "" { + return true, key + } + if !strings.HasPrefix(key, prefix) { + return false, "" + } + + if prefix[len(prefix)-1] == '.' { + return true, key[len(prefix):] + } + if len(key) == len(prefix) { + return true, "" + } + if key[len(prefix)] == '.' 
{ + return true, key[len(prefix)+1:] + } + return false, "" +} + func (repo *Repository) ConfigStringDefault(key string, defaultValue string) (string, error) { cmd := repo.gitCommand( "config", diff --git a/git/gitconfig_test.go b/git/gitconfig_test.go new file mode 100644 index 0000000..a98c4df --- /dev/null +++ b/git/gitconfig_test.go @@ -0,0 +1,36 @@ +package git + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestConfigKeyMatchesPrefix(t *testing.T) { + for _, p := range []struct { + key, prefix string + expectedBool bool + expectedString string + }{ + {"foo.bar", "", true, "foo.bar"}, + {"foo.bar", "foo", true, "bar"}, + {"foo.bar", "foo.", true, "bar"}, + {"foo.bar", "foo.bar", true, ""}, + {"foo.bar", "foo.bar.", false, ""}, + {"foo.bar", "foo.bar.baz", false, ""}, + {"foo.bar", "foo.barbaz", false, ""}, + {"foo.bar.baz", "foo.bar", true, "baz"}, + {"foo.barbaz", "foo.bar", false, ""}, + {"foo.bar", "bar", false, ""}, + } { + t.Run( + fmt.Sprintf("TestConfigKeyMatchesPrefix(%q, %q)", p.key, p.prefix), + func(t *testing.T) { + ok, s := configKeyMatchesPrefix(p.key, p.prefix) + assert.Equal(t, p.expectedBool, ok) + assert.Equal(t, p.expectedString, s) + }, + ) + } +} diff --git a/git_sizer_test.go b/git_sizer_test.go index ab0ac8d..8d1bbd1 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -193,22 +193,22 @@ func TestRefSelections(t *testing.T) { refname string }{ - // 1111111 - //01234567890123456 - {"+ + + + + + + +", "refs/barfoo"}, - {"+ + + + + + +++ ", "refs/foo"}, - {"+ + + + + + + +", "refs/foobar"}, - {"++ + + + +++ +", "refs/heads/foo"}, - {"++ + + + ++ +", "refs/heads/master"}, - {"+ + + ++ + ", "refs/notes/discussion"}, - {"+ + ++ + + ", "refs/remotes/origin/master"}, - {"+ + ++ + + + +", "refs/remotes/upstream/foo"}, - {"+ + ++ + + ", "refs/remotes/upstream/master"}, - {"+ + + + ++ ", "refs/stash"}, - {"+ ++ + + +++ +", "refs/tags/foolish"}, - {"+ ++ + + ++ +", "refs/tags/other"}, - {"+ ++ + + ++ + ", 
"refs/tags/release-1"}, - {"+ ++ + + ++ + ", "refs/tags/release-2"}, + // 111111111 + //0123456789012345678 + {"+ + + + + + + + +", "refs/barfoo"}, + {"+ + + + + + +++ ", "refs/foo"}, + {"+ + + + + + + + +", "refs/foobar"}, + {"++ + + + +++ +++", "refs/heads/foo"}, + {"++ + + + ++ +++", "refs/heads/master"}, + {"+ + + ++ + ", "refs/notes/discussion"}, + {"+ + ++ + + ", "refs/remotes/origin/master"}, + {"+ + ++ + + + + +", "refs/remotes/upstream/foo"}, + {"+ + ++ + + ", "refs/remotes/upstream/master"}, + {"+ + + + ++ ", "refs/stash"}, + {"+ ++ + + +++ + +", "refs/tags/foolish"}, + {"+ ++ + + ++ + +", "refs/tags/other"}, + {"+ ++ + + ++ + ", "refs/tags/release-1"}, + {"+ ++ + + ++ + ", "refs/tags/release-2"}, } // computeExpectations assembles and returns the results expected @@ -269,8 +269,9 @@ func TestRefSelections(t *testing.T) { require.NoError(t, err) for i, p := range []struct { - name string - args []string + name string + args []string + config [][2]string }{ { // 0 name: "no arguments", @@ -346,10 +347,46 @@ func TestRefSelections(t *testing.T) { "--exclude-regexp", "refs/tags/release-.*", }, }, + { // 17 + name: "branches-refgroup", + args: []string{"--refgroup=mygroup"}, + config: [][2]string{ + {"include", "refs/heads"}, + }, + }, + { // 18 + name: "combination-refgroup", + args: []string{"--refgroup=mygroup"}, + config: [][2]string{ + {"include", "refs/heads"}, + {"include", "refs/tags"}, + {"exclude", "refs/heads/foo"}, + {"includeRegexp", ".*foo.*"}, + {"exclude", "refs/foo"}, + {"excludeRegexp", "refs/tags/release-.*"}, + }, + }, } { t.Run( p.name, func(t *testing.T) { + if len(p.config) != 0 { + for _, c := range p.config { + cmd := gitCommand( + t, path, + "config", "--add", fmt.Sprintf("refgroup.mygroup.%s", c[0]), c[1], + ) + err := cmd.Run() + require.NoError(t, err) + } + defer func() { + cmd := gitCommand( + t, path, "config", "--remove-section", "refgroup.mygroup", + ) + err := cmd.Run() + require.NoError(t, err) + }() + } args := 
[]string{"--show-refs", "--no-progress", "--json", "--json-version=2"} args = append(args, p.args...) cmd := exec.Command(executable, args...) From 1da85bfa4f177d440008dbce7f9e70a1f3dc507a Mon Sep 17 00:00:00 2001 From: Thomas Gummerer Date: Thu, 7 Oct 2021 10:55:06 +0000 Subject: [PATCH 044/176] make default threshold 1 again https://github.com/github/git-sizer/pull/83 started allowing some values to be set through config options. In the process the default value for the threshold was lost, which used to be 1, but after the change was 0. Set the default threshold to 1 again as is documented. --- git-sizer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-sizer.go b/git-sizer.go index 63925e4..19b2a15 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -260,7 +260,7 @@ func mainImplementation(args []string) error { var cpuprofile string var jsonOutput bool var jsonVersion int - var threshold sizes.Threshold + var threshold sizes.Threshold = 1 var progress bool var version bool var filter git.IncludeExcludeFilter From 878581ff58b82f2de78dba564a785af8dba7ad36 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 15 Oct 2021 10:19:48 +0200 Subject: [PATCH 045/176] testutils: split out an internal package for test utilities --- git_sizer_test.go | 176 ++++++++------------------------ internal/testutils/repoutils.go | 115 +++++++++++++++++++++ 2 files changed, 156 insertions(+), 135 deletions(-) create mode 100644 internal/testutils/repoutils.go diff --git a/git_sizer_test.go b/git_sizer_test.go index 8d1bbd1..54bc01c 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -18,6 +18,7 @@ import ( "github.com/github/git-sizer/counts" "github.com/github/git-sizer/git" + "github.com/github/git-sizer/internal/testutils" "github.com/github/git-sizer/sizes" ) @@ -28,104 +29,6 @@ func TestExec(t *testing.T) { assert.NoErrorf(t, err, "command failed; output: %#v", string(output)) } -func newRepository(t *testing.T, repoPath string) *git.Repository { - 
t.Helper() - - repo, err := git.NewRepository(repoPath) - require.NoError(t, err) - return repo -} - -func gitCommand(t *testing.T, repoPath string, args ...string) *exec.Cmd { - t.Helper() - - gitArgs := []string{"-C", repoPath} - gitArgs = append(gitArgs, args...) - return exec.Command("git", gitArgs...) -} - -func updateRef(t *testing.T, repoPath string, refname string, oid git.OID) { - t.Helper() - - var cmd *exec.Cmd - - if oid == git.NullOID { - cmd = gitCommand(t, repoPath, "update-ref", "-d", refname) - } else { - cmd = gitCommand(t, repoPath, "update-ref", refname, oid.String()) - } - require.NoError(t, cmd.Run()) -} - -// createObject creates a new Git object, of the specified type, in -// the repository at `repoPath`. `writer` is a function that writes -// the object in `git hash-object` input format. This is used for -// testing only. -func createObject( - t *testing.T, repoPath string, otype git.ObjectType, writer func(io.Writer) error, -) git.OID { - t.Helper() - - cmd := gitCommand(t, repoPath, "hash-object", "-w", "-t", string(otype), "--stdin") - in, err := cmd.StdinPipe() - require.NoError(t, err) - - out, err := cmd.StdoutPipe() - cmd.Stderr = os.Stderr - - err = cmd.Start() - require.NoError(t, err) - - err = writer(in) - err2 := in.Close() - if err != nil { - cmd.Wait() - require.NoError(t, err) - } - if err2 != nil { - cmd.Wait() - require.NoError(t, err2) - } - - output, err := ioutil.ReadAll(out) - err2 = cmd.Wait() - require.NoError(t, err) - require.NoError(t, err2) - - oid, err := git.NewOID(string(bytes.TrimSpace(output))) - require.NoError(t, err) - return oid -} - -func addFile(t *testing.T, repoPath string, relativePath, contents string) { - dirPath := filepath.Dir(relativePath) - if dirPath != "." 
{ - require.NoError(t, os.MkdirAll(filepath.Join(repoPath, dirPath), 0777), "creating subdir") - } - - filename := filepath.Join(repoPath, relativePath) - f, err := os.Create(filename) - require.NoErrorf(t, err, "creating file %q", filename) - _, err = f.WriteString(contents) - require.NoErrorf(t, err, "writing to file %q", filename) - require.NoErrorf(t, f.Close(), "closing file %q", filename) - - cmd := gitCommand(t, repoPath, "add", relativePath) - require.NoErrorf(t, cmd.Run(), "adding file %q", relativePath) -} - -func addAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { - cmd.Env = append(cmd.Env, - "GIT_AUTHOR_NAME=Arthur", - "GIT_AUTHOR_EMAIL=arthur@example.com", - fmt.Sprintf("GIT_AUTHOR_DATE=%d -0700", timestamp.Unix()), - "GIT_COMMITTER_NAME=Constance", - "GIT_COMMITTER_EMAIL=constance@example.com", - fmt.Sprintf("GIT_COMMITTER_DATE=%d -0700", timestamp.Unix()), - ) - *timestamp = timestamp.Add(60 * time.Second) -} - func newGitBomb( t *testing.T, path string, depth, breadth int, body string, ) { @@ -135,7 +38,7 @@ func newGitBomb( err := cmd.Run() require.NoError(t, err) - oid := createObject(t, path, "blob", func(w io.Writer) error { + oid := testutils.CreateObject(t, path, "blob", func(w io.Writer) error { _, err := io.WriteString(w, body) return err }) @@ -146,7 +49,7 @@ func newGitBomb( prefix := "f" for ; depth > 0; depth-- { - oid = createObject(t, path, "tree", func(w io.Writer) error { + oid = testutils.CreateObject(t, path, "tree", func(w io.Writer) error { for i := 0; i < breadth; i++ { _, err = fmt.Fprintf( w, "%s %s%0*d\x00%s", @@ -163,7 +66,7 @@ func newGitBomb( prefix = "d" } - oid = createObject(t, path, "commit", func(w io.Writer) error { + oid = testutils.CreateObject(t, path, "commit", func(w io.Writer) error { _, err := fmt.Fprintf( w, "tree %s\n"+ @@ -176,7 +79,7 @@ func newGitBomb( return err }) - updateRef(t, path, "refs/heads/master", oid) + testutils.UpdateRef(t, path, "refs/heads/master", oid) } // TestRefSelections tests 
various combinations of reference selection @@ -237,17 +140,17 @@ func TestRefSelections(t *testing.T) { require.NoError(t, err) for _, p := range references { - oid := createObject(t, path, "blob", func(w io.Writer) error { + oid := testutils.CreateObject(t, path, "blob", func(w io.Writer) error { _, err := fmt.Fprintf(w, "%s\n", p.refname) return err }) - oid = createObject(t, path, "tree", func(w io.Writer) error { + oid = testutils.CreateObject(t, path, "tree", func(w io.Writer) error { _, err = fmt.Fprintf(w, "100644 a.txt\x00%s", oid.Bytes()) return err }) - oid = createObject(t, path, "commit", func(w io.Writer) error { + oid = testutils.CreateObject(t, path, "commit", func(w io.Writer) error { _, err := fmt.Fprintf( w, "tree %s\n"+ @@ -260,7 +163,7 @@ func TestRefSelections(t *testing.T) { return err }) - updateRef(t, path, p.refname, oid) + testutils.UpdateRef(t, path, p.refname, oid) } executable, err := exec.LookPath("bin/git-sizer") @@ -372,7 +275,7 @@ func TestRefSelections(t *testing.T) { func(t *testing.T) { if len(p.config) != 0 { for _, c := range p.config { - cmd := gitCommand( + cmd := testutils.GitCommand( t, path, "config", "--add", fmt.Sprintf("refgroup.mygroup.%s", c[0]), c[1], ) @@ -380,7 +283,7 @@ func TestRefSelections(t *testing.T) { require.NoError(t, err) } defer func() { - cmd := gitCommand( + cmd := testutils.GitCommand( t, path, "config", "--remove-section", "refgroup.mygroup", ) err := cmd.Run() @@ -439,7 +342,8 @@ func TestBomb(t *testing.T) { newGitBomb(t, path, 10, 10, "boom!\n") h, err := sizes.ScanRepositoryUsingGraph( - newRepository(t, path), git.AllReferencesFilter, sizes.NameStyleFull, false, + testutils.NewRepository(t, path), + git.AllReferencesFilter, sizes.NameStyleFull, false, ) require.NoError(t, err) @@ -498,26 +402,27 @@ func TestTaggedTags(t *testing.T) { timestamp := time.Unix(1112911993, 0) - cmd = gitCommand(t, path, "commit", "-m", "initial", "--allow-empty") - addAuthorInfo(cmd, &timestamp) + cmd = 
testutils.GitCommand(t, path, "commit", "-m", "initial", "--allow-empty") + testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") // The lexicographical order of these tags is important, hence // their strange names. - cmd = gitCommand(t, path, "tag", "-m", "tag 1", "tag", "master") - addAuthorInfo(cmd, &timestamp) + cmd = testutils.GitCommand(t, path, "tag", "-m", "tag 1", "tag", "master") + testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 1") - cmd = gitCommand(t, path, "tag", "-m", "tag 2", "bag", "tag") - addAuthorInfo(cmd, &timestamp) + cmd = testutils.GitCommand(t, path, "tag", "-m", "tag 2", "bag", "tag") + testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 2") - cmd = gitCommand(t, path, "tag", "-m", "tag 3", "wag", "bag") - addAuthorInfo(cmd, &timestamp) + cmd = testutils.GitCommand(t, path, "tag", "-m", "tag 3", "wag", "bag") + testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 3") h, err := sizes.ScanRepositoryUsingGraph( - newRepository(t, path), git.AllReferencesFilter, sizes.NameStyleNone, false, + testutils.NewRepository(t, path), + git.AllReferencesFilter, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -537,14 +442,14 @@ func TestFromSubdir(t *testing.T) { timestamp := time.Unix(1112911993, 0) - addFile(t, path, "subdir/file.txt", "Hello, world!\n") + testutils.AddFile(t, path, "subdir/file.txt", "Hello, world!\n") - cmd = gitCommand(t, path, "commit", "-m", "initial") - addAuthorInfo(cmd, &timestamp) + cmd = testutils.GitCommand(t, path, "commit", "-m", "initial") + testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") h, err := sizes.ScanRepositoryUsingGraph( - newRepository(t, filepath.Join(path, "subdir")), + testutils.NewRepository(t, filepath.Join(path, "subdir")), git.AllReferencesFilter, sizes.NameStyleNone, false, ) 
require.NoError(t, err, "scanning repository") @@ -565,36 +470,37 @@ func TestSubmodule(t *testing.T) { submPath := filepath.Join(path, "subm") cmd := exec.Command("git", "init", submPath) require.NoError(t, cmd.Run(), "initializing subm repo") - addFile(t, submPath, "submfile1.txt", "Hello, submodule!\n") - addFile(t, submPath, "submfile2.txt", "Hello again, submodule!\n") - addFile(t, submPath, "submfile3.txt", "Hello again, submodule!\n") + testutils.AddFile(t, submPath, "submfile1.txt", "Hello, submodule!\n") + testutils.AddFile(t, submPath, "submfile2.txt", "Hello again, submodule!\n") + testutils.AddFile(t, submPath, "submfile3.txt", "Hello again, submodule!\n") - cmd = gitCommand(t, submPath, "commit", "-m", "subm initial") - addAuthorInfo(cmd, &timestamp) + cmd = testutils.GitCommand(t, submPath, "commit", "-m", "subm initial") + testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating subm commit") mainPath := filepath.Join(path, "main") cmd = exec.Command("git", "init", mainPath) require.NoError(t, cmd.Run(), "initializing main repo") - addFile(t, mainPath, "mainfile.txt", "Hello, main!\n") + testutils.AddFile(t, mainPath, "mainfile.txt", "Hello, main!\n") - cmd = gitCommand(t, mainPath, "commit", "-m", "main initial") - addAuthorInfo(cmd, &timestamp) + cmd = testutils.GitCommand(t, mainPath, "commit", "-m", "main initial") + testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = gitCommand(t, mainPath, "submodule", "add", submPath, "sub") + cmd = testutils.GitCommand(t, mainPath, "submodule", "add", submPath, "sub") cmd.Dir = mainPath require.NoError(t, cmd.Run(), "adding submodule") - cmd = gitCommand(t, mainPath, "commit", "-m", "add submodule") - addAuthorInfo(cmd, &timestamp) + cmd = testutils.GitCommand(t, mainPath, "commit", "-m", "add submodule") + testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "committing submodule to main") // Analyze the main 
repo: h, err := sizes.ScanRepositoryUsingGraph( - newRepository(t, mainPath), git.AllReferencesFilter, sizes.NameStyleNone, false, + testutils.NewRepository(t, mainPath), + git.AllReferencesFilter, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -603,7 +509,7 @@ func TestSubmodule(t *testing.T) { // Analyze the submodule: h, err = sizes.ScanRepositoryUsingGraph( - newRepository(t, filepath.Join(mainPath, "sub")), + testutils.NewRepository(t, filepath.Join(mainPath, "sub")), git.AllReferencesFilter, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go new file mode 100644 index 0000000..6acecb1 --- /dev/null +++ b/internal/testutils/repoutils.go @@ -0,0 +1,115 @@ +package testutils + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/github/git-sizer/git" +) + +func NewRepository(t *testing.T, repoPath string) *git.Repository { + t.Helper() + + repo, err := git.NewRepository(repoPath) + require.NoError(t, err) + return repo +} + +func GitCommand(t *testing.T, repoPath string, args ...string) *exec.Cmd { + t.Helper() + + gitArgs := []string{"-C", repoPath} + gitArgs = append(gitArgs, args...) + return exec.Command("git", gitArgs...) +} + +func UpdateRef(t *testing.T, repoPath string, refname string, oid git.OID) { + t.Helper() + + var cmd *exec.Cmd + + if oid == git.NullOID { + cmd = GitCommand(t, repoPath, "update-ref", "-d", refname) + } else { + cmd = GitCommand(t, repoPath, "update-ref", refname, oid.String()) + } + require.NoError(t, cmd.Run()) +} + +// createObject creates a new Git object, of the specified type, in +// the repository at `repoPath`. `writer` is a function that writes +// the object in `git hash-object` input format. 
This is used for +// testing only. +func CreateObject( + t *testing.T, repoPath string, otype git.ObjectType, writer func(io.Writer) error, +) git.OID { + t.Helper() + + cmd := GitCommand(t, repoPath, "hash-object", "-w", "-t", string(otype), "--stdin") + in, err := cmd.StdinPipe() + require.NoError(t, err) + + out, err := cmd.StdoutPipe() + cmd.Stderr = os.Stderr + + err = cmd.Start() + require.NoError(t, err) + + err = writer(in) + err2 := in.Close() + if err != nil { + cmd.Wait() + require.NoError(t, err) + } + if err2 != nil { + cmd.Wait() + require.NoError(t, err2) + } + + output, err := ioutil.ReadAll(out) + err2 = cmd.Wait() + require.NoError(t, err) + require.NoError(t, err2) + + oid, err := git.NewOID(string(bytes.TrimSpace(output))) + require.NoError(t, err) + return oid +} + +func AddFile(t *testing.T, repoPath string, relativePath, contents string) { + dirPath := filepath.Dir(relativePath) + if dirPath != "." { + require.NoError(t, os.MkdirAll(filepath.Join(repoPath, dirPath), 0777), "creating subdir") + } + + filename := filepath.Join(repoPath, relativePath) + f, err := os.Create(filename) + require.NoErrorf(t, err, "creating file %q", filename) + _, err = f.WriteString(contents) + require.NoErrorf(t, err, "writing to file %q", filename) + require.NoErrorf(t, f.Close(), "closing file %q", filename) + + cmd := GitCommand(t, repoPath, "add", relativePath) + require.NoErrorf(t, cmd.Run(), "adding file %q", relativePath) +} + +func AddAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { + cmd.Env = append(cmd.Env, + "GIT_AUTHOR_NAME=Arthur", + "GIT_AUTHOR_EMAIL=arthur@example.com", + fmt.Sprintf("GIT_AUTHOR_DATE=%d -0700", timestamp.Unix()), + "GIT_COMMITTER_NAME=Constance", + "GIT_COMMITTER_EMAIL=constance@example.com", + fmt.Sprintf("GIT_COMMITTER_DATE=%d -0700", timestamp.Unix()), + ) + *timestamp = timestamp.Add(60 * time.Second) +} From 8cbe1296ffce31019beab417ec86f26d8952aebb Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 15 Oct 2021 
10:38:34 +0200 Subject: [PATCH 046/176] testutils.AddFile(): label as a helper function --- internal/testutils/repoutils.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 6acecb1..e15c1ab 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -86,6 +86,8 @@ func CreateObject( } func AddFile(t *testing.T, repoPath string, relativePath, contents string) { + t.Helper() + dirPath := filepath.Dir(relativePath) if dirPath != "." { require.NoError(t, os.MkdirAll(filepath.Join(repoPath, dirPath), 0777), "creating subdir") From 27a84b95302962f7ff60ebd3a809aec4b315f779 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 15 Oct 2021 10:29:49 +0200 Subject: [PATCH 047/176] testutils.CreateReferencedOrphan(): new helper function --- git_sizer_test.go | 25 +------------------------ internal/testutils/repoutils.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 54bc01c..97d20b7 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -140,30 +140,7 @@ func TestRefSelections(t *testing.T) { require.NoError(t, err) for _, p := range references { - oid := testutils.CreateObject(t, path, "blob", func(w io.Writer) error { - _, err := fmt.Fprintf(w, "%s\n", p.refname) - return err - }) - - oid = testutils.CreateObject(t, path, "tree", func(w io.Writer) error { - _, err = fmt.Fprintf(w, "100644 a.txt\x00%s", oid.Bytes()) - return err - }) - - oid = testutils.CreateObject(t, path, "commit", func(w io.Writer) error { - _, err := fmt.Fprintf( - w, - "tree %s\n"+ - "author Example 1112911993 -0700\n"+ - "committer Example 1112911993 -0700\n"+ - "\n"+ - "Commit for reference %s\n", - oid, p.refname, - ) - return err - }) - - testutils.UpdateRef(t, path, p.refname, oid) + testutils.CreateReferencedOrphan(t, path, p.refname) } executable, err := exec.LookPath("bin/git-sizer") diff --git 
a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index e15c1ab..9ecc19b 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -104,6 +104,35 @@ func AddFile(t *testing.T, repoPath string, relativePath, contents string) { require.NoErrorf(t, cmd.Run(), "adding file %q", relativePath) } +func CreateReferencedOrphan(t *testing.T, repoPath string, refname string) { + t.Helper() + + oid := CreateObject(t, repoPath, "blob", func(w io.Writer) error { + _, err := fmt.Fprintf(w, "%s\n", refname) + return err + }) + + oid = CreateObject(t, repoPath, "tree", func(w io.Writer) error { + _, err := fmt.Fprintf(w, "100644 a.txt\x00%s", oid.Bytes()) + return err + }) + + oid = CreateObject(t, repoPath, "commit", func(w io.Writer) error { + _, err := fmt.Fprintf( + w, + "tree %s\n"+ + "author Example 1112911993 -0700\n"+ + "committer Example 1112911993 -0700\n"+ + "\n"+ + "Commit for reference %s\n", + oid, refname, + ) + return err + }) + + UpdateRef(t, repoPath, refname, oid) +} + func AddAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { cmd.Env = append(cmd.Env, "GIT_AUTHOR_NAME=Arthur", From c7b767174635ff79d8cd1adcd4e39b9292306da6 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 15 Oct 2021 10:57:13 +0200 Subject: [PATCH 048/176] TestRefSelections: clone the test repository for each test This eliminates the need to restore the gitconfig after each test and reduces the risk of other test crosstalk. 
--- git_sizer_test.go | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 97d20b7..062e652 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -250,23 +250,27 @@ func TestRefSelections(t *testing.T) { t.Run( p.name, func(t *testing.T) { - if len(p.config) != 0 { - for _, c := range p.config { - cmd := testutils.GitCommand( - t, path, - "config", "--add", fmt.Sprintf("refgroup.mygroup.%s", c[0]), c[1], - ) - err := cmd.Run() - require.NoError(t, err) - } - defer func() { - cmd := testutils.GitCommand( - t, path, "config", "--remove-section", "refgroup.mygroup", - ) - err := cmd.Run() - require.NoError(t, err) - }() + clonePath, err := ioutil.TempDir("", "ref-selection") + require.NoError(t, err) + + defer os.RemoveAll(clonePath) + + err = exec.Command( + "git", "clone", "--bare", "--mirror", path, clonePath, + ).Run() + require.NoError(t, err) + + path := clonePath + + for _, c := range p.config { + cmd := testutils.GitCommand( + t, path, + "config", "--add", fmt.Sprintf("refgroup.mygroup.%s", c[0]), c[1], + ) + err := cmd.Run() + require.NoError(t, err) } + args := []string{"--show-refs", "--no-progress", "--json", "--json-version=2"} args = append(args, p.args...) cmd := exec.Command(executable, args...) 
From 2cb5485e020527729fec594a1e6d2d495c195e35 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 15 Oct 2021 10:59:34 +0200 Subject: [PATCH 049/176] testutils.ConfigAdd(): new helper function --- git_sizer_test.go | 7 ++----- internal/testutils/repoutils.go | 9 +++++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 062e652..20309f7 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -263,12 +263,9 @@ func TestRefSelections(t *testing.T) { path := clonePath for _, c := range p.config { - cmd := testutils.GitCommand( - t, path, - "config", "--add", fmt.Sprintf("refgroup.mygroup.%s", c[0]), c[1], + testutils.ConfigAdd( + t, path, fmt.Sprintf("refgroup.mygroup.%s", c[0]), c[1], ) - err := cmd.Run() - require.NoError(t, err) } args := []string{"--show-refs", "--no-progress", "--json", "--json-version=2"} diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 9ecc19b..06ae136 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -144,3 +144,12 @@ func AddAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { ) *timestamp = timestamp.Add(60 * time.Second) } + +// ConfigAdd adds a key-value pair to the gitconfig in the repository +// at `repoPath`. 
+func ConfigAdd(t *testing.T, repoPath string, key, value string) { + t.Helper() + + err := GitCommand(t, repoPath, "config", "--add", key, value).Run() + require.NoError(t, err) +} From 544cca079e53ecda33f51798c3b472f17e059a28 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 15 Oct 2021 11:10:16 +0200 Subject: [PATCH 050/176] TestRefSelections(): remove the special handling for `mygroup` config --- git_sizer_test.go | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 20309f7..957e063 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -231,19 +231,19 @@ func TestRefSelections(t *testing.T) { name: "branches-refgroup", args: []string{"--refgroup=mygroup"}, config: [][2]string{ - {"include", "refs/heads"}, + {"refgroup.mygroup.include", "refs/heads"}, }, }, { // 18 name: "combination-refgroup", args: []string{"--refgroup=mygroup"}, config: [][2]string{ - {"include", "refs/heads"}, - {"include", "refs/tags"}, - {"exclude", "refs/heads/foo"}, - {"includeRegexp", ".*foo.*"}, - {"exclude", "refs/foo"}, - {"excludeRegexp", "refs/tags/release-.*"}, + {"refgroup.mygroup.include", "refs/heads"}, + {"refgroup.mygroup.include", "refs/tags"}, + {"refgroup.mygroup.exclude", "refs/heads/foo"}, + {"refgroup.mygroup.includeRegexp", ".*foo.*"}, + {"refgroup.mygroup.exclude", "refs/foo"}, + {"refgroup.mygroup.excludeRegexp", "refs/tags/release-.*"}, }, }, } { @@ -263,9 +263,7 @@ func TestRefSelections(t *testing.T) { path := clonePath for _, c := range p.config { - testutils.ConfigAdd( - t, path, fmt.Sprintf("refgroup.mygroup.%s", c[0]), c[1], - ) + testutils.ConfigAdd(t, path, c[0], c[1]) } args := []string{"--show-refs", "--no-progress", "--json", "--json-version=2"} From 0cddf11d276186adb75984466a5f6908ddd55c65 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 15 Oct 2021 11:11:32 +0200 Subject: [PATCH 051/176] TestRefSelections(): use `git.ConfigEntry` to store config settings 
--- git_sizer_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 957e063..655715c 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -151,7 +151,7 @@ func TestRefSelections(t *testing.T) { for i, p := range []struct { name string args []string - config [][2]string + config []git.ConfigEntry }{ { // 0 name: "no arguments", @@ -230,14 +230,14 @@ func TestRefSelections(t *testing.T) { { // 17 name: "branches-refgroup", args: []string{"--refgroup=mygroup"}, - config: [][2]string{ + config: []git.ConfigEntry{ {"refgroup.mygroup.include", "refs/heads"}, }, }, { // 18 name: "combination-refgroup", args: []string{"--refgroup=mygroup"}, - config: [][2]string{ + config: []git.ConfigEntry{ {"refgroup.mygroup.include", "refs/heads"}, {"refgroup.mygroup.include", "refs/tags"}, {"refgroup.mygroup.exclude", "refs/heads/foo"}, @@ -262,8 +262,8 @@ func TestRefSelections(t *testing.T) { path := clonePath - for _, c := range p.config { - testutils.ConfigAdd(t, path, c[0], c[1]) + for _, e := range p.config { + testutils.ConfigAdd(t, path, e.Key, e.Value) } args := []string{"--show-refs", "--no-progress", "--json", "--json-version=2"} From b30ab78b146d3be1359ec3c0ce1c32e2cd2833d8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 15 Oct 2021 16:21:24 +0200 Subject: [PATCH 052/176] testutils.TestRepo: new type representing a test git repository Use the new type in the main tests. 
--- git_sizer_test.go | 136 +++++++++++++------------------- internal/testutils/repoutils.go | 128 ++++++++++++++++++++++++------ 2 files changed, 156 insertions(+), 108 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 655715c..ad658ce 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -29,16 +29,10 @@ func TestExec(t *testing.T) { assert.NoErrorf(t, err, "command failed; output: %#v", string(output)) } -func newGitBomb( - t *testing.T, path string, depth, breadth int, body string, -) { +func newGitBomb(t *testing.T, repo *testutils.TestRepo, depth, breadth int, body string) { t.Helper() - cmd := exec.Command("git", "init", "--bare", path) - err := cmd.Run() - require.NoError(t, err) - - oid := testutils.CreateObject(t, path, "blob", func(w io.Writer) error { + oid := repo.CreateObject(t, "blob", func(w io.Writer) error { _, err := io.WriteString(w, body) return err }) @@ -49,9 +43,9 @@ func newGitBomb( prefix := "f" for ; depth > 0; depth-- { - oid = testutils.CreateObject(t, path, "tree", func(w io.Writer) error { + oid = repo.CreateObject(t, "tree", func(w io.Writer) error { for i := 0; i < breadth; i++ { - _, err = fmt.Fprintf( + _, err := fmt.Fprintf( w, "%s %s%0*d\x00%s", mode, prefix, digits, i, oid.Bytes(), ) @@ -66,7 +60,7 @@ func newGitBomb( prefix = "d" } - oid = testutils.CreateObject(t, path, "commit", func(w io.Writer) error { + oid = repo.CreateObject(t, "commit", func(w io.Writer) error { _, err := fmt.Fprintf( w, "tree %s\n"+ @@ -79,7 +73,7 @@ func newGitBomb( return err }) - testutils.UpdateRef(t, path, "refs/heads/master", oid) + repo.UpdateRef(t, "refs/heads/master", oid) } // TestRefSelections tests various combinations of reference selection @@ -131,16 +125,12 @@ func TestRefSelections(t *testing.T) { } // Create a test repo with one orphan commit per refname: - path, err := ioutil.TempDir("", "ref-selection") - require.NoError(t, err) + repo := testutils.NewTestRepo(t, true, "ref-selection") - defer 
os.RemoveAll(path) - - err = exec.Command("git", "init", "--bare", path).Run() - require.NoError(t, err) + defer repo.Remove(t) for _, p := range references { - testutils.CreateReferencedOrphan(t, path, p.refname) + repo.CreateReferencedOrphan(t, p.refname) } executable, err := exec.LookPath("bin/git-sizer") @@ -250,26 +240,18 @@ func TestRefSelections(t *testing.T) { t.Run( p.name, func(t *testing.T) { - clonePath, err := ioutil.TempDir("", "ref-selection") - require.NoError(t, err) - - defer os.RemoveAll(clonePath) + repo := repo.Clone(t, "ref-selection") - err = exec.Command( - "git", "clone", "--bare", "--mirror", path, clonePath, - ).Run() - require.NoError(t, err) - - path := clonePath + defer repo.Remove(t) for _, e := range p.config { - testutils.ConfigAdd(t, path, e.Key, e.Value) + repo.ConfigAdd(t, e.Key, e.Value) } args := []string{"--show-refs", "--no-progress", "--json", "--json-version=2"} args = append(args, p.args...) cmd := exec.Command(executable, args...) - cmd.Dir = path + cmd.Dir = repo.Path var stdout bytes.Buffer cmd.Stdout = &stdout var stderr bytes.Buffer @@ -308,17 +290,13 @@ func pow(x uint64, n int) uint64 { func TestBomb(t *testing.T) { t.Parallel() - path, err := ioutil.TempDir("", "bomb") - require.NoError(t, err) - - defer func() { - os.RemoveAll(path) - }() + repo := testutils.NewTestRepo(t, true, "bomb") + defer repo.Remove(t) - newGitBomb(t, path, 10, 10, "boom!\n") + newGitBomb(t, repo, 10, 10, "boom!\n") h, err := sizes.ScanRepositoryUsingGraph( - testutils.NewRepository(t, path), + repo.Repository(t), git.AllReferencesFilter, sizes.NameStyleFull, false, ) require.NoError(t, err) @@ -366,38 +344,32 @@ func TestBomb(t *testing.T) { func TestTaggedTags(t *testing.T) { t.Parallel() - path, err := ioutil.TempDir("", "tagged-tags") - require.NoError(t, err, "creating temporary directory") - defer func() { - os.RemoveAll(path) - }() - - cmd := exec.Command("git", "init", path) - require.NoError(t, cmd.Run(), "initializing repo") + 
repo := testutils.NewTestRepo(t, false, "tagged-tags") + defer repo.Remove(t) timestamp := time.Unix(1112911993, 0) - cmd = testutils.GitCommand(t, path, "commit", "-m", "initial", "--allow-empty") + cmd := repo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") // The lexicographical order of these tags is important, hence // their strange names. - cmd = testutils.GitCommand(t, path, "tag", "-m", "tag 1", "tag", "master") + cmd = repo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 1") - cmd = testutils.GitCommand(t, path, "tag", "-m", "tag 2", "bag", "tag") + cmd = repo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 2") - cmd = testutils.GitCommand(t, path, "tag", "-m", "tag 3", "wag", "bag") + cmd = repo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 3") h, err := sizes.ScanRepositoryUsingGraph( - testutils.NewRepository(t, path), + repo.Repository(t), git.AllReferencesFilter, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") @@ -406,26 +378,20 @@ func TestTaggedTags(t *testing.T) { func TestFromSubdir(t *testing.T) { t.Parallel() - path, err := ioutil.TempDir("", "subdir") - require.NoError(t, err, "creating temporary directory") - defer func() { - os.RemoveAll(path) - }() - - cmd := exec.Command("git", "init", path) - require.NoError(t, cmd.Run(), "initializing repo") + repo := testutils.NewTestRepo(t, false, "subdir") + defer repo.Remove(t) timestamp := time.Unix(1112911993, 0) - testutils.AddFile(t, path, "subdir/file.txt", "Hello, world!\n") + repo.AddFile(t, "subdir/file.txt", "Hello, world!\n") - cmd = testutils.GitCommand(t, path, "commit", "-m", "initial") + cmd := repo.GitCommand(t, 
"commit", "-m", "initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") h, err := sizes.ScanRepositoryUsingGraph( - testutils.NewRepository(t, filepath.Join(path, "subdir")), + repo.Repository(t), git.AllReferencesFilter, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") @@ -434,48 +400,51 @@ func TestFromSubdir(t *testing.T) { func TestSubmodule(t *testing.T) { t.Parallel() - path, err := ioutil.TempDir("", "submodule") + + tmp, err := ioutil.TempDir("", "submodule") require.NoError(t, err, "creating temporary directory") defer func() { - os.RemoveAll(path) + os.RemoveAll(tmp) }() timestamp := time.Unix(1112911993, 0) - submPath := filepath.Join(path, "subm") - cmd := exec.Command("git", "init", submPath) - require.NoError(t, cmd.Run(), "initializing subm repo") - testutils.AddFile(t, submPath, "submfile1.txt", "Hello, submodule!\n") - testutils.AddFile(t, submPath, "submfile2.txt", "Hello again, submodule!\n") - testutils.AddFile(t, submPath, "submfile3.txt", "Hello again, submodule!\n") + submRepo := testutils.TestRepo{ + Path: filepath.Join(tmp, "subm"), + } + submRepo.Init(t, false) + submRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") + submRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") + submRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") - cmd = testutils.GitCommand(t, submPath, "commit", "-m", "subm initial") + cmd := submRepo.GitCommand(t, "commit", "-m", "subm initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating subm commit") - mainPath := filepath.Join(path, "main") - cmd = exec.Command("git", "init", mainPath) - require.NoError(t, cmd.Run(), "initializing main repo") + mainRepo := testutils.TestRepo{ + Path: filepath.Join(tmp, "main"), + } + mainRepo.Init(t, false) - testutils.AddFile(t, mainPath, "mainfile.txt", "Hello, main!\n") + mainRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") - cmd = testutils.GitCommand(t, 
mainPath, "commit", "-m", "main initial") + cmd = mainRepo.GitCommand(t, "commit", "-m", "main initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = testutils.GitCommand(t, mainPath, "submodule", "add", submPath, "sub") - cmd.Dir = mainPath + cmd = mainRepo.GitCommand(t, "submodule", "add", submRepo.Path, "sub") + cmd.Dir = mainRepo.Path require.NoError(t, cmd.Run(), "adding submodule") - cmd = testutils.GitCommand(t, mainPath, "commit", "-m", "add submodule") + cmd = mainRepo.GitCommand(t, "commit", "-m", "add submodule") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "committing submodule to main") // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - testutils.NewRepository(t, mainPath), + mainRepo.Repository(t), git.AllReferencesFilter, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") @@ -484,8 +453,11 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(1), h.MaxExpandedSubmoduleCount, "max expanded submodule count") // Analyze the submodule: + submRepo2 := testutils.TestRepo{ + Path: filepath.Join(mainRepo.Path, "sub"), + } h, err = sizes.ScanRepositoryUsingGraph( - testutils.NewRepository(t, filepath.Join(mainPath, "sub")), + submRepo2.Repository(t), git.AllReferencesFilter, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 06ae136..b5b839e 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -16,45 +16,112 @@ import ( "github.com/github/git-sizer/git" ) -func NewRepository(t *testing.T, repoPath string) *git.Repository { +// TestRepo represents a git repository used for tests. +type TestRepo struct { + Path string +} + +// NewTestRepo creates and initializes a test repository in a +// temporary directory constructed using `pattern`.
The caller must +// delete the repository by calling `repo.Remove()`. +func NewTestRepo(t *testing.T, bare bool, pattern string) *TestRepo { t.Helper() - repo, err := git.NewRepository(repoPath) + path, err := ioutil.TempDir("", pattern) require.NoError(t, err) - return repo + + repo := TestRepo{Path: path} + + repo.Init(t, bare) + + return &TestRepo{ + Path: path, + } } -func GitCommand(t *testing.T, repoPath string, args ...string) *exec.Cmd { +// Init initializes a git repository at `repo.Path`. +func (repo *TestRepo) Init(t *testing.T, bare bool) { t.Helper() - gitArgs := []string{"-C", repoPath} + // Don't use `GitCommand()` because the directory might not + // exist yet: + var cmd *exec.Cmd + if bare { + cmd = exec.Command("git", "init", "--bare", repo.Path) + } else { + cmd = exec.Command("git", "init", repo.Path) + } + err := cmd.Run() + require.NoError(t, err) +} + +// Remove deletes the test repository at `repo.Path`. +func (repo *TestRepo) Remove(t *testing.T) { + t.Helper() + + _ = os.RemoveAll(repo.Path) +} + +// Clone creates a clone of `repo` at a temporary path constructued +// using `pattern`. The caller is responsible for removing it when +// done by calling `Remove()`. +func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { + t.Helper() + + path, err := ioutil.TempDir("", pattern) + require.NoError(t, err) + + err = repo.GitCommand( + t, "clone", "--bare", "--mirror", repo.Path, path, + ).Run() + require.NoError(t, err) + + return &TestRepo{ + Path: path, + } +} + +// Repository returns a `*git.Repository` for `repo`. +func (repo *TestRepo) Repository(t *testing.T) *git.Repository { + t.Helper() + + r, err := git.NewRepository(repo.Path) + require.NoError(t, err) + return r +} + +// GitCommand creates an `*exec.Cmd` for running `git` in `repo` with +// the specified arguments. 
+func (repo *TestRepo) GitCommand(t *testing.T, args ...string) *exec.Cmd { + t.Helper() + + gitArgs := []string{"-C", repo.Path} gitArgs = append(gitArgs, args...) return exec.Command("git", gitArgs...) } -func UpdateRef(t *testing.T, repoPath string, refname string, oid git.OID) { +func (repo *TestRepo) UpdateRef(t *testing.T, refname string, oid git.OID) { t.Helper() var cmd *exec.Cmd if oid == git.NullOID { - cmd = GitCommand(t, repoPath, "update-ref", "-d", refname) + cmd = repo.GitCommand(t, "update-ref", "-d", refname) } else { - cmd = GitCommand(t, repoPath, "update-ref", refname, oid.String()) + cmd = repo.GitCommand(t, "update-ref", refname, oid.String()) } require.NoError(t, cmd.Run()) } // createObject creates a new Git object, of the specified type, in // the repository at `repoPath`. `writer` is a function that writes -// the object in `git hash-object` input format. This is used for -// testing only. -func CreateObject( - t *testing.T, repoPath string, otype git.ObjectType, writer func(io.Writer) error, +// the object in `git hash-object` input format. +func (repo *TestRepo) CreateObject( + t *testing.T, otype git.ObjectType, writer func(io.Writer) error, ) git.OID { t.Helper() - cmd := GitCommand(t, repoPath, "hash-object", "-w", "-t", string(otype), "--stdin") + cmd := repo.GitCommand(t, "hash-object", "-w", "-t", string(otype), "--stdin") in, err := cmd.StdinPipe() require.NoError(t, err) @@ -85,39 +152,49 @@ func CreateObject( return oid } -func AddFile(t *testing.T, repoPath string, relativePath, contents string) { +// AddFile adds and stages a file in `repo` at path `relativePath` +// with the specified `contents`. This must be run in a non-bare +// repository. +func (repo *TestRepo) AddFile(t *testing.T, relativePath, contents string) { t.Helper() dirPath := filepath.Dir(relativePath) if dirPath != "." 
{ - require.NoError(t, os.MkdirAll(filepath.Join(repoPath, dirPath), 0777), "creating subdir") + require.NoError( + t, + os.MkdirAll(filepath.Join(repo.Path, dirPath), 0777), + "creating subdir", + ) } - filename := filepath.Join(repoPath, relativePath) + filename := filepath.Join(repo.Path, relativePath) f, err := os.Create(filename) require.NoErrorf(t, err, "creating file %q", filename) _, err = f.WriteString(contents) require.NoErrorf(t, err, "writing to file %q", filename) require.NoErrorf(t, f.Close(), "closing file %q", filename) - cmd := GitCommand(t, repoPath, "add", relativePath) + cmd := repo.GitCommand(t, "add", relativePath) require.NoErrorf(t, cmd.Run(), "adding file %q", relativePath) } -func CreateReferencedOrphan(t *testing.T, repoPath string, refname string) { +// CreateReferencedOrphan creates a simple new orphan commit and +// points the reference with name `refname` at it. This can be run in +// a bare or non-bare repository. +func (repo *TestRepo) CreateReferencedOrphan(t *testing.T, refname string) { t.Helper() - oid := CreateObject(t, repoPath, "blob", func(w io.Writer) error { + oid := repo.CreateObject(t, "blob", func(w io.Writer) error { _, err := fmt.Fprintf(w, "%s\n", refname) return err }) - oid = CreateObject(t, repoPath, "tree", func(w io.Writer) error { + oid = repo.CreateObject(t, "tree", func(w io.Writer) error { _, err := fmt.Fprintf(w, "100644 a.txt\x00%s", oid.Bytes()) return err }) - oid = CreateObject(t, repoPath, "commit", func(w io.Writer) error { + oid = repo.CreateObject(t, "commit", func(w io.Writer) error { _, err := fmt.Fprintf( w, "tree %s\n"+ @@ -130,7 +207,7 @@ func CreateReferencedOrphan(t *testing.T, repoPath string, refname string) { return err }) - UpdateRef(t, repoPath, refname, oid) + repo.UpdateRef(t, refname, oid) } func AddAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { @@ -145,11 +222,10 @@ func AddAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { *timestamp = timestamp.Add(60 * time.Second) } -// 
ConfigAdd adds a key-value pair to the gitconfig in the repository -// at `repoPath`. -func ConfigAdd(t *testing.T, repoPath string, key, value string) { +// ConfigAdd adds a key-value pair to the gitconfig in `repo`. +func (repo *TestRepo) ConfigAdd(t *testing.T, key, value string) { t.Helper() - err := GitCommand(t, repoPath, "config", "--add", key, value).Run() + err := repo.GitCommand(t, "config", "--add", key, value).Run() require.NoError(t, err) } From 8aa7551141e499eb07237b0fdfe41e1e555e84fa Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 26 Sep 2021 14:07:21 +0200 Subject: [PATCH 053/176] mainImplementation(): there is no need to pre-declare `historySize` --- git-sizer.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 19b2a15..c01f537 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -493,8 +493,6 @@ func mainImplementation(args []string) error { progress = v } - var historySize sizes.HistorySize - var refFilter git.ReferenceFilter = filter.Filter if showRefs { @@ -511,7 +509,7 @@ func mainImplementation(args []string) error { } } - historySize, err = sizes.ScanRepositoryUsingGraph(repo, refFilter, nameStyle, progress) + historySize, err := sizes.ScanRepositoryUsingGraph(repo, refFilter, nameStyle, progress) if err != nil { return fmt.Errorf("error scanning repository: %s", err) } From 0b0adf1c7c6f7b0b1af4341a3298a177a646e6b9 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Oct 2021 12:19:46 +0200 Subject: [PATCH 054/176] NegatedBoolValue: move to a separate file --- git-sizer.go | 26 -------------------------- negated_bool_value.go | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 26 deletions(-) create mode 100644 negated_bool_value.go diff --git a/git-sizer.go b/git-sizer.go index c01f537..6a8b7c2 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -79,32 +79,6 @@ const Usage = `usage: git-sizer [OPTS] var ReleaseVersion string var BuildVersion string -type 
NegatedBoolValue struct { - value *bool -} - -func (v *NegatedBoolValue) Set(s string) error { - b, err := strconv.ParseBool(s) - *v.value = !b - return err -} - -func (v *NegatedBoolValue) Get() interface{} { - return !*v.value -} - -func (v *NegatedBoolValue) String() string { - if v == nil || v.value == nil { - return "true" - } else { - return strconv.FormatBool(!*v.value) - } -} - -func (v *NegatedBoolValue) Type() string { - return "bool" -} - type filterValue struct { // The filter to which values will be appended: filter *git.IncludeExcludeFilter diff --git a/negated_bool_value.go b/negated_bool_value.go new file mode 100644 index 0000000..b92238b --- /dev/null +++ b/negated_bool_value.go @@ -0,0 +1,31 @@ +package main + +import ( + "strconv" +) + +type NegatedBoolValue struct { + value *bool +} + +func (v *NegatedBoolValue) Set(s string) error { + b, err := strconv.ParseBool(s) + *v.value = !b + return err +} + +func (v *NegatedBoolValue) Get() interface{} { + return !*v.value +} + +func (v *NegatedBoolValue) String() string { + if v == nil || v.value == nil { + return "true" + } else { + return strconv.FormatBool(!*v.value) + } +} + +func (v *NegatedBoolValue) Type() string { + return "bool" +} From 2e0da54abd77f210f0a2e83b2db1f3a4c23ff62c Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Oct 2021 13:12:43 +0200 Subject: [PATCH 055/176] ReferenceFilter: simplify how filters are combined Instead of the special `IncludeExcludeFilter`, treat filters just as sets, with the usual set operations (union, intersection, and inversion) and build them up that way. Add a `Combiner` that knows how to combine two `ReferenceFilter`s, and use it to implement "include" vs. "exclude". Build special behavior into the combiners if the left argument is `nil`, to get the old semantics that the "default" for an unmentioned reference depends on whether the first directive is "include" vs. "exclude". 
--- git-sizer.go | 83 ++++++++++++++----------- git/ref_filter.go | 135 +++++++++++++++++++++++++---------------- git/ref_filter_test.go | 22 ++++--- sizes/graph.go | 2 +- 4 files changed, 144 insertions(+), 98 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 6a8b7c2..ff389e7 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -80,25 +80,28 @@ var ReleaseVersion string var BuildVersion string type filterValue struct { - // The filter to which values will be appended: - filter *git.IncludeExcludeFilter - - // The polarity of this option (i.e., does it cause the things - // that it references to be included or excluded?): - polarity git.Polarity - - // If this is set, then it is used as the pattern. If not, then - // the user should supply the pattern. + // filter is the filter that will be modified if this option + // is used. + filter *git.ReferenceFilter + + // combiner specifies how the filter generated by this option + // is combined with the existing filter; i.e., does it cause + // the matching references to be included or excluded? + combiner git.Combiner + + // pattern, if it is set, is the pattern (prefix or regexp) to + // be matched. If it is not set, then the user must supply the + // pattern. pattern string - // Should `pattern` be interpreted as a regexp (as opposed to a - // prefix)? + // regexp specifies whether `pattern` should be interpreted as + // a regexp (as opposed to a prefix). regexp bool } func (v *filterValue) Set(s string) error { var filter git.ReferenceFilter - polarity := v.polarity + combiner := v.combiner var pattern string if v.pattern != "" { @@ -112,7 +115,7 @@ func (v *filterValue) Set(s string) error { return err } if !b { - polarity = polarity.Inverted() + combiner = combiner.Inverted() } } else { // The user must supply the pattern. 
@@ -129,12 +132,7 @@ func (v *filterValue) Set(s string) error { filter = git.PrefixFilter(pattern) } - switch polarity { - case git.Include: - v.filter.Include(filter) - case git.Exclude: - v.filter.Exclude(filter) - } + *v.filter = combiner.Combine(*v.filter, filter) return nil } @@ -158,7 +156,7 @@ func (v *filterValue) Type() string { } type filterGroupValue struct { - filter *git.IncludeExcludeFilter + filter *git.ReferenceFilter repo *git.Repository } @@ -181,7 +179,9 @@ func (v *filterGroupValue) Set(name string) error { for _, entry := range config.Entries { switch entry.Key { case "include": - v.filter.Include(git.PrefixFilter(entry.Value)) + *v.filter = git.Include.Combine( + *v.filter, git.PrefixFilter(entry.Value), + ) case "includeregexp": filter, err := git.RegexpFilter(entry.Value) if err != nil { @@ -190,9 +190,11 @@ func (v *filterGroupValue) Set(name string) error { name, entry.Key, err, ) } - v.filter.Include(filter) + *v.filter = git.Include.Combine(*v.filter, filter) case "exclude": - v.filter.Exclude(git.PrefixFilter(entry.Value)) + *v.filter = git.Exclude.Combine( + *v.filter, git.PrefixFilter(entry.Value), + ) case "excluderegexp": filter, err := git.RegexpFilter(entry.Value) if err != nil { @@ -201,7 +203,7 @@ func (v *filterGroupValue) Set(name string) error { name, entry.Key, err, ) } - v.filter.Exclude(filter) + *v.filter = git.Exclude.Combine(*v.filter, filter) default: // Ignore unrecognized keys. 
} @@ -237,7 +239,7 @@ func mainImplementation(args []string) error { var threshold sizes.Threshold = 1 var progress bool var version bool - var filter git.IncludeExcludeFilter + var filter git.ReferenceFilter var showRefs bool // Try to open the repository, but it's not an error yet if this @@ -467,23 +469,16 @@ func mainImplementation(args []string) error { progress = v } - var refFilter git.ReferenceFilter = filter.Filter + if filter == nil { + filter = git.AllReferencesFilter + } if showRefs { - oldRefFilter := refFilter fmt.Fprintf(os.Stderr, "References (included references marked with '+'):\n") - refFilter = func(refname string) bool { - b := oldRefFilter(refname) - if b { - fmt.Fprintf(os.Stderr, "+ %s\n", refname) - } else { - fmt.Fprintf(os.Stderr, " %s\n", refname) - } - return b - } + filter = showRefFilter{filter} } - historySize, err := sizes.ScanRepositoryUsingGraph(repo, refFilter, nameStyle, progress) + historySize, err := sizes.ScanRepositoryUsingGraph(repo, filter, nameStyle, progress) if err != nil { return fmt.Errorf("error scanning repository: %s", err) } @@ -509,3 +504,17 @@ func mainImplementation(args []string) error { return nil } + +type showRefFilter struct { + f git.ReferenceFilter +} + +func (f showRefFilter) Filter(refname string) bool { + b := f.f.Filter(refname) + if b { + fmt.Fprintf(os.Stderr, "+ %s\n", refname) + } else { + fmt.Fprintf(os.Stderr, " %s\n", refname) + } + return b +} diff --git a/git/ref_filter.go b/git/ref_filter.go index f71c418..8eb8a9b 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -5,68 +5,84 @@ import ( "strings" ) -type ReferenceFilter func(refname string) bool +type ReferenceFilter interface { + Filter(refname string) bool +} -func AllReferencesFilter(_ string) bool { - return true +// Combiner combines two `ReferenceFilter`s into one compound one. +// `f1` is allowed to be `nil`. 
+type Combiner interface { + Combine(f1, f2 ReferenceFilter) ReferenceFilter + Inverted() Combiner } -type Polarity uint8 +type inverse struct { + f ReferenceFilter +} -const ( - Include Polarity = iota - Exclude -) +func (f inverse) Filter(refname string) bool { + return !f.f.Filter(refname) +} -func (p Polarity) Inverted() Polarity { - switch p { - case Include: - return Exclude - case Exclude: - return Include - default: - // This shouldn't happen: - return Exclude - } +type intersection struct { + f1, f2 ReferenceFilter +} + +func (f intersection) Filter(refname string) bool { + return f.f1.Filter(refname) && f.f2.Filter(refname) } -// polarizedFilter is a filter that might match, in which case it -// includes or excludes the reference (according to its polarity). If -// it doesn't match, then it doesn't say anything about the reference. -type polarizedFilter struct { - polarity Polarity - filter ReferenceFilter +// Include is a Combiner that includes the references matched by `f2`. +// If `f1` is `nil`, it is treated as including nothing. +type include struct{} + +func (_ include) Combine(f1, f2 ReferenceFilter) ReferenceFilter { + if f1 == nil { + return f2 + } + return union{f1, f2} } -// IncludeExcludeFilter is a filter based on a bunch of -// `polarizedFilter`s. The last one that matches a reference wins. If -// none match, then the result is based on the polarity of the first -// polarizedFilter: if it is `Include`, then return `false`; if it is -// `Exclude`, then return `true`. 
-type IncludeExcludeFilter struct { - filters []polarizedFilter +func (_ include) Inverted() Combiner { + return Exclude } -func (ief *IncludeExcludeFilter) Include(f ReferenceFilter) { - ief.filters = append(ief.filters, polarizedFilter{Include, f}) +var Include include + +type union struct { + f1, f2 ReferenceFilter } -func (ief *IncludeExcludeFilter) Exclude(f ReferenceFilter) { - ief.filters = append(ief.filters, polarizedFilter{Exclude, f}) +func (f union) Filter(refname string) bool { + return f.f1.Filter(refname) || f.f2.Filter(refname) } -func (ief *IncludeExcludeFilter) Filter(refname string) bool { - for i := len(ief.filters); i > 0; i-- { - f := ief.filters[i-1] - if !f.filter(refname) { - continue - } - return f.polarity == Include +// Exclude is a Combiner that excludes the references matched by `f2`. +// If `f1` is `nil`, it is treated as including everything. +type exclude struct{} + +func (_ exclude) Combine(f1, f2 ReferenceFilter) ReferenceFilter { + if f1 == nil { + return inverse{f2} } + return intersection{f1, inverse{f2}} + +} - return len(ief.filters) == 0 || ief.filters[0].polarity == Exclude +func (_ exclude) Inverted() Combiner { + return include{} } +var Exclude exclude + +type allReferencesFilter struct{} + +func (_ allReferencesFilter) Filter(_ string) bool { + return true +} + +var AllReferencesFilter allReferencesFilter + // PrefixFilter returns a `ReferenceFilter` that matches references // whose names start with the specified `prefix`, which must match at // a component boundary. For example, @@ -77,16 +93,23 @@ func (ief *IncludeExcludeFilter) Filter(refname string) bool { // * Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or // "refs/foobar". 
func PrefixFilter(prefix string) ReferenceFilter { - if strings.HasSuffix(prefix, "/") { - return func(refname string) bool { - return strings.HasPrefix(refname, prefix) - } + if prefix == "" { + return AllReferencesFilter } + return prefixFilter{prefix} +} - return func(refname string) bool { - return strings.HasPrefix(refname, prefix) && - (len(refname) == len(prefix) || refname[len(prefix)] == '/') +type prefixFilter struct { + prefix string +} + +func (f prefixFilter) Filter(refname string) bool { + if strings.HasSuffix(f.prefix, "/") { + return strings.HasPrefix(refname, f.prefix) } + + return strings.HasPrefix(refname, f.prefix) && + (len(refname) == len(f.prefix) || refname[len(f.prefix)] == '/') } // RegexpFilter returns a `ReferenceFilter` that matches references @@ -99,7 +122,13 @@ func RegexpFilter(pattern string) (ReferenceFilter, error) { return nil, err } - return func(refname string) bool { - return re.MatchString(refname) - }, nil + return regexpFilter{re}, nil +} + +type regexpFilter struct { + re *regexp.Regexp +} + +func (f regexpFilter) Filter(refname string) bool { + return f.re.MatchString(refname) } diff --git a/git/ref_filter_test.go b/git/ref_filter_test.go index b03c588..20ae018 100644 --- a/git/ref_filter_test.go +++ b/git/ref_filter_test.go @@ -38,7 +38,11 @@ func TestPrefixFilter(t *testing.T) { t.Run( fmt.Sprintf("prefix '%s', refname '%s'", p.prefix, p.refname), func(t *testing.T) { - assert.Equal(t, p.expected, git.PrefixFilter(p.prefix)(p.refname)) + assert.Equal( + t, + p.expected, + git.PrefixFilter(p.prefix).Filter(p.refname), + ) }, ) } @@ -73,7 +77,11 @@ func TestRegexpFilter(t *testing.T) { t.Run( fmt.Sprintf("pattern '%s', refname '%s'", p.pattern, p.refname), func(t *testing.T) { - assert.Equal(t, p.expected, regexpFilter(t, p.pattern)(p.refname)) + assert.Equal( + t, + p.expected, + regexpFilter(t, p.pattern).Filter(p.refname), + ) }, ) } @@ -82,11 +90,11 @@ func TestRegexpFilter(t *testing.T) { func 
TestIncludeExcludeFilter(t *testing.T) { t.Parallel() - var filter git.IncludeExcludeFilter - filter.Include(git.PrefixFilter("refs/heads")) - filter.Exclude(regexpFilter(t, "refs/heads/.*foo.*")) - filter.Include(git.PrefixFilter("refs/remotes")) - filter.Exclude(git.PrefixFilter("refs/remotes/foo")) + var filter git.ReferenceFilter + filter = git.Include.Combine(filter, git.PrefixFilter("refs/heads")) + filter = git.Exclude.Combine(filter, regexpFilter(t, "refs/heads/.*foo.*")) + filter = git.Include.Combine(filter, git.PrefixFilter("refs/remotes")) + filter = git.Exclude.Combine(filter, git.PrefixFilter("refs/remotes/foo")) for _, p := range []struct { refname string diff --git a/sizes/graph.go b/sizes/graph.go index ffe5aea..a634fde 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -62,7 +62,7 @@ func ScanRepositoryUsingGraph( if !ok { break } - if !filter(ref.Refname) { + if !filter.Filter(ref.Refname) { continue } refs = append(refs, ref) From 3ddf88e48ac63ebcbc5eac54d2a21b1f6fb7f790 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Oct 2021 14:04:02 +0200 Subject: [PATCH 056/176] Start adding a mechanism for grouping references configurably Add types `RefGroup` and `RefGrouper` that will allow references to be grouped into multiple configurable groups, and to tally up the count of references by group.
--- git-sizer.go | 18 +++++++++++++++++- git_sizer_test.go | 20 +++++++++++++++----- sizes/graph.go | 34 ++++++++++++++++++++++++++++++++-- 3 files changed, 64 insertions(+), 8 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index ff389e7..a945dfe 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -478,7 +478,11 @@ func mainImplementation(args []string) error { filter = showRefFilter{filter} } - historySize, err := sizes.ScanRepositoryUsingGraph(repo, filter, nameStyle, progress) + rg := refGrouper{ + filter: filter, + } + + historySize, err := sizes.ScanRepositoryUsingGraph(repo, &rg, nameStyle, progress) if err != nil { return fmt.Errorf("error scanning repository: %s", err) } @@ -505,6 +509,18 @@ func mainImplementation(args []string) error { return nil } +type refGrouper struct { + filter git.ReferenceFilter +} + +func (rg *refGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) { + return rg.filter.Filter(refname), nil +} + +func (rg *refGrouper) Groups() []sizes.RefGroup { + return nil +} + type showRefFilter struct { f git.ReferenceFilter } diff --git a/git_sizer_test.go b/git_sizer_test.go index e239507..65be141 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -285,6 +285,16 @@ func pow(x uint64, n int) uint64 { return p } +type refGrouper struct{} + +func (rg refGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) { + return true, nil +} + +func (rg refGrouper) Groups() []sizes.RefGroup { + return nil +} + func TestBomb(t *testing.T) { t.Parallel() @@ -295,7 +305,7 @@ func TestBomb(t *testing.T) { h, err := sizes.ScanRepositoryUsingGraph( repo.Repository(t), - git.AllReferencesFilter, sizes.NameStyleFull, false, + refGrouper{}, sizes.NameStyleFull, false, ) require.NoError(t, err) @@ -368,7 +378,7 @@ func TestTaggedTags(t *testing.T) { h, err := sizes.ScanRepositoryUsingGraph( repo.Repository(t), - git.AllReferencesFilter, sizes.NameStyleNone, false, + refGrouper{}, sizes.NameStyleNone, false, ) require.NoError(t, 
err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -390,7 +400,7 @@ func TestFromSubdir(t *testing.T) { h, err := sizes.ScanRepositoryUsingGraph( repo.Repository(t), - git.AllReferencesFilter, sizes.NameStyleNone, false, + refGrouper{}, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -443,7 +453,7 @@ func TestSubmodule(t *testing.T) { // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( mainRepo.Repository(t), - git.AllReferencesFilter, sizes.NameStyleNone, false, + refGrouper{}, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -456,7 +466,7 @@ func TestSubmodule(t *testing.T) { } h, err = sizes.ScanRepositoryUsingGraph( submRepo2.Repository(t), - git.AllReferencesFilter, sizes.NameStyleNone, false, + refGrouper{}, sizes.NameStyleNone, false, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") diff --git a/sizes/graph.go b/sizes/graph.go index a634fde..565901c 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -13,8 +13,36 @@ import ( "github.com/github/git-sizer/meter" ) +type RefGroupSymbol string + +// RefGroup is a group of references, for example "branches" or +// "tags". Reference groups might overlap. +type RefGroup struct { + // Symbol is the unique string by which this `RefGroup` is + // identified and configured. It consists of dot-separated + // components, which implicitly makes a nested tree-like + // structure. + Symbol RefGroupSymbol + + // Name is the name for this `ReferenceGroup` to be presented + // in user-readable output. + Name string +} + +type RefGrouper interface { + // Categorize tells whether `refname` should be walked at all, + // and if so, the symbols of the reference groups to which it + // belongs. 
+ Categorize(refname string) (bool, []RefGroupSymbol) + + // Groups returns the list of `ReferenceGroup`s, in the order + // that they should be presented. The return value might + // depend on which references have been seen so far. + Groups() []RefGroup +} + func ScanRepositoryUsingGraph( - repo *git.Repository, filter git.ReferenceFilter, nameStyle NameStyle, progress bool, + repo *git.Repository, rg RefGrouper, nameStyle NameStyle, progress bool, ) (HistorySize, error) { graph := NewGraph(nameStyle) var progressMeter meter.Progress @@ -62,7 +90,9 @@ func ScanRepositoryUsingGraph( if !ok { break } - if !filter.Filter(ref.Refname) { + + walk, _ := rg.Categorize(ref.Refname) + if !walk { continue } refs = append(refs, ref) From 73dae86247feb39f5dd75e7cc2305cf1c95c213e Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Oct 2021 14:12:43 +0200 Subject: [PATCH 057/176] ScanRepositoryUsingGraph(): keep track of reference groups --- sizes/graph.go | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index 565901c..6265aef 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -41,6 +41,11 @@ type RefGrouper interface { Groups() []RefGroup } +type refSeen struct { + git.Reference + groups []RefGroupSymbol +} + func ScanRepositoryUsingGraph( repo *git.Repository, rg RefGrouper, nameStyle NameStyle, progress bool, ) (HistorySize, error) { @@ -73,7 +78,7 @@ func ScanRepositoryUsingGraph( }() errChan := make(chan error, 1) - var refs []git.Reference + var refsSeen []refSeen // Feed the references that we want into the stdin of the object // iterator: go func() { @@ -91,11 +96,20 @@ func ScanRepositoryUsingGraph( break } - walk, _ := rg.Categorize(ref.Refname) + walk, groups := rg.Categorize(ref.Refname) + + refsSeen = append( + refsSeen, + refSeen{ + Reference: ref, + groups: groups, + }, + ) + if !walk { continue } - refs = append(refs, ref) + _, err = bufin.WriteString(ref.OID.String()) if err 
!= nil { errChan <- err @@ -356,9 +370,9 @@ func ScanRepositoryUsingGraph( } progressMeter.Start("Processing references: %d") - for _, ref := range refs { + for _, refSeen := range refsSeen { progressMeter.Inc() - graph.RegisterReference(ref) + graph.RegisterReference(refSeen.Reference) } progressMeter.Done() From acf2ea773e96771cd509b74066a12af62f7abe88 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Oct 2021 14:17:01 +0200 Subject: [PATCH 058/176] Graph: pass the reference group symbols along to `HistorySize` --- sizes/graph.go | 7 +++++-- sizes/sizes.go | 4 ++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index 6265aef..6a32290 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -372,7 +372,7 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing references: %d") for _, refSeen := range refsSeen { progressMeter.Inc() - graph.RegisterReference(refSeen.Reference) + graph.RegisterReference(refSeen.Reference, refSeen.groups) } progressMeter.Done() @@ -420,9 +420,12 @@ func NewGraph(nameStyle NameStyle) *Graph { } } -func (g *Graph) RegisterReference(ref git.Reference) { +func (g *Graph) RegisterReference(ref git.Reference, groups []RefGroupSymbol) { g.historyLock.Lock() g.historySize.recordReference(g, ref) + for _, group := range groups { + g.historySize.recordReferenceGroup(g, group) + } g.historyLock.Unlock() g.pathResolver.RecordReference(ref) diff --git a/sizes/sizes.go b/sizes/sizes.go index eb08e6f..40ed802 100644 --- a/sizes/sizes.go +++ b/sizes/sizes.go @@ -288,3 +288,7 @@ func (s *HistorySize) recordTag(g *Graph, oid git.OID, tagSize TagSize, size cou func (s *HistorySize) recordReference(g *Graph, ref git.Reference) { s.ReferenceCount.Increment(1) } + +func (s *HistorySize) recordReferenceGroup(g *Graph, group RefGroupSymbol) { + // FIXME +} From 03516971b0f2b003a6fcaf5ffc7d5afb5a0dda89 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Oct 2021 14:29:39 +0200 Subject: 
[PATCH 059/176] HistorySize: keep track of counts for each reference group --- sizes/graph.go | 4 ++++ sizes/sizes.go | 12 +++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sizes/graph.go b/sizes/graph.go index 6a32290..c8d2c53 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -416,6 +416,10 @@ func NewGraph(nameStyle NameStyle) *Graph { tagRecords: make(map[git.OID]*tagRecord), tagSizes: make(map[git.OID]TagSize), + historySize: HistorySize{ + ReferenceGroups: make(map[RefGroupSymbol]*counts.Count32), + }, + pathResolver: NewPathResolver(nameStyle), } } diff --git a/sizes/sizes.go b/sizes/sizes.go index 40ed802..b3de0bc 100644 --- a/sizes/sizes.go +++ b/sizes/sizes.go @@ -160,6 +160,10 @@ type HistorySize struct { // once. ReferenceCount counts.Count32 `json:"reference_count"` + // ReferenceGroups keeps track of how many references in each + // reference group were scanned. + ReferenceGroups map[RefGroupSymbol]*counts.Count32 `json:"reference_groups"` + // The maximum TreeSize in the analyzed history (where each // attribute is maximized separately). 
@@ -290,5 +294,11 @@ func (s *HistorySize) recordReference(g *Graph, ref git.Reference) { } func (s *HistorySize) recordReferenceGroup(g *Graph, group RefGroupSymbol) { - // FIXME + c, ok := s.ReferenceGroups[group] + if ok { + c.Increment(1) + } else { + n := counts.Count32(1) + s.ReferenceGroups[group] = &n + } } From 164961a1a22f2bcb7f7017df0a6e6f1e0980170c Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Oct 2021 15:04:33 +0200 Subject: [PATCH 060/176] HistorySize: output counts of references per reference group --- git-sizer.go | 7 +++++-- sizes/graph.go | 8 ++++++-- sizes/output.go | 38 +++++++++++++++++++++++++++++++++----- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index a945dfe..c1a1eee 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -494,7 +494,7 @@ func mainImplementation(args []string) error { case 1: j, err = json.MarshalIndent(historySize, "", " ") case 2: - j, err = historySize.JSON(threshold, nameStyle) + j, err = historySize.JSON(rg.Groups(), threshold, nameStyle) default: return fmt.Errorf("JSON version must be 1 or 2") } @@ -503,7 +503,10 @@ func mainImplementation(args []string) error { } fmt.Printf("%s\n", j) } else { - io.WriteString(os.Stdout, historySize.TableString(threshold, nameStyle)) + io.WriteString( + os.Stdout, + historySize.TableString(rg.Groups(), threshold, nameStyle), + ) } return nil diff --git a/sizes/graph.go b/sizes/graph.go index c8d2c53..e8b655f 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -49,7 +49,7 @@ type refSeen struct { func ScanRepositoryUsingGraph( repo *git.Repository, rg RefGrouper, nameStyle NameStyle, progress bool, ) (HistorySize, error) { - graph := NewGraph(nameStyle) + graph := NewGraph(rg, nameStyle) var progressMeter meter.Progress if progress { progressMeter = meter.NewProgressMeter(100 * time.Millisecond) @@ -383,6 +383,8 @@ func ScanRepositoryUsingGraph( type Graph struct { repo *git.Repository + rg RefGrouper + blobLock sync.Mutex 
blobSizes map[git.OID]BlobSize @@ -404,8 +406,10 @@ type Graph struct { pathResolver PathResolver } -func NewGraph(nameStyle NameStyle) *Graph { +func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { return &Graph{ + rg: rg, + blobSizes: make(map[git.OID]BlobSize), treeRecords: make(map[git.OID]*treeRecord), diff --git a/sizes/output.go b/sizes/output.go index 4f1d5fd..5ee20e2 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -345,8 +345,10 @@ type table struct { buf bytes.Buffer } -func (s HistorySize) TableString(threshold Threshold, nameStyle NameStyle) string { - contents := s.contents() +func (s HistorySize) TableString( + refGroups []RefGroup, threshold Threshold, nameStyle NameStyle, +) string { + contents := s.contents(refGroups) t := table{ threshold: threshold, nameStyle: nameStyle, @@ -422,19 +424,41 @@ func (t *table) formatRow( ) } -func (s HistorySize) JSON(threshold Threshold, nameStyle NameStyle) ([]byte, error) { - contents := s.contents() +func (s HistorySize) JSON( + refGroups []RefGroup, threshold Threshold, nameStyle NameStyle, +) ([]byte, error) { + contents := s.contents(refGroups) items := make(map[string]*item) contents.CollectItems(items) j, err := json.MarshalIndent(items, "", " ") return j, err } -func (s HistorySize) contents() tableContents { +func (s HistorySize) contents(refGroups []RefGroup) tableContents { S := newSection I := newItem metric := counts.Metric binary := counts.Binary + + var rgis []tableContents + for _, rg := range refGroups { + if rg.Symbol == "" { + continue + } + count, ok := s.ReferenceGroups[rg.Symbol] + if !ok { + continue + } + rgis = append( + rgis, + I( + fmt.Sprintf("refgroup.%s", rg.Symbol), rg.Name, + fmt.Sprintf("The number of references in group '%s'", rg.Symbol), + nil, *count, metric, "", 25000, + ), + ) + } + return S( "", S( @@ -484,6 +508,10 @@ func (s HistorySize) contents() tableContents { I("referenceCount", "Count", "The total number of references", nil, s.ReferenceCount, metric, "", 
25e3), + S( + "", + rgis..., + ), ), ), From 1b49fb4cfe206c895f8ad999ade459a383972252 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Oct 2021 15:14:43 +0200 Subject: [PATCH 061/176] HistorySize.contents(): indent reference group lines hierarchically --- sizes/output.go | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/sizes/output.go b/sizes/output.go index 5ee20e2..4bf80a6 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "strconv" + "strings" "github.com/github/git-sizer/counts" "github.com/github/git-sizer/git" @@ -218,6 +219,26 @@ func (i *item) MarshalJSON() ([]byte, error) { return json.Marshal(stat) } +// Indented returns an `item` that is just like `i`, but indented by +// `depth` more levels. +func (i *item) Indented(depth int) tableContents { + return &indentedItem{ + tableContents: i, + depth: depth, + } +} + +type indentedItem struct { + tableContents + depth int +} + +func (i *indentedItem) Emit(t *table) { + subTable := t.indented("", i.depth) + i.tableContents.Emit(subTable) + t.addSection(subTable) +} + type Threshold float64 // Methods to implement pflag.Value: @@ -365,16 +386,20 @@ func (s HistorySize) TableString( return t.generateHeader() + t.buf.String() + t.footnotes.String() } -func (t *table) subTable(sectionHeader string) *table { +func (t *table) indented(sectionHeader string, depth int) *table { return &table{ threshold: t.threshold, nameStyle: t.nameStyle, sectionHeader: sectionHeader, footnotes: t.footnotes, - indent: t.indent + 1, + indent: t.indent + depth, } } +func (t *table) subTable(sectionHeader string) *table { + return t.indented(sectionHeader, 1) +} + func (t *table) addSection(subTable *table) { if subTable.buf.Len() > 0 { if t.buf.Len() == 0 { @@ -449,14 +474,13 @@ func (s HistorySize) contents(refGroups []RefGroup) tableContents { if !ok { continue } - rgis = append( - rgis, - I( - 
fmt.Sprintf("refgroup.%s", rg.Symbol), rg.Name, - fmt.Sprintf("The number of references in group '%s'", rg.Symbol), - nil, *count, metric, "", 25000, - ), + rgi := I( + fmt.Sprintf("refgroup.%s", rg.Symbol), rg.Name, + fmt.Sprintf("The number of references in group '%s'", rg.Symbol), + nil, *count, metric, "", 25000, ) + indent := strings.Count(string(rg.Symbol), ".") + rgis = append(rgis, rgi.Indented(indent)) } return S( From 554bb8b2c3d9419c228af9e87b475d8484e2254c Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Oct 2021 15:56:02 +0200 Subject: [PATCH 062/176] internal/refopts: new package Move some reference option related code to a separate internal package. --- git-sizer.go | 283 ++----------------------- internal/refopts/filter_group_value.go | 76 +++++++ internal/refopts/filter_value.go | 84 ++++++++ internal/refopts/ref_group_builder.go | 154 ++++++++++++++ 4 files changed, 328 insertions(+), 269 deletions(-) create mode 100644 internal/refopts/filter_group_value.go create mode 100644 internal/refopts/filter_value.go create mode 100644 internal/refopts/ref_group_builder.go diff --git a/git-sizer.go b/git-sizer.go index c1a1eee..e829b78 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -9,11 +9,12 @@ import ( "runtime/pprof" "strconv" + "github.com/spf13/pflag" + "github.com/github/git-sizer/git" + "github.com/github/git-sizer/internal/refopts" "github.com/github/git-sizer/isatty" "github.com/github/git-sizer/sizes" - - "github.com/spf13/pflag" ) const Usage = `usage: git-sizer [OPTS] @@ -79,150 +80,6 @@ const Usage = `usage: git-sizer [OPTS] var ReleaseVersion string var BuildVersion string -type filterValue struct { - // filter is the filter that will be modified if this option - // is used. - filter *git.ReferenceFilter - - // combiner specifies how the filter generated by this option - // is combined with the existing filter; i.e., does it cause - // the matching references to be included or excluded? 
- combiner git.Combiner - - // pattern, if it is set, is the pattern (prefix or regexp) to - // be matched. If it is not set, then the user must supply the - // pattern. - pattern string - - // regexp specifies whether `pattern` should be interpreted as - // a regexp (as opposed to a prefix). - regexp bool -} - -func (v *filterValue) Set(s string) error { - var filter git.ReferenceFilter - combiner := v.combiner - - var pattern string - if v.pattern != "" { - // The pattern is fixed for this option: - pattern = v.pattern - - // It's not really expected, but if the user supplied a - // `false` boolean value, invert the polarity: - b, err := strconv.ParseBool(s) - if err != nil { - return err - } - if !b { - combiner = combiner.Inverted() - } - } else { - // The user must supply the pattern. - pattern = s - } - - if v.regexp { - var err error - filter, err = git.RegexpFilter(pattern) - if err != nil { - return fmt.Errorf("invalid regexp: %q", s) - } - } else { - filter = git.PrefixFilter(pattern) - } - - *v.filter = combiner.Combine(*v.filter, filter) - - return nil -} - -func (v *filterValue) Get() interface{} { - return nil -} - -func (v *filterValue) String() string { - return "" -} - -func (v *filterValue) Type() string { - if v.pattern != "" { - return "bool" - } else if v.regexp { - return "regexp" - } else { - return "prefix" - } -} - -type filterGroupValue struct { - filter *git.ReferenceFilter - repo *git.Repository -} - -func (v *filterGroupValue) Set(name string) error { - // At this point, it is not yet certain that the command was run - // inside a Git repository. If not, ignore this option (the - // command will error out anyway). 
- if v.repo == nil { - fmt.Fprintf( - os.Stderr, - "warning: not in Git repository; ignoring '--refgroup' option.\n", - ) - return nil - } - - config, err := v.repo.Config(fmt.Sprintf("refgroup.%s", name)) - if err != nil { - return err - } - for _, entry := range config.Entries { - switch entry.Key { - case "include": - *v.filter = git.Include.Combine( - *v.filter, git.PrefixFilter(entry.Value), - ) - case "includeregexp": - filter, err := git.RegexpFilter(entry.Value) - if err != nil { - return fmt.Errorf( - "invalid regular expression for 'refgroup.%s.%s': %w", - name, entry.Key, err, - ) - } - *v.filter = git.Include.Combine(*v.filter, filter) - case "exclude": - *v.filter = git.Exclude.Combine( - *v.filter, git.PrefixFilter(entry.Value), - ) - case "excluderegexp": - filter, err := git.RegexpFilter(entry.Value) - if err != nil { - return fmt.Errorf( - "invalid regular expression for 'refgroup.%s.%s': %w", - name, entry.Key, err, - ) - } - *v.filter = git.Exclude.Combine(*v.filter, filter) - default: - // Ignore unrecognized keys. - } - } - return nil -} - -func (v *filterGroupValue) Get() interface{} { - return nil -} - -func (v *filterGroupValue) String() string { - return "" -} - -func (v *filterGroupValue) Type() string { - return "name" -} - func main() { err := mainImplementation(os.Args[1:]) if err != nil { @@ -239,8 +96,6 @@ func mainImplementation(args []string) error { var threshold sizes.Threshold = 1 var progress bool var version bool - var filter git.ReferenceFilter - var showRefs bool // Try to open the repository, but it's not an error yet if this // fails, because the user might only be asking for `--help`. 
@@ -254,88 +109,6 @@ func mainImplementation(args []string) error { fmt.Print(Usage) } - flags.Var( - &filterValue{&filter, git.Include, "", false}, "include", - "include specified references", - ) - flags.Var( - &filterValue{&filter, git.Include, "", true}, "include-regexp", - "include references matching the specified regular expression", - ) - flags.Var( - &filterValue{&filter, git.Exclude, "", false}, "exclude", - "exclude specified references", - ) - flags.Var( - &filterValue{&filter, git.Exclude, "", true}, "exclude-regexp", - "exclude references matching the specified regular expression", - ) - - flag := flags.VarPF( - &filterValue{&filter, git.Include, "refs/heads", false}, "branches", "", - "process all branches", - ) - flag.NoOptDefVal = "true" - - flag = flags.VarPF( - &filterValue{&filter, git.Exclude, "refs/heads", false}, "no-branches", "", - "exclude all branches", - ) - flag.NoOptDefVal = "true" - - flag = flags.VarPF( - &filterValue{&filter, git.Include, "refs/tags", false}, "tags", "", - "process all tags", - ) - flag.NoOptDefVal = "true" - - flag = flags.VarPF( - &filterValue{&filter, git.Exclude, "refs/tags", false}, "no-tags", "", - "exclude all tags", - ) - flag.NoOptDefVal = "true" - - flag = flags.VarPF( - &filterValue{&filter, git.Include, "refs/remotes", false}, "remotes", "", - "process all remote-tracking references", - ) - flag.NoOptDefVal = "true" - - flag = flags.VarPF( - &filterValue{&filter, git.Exclude, "refs/remotes", false}, "no-remotes", "", - "exclude all remote-tracking references", - ) - flag.NoOptDefVal = "true" - - flag = flags.VarPF( - &filterValue{&filter, git.Include, "refs/notes", false}, "notes", "", - "process all git-notes references", - ) - flag.NoOptDefVal = "true" - - flag = flags.VarPF( - &filterValue{&filter, git.Exclude, "refs/notes", false}, "no-notes", "", - "exclude all git-notes references", - ) - flag.NoOptDefVal = "true" - - flag = flags.VarPF( - &filterValue{&filter, git.Include, "refs/stash", true}, 
"stash", "", - "process refs/stash", - ) - flag.NoOptDefVal = "true" - - flag = flags.VarPF( - &filterValue{&filter, git.Exclude, "refs/stash", true}, "no-stash", "", - "exclude refs/stash", - ) - flag.NoOptDefVal = "true" - - flag = flags.VarPF( - &filterGroupValue{&filter, repo}, "refgroup", "", - "process references in refgroup defined by gitconfig", - ) - flags.VarP( sizes.NewThresholdFlagValue(&threshold, 0), "verbose", "v", "report all statistics, whether concerning or not", @@ -376,7 +149,6 @@ func mainImplementation(args []string) error { atty = false } flags.BoolVar(&progress, "progress", atty, "report progress to stderr") - flags.BoolVar(&showRefs, "show-refs", false, "list the references being processed") flags.BoolVar(&version, "version", false, "report the git-sizer version number") flags.Var(&NegatedBoolValue{&progress}, "no-progress", "suppress progress output") flags.Lookup("no-progress").NoOptDefVal = "true" @@ -384,6 +156,13 @@ func mainImplementation(args []string) error { flags.StringVar(&cpuprofile, "cpuprofile", "", "write cpu profile to file") flags.MarkHidden("cpuprofile") + var rgb refopts.RefGroupBuilder + var configger refopts.Configger + if repo != nil { + configger = repo + } + rgb.AddRefopts(flags, configger) + flags.SortFlags = false err = flags.Parse(args) @@ -469,20 +248,12 @@ func mainImplementation(args []string) error { progress = v } - if filter == nil { - filter = git.AllReferencesFilter - } - - if showRefs { - fmt.Fprintf(os.Stderr, "References (included references marked with '+'):\n") - filter = showRefFilter{filter} - } - - rg := refGrouper{ - filter: filter, + rg, err := rgb.Finish() + if err != nil { + return err } - historySize, err := sizes.ScanRepositoryUsingGraph(repo, &rg, nameStyle, progress) + historySize, err := sizes.ScanRepositoryUsingGraph(repo, rg, nameStyle, progress) if err != nil { return fmt.Errorf("error scanning repository: %s", err) } @@ -511,29 +282,3 @@ func mainImplementation(args []string) error { 
return nil } - -type refGrouper struct { - filter git.ReferenceFilter -} - -func (rg *refGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) { - return rg.filter.Filter(refname), nil -} - -func (rg *refGrouper) Groups() []sizes.RefGroup { - return nil -} - -type showRefFilter struct { - f git.ReferenceFilter -} - -func (f showRefFilter) Filter(refname string) bool { - b := f.f.Filter(refname) - if b { - fmt.Fprintf(os.Stderr, "+ %s\n", refname) - } else { - fmt.Fprintf(os.Stderr, " %s\n", refname) - } - return b -} diff --git a/internal/refopts/filter_group_value.go b/internal/refopts/filter_group_value.go new file mode 100644 index 0000000..a21ffed --- /dev/null +++ b/internal/refopts/filter_group_value.go @@ -0,0 +1,76 @@ +package refopts + +import ( + "fmt" + "os" + + "github.com/github/git-sizer/git" +) + +type filterGroupValue struct { + filter *git.ReferenceFilter + configger Configger +} + +func (v *filterGroupValue) Set(name string) error { + // At this point, it is not yet certain that the command was run + // inside a Git repository. If not, ignore this option (the + // command will error out anyway). 
+ if v.configger == nil { + fmt.Fprintf( + os.Stderr, + "warning: not in Git repository; ignoring '--refgroup' option.\n", + ) + return nil + } + + config, err := v.configger.Config(fmt.Sprintf("refgroup.%s", name)) + if err != nil { + return err + } + for _, entry := range config.Entries { + switch entry.Key { + case "include": + *v.filter = git.Include.Combine( + *v.filter, git.PrefixFilter(entry.Value), + ) + case "includeregexp": + filter, err := git.RegexpFilter(entry.Value) + if err != nil { + return fmt.Errorf( + "invalid regular expression for 'refgroup.%s.%s': %w", + name, entry.Key, err, + ) + } + *v.filter = git.Include.Combine(*v.filter, filter) + case "exclude": + *v.filter = git.Exclude.Combine( + *v.filter, git.PrefixFilter(entry.Value), + ) + case "excluderegexp": + filter, err := git.RegexpFilter(entry.Value) + if err != nil { + return fmt.Errorf( + "invalid regular expression for 'refgroup.%s.%s': %w", + name, entry.Key, err, + ) + } + *v.filter = git.Exclude.Combine(*v.filter, filter) + default: + // Ignore unrecognized keys. + } + } + return nil +} + +func (v *filterGroupValue) Get() interface{} { + return nil +} + +func (v *filterGroupValue) String() string { + return "" +} + +func (v *filterGroupValue) Type() string { + return "name" +} diff --git a/internal/refopts/filter_value.go b/internal/refopts/filter_value.go new file mode 100644 index 0000000..82c2b0b --- /dev/null +++ b/internal/refopts/filter_value.go @@ -0,0 +1,84 @@ +package refopts + +import ( + "fmt" + "strconv" + + "github.com/github/git-sizer/git" +) + +type filterValue struct { + // filter is the filter that will be modified if this option + // is used. + filter *git.ReferenceFilter + + // combiner specifies how the filter generated by this option + // is combined with the existing filter; i.e., does it cause + // the matching references to be included or excluded? + combiner git.Combiner + + // pattern, if it is set, is the pattern (prefix or regexp) to + // be matched. 
If it is not set, then the user must supply the + // pattern. + pattern string + + // regexp specifies whether `pattern` should be interpreted as + // a regexp (as opposed to a prefix). + regexp bool +} + +func (v *filterValue) Set(s string) error { + var filter git.ReferenceFilter + combiner := v.combiner + + var pattern string + if v.pattern != "" { + // The pattern is fixed for this option: + pattern = v.pattern + + // It's not really expected, but if the user supplied a + // `false` boolean value, invert the polarity: + b, err := strconv.ParseBool(s) + if err != nil { + return err + } + if !b { + combiner = combiner.Inverted() + } + } else { + // The user must supply the pattern. + pattern = s + } + + if v.regexp { + var err error + filter, err = git.RegexpFilter(pattern) + if err != nil { + return fmt.Errorf("invalid regexp: %q", s) + } + } else { + filter = git.PrefixFilter(pattern) + } + + *v.filter = combiner.Combine(*v.filter, filter) + + return nil +} + +func (v *filterValue) Get() interface{} { + return nil +} + +func (v *filterValue) String() string { + return "" +} + +func (v *filterValue) Type() string { + if v.pattern != "" { + return "bool" + } else if v.regexp { + return "regexp" + } else { + return "prefix" + } +} diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go new file mode 100644 index 0000000..18e3a28 --- /dev/null +++ b/internal/refopts/ref_group_builder.go @@ -0,0 +1,154 @@ +package refopts + +import ( + "fmt" + "os" + + "github.com/spf13/pflag" + + "github.com/github/git-sizer/git" + "github.com/github/git-sizer/sizes" +) + +type Configger interface { + Config(prefix string) (*git.Config, error) +} + +// RefGroupBuilder handles reference-related options and puts together +// a `sizes.RefGrouper` to be used by the main part of the program. +type RefGroupBuilder struct { + Filter git.ReferenceFilter + ShowRefs bool +} + +// Add some reference-related options to `flags`. 
+func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet, configger Configger) { + flags.Var( + &filterValue{&rgb.Filter, git.Include, "", false}, "include", + "include specified references", + ) + flags.Var( + &filterValue{&rgb.Filter, git.Include, "", true}, "include-regexp", + "include references matching the specified regular expression", + ) + flags.Var( + &filterValue{&rgb.Filter, git.Exclude, "", false}, "exclude", + "exclude specified references", + ) + flags.Var( + &filterValue{&rgb.Filter, git.Exclude, "", true}, "exclude-regexp", + "exclude references matching the specified regular expression", + ) + + flag := flags.VarPF( + &filterValue{&rgb.Filter, git.Include, "refs/heads", false}, "branches", "", + "process all branches", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&rgb.Filter, git.Exclude, "refs/heads", false}, "no-branches", "", + "exclude all branches", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&rgb.Filter, git.Include, "refs/tags", false}, "tags", "", + "process all tags", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&rgb.Filter, git.Exclude, "refs/tags", false}, "no-tags", "", + "exclude all tags", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&rgb.Filter, git.Include, "refs/remotes", false}, "remotes", "", + "process all remote-tracking references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&rgb.Filter, git.Exclude, "refs/remotes", false}, "no-remotes", "", + "exclude all remote-tracking references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&rgb.Filter, git.Include, "refs/notes", false}, "notes", "", + "process all git-notes references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&rgb.Filter, git.Exclude, "refs/notes", false}, "no-notes", "", + "exclude all git-notes references", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + 
&filterValue{&rgb.Filter, git.Include, "refs/stash", true}, "stash", "", + "process refs/stash", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterValue{&rgb.Filter, git.Exclude, "refs/stash", true}, "no-stash", "", + "exclude refs/stash", + ) + flag.NoOptDefVal = "true" + + flag = flags.VarPF( + &filterGroupValue{&rgb.Filter, configger}, "refgroup", "", + "process references in refgroup defined by gitconfig", + ) + + flags.BoolVar(&rgb.ShowRefs, "show-refs", false, "list the references being processed") +} + +// Finish collects the information gained from processing the options +// and returns a `sizes.RefGrouper`. +func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { + if rgb.Filter == nil { + rgb.Filter = git.AllReferencesFilter + } + + if rgb.ShowRefs { + fmt.Fprintf(os.Stderr, "References (included references marked with '+'):\n") + rgb.Filter = showRefFilter{rgb.Filter} + } + + return &refGrouper{ + filter: rgb.Filter, + }, nil + +} + +type refGrouper struct { + filter git.ReferenceFilter +} + +func (rg *refGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) { + return rg.filter.Filter(refname), nil +} + +func (rg *refGrouper) Groups() []sizes.RefGroup { + return nil +} + +// showRefFilter is a `git.ReferenceFilter` that logs its choices to Stderr. 
+type showRefFilter struct { + f git.ReferenceFilter +} + +func (f showRefFilter) Filter(refname string) bool { + b := f.f.Filter(refname) + if b { + fmt.Fprintf(os.Stderr, "+ %s\n", refname) + } else { + fmt.Fprintf(os.Stderr, " %s\n", refname) + } + return b +} From a909729ee75b32cd1ea30328c73cfcfc6b174dff Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Oct 2021 22:45:38 +0200 Subject: [PATCH 063/176] Config: allow the full key name to be recovered --- git/gitconfig.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/git/gitconfig.go b/git/gitconfig.go index 4bf32d1..44b03df 100644 --- a/git/gitconfig.go +++ b/git/gitconfig.go @@ -14,6 +14,7 @@ type ConfigEntry struct { } type Config struct { + Prefix string Entries []ConfigEntry } @@ -29,7 +30,9 @@ func (repo *Repository) Config(prefix string) (*Config, error) { return nil, fmt.Errorf("reading git configuration: %w", err) } - var config Config + config := Config{ + Prefix: prefix, + } for len(out) > 0 { keyEnd := bytes.IndexByte(out, '\n') @@ -60,6 +63,15 @@ func (repo *Repository) Config(prefix string) (*Config, error) { return &config, nil } +// FullKey returns the full gitconfig key name for the relative key +// name `key`. +func (config *Config) FullKey(key string) string { + if config.Prefix == "" { + return key + } + return fmt.Sprintf("%s.%s", config.Prefix, key) +} + // configKeyMatchesPrefix checks whether `key` starts with `prefix` at // a component boundary (i.e., at a '.'). 
If yes, it returns `true` // and the part of the key after the prefix; e.g.: From 1955ee0f4622561733aae11760600c1cb13ae1b8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 14 Oct 2021 07:42:56 +0200 Subject: [PATCH 064/176] RefGroupBuilder: use the refgroups defined in gitconfig to group refs --- git-sizer.go | 7 +- internal/refopts/ref_group.go | 122 ++++++++++++++ internal/refopts/ref_group_builder.go | 230 +++++++++++++++++++++++--- 3 files changed, 331 insertions(+), 28 deletions(-) create mode 100644 internal/refopts/ref_group.go diff --git a/git-sizer.go b/git-sizer.go index e829b78..448d986 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -156,11 +156,16 @@ func mainImplementation(args []string) error { flags.StringVar(&cpuprofile, "cpuprofile", "", "write cpu profile to file") flags.MarkHidden("cpuprofile") - var rgb refopts.RefGroupBuilder var configger refopts.Configger if repo != nil { configger = repo } + + rgb, err := refopts.NewRefGroupBuilder(configger) + if err != nil { + return err + } + rgb.AddRefopts(flags, configger) flags.SortFlags = false diff --git a/internal/refopts/ref_group.go b/internal/refopts/ref_group.go new file mode 100644 index 0000000..ff0304a --- /dev/null +++ b/internal/refopts/ref_group.go @@ -0,0 +1,122 @@ +package refopts + +import ( + "fmt" + + "github.com/github/git-sizer/git" + "github.com/github/git-sizer/sizes" +) + +// refGroup represents one reference group and also its relationship +// to its parent group and any subgroups. Note that reference groups +// don't intrinsically have anything to do with the layout of the +// reference namespace, but they will often be used that way. +type refGroup struct { + sizes.RefGroup + + // filter is the filter for just this reference group. Filters + // for any parent groups must also be applied. + filter git.ReferenceFilter + + parent *refGroup + + // subgroups are the `refGroup` instances representing any + // direct subgroups. 
+ subgroups []*refGroup + + // otherRefGroup, if set, is the refGroup for tallying + // references that match `filter` but don't match any of the + // subgroups. + otherRefGroup *sizes.RefGroup +} + +func (rg *refGroup) collectSymbols(refname string) (bool, []sizes.RefGroupSymbol) { + walk := false + var symbols []sizes.RefGroupSymbol + + if rg.filter == nil { + // The tree doesn't have its own filter. Consider it matched + // iff at least one subtree matches it. + + for _, sg := range rg.subgroups { + w, ss := sg.collectSymbols(refname) + if w { + walk = true + } + if len(ss) > 0 && len(symbols) == 0 { + symbols = append(symbols, rg.Symbol) + } + symbols = append(symbols, ss...) + } + } else { + // The tree has its own filter. If it doesn't match the + // reference, then the subtrees don't even get a chance to + // try. + if !rg.filter.Filter(refname) { + return false, nil + } + + walk = true + symbols = append(symbols, rg.Symbol) + + for _, sg := range rg.subgroups { + _, ss := sg.collectSymbols(refname) + symbols = append(symbols, ss...) + } + + // References that match the tree filter but no subtree + // filters are counted as "other": + if rg.otherRefGroup != nil && len(symbols) == 1 { + symbols = append(symbols, rg.otherRefGroup.Symbol) + } + } + + return walk, symbols +} + +// augmentFromConfig augments `rg` based on configuration in the +// gitconfig, modifying it in place. It is not considered an error if +// there are no usable config entries for the filter. 
+func (rg *refGroup) augmentFromConfig(configger Configger) error { + config, err := configger.Config(fmt.Sprintf("refgroup.%s", rg.Symbol)) + if err != nil { + return err + } + + for _, entry := range config.Entries { + switch entry.Key { + case "name": + rg.Name = entry.Value + case "include": + rg.filter = git.Include.Combine( + rg.filter, git.PrefixFilter(entry.Value), + ) + case "includeregexp": + f, err := git.RegexpFilter(entry.Value) + if err != nil { + return fmt.Errorf( + "invalid regular expression for '%s': %w", + config.FullKey(entry.Key), err, + ) + } + rg.filter = git.Include.Combine(rg.filter, f) + case "exclude": + rg.filter = git.Exclude.Combine( + rg.filter, git.PrefixFilter(entry.Value), + ) + case "excluderegexp": + f, err := git.RegexpFilter(entry.Value) + if err != nil { + return fmt.Errorf( + "invalid regular expression for '%s': %w", + config.FullKey(entry.Key), err, + ) + } + rg.filter = git.Exclude.Combine(rg.filter, f) + default: + // Ignore unrecognized keys. + } + } + + return nil +} diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 18e3a28..4573f1c 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -3,6 +3,7 @@ package refopts import ( "fmt" "os" + "strings" "github.com/spf13/pflag" @@ -17,91 +18,210 @@ type Configger interface { // RefGroupBuilder handles reference-related options and puts together // a `sizes.RefGrouper` to be used by the main part of the program. 
type RefGroupBuilder struct { - Filter git.ReferenceFilter + topLevelGroup *refGroup + groups map[sizes.RefGroupSymbol]*refGroup + ShowRefs bool } +func NewRefGroupBuilder(configger Configger) (*RefGroupBuilder, error) { + tlg := refGroup{ + RefGroup: sizes.RefGroup{ + Symbol: "", + Name: "Refs to walk", + }, + } + + rgb := RefGroupBuilder{ + topLevelGroup: &tlg, + groups: map[sizes.RefGroupSymbol]*refGroup{ + "": &tlg, + }, + } + + rgb.initializeStandardRefgroups() + if err := rgb.readRefgroupsFromGitconfig(configger); err != nil { + return nil, err + } + + return &rgb, nil +} + +// getGroup returns the `refGroup` for the symbol with the specified +// name, first creating it (and any missing parents) if needed. +func (rgb *RefGroupBuilder) getGroup(symbol sizes.RefGroupSymbol) *refGroup { + if rg, ok := rgb.groups[symbol]; ok { + return rg + } + + parentSymbol := parentName(symbol) + parent := rgb.getGroup(parentSymbol) + + rg := refGroup{ + RefGroup: sizes.RefGroup{ + Symbol: symbol, + }, + parent: parent, + } + + rgb.groups[symbol] = &rg + parent.subgroups = append(parent.subgroups, &rg) + return &rg +} + +func parentName(symbol sizes.RefGroupSymbol) sizes.RefGroupSymbol { + i := strings.LastIndexByte(string(symbol), '.') + if i == -1 { + return "" + } + return symbol[:i] +} + +func (rgb *RefGroupBuilder) initializeStandardRefgroups() { + initializeGroup := func( + symbol sizes.RefGroupSymbol, name string, filter git.ReferenceFilter, + ) { + rg := rgb.getGroup(symbol) + rg.Name = name + rg.filter = filter + } + + initializeGroup("branches", "Branches", git.PrefixFilter("refs/heads/")) + initializeGroup("tags", "Tags", git.PrefixFilter("refs/tags/")) + initializeGroup("remotes", "Remote-tracking refs", git.PrefixFilter("refs/remotes/")) + initializeGroup("notes", "Git notes", git.PrefixFilter("refs/notes/")) + + filter, err := git.RegexpFilter("refs/stash") + if err != nil { + panic("internal error") + } + initializeGroup("stash", "Git stash", filter) +} + +func 
(rgb *RefGroupBuilder) readRefgroupsFromGitconfig(configger Configger) error { + if configger == nil { + // At this point, it is not yet certain that the command was + // run inside a Git repository. If not, ignore this option + // (the command will error out anyway). + return nil + } + + config, err := configger.Config("refgroup") + if err != nil { + return err + } + + seen := make(map[sizes.RefGroupSymbol]bool) + for _, entry := range config.Entries { + symbol, _ := splitKey(entry.Key) + if symbol == "" || seen[symbol] { + // The point of this loop is only to find + // _which_ groups are defined, so we only need + // to visit each one once. + continue + } + + rg := rgb.getGroup(symbol) + if err := rg.augmentFromConfig(configger); err != nil { + return err + } + + seen[symbol] = true + } + + return nil +} + +func splitKey(key string) (sizes.RefGroupSymbol, string) { + i := strings.LastIndexByte(key, '.') + if i == -1 { + return "", key + } + return sizes.RefGroupSymbol(key[:i]), key[i+1:] +} + // Add some reference-related options to `flags`. 
func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet, configger Configger) { + tlf := &rgb.topLevelGroup.filter flags.Var( - &filterValue{&rgb.Filter, git.Include, "", false}, "include", + &filterValue{tlf, git.Include, "", false}, "include", "include specified references", ) flags.Var( - &filterValue{&rgb.Filter, git.Include, "", true}, "include-regexp", + &filterValue{tlf, git.Include, "", true}, "include-regexp", "include references matching the specified regular expression", ) flags.Var( - &filterValue{&rgb.Filter, git.Exclude, "", false}, "exclude", + &filterValue{tlf, git.Exclude, "", false}, "exclude", "exclude specified references", ) flags.Var( - &filterValue{&rgb.Filter, git.Exclude, "", true}, "exclude-regexp", + &filterValue{tlf, git.Exclude, "", true}, "exclude-regexp", "exclude references matching the specified regular expression", ) flag := flags.VarPF( - &filterValue{&rgb.Filter, git.Include, "refs/heads", false}, "branches", "", + &filterValue{tlf, git.Include, "refs/heads", false}, "branches", "", "process all branches", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&rgb.Filter, git.Exclude, "refs/heads", false}, "no-branches", "", + &filterValue{tlf, git.Exclude, "refs/heads", false}, "no-branches", "", "exclude all branches", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&rgb.Filter, git.Include, "refs/tags", false}, "tags", "", + &filterValue{tlf, git.Include, "refs/tags", false}, "tags", "", "process all tags", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&rgb.Filter, git.Exclude, "refs/tags", false}, "no-tags", "", + &filterValue{tlf, git.Exclude, "refs/tags", false}, "no-tags", "", "exclude all tags", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&rgb.Filter, git.Include, "refs/remotes", false}, "remotes", "", + &filterValue{tlf, git.Include, "refs/remotes", false}, "remotes", "", "process all remote-tracking references", ) flag.NoOptDefVal = "true" flag = 
flags.VarPF( - &filterValue{&rgb.Filter, git.Exclude, "refs/remotes", false}, "no-remotes", "", + &filterValue{tlf, git.Exclude, "refs/remotes", false}, "no-remotes", "", "exclude all remote-tracking references", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&rgb.Filter, git.Include, "refs/notes", false}, "notes", "", + &filterValue{tlf, git.Include, "refs/notes", false}, "notes", "", "process all git-notes references", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&rgb.Filter, git.Exclude, "refs/notes", false}, "no-notes", "", + &filterValue{tlf, git.Exclude, "refs/notes", false}, "no-notes", "", "exclude all git-notes references", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&rgb.Filter, git.Include, "refs/stash", true}, "stash", "", + &filterValue{tlf, git.Include, "refs/stash", true}, "stash", "", "process refs/stash", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{&rgb.Filter, git.Exclude, "refs/stash", true}, "no-stash", "", + &filterValue{tlf, git.Exclude, "refs/stash", true}, "no-stash", "", "exclude refs/stash", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterGroupValue{&rgb.Filter, configger}, "refgroup", "", + &filterGroupValue{tlf, configger}, "refgroup", "", "process references in refgroup defined by gitconfig", ) @@ -111,33 +231,89 @@ func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet, configger Configger // Finish collects the information gained from processing the options // and returns a `sizes.RefGrouper`. 
func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { - if rgb.Filter == nil { - rgb.Filter = git.AllReferencesFilter + if rgb.topLevelGroup.filter == nil { + rgb.topLevelGroup.filter = git.AllReferencesFilter } if rgb.ShowRefs { fmt.Fprintf(os.Stderr, "References (included references marked with '+'):\n") - rgb.Filter = showRefFilter{rgb.Filter} + rgb.topLevelGroup.filter = showRefFilter{rgb.topLevelGroup.filter} + } + + refGrouper := refGrouper{ + topLevelGroup: rgb.topLevelGroup, } - return &refGrouper{ - filter: rgb.Filter, - }, nil + if err := refGrouper.fillInTree(refGrouper.topLevelGroup); err != nil { + return nil, err + } + if refGrouper.topLevelGroup.filter != nil { + refGrouper.ignoredRefGroup = &sizes.RefGroup{ + Symbol: "ignored", + Name: "Ignored", + } + refGrouper.refGroups = append(refGrouper.refGroups, *refGrouper.ignoredRefGroup) + } + + return &refGrouper, nil } type refGrouper struct { - filter git.ReferenceFilter -} + topLevelGroup *refGroup + refGroups []sizes.RefGroup -func (rg *refGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) { - return rg.filter.Filter(refname), nil + // ignoredRefGroup, if set, is the reference group for + // tallying references that don't match at all. 
+ ignoredRefGroup *sizes.RefGroup } -func (rg *refGrouper) Groups() []sizes.RefGroup { +func (refGrouper *refGrouper) fillInTree(rg *refGroup) error { + if rg.Name == "" { + _, rg.Name = splitKey(string(rg.Symbol)) + } + + if rg.filter == nil && len(rg.subgroups) == 0 { + return fmt.Errorf("refgroup '%s' is not defined", rg.Symbol) + } + + refGrouper.refGroups = append(refGrouper.refGroups, rg.RefGroup) + + for _, rg := range rg.subgroups { + if err := refGrouper.fillInTree(rg); err != nil { + return err + } + } + + if len(rg.subgroups) != 0 { + var otherSymbol sizes.RefGroupSymbol + if rg.Symbol == "" { + otherSymbol = "other" + } else { + otherSymbol = sizes.RefGroupSymbol(fmt.Sprintf("%s.other", rg.Symbol)) + } + rg.otherRefGroup = &sizes.RefGroup{ + Symbol: otherSymbol, + Name: "Other", + } + refGrouper.refGroups = append(refGrouper.refGroups, *rg.otherRefGroup) + } + return nil } +func (refGrouper *refGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) { + walk, symbols := refGrouper.topLevelGroup.collectSymbols(refname) + if !walk && refGrouper.ignoredRefGroup != nil { + symbols = append(symbols, refGrouper.ignoredRefGroup.Symbol) + } + return walk, symbols +} + +func (refGrouper *refGrouper) Groups() []sizes.RefGroup { + return refGrouper.refGroups +} + // showRefFilter is a `git.ReferenceFilter` that logs its choices to Stderr. type showRefFilter struct { f git.ReferenceFilter From c7b9b89dfe59a86bfd87a59793aab95ee92b7608 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 14 Oct 2021 08:07:58 +0200 Subject: [PATCH 065/176] filterGroupValue: use the existing map of refgroups Change `filterGroupValue` to use the existing map of reference groups that has been put together by the `RefGroupBuilder` rather than reading the gitconfig again. This still doesn't handle nested groups correctly, but we'll fix that in a moment. 
--- git-sizer.go | 2 +- internal/refopts/filter_group_value.go | 61 +++++--------------------- internal/refopts/ref_group_builder.go | 4 +- 3 files changed, 15 insertions(+), 52 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 448d986..21284c1 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -166,7 +166,7 @@ func mainImplementation(args []string) error { return err } - rgb.AddRefopts(flags, configger) + rgb.AddRefopts(flags) flags.SortFlags = false diff --git a/internal/refopts/filter_group_value.go b/internal/refopts/filter_group_value.go index a21ffed..af2c7d8 100644 --- a/internal/refopts/filter_group_value.go +++ b/internal/refopts/filter_group_value.go @@ -2,64 +2,27 @@ package refopts import ( "fmt" - "os" "github.com/github/git-sizer/git" + "github.com/github/git-sizer/sizes" ) type filterGroupValue struct { - filter *git.ReferenceFilter - configger Configger + filter *git.ReferenceFilter + groups map[sizes.RefGroupSymbol]*refGroup } -func (v *filterGroupValue) Set(name string) error { - // At this point, it is not yet certain that the command was run - // inside a Git repository. If not, ignore this option (the - // command will error out anyway). 
- if v.configger == nil { - fmt.Fprintf( - os.Stderr, - "warning: not in Git repository; ignoring '--refgroup' option.\n", - ) - return nil - } +func (v *filterGroupValue) Set(symbolString string) error { + symbol := sizes.RefGroupSymbol(symbolString) - config, err := v.configger.Config(fmt.Sprintf("refgroup.%s", name)) - if err != nil { - return err - } - for _, entry := range config.Entries { - switch entry.Key { - case "include": - *v.filter = git.Include.Combine( - *v.filter, git.PrefixFilter(entry.Value), - ) - case "includeregexp": - filter, err := git.RegexpFilter(entry.Value) - if err != nil { - return fmt.Errorf( - "invalid regular expression for 'refgroup.%s.%s': %w", - name, entry.Key, err, - ) - } - *v.filter = git.Include.Combine(*v.filter, filter) - case "exclude": - *v.filter = git.Exclude.Combine( - *v.filter, git.PrefixFilter(entry.Value), - ) - case "excluderegexp": - filter, err := git.RegexpFilter(entry.Value) - if err != nil { - return fmt.Errorf( - "invalid regular expression for 'refgroup.%s.%s': %w", - name, entry.Key, err, - ) - } - *v.filter = git.Exclude.Combine(*v.filter, filter) - default: - // Ignore unrecognized keys. - } + refGroup, ok := v.groups[symbol] + + if !ok || symbol == "" { + return fmt.Errorf("refgroup '%s' is not defined", symbol) } + + *v.filter = git.Include.Combine(*v.filter, refGroup.filter) + return nil } diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 4573f1c..71bc3d6 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -141,7 +141,7 @@ func splitKey(key string) (sizes.RefGroupSymbol, string) { } // Add some reference-related options to `flags`. 
-func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet, configger Configger) { +func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { tlf := &rgb.topLevelGroup.filter flags.Var( &filterValue{tlf, git.Include, "", false}, "include", @@ -221,7 +221,7 @@ func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet, configger Configger flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterGroupValue{tlf, configger}, "refgroup", "", + &filterGroupValue{tlf, rgb.groups}, "refgroup", "", "process references in refgroup defined by gitconfig", ) From 91d9c08af4210655bcf72646f942561df0c12467 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 14 Oct 2021 08:30:38 +0200 Subject: [PATCH 066/176] showRefGrouper: new implementation for `--show-refs` Implement `--show-refs` by logging at the `RefGrouper` level rather than at the `ReferenceFilter` level. This decouples use of the filter from logging. --- internal/refopts/ref_group_builder.go | 25 +++++-------------------- internal/refopts/show_ref_grouper.go | 24 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 20 deletions(-) create mode 100644 internal/refopts/show_ref_grouper.go diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 71bc3d6..2c9ef32 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -235,11 +235,6 @@ func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { rgb.topLevelGroup.filter = git.AllReferencesFilter } - if rgb.ShowRefs { - fmt.Fprintf(os.Stderr, "References (included references marked with '+'):\n") - rgb.topLevelGroup.filter = showRefFilter{rgb.topLevelGroup.filter} - } - refGrouper := refGrouper{ topLevelGroup: rgb.topLevelGroup, } @@ -256,6 +251,11 @@ func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { refGrouper.refGroups = append(refGrouper.refGroups, *refGrouper.ignoredRefGroup) } + if rgb.ShowRefs { + fmt.Fprintf(os.Stderr, "References (included references 
marked with '+'):\n") + return showRefGrouper{&refGrouper, os.Stderr}, nil + } + return &refGrouper, nil } @@ -313,18 +313,3 @@ func (refGrouper *refGrouper) Categorize(refname string) (bool, []sizes.RefGroup func (refGrouper *refGrouper) Groups() []sizes.RefGroup { return refGrouper.refGroups } - -// showRefFilter is a `git.ReferenceFilter` that logs its choices to Stderr. -type showRefFilter struct { - f git.ReferenceFilter -} - -func (f showRefFilter) Filter(refname string) bool { - b := f.f.Filter(refname) - if b { - fmt.Fprintf(os.Stderr, "+ %s\n", refname) - } else { - fmt.Fprintf(os.Stderr, " %s\n", refname) - } - return b -} diff --git a/internal/refopts/show_ref_grouper.go b/internal/refopts/show_ref_grouper.go new file mode 100644 index 0000000..3b2f742 --- /dev/null +++ b/internal/refopts/show_ref_grouper.go @@ -0,0 +1,24 @@ +package refopts + +import ( + "fmt" + "io" + + "github.com/github/git-sizer/sizes" +) + +// showRefFilter is a `git.ReferenceFilter` that logs its choices to Stderr. +type showRefGrouper struct { + *refGrouper + w io.Writer +} + +func (refGrouper showRefGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) { + walk, symbols := refGrouper.refGrouper.Categorize(refname) + if walk { + fmt.Fprintf(refGrouper.w, "+ %s\n", refname) + } else { + fmt.Fprintf(refGrouper.w, " %s\n", refname) + } + return walk, symbols +} From dc635714b0f67fea872d76d8d169f7dd7e9a13cb Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 14 Oct 2021 09:27:52 +0200 Subject: [PATCH 067/176] filterGroupValue: handle nested groups correctly If the user specifies a refgroup as part of what should be walked, we only want to include the references that would appear in that refgroup. This means that we have to consider its parents and maybe its children. Use a new `refGroupFilter` to implement this logic. 
--- internal/refopts/filter_group_value.go | 58 +++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/internal/refopts/filter_group_value.go b/internal/refopts/filter_group_value.go index af2c7d8..58549a1 100644 --- a/internal/refopts/filter_group_value.go +++ b/internal/refopts/filter_group_value.go @@ -7,6 +7,19 @@ import ( "github.com/github/git-sizer/sizes" ) +// filterGroupValue handles `--refgroup=REFGROUP` options, which +// affect the top-level filter. These are a little bit tricky, because +// the references matched by a refgroup depend on its parents (because +// if the parents don't allow the reference, it won't even get tested +// by the regroup's own filter) and also its children (because if the +// refgroup doesn't have its own filter, then it is defined to be the +// union of its children). Meanwhile, when testing parents, we +// shouldn't test the top-level group, because that's what we are +// trying to affect. +// +// The filtering itself is implemented using a `refGroupFilter`, which +// contains a pointer to a `refGroup` and uses it (including its +// `parent` and `subgroups` to figure out what should be allowed. type filterGroupValue struct { filter *git.ReferenceFilter groups map[sizes.RefGroupSymbol]*refGroup @@ -21,7 +34,7 @@ func (v *filterGroupValue) Set(symbolString string) error { return fmt.Errorf("refgroup '%s' is not defined", symbol) } - *v.filter = git.Include.Combine(*v.filter, refGroup.filter) + *v.filter = git.Include.Combine(*v.filter, refGroupFilter{refGroup}) return nil } @@ -37,3 +50,46 @@ func (v *filterGroupValue) String() string { func (v *filterGroupValue) Type() string { return "name" } + +// refGroupFilter is a filter based on what would be allowed through +// by a particular refGroup. This is used as part of a top-level +// filter, so it ignores what the top-level filter would say. 
+type refGroupFilter struct { + refGroup *refGroup +} + +func (f refGroupFilter) Filter(refname string) bool { + return refGroupPasses(f.refGroup.parent, refname) && + refGroupMatches(f.refGroup, refname) +} + +// refGroupMatches retruns true iff `rg` would allow `refname` +// through, not considering its parents. If `rg` doesn't have its own +// filter, this consults its children. +func refGroupMatches(rg *refGroup, refname string) bool { + if rg.filter != nil { + return rg.filter.Filter(refname) + } + + for _, sg := range rg.subgroups { + if refGroupMatches(sg, refname) { + return true + } + } + + return false +} + +// refGroupPasses returns true iff `rg` and the parents of `rg` (not +// including the top-level group) would allow `refname` through. This +// does not consider children of `rg`, which we would still need to +// consult if `rg` doesn't have a filter of its own. +func refGroupPasses(rg *refGroup, refname string) bool { + if rg.Symbol == "" { + return true + } + if !refGroupPasses(rg.parent, refname) { + return false + } + return rg.filter == nil || rg.filter.Filter(refname) +} From 33f9c9ebccf3f2186da7de469d3efb63312db672 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 11 Oct 2021 16:14:24 +0200 Subject: [PATCH 068/176] filterGroupValue: take a `*RefGroupBuilder` as argument --- internal/refopts/filter_group_value.go | 9 +++++---- internal/refopts/ref_group_builder.go | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/internal/refopts/filter_group_value.go b/internal/refopts/filter_group_value.go index 58549a1..4e2d360 100644 --- a/internal/refopts/filter_group_value.go +++ b/internal/refopts/filter_group_value.go @@ -21,20 +21,21 @@ import ( // contains a pointer to a `refGroup` and uses it (including its // `parent` and `subgroups` to figure out what should be allowed. 
type filterGroupValue struct { - filter *git.ReferenceFilter - groups map[sizes.RefGroupSymbol]*refGroup + rgb *RefGroupBuilder } func (v *filterGroupValue) Set(symbolString string) error { symbol := sizes.RefGroupSymbol(symbolString) - refGroup, ok := v.groups[symbol] + refGroup, ok := v.rgb.groups[symbol] if !ok || symbol == "" { return fmt.Errorf("refgroup '%s' is not defined", symbol) } - *v.filter = git.Include.Combine(*v.filter, refGroupFilter{refGroup}) + v.rgb.topLevelGroup.filter = git.Include.Combine( + v.rgb.topLevelGroup.filter, refGroupFilter{refGroup}, + ) return nil } diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 2c9ef32..9ad9649 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -221,7 +221,7 @@ func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterGroupValue{tlf, rgb.groups}, "refgroup", "", + &filterGroupValue{rgb}, "refgroup", "", "process references in refgroup defined by gitconfig", ) From 136b90183be3d3cfafe828e93c9a455b61797dd2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 15 Oct 2021 17:06:05 +0200 Subject: [PATCH 069/176] filterValue: take a `*RefGroupBuilder` as argument --- internal/refopts/filter_value.go | 8 ++++---- internal/refopts/ref_group_builder.go | 29 +++++++++++++-------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/internal/refopts/filter_value.go b/internal/refopts/filter_value.go index 82c2b0b..2c4735b 100644 --- a/internal/refopts/filter_value.go +++ b/internal/refopts/filter_value.go @@ -8,9 +8,9 @@ import ( ) type filterValue struct { - // filter is the filter that will be modified if this option - // is used. - filter *git.ReferenceFilter + // rgb is the RefGroupBuilder whose top-level filter is + // affected if this option is used. 
+ rgb *RefGroupBuilder // combiner specifies how the filter generated by this option // is combined with the existing filter; i.e., does it cause @@ -60,7 +60,7 @@ func (v *filterValue) Set(s string) error { filter = git.PrefixFilter(pattern) } - *v.filter = combiner.Combine(*v.filter, filter) + v.rgb.topLevelGroup.filter = combiner.Combine(v.rgb.topLevelGroup.filter, filter) return nil } diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 9ad9649..e996f38 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -142,80 +142,79 @@ func splitKey(key string) (sizes.RefGroupSymbol, string) { // Add some reference-related options to `flags`. func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { - tlf := &rgb.topLevelGroup.filter flags.Var( - &filterValue{tlf, git.Include, "", false}, "include", + &filterValue{rgb, git.Include, "", false}, "include", "include specified references", ) flags.Var( - &filterValue{tlf, git.Include, "", true}, "include-regexp", + &filterValue{rgb, git.Include, "", true}, "include-regexp", "include references matching the specified regular expression", ) flags.Var( - &filterValue{tlf, git.Exclude, "", false}, "exclude", + &filterValue{rgb, git.Exclude, "", false}, "exclude", "exclude specified references", ) flags.Var( - &filterValue{tlf, git.Exclude, "", true}, "exclude-regexp", + &filterValue{rgb, git.Exclude, "", true}, "exclude-regexp", "exclude references matching the specified regular expression", ) flag := flags.VarPF( - &filterValue{tlf, git.Include, "refs/heads", false}, "branches", "", + &filterValue{rgb, git.Include, "refs/heads", false}, "branches", "", "process all branches", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{tlf, git.Exclude, "refs/heads", false}, "no-branches", "", + &filterValue{rgb, git.Exclude, "refs/heads", false}, "no-branches", "", "exclude all branches", ) flag.NoOptDefVal = "true" flag = 
flags.VarPF( - &filterValue{tlf, git.Include, "refs/tags", false}, "tags", "", + &filterValue{rgb, git.Include, "refs/tags", false}, "tags", "", "process all tags", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{tlf, git.Exclude, "refs/tags", false}, "no-tags", "", + &filterValue{rgb, git.Exclude, "refs/tags", false}, "no-tags", "", "exclude all tags", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{tlf, git.Include, "refs/remotes", false}, "remotes", "", + &filterValue{rgb, git.Include, "refs/remotes", false}, "remotes", "", "process all remote-tracking references", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{tlf, git.Exclude, "refs/remotes", false}, "no-remotes", "", + &filterValue{rgb, git.Exclude, "refs/remotes", false}, "no-remotes", "", "exclude all remote-tracking references", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{tlf, git.Include, "refs/notes", false}, "notes", "", + &filterValue{rgb, git.Include, "refs/notes", false}, "notes", "", "process all git-notes references", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{tlf, git.Exclude, "refs/notes", false}, "no-notes", "", + &filterValue{rgb, git.Exclude, "refs/notes", false}, "no-notes", "", "exclude all git-notes references", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{tlf, git.Include, "refs/stash", true}, "stash", "", + &filterValue{rgb, git.Include, "refs/stash", true}, "stash", "", "process refs/stash", ) flag.NoOptDefVal = "true" flag = flags.VarPF( - &filterValue{tlf, git.Exclude, "refs/stash", true}, "no-stash", "", + &filterValue{rgb, git.Exclude, "refs/stash", true}, "no-stash", "", "exclude refs/stash", ) flag.NoOptDefVal = "true" From 87e2f1aac2d0165e7c08ef1d06c16b2dbee59317 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 15 Oct 2021 17:06:23 +0200 Subject: [PATCH 070/176] Change `--include` and `--exclude` to take more flexible arguments --- git_sizer_test.go | 14 +++++------ 
internal/refopts/filter_value.go | 43 ++++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 65be141..4c8be56 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -191,7 +191,7 @@ func TestRefSelections(t *testing.T) { }, { // 12 name: "foo", - args: []string{"--include-regexp", ".*foo.*"}, + args: []string{"--include", "/.*foo.*/"}, }, { // 13 name: "refs/foo as prefix", @@ -199,11 +199,11 @@ func TestRefSelections(t *testing.T) { }, { // 14 name: "refs/foo as regexp", - args: []string{"--include-regexp", "refs/foo"}, + args: []string{"--include", "/refs/foo/"}, }, { // 15 name: "release tags", - args: []string{"--include-regexp", "refs/tags/release-.*"}, + args: []string{"--include", "/refs/tags/release-.*/"}, }, { // 16 name: "combination", @@ -211,21 +211,21 @@ func TestRefSelections(t *testing.T) { "--include=refs/heads", "--tags", "--exclude", "refs/heads/foo", - "--include-regexp", ".*foo.*", + "--include", "/.*foo.*/", "--exclude", "refs/foo", - "--exclude-regexp", "refs/tags/release-.*", + "--exclude", "/refs/tags/release-.*/", }, }, { // 17 name: "branches-refgroup", - args: []string{"--refgroup=mygroup"}, + args: []string{"--include=@mygroup"}, config: []git.ConfigEntry{ {"refgroup.mygroup.include", "refs/heads"}, }, }, { // 18 name: "combination-refgroup", - args: []string{"--refgroup=mygroup"}, + args: []string{"--include=@mygroup"}, config: []git.ConfigEntry{ {"refgroup.mygroup.include", "refs/heads"}, {"refgroup.mygroup.include", "refs/tags"}, diff --git a/internal/refopts/filter_value.go b/internal/refopts/filter_value.go index 2c4735b..f3fa35b 100644 --- a/internal/refopts/filter_value.go +++ b/internal/refopts/filter_value.go @@ -1,10 +1,13 @@ package refopts import ( + "errors" "fmt" "strconv" + "strings" "github.com/github/git-sizer/git" + "github.com/github/git-sizer/sizes" ) type filterValue struct { @@ -23,7 +26,7 @@ type filterValue struct { pattern 
string // regexp specifies whether `pattern` should be interpreted as - // a regexp (as opposed to a prefix). + // a regexp (as opposed to handling it flexibly). regexp bool } @@ -57,7 +60,11 @@ func (v *filterValue) Set(s string) error { return fmt.Errorf("invalid regexp: %q", s) } } else { - filter = git.PrefixFilter(pattern) + var err error + filter, err = v.interpretFlexibly(pattern) + if err != nil { + return err + } } v.rgb.topLevelGroup.filter = combiner.Combine(v.rgb.topLevelGroup.filter, filter) @@ -65,6 +72,38 @@ func (v *filterValue) Set(s string) error { return nil } +// Interpret an option argument flexibly: +// +// * If it is bracketed with `/` characters, treat it as a regexp. +// +// * If it starts with `@`, then consider it a refgroup name. That +// refgroup must already be defined. Use its filter. This construct +// is only allowed at the top level. +// +// * Otherwise treat it as a prefix. +func (v *filterValue) interpretFlexibly(s string) (git.ReferenceFilter, error) { + if len(s) >= 2 && strings.HasPrefix(s, "/") && strings.HasSuffix(s, "/") { + pattern := s[1 : len(s)-1] + return git.RegexpFilter(pattern) + } + + if len(s) >= 1 && s[0] == '@' { + name := sizes.RefGroupSymbol(s[1:]) + if name == "" { + return nil, errors.New("missing refgroup name") + } + + refGroup := v.rgb.groups[name] + if refGroup == nil { + return nil, fmt.Errorf("undefined refgroup '%s'", name) + } + + return refGroupFilter{refGroup}, nil + } + + return git.PrefixFilter(s), nil +} + func (v *filterValue) Get() interface{} { return nil } From a067cc39776f9cab8ccd0424148f9cda11af0b62 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 11 Oct 2021 18:59:37 +0200 Subject: [PATCH 071/176] Update docs --- git-sizer.go | 52 +++++++++++++++------------ internal/refopts/ref_group_builder.go | 19 +++++++--- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 21284c1..9a734d6 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -45,35 
+45,43 @@ const Usage = `usage: git-sizer [OPTS] Reference selection: - By default, git-sizer processes all Git objects that are reachable from any - reference. The following options can be used to limit which references to - include. The last rule matching a reference determines whether that reference - is processed: + By default, git-sizer processes all Git objects that are reachable + from any reference. The following options can be used to limit which + references to process. The last rule matching a reference determines + whether that reference is processed. --[no-]branches process [don't process] branches --[no-]tags process [don't process] tags - --[no-]remotes process [don't process] remote-tracking references + --[no-]remotes process [don't process] remote-tracking + references --[no-]notes process [don't process] git-notes references --[no-]stash process [don't process] refs/stash - --include PREFIX process references with the specified PREFIX - (e.g., '--include=refs/remotes/origin') - --include-regexp REGEXP process references matching the specified - regular expression (e.g., - '--include-regexp=refs/tags/release-.*') - --exclude PREFIX don't process references with the specified - PREFIX (e.g., '--exclude=refs/changes') - --exclude-regexp REGEXP don't process references matching the specified - regular expression - --refgroup=NAME process reference in group defined by gitconfig: - 'refgroup.NAME.include', - 'refgroup.NAME.includeRegexp', - 'refgroup.NAME.exclude', and - 'refgroup.NAME.excludeRegexp' as above. 
+ --include PREFIX, --exclude PREFIX + process [don't process] references with the + specified PREFIX (e.g., + '--include=refs/remotes/origin') + --include /REGEXP/, --exclude /REGEXP/ + process [don't process] references matching the + specified regular expression (e.g., + '--include=refs/tags/release-.*') + --include @REFGROUP, --exclude @REFGROUP + process [don't process] references in the + specified reference group (see below) --show-refs show which refs are being included/excluded - Prefixes must match at a boundary; for example 'refs/foo' matches - 'refs/foo' and 'refs/foo/bar' but not 'refs/foobar'. Regular - expression patterns must match the full reference name. + PREFIX must match at a boundary; for example 'refs/foo' matches + 'refs/foo' and 'refs/foo/bar' but not 'refs/foobar'. + + REGEXP patterns must match the full reference name. + + REFGROUP can be the name of a predefined reference group ('branches', + 'tags', 'remotes', 'notes', or 'stash'), or one defined via + gitconfig settings like the following (these can be repeated): + + * 'refgroup.REFGROUP.include=PREFIX' + * 'refgroup.REFGROUP.includeRegexp=REGEXP' + * 'refgroup.REFGROUP.exclude=PREFIX' + * 'refgroup.REFGROUP.excludeRegexp=REGEXP' ` diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index e996f38..1a2b68f 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -146,20 +146,27 @@ func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { &filterValue{rgb, git.Include, "", false}, "include", "include specified references", ) - flags.Var( - &filterValue{rgb, git.Include, "", true}, "include-regexp", + + flag := flags.VarPF( + &filterValue{rgb, git.Include, "", true}, "include-regexp", "", "include references matching the specified regular expression", ) + flag.Hidden = true + flag.Deprecated = "use --include=/REGEXP/" + flags.Var( &filterValue{rgb, git.Exclude, "", false}, "exclude", "exclude specified 
references", ) - flags.Var( - &filterValue{rgb, git.Exclude, "", true}, "exclude-regexp", + + flag = flags.VarPF( + &filterValue{rgb, git.Exclude, "", true}, "exclude-regexp", "", "exclude references matching the specified regular expression", ) + flag.Hidden = true + flag.Deprecated = "use --exclude=/REGEXP/" - flag := flags.VarPF( + flag = flags.VarPF( &filterValue{rgb, git.Include, "refs/heads", false}, "branches", "", "process all branches", ) @@ -223,6 +230,8 @@ func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { &filterGroupValue{rgb}, "refgroup", "", "process references in refgroup defined by gitconfig", ) + flag.Hidden = true + flag.Deprecated = "use --include=@REFGROUP" flags.BoolVar(&rgb.ShowRefs, "show-refs", false, "list the references being processed") } From 3b20437ab827d5199278fe65167a40c4d044fab8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 15 Oct 2021 18:06:03 +0200 Subject: [PATCH 072/176] TestRepo: set up a clean git environment when running git commands --- git_sizer_test.go | 2 -- internal/testutils/repoutils.go | 49 ++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index ad658ce..e239507 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -126,7 +126,6 @@ func TestRefSelections(t *testing.T) { // Create a test repo with one orphan commit per refname: repo := testutils.NewTestRepo(t, true, "ref-selection") - defer repo.Remove(t) for _, p := range references { @@ -241,7 +240,6 @@ func TestRefSelections(t *testing.T) { p.name, func(t *testing.T) { repo := repo.Clone(t, "ref-selection") - defer repo.Remove(t) for _, e := range p.config { diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index b5b839e..ac6895e 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" "testing" "time" @@ -51,6 +52,7 @@ func (repo 
*TestRepo) Init(t *testing.T, bare bool) { } else { cmd = exec.Command("git", "init", repo.Path) } + cmd.Env = CleanGitEnv() err := cmd.Run() require.NoError(t, err) } @@ -90,6 +92,49 @@ func (repo *TestRepo) Repository(t *testing.T) *git.Repository { return r } +// localEnvVars is a list of the variable names that should be cleared +// to give Git a clean environment. +var localEnvVars = func() map[string]bool { + m := map[string]bool{ + "HOME": true, + "XDG_CONFIG_HOME": true, + } + out, err := exec.Command("git", "rev-parse", "--local-env-vars").Output() + if err != nil { + return m + } + for _, k := range strings.Fields(string(out)) { + m[k] = true + } + return m +}() + +// GitEnv returns an appropriate environment for running `git` +// commands without being confused by any existing environment or +// gitconfig. +func CleanGitEnv() []string { + var env []string + for _, e := range os.Environ() { + i := strings.IndexByte(e, '=') + if i == -1 { + // This shouldn't happen, but if it does, + // ignore it. + continue + } + k := e[:i] + if localEnvVars[k] { + continue + } + env = append(env, e) + } + return append( + env, + fmt.Sprintf("HOME=%s", os.DevNull), + fmt.Sprintf("XDG_CONFIG_HOME=%s", os.DevNull), + "GIT_CONFIG_NOSYSTEM=1", + ) +} + // GitCommand creates an `*exec.Cmd` for running `git` in `repo` with // the specified arguments. func (repo *TestRepo) GitCommand(t *testing.T, args ...string) *exec.Cmd { @@ -97,7 +142,9 @@ func (repo *TestRepo) GitCommand(t *testing.T, args ...string) *exec.Cmd { gitArgs := []string{"-C", repo.Path} gitArgs = append(gitArgs, args...) - return exec.Command("git", gitArgs...) + cmd := exec.Command("git", gitArgs...) 
+ cmd.Env = CleanGitEnv() + return cmd } func (repo *TestRepo) UpdateRef(t *testing.T, refname string, oid git.OID) { From be29b60f040f739f8b26a2e08fe1e97408c67d56 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 18 Oct 2021 12:11:10 +0200 Subject: [PATCH 073/176] Add some more predefined refgroups: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * `pulls` — GitHub style pull request references (`refs/pull/**`) * `changes` — Gerrit style changeset references (`refs/changes/**`) --- git-sizer.go | 6 ++++-- internal/refopts/ref_group_builder.go | 10 +++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 9a734d6..63f2e90 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -75,9 +75,11 @@ const Usage = `usage: git-sizer [OPTS] REGEXP patterns must match the full reference name. REFGROUP can be the name of a predefined reference group ('branches', - 'tags', 'remotes', 'notes', or 'stash'), or one defined via - gitconfig settings like the following (these can be repeated): + 'tags', 'remotes', 'pulls', 'changes', 'notes', or 'stash'), or one + defined via gitconfig settings like the following (the + include/exclude settings can be repeated): + * 'refgroup.REFGROUP.name=NAME' * 'refgroup.REFGROUP.include=PREFIX' * 'refgroup.REFGROUP.includeRegexp=REGEXP' * 'refgroup.REFGROUP.exclude=PREFIX' diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 1a2b68f..6a472d9 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -89,9 +89,17 @@ func (rgb *RefGroupBuilder) initializeStandardRefgroups() { initializeGroup("branches", "Branches", git.PrefixFilter("refs/heads/")) initializeGroup("tags", "Tags", git.PrefixFilter("refs/tags/")) initializeGroup("remotes", "Remote-tracking refs", git.PrefixFilter("refs/remotes/")) + initializeGroup("pulls", "Pull request refs", git.PrefixFilter("refs/pull/")) + 
+ filter, err := git.RegexpFilter(`refs/changes/\d{2}/\d+/\d+`) + if err != nil { + panic("internal error") + } + initializeGroup("changes", "Changeset refs", filter) + initializeGroup("notes", "Git notes", git.PrefixFilter("refs/notes/")) - filter, err := git.RegexpFilter("refs/stash") + filter, err = git.RegexpFilter(`refs/stash`) if err != nil { panic("internal error") } From 3d385d32ef0c2eb7908b16dcac9af8b01e0f54c9 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 18 Oct 2021 12:07:18 +0200 Subject: [PATCH 074/176] Add some tests of refgroup handling --- git_sizer_test.go | 236 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) diff --git a/git_sizer_test.go b/git_sizer_test.go index 4c8be56..3df7c4b 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -277,6 +277,242 @@ func TestRefSelections(t *testing.T) { } } +func TestRefgroups(t *testing.T) { + t.Parallel() + + references := []string{ + "refs/changes/20/884120/1", + "refs/changes/45/12345/42", + "refs/fo", + "refs/foo", + "refs/heads/foo", + "refs/heads/main", + "refs/notes/discussion", + "refs/notes/tests/build", + "refs/notes/tests/default", + "refs/pull/1/head", + "refs/pull/1/merge", + "refs/pull/123/head", + "refs/pull/1234/head", + "refs/remotes/origin/master", + "refs/remotes/upstream/foo", + "refs/remotes/upstream/master", + "refs/stash", + "refs/tags/foolish", + "refs/tags/other", + "refs/tags/release-1", + "refs/tags/release-2", + } + + // Create a test repo with one orphan commit per refname: + repo := testutils.NewTestRepo(t, true, "refgroups") + defer repo.Remove(t) + + for _, refname := range references { + repo.CreateReferencedOrphan(t, refname) + } + + executable, err := exec.LookPath("bin/git-sizer") + require.NoError(t, err) + executable, err = filepath.Abs(executable) + require.NoError(t, err) + + for _, p := range []struct { + name string + args []string + config []git.ConfigEntry + stdout string + stderr string + }{ + { + name: "no 
arguments", + stdout: ` +| * References | | | +| * Count | 21 | | +| * Branches | 2 | | +| * Tags | 4 | | +| * Remote-tracking refs | 3 | | +| * Pull request refs | 4 | | +| * Changeset refs | 2 | | +| * Git notes | 3 | | +| * Git stash | 1 | | +| * Other | 2 | | +| | | | +`[1:], + stderr: ` +References (included references marked with '+'): ++ refs/changes/20/884120/1 ++ refs/changes/45/12345/42 ++ refs/fo ++ refs/foo ++ refs/heads/foo ++ refs/heads/main ++ refs/notes/discussion ++ refs/notes/tests/build ++ refs/notes/tests/default ++ refs/pull/1/head ++ refs/pull/1/merge ++ refs/pull/123/head ++ refs/pull/1234/head ++ refs/remotes/origin/master ++ refs/remotes/upstream/foo ++ refs/remotes/upstream/master ++ refs/stash ++ refs/tags/foolish ++ refs/tags/other ++ refs/tags/release-1 ++ refs/tags/release-2 +`[1:], + }, + { + name: "nested-groups", + config: []git.ConfigEntry{ + // Note that refgroup "misc" is defined implicitly. + + {"refgroup.misc.foo.includeRegexp", ".*foo.*"}, + + {"refgroup.misc.foo.oatend.includeRegexp", ".*o"}, + + {"refgroup.misc.foo.bogus.include", "bogus"}, + + {"refgroup.tags.releases.name", "Releases"}, + {"refgroup.tags.releases.includeRegexp", "refs/tags/release-.*"}, + }, + stdout: ` +| * References | | | +| * Count | 21 | | +| * Branches | 2 | | +| * Tags | 4 | | +| * Releases | 2 | | +| * Other | 2 | | +| * Remote-tracking refs | 3 | | +| * Pull request refs | 4 | | +| * Changeset refs | 2 | | +| * Git notes | 3 | | +| * Git stash | 1 | | +| * misc | 4 | | +| * foo | 4 | | +| * oatend | 3 | | +| * Other | 1 | | +| * Other | 1 | | +| | | | +`[1:], + }, + { + name: "include-refgroups", + args: []string{"--include=@branches", "--include=@tags.releases", "--include=@oatend"}, + config: []git.ConfigEntry{ + {"refgroup.oatend.includeRegexp", ".*o"}, + + {"refgroup.tags.releases.name", "Releases"}, + {"refgroup.tags.releases.includeRegexp", "refs/tags/release-.*"}, + }, + stdout: ` +| * References | | | +| * Count | 21 | | +| * Branches | 2 
| | +| * Tags | 2 | | +| * Releases | 2 | | +| * Remote-tracking refs | 1 | | +| * oatend | 4 | | +| * Ignored | 14 | | +| | | | +`[1:], + stderr: ` +References (included references marked with '+'): + refs/changes/20/884120/1 + refs/changes/45/12345/42 ++ refs/fo ++ refs/foo ++ refs/heads/foo ++ refs/heads/main + refs/notes/discussion + refs/notes/tests/build + refs/notes/tests/default + refs/pull/1/head + refs/pull/1/merge + refs/pull/123/head + refs/pull/1234/head + refs/remotes/origin/master ++ refs/remotes/upstream/foo + refs/remotes/upstream/master + refs/stash + refs/tags/foolish + refs/tags/other ++ refs/tags/release-1 ++ refs/tags/release-2 +`[1:], + }, + { + name: "exclude-refgroup", + args: []string{"--exclude=@stash", "--exclude=@notes"}, + stdout: ` +| * References | | | +| * Count | 21 | | +| * Branches | 2 | | +| * Tags | 4 | | +| * Remote-tracking refs | 3 | | +| * Pull request refs | 4 | | +| * Changeset refs | 2 | | +| * Other | 2 | | +| * Ignored | 4 | | +| | | | +`[1:], + stderr: ` +References (included references marked with '+'): ++ refs/changes/20/884120/1 ++ refs/changes/45/12345/42 ++ refs/fo ++ refs/foo ++ refs/heads/foo ++ refs/heads/main + refs/notes/discussion + refs/notes/tests/build + refs/notes/tests/default ++ refs/pull/1/head ++ refs/pull/1/merge ++ refs/pull/123/head ++ refs/pull/1234/head ++ refs/remotes/origin/master ++ refs/remotes/upstream/foo ++ refs/remotes/upstream/master + refs/stash ++ refs/tags/foolish ++ refs/tags/other ++ refs/tags/release-1 ++ refs/tags/release-2 +`[1:], + }, + } { + t.Run( + p.name, + func(t *testing.T) { + repo := repo.Clone(t, "refgroups") + defer repo.Remove(t) + + for _, e := range p.config { + repo.ConfigAdd(t, e.Key, e.Value) + } + + args := append([]string{"--show-refs", "-v", "--no-progress"}, p.args...) + cmd := exec.Command(executable, args...) 
+ cmd.Dir = repo.Path + var stdout bytes.Buffer + cmd.Stdout = &stdout + var stderr bytes.Buffer + cmd.Stderr = &stderr + err := cmd.Run() + assert.NoError(t, err) + + assert.Contains(t, stdout.String(), p.stdout) + if p.stderr != "" { + assert.Equal(t, stderr.String(), p.stderr) + } + }, + ) + } +} + func pow(x uint64, n int) uint64 { p := uint64(1) for ; n > 0; n-- { From d508da2473307df13addd7a0d6c14bdd58bd433f Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 27 Oct 2021 13:41:22 +0200 Subject: [PATCH 075/176] Don't record unwalked references with the path resolver Recording unwalked references with the path resolver means that the footnote for an extreme object might use an unwalked reference in the "names" for the object, even though the object must necessarily also be reachable from at least one reference that _was_ walked. Fix this by not recording unwalked references with the path resolver. --- sizes/graph.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index e8b655f..88115e2 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -43,6 +43,7 @@ type RefGrouper interface { type refSeen struct { git.Reference + walked bool groups []RefGroupSymbol } @@ -102,6 +103,7 @@ func ScanRepositoryUsingGraph( refsSeen, refSeen{ Reference: ref, + walked: walk, groups: groups, }, ) @@ -372,7 +374,7 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing references: %d") for _, refSeen := range refsSeen { progressMeter.Inc() - graph.RegisterReference(refSeen.Reference, refSeen.groups) + graph.RegisterReference(refSeen.Reference, refSeen.walked, refSeen.groups) } progressMeter.Done() @@ -428,7 +430,7 @@ func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { } } -func (g *Graph) RegisterReference(ref git.Reference, groups []RefGroupSymbol) { +func (g *Graph) RegisterReference(ref git.Reference, walked bool, groups []RefGroupSymbol) { g.historyLock.Lock() g.historySize.recordReference(g, 
ref) for _, group := range groups { @@ -436,7 +438,9 @@ func (g *Graph) RegisterReference(ref git.Reference, groups []RefGroupSymbol) { } g.historyLock.Unlock() - g.pathResolver.RecordReference(ref) + if walked { + g.pathResolver.RecordReference(ref) + } } func (g *Graph) HistorySize() HistorySize { From 643805cd5d383977ab9951dc347de73317f3198d Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 21 Oct 2021 12:30:43 +0200 Subject: [PATCH 076/176] ObjectIter: remove unused member --- git/git.go | 1 - 1 file changed, 1 deletion(-) diff --git a/git/git.go b/git/git.go index e8d981e..fd49b64 100644 --- a/git/git.go +++ b/git/git.go @@ -331,7 +331,6 @@ func parseBatchHeader(spec string, header string) (OID, ObjectType, counts.Count type ObjectIter struct { cmd1 *exec.Cmd cmd2 *exec.Cmd - in1 io.Writer out1 io.ReadCloser out2 io.ReadCloser f *bufio.Reader From b70634637e3fd4125984019e1ab1b56070d66e86 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 21 Oct 2021 12:32:17 +0200 Subject: [PATCH 077/176] Close(): name receivers consistently --- git/git.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/git/git.go b/git/git.go index fd49b64..bc01150 100644 --- a/git/git.go +++ b/git/git.go @@ -231,9 +231,9 @@ func (iter *ReferenceIter) Next() (Reference, bool, error) { }, true, nil } -func (l *ReferenceIter) Close() error { - err := l.out.Close() - err2 := l.cmd.Wait() +func (iter *ReferenceIter) Close() error { + err := iter.out.Close() + err2 := iter.cmd.Wait() if err == nil { err = err2 } @@ -295,9 +295,9 @@ func (iter *BatchObjectIter) Next() (OID, ObjectType, counts.Count32, []byte, er return oid, objectType, objectSize, data, nil } -func (l *BatchObjectIter) Close() error { - err := l.out.Close() - err2 := l.cmd.Wait() +func (iter *BatchObjectIter) Close() error { + err := iter.out.Close() + err2 := iter.cmd.Wait() if err == nil { err = err2 } From 04e31f8f56c6e0a023fea45cbc549b7ff1c0c37d Mon Sep 17 00:00:00 2001 From: 
Michael Haggerty Date: Thu, 21 Oct 2021 13:25:37 +0200 Subject: [PATCH 078/176] git_sizer_test: use `testing.T.Cleanup()` rather than defer Its semantics allow running subtests in parallel. --- git_sizer_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 3df7c4b..5fe5720 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -126,7 +126,7 @@ func TestRefSelections(t *testing.T) { // Create a test repo with one orphan commit per refname: repo := testutils.NewTestRepo(t, true, "ref-selection") - defer repo.Remove(t) + t.Cleanup(func() { repo.Remove(t) }) for _, p := range references { repo.CreateReferencedOrphan(t, p.refname) @@ -306,7 +306,7 @@ func TestRefgroups(t *testing.T) { // Create a test repo with one orphan commit per refname: repo := testutils.NewTestRepo(t, true, "refgroups") - defer repo.Remove(t) + t.Cleanup(func() { repo.Remove(t) }) for _, refname := range references { repo.CreateReferencedOrphan(t, refname) @@ -535,7 +535,7 @@ func TestBomb(t *testing.T) { t.Parallel() repo := testutils.NewTestRepo(t, true, "bomb") - defer repo.Remove(t) + t.Cleanup(func() { repo.Remove(t) }) newGitBomb(t, repo, 10, 10, "boom!\n") From 530ff908549194a14a403640eb9317e003053297 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 21 Oct 2021 13:34:53 +0200 Subject: [PATCH 079/176] git_sizer_test: also run subtests in parallel --- git_sizer_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 5fe5720..2e20b8a 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -236,9 +236,12 @@ func TestRefSelections(t *testing.T) { }, }, } { + i, p := i, p t.Run( p.name, func(t *testing.T) { + t.Parallel() + repo := repo.Clone(t, "ref-selection") defer repo.Remove(t) @@ -254,7 +257,7 @@ func TestRefSelections(t *testing.T) { cmd.Stdout = &stdout var stderr bytes.Buffer cmd.Stderr = &stderr - err = cmd.Run() + err := cmd.Run() 
assert.NoError(t, err) expectedStderr, expectedUniqueCommitCount := computeExpectations(i) @@ -484,9 +487,12 @@ References (included references marked with '+'): `[1:], }, } { + p := p t.Run( p.name, func(t *testing.T) { + t.Parallel() + repo := repo.Clone(t, "refgroups") defer repo.Remove(t) From cb831015d167e1b231083ad5a4d416877d83c5d7 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Thu, 21 Oct 2021 15:29:03 +0200 Subject: [PATCH 080/176] TestRepo.CreateObject(): check for a possible error --- internal/testutils/repoutils.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index ac6895e..80aa387 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -173,6 +173,7 @@ func (repo *TestRepo) CreateObject( require.NoError(t, err) out, err := cmd.StdoutPipe() + require.NoError(t, err) cmd.Stderr = os.Stderr err = cmd.Start() From be2362a80eca03f94272709cbfd07c5c96fba7c2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 13:42:07 +0100 Subject: [PATCH 081/176] Repository.GetConfig(): rename method from `Config()` This should help prevent confusion with the name of the `Config` type itself. --- git/gitconfig.go | 11 ++++++----- internal/refopts/ref_group.go | 2 +- internal/refopts/ref_group_builder.go | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/git/gitconfig.go b/git/gitconfig.go index 44b03df..69031be 100644 --- a/git/gitconfig.go +++ b/git/gitconfig.go @@ -18,11 +18,12 @@ type Config struct { Entries []ConfigEntry } -// Config returns the entries from gitconfig. If `prefix` is provided, -// then only include entries in that section, which must match the at -// a component boundary (as defined by `configKeyMatchesPrefix()`), -// and strip off the prefix in the keys that are returned. -func (repo *Repository) Config(prefix string) (*Config, error) { +// GetConfig returns the entries from gitconfig. 
If `prefix` is +// provided, then only include entries in that section, which must +// match the at a component boundary (as defined by +// `configKeyMatchesPrefix()`), and strip off the prefix in the keys +// that are returned. +func (repo *Repository) GetConfig(prefix string) (*Config, error) { cmd := repo.gitCommand("config", "--list", "-z") out, err := cmd.Output() diff --git a/internal/refopts/ref_group.go b/internal/refopts/ref_group.go index ff0304a..b86b333 100644 --- a/internal/refopts/ref_group.go +++ b/internal/refopts/ref_group.go @@ -78,7 +78,7 @@ func (rg *refGroup) collectSymbols(refname string) (bool, []sizes.RefGroupSymbol // gitconfig and returns the result. It is not considered an error if // there are no usable config entries for the filter. func (rg *refGroup) augmentFromConfig(configger Configger) error { - config, err := configger.Config(fmt.Sprintf("refgroup.%s", rg.Symbol)) + config, err := configger.GetConfig(fmt.Sprintf("refgroup.%s", rg.Symbol)) if err != nil { return err } diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 6a472d9..7a89b82 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -12,7 +12,7 @@ import ( ) type Configger interface { - Config(prefix string) (*git.Config, error) + GetConfig(prefix string) (*git.Config, error) } // RefGroupBuilder handles reference-related options and puts together @@ -114,7 +114,7 @@ func (rgb *RefGroupBuilder) readRefgroupsFromGitconfig(configger Configger) erro return nil } - config, err := configger.Config("refgroup") + config, err := configger.GetConfig("refgroup") if err != nil { return err } From 669067d525a00e4db87bed7125a71e98a950f256 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 13:52:46 +0100 Subject: [PATCH 082/176] Add and improve a bunch of docstrings --- counts/counts.go | 19 ++++-- counts/human.go | 25 ++++--- git/git.go | 93 ++++++++++++++++++++++----- 
git/gitconfig.go | 16 ++++- internal/refopts/ref_group_builder.go | 25 ++++++- internal/testutils/repoutils.go | 16 +++-- isatty/isatty_disabled.go | 1 + isatty/isatty_enabled.go | 1 + meter/meter.go | 6 ++ negated_bool_value.go | 3 + sizes/footnotes.go | 8 +++ sizes/graph.go | 25 ++++++- 12 files changed, 196 insertions(+), 42 deletions(-) diff --git a/counts/counts.go b/counts/counts.go index 580dc7b..f6d4a3c 100644 --- a/counts/counts.go +++ b/counts/counts.go @@ -4,9 +4,11 @@ import ( "math" ) -// A count of something, capped at math.MaxUint32. +// Count32 is a count of something, capped at math.MaxUint32. type Count32 uint32 +// NewCount32 initializes a Count32 from a uint64, capped at +// math.MaxUint32. func NewCount32(n uint64) Count32 { if n > math.MaxUint32 { return Count32(math.MaxUint32) @@ -14,11 +16,13 @@ func NewCount32(n uint64) Count32 { return Count32(n) } +// ToUint64 returns the value of `n` as a `uint64`. If the value has +// overflowed, it returns `(math.MaxUint32, true)`. func (n Count32) ToUint64() (uint64, bool) { return uint64(n), n == math.MaxUint32 } -// Return the sum of two Count32s, capped at math.MaxUint32. +// Plus returns the sum of two Count32s, capped at math.MaxUint32. func (n1 Count32) Plus(n2 Count32) Count32 { n := n1 + n2 if n < n1 { @@ -28,7 +32,7 @@ func (n1 Count32) Plus(n2 Count32) Count32 { return n } -// Increment `*n1` by `n2`, capped at math.MaxUint32. +// Increment increases `*n1` by `n2`, capped at math.MaxUint32. func (n1 *Count32) Increment(n2 Count32) { *n1 = n1.Plus(n2) } @@ -55,18 +59,21 @@ func (n1 *Count32) AdjustMaxIfPossible(n2 Count32) bool { } } -// A count of something, capped at math.MaxUint64. +// Count64 is a count of something, capped at math.MaxUint64. type Count64 uint64 +// NewCount64 initializes a Count64 from a uint64. func NewCount64(n uint64) Count64 { return Count64(n) } +// ToUint64 returns the value of `n` as a `uint64`. If the value has +// overflowed, it returns `(math.MaxUint64, true)`. 
func (n Count64) ToUint64() (uint64, bool) { return uint64(n), n == math.MaxUint64 } -// Return the sum of two Count64s, capped at math.MaxUint64. +// Plus returns the sum of two Count64s, capped at math.MaxUint64. func (n1 Count64) Plus(n2 Count64) Count64 { n := n1 + n2 if n < n1 { @@ -76,7 +83,7 @@ func (n1 Count64) Plus(n2 Count64) Count64 { return n } -// Increment `*n1` by `n2`, capped at math.MaxUint64. +// Increment increases `*n1` by `n2`, capped at math.MaxUint64. func (n1 *Count64) Increment(n2 Count64) { *n1 = n1.Plus(n2) } diff --git a/counts/human.go b/counts/human.go index cc69d50..a921c69 100644 --- a/counts/human.go +++ b/counts/human.go @@ -4,24 +4,28 @@ import ( "fmt" ) -// A quantity that can be made human-readable using Human(). +// Humanable is a quantity that can be made human-readable using +// `Humaner.Format()`. type Humanable interface { - // Return the value as a uint64, and a boolean telling whether it - // overflowed. + // ToUint64 returns the value as a uint64, and a boolean telling + // whether it overflowed. ToUint64() (uint64, bool) } -// An object that can format a Humanable in human-readable format. +// Humaner is an object that can format a Humanable in human-readable +// format. type Humaner struct { name string prefixes []Prefix } +// Prefix is a metric-like prefix that implies a scaling factor. type Prefix struct { Name string Multiplier uint64 } +// Metric is a Humaner representing metric prefixes. var Metric = Humaner{ name: "metric", prefixes: []Prefix{ @@ -34,6 +38,8 @@ var Metric = Humaner{ }, } +// Binary is a Humaner representing power-of-1024 based prefixes, +// typically used for bytes. var Binary = Humaner{ name: "binary", prefixes: []Prefix{ @@ -46,12 +52,14 @@ var Binary = Humaner{ }, } +// Name returns the name of `h` ("metric" or "binary"). func (h *Humaner) Name() string { return h.name } -// Format n, aligned, in `len(unit) + 10` or fewer characters (except -// for extremely large numbers). 
+// FormatNumber formats n, aligned, in `len(unit) + 10` or fewer +// characters (except for extremely large numbers). It returns strings +// representing the numeral and the unit string. func (h *Humaner) FormatNumber(n uint64, unit string) (string, string) { prefix := h.prefixes[0] @@ -82,8 +90,9 @@ func (h *Humaner) FormatNumber(n uint64, unit string) (string, string) { } } -// Format values, aligned, in `len(unit) + 10` or fewer characters -// (except for extremely large numbers). +// Format formats values, aligned, in `len(unit) + 10` or fewer +// characters (except for extremely large numbers). It returns strings +// representing the numeral and the unit string. func (h *Humaner) Format(value Humanable, unit string) (string, string) { n, overflow := value.ToUint64() if overflow { diff --git a/git/git.go b/git/git.go index bc01150..a9402b1 100644 --- a/git/git.go +++ b/git/git.go @@ -16,15 +16,21 @@ import ( "github.com/github/git-sizer/counts" ) -// The type of an object ("blob", "tree", "commit", "tag", "missing"). +// ObjectType represents the type of a Git object ("blob", "tree", +// "commit", "tag", or "missing"). type ObjectType string +// OID represents the SHA-1 object ID of a Git object, in binary +// format. type OID struct { v [20]byte } +// NullOID is the null object ID; i.e., all zeros. var NullOID OID +// OIDFromBytes converts a byte slice containing an object ID in +// binary format into an `OID`. func OIDFromBytes(oidBytes []byte) (OID, error) { var oid OID if len(oidBytes) != len(oid.v) { @@ -34,6 +40,8 @@ func OIDFromBytes(oidBytes []byte) (OID, error) { return oid, nil } +// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40}`) +// into an `OID`. func NewOID(s string) (OID, error) { oidBytes, err := hex.DecodeString(s) if err != nil { @@ -42,14 +50,18 @@ func NewOID(s string) (OID, error) { return OIDFromBytes(oidBytes) } +// String formats `oid` as a string in hex format. 
func (oid OID) String() string { return hex.EncodeToString(oid.v[:]) } +// Bytes returns a byte slice view of `oid`, in binary format. func (oid OID) Bytes() []byte { return oid.v[:] } +// MarshalJSON expresses `oid` as a JSON string with its enclosing +// quotation marks. func (oid OID) MarshalJSON() ([]byte, error) { src := oid.v[:] dst := make([]byte, hex.EncodedLen(len(src))+2) @@ -59,6 +71,7 @@ func (oid OID) MarshalJSON() ([]byte, error) { return dst, nil } +// Repository represents a Git repository on disk. type Repository struct { path string @@ -151,21 +164,32 @@ func (repo *Repository) gitCommand(callerArgs ...string) *exec.Cmd { return cmd } +// Path returns the path to `repo`. func (repo *Repository) Path() string { return repo.path } +// Close closes `repo`, possibly freeing up resources. func (repo *Repository) Close() error { return nil } +// Reference represents a Git reference. type Reference struct { - Refname string + // Refname is the full reference name of the reference. + Refname string + + // ObjectType is the type of the object referenced. ObjectType ObjectType + + // ObjectSize is the size of the referred-to object, in bytes. ObjectSize counts.Count32 - OID OID + + // OID is the OID of the referred-to object. + OID OID } +// ReferenceIter is an iterator that interates over references. type ReferenceIter struct { cmd *exec.Cmd out io.ReadCloser @@ -200,6 +224,9 @@ func (repo *Repository) NewReferenceIter() (*ReferenceIter, error) { }, nil } +// Next returns either the next reference or a boolean `false` value +// indicating that the iteration is over. On errors, return an error +// (in this case, the caller must still call `Close()`). func (iter *ReferenceIter) Next() (Reference, bool, error) { line, err := iter.f.ReadString('\n') if err != nil { @@ -231,6 +258,7 @@ func (iter *ReferenceIter) Next() (Reference, bool, error) { }, true, nil } +// Close closes the iterator and frees up resources. 
func (iter *ReferenceIter) Close() error { err := iter.out.Close() err2 := iter.cmd.Wait() @@ -240,15 +268,20 @@ func (iter *ReferenceIter) Close() error { return err } +// BatchObjectIter iterates over objects whose names are fed into its +// stdin. The output is buffered, so it has to be closed before you +// can be sure that you have gotten all of the objects. type BatchObjectIter struct { cmd *exec.Cmd out io.ReadCloser f *bufio.Reader } -// NewBatchObjectIter returns iterates over objects whose names are -// fed into its stdin. The output is buffered, so it has to be closed -// before you can be sure to read all of the objects. +// NewBatchObjectIter returns a `*BatchObjectIterator` and an +// `io.WriteCloser`. The iterator iterates over objects whose names +// are fed into the `io.WriteCloser`, one per line. The +// `io.WriteCloser` should normally be closed and the iterator's +// output drained before `Close()` is called. func (repo *Repository) NewBatchObjectIter() (*BatchObjectIter, io.WriteCloser, error) { cmd := repo.gitCommand("cat-file", "--batch", "--buffer") @@ -276,6 +309,8 @@ func (repo *Repository) NewBatchObjectIter() (*BatchObjectIter, io.WriteCloser, }, in, nil } +// Next returns the next object: its OID, type, size, and contents. +// When no more data are available, it returns an `io.EOF` error. func (iter *BatchObjectIter) Next() (OID, ObjectType, counts.Count32, []byte, error) { header, err := iter.f.ReadString('\n') if err != nil { @@ -295,6 +330,8 @@ func (iter *BatchObjectIter) Next() (OID, ObjectType, counts.Count32, []byte, er return oid, objectType, objectSize, data, nil } +// Close closes the iterator and frees up resources. If any iterator +// output hasn't been read yet, it will be lost. 
func (iter *BatchObjectIter) Close() error { err := iter.out.Close() err2 := iter.cmd.Wait() @@ -328,6 +365,7 @@ func parseBatchHeader(spec string, header string) (OID, ObjectType, counts.Count return oid, ObjectType(words[1]), counts.NewCount32(size), nil } +// ObjectIter iterates over objects in a Git repository. type ObjectIter struct { cmd1 *exec.Cmd cmd2 *exec.Cmd @@ -338,12 +376,12 @@ type ObjectIter struct { } // NewObjectIter returns an iterator that iterates over objects in -// `repo`. The second return value is the stdin of the `rev-list` -// command. The caller can feed values into it but must close it in -// any case. -func (repo *Repository) NewObjectIter(args ...string) ( - *ObjectIter, io.WriteCloser, error, -) { +// `repo`. The arguments are passed to `git rev-list --objects`. The +// second return value is the stdin of the `rev-list` command. The +// caller can feed values into it but must close it in any case. +func (repo *Repository) NewObjectIter( + args ...string, +) (*ObjectIter, io.WriteCloser, error) { cmd1 := repo.gitCommand(append([]string{"rev-list", "--objects"}, args...)...) in1, err := cmd1.StdinPipe() if err != nil { @@ -420,7 +458,8 @@ func (repo *Repository) NewObjectIter(args ...string) ( }, in1, nil } -// Next returns the next object, or EOF when done. +// Next returns the next object: its OID, type, and size. When no more +// data are available, it returns an `io.EOF` error. func (l *ObjectIter) Next() (OID, ObjectType, counts.Count32, error) { line, err := l.f.ReadString('\n') if err != nil { @@ -430,6 +469,7 @@ func (l *ObjectIter) Next() (OID, ObjectType, counts.Count32, error) { return parseBatchHeader("", line) } +// Close closes the iterator and frees up resources. func (l *ObjectIter) Close() error { l.out1.Close() err := <-l.errChan @@ -445,12 +485,15 @@ func (l *ObjectIter) Close() error { return err } +// ObjectHeaderIter iterates over the headers within a commit or tag +// object. 
type ObjectHeaderIter struct { name string data string } -// Iterate over a commit or tag object header. `data` should be the +// NewObjectHeaderIter returns an `ObjectHeaderIter` that iterates +// over the headers in a commit or tag object. `data` should be the // object's contents, which is usually terminated by a blank line that // separates the header from the comment. However, annotated tags // don't always include comments, and Git even tolerates commits @@ -472,10 +515,12 @@ func NewObjectHeaderIter(name string, data []byte) (ObjectHeaderIter, error) { return ObjectHeaderIter{name, string(data[:headerEnd+1])}, nil } +// HasNext returns true iff there are more headers to retrieve. func (iter *ObjectHeaderIter) HasNext() bool { return len(iter.data) > 0 } +// Next returns the key and value of the next header. func (iter *ObjectHeaderIter) Next() (string, string, error) { if len(iter.data) == 0 { return "", "", fmt.Errorf("header for %s read past end", iter.name) @@ -496,12 +541,15 @@ func (iter *ObjectHeaderIter) Next() (string, string, error) { return key, value, nil } +// Commit represents the parts of a commit object that we need. type Commit struct { Size counts.Count32 Parents []OID Tree OID } +// ParseCommit parses the commit object whose contents are in `data`. +// `oid` is used only in error messages. func ParseCommit(oid OID, data []byte) (*Commit, error) { var parents []OID var tree OID @@ -543,38 +591,46 @@ func ParseCommit(oid OID, data []byte) (*Commit, error) { }, nil } +// Tree represents a Git tree object. type Tree struct { data string } +// ParseTree parses the tree object whose contents are contained in +// `data`. `oid` is currently unused. func ParseTree(oid OID, data []byte) (*Tree, error) { return &Tree{string(data)}, nil } +// Size returns the size of the tree object. 
func (tree Tree) Size() counts.Count32 { return counts.NewCount32(uint64(len(tree.data))) } -// Note that Name shares memory with the tree data that were -// originally read; i.e., retaining a pointer to Name keeps the tree -// data reachable. +// TreeEntry represents an entry in a Git tree object. Note that Name +// shares memory with the tree data that were originally read; i.e., +// retaining a pointer to Name keeps the tree data reachable. type TreeEntry struct { Name string OID OID Filemode uint } +// TreeIter is an iterator over the entries in a Git tree object. type TreeIter struct { // The as-yet-unread part of the tree's data. data string } +// Iter returns an iterator over the entries in `tree`. func (tree *Tree) Iter() *TreeIter { return &TreeIter{ data: tree.data, } } +// NextEntry returns either the next entry in a Git tree, or a `false` +// boolean value if there are no more entries. func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) { var entry TreeEntry @@ -611,12 +667,15 @@ func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) { return entry, true, nil } +// Tag represents the information that we need about a Git tag object. type Tag struct { Size counts.Count32 Referent OID ReferentType ObjectType } +// ParseTag parses the Git tag object whose contents are contained in +// `data`. `oid` is used only in error messages. func ParseTag(oid OID, data []byte) (*Tag, error) { var referent OID var referentFound bool diff --git a/git/gitconfig.go b/git/gitconfig.go index 69031be..1cae881 100644 --- a/git/gitconfig.go +++ b/git/gitconfig.go @@ -8,13 +8,25 @@ import ( "strings" ) +// ConfigEntry represents an entry in the gitconfig. type ConfigEntry struct { - Key string + // Key is the entry's key, with any common `prefix` removed (see + // `Config()`). + Key string + + // Value is the entry's value, as a string. Value string } +// Config represents the gitconfig, or part of the gitconfig, read by +// `ReadConfig()`. 
type Config struct { - Prefix string + // Prefix is the key prefix that was read to fill this `Config`. + Prefix string + + // Entries contains the configuration entries that matched + // `Prefix`, in the order that they are reported by `git config + // --list`. Entries []ConfigEntry } diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 7a89b82..b298af5 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -11,6 +11,7 @@ import ( "github.com/github/git-sizer/sizes" ) +// Configger is an abstraction for a thing that can read gitconfig. type Configger interface { GetConfig(prefix string) (*git.Config, error) } @@ -24,6 +25,8 @@ type RefGroupBuilder struct { ShowRefs bool } +// NewRefGroupBuilder creates and returns a `RefGroupBuilder` +// instance. func NewRefGroupBuilder(configger Configger) (*RefGroupBuilder, error) { tlg := refGroup{ RefGroup: sizes.RefGroup{ @@ -69,6 +72,8 @@ func (rgb *RefGroupBuilder) getGroup(symbol sizes.RefGroupSymbol) *refGroup { return &rg } +// parentName returns the symbol of the refgroup that is the parent of +// `symbol`, or "" if `symbol` is the top-level group. func parentName(symbol sizes.RefGroupSymbol) sizes.RefGroupSymbol { i := strings.LastIndexByte(string(symbol), '.') if i == -1 { @@ -77,6 +82,8 @@ func parentName(symbol sizes.RefGroupSymbol) sizes.RefGroupSymbol { return symbol[:i] } +// initializeStandardRefgroups initializes the built-in refgroups +// ("branches", "tags", etc). func (rgb *RefGroupBuilder) initializeStandardRefgroups() { initializeGroup := func( symbol sizes.RefGroupSymbol, name string, filter git.ReferenceFilter, @@ -106,6 +113,9 @@ func (rgb *RefGroupBuilder) initializeStandardRefgroups() { initializeGroup("stash", "Git stash", filter) } +// readRefgroupsFromGitconfig reads any refgroups defined in the +// gitconfig into `rgb`. 
Any configuration settings for the built-in +// groups are added to the pre-existing definitions of those groups. func (rgb *RefGroupBuilder) readRefgroupsFromGitconfig(configger Configger) error { if configger == nil { // At this point, it is not yet certain that the command was @@ -140,6 +150,9 @@ func (rgb *RefGroupBuilder) readRefgroupsFromGitconfig(configger Configger) erro return nil } +// splitKey splits `key`, which is part of a gitconfig key, into the +// refgroup symbol to which it applies and the field name within that +// section. func splitKey(key string) (sizes.RefGroupSymbol, string) { i := strings.LastIndexByte(key, '.') if i == -1 { @@ -148,7 +161,7 @@ func splitKey(key string) (sizes.RefGroupSymbol, string) { return sizes.RefGroupSymbol(key[:i]), key[i+1:] } -// Add some reference-related options to `flags`. +// AddRefopts adds the reference-related options to `flags`. func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { flags.Var( &filterValue{rgb, git.Include, "", false}, "include", @@ -275,6 +288,8 @@ func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { return &refGrouper, nil } +// refGrouper is a `sizes.RefGrouper` based on a hierarchy of nested +// refgroups. type refGrouper struct { topLevelGroup *refGroup refGroups []sizes.RefGroup @@ -284,6 +299,10 @@ type refGrouper struct { ignoredRefGroup *sizes.RefGroup } +// fillInTree processes the refgroups in the tree rooted at `rg`, +// setting default names where they are missing, verifying that they +// are all defined, adding "Other" groups where needed, and adding the +// refgroups in depth-first-traversal order to `refGrouper.refGroups`. func (refGrouper *refGrouper) fillInTree(rg *refGroup) error { if rg.Name == "" { _, rg.Name = splitKey(string(rg.Symbol)) @@ -318,6 +337,8 @@ func (refGrouper *refGrouper) fillInTree(rg *refGroup) error { return nil } +// Categorize decides whether to walk the reference named `refname` +// and which refgroup(s) it should be counted in. 
func (refGrouper *refGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) { walk, symbols := refGrouper.topLevelGroup.collectSymbols(refname) if !walk && refGrouper.ignoredRefGroup != nil { @@ -326,6 +347,8 @@ func (refGrouper *refGrouper) Categorize(refname string) (bool, []sizes.RefGroup return walk, symbols } +// Groups returns a list of all defined refgroups, in the order that +// they should be output. func (refGrouper *refGrouper) Groups() []sizes.RefGroup { return refGrouper.refGroups } diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 80aa387..36d122c 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -109,9 +109,8 @@ var localEnvVars = func() map[string]bool { return m }() -// GitEnv returns an appropriate environment for running `git` -// commands without being confused by any existing environment or -// gitconfig. +// CleanGitEnv returns a clean environment for running `git` commands +// so that they won't be affected by the local environment. func CleanGitEnv() []string { var env []string for _, e := range os.Environ() { @@ -147,6 +146,7 @@ func (repo *TestRepo) GitCommand(t *testing.T, args ...string) *exec.Cmd { return cmd } +// UpdateRef updates the reference named `refname` to the value `oid`. func (repo *TestRepo) UpdateRef(t *testing.T, refname string, oid git.OID) { t.Helper() @@ -160,9 +160,9 @@ func (repo *TestRepo) UpdateRef(t *testing.T, refname string, oid git.OID) { require.NoError(t, cmd.Run()) } -// createObject creates a new Git object, of the specified type, in -// the repository at `repoPath`. `writer` is a function that writes -// the object in `git hash-object` input format. +// CreateObject creates a new Git object, of the specified type, in +// the repository at `repoPath`. `writer` is a function that generates +// the object contents in `git hash-object` input format. 
func (repo *TestRepo) CreateObject( t *testing.T, otype git.ObjectType, writer func(io.Writer) error, ) git.OID { @@ -258,6 +258,10 @@ func (repo *TestRepo) CreateReferencedOrphan(t *testing.T, refname string) { repo.UpdateRef(t, refname, oid) } +// AddAuthorInfo adds environment variables to `cmd.Env` that set the +// Git author and committer to known values and set the timestamp to +// `*timestamp`. Then `*timestamp` is moved forward by a minute, so +// that each commit gets a unique timestamp. func AddAuthorInfo(cmd *exec.Cmd, timestamp *time.Time) { cmd.Env = append(cmd.Env, "GIT_AUTHOR_NAME=Arthur", diff --git a/isatty/isatty_disabled.go b/isatty/isatty_disabled.go index c16f1d7..6dc9448 100644 --- a/isatty/isatty_disabled.go +++ b/isatty/isatty_disabled.go @@ -2,6 +2,7 @@ package isatty +// Isatty is a stub implementation of `Isatty()` that always returns `true`. func Isatty(fd uintptr) (bool, error) { return true, nil } diff --git a/isatty/isatty_enabled.go b/isatty/isatty_enabled.go index 04f7516..2286b24 100644 --- a/isatty/isatty_enabled.go +++ b/isatty/isatty_enabled.go @@ -12,6 +12,7 @@ import ( "syscall" ) +// Isatty tries to determine whether `fd` is a TTY. func Isatty(fd uintptr) (bool, error) { result, err := C.isatty(C.int(fd)) if err != nil && err != syscall.EINVAL { diff --git a/meter/meter.go b/meter/meter.go index 118e6d5..d241cc4 100644 --- a/meter/meter.go +++ b/meter/meter.go @@ -24,6 +24,9 @@ type Progress interface { Done() } +// Spinners is a slice of short strings that are repeatedly output in +// order to show the user that we are working, before we have any +// actual information to show. 
var Spinners = []string{"|", "(", "<", "-", "<", "(", "|", ")", ">", "-", ">", ")"} // progressMeter is a `Progress` that reports the current state every @@ -42,6 +45,9 @@ type progressMeter struct { count int64 } +// NewProgressMeter returns a progress meter that can be used to show +// progress to a TTY periodically, including an increasing int64 +// value. func NewProgressMeter(period time.Duration) Progress { return &progressMeter{ period: period, diff --git a/negated_bool_value.go b/negated_bool_value.go index b92238b..b983711 100644 --- a/negated_bool_value.go +++ b/negated_bool_value.go @@ -4,6 +4,9 @@ import ( "strconv" ) +// NegatedBoolValue is a `pflag.Value` that sets a boolean variable to +// the inverse of what the argument would normally indicate (e.g., to +// implement `--no-foo`-style arguments). type NegatedBoolValue struct { value *bool } diff --git a/sizes/footnotes.go b/sizes/footnotes.go index 4652e4e..3ecf013 100644 --- a/sizes/footnotes.go +++ b/sizes/footnotes.go @@ -5,17 +5,23 @@ import ( "fmt" ) +// Footnotes collects and numbers footnotes for a `table`. type Footnotes struct { footnotes []string indexes map[string]int } +// NewFootnotes creates and returns a new `Footnotes` instance. func NewFootnotes() *Footnotes { return &Footnotes{ indexes: make(map[string]int), } } +// CreateCitation adds a footnote with the specified text and returns +// the string that should be used to refer to it (e.g., "[2]"). If +// there is already a footnote with the exact same text, reuse its +// number. func (f *Footnotes) CreateCitation(footnote string) string { if footnote == "" { return "" @@ -30,6 +36,8 @@ func (f *Footnotes) CreateCitation(footnote string) string { return fmt.Sprintf("[%d]", index) } +// String returns a string representation of the footnote, including a +// trailing LF.
func (f *Footnotes) String() string { if len(f.footnotes) == 0 { return "" diff --git a/sizes/graph.go b/sizes/graph.go index 88115e2..c9304ad 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -13,6 +13,14 @@ import ( "github.com/github/git-sizer/meter" ) +// RefGroupSymbol is the string "identifier" that is used to refer to +// a refgroup, for example in the gitconfig. Nesting of refgroups is +// inferred from their names, using "." as separator between +// components. For example, if there are three refgroups with symbols +// "tags", "tags.releases", and "foo.bar", then "tags.releases" is +// considered to be nested within "tags", and "foo.bar" is considered +// to be nested within "foo", the latter being created automatically +// if it was not configured explicitly. type RefGroupSymbol string // RefGroup is a group of references, for example "branches" or @@ -29,6 +37,8 @@ type RefGroup struct { Name string } +// RefGrouper describes a type that can collate reference names into +// groups and decide which ones to walk. type RefGrouper interface { // Categorize tells whether `refname` should be walked at all, // and if so, the symbols of the reference groups to which it @@ -47,6 +57,13 @@ type refSeen struct { groups []RefGroupSymbol } +// ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which +// references to scan and how to group them. `nameStyle` specifies +// whether the output should include full names, hashes only, or +// nothing in the footnotes. `progress` tells whether a progress meter +// should be displayed while it works. +// +// It returns the size data for the repository. func ScanRepositoryUsingGraph( repo *git.Repository, rg RefGrouper, nameStyle NameStyle, progress bool, ) (HistorySize, error) { @@ -381,7 +398,7 @@ func ScanRepositoryUsingGraph( return graph.HistorySize(), nil } -// An object graph that is being built up. +// Graph is an object graph that is being built up. 
type Graph struct { repo *git.Repository @@ -408,6 +425,7 @@ type Graph struct { pathResolver PathResolver } +// NewGraph creates and returns a new `*Graph` instance. func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { return &Graph{ rg: rg, @@ -430,6 +448,7 @@ func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { } } +// RegisterReference records the specified reference in `g`. func (g *Graph) RegisterReference(ref git.Reference, walked bool, groups []RefGroupSymbol) { g.historyLock.Lock() g.historySize.recordReference(g, ref) @@ -443,6 +462,7 @@ func (g *Graph) RegisterReference(ref git.Reference, walked bool, groups []RefGr } } +// HistorySize returns the size data that have been collected. func (g *Graph) HistorySize() HistorySize { g.treeLock.Lock() defer g.treeLock.Unlock() @@ -459,7 +479,8 @@ func (g *Graph) HistorySize() HistorySize { return g.historySize } -// Record that the specified `oid` is a blob with the specified size. +// RegisterBlob records that the specified `oid` is a blob with the +// specified size. func (g *Graph) RegisterBlob(oid git.OID, objectSize counts.Count32) { size := BlobSize{Size: objectSize} // There are no listeners. Since this is a blob, we know all that From 59f84fa2723f6e5ceee871852fb643e5575ff4a5 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 13:53:31 +0100 Subject: [PATCH 083/176] counts.go: return early in the "unusual" cases --- counts/counts.go | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/counts/counts.go b/counts/counts.go index f6d4a3c..3961256 100644 --- a/counts/counts.go +++ b/counts/counts.go @@ -40,23 +40,23 @@ func (n1 *Count32) Increment(n2 Count32) { // AdjustMaxIfNecessary adjusts `*n1` to be `max(*n1, n2)`. Return // true iff `n2` was greater than `*n1`. 
func (n1 *Count32) AdjustMaxIfNecessary(n2 Count32) bool { - if n2 > *n1 { - *n1 = n2 - return true - } else { + if n2 <= *n1 { return false } + + *n1 = n2 + return true } // AdjustMaxIfPossible adjusts `*n1` to be `max(*n1, n2)`. Return true // iff `n2` was greater than or equal to `*n1`. func (n1 *Count32) AdjustMaxIfPossible(n2 Count32) bool { - if n2 >= *n1 { - *n1 = n2 - return true - } else { + if n2 < *n1 { return false } + + *n1 = n2 + return true } // Count64 is a count of something, capped at math.MaxUint64. @@ -91,21 +91,21 @@ func (n1 *Count64) Increment(n2 Count64) { // AdjustMaxIfNecessary adjusts `*n1` to be `max(*n1, n2)`. Return // true iff `n2` was greater than `*n1`. func (n1 *Count64) AdjustMaxIfNecessary(n2 Count64) bool { - if n2 > *n1 { - *n1 = n2 - return true - } else { + if n2 <= *n1 { return false } + + *n1 = n2 + return true } // AdjustMaxIfPossible adjusts `*n1` to be `max(*n1, n2)`. Return true // iff `n2` was greater than or equal to `*n1`. func (n1 *Count64) AdjustMaxIfPossible(n2 Count64) bool { - if n2 > *n1 { - *n1 = n2 - return true - } else { + if n2 <= *n1 { return false } + + *n1 = n2 + return true } From 96b7447fa70cdfc6f8d754cd215159daf37119ad Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 13:54:32 +0100 Subject: [PATCH 084/176] Humaner.FormatNumber(): return early in the "unusual" case Also, convert a chained `if` statement into a `switch`. --- counts/human.go | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/counts/human.go b/counts/human.go index a921c69..8f5de74 100644 --- a/counts/human.go +++ b/counts/human.go @@ -74,20 +74,22 @@ func (h *Humaner) FormatNumber(n uint64, unit string) (string, string) { if prefix.Multiplier == 1 { return fmt.Sprintf("%d", n), unit - } else { - mantissa := float64(n) / float64(prefix.Multiplier) - var format string + } - if wholePart >= 100 { - // `mantissa` can actually be up to 1023.999. 
- format = "%.0f" - } else if wholePart >= 10 { - format = "%.1f" - } else { - format = "%.2f" - } - return fmt.Sprintf(format, mantissa), prefix.Name + unit + mantissa := float64(n) / float64(prefix.Multiplier) + var format string + + switch { + case wholePart >= 100: + // `mantissa` can actually be up to 1023.999. + format = "%.0f" + case wholePart >= 10: + format = "%.1f" + default: + format = "%.2f" } + + return fmt.Sprintf(format, mantissa), prefix.Name + unit } // Format formats values, aligned, in `len(unit) + 10` or fewer From dce4a1260ad749283e32e5460b160d73cf7fa432 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 13:55:24 +0100 Subject: [PATCH 085/176] Humaner: name some return values for documentation's sake --- counts/human.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/counts/human.go b/counts/human.go index 8f5de74..ae75838 100644 --- a/counts/human.go +++ b/counts/human.go @@ -60,7 +60,7 @@ func (h *Humaner) Name() string { // FormatNumber formats n, aligned, in `len(unit) + 10` or fewer // characters (except for extremely large numbers). It returns strings // representing the numeral and the unit string. -func (h *Humaner) FormatNumber(n uint64, unit string) (string, string) { +func (h *Humaner) FormatNumber(n uint64, unit string) (numeral string, unitString string) { prefix := h.prefixes[0] wholePart := n @@ -95,7 +95,7 @@ func (h *Humaner) FormatNumber(n uint64, unit string) (string, string) { // Format formats values, aligned, in `len(unit) + 10` or fewer // characters (except for extremely large numbers). It returns strings // representing the numeral and the unit string. 
-func (h *Humaner) Format(value Humanable, unit string) (string, string) { +func (h *Humaner) Format(value Humanable, unit string) (numeral string, unitString string) { n, overflow := value.ToUint64() if overflow { return "∞", unit From ad4269794cd4f4a3cc4acbce3029ba938d33ac82 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 13:56:15 +0100 Subject: [PATCH 086/176] usage: make constant private --- git-sizer.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 63f2e90..5115f8a 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -17,7 +17,7 @@ import ( "github.com/github/git-sizer/sizes" ) -const Usage = `usage: git-sizer [OPTS] +const usage = `usage: git-sizer [OPTS] --threshold THRESHOLD minimum level of concern (i.e., number of stars) that should be reported. Default: @@ -116,7 +116,7 @@ func mainImplementation(args []string) error { flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) flags.Usage = func() { - fmt.Print(Usage) + fmt.Print(usage) } flags.VarP( From 4740a650f5ad7274a9b31390e02f18f544c2d899 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 13:59:48 +0100 Subject: [PATCH 087/176] Fix the wrapping of some errors --- git-sizer.go | 8 ++++---- git/git.go | 6 +++--- sizes/output.go | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 5115f8a..235e96e 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -191,7 +191,7 @@ func mainImplementation(args []string) error { if cpuprofile != "" { f, err := os.Create(cpuprofile) if err != nil { - return fmt.Errorf("couldn't set up cpuprofile file: %s", err) + return fmt.Errorf("couldn't set up cpuprofile file: %w", err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() @@ -211,7 +211,7 @@ func mainImplementation(args []string) error { } if repoErr != nil { - return fmt.Errorf("couldn't open Git repository: %s", repoErr) + return fmt.Errorf("couldn't open Git 
repository: %w", repoErr) } if jsonOutput { @@ -270,7 +270,7 @@ func mainImplementation(args []string) error { historySize, err := sizes.ScanRepositoryUsingGraph(repo, rg, nameStyle, progress) if err != nil { - return fmt.Errorf("error scanning repository: %s", err) + return fmt.Errorf("error scanning repository: %w", err) } if jsonOutput { @@ -285,7 +285,7 @@ func mainImplementation(args []string) error { return fmt.Errorf("JSON version must be 1 or 2") } if err != nil { - return fmt.Errorf("could not convert %v to json: %s", historySize, err) + return fmt.Errorf("could not convert %v to json: %w", historySize, err) } fmt.Printf("%s\n", j) } else { diff --git a/git/git.go b/git/git.go index a9402b1..1d59ad0 100644 --- a/git/git.go +++ b/git/git.go @@ -97,7 +97,7 @@ func NewRepository(path string) (*Repository, error) { gitBin, err := findGitBin() if err != nil { return nil, fmt.Errorf( - "could not find 'git' executable (is it in your PATH?): %v", err, + "could not find 'git' executable (is it in your PATH?): %w", err, ) } @@ -107,7 +107,7 @@ func NewRepository(path string) (*Repository, error) { switch err := err.(type) { case *exec.Error: return nil, fmt.Errorf( - "could not run '%s': %v", gitBin, err.Err, + "could not run '%s': %w", gitBin, err.Err, ) case *exec.ExitError: return nil, fmt.Errorf( @@ -124,7 +124,7 @@ func NewRepository(path string) (*Repository, error) { out, err = cmd.Output() if err != nil { return nil, fmt.Errorf( - "could not run 'git rev-parse --git-path shallow': %s", err, + "could not run 'git rev-parse --git-path shallow': %w", err, ) } shallow := smartJoin(gitDir, string(bytes.TrimSpace(out))) diff --git a/sizes/output.go b/sizes/output.go index 4bf80a6..52afa79 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -262,7 +262,7 @@ func (t *Threshold) String() string { func (t *Threshold) Set(s string) error { v, err := strconv.ParseFloat(s, 64) if err != nil { - return fmt.Errorf("error parsing floating-point value %q: %s", s, err) + 
return fmt.Errorf("error parsing floating-point value %q: %w", s, err) } *t = Threshold(v) return nil From 6c39369b11e6c7d3c93a21c5ed453a0dfbe76a53 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 14:48:55 +0100 Subject: [PATCH 088/176] git-sizer.go: handle some more errors --- git-sizer.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 235e96e..3248971 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -164,7 +164,9 @@ func mainImplementation(args []string) error { flags.Lookup("no-progress").NoOptDefVal = "true" flags.StringVar(&cpuprofile, "cpuprofile", "", "write cpu profile to file") - flags.MarkHidden("cpuprofile") + if err := flags.MarkHidden("cpuprofile"); err != nil { + return fmt.Errorf("marking option hidden: %w", err) + } var configger refopts.Configger if repo != nil { @@ -193,7 +195,9 @@ func mainImplementation(args []string) error { if err != nil { return fmt.Errorf("couldn't set up cpuprofile file: %w", err) } - pprof.StartCPUProfile(f) + if err := pprof.StartCPUProfile(f); err != nil { + return fmt.Errorf("starting CPU profiling: %w", err) + } defer pprof.StopCPUProfile() } @@ -289,10 +293,12 @@ func mainImplementation(args []string) error { } fmt.Printf("%s\n", j) } else { - io.WriteString( + if _, err := io.WriteString( os.Stdout, historySize.TableString(rg.Groups(), threshold, nameStyle), - ) + ); err != nil { + return fmt.Errorf("writing output: %w", err) + } } return nil From 33b4b53f20ae5aef5808462b3f8b2d96172cb465 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 14:50:20 +0100 Subject: [PATCH 089/176] filterValue.Type(): replace a chained `if` statement with a `switch` --- internal/refopts/filter_value.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/internal/refopts/filter_value.go b/internal/refopts/filter_value.go index f3fa35b..5dec209 100644 --- a/internal/refopts/filter_value.go +++ 
b/internal/refopts/filter_value.go @@ -113,11 +113,12 @@ func (v *filterValue) String() string { } func (v *filterValue) Type() string { - if v.pattern != "" { + switch { + case v.pattern != "": return "bool" - } else if v.regexp { + case v.regexp: return "regexp" - } else { + default: return "prefix" } } From 790d3afeee49f94fb4e5337dd698d236b0bc6080 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 14:52:21 +0100 Subject: [PATCH 090/176] CleanGitEnv(): pre-allocate the `env` slice --- internal/testutils/repoutils.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 36d122c..478f33a 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -112,8 +112,9 @@ var localEnvVars = func() map[string]bool { // CleanGitEnv returns a clean environment for running `git` commands // so that they won't be affected by the local environment. func CleanGitEnv() []string { - var env []string - for _, e := range os.Environ() { + osEnv := os.Environ() + env := make([]string, 0, len(osEnv)+3) + for _, e := range osEnv { i := strings.IndexByte(e, '=') if i == -1 { // This shouldn't happen, but if it does, From c5ed233725642d8fda5eaf1ae9cbdf1d5f75e867 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 14:54:02 +0100 Subject: [PATCH 091/176] NegatedBoolValue.String(): dispense with the special case first --- negated_bool_value.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/negated_bool_value.go b/negated_bool_value.go index b983711..9dd19fe 100644 --- a/negated_bool_value.go +++ b/negated_bool_value.go @@ -24,9 +24,9 @@ func (v *NegatedBoolValue) Get() interface{} { func (v *NegatedBoolValue) String() string { if v == nil || v.value == nil { return "true" - } else { - return strconv.FormatBool(!*v.value) } + + return strconv.FormatBool(!*v.value) } func (v *NegatedBoolValue) Type() string { From 
8fa5f3177510c45bddf9c1831ab4f425054e5f4e Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 14:54:27 +0100 Subject: [PATCH 092/176] item.levelOfConcern(): correctly report overflows of 32-bit values If a `Count32` overflows, its `ToUint64()` method returns `math.MaxUint32, true`. This should always be reported as exclamation points, regardless of the threshold. --- sizes/output.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sizes/output.go b/sizes/output.go index 52afa79..7605ef4 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -174,7 +174,10 @@ func (l *item) Footnote(nameStyle NameStyle) string { // return the string that should be used as its "level of concern" and // `true`; otherwise, return `"", false`. func (l *item) levelOfConcern(threshold Threshold) (string, bool) { - value, _ := l.value.ToUint64() + value, overflow := l.value.ToUint64() + if overflow { + return "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", true + } alert := Threshold(float64(value) / l.scale) if alert < threshold { return "", false From 46bc15ca771431a8c138a159c6fbc87145e0bcde Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 15:05:31 +0100 Subject: [PATCH 093/176] isatty: add `go:build` lines equivalent to the `+build` lines The march of progress. 
--- isatty/isatty_disabled.go | 1 + isatty/isatty_enabled.go | 1 + 2 files changed, 2 insertions(+) diff --git a/isatty/isatty_disabled.go b/isatty/isatty_disabled.go index 6dc9448..3121d33 100644 --- a/isatty/isatty_disabled.go +++ b/isatty/isatty_disabled.go @@ -1,3 +1,4 @@ +//go:build !isatty // +build !isatty package isatty diff --git a/isatty/isatty_enabled.go b/isatty/isatty_enabled.go index 2286b24..94d7f53 100644 --- a/isatty/isatty_enabled.go +++ b/isatty/isatty_enabled.go @@ -1,3 +1,4 @@ +//go:build isatty // +build isatty package isatty From e1e4b3fe614529569c03040eb323341d9513aecc Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 15:19:20 +0100 Subject: [PATCH 094/176] TestRepo.CreateObject(): reap the `git hash-object` command on errors --- internal/testutils/repoutils.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 478f33a..03aac9a 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -12,6 +12,7 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/github/git-sizer/git" @@ -182,13 +183,13 @@ func (repo *TestRepo) CreateObject( err = writer(in) err2 := in.Close() - if err != nil { - cmd.Wait() - require.NoError(t, err) + if !assert.NoError(t, err) { + _ = cmd.Wait() + t.FailNow() } - if err2 != nil { - cmd.Wait() - require.NoError(t, err2) + if !assert.NoError(t, err2) { + _ = cmd.Wait() + t.FailNow() } output, err := ioutil.ReadAll(out) From ee2696bb68472debf370fb06eb4653815cc5e120 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 15:22:44 +0100 Subject: [PATCH 095/176] ObjectIter: rename receiver variables consistently --- git/git.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/git/git.go b/git/git.go index 1d59ad0..dd802a8 100644 --- a/git/git.go +++ b/git/git.go @@ -460,8 
+460,8 @@ func (repo *Repository) NewObjectIter( // Next returns the next object: its OID, type, and size. When no more // data are available, it returns an `io.EOF` error. -func (l *ObjectIter) Next() (OID, ObjectType, counts.Count32, error) { - line, err := l.f.ReadString('\n') +func (iter *ObjectIter) Next() (OID, ObjectType, counts.Count32, error) { + line, err := iter.f.ReadString('\n') if err != nil { return OID{}, "", 0, err } @@ -470,15 +470,15 @@ func (l *ObjectIter) Next() (OID, ObjectType, counts.Count32, error) { } // Close closes the iterator and frees up resources. -func (l *ObjectIter) Close() error { - l.out1.Close() - err := <-l.errChan - l.out2.Close() - err2 := l.cmd1.Wait() +func (iter *ObjectIter) Close() error { + iter.out1.Close() + err := <-iter.errChan + iter.out2.Close() + err2 := iter.cmd1.Wait() if err == nil { err = err2 } - err2 = l.cmd2.Wait() + err2 = iter.cmd2.Wait() if err == nil { err = err2 } From 291df0e3ce9042a74eebbc434dce8b984db53f5f Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 15:43:56 +0100 Subject: [PATCH 096/176] HistorySize: change receiver to pointer --- sizes/output.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sizes/output.go b/sizes/output.go index 7605ef4..4c38e8f 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -41,7 +41,7 @@ func (s TagSize) String() string { return fmt.Sprintf("tag_depth=%d", s.TagDepth) } -func (s HistorySize) String() string { +func (s *HistorySize) String() string { return fmt.Sprintf( "unique_commit_count=%d, unique_commit_count = %d, max_commit_size = %d, "+ "max_history_depth=%d, max_parent_count=%d, "+ @@ -369,7 +369,7 @@ type table struct { buf bytes.Buffer } -func (s HistorySize) TableString( +func (s *HistorySize) TableString( refGroups []RefGroup, threshold Threshold, nameStyle NameStyle, ) string { contents := s.contents(refGroups) @@ -452,7 +452,7 @@ func (t *table) formatRow( ) } -func (s HistorySize) JSON( +func (s 
*HistorySize) JSON( refGroups []RefGroup, threshold Threshold, nameStyle NameStyle, ) ([]byte, error) { contents := s.contents(refGroups) @@ -462,7 +462,7 @@ func (s HistorySize) JSON( return j, err } -func (s HistorySize) contents(refGroups []RefGroup) tableContents { +func (s *HistorySize) contents(refGroups []RefGroup) tableContents { S := newSection I := newItem metric := counts.Metric From 6a6235e9759aa312d98ea46f1ee55690fc9393d6 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 15:48:43 +0100 Subject: [PATCH 097/176] Graph: remove unused member --- sizes/graph.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index c9304ad..eb55c2b 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -400,8 +400,6 @@ func ScanRepositoryUsingGraph( // Graph is an object graph that is being built up. type Graph struct { - repo *git.Repository - rg RefGrouper blobLock sync.Mutex From 8a0051cb9f79f1a5b3bbd7ff9ead0a24343378dc Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 16:09:54 +0100 Subject: [PATCH 098/176] item: rename receiver variables consistently --- sizes/output.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sizes/output.go b/sizes/output.go index 4c38e8f..f461d19 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -141,30 +141,30 @@ func newItem( } } -func (l *item) Emit(t *table) { - levelOfConcern, interesting := l.levelOfConcern(t.threshold) +func (i *item) Emit(t *table) { + levelOfConcern, interesting := i.levelOfConcern(t.threshold) if !interesting { return } - valueString, unitString := l.humaner.Format(l.value, l.unit) + valueString, unitString := i.humaner.Format(i.value, i.unit) t.formatRow( - l.name, t.footnotes.CreateCitation(l.Footnote(t.nameStyle)), + i.name, t.footnotes.CreateCitation(i.Footnote(t.nameStyle)), valueString, unitString, levelOfConcern, ) } -func (l *item) Footnote(nameStyle NameStyle) string { - if l.path == nil || 
l.path.OID == git.NullOID { +func (i *item) Footnote(nameStyle NameStyle) string { + if i.path == nil || i.path.OID == git.NullOID { return "" } switch nameStyle { case NameStyleNone: return "" case NameStyleHash: - return l.path.OID.String() + return i.path.OID.String() case NameStyleFull: - return l.path.String() + return i.path.String() default: panic("unexpected NameStyle") } @@ -173,12 +173,12 @@ func (l *item) Footnote(nameStyle NameStyle) string { // If this item's alert level is at least as high as the threshold, // return the string that should be used as its "level of concern" and // `true`; otherwise, return `"", false`. -func (l *item) levelOfConcern(threshold Threshold) (string, bool) { - value, overflow := l.value.ToUint64() +func (i *item) levelOfConcern(threshold Threshold) (string, bool) { + value, overflow := i.value.ToUint64() if overflow { return "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", true } - alert := Threshold(float64(value) / l.scale) + alert := Threshold(float64(value) / i.scale) if alert < threshold { return "", false } From eac12a854a17063da7932ac52065d3f50c63ed69 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 16:10:44 +0100 Subject: [PATCH 099/176] output.go: dispense with special cases early --- sizes/output.go | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/sizes/output.go b/sizes/output.go index f461d19..d6476b7 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -248,17 +248,17 @@ type Threshold float64 func (t *Threshold) String() string { if t == nil { return "UNSET" - } else { - switch *t { - case 0: - return "--verbose" - case 1: - return "--threshold=1" - case 30: - return "--critical" - default: - return fmt.Sprintf("--threshold=%g", *t) - } + } + + switch *t { + case 0: + return "--verbose" + case 1: + return "--threshold=1" + case 30: + return "--critical" + default: + return fmt.Sprintf("--threshold=%g", *t) } } @@ -328,17 +328,17 @@ const ( 
func (n *NameStyle) String() string { if n == nil { return "UNSET" - } else { - switch *n { - case NameStyleNone: - return "none" - case NameStyleHash: - return "hash" - case NameStyleFull: - return "full" - default: - panic("Unexpected NameStyle value") - } + } + + switch *n { + case NameStyleNone: + return "none" + case NameStyleHash: + return "hash" + case NameStyleFull: + return "full" + default: + panic("Unexpected NameStyle value") } } From fd3460ec9cc5f5408c2d8050053f9c83a51c1775 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 17:01:49 +0100 Subject: [PATCH 100/176] ScanRepositoryUsingGraph(): return the unexpected object type error --- sizes/graph.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sizes/graph.go b/sizes/graph.go index eb55c2b..4ceedc5 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -219,7 +219,7 @@ func ScanRepositoryUsingGraph( case "tag": tags = append(tags, ObjectHeader{oid, objectSize}) default: - err = fmt.Errorf("unexpected object type: %s", objectType) + return HistorySize{}, fmt.Errorf("unexpected object type: %s", objectType) } } progressMeter.Done() From 71dfcef8df2ae30612b0fe366920f533719068af Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 17:02:43 +0100 Subject: [PATCH 101/176] Appease the linter Who knew? 
--- sizes/graph.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sizes/graph.go b/sizes/graph.go index 4ceedc5..eca2f0c 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -294,7 +294,7 @@ func ScanRepositoryUsingGraph( }() progressMeter.Start("Processing trees: %d") - for _ = range trees { + for range trees { oid, objectType, _, data, err := objectIter.Next() if err != nil { if err != io.EOF { From 0308dfd1b2d0f97e43a75d92c9c45d3560864ad3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 17:03:16 +0100 Subject: [PATCH 102/176] output.go: add blank lines after comments These comments apply to multiple methods, so leave a space between them and the following method. --- sizes/output.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sizes/output.go b/sizes/output.go index d6476b7..dd59674 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -245,6 +245,7 @@ func (i *indentedItem) Emit(t *table) { type Threshold float64 // Methods to implement pflag.Value: + func (t *Threshold) String() string { if t == nil { return "UNSET" @@ -325,6 +326,7 @@ const ( ) // Methods to implement pflag.Value: + func (n *NameStyle) String() string { if n == nil { return "UNSET" From 14169194b4600c54b3c4be1146e14b3f9a71eec9 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 17:04:59 +0100 Subject: [PATCH 103/176] path_resolver.go: use `switch` instead of chained `if`s This improves the linter's joy. --- sizes/path_resolver.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sizes/path_resolver.go b/sizes/path_resolver.go index f0f59d2..c08ddc2 100644 --- a/sizes/path_resolver.go +++ b/sizes/path_resolver.go @@ -134,12 +134,13 @@ func (p *Path) TreePrefix() string { return "???" } case "commit", "tag": - if p.parent != nil { + switch { + case p.parent != nil: // The parent is a tag. 
return fmt.Sprintf("%s^{%s}", p.parent.BestPath(), p.objectType) - } else if p.relativePath != "" { + case p.relativePath != "": return p.relativePath + ":" - } else { + default: return p.OID.String() + ":" } default: @@ -164,12 +165,13 @@ func (p *Path) Path() string { return "" } case "commit", "tag": - if p.parent != nil { + switch { + case p.parent != nil: // The parent is a tag. return fmt.Sprintf("%s^{%s}", p.parent.BestPath(), p.objectType) - } else if p.relativePath != "" { + case p.relativePath != "": return p.relativePath - } else { + default: return "" } default: From f3caecba4cf28cbc634f03da46771092654b81da Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 17:05:50 +0100 Subject: [PATCH 104/176] InOrderPathResolver.forgetPathLocked(): handle unusual case first --- sizes/path_resolver.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sizes/path_resolver.go b/sizes/path_resolver.go index c08ddc2..2a3bb1c 100644 --- a/sizes/path_resolver.go +++ b/sizes/path_resolver.go @@ -256,10 +256,13 @@ func (pr *InOrderPathResolver) forgetPathLocked(p *Path) { panic("forgetPathLocked() called when refcount zero") } p.seekerCount-- + if p.seekerCount > 0 { // The path is still wanted (by another seeker). return - } else if p.parent != nil { + } + + if p.parent != nil { // We already found the object's parent, and the parent's path // is wanted on account if this object. Decrement its // seekerCount. From 5c8651855cdc4bb1380efa9a096514eaf85dbd13 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 17:43:45 +0100 Subject: [PATCH 105/176] Repository.Close(): remove method It didn't do anything. 
--- git-sizer.go | 3 --- git/git.go | 5 ----- 2 files changed, 8 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 3248971..a2a8c43 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -110,9 +110,6 @@ func mainImplementation(args []string) error { // Try to open the repository, but it's not an error yet if this // fails, because the user might only be asking for `--help`. repo, repoErr := git.NewRepository(".") - if repoErr == nil { - defer repo.Close() - } flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) flags.Usage = func() { diff --git a/git/git.go b/git/git.go index dd802a8..2ded098 100644 --- a/git/git.go +++ b/git/git.go @@ -169,11 +169,6 @@ func (repo *Repository) Path() string { return repo.path } -// Close closes `repo`, possibly freeing up resources. -func (repo *Repository) Close() error { - return nil -} - // Reference represents a Git reference. type Reference struct { // Refname is the full reference name of the reference. From 93e2cd51277dbf025e4bc22641e2975a2e37eb6c Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 17:06:31 +0100 Subject: [PATCH 106/176] Suppress some linter warnings --- git_sizer_test.go | 1 + sizes/output.go | 1 + 2 files changed, 2 insertions(+) diff --git a/git_sizer_test.go b/git_sizer_test.go index 2e20b8a..a6b3dce 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -90,6 +90,7 @@ func TestRefSelections(t *testing.T) { refname string }{ + //nolint:gocritic // Want columns in comment to match initializers. // 111111111 //0123456789012345678 {"+ + + + + + + + +", "refs/barfoo"}, diff --git a/sizes/output.go b/sizes/output.go index dd59674..933cc05 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -470,6 +470,7 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents { metric := counts.Metric binary := counts.Binary + //nolint:prealloc // The length is not known in advance. 
var rgis []tableContents for _, rg := range refGroups { if rg.Symbol == "" { From e6f8d99a242c1f649cc1139d2c3eb521a261960b Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 1 Nov 2021 18:31:06 +0100 Subject: [PATCH 107/176] Audit some command executions and tell the linter that they're OK --- git/git.go | 5 +++++ internal/testutils/repoutils.go | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/git/git.go b/git/git.go index 2ded098..281f54d 100644 --- a/git/git.go +++ b/git/git.go @@ -101,6 +101,8 @@ func NewRepository(path string) (*Repository, error) { ) } + //nolint:gosec // `gitBin` is chosen carefully, and `path` is the + // path to the repository. cmd := exec.Command(gitBin, "-C", path, "rev-parse", "--git-dir") out, err := cmd.Output() if err != nil { @@ -119,6 +121,7 @@ func NewRepository(path string) (*Repository, error) { } gitDir := smartJoin(path, string(bytes.TrimSpace(out))) + //nolint:gosec // `gitBin` is chosen carefully. cmd = exec.Command(gitBin, "rev-parse", "--git-path", "shallow") cmd.Dir = gitDir out, err = cmd.Output() @@ -152,6 +155,8 @@ func (repo *Repository) gitCommand(callerArgs ...string) *exec.Cmd { args = append(args, callerArgs...) + //nolint:gosec // `gitBin` is chosen carefully, and the rest of + // the args have been checked. cmd := exec.Command(repo.gitBin, args...) cmd.Env = append( diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 03aac9a..cb00dee 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -49,8 +49,10 @@ func (repo *TestRepo) Init(t *testing.T, bare bool) { // exist yet: var cmd *exec.Cmd if bare { + //nolint:gosec // `repo.Path` is a path that we created. cmd = exec.Command("git", "init", "--bare", repo.Path) } else { + //nolint:gosec // `repo.Path` is a path that we created. 
cmd = exec.Command("git", "init", repo.Path) } cmd.Env = CleanGitEnv() @@ -143,6 +145,8 @@ func (repo *TestRepo) GitCommand(t *testing.T, args ...string) *exec.Cmd { gitArgs := []string{"-C", repo.Path} gitArgs = append(gitArgs, args...) + + //nolint:gosec // The args all come from the test code. cmd := exec.Command("git", gitArgs...) cmd.Env = CleanGitEnv() return cmd From ba4f15f2c989241c01e4deb95908cda0f0e70f25 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 07:50:23 +0100 Subject: [PATCH 108/176] test.yml: spell out the steps rather than using `make` --- .github/workflows/test.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1fa5ffb..d7a186b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,9 +7,25 @@ jobs: os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} steps: + - name: Setup go + uses: actions/setup-go@v2 + with: + go-version: '1.17' + - name: Checkout code uses: actions/checkout@v2 + - name: Get full repo history run: git fetch --prune --unshallow --tags + + - name: Download dependencies + run: go mod download + + - name: Build + run: | + mkdir -p bin + go build -o bin . + ls -la bin + - name: Test - run: make test + run: go test -race -timeout 60s ./... 
From ca3454014ba73898ecd884446e5178e5ed7ac5d6 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 07:51:39 +0100 Subject: [PATCH 109/176] Get the automated tests running on Windows, too --- .github/workflows/test.yml | 5 ++++- git_sizer_test.go | 31 ++++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d7a186b..bd802c6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,7 +4,7 @@ jobs: test: strategy: matrix: - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.os }} steps: - name: Setup go @@ -19,13 +19,16 @@ jobs: run: git fetch --prune --unshallow --tags - name: Download dependencies + shell: bash run: go mod download - name: Build + shell: bash run: | mkdir -p bin go build -o bin . ls -la bin - name: Test + shell: bash run: go test -race -timeout 60s ./... diff --git a/git_sizer_test.go b/git_sizer_test.go index a6b3dce..e816350 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" "testing" "time" @@ -22,9 +23,27 @@ import ( "github.com/github/git-sizer/sizes" ) +func sizerExe(t *testing.T) string { + t.Helper() + + v := "bin/git-sizer" + switch runtime.GOOS { + case "windows": + v = `bin\git-sizer.exe` + } + + v, err := exec.LookPath(v) + require.NoError(t, err) + + v, err = filepath.Abs(v) + require.NoError(t, err) + + return v +} + // Smoke test that the program runs. 
func TestExec(t *testing.T) { - cmd := exec.Command("bin/git-sizer") + cmd := exec.Command(sizerExe(t)) output, err := cmd.CombinedOutput() assert.NoErrorf(t, err, "command failed; output: %#v", string(output)) } @@ -133,10 +152,7 @@ func TestRefSelections(t *testing.T) { repo.CreateReferencedOrphan(t, p.refname) } - executable, err := exec.LookPath("bin/git-sizer") - require.NoError(t, err) - executable, err = filepath.Abs(executable) - require.NoError(t, err) + executable := sizerExe(t) for i, p := range []struct { name string @@ -316,10 +332,7 @@ func TestRefgroups(t *testing.T) { repo.CreateReferencedOrphan(t, refname) } - executable, err := exec.LookPath("bin/git-sizer") - require.NoError(t, err) - executable, err = filepath.Abs(executable) - require.NoError(t, err) + executable := sizerExe(t) for _, p := range []struct { name string From 6aa6890117e3fab8343e9507e170a958f8066f4f Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 11:48:29 +0100 Subject: [PATCH 110/176] Don't cancel CI on other OSs just because one fails --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bd802c6..fa04802 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,6 +5,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] + fail-fast: false runs-on: ${{ matrix.os }} steps: - name: Setup go From 1458ae5f8bf52f3b6cf8ae76bd016982cc53bf9b Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 2 Nov 2021 09:30:01 +0100 Subject: [PATCH 111/176] internal/pipe: new package for handling command pipelines --- go.mod | 5 +- go.sum | 50 ++- internal/pipe/command.go | 223 +++++++++++ internal/pipe/filter-error.go | 132 +++++++ internal/pipe/function.go | 66 ++++ internal/pipe/iocopier.go | 62 +++ internal/pipe/linewise.go | 74 ++++ internal/pipe/pipeline.go | 211 +++++++++++ internal/pipe/pipeline_test.go | 664 
+++++++++++++++++++++++++++++++++ internal/pipe/print.go | 37 ++ internal/pipe/scanner.go | 75 ++++ internal/pipe/stage.go | 34 ++ 12 files changed, 1623 insertions(+), 10 deletions(-) create mode 100644 internal/pipe/command.go create mode 100644 internal/pipe/filter-error.go create mode 100644 internal/pipe/function.go create mode 100644 internal/pipe/iocopier.go create mode 100644 internal/pipe/linewise.go create mode 100644 internal/pipe/pipeline.go create mode 100644 internal/pipe/pipeline_test.go create mode 100644 internal/pipe/print.go create mode 100644 internal/pipe/scanner.go create mode 100644 internal/pipe/stage.go diff --git a/go.mod b/go.mod index f5d1529..58a3901 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/cli/safeexec v1.0.0 github.com/davecgh/go-spew v1.1.1 // indirect github.com/spf13/pflag v1.0.5 - github.com/stretchr/testify v1.4.0 - gopkg.in/yaml.v2 v2.2.7 // indirect + github.com/stretchr/testify v1.7.0 + go.uber.org/goleak v1.1.12 + golang.org/x/sync v0.0.0-20210220032951-036812b2e83c ) diff --git a/go.sum b/go.sum index 590e4f5..d977b15 100644 --- a/go.sum +++ b/go.sum @@ -1,19 +1,53 @@ github.com/cli/safeexec v1.0.0 h1:0VngyaIyqACHdcMNWfo6+KdUYnqEr2Sg+bSP1pdF+dI= github.com/cli/safeexec v1.0.0/go.mod h1:Z/D4tTN8Vs5gXYHDCbaM1S/anmEDnJb1iW0+EJ5zx3Q= -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text 
v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= +go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.5 h1:ouewzE6p+/VEB31YYnTbEJdi8pFqKp4P4n85vwo3DHA= +golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.7 h1:VUgggvou5XRW9mHwD/yXxIYSMtY0zoKQf/v226p2nyo= -gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/pipe/command.go b/internal/pipe/command.go new file mode 100644 index 0000000..b5d6c05 --- /dev/null +++ b/internal/pipe/command.go @@ -0,0 +1,223 @@ +package pipe + +import ( + "bytes" + "context" + "errors" + "io" + "os" + "os/exec" + "sync/atomic" + "syscall" + "time" + + "golang.org/x/sync/errgroup" +) + +// commandStage is a pipeline `Stage` based on running an external +// command and piping the data through its stdin and stdout. +type commandStage struct { + name string + stdin io.Closer + cmd *exec.Cmd + done chan struct{} + wg errgroup.Group + stderr bytes.Buffer + + // If the context expired and we attempted to kill the command, + // `ctx.Err()` is stored here. + ctxErr atomic.Value +} + +// Command returns a pipeline `Stage` based on the specified external +// `command`, run with the given command-line `args`. Its stdin and +// stdout are handled as usual, and its stderr is collected and +// included in any `*exec.ExitError` that the command might emit. 
+func Command(command string, args ...string) Stage { + if len(command) == 0 { + panic("attempt to create command with empty command") + } + + cmd := exec.Command(command, args...) + return CommandStage(command, cmd) +} + +// CommandStage returns a pipeline `Stage` with the name `name`, based on +// the specified `cmd`. Its stdin and stdout are handled as usual, and +// its stderr is collected and included in any `*exec.ExitError` that +// the command might emit. +func CommandStage(name string, cmd *exec.Cmd) Stage { + return &commandStage{ + name: name, + cmd: cmd, + done: make(chan struct{}), + } +} + +func (s *commandStage) Name() string { + return s.name +} + +func (s *commandStage) Start( + ctx context.Context, env Env, stdin io.ReadCloser, +) (io.ReadCloser, error) { + if s.cmd.Dir == "" { + s.cmd.Dir = env.Dir + } + + if stdin != nil { + s.cmd.Stdin = stdin + // Also keep a copy so that we can close it when the command exits: + s.stdin = stdin + } + + stdout, err := s.cmd.StdoutPipe() + if err != nil { + return nil, err + } + + // If the caller hasn't arranged otherwise, read the command's + // standard error into our `stderr` field: + if s.cmd.Stderr == nil { + // We can't just set `s.cmd.Stderr = &s.stderr`, because if we + // do then `s.cmd.Wait()` doesn't wait to be sure that all + // error output has been captured. By doing this ourselves, we + // can be sure.
+ p, err := s.cmd.StderrPipe() + if err != nil { + return nil, err + } + s.wg.Go(func() error { + _, err := io.Copy(&s.stderr, p) + // We don't consider `ErrClosed` an error (FIXME: is this + // correct?): + if err != nil && !errors.Is(err, os.ErrClosed) { + return err + } + return nil + }) + } + + // Put the command in its own process group: + if s.cmd.SysProcAttr == nil { + s.cmd.SysProcAttr = &syscall.SysProcAttr{} + } + s.cmd.SysProcAttr.Setpgid = true + + if err := s.cmd.Start(); err != nil { + return nil, err + } + + // Arrange for the process to be killed (gently) if the context + // expires before the command exits normally: + go func() { + select { + case <-ctx.Done(): + s.kill(ctx.Err()) + case <-s.done: + // Process already done; no need to kill anything. + } + }() + + return stdout, nil +} + +// kill is called to kill the process if the context expires. `err` is +// the corresponding value of `Context.Err()`. +func (s *commandStage) kill(err error) { + // I believe that the calls to `syscall.Kill()` in this method are + // racy. It could be that s.cmd.Wait() succeeds immediately before + // this call, in which case the process group wouldn't exist + // anymore. But I don't see any way to avoid this without + // duplicating a lot of code from `exec.Cmd`. (`os.Process.Kill()` and + // `os.Process.Signal()` appear to be race-free, but only because they + // use internal synchronization. But those methods only kill the + // process, not the process group, so they are not suitable here.) + + // We started the process with PGID == PID: + pid := s.cmd.Process.Pid + select { + case <-s.done: + // Process has ended; no need to kill it again. + return + default: + } + + // Record the `ctx.Err()`, which will be used as the error result + // for this stage.
+ s.ctxErr.Store(err) + + // First try to kill using a relatively gentle signal so that + // the processes have a chance to clean up after themselves: + _ = syscall.Kill(-pid, syscall.SIGTERM) + + // Well-behaved processes should commit suicide after the above, + // but if they don't exit within 2s, murder the whole lot of them: + go func() { + // Use an explicit `time.Timer` rather than `time.After()` so + // that we can stop it (freeing resources) promptly if the + // command exits before the timer triggers. + timer := time.NewTimer(2 * time.Second) + defer timer.Stop() + + select { + case <-s.done: + // Process has ended; no need to kill it again. + case <-timer.C: + _ = syscall.Kill(-pid, syscall.SIGKILL) + } + }() +} + +// filterCmdError interprets `err`, which was returned by `Cmd.Wait()` +// (possibly `nil`), possibly modifying it or ignoring it. It returns +// the error that should actually be returned to the caller (possibly +// `nil`). +func (s *commandStage) filterCmdError(err error) error { + if err == nil { + return nil + } + + eErr, ok := err.(*exec.ExitError) + if !ok { + return err + } + + ctxErr, ok := s.ctxErr.Load().(error) + if ok { + // If the process looks like it was killed by us, substitute + // `ctxErr` for the process's own exit error. 
+ ps, ok := eErr.ProcessState.Sys().(syscall.WaitStatus) + if ok && ps.Signaled() && + (ps.Signal() == syscall.SIGTERM || ps.Signal() == syscall.SIGKILL) { + return ctxErr + } + } + + eErr.Stderr = s.stderr.Bytes() + return eErr +} + +func (s *commandStage) Wait() error { + defer close(s.done) + + // Make sure that any stderr is copied before `s.cmd.Wait()` + // closes the read end of the pipe: + wErr := s.wg.Wait() + + err := s.cmd.Wait() + err = s.filterCmdError(err) + + if err == nil && wErr != nil { + err = wErr + } + + if s.stdin != nil { + cErr := s.stdin.Close() + if cErr != nil && err == nil { + return cErr + } + } + + return err +} diff --git a/internal/pipe/filter-error.go b/internal/pipe/filter-error.go new file mode 100644 index 0000000..6373be3 --- /dev/null +++ b/internal/pipe/filter-error.go @@ -0,0 +1,132 @@ +package pipe + +import ( + "errors" + "io" + "os/exec" + "syscall" +) + +// ErrorFilter is a function that can filter errors from +// `Stage.Wait()`. The original error (possibly nil) is passed in as +// an argument, and whatever the function returns is the error +// (possibly nil) that is actually emitted. +type ErrorFilter func(err error) error + +func FilterError(s Stage, filter ErrorFilter) Stage { + return efStage{Stage: s, filter: filter} +} + +type efStage struct { + Stage + filter ErrorFilter +} + +func (s efStage) Wait() error { + return s.filter(s.Stage.Wait()) +} + +// ErrorMatcher decides whether its argument matches some class of +// errors (e.g., errors that we want to ignore). The function will +// only be invoked for non-nil errors. +type ErrorMatcher func(err error) bool + +// IgnoreError creates a stage that acts like `s` except that it +// ignores any errors that are matched by `em`. 
Use like +// +// p.Add(pipe.IgnoreError( +// someStage, +// func(err error) bool { +// var myError *MyErrorType +// return errors.As(err, &myError) && myError.foo == 42 +// }, +// )) +// +// The second argument can also be one of the `ErrorMatcher`s that are +// provided by this package (e.g., `IsError(target)`, +// `IsSignal(signal)`, `IsSIGPIPE`, `IsEPIPE`, `IsPipeError`), or one of +// the functions from the standard library that has the same signature +// (e.g., `os.IsTimeout`), or some combination of these (e.g., +// `AnyError(IsSIGPIPE, os.IsTimeout)`). +func IgnoreError(s Stage, em ErrorMatcher) Stage { + return FilterError(s, + func(err error) error { + if err == nil || em(err) { + return nil + } + return err + }, + ) +} + +// AnyError returns an `ErrorMatcher` that returns true for an error +// that matches any of the `ems`. +func AnyError(ems ...ErrorMatcher) ErrorMatcher { + return func(err error) bool { + if err == nil { + return false + } + for _, em := range ems { + if em(err) { + return true + } + } + return false + } +} + +// IsError returns an `ErrorMatcher` for the specified target error, +// matched using `errors.Is()`. Use like +// +// p.Add(pipe.IgnoreError(someStage, IsError(io.EOF))) +func IsError(target error) ErrorMatcher { + return func(err error) bool { + return errors.Is(err, target) + } +} + +// IsSignal returns an `ErrorMatcher` that matches `*exec.ExitError`s +// that were caused by the specified signal. The match for +// `*exec.ExitError`s uses `errors.As()`. +func IsSignal(signal syscall.Signal) ErrorMatcher { + return func(err error) bool { + var eErr *exec.ExitError + + if !errors.As(err, &eErr) { + return false + } + + status, ok := eErr.Sys().(syscall.WaitStatus) + return ok && status.Signaled() && status.Signal() == signal + } +} + +var ( + // IsSIGPIPE is an `ErrorMatcher` that matches `*exec.ExitError`s + // that were caused by SIGPIPE. The match for `*exec.ExitError`s + // uses `errors.As()`.
Use like + // + // p.Add(IgnoreError(someStage, IsSIGPIPE)) + IsSIGPIPE = IsSignal(syscall.SIGPIPE) + + // IsEPIPE is an `ErrorMatcher` that matches `syscall.EPIPE` using + // `errors.Is()`. Use like + // + // p.Add(IgnoreError(someStage, IsEPIPE)) + IsEPIPE = IsError(syscall.EPIPE) + + // IsErrClosedPipe is an `ErrorMatcher` that matches + // `io.ErrClosedPipe` using `errors.Is()`. (`io.ErrClosedPipe` is + // the error that results from writing to a closed + // `*io.PipeWriter`.) Use like + // + // p.Add(IgnoreError(someStage, IsErrClosedPipe)) + IsErrClosedPipe = IsError(io.ErrClosedPipe) + + // IsPipeError is an `ErrorMatcher` that matches a few different + // errors that typically result if a stage writes to a subsequent + // stage that has stopped reading from its stdin. Use like + // + // p.Add(IgnoreError(someStage, IsPipeError)) + IsPipeError = AnyError(IsSIGPIPE, IsEPIPE, IsErrClosedPipe) +) diff --git a/internal/pipe/function.go b/internal/pipe/function.go new file mode 100644 index 0000000..bc5d0bd --- /dev/null +++ b/internal/pipe/function.go @@ -0,0 +1,66 @@ +package pipe + +import ( + "context" + "fmt" + "io" +) + +// StageFunc is a function that can be used to power a `goStage`. It +// should read its input from `stdin` and write its output to +// `stdout`. `stdin` and `stdout` will be closed automatically (if +// necessary) once the function returns. +// +// Neither `stdin` nor `stdout` are necessarily buffered. If the +// `StageFunc` requires buffering, it needs to arrange that itself. +// +// A `StageFunc` is run in a separate goroutine, so it must be careful +// to synchronize any data access aside from reading and writing. +type StageFunc func(ctx context.Context, env Env, stdin io.Reader, stdout io.Writer) error + +// Function returns a pipeline `Stage` that will run a `StageFunc` in +// a separate goroutine to process the data. See `StageFunc` for more +// information. 
+func Function(name string, f StageFunc) Stage { + return &goStage{ + name: name, + f: f, + done: make(chan struct{}), + } +} + +// goStage is a `Stage` that does its work by running an arbitrary +// `stageFunc` in a goroutine. +type goStage struct { + name string + f StageFunc + done chan struct{} + err error +} + +func (s *goStage) Name() string { + return s.name +} + +func (s *goStage) Start(ctx context.Context, env Env, stdin io.ReadCloser) (io.ReadCloser, error) { + r, w := io.Pipe() + go func() { + s.err = s.f(ctx, env, stdin, w) + if err := w.Close(); err != nil && s.err == nil { + s.err = fmt.Errorf("error closing output pipe for stage %q: %w", s.Name(), err) + } + if stdin != nil { + if err := stdin.Close(); err != nil && s.err == nil { + s.err = fmt.Errorf("error closing stdin for stage %q: %w", s.Name(), err) + } + } + close(s.done) + }() + + return r, nil +} + +func (s *goStage) Wait() error { + <-s.done + return s.err +} diff --git a/internal/pipe/iocopier.go b/internal/pipe/iocopier.go new file mode 100644 index 0000000..26d5b0f --- /dev/null +++ b/internal/pipe/iocopier.go @@ -0,0 +1,62 @@ +package pipe + +import ( + "context" + "errors" + "io" + "os" +) + +// ioCopier is a stage that copies its stdin to a specified +// `io.Writer`. It generates no stdout itself. +type ioCopier struct { + w io.WriteCloser + done chan struct{} + err error +} + +func newIOCopier(w io.WriteCloser) *ioCopier { + return &ioCopier{ + w: w, + done: make(chan struct{}), + } +} + +func (s *ioCopier) Name() string { + return "ioCopier" +} + +// This method always returns `nil, nil`. 
+func (s *ioCopier) Start(ctx context.Context, _ Env, r io.ReadCloser) (io.ReadCloser, error) { + go func() { + _, err := io.Copy(s.w, r) + // We don't consider `ErrClosed` an error (FIXME: is this + // correct?): + if err != nil && !errors.Is(err, os.ErrClosed) { + s.err = err + } + if err := r.Close(); err != nil && s.err == nil { + s.err = err + } + if err := s.w.Close(); err != nil && s.err == nil { + s.err = err + } + close(s.done) + }() + + // FIXME: if `s.w.Write()` is blocking (e.g., because there is a + // downstream process that is not reading from the other side), + // there's no way to terminate the copy when the context expires. + // This is not too bad, because the `io.Copy()` call will exit by + // itself when its input is closed. + // + // We could, however, be smarter about exiting more quickly if the + // context expires but `s.w.Write()` is not blocking. + + return nil, nil +} + +func (s *ioCopier) Wait() error { + <-s.done + return s.err +} diff --git a/internal/pipe/linewise.go b/internal/pipe/linewise.go new file mode 100644 index 0000000..7b5c6ef --- /dev/null +++ b/internal/pipe/linewise.go @@ -0,0 +1,74 @@ +package pipe + +import ( + "bufio" + "bytes" + "context" + "io" +) + +// LinewiseStageFunc is a function that can be embedded in a +// `goStage`. It is called once per line in the input (where "line" +// can be defined via any `bufio.Scanner`). It should process the line +// and may write whatever it likes to `stdout`, which is a buffered +// writer whose contents are forwarded to the input of the next stage +// of the pipeline. The function needn't write one line of output per +// line of input. +// +// The function mustn't retain copies of `line`, since it may be +// overwritten every time the function is called. +// +// The function needn't flush or close `stdout` (this will be done +// automatically when all of the input has been processed). 
+// +// If there is an error parsing the input into lines, or if this +// function returns an error, then the whole pipeline will be aborted +// with that error. However, if the function returns the special error +// `pipe.FinishEarly`, the stage will stop processing immediately with +// a `nil` error value. +// +// The function will be called in a separate goroutine, so it must be +// careful to synchronize any data access aside from writing to +// `stdout`. +type LinewiseStageFunc func( + ctx context.Context, env Env, line []byte, stdout *bufio.Writer, +) error + +// LinewiseFunction returns a function-based `Stage`. The input will +// be split into LF-terminated lines and passed to the function one +// line at a time (without the LF). The function may emit output to +// its `stdout` argument. See the definition of `LinewiseStageFunc` +// for more information. +// +// Note that the stage will emit an error if any line (including its +// end-of-line terminator) exceeds 64 kiB in length. If this is too +// short, use `ScannerFunction()` directly with your own +// `NewScannerFunc` as argument, or use `Function()` directly with +// your own `StageFunc`. +func LinewiseFunction(name string, f LinewiseStageFunc) Stage { + return ScannerFunction( + name, + func(r io.Reader) (Scanner, error) { + scanner := bufio.NewScanner(r) + // Split based on strict LF (we don't accept CRLF): + scanner.Split(ScanLFTerminatedLines) + return scanner, nil + }, + f, + ) +} + +// ScanLFTerminatedLines is a `bufio.SplitFunc` that splits its input +// into lines at LF characters (not treating CR specially). 
+func ScanLFTerminatedLines(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + if i := bytes.IndexByte(data, '\n'); i != -1 { + return i + 1, data[0:i], nil + } + if atEOF { + return len(data), data, nil + } + return 0, nil, nil +} diff --git a/internal/pipe/pipeline.go b/internal/pipe/pipeline.go new file mode 100644 index 0000000..d14200b --- /dev/null +++ b/internal/pipe/pipeline.go @@ -0,0 +1,211 @@ +package pipe + +import ( + "bytes" + "context" + "fmt" + "io" + "io/ioutil" + "sync/atomic" +) + +// Env represents the environment that a pipeline stage should run in. +// It is passed to `Stage.Start()`. +type Env struct { + // The directory in which external commands should be executed by + // default. + Dir string +} + +// Pipeline represents a Unix-like pipe that can include multiple +// stages, including external processes but also and stages written in +// Go. +type Pipeline struct { + env Env + + stdin io.Reader + stdout io.WriteCloser + stages []Stage + cancel func() + + // Atomically written and read value, nonzero if the pipeline has + // been started. This is only used for lifecycle sanity checks but + // does not guarantee that clients are using the class correctly. + started uint32 +} + +type nopWriteCloser struct { + io.Writer +} + +func (w nopWriteCloser) Close() error { + return nil +} + +// NewPipeline returns a Pipeline struct with all of the `options` +// applied. +func New(options ...Option) *Pipeline { + p := &Pipeline{} + + for _, option := range options { + option(p) + } + + return p +} + +// Option is a type alias for Pipeline functional options. +type Option func(*Pipeline) + +// WithDir sets the default directory for running external commands. +func WithDir(dir string) Option { + return func(p *Pipeline) { + p.env.Dir = dir + } +} + +// WithStdin assigns stdin to the first command in the pipeline. 
+func WithStdin(stdin io.Reader) Option { + return func(p *Pipeline) { + p.stdin = stdin + } +} + +// WithStdout assigns stdout to the last command in the pipeline. +func WithStdout(stdout io.Writer) Option { + return func(p *Pipeline) { + p.stdout = nopWriteCloser{stdout} + } +} + +// WithStdoutCloser assigns stdout to the last command in the +// pipeline, and closes stdout when it's done. +func WithStdoutCloser(stdout io.WriteCloser) Option { + return func(p *Pipeline) { + p.stdout = stdout + } +} + +func (p *Pipeline) hasStarted() bool { + return atomic.LoadUint32(&p.started) != 0 +} + +// Add appends one or more stages to the pipeline. +func (p *Pipeline) Add(stages ...Stage) { + if p.hasStarted() { + panic("attempt to modify a pipeline that has already started") + } + + p.stages = append(p.stages, stages...) +} + +// AddWithIgnoredError appends one or more stages that are ignoring +// the passed in error to the pipeline. +func (p *Pipeline) AddWithIgnoredError(em ErrorMatcher, stages ...Stage) { + if p.hasStarted() { + panic("attempt to modify a pipeline that has already started") + } + + for _, stage := range stages { + p.stages = append(p.stages, IgnoreError(stage, em)) + } +} + +// Start starts the commands in the pipeline. If `Start()` exits +// without an error, `Wait()` must also be called, to allow all +// resources to be freed. +func (p *Pipeline) Start(ctx context.Context) error { + if p.hasStarted() { + panic("attempt to start a pipeline that has already started") + } + + atomic.StoreUint32(&p.started, 1) + ctx, p.cancel = context.WithCancel(ctx) + + var nextStdin io.ReadCloser + if p.stdin != nil { + // We don't want the first stage to actually close this, and + // it's not even an `io.ReadCloser`, so fake it: + nextStdin = ioutil.NopCloser(p.stdin) + } + + for i, s := range p.stages { + var err error + stdout, err := s.Start(ctx, p.env, nextStdin) + if err != nil { + // Close the pipe that the previous stage was writing to. 
+ // That should cause it to exit even if it's not minding + // its context. + if nextStdin != nil { + _ = nextStdin.Close() + } + + // Kill and wait for any stages that have been started + // already to finish: + p.cancel() + for _, s := range p.stages[:i] { + _ = s.Wait() + } + return fmt.Errorf("starting pipeline stage %q: %w", s.Name(), err) + } + nextStdin = stdout + } + + // If the pipeline was configured with a `stdout`, add a synthetic + // stage to copy the last stage's stdout to that writer: + if p.stdout != nil { + c := newIOCopier(p.stdout) + p.stages = append(p.stages, c) + // `ioCopier.Start()` never fails: + _, _ = c.Start(ctx, p.env, nextStdin) + } + + return nil +} + +func (p *Pipeline) Output(ctx context.Context) ([]byte, error) { + var buf bytes.Buffer + p.stdout = nopWriteCloser{&buf} + err := p.Run(ctx) + return buf.Bytes(), err +} + +// Wait waits for each stage in the pipeline to exit. +func (p *Pipeline) Wait() error { + if !p.hasStarted() { + panic("unable to wait on a pipeline that has not started") + } + + // Make sure that all of the cleanup eventually happens: + defer p.cancel() + + var earliestStageErr error + var earliestFailedStage Stage + + for i := len(p.stages) - 1; i >= 0; i-- { + s := p.stages[i] + err := s.Wait() + if err != nil { + // Overwrite any existing values here so that we end up + // retaining the last error that we see; i.e., the error + // that happened earliest in the pipeline. + earliestStageErr = err + earliestFailedStage = s + } + } + + if earliestStageErr != nil { + return fmt.Errorf("%s: %w", earliestFailedStage.Name(), earliestStageErr) + } + + return nil +} + +// Run starts and waits for the commands in the pipeline. 
+func (p *Pipeline) Run(ctx context.Context) error { + if err := p.Start(ctx); err != nil { + return err + } + + return p.Wait() +} diff --git a/internal/pipe/pipeline_test.go b/internal/pipe/pipeline_test.go new file mode 100644 index 0000000..d30a04b --- /dev/null +++ b/internal/pipe/pipeline_test.go @@ -0,0 +1,664 @@ +package pipe_test + +import ( + "bufio" + "bytes" + "context" + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "strconv" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/goleak" + + "github.com/github/git-sizer/internal/pipe" +) + +func TestMain(m *testing.M) { + // Check whether this package's test suite leaks any goroutines: + goleak.VerifyTestMain(m) +} + +func TestPipelineFirstStageFailsToStart(t *testing.T) { + t.Parallel() + ctx := context.Background() + + startErr := errors.New("foo") + + p := pipe.New() + p.Add( + ErrorStartingStage{startErr}, + ErrorStartingStage{errors.New("this error should never happen")}, + ) + assert.ErrorIs(t, p.Run(ctx), startErr) +} + +func TestPipelineSecondStageFailsToStart(t *testing.T) { + t.Parallel() + ctx := context.Background() + + startErr := errors.New("foo") + + p := pipe.New() + p.Add( + seqFunction(20000), + ErrorStartingStage{startErr}, + ) + assert.ErrorIs(t, p.Run(ctx), startErr) +} + +func TestPipelineSingleCommandOutput(t *testing.T) { + t.Parallel() + ctx := context.Background() + + p := pipe.New() + p.Add(pipe.Command("echo", "hello world")) + out, err := p.Output(ctx) + if assert.NoError(t, err) { + assert.EqualValues(t, "hello world\n", out) + } +} + +func TestPipelineSingleCommandWithStdout(t *testing.T) { + t.Parallel() + ctx := context.Background() + + stdout := &bytes.Buffer{} + + p := pipe.New(pipe.WithStdout(stdout)) + p.Add(pipe.Command("echo", "hello world")) + if assert.NoError(t, p.Run(ctx)) { + assert.Equal(t, "hello world\n", stdout.String()) + } +} + +func TestNontrivialPipeline(t *testing.T) { + 
t.Parallel() + ctx := context.Background() + + p := pipe.New() + p.Add( + pipe.Command("echo", "hello world"), + pipe.Command("sed", "s/hello/goodbye/"), + ) + out, err := p.Output(ctx) + if assert.NoError(t, err) { + assert.EqualValues(t, "goodbye world\n", out) + } +} + +func TestPipelineReadFromSlowly(t *testing.T) { + t.Parallel() + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + r, w := io.Pipe() + + var buf []byte + readErr := make(chan error, 1) + + go func() { + time.Sleep(200 * time.Millisecond) + var err error + buf, err = ioutil.ReadAll(r) + readErr <- err + }() + + p := pipe.New(pipe.WithStdout(w)) + p.Add(pipe.Command("echo", "hello world")) + assert.NoError(t, p.Run(ctx)) + + time.Sleep(100 * time.Millisecond) + // It's not super-intuitive, but `w` has to be closed here so that + // the `ioutil.ReadAll()` call above knows that it's done: + _ = w.Close() + + assert.NoError(t, <-readErr) + assert.Equal(t, "hello world\n", string(buf)) +} + +func TestPipelineReadFromSlowly2(t *testing.T) { + t.Parallel() + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + r, w := io.Pipe() + + var buf []byte + readErr := make(chan error, 1) + + go func() { + time.Sleep(100 * time.Millisecond) + for { + var c [1]byte + _, err := r.Read(c[:]) + if err != nil { + if err == io.EOF { + readErr <- nil + return + } + readErr <- err + return + } + buf = append(buf, c[0]) + time.Sleep(1 * time.Millisecond) + } + }() + + p := pipe.New(pipe.WithStdout(w)) + p.Add(pipe.Command("seq", "100")) + assert.NoError(t, p.Run(ctx)) + + time.Sleep(200 * time.Millisecond) + // It's not super-intuitive, but `w` has to be closed here so that + // the `ioutil.ReadAll()` call above knows that it's done: + _ = w.Close() + + assert.NoError(t, <-readErr) + assert.Equal(t, 292, len(buf)) +} + +func TestPipelineTwoCommandsPiping(t *testing.T) { + t.Parallel() + ctx := context.Background() + + p := pipe.New() + 
p.Add(pipe.Command("echo", "hello world")) + assert.Panics(t, func() { p.Add(pipe.Command("")) }) + out, err := p.Output(ctx) + if assert.NoError(t, err) { + assert.EqualValues(t, "hello world\n", out) + } +} + +func TestPipelineDir(t *testing.T) { + t.Parallel() + ctx := context.Background() + + wdir, err := os.Getwd() + require.NoError(t, err) + dir, err := ioutil.TempDir(wdir, "pipeline-test-") + require.NoError(t, err) + defer os.RemoveAll(dir) + + p := pipe.New(pipe.WithDir(dir)) + p.Add(pipe.Command("pwd")) + + out, err := p.Output(ctx) + if assert.NoError(t, err) { + assert.Equal(t, dir, strings.TrimSuffix(string(out), "\n")) + } +} + +func TestPipelineExit(t *testing.T) { + t.Parallel() + ctx := context.Background() + + p := pipe.New() + p.Add( + pipe.Command("false"), + pipe.Command("true"), + ) + assert.EqualError(t, p.Run(ctx), "false: exit status 1") +} + +func TestPipelineStderr(t *testing.T) { + t.Parallel() + ctx := context.Background() + + dir, err := ioutil.TempDir("", "pipeline-test-") + require.NoError(t, err) + defer os.RemoveAll(dir) + + p := pipe.New(pipe.WithDir(dir)) + p.Add(pipe.Command("ls", "doesnotexist")) + + _, err = p.Output(ctx) + if assert.Error(t, err) { + assert.Contains(t, err.Error(), "ls: exit status") + } +} + +func TestPipelineInterrupted(t *testing.T) { + t.Parallel() + stdout := &bytes.Buffer{} + + p := pipe.New(pipe.WithStdout(stdout)) + p.Add(pipe.Command("sleep", "10")) + + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond) + defer cancel() + + err := p.Start(ctx) + require.NoError(t, err) + + err = p.Wait() + assert.ErrorIs(t, err, context.DeadlineExceeded) +} + +func TestPipelineCanceled(t *testing.T) { + t.Parallel() + + stdout := &bytes.Buffer{} + + p := pipe.New(pipe.WithStdout(stdout)) + p.Add(pipe.Command("sleep", "10")) + + ctx, cancel := context.WithCancel(context.Background()) + + err := p.Start(ctx) + require.NoError(t, err) + + cancel() + + err = p.Wait() + assert.ErrorIs(t, err, 
context.Canceled) +} + +// Verify the correct error if a command in the pipeline exits before +// reading all of its predecessor's output. Note that the amount of +// unread output in this case *does fit* within the OS-level pipe +// buffer. +func TestLittleEPIPE(t *testing.T) { + t.Parallel() + + p := pipe.New() + p.Add( + pipe.Command("sh", "-c", "sleep 1; echo foo"), + pipe.Command("true"), + ) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + err := p.Run(ctx) + assert.EqualError(t, err, "sh: signal: broken pipe") +} + +// Verify the correct error if one command in the pipeline exits +// before reading all of its predecessor's output. Note that the +// amount of unread output in this case *does not fit* within the +// OS-level pipe buffer. +func TestBigEPIPE(t *testing.T) { + t.Parallel() + + p := pipe.New() + p.Add( + pipe.Command("seq", "100000"), + pipe.Command("true"), + ) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + err := p.Run(ctx) + assert.EqualError(t, err, "seq: signal: broken pipe") +} + +// Verify the correct error if one command in the pipeline exits +// before reading all of its predecessor's output. Note that the +// amount of unread output in this case *does not fit* within the +// OS-level pipe buffer. 
+func TestIgnoredSIGPIPE(t *testing.T) { + t.Parallel() + + p := pipe.New() + p.Add( + pipe.IgnoreError(pipe.Command("seq", "100000"), pipe.IsSIGPIPE), + pipe.Command("echo", "foo"), + ) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + out, err := p.Output(ctx) + assert.NoError(t, err) + assert.EqualValues(t, "foo\n", out) +} + +func TestFunction(t *testing.T) { + t.Parallel() + ctx := context.Background() + + p := pipe.New() + p.Add( + pipe.Print("hello world"), + pipe.Function( + "farewell", + func(_ context.Context, _ pipe.Env, stdin io.Reader, stdout io.Writer) error { + buf, err := ioutil.ReadAll(stdin) + if err != nil { + return err + } + if string(buf) != "hello world" { + return fmt.Errorf("expected \"hello world\"; got %q", string(buf)) + } + _, err = stdout.Write([]byte("goodbye, cruel world")) + return err + }, + ), + ) + + out, err := p.Output(ctx) + assert.NoError(t, err) + assert.EqualValues(t, "goodbye, cruel world", out) +} + +func TestPipelineWithFunction(t *testing.T) { + t.Parallel() + ctx := context.Background() + + p := pipe.New() + p.Add( + pipe.Command("echo", "-n", "hello world"), + pipe.Function( + "farewell", + func(_ context.Context, _ pipe.Env, stdin io.Reader, stdout io.Writer) error { + buf, err := ioutil.ReadAll(stdin) + if err != nil { + return err + } + if string(buf) != "hello world" { + return fmt.Errorf("expected \"hello world\"; got %q", string(buf)) + } + _, err = stdout.Write([]byte("goodbye, cruel world")) + return err + }, + ), + pipe.Command("tr", "a-z", "A-Z"), + ) + + out, err := p.Output(ctx) + assert.NoError(t, err) + assert.EqualValues(t, "GOODBYE, CRUEL WORLD", out) +} + +type ErrorStartingStage struct { + err error +} + +func (s ErrorStartingStage) Name() string { + return "errorStartingStage" +} + +func (s ErrorStartingStage) Start( + ctx context.Context, env pipe.Env, stdin io.ReadCloser, +) (io.ReadCloser, error) { + return ioutil.NopCloser(&bytes.Buffer{}), s.err +} + 
+func (s ErrorStartingStage) Wait() error { + return nil +} + +func seqFunction(n int) pipe.Stage { + return pipe.Function( + "seq", + func(_ context.Context, _ pipe.Env, _ io.Reader, stdout io.Writer) error { + for i := 1; i <= n; i++ { + _, err := fmt.Fprintf(stdout, "%d\n", i) + if err != nil { + return err + } + } + return nil + }, + ) +} + +func TestPipelineWithLinewiseFunction(t *testing.T) { + t.Parallel() + ctx := context.Background() + + p := pipe.New() + // Print the numbers from 1 to 20 (generated from scratch): + p.Add( + seqFunction(20), + // Discard all but the multiples of 5, and emit the results + // separated by spaces on one line: + pipe.LinewiseFunction( + "multiples-of-5", + func(_ context.Context, _ pipe.Env, line []byte, w *bufio.Writer) error { + n, err := strconv.Atoi(string(line)) + if err != nil { + return err + } + if n%5 != 0 { + return nil + } + _, err = fmt.Fprintf(w, " %d", n) + return err + }, + ), + // Read the words and square them, emitting the results one per + // line: + pipe.ScannerFunction( + "square-multiples-of-5", + func(r io.Reader) (pipe.Scanner, error) { + scanner := bufio.NewScanner(r) + scanner.Split(bufio.ScanWords) + return scanner, nil + }, + func(_ context.Context, _ pipe.Env, line []byte, w *bufio.Writer) error { + n, err := strconv.Atoi(string(line)) + if err != nil { + return err + } + _, err = fmt.Fprintf(w, "%d\n", n*n) + return err + }, + ), + ) + + out, err := p.Output(ctx) + assert.NoError(t, err) + assert.EqualValues(t, "25\n100\n225\n400\n", out) +} + +func TestScannerAlwaysFlushes(t *testing.T) { + t.Parallel() + ctx := context.Background() + + var length int64 + + p := pipe.New() + // Print the numbers from 1 to 20 (generated from scratch): + p.Add( + pipe.IgnoreError( + seqFunction(20), + pipe.IsPipeError, + ), + // Pass the numbers through up to 7, then exit with an + // ignored error: + pipe.IgnoreError( + pipe.LinewiseFunction( + "error-after-7", + func(_ context.Context, _ pipe.Env, line []byte, w 
*bufio.Writer) error { + fmt.Fprintf(w, "%s\n", line) + if string(line) == "7" { + return errors.New("ignore") + } + return nil + }, + ), + func(err error) bool { + return err.Error() == "ignore" + }, + ), + // Read the numbers and add them into the sum: + pipe.Function( + "compute-length", + func(_ context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { + var err error + length, err = io.Copy(ioutil.Discard, stdin) + return err + }, + ), + ) + + err := p.Run(ctx) + assert.NoError(t, err) + // Make sure that all of the bytes emitted before the second + // stage's error were received by the third stage: + assert.EqualValues(t, 14, length) +} + +func TestScannerFinishEarly(t *testing.T) { + t.Parallel() + ctx := context.Background() + + var length int64 + + p := pipe.New() + // Print the numbers from 1 to 20 (generated from scratch): + p.Add( + pipe.IgnoreError( + seqFunction(20), + pipe.IsPipeError, + ), + // Pass the numbers through up to 7, then exit with an + // ignored error: + pipe.LinewiseFunction( + "finish-after-7", + func(_ context.Context, _ pipe.Env, line []byte, w *bufio.Writer) error { + fmt.Fprintf(w, "%s\n", line) + if string(line) == "7" { + return pipe.FinishEarly + } + return nil + }, + ), + // Read the numbers and add them into the sum: + pipe.Function( + "compute-length", + func(_ context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { + var err error + length, err = io.Copy(ioutil.Discard, stdin) + return err + }, + ), + ) + + err := p.Run(ctx) + assert.NoError(t, err) + // Make sure that all of the bytes emitted before the second + // stage's error were received by the third stage: + assert.EqualValues(t, 14, length) +} + +func TestPrintln(t *testing.T) { + t.Parallel() + ctx := context.Background() + + p := pipe.New() + p.Add(pipe.Println("Look Ma, no hands!")) + out, err := p.Output(ctx) + if assert.NoError(t, err) { + assert.EqualValues(t, "Look Ma, no hands!\n", out) + } +} + +func TestPrintf(t *testing.T) { + 
t.Parallel() + ctx := context.Background() + + p := pipe.New() + p.Add(pipe.Printf("Strangely recursive: %T", p)) + out, err := p.Output(ctx) + if assert.NoError(t, err) { + assert.EqualValues(t, "Strangely recursive: *pipe.Pipeline", out) + } +} + +func BenchmarkSingleProgram(b *testing.B) { + ctx := context.Background() + + for i := 0; i < b.N; i++ { + p := pipe.New() + p.Add( + pipe.Command("true"), + ) + assert.NoError(b, p.Run(ctx)) + } +} + +func BenchmarkTenPrograms(b *testing.B) { + ctx := context.Background() + + for i := 0; i < b.N; i++ { + p := pipe.New() + p.Add( + pipe.Command("echo", "hello world"), + pipe.Command("cat"), + pipe.Command("cat"), + pipe.Command("cat"), + pipe.Command("cat"), + pipe.Command("cat"), + pipe.Command("cat"), + pipe.Command("cat"), + pipe.Command("cat"), + pipe.Command("cat"), + ) + out, err := p.Output(ctx) + if assert.NoError(b, err) { + assert.EqualValues(b, "hello world\n", out) + } + } +} + +func BenchmarkTenFunctions(b *testing.B) { + ctx := context.Background() + + for i := 0; i < b.N; i++ { + p := pipe.New() + p.Add( + pipe.Println("hello world"), + pipe.Function("copy1", catFn), + pipe.Function("copy2", catFn), + pipe.Function("copy3", catFn), + pipe.Function("copy4", catFn), + pipe.Function("copy5", catFn), + pipe.Function("copy6", catFn), + pipe.Function("copy7", catFn), + pipe.Function("copy8", catFn), + pipe.Function("copy9", catFn), + ) + out, err := p.Output(ctx) + if assert.NoError(b, err) { + assert.EqualValues(b, "hello world\n", out) + } + } +} + +func BenchmarkTenMixedStages(b *testing.B) { + ctx := context.Background() + + for i := 0; i < b.N; i++ { + p := pipe.New() + p.Add( + pipe.Command("echo", "hello world"), + pipe.Function("copy1", catFn), + pipe.Command("cat"), + pipe.Function("copy2", catFn), + pipe.Command("cat"), + pipe.Function("copy3", catFn), + pipe.Command("cat"), + pipe.Function("copy4", catFn), + pipe.Command("cat"), + pipe.Function("copy5", catFn), + ) + out, err := p.Output(ctx) + if 
assert.NoError(b, err) { + assert.EqualValues(b, "hello world\n", out) + } + } +} + +func catFn(_ context.Context, _ pipe.Env, stdin io.Reader, stdout io.Writer) error { + _, err := io.Copy(stdout, stdin) + return err +} diff --git a/internal/pipe/print.go b/internal/pipe/print.go new file mode 100644 index 0000000..766418d --- /dev/null +++ b/internal/pipe/print.go @@ -0,0 +1,37 @@ +package pipe + +import ( + "context" + "fmt" + "io" +) + +func Print(a ...interface{}) Stage { + return Function( + "print", + func(_ context.Context, _ Env, _ io.Reader, stdout io.Writer) error { + _, err := fmt.Fprint(stdout, a...) + return err + }, + ) +} + +func Println(a ...interface{}) Stage { + return Function( + "println", + func(_ context.Context, _ Env, _ io.Reader, stdout io.Writer) error { + _, err := fmt.Fprintln(stdout, a...) + return err + }, + ) +} + +func Printf(format string, a ...interface{}) Stage { + return Function( + "printf", + func(_ context.Context, _ Env, _ io.Reader, stdout io.Writer) error { + _, err := fmt.Fprintf(stdout, format, a...) + return err + }, + ) +} diff --git a/internal/pipe/scanner.go b/internal/pipe/scanner.go new file mode 100644 index 0000000..6294179 --- /dev/null +++ b/internal/pipe/scanner.go @@ -0,0 +1,75 @@ +package pipe + +import ( + "bufio" + "context" + "errors" + "io" +) + +// Scanner defines the interface (which is implemented by +// `bufio.Scanner`) that is needed by `AddScannerFunction()`. See +// `bufio.Scanner` for how these methods should behave. +type Scanner interface { + Scan() bool + Bytes() []byte + Err() error +} + +// FinishEarly is an error that can be returned by a +// `LinewiseStageFunc` to request that the iteration be ended early, +// without an error. +//nolint:revive +var FinishEarly = errors.New("finish stage early") + +// NewScannerFunc is used to create a `Scanner` for scanning input +// that is coming from `r`. 
+type NewScannerFunc func(r io.Reader) (Scanner, error) + +// ScannerFunction creates a function-based `Stage`. The function will +// be passed input, one line at a time, and may emit output. See the +// definition of `LinewiseStageFunc` for more information. +func ScannerFunction( + name string, newScanner NewScannerFunc, f LinewiseStageFunc, +) Stage { + stage := Function( + name, + func(ctx context.Context, env Env, stdin io.Reader, stdout io.Writer) (theErr error) { + scanner, err := newScanner(stdin) + if err != nil { + return err + } + + var out *bufio.Writer + if stdout != nil { + out = bufio.NewWriter(stdout) + defer func() { + err := out.Flush() + if err != nil && theErr == nil { + // Note: this sets the named return value, + // thereby causing the whole stage to report + // the error. + theErr = err + } + }() + } + + for scanner.Scan() { + if ctx.Err() != nil { + return ctx.Err() + } + err := f(ctx, env, scanner.Bytes(), out) + if err != nil { + return err + } + } + if err := scanner.Err(); err != nil { + return err + } + + return nil + // `p.AddFunction()` arranges for `stdout` to be closed. + }, + ) + return IgnoreError(stage, IsError(FinishEarly)) +} diff --git a/internal/pipe/stage.go b/internal/pipe/stage.go new file mode 100644 index 0000000..f3d74d9 --- /dev/null +++ b/internal/pipe/stage.go @@ -0,0 +1,34 @@ +package pipe + +import ( + "context" + "io" +) + +// Stage is an element of a `Pipeline`. +type Stage interface { + // Name returns the name of the stage. + Name() string + + // Start starts the stage in the background, in the environment + // described by `env`, and using `stdin` as input. (`stdin` should + // be set to `nil` if the stage is to receive no input, which + // might be the case for the first stage in a pipeline.) It + // returns an `io.ReadCloser` from which the stage's output can be + // read (or `nil` if it generates no output, which should only be + // the case for the last stage in a pipeline). 
It is the stages' + // responsibility to close `stdin` (if it is not nil) when it has + // read all of the input that it needs, and to close the write end + // of its output reader when it is done, as that is generally how + // the subsequent stage knows that it has received all of its + // input and can finish its work, too. + // + // If `Start()` returns without an error, `Wait()` must also be + // called, to allow all resources to be freed. + Start(ctx context.Context, env Env, stdin io.ReadCloser) (io.ReadCloser, error) + + // Wait waits for the stage to be done, either because it has + // finished or because it has been killed due to the expiration of + // the context passed to `Start()`. + Wait() error +} From 5677b0d1edd6137ecf5bbea6dbc1113ae7d4cc25 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 5 Nov 2021 15:59:21 +0100 Subject: [PATCH 112/176] Pipeline: prefer to report non-pipe errors to the caller If a later stage of the pipeline fails, it is often the case that earlier stages fail with some sort of pipe-related error (because their `stdout` pipe gets closed). Those pipe errors are usually not very helpful in figuring out the underlying problem. So if there are any non-pipe errors, report the earliest one of those. If there are only pipe errors, report the latest one of those. --- internal/pipe/pipeline.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/internal/pipe/pipeline.go b/internal/pipe/pipeline.go index d14200b..43946d2 100644 --- a/internal/pipe/pipeline.go +++ b/internal/pipe/pipeline.go @@ -185,12 +185,17 @@ func (p *Pipeline) Wait() error { for i := len(p.stages) - 1; i >= 0; i-- { s := p.stages[i] err := s.Wait() - if err != nil { + + // We want to report the error that is most informative. We + // take that to be the error from the earliest pipeline stage + // that failed of a non-pipe error. 
If that didn't happen, + // take the error from the last pipeline stage that failed due + // to a pipe error. + if err != nil && (earliestStageErr == nil || !IsPipeError(err)) { // Overwrite any existing values here so that we end up // retaining the last error that we see; i.e., the error // that happened earliest in the pipeline. - earliestStageErr = err - earliestFailedStage = s + earliestFailedStage, earliestStageErr = s, err } } From 93e902c332eca4a4b4cf3160dd2058e68835f749 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 6 Nov 2021 13:10:42 +0100 Subject: [PATCH 113/176] Pipeline: handle `FinishEarly` errors at this level Handle `FinishEarly` errors at the `Pipeline` level rather than at the `Scanner` level. This is preparation for the next change. --- internal/pipe/pipeline.go | 35 +++++++++++++++++++++++++++++------ internal/pipe/scanner.go | 10 +--------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/internal/pipe/pipeline.go b/internal/pipe/pipeline.go index 43946d2..286a52c 100644 --- a/internal/pipe/pipeline.go +++ b/internal/pipe/pipeline.go @@ -3,6 +3,7 @@ package pipe import ( "bytes" "context" + "errors" "fmt" "io" "io/ioutil" @@ -17,6 +18,13 @@ type Env struct { Dir string } +// FinishEarly is an error that can be returned by a `Stage` to +// request that the iteration be ended early (possibly without reading +// all of its input). This "error" is considered a successful return, +// and is not reported to the caller. +//nolint:revive +var FinishEarly = errors.New("finish stage early") + // Pipeline represents a Unix-like pipe that can include multiple // stages, including external processes but also and stages written in // Go. @@ -186,12 +194,27 @@ func (p *Pipeline) Wait() error { s := p.stages[i] err := s.Wait() - // We want to report the error that is most informative. We - // take that to be the error from the earliest pipeline stage - // that failed of a non-pipe error. 
If that didn't happen, - // take the error from the last pipeline stage that failed due - // to a pipe error. - if err != nil && (earliestStageErr == nil || !IsPipeError(err)) { + // Error handling: + + if err == nil { + // No error to handle. + continue + } + + if err == FinishEarly { + // We ignore `FinishEarly` errors because that is how a + // stage informs us that it intentionally finished early. + continue + } + + // If we reach this point, then the stage exited with a + // non-ignorable error. But multiple stages might report + // errors, and we want to report the one that is most + // informative. We take that to be the error from the earliest + // pipeline stage that failed from a non-pipe error. If that + // didn't happen, take the error from the last pipeline stage + // that failed due to a pipe error. + if earliestStageErr == nil || !IsPipeError(err) { // Overwrite any existing values here so that we end up // retaining the last error that we see; i.e., the error // that happened earliest in the pipeline. diff --git a/internal/pipe/scanner.go b/internal/pipe/scanner.go index 6294179..b56b58c 100644 --- a/internal/pipe/scanner.go +++ b/internal/pipe/scanner.go @@ -3,7 +3,6 @@ package pipe import ( "bufio" "context" - "errors" "io" ) @@ -16,12 +15,6 @@ type Scanner interface { Err() error } -// FinishEarly is an error that can be returned by a -// `LinewiseStageFunc` to request that the iteration be ended early, -// without an error. -//nolint:revive -var FinishEarly = errors.New("finish stage early") - // NewScannerFunc is used to create a `Scanner` for scanning input // that is coming from `r`. 
type NewScannerFunc func(r io.Reader) (Scanner, error) @@ -32,7 +25,7 @@ type NewScannerFunc func(r io.Reader) (Scanner, error) func ScannerFunction( name string, newScanner NewScannerFunc, f LinewiseStageFunc, ) Stage { - stage := Function( + return Function( name, func(ctx context.Context, env Env, stdin io.Reader, stdout io.Writer) (theErr error) { scanner, err := newScanner(stdin) @@ -71,5 +64,4 @@ func ScannerFunction( // `p.AddFunction()` arranges for `stdout` to be closed. }, ) - return IgnoreError(stage, IsError(FinishEarly)) } From 68e73f72d1d1b7c7b47f418f7e81473477dfe2db Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 6 Nov 2021 13:37:46 +0100 Subject: [PATCH 114/176] Pipeline: suppress pipe errors in some circumstances Consider a pipeline p := pipe.New() p.Add(s1, s2, s3, s4) Suppose that stage `s3` sometimes intentionally exits early without reading all of its `stdin`. When this happens, it is common for the previous stage `s2` to fail with a pipe error. This is because `s2` often continues trying to write to its stdout, but the other end of its stdout is `s3`'s stdin, which got closed when `s3` exited. The result is that `s2` fails with `SIGPIPE`, `syscall.EPIPE`, or `io.ErrClosedPipe`. But if it is expected that `s3` exits early, then a pipe error from `s2` is uninteresting. The current way of dealing with this situation is to explicitly wrap `s2` with something like `pipe.IgnoreError(s2, IsPipeError)`, which causes a pipe error from that stage to be ignored. But often that's not the end of the story. If `s2` exits due to a pipe error, then it is often the case that `s1` will _also_ exit due to a pipe error trying to write to _its_ stdout. So to handle this situation correctly, the pipe would have to be created like p := pipe.New() p.Add( pipe.IgnoreError(s1, IsPipeError), pipe.IgnoreError(s2, IsPipeError), s3, s4, ) This is verbose, and experience shows that it is easy to forget. Let's make this simpler. 
If `s3` intentionally exits without reading all of its input, it should return the special `FinishEarly` error, thereby telling the enclosing pipeline that its early exit was intentional. In this case, change `Pipeline` to ignore any pipe error from the preceding stage, `s2`. Moreover, if `s2` exits with a pipe error, ignore any pipe error from `s1`, and so on. However, if a stage exits without an error, then again consider a pipe error from the previous stage to be interesting. There is some chance that this could suppress actual, interesting errors. For example, `s2` might exit due to a `SIGPIPE` that it got when trying to write to a file descriptor other than stdout, for example when writing to a subprocess or to a socket, and not handle that `EPIPE` internally. But I think that these situations will be vanishingly rare compared to the more common case described above, which has caused real-life irritation. Also add tests of the new behavior. --- internal/pipe/pipeline.go | 70 +++++++++--- internal/pipe/pipeline_test.go | 202 +++++++++++++++++++++++++++++++-- 2 files changed, 247 insertions(+), 25 deletions(-) diff --git a/internal/pipe/pipeline.go b/internal/pipe/pipeline.go index 286a52c..d32051b 100644 --- a/internal/pipe/pipeline.go +++ b/internal/pipe/pipeline.go @@ -190,35 +190,69 @@ func (p *Pipeline) Wait() error { var earliestStageErr error var earliestFailedStage Stage + finishedEarly := false for i := len(p.stages) - 1; i >= 0; i-- { s := p.stages[i] err := s.Wait() - // Error handling: - - if err == nil { - // No error to handle. + // Handle errors: + switch { + case err == nil: + // No error to handle. But unset the `finishedEarly` flag, + // because earlier stages shouldn't be affected by the + // later stage that finished early. + finishedEarly = false continue - } - if err == FinishEarly { + case err == FinishEarly: // We ignore `FinishEarly` errors because that is how a // stage informs us that it intentionally finished early. 
+ // Moreover, if we see a `FinishEarly` error, ignore any + // pipe error from the immediately preceding stage, + // because it probably came from trying to write to this + // stage after this stage closed its stdin. + finishedEarly = true continue - } - // If we reach this point, then the stage exited with a - // non-ignorable error. But multiple stages might report - // errors, and we want to report the one that is most - // informative. We take that to be the error from the earliest - // pipeline stage that failed from a non-pipe error. If that - // didn't happen, take the error from the last pipeline stage - // that failed due to a pipe error. - if earliestStageErr == nil || !IsPipeError(err) { - // Overwrite any existing values here so that we end up - // retaining the last error that we see; i.e., the error - // that happened earliest in the pipeline. + case IsPipeError(err): + switch { + case finishedEarly: + // A successor stage finished early. It is common for + // this to cause earlier stages to fail with pipe + // errors. Such errors are uninteresting, so ignore + // them. Leave the `finishedEarly` flag set, because + // the preceding stage might get a pipe error from + // trying to write to this one. + case earliestStageErr != nil: + // A later stage has already reported an error. This + // means that we don't want to report the error from + // this stage: + // + // * If the later error was also a pipe error: we want + // to report the _last_ pipe error seen, which would + // be the one already recorded. + // + // * If the later error was not a pipe error: non-pipe + // errors are always considered more important than + // pipe errors, so again we would want to keep the + // error that is already recorded. 
+ default: + // In this case, the pipe error from this stage is the + // most important error that we have seen so far, so + // remember it: + earliestFailedStage, earliestStageErr = s, err + } + + default: + // This stage exited with a non-pipe error. If multiple + // stages exited with such errors, we want to report the + // one that is most informative. We take that to be the + // error from the earliest failing stage. Since we are + // iterating through stages in reverse order, overwrite + // any existing remembered errors (which would have come + // from a later stage): earliestFailedStage, earliestStageErr = s, err + finishedEarly = false } } diff --git a/internal/pipe/pipeline_test.go b/internal/pipe/pipeline_test.go index d30a04b..d2be53d 100644 --- a/internal/pipe/pipeline_test.go +++ b/internal/pipe/pipeline_test.go @@ -514,14 +514,12 @@ func TestScannerFinishEarly(t *testing.T) { var length int64 p := pipe.New() - // Print the numbers from 1 to 20 (generated from scratch): p.Add( - pipe.IgnoreError( - seqFunction(20), - pipe.IsPipeError, - ), - // Pass the numbers through up to 7, then exit with an - // ignored error: + // Print the numbers from 1 to 20 (generated from scratch): + seqFunction(20), + + // Pass the numbers through up to 7, then exit with an ignored + // error: pipe.LinewiseFunction( "finish-after-7", func(_ context.Context, _ pipe.Env, line []byte, w *bufio.Writer) error { @@ -532,6 +530,7 @@ func TestScannerFinishEarly(t *testing.T) { return nil }, ), + // Read the numbers and add them into the sum: pipe.Function( "compute-length", @@ -574,6 +573,189 @@ func TestPrintf(t *testing.T) { } } +func TestErrors(t *testing.T) { + t.Parallel() + ctx := context.Background() + + err1 := errors.New("error1") + err2 := errors.New("error2") + + for _, tc := range []struct { + name string + stages []pipe.Stage + expectedErr error + }{ + { + name: "no-error", + stages: []pipe.Stage{ + pipe.Function("noop1", genErr(nil)), + pipe.Function("noop2", 
genErr(nil)), + pipe.Function("noop3", genErr(nil)), + }, + expectedErr: nil, + }, + { + name: "lonely-error", + stages: []pipe.Stage{ + pipe.Function("err1", genErr(err1)), + }, + expectedErr: err1, + }, + { + name: "error", + stages: []pipe.Stage{ + pipe.Function("noop1", genErr(nil)), + pipe.Function("err1", genErr(err1)), + pipe.Function("noop2", genErr(nil)), + }, + expectedErr: err1, + }, + { + name: "two-consecutive-errors", + stages: []pipe.Stage{ + pipe.Function("noop1", genErr(nil)), + pipe.Function("err1", genErr(err1)), + pipe.Function("err2", genErr(err2)), + pipe.Function("noop2", genErr(nil)), + }, + expectedErr: err1, + }, + { + name: "pipe-then-error", + stages: []pipe.Stage{ + pipe.Function("noop1", genErr(nil)), + pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), + pipe.Function("err1", genErr(err1)), + pipe.Function("noop2", genErr(nil)), + }, + expectedErr: err1, + }, + { + name: "error-then-pipe", + stages: []pipe.Stage{ + pipe.Function("noop1", genErr(nil)), + pipe.Function("err1", genErr(err1)), + pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), + pipe.Function("noop2", genErr(nil)), + }, + expectedErr: err1, + }, + { + name: "two-spaced-errors", + stages: []pipe.Stage{ + pipe.Function("noop1", genErr(nil)), + pipe.Function("err1", genErr(err1)), + pipe.Function("noop2", genErr(nil)), + pipe.Function("err2", genErr(err2)), + pipe.Function("noop3", genErr(nil)), + }, + expectedErr: err1, + }, + { + name: "finish-early-ignored", + stages: []pipe.Stage{ + pipe.Function("noop1", genErr(nil)), + pipe.Function("finish-early1", genErr(pipe.FinishEarly)), + pipe.Function("noop2", genErr(nil)), + pipe.Function("finish-early2", genErr(pipe.FinishEarly)), + pipe.Function("noop3", genErr(nil)), + }, + expectedErr: nil, + }, + { + name: "error-before-finish-early", + stages: []pipe.Stage{ + pipe.Function("err1", genErr(err1)), + pipe.Function("finish-early", genErr(pipe.FinishEarly)), + }, + expectedErr: err1, + }, + { + name: 
"error-after-finish-early", + stages: []pipe.Stage{ + pipe.Function("finish-early", genErr(pipe.FinishEarly)), + pipe.Function("err1", genErr(err1)), + }, + expectedErr: err1, + }, + { + name: "pipe-then-finish-early", + stages: []pipe.Stage{ + pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), + pipe.Function("finish-early", genErr(pipe.FinishEarly)), + }, + expectedErr: nil, + }, + { + name: "pipe-then-two-finish-early", + stages: []pipe.Stage{ + pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), + pipe.Function("finish-early1", genErr(pipe.FinishEarly)), + pipe.Function("finish-early2", genErr(pipe.FinishEarly)), + }, + expectedErr: nil, + }, + { + name: "two-pipe-then-finish-early", + stages: []pipe.Stage{ + pipe.Function("pipe-error1", genErr(io.ErrClosedPipe)), + pipe.Function("pipe-error2", genErr(io.ErrClosedPipe)), + pipe.Function("finish-early", genErr(pipe.FinishEarly)), + }, + expectedErr: nil, + }, + { + name: "pipe-then-finish-early-with-gap", + stages: []pipe.Stage{ + pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), + pipe.Function("noop", genErr(nil)), + pipe.Function("finish-early1", genErr(pipe.FinishEarly)), + }, + expectedErr: io.ErrClosedPipe, + }, + { + name: "finish-early-then-pipe", + stages: []pipe.Stage{ + pipe.Function("finish-early", genErr(pipe.FinishEarly)), + pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), + }, + expectedErr: io.ErrClosedPipe, + }, + { + name: "error-then-pipe-then-finish-early", + stages: []pipe.Stage{ + pipe.Function("err1", genErr(err1)), + pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), + pipe.Function("finish-early", genErr(pipe.FinishEarly)), + }, + expectedErr: err1, + }, + { + name: "pipe-then-error-then-finish-early", + stages: []pipe.Stage{ + pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), + pipe.Function("err1", genErr(err1)), + pipe.Function("finish-early", genErr(pipe.FinishEarly)), + }, + expectedErr: err1, + }, + } { + tc := tc + t.Run(tc.name, func(t *testing.T) { + 
t.Parallel() + + p := pipe.New() + p.Add(tc.stages...) + err := p.Run(ctx) + if tc.expectedErr == nil { + assert.NoError(t, err) + } else { + assert.ErrorIs(t, err, tc.expectedErr) + } + }) + } +} + func BenchmarkSingleProgram(b *testing.B) { ctx := context.Background() @@ -662,3 +844,9 @@ func catFn(_ context.Context, _ pipe.Env, stdin io.Reader, stdout io.Writer) err _, err := io.Copy(stdout, stdin) return err } + +func genErr(err error) pipe.StageFunc { + return func(_ context.Context, _ pipe.Env, _ io.Reader, _ io.Writer) error { + return err + } +} From 4cb432e5c038095517641ea6a775d2f30eca17d1 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 11:27:47 +0100 Subject: [PATCH 115/176] internal/pipe: get the package to build on Windows, too --- internal/pipe/command.go | 60 +++-------------------------- internal/pipe/command_unix.go | 66 ++++++++++++++++++++++++++++++++ internal/pipe/command_windows.go | 24 ++++++++++++ internal/pipe/filter-error.go | 5 ++- 4 files changed, 100 insertions(+), 55 deletions(-) create mode 100644 internal/pipe/command_unix.go create mode 100644 internal/pipe/command_windows.go diff --git a/internal/pipe/command.go b/internal/pipe/command.go index b5d6c05..d370e28 100644 --- a/internal/pipe/command.go +++ b/internal/pipe/command.go @@ -9,7 +9,6 @@ import ( "os/exec" "sync/atomic" "syscall" - "time" "golang.org/x/sync/errgroup" ) @@ -98,11 +97,8 @@ func (s *commandStage) Start( }) } - // Put the command in its own process group: - if s.cmd.SysProcAttr == nil { - s.cmd.SysProcAttr = &syscall.SysProcAttr{} - } - s.cmd.SysProcAttr.Setpgid = true + // Put the command in its own process group, if possible: + s.runInOwnProcessGroup() if err := s.cmd.Start(); err != nil { return nil, err @@ -122,53 +118,6 @@ func (s *commandStage) Start( return stdout, nil } -// kill is called to kill the process if the context expires. `err` is -// the corresponding value of `Context.Err()`. 
-func (s *commandStage) kill(err error) { - // I believe that the calls to `syscall.Kill()` in this method are - // racy. It could be that s.cmd.Wait() succeeds immediately before - // this call, in which case the process group wouldn't exist - // anymore. But I don't see any way to avoid this without - // duplicating a lot of code from `exec.Cmd`. (`os.Cmd.Kill()` and - // `os.Cmd.Signal()` appear to be race-free, but only because they - // use internal synchronization. But those methods only kill the - // process, not the process group, so they are not suitable here. - - // We started the process with PGID == PID: - pid := s.cmd.Process.Pid - select { - case <-s.done: - // Process has ended; no need to kill it again. - return - default: - } - - // Record the `ctx.Err()`, which will be used as the error result - // for this stage. - s.ctxErr.Store(err) - - // First try to kill using a relatively gentle signal so that - // the processes have a chance to clean up after themselves: - _ = syscall.Kill(-pid, syscall.SIGTERM) - - // Well-behaved processes should commit suicide after the above, - // but if they don't exit within 2s, murder the whole lot of them: - go func() { - // Use an explicit `time.Timer` rather than `time.After()` so - // that we can stop it (freeing resources) promptly if the - // command exits before the timer triggers. - timer := time.NewTimer(2 * time.Second) - defer timer.Stop() - - select { - case <-s.done: - // Process has ended; no need to kill it again. - case <-timer.C: - _ = syscall.Kill(-pid, syscall.SIGKILL) - } - }() -} - // filterCmdError interprets `err`, which was returned by `Cmd.Wait()` // (possibly `nil`), possibly modifying it or ignoring it. 
It returns // the error that should actually be returned to the caller (possibly @@ -186,7 +135,10 @@ func (s *commandStage) filterCmdError(err error) error { ctxErr, ok := s.ctxErr.Load().(error) if ok { // If the process looks like it was killed by us, substitute - // `ctxErr` for the process's own exit error. + // `ctxErr` for the process's own exit error. Note that this + // doesn't do anything on Windows, where the `Signaled()` + // method isn't implemented (it is hardcoded to return + // `false`). ps, ok := eErr.ProcessState.Sys().(syscall.WaitStatus) if ok && ps.Signaled() && (ps.Signal() == syscall.SIGTERM || ps.Signal() == syscall.SIGKILL) { diff --git a/internal/pipe/command_unix.go b/internal/pipe/command_unix.go new file mode 100644 index 0000000..c84bcf5 --- /dev/null +++ b/internal/pipe/command_unix.go @@ -0,0 +1,66 @@ +//go:build !windows +// +build !windows + +package pipe + +import ( + "syscall" + "time" +) + +// runInOwnProcessGroup arranges for `cmd` to be run in its own +// process group. +func (s *commandStage) runInOwnProcessGroup() { + // Put the command in its own process group: + if s.cmd.SysProcAttr == nil { + s.cmd.SysProcAttr = &syscall.SysProcAttr{} + } + s.cmd.SysProcAttr.Setpgid = true +} + +// kill is called to kill the process if the context expires. `err` is +// the corresponding value of `Context.Err()`. +func (s *commandStage) kill(err error) { + // I believe that the calls to `syscall.Kill()` in this method are + // racy. It could be that s.cmd.Wait() succeeds immediately before + // this call, in which case the process group wouldn't exist + // anymore. But I don't see any way to avoid this without + // duplicating a lot of code from `exec.Cmd`. (`os.Cmd.Kill()` and + // `os.Cmd.Signal()` appear to be race-free, but only because they + // use internal synchronization. But those methods only kill the + // process, not the process group, so they are not suitable here. 
+ + // We started the process with PGID == PID: + pid := s.cmd.Process.Pid + select { + case <-s.done: + // Process has ended; no need to kill it again. + return + default: + } + + // Record the `ctx.Err()`, which will be used as the error result + // for this stage. + s.ctxErr.Store(err) + + // First try to kill using a relatively gentle signal so that + // the processes have a chance to clean up after themselves: + _ = syscall.Kill(-pid, syscall.SIGTERM) + + // Well-behaved processes should commit suicide after the above, + // but if they don't exit within 2s, murder the whole lot of them: + go func() { + // Use an explicit `time.Timer` rather than `time.After()` so + // that we can stop it (freeing resources) promptly if the + // command exits before the timer triggers. + timer := time.NewTimer(2 * time.Second) + defer timer.Stop() + + select { + case <-s.done: + // Process has ended; no need to kill it again. + case <-timer.C: + _ = syscall.Kill(-pid, syscall.SIGKILL) + } + }() +} diff --git a/internal/pipe/command_windows.go b/internal/pipe/command_windows.go new file mode 100644 index 0000000..55af6e3 --- /dev/null +++ b/internal/pipe/command_windows.go @@ -0,0 +1,24 @@ +//go:build windows +// +build windows + +package pipe + +// runInOwnProcessGroup is not supported on Windows. +func (s *commandStage) runInOwnProcessGroup() {} + +// kill is called to kill the process if the context expires. `err` is +// the corresponding value of `Context.Err()`. +func (s *commandStage) kill(err error) { + select { + case <-s.done: + // Process has ended; no need to kill it again. + return + default: + } + + // Record the `ctx.Err()`, which will be used as the error result + // for this stage. 
+ s.ctxErr.Store(err) + + s.cmd.Process.Kill() +} diff --git a/internal/pipe/filter-error.go b/internal/pipe/filter-error.go index 6373be3..6e2bdd5 100644 --- a/internal/pipe/filter-error.go +++ b/internal/pipe/filter-error.go @@ -87,7 +87,10 @@ func IsError(target error) ErrorMatcher { // IsSIGPIPE returns an `ErrorMatcher` that matches `*exec.ExitError`s // that were caused by the specified signal. The match for -// `*exec.ExitError`s uses `errors.As()`. +// `*exec.ExitError`s uses `errors.As()`. Note that under Windows this +// always returns false, because on that platform +// `WaitStatus.Signaled()` isn't implemented (it is hardcoded to +// return `false`). func IsSignal(signal syscall.Signal) ErrorMatcher { return func(err error) bool { var eErr *exec.ExitError From 79299d655d2e60cf28e55ce613b4a2da21c7c277 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 12:51:12 +0100 Subject: [PATCH 116/176] Skip some tests on Windows due to missing dependencies --- internal/pipe/pipeline_test.go | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/internal/pipe/pipeline_test.go b/internal/pipe/pipeline_test.go index d2be53d..bf91444 100644 --- a/internal/pipe/pipeline_test.go +++ b/internal/pipe/pipeline_test.go @@ -9,6 +9,7 @@ import ( "io" "io/ioutil" "os" + "runtime" "strconv" "strings" "testing" @@ -125,6 +126,10 @@ func TestPipelineReadFromSlowly(t *testing.T) { } func TestPipelineReadFromSlowly2(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("FIXME: test skipped on Windows: 'seq' unavailable") + } + t.Parallel() ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -179,6 +184,10 @@ func TestPipelineTwoCommandsPiping(t *testing.T) { } func TestPipelineDir(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("FIXME: test skipped on Windows: 'pwd' incompatibility") + } + t.Parallel() ctx := context.Background() @@ -227,6 +236,10 @@ func TestPipelineStderr(t *testing.T) 
{ } func TestPipelineInterrupted(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("FIXME: test skipped on Windows: 'sleep' unavailable") + } + t.Parallel() stdout := &bytes.Buffer{} @@ -244,6 +257,10 @@ func TestPipelineInterrupted(t *testing.T) { } func TestPipelineCanceled(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("FIXME: test skipped on Windows: 'sleep' unavailable") + } + t.Parallel() stdout := &bytes.Buffer{} @@ -267,6 +284,10 @@ func TestPipelineCanceled(t *testing.T) { // unread output in this case *does fit* within the OS-level pipe // buffer. func TestLittleEPIPE(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("FIXME: test skipped on Windows: 'sleep' unavailable") + } + t.Parallel() p := pipe.New() @@ -286,6 +307,10 @@ func TestLittleEPIPE(t *testing.T) { // amount of unread output in this case *does not fit* within the // OS-level pipe buffer. func TestBigEPIPE(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("FIXME: test skipped on Windows: 'seq' unavailable") + } + t.Parallel() p := pipe.New() @@ -305,6 +330,10 @@ func TestBigEPIPE(t *testing.T) { // amount of unread output in this case *does not fit* within the // OS-level pipe buffer. func TestIgnoredSIGPIPE(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("FIXME: test skipped on Windows: 'seq' unavailable") + } + t.Parallel() p := pipe.New() From 141b75c064b99bf00efbec9c681728fb42cc44fd Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 15:16:21 +0100 Subject: [PATCH 117/176] NoProgressMeter: change type into a variable This makes it easier for other modules to use. --- meter/meter.go | 12 +++++++----- sizes/graph.go | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/meter/meter.go b/meter/meter.go index d241cc4..ca3df1f 100644 --- a/meter/meter.go +++ b/meter/meter.go @@ -105,9 +105,11 @@ func (p *progressMeter) Done() { // NoProgressMeter is a `Progress` that doesn't actually report // anything. 
-type NoProgressMeter struct{} +var NoProgressMeter noProgressMeter -func (p *NoProgressMeter) Start(string) {} -func (p *NoProgressMeter) Inc() {} -func (p *NoProgressMeter) Add(int64) {} -func (p *NoProgressMeter) Done() {} +type noProgressMeter struct{} + +func (p noProgressMeter) Start(string) {} +func (p noProgressMeter) Inc() {} +func (p noProgressMeter) Add(int64) {} +func (p noProgressMeter) Done() {} diff --git a/sizes/graph.go b/sizes/graph.go index eca2f0c..a1f5328 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -72,7 +72,7 @@ func ScanRepositoryUsingGraph( if progress { progressMeter = meter.NewProgressMeter(100 * time.Millisecond) } else { - progressMeter = &meter.NoProgressMeter{} + progressMeter = meter.NoProgressMeter } refIter, err := repo.NewReferenceIter() From 731aadecf5ad562da5f2ee3bd12e4c9db98a5025 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 15:18:12 +0100 Subject: [PATCH 118/176] meter.NewProgressMeter(): take an `io.Writer` as parameter Instead of writing directly to stderr, have the caller pass in the `io.Writer` that it should output to. --- meter/meter.go | 12 +++++++----- sizes/graph.go | 3 ++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/meter/meter.go b/meter/meter.go index ca3df1f..ea210fc 100644 --- a/meter/meter.go +++ b/meter/meter.go @@ -2,7 +2,7 @@ package meter import ( "fmt" - "os" + "io" "sync" "sync/atomic" "time" @@ -30,9 +30,10 @@ type Progress interface { var Spinners = []string{"|", "(", "<", "-", "<", "(", "|", ")", ">", "-", ">", ")"} // progressMeter is a `Progress` that reports the current state every -// `period`. +// `period` to an `io.Writer`. type progressMeter struct { lock sync.Mutex + w io.Writer format string period time.Duration lastShownCount int64 @@ -48,8 +49,9 @@ type progressMeter struct { // NewProgressMeter returns a progress meter that can be used to show // progress to a TTY periodically, including an increasing int64 // value. 
-func NewProgressMeter(period time.Duration) Progress { +func NewProgressMeter(w io.Writer, period time.Duration) Progress { return &progressMeter{ + w: w, period: period, } } @@ -81,7 +83,7 @@ func (p *progressMeter) Start(format string) { } else { s = "" } - fmt.Fprintf(os.Stderr, p.format, c, s, "\r") + fmt.Fprintf(p.w, p.format, c, s, "\r") p.lock.Unlock() } }() @@ -100,7 +102,7 @@ func (p *progressMeter) Done() { defer p.lock.Unlock() p.ticker = nil c := atomic.LoadInt64(&p.count) - fmt.Fprintf(os.Stderr, p.format, c, " ", "\n") + fmt.Fprintf(p.w, p.format, c, " ", "\n") } // NoProgressMeter is a `Progress` that doesn't actually report diff --git a/sizes/graph.go b/sizes/graph.go index a1f5328..2f0d862 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "os" "sync" "time" @@ -70,7 +71,7 @@ func ScanRepositoryUsingGraph( graph := NewGraph(rg, nameStyle) var progressMeter meter.Progress if progress { - progressMeter = meter.NewProgressMeter(100 * time.Millisecond) + progressMeter = meter.NewProgressMeter(os.Stderr, 100*time.Millisecond) } else { progressMeter = meter.NoProgressMeter } From 05e05d13792ce9de16301966dddc76f5e4456a25 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 15:25:43 +0100 Subject: [PATCH 119/176] ScanRepositoryUsingGraph(): take a `meter.Progress` argument Take a `meter.Progress` argument rather than a boolean, to give the caller more control about if/how progress is reported. 
--- git-sizer.go | 9 ++++++++- git_sizer_test.go | 11 ++++++----- sizes/graph.go | 11 ++--------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index a2a8c43..f6c1283 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -8,12 +8,14 @@ import ( "os" "runtime/pprof" "strconv" + "time" "github.com/spf13/pflag" "github.com/github/git-sizer/git" "github.com/github/git-sizer/internal/refopts" "github.com/github/git-sizer/isatty" + "github.com/github/git-sizer/meter" "github.com/github/git-sizer/sizes" ) @@ -269,7 +271,12 @@ func mainImplementation(args []string) error { return err } - historySize, err := sizes.ScanRepositoryUsingGraph(repo, rg, nameStyle, progress) + var progressMeter meter.Progress = meter.NoProgressMeter + if progress { + progressMeter = meter.NewProgressMeter(os.Stderr, 100*time.Millisecond) + } + + historySize, err := sizes.ScanRepositoryUsingGraph(repo, rg, nameStyle, progressMeter) if err != nil { return fmt.Errorf("error scanning repository: %w", err) } diff --git a/git_sizer_test.go b/git_sizer_test.go index a6b3dce..c85437a 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -19,6 +19,7 @@ import ( "github.com/github/git-sizer/counts" "github.com/github/git-sizer/git" "github.com/github/git-sizer/internal/testutils" + "github.com/github/git-sizer/meter" "github.com/github/git-sizer/sizes" ) @@ -548,7 +549,7 @@ func TestBomb(t *testing.T) { h, err := sizes.ScanRepositoryUsingGraph( repo.Repository(t), - refGrouper{}, sizes.NameStyleFull, false, + refGrouper{}, sizes.NameStyleFull, meter.NoProgressMeter, ) require.NoError(t, err) @@ -621,7 +622,7 @@ func TestTaggedTags(t *testing.T) { h, err := sizes.ScanRepositoryUsingGraph( repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, false, + refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -643,7 +644,7 @@ func TestFromSubdir(t 
*testing.T) { h, err := sizes.ScanRepositoryUsingGraph( repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, false, + refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -696,7 +697,7 @@ func TestSubmodule(t *testing.T) { // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( mainRepo.Repository(t), - refGrouper{}, sizes.NameStyleNone, false, + refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -709,7 +710,7 @@ func TestSubmodule(t *testing.T) { } h, err = sizes.ScanRepositoryUsingGraph( submRepo2.Repository(t), - refGrouper{}, sizes.NameStyleNone, false, + refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") diff --git a/sizes/graph.go b/sizes/graph.go index 2f0d862..0cd70c2 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -5,9 +5,7 @@ import ( "errors" "fmt" "io" - "os" "sync" - "time" "github.com/github/git-sizer/counts" "github.com/github/git-sizer/git" @@ -66,15 +64,10 @@ type refSeen struct { // // It returns the size data for the repository. 
func ScanRepositoryUsingGraph( - repo *git.Repository, rg RefGrouper, nameStyle NameStyle, progress bool, + repo *git.Repository, rg RefGrouper, nameStyle NameStyle, + progressMeter meter.Progress, ) (HistorySize, error) { graph := NewGraph(rg, nameStyle) - var progressMeter meter.Progress - if progress { - progressMeter = meter.NewProgressMeter(os.Stderr, 100*time.Millisecond) - } else { - progressMeter = meter.NoProgressMeter - } refIter, err := repo.NewReferenceIter() if err != nil { From 631f619ca9f1b01f9daafa334d02a9c5ef75293a Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 15:47:40 +0100 Subject: [PATCH 120/176] git-sizer.go: set up the `showRefGrouper` here This makes it unnecessary for `RefGroupBuilder` to know about logging or to refer to `os.Stderr` directly. --- git-sizer.go | 8 ++++++++ internal/refopts/ref_group_builder.go | 10 ---------- internal/refopts/show_ref_grouper.go | 22 ++++++++++++++++------ 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index f6c1283..729b658 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -108,6 +108,7 @@ func mainImplementation(args []string) error { var threshold sizes.Threshold = 1 var progress bool var version bool + var showRefs bool // Try to open the repository, but it's not an error yet if this // fails, because the user might only be asking for `--help`. 
@@ -179,6 +180,8 @@ func mainImplementation(args []string) error { rgb.AddRefopts(flags) + flags.BoolVar(&showRefs, "show-refs", false, "list the references being processed") + flags.SortFlags = false err = flags.Parse(args) @@ -271,6 +274,11 @@ func mainImplementation(args []string) error { return err } + if showRefs { + fmt.Fprintf(os.Stderr, "References (included references marked with '+'):\n") + rg = refopts.NewShowRefGrouper(rg, os.Stderr) + } + var progressMeter meter.Progress = meter.NoProgressMeter if progress { progressMeter = meter.NewProgressMeter(os.Stderr, 100*time.Millisecond) diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index b298af5..3c3179e 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -2,7 +2,6 @@ package refopts import ( "fmt" - "os" "strings" "github.com/spf13/pflag" @@ -21,8 +20,6 @@ type Configger interface { type RefGroupBuilder struct { topLevelGroup *refGroup groups map[sizes.RefGroupSymbol]*refGroup - - ShowRefs bool } // NewRefGroupBuilder creates and returns a `RefGroupBuilder` @@ -253,8 +250,6 @@ func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { ) flag.Hidden = true flag.Deprecated = "use --include=@REFGROUP" - - flags.BoolVar(&rgb.ShowRefs, "show-refs", false, "list the references being processed") } // Finish collects the information gained from processing the options @@ -280,11 +275,6 @@ func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { refGrouper.refGroups = append(refGrouper.refGroups, *refGrouper.ignoredRefGroup) } - if rgb.ShowRefs { - fmt.Fprintf(os.Stderr, "References (included references marked with '+'):\n") - return showRefGrouper{&refGrouper, os.Stderr}, nil - } - return &refGrouper, nil } diff --git a/internal/refopts/show_ref_grouper.go b/internal/refopts/show_ref_grouper.go index 3b2f742..da78ca1 100644 --- a/internal/refopts/show_ref_grouper.go +++ b/internal/refopts/show_ref_grouper.go @@ -7,18 
+7,28 @@ import ( "github.com/github/git-sizer/sizes" ) -// showRefFilter is a `git.ReferenceFilter` that logs its choices to Stderr. +// showRefFilter is a `git.ReferenceFilter` that logs its choices to +// an `io.Writer`. type showRefGrouper struct { - *refGrouper + sizes.RefGrouper w io.Writer } -func (refGrouper showRefGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) { - walk, symbols := refGrouper.refGrouper.Categorize(refname) +// Return a `sizes.RefGrouper` that wraps its argument and behaves +// like it except that it also logs its decisions to an `io.Writer`. +func NewShowRefGrouper(rg sizes.RefGrouper, w io.Writer) sizes.RefGrouper { + return showRefGrouper{ + RefGrouper: rg, + w: w, + } +} + +func (rg showRefGrouper) Categorize(refname string) (bool, []sizes.RefGroupSymbol) { + walk, symbols := rg.RefGrouper.Categorize(refname) if walk { - fmt.Fprintf(refGrouper.w, "+ %s\n", refname) + fmt.Fprintf(rg.w, "+ %s\n", refname) } else { - fmt.Fprintf(refGrouper.w, " %s\n", refname) + fmt.Fprintf(rg.w, " %s\n", refname) } return walk, symbols } From 6778d12e3c8c2f970e4a75baa2d19b64d25c9081 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 15:54:34 +0100 Subject: [PATCH 121/176] mainImplementation(): take `stdout` and `stderr` as arguments Instead of writing directly to `os.Stdout` or `os.Stderr`, take two `io.Writer`s as arguments and write to those. This permits a caller to specify another destination for its output, which could be interesting for testing. 
--- git-sizer.go | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 729b658..2e7c02b 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -93,14 +93,14 @@ var ReleaseVersion string var BuildVersion string func main() { - err := mainImplementation(os.Args[1:]) + err := mainImplementation(os.Stdout, os.Stderr, os.Args[1:]) if err != nil { fmt.Fprintf(os.Stderr, "error: %s\n", err) os.Exit(1) } } -func mainImplementation(args []string) error { +func mainImplementation(stdout, stderr io.Writer, args []string) error { var nameStyle sizes.NameStyle = sizes.NameStyleFull var cpuprofile string var jsonOutput bool @@ -116,7 +116,7 @@ func mainImplementation(args []string) error { flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) flags.Usage = func() { - fmt.Print(usage) + fmt.Fprint(stdout, usage) } flags.VarP( @@ -154,11 +154,15 @@ func mainImplementation(args []string) error { flags.BoolVarP(&jsonOutput, "json", "j", false, "output results in JSON format") flags.IntVar(&jsonVersion, "json-version", 1, "JSON format version to output (1 or 2)") - atty, err := isatty.Isatty(os.Stderr.Fd()) - if err != nil { - atty = false + defaultProgress := false + if f, ok := stderr.(*os.File); ok { + atty, err := isatty.Isatty(f.Fd()) + if err == nil && atty { + defaultProgress = true + } } - flags.BoolVar(&progress, "progress", atty, "report progress to stderr") + + flags.BoolVar(&progress, "progress", defaultProgress, "report progress to stderr") flags.BoolVar(&version, "version", false, "report the git-sizer version number") flags.Var(&NegatedBoolValue{&progress}, "no-progress", "suppress progress output") flags.Lookup("no-progress").NoOptDefVal = "true" @@ -205,9 +209,9 @@ func mainImplementation(args []string) error { if version { if ReleaseVersion != "" { - fmt.Printf("git-sizer release %s\n", ReleaseVersion) + fmt.Fprintf(stdout, "git-sizer release %s\n", ReleaseVersion) } else { - 
fmt.Printf("git-sizer build %s\n", BuildVersion) + fmt.Fprintf(stdout, "git-sizer build %s\n", BuildVersion) } return nil } @@ -275,13 +279,13 @@ func mainImplementation(args []string) error { } if showRefs { - fmt.Fprintf(os.Stderr, "References (included references marked with '+'):\n") - rg = refopts.NewShowRefGrouper(rg, os.Stderr) + fmt.Fprintf(stderr, "References (included references marked with '+'):\n") + rg = refopts.NewShowRefGrouper(rg, stderr) } var progressMeter meter.Progress = meter.NoProgressMeter if progress { - progressMeter = meter.NewProgressMeter(os.Stderr, 100*time.Millisecond) + progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } historySize, err := sizes.ScanRepositoryUsingGraph(repo, rg, nameStyle, progressMeter) @@ -303,11 +307,10 @@ func mainImplementation(args []string) error { if err != nil { return fmt.Errorf("could not convert %v to json: %w", historySize, err) } - fmt.Printf("%s\n", j) + fmt.Fprintf(stdout, "%s\n", j) } else { if _, err := io.WriteString( - os.Stdout, - historySize.TableString(rg.Groups(), threshold, nameStyle), + stdout, historySize.TableString(rg.Groups(), threshold, nameStyle), ); err != nil { return fmt.Errorf("writing output: %w", err) } From c991c8e0c8436a2547ef7cdfd48a554cec0eecee Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 10 Nov 2021 17:58:23 +0100 Subject: [PATCH 122/176] TestPipelineDir: use a substitute for `pwd` on Windows --- internal/pipe/pipeline_test.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/internal/pipe/pipeline_test.go b/internal/pipe/pipeline_test.go index bf91444..0abbedc 100644 --- a/internal/pipe/pipeline_test.go +++ b/internal/pipe/pipeline_test.go @@ -9,6 +9,7 @@ import ( "io" "io/ioutil" "os" + "path/filepath" "runtime" "strconv" "strings" @@ -184,10 +185,6 @@ func TestPipelineTwoCommandsPiping(t *testing.T) { } func TestPipelineDir(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on 
Windows: 'pwd' incompatibility") - } - t.Parallel() ctx := context.Background() @@ -198,11 +195,16 @@ func TestPipelineDir(t *testing.T) { defer os.RemoveAll(dir) p := pipe.New(pipe.WithDir(dir)) - p.Add(pipe.Command("pwd")) + switch runtime.GOOS { + case "windows": + p.Add(pipe.Command("bash", "-c", "pwd -W")) + default: + p.Add(pipe.Command("pwd")) + } out, err := p.Output(ctx) if assert.NoError(t, err) { - assert.Equal(t, dir, strings.TrimSuffix(string(out), "\n")) + assert.Equal(t, filepath.Clean(dir), filepath.Clean(strings.TrimSuffix(string(out), "\n"))) } } From 571f31cc859ceaa1ae8a246974fe32597779e870 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 13:51:17 +0100 Subject: [PATCH 123/176] go.mod: bump to Go 1.17 --- go.mod | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 58a3901..926a1c8 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/github/git-sizer -go 1.16 +go 1.17 require ( github.com/cli/safeexec v1.0.0 @@ -10,3 +10,8 @@ require ( go.uber.org/goleak v1.1.12 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c ) + +require ( + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect +) From c444fba62309d6da4d8143f185937681373e6751 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 13:57:45 +0100 Subject: [PATCH 124/176] Add an action to run a linter over the code --- .github/workflows/lint.yml | 55 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..52a9f07 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,55 @@ +name: Lint +on: + push: + paths: + - "**.go" + - go.mod + - go.sum + pull_request: + paths: + - "**.go" + - go.mod + - go.sum + +jobs: + lint: + runs-on: ubuntu-latest + + steps: + - name: Set up Go + uses: 
actions/setup-go@v2 + with: + go-version: 1.17 + + - name: Check out code + uses: actions/checkout@v2 + + - name: Verify dependencies + run: | + go mod verify + go mod download + + LINT_VERSION=1.43.0 + curl -fsSL https://github.com/golangci/golangci-lint/releases/download/v${LINT_VERSION}/golangci-lint-${LINT_VERSION}-linux-amd64.tar.gz | \ + tar xz --strip-components 1 --wildcards \*/golangci-lint + mkdir -p bin && mv golangci-lint bin/ + + - name: Run checks + run: | + STATUS=0 + assert-nothing-changed() { + local diff + "$@" >/dev/null || return 1 + if ! diff="$(git diff -U1 --color --exit-code)"; then + printf '\e[31mError: running `\e[1m%s\e[22m` results in modifications that you must check into version control:\e[0m\n%s\n\n' "$*" "$diff" >&2 + git checkout -- . + STATUS=1 + fi + } + + assert-nothing-changed go fmt ./... + assert-nothing-changed go mod tidy + + bin/golangci-lint run --out-format=github-actions --timeout=3m || STATUS=$? + + exit $STATUS From 53142fbb833809c46ad8bd3e20c2cde6ed1d2735 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 13:58:12 +0100 Subject: [PATCH 125/176] test.yml: fix spelling (and make consistent with `lint.yml`) --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fa04802..f658b81 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,12 +8,12 @@ jobs: fail-fast: false runs-on: ${{ matrix.os }} steps: - - name: Setup go + - name: Set up Go uses: actions/setup-go@v2 with: go-version: '1.17' - - name: Checkout code + - name: Check out code uses: actions/checkout@v2 - name: Get full repo history From 939c1764d8303dbc0d4b24533fe9ed2b1ad7eeaf Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 2 Nov 2021 09:47:38 +0100 Subject: [PATCH 126/176] git/git.go: split the code out into multiple files: * `git/batch_obj_iter.go` * `git/commit.go` * `git/git.go` * 
`git/git_command.go` * `git/obj_head_iter.go` * `git/obj_iter.go` * `git/oid.go` * `git/ref_iter.go` * `git/reference.go` * `git/tag.go` * `git/tree.go` --- git/batch_obj_iter.go | 110 ++++++++ git/commit.go | 57 ++++ git/git.go | 605 ------------------------------------------ git/obj_head_iter.go | 63 +++++ git/obj_iter.go | 131 +++++++++ git/oid.go | 57 ++++ git/ref_iter.go | 92 +++++++ git/reference.go | 20 ++ git/tag.go | 61 +++++ git/tree.go | 85 ++++++ 10 files changed, 676 insertions(+), 605 deletions(-) create mode 100644 git/batch_obj_iter.go create mode 100644 git/commit.go create mode 100644 git/obj_head_iter.go create mode 100644 git/obj_iter.go create mode 100644 git/oid.go create mode 100644 git/ref_iter.go create mode 100644 git/reference.go create mode 100644 git/tag.go create mode 100644 git/tree.go diff --git a/git/batch_obj_iter.go b/git/batch_obj_iter.go new file mode 100644 index 0000000..023d7fa --- /dev/null +++ b/git/batch_obj_iter.go @@ -0,0 +1,110 @@ +package git + +import ( + "bufio" + "fmt" + "io" + "os" + "os/exec" + "strconv" + "strings" + + "github.com/github/git-sizer/counts" +) + +// BatchObjectIter iterates over objects whose names are fed into its +// stdin. The output is buffered, so it has to be closed before you +// can be sure that you have gotten all of the objects. +type BatchObjectIter struct { + cmd *exec.Cmd + out io.ReadCloser + f *bufio.Reader +} + +// NewBatchObjectIter returns a `*BatchObjectIterator` and an +// `io.WriteCloser`. The iterator iterates over objects whose names +// are fed into the `io.WriteCloser`, one per line. The +// `io.WriteCloser` should normally be closed and the iterator's +// output drained before `Close()` is called. 
+func (repo *Repository) NewBatchObjectIter() (*BatchObjectIter, io.WriteCloser, error) { + cmd := repo.gitCommand("cat-file", "--batch", "--buffer") + + in, err := cmd.StdinPipe() + if err != nil { + return nil, nil, err + } + + out, err := cmd.StdoutPipe() + if err != nil { + return nil, nil, err + } + + cmd.Stderr = os.Stderr + + err = cmd.Start() + if err != nil { + return nil, nil, err + } + + return &BatchObjectIter{ + cmd: cmd, + out: out, + f: bufio.NewReader(out), + }, in, nil +} + +// Next returns the next object: its OID, type, size, and contents. +// When no more data are available, it returns an `io.EOF` error. +func (iter *BatchObjectIter) Next() (OID, ObjectType, counts.Count32, []byte, error) { + header, err := iter.f.ReadString('\n') + if err != nil { + return OID{}, "", 0, nil, err + } + oid, objectType, objectSize, err := parseBatchHeader("", header) + if err != nil { + return OID{}, "", 0, nil, err + } + // +1 for LF: + data := make([]byte, objectSize+1) + _, err = io.ReadFull(iter.f, data) + if err != nil { + return OID{}, "", 0, nil, err + } + data = data[:len(data)-1] + return oid, objectType, objectSize, data, nil +} + +// Close closes the iterator and frees up resources. If any iterator +// output hasn't been read yet, it will be lost. +func (iter *BatchObjectIter) Close() error { + err := iter.out.Close() + err2 := iter.cmd.Wait() + if err == nil { + err = err2 + } + return err +} + +// Parse a `cat-file --batch[-check]` output header line (including +// the trailing LF). `spec`, if not "", is used in error messages. 
+func parseBatchHeader(spec string, header string) (OID, ObjectType, counts.Count32, error) { + header = header[:len(header)-1] + words := strings.Split(header, " ") + if words[len(words)-1] == "missing" { + if spec == "" { + spec = words[0] + } + return OID{}, "missing", 0, fmt.Errorf("missing object %s", spec) + } + + oid, err := NewOID(words[0]) + if err != nil { + return OID{}, "missing", 0, err + } + + size, err := strconv.ParseUint(words[2], 10, 0) + if err != nil { + return OID{}, "missing", 0, err + } + return oid, ObjectType(words[1]), counts.NewCount32(size), nil +} diff --git a/git/commit.go b/git/commit.go new file mode 100644 index 0000000..5e46ee7 --- /dev/null +++ b/git/commit.go @@ -0,0 +1,57 @@ +package git + +import ( + "fmt" + + "github.com/github/git-sizer/counts" +) + +// Commit represents the parts of a commit object that we need. +type Commit struct { + Size counts.Count32 + Parents []OID + Tree OID +} + +// ParseCommit parses the commit object whose contents are in `data`. +// `oid` is used only in error messages. 
+func ParseCommit(oid OID, data []byte) (*Commit, error) { + var parents []OID + var tree OID + var treeFound bool + iter, err := NewObjectHeaderIter(oid.String(), data) + if err != nil { + return nil, err + } + for iter.HasNext() { + key, value, err := iter.Next() + if err != nil { + return nil, err + } + switch key { + case "parent": + parent, err := NewOID(value) + if err != nil { + return nil, fmt.Errorf("malformed parent header in commit %s", oid) + } + parents = append(parents, parent) + case "tree": + if treeFound { + return nil, fmt.Errorf("multiple trees found in commit %s", oid) + } + tree, err = NewOID(value) + if err != nil { + return nil, fmt.Errorf("malformed tree header in commit %s", oid) + } + treeFound = true + } + } + if !treeFound { + return nil, fmt.Errorf("no tree found in commit %s", oid) + } + return &Commit{ + Size: counts.NewCount32(uint64(len(data))), + Parents: parents, + Tree: tree, + }, nil +} diff --git a/git/git.go b/git/git.go index 281f54d..ee8a74d 100644 --- a/git/git.go +++ b/git/git.go @@ -1,76 +1,18 @@ package git import ( - "bufio" "bytes" - "encoding/hex" "errors" "fmt" - "io" "os" "os/exec" "path/filepath" - "strconv" - "strings" - - "github.com/github/git-sizer/counts" ) // ObjectType represents the type of a Git object ("blob", "tree", // "commit", "tag", or "missing"). type ObjectType string -// OID represents the SHA-1 object ID of a Git object, in binary -// format. -type OID struct { - v [20]byte -} - -// NullOID is the null object ID; i.e., all zeros. -var NullOID OID - -// OIDFromBytes converts a byte slice containing an object ID in -// binary format into an `OID`. -func OIDFromBytes(oidBytes []byte) (OID, error) { - var oid OID - if len(oidBytes) != len(oid.v) { - return OID{}, errors.New("bytes oid has the wrong length") - } - copy(oid.v[0:20], oidBytes) - return oid, nil -} - -// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40}`) -// into an `OID`. 
-func NewOID(s string) (OID, error) { - oidBytes, err := hex.DecodeString(s) - if err != nil { - return OID{}, err - } - return OIDFromBytes(oidBytes) -} - -// String formats `oid` as a string in hex format. -func (oid OID) String() string { - return hex.EncodeToString(oid.v[:]) -} - -// Bytes returns a byte slice view of `oid`, in binary format. -func (oid OID) Bytes() []byte { - return oid.v[:] -} - -// MarshalJSON expresses `oid` as a JSON string with its enclosing -// quotation marks. -func (oid OID) MarshalJSON() ([]byte, error) { - src := oid.v[:] - dst := make([]byte, hex.EncodedLen(len(src))+2) - dst[0] = '"' - dst[len(dst)-1] = '"' - hex.Encode(dst[1:len(dst)-1], src) - return dst, nil -} - // Repository represents a Git repository on disk. type Repository struct { path string @@ -173,550 +115,3 @@ func (repo *Repository) gitCommand(callerArgs ...string) *exec.Cmd { func (repo *Repository) Path() string { return repo.path } - -// Reference represents a Git reference. -type Reference struct { - // Refname is the full reference name of the reference. - Refname string - - // ObjectType is the type of the object referenced. - ObjectType ObjectType - - // ObjectSize is the size of the referred-to object, in bytes. - ObjectSize counts.Count32 - - // OID is the OID of the referred-to object. - OID OID -} - -// ReferenceIter is an iterator that interates over references. -type ReferenceIter struct { - cmd *exec.Cmd - out io.ReadCloser - f *bufio.Reader - errChan <-chan error -} - -// NewReferenceIter returns an iterator that iterates over all of the -// references in `repo`. 
-func (repo *Repository) NewReferenceIter() (*ReferenceIter, error) { - cmd := repo.gitCommand( - "for-each-ref", "--format=%(objectname) %(objecttype) %(objectsize) %(refname)", - ) - - out, err := cmd.StdoutPipe() - if err != nil { - return nil, err - } - - cmd.Stderr = os.Stderr - - err = cmd.Start() - if err != nil { - return nil, err - } - - return &ReferenceIter{ - cmd: cmd, - out: out, - f: bufio.NewReader(out), - errChan: make(chan error, 1), - }, nil -} - -// Next returns either the next reference or a boolean `false` value -// indicating that the iteration is over. On errors, return an error -// (in this case, the caller must still call `Close()`). -func (iter *ReferenceIter) Next() (Reference, bool, error) { - line, err := iter.f.ReadString('\n') - if err != nil { - if err != io.EOF { - return Reference{}, false, err - } - return Reference{}, false, nil - } - line = line[:len(line)-1] - words := strings.Split(line, " ") - if len(words) != 4 { - return Reference{}, false, fmt.Errorf("line improperly formatted: %#v", line) - } - oid, err := NewOID(words[0]) - if err != nil { - return Reference{}, false, fmt.Errorf("SHA-1 improperly formatted: %#v", words[0]) - } - objectType := ObjectType(words[1]) - objectSize, err := strconv.ParseUint(words[2], 10, 32) - if err != nil { - return Reference{}, false, fmt.Errorf("object size improperly formatted: %#v", words[2]) - } - refname := words[3] - return Reference{ - Refname: refname, - ObjectType: objectType, - ObjectSize: counts.Count32(objectSize), - OID: oid, - }, true, nil -} - -// Close closes the iterator and frees up resources. -func (iter *ReferenceIter) Close() error { - err := iter.out.Close() - err2 := iter.cmd.Wait() - if err == nil { - err = err2 - } - return err -} - -// BatchObjectIter iterates over objects whose names are fed into its -// stdin. The output is buffered, so it has to be closed before you -// can be sure that you have gotten all of the objects. 
-type BatchObjectIter struct { - cmd *exec.Cmd - out io.ReadCloser - f *bufio.Reader -} - -// NewBatchObjectIter returns a `*BatchObjectIterator` and an -// `io.WriteCloser`. The iterator iterates over objects whose names -// are fed into the `io.WriteCloser`, one per line. The -// `io.WriteCloser` should normally be closed and the iterator's -// output drained before `Close()` is called. -func (repo *Repository) NewBatchObjectIter() (*BatchObjectIter, io.WriteCloser, error) { - cmd := repo.gitCommand("cat-file", "--batch", "--buffer") - - in, err := cmd.StdinPipe() - if err != nil { - return nil, nil, err - } - - out, err := cmd.StdoutPipe() - if err != nil { - return nil, nil, err - } - - cmd.Stderr = os.Stderr - - err = cmd.Start() - if err != nil { - return nil, nil, err - } - - return &BatchObjectIter{ - cmd: cmd, - out: out, - f: bufio.NewReader(out), - }, in, nil -} - -// Next returns the next object: its OID, type, size, and contents. -// When no more data are available, it returns an `io.EOF` error. -func (iter *BatchObjectIter) Next() (OID, ObjectType, counts.Count32, []byte, error) { - header, err := iter.f.ReadString('\n') - if err != nil { - return OID{}, "", 0, nil, err - } - oid, objectType, objectSize, err := parseBatchHeader("", header) - if err != nil { - return OID{}, "", 0, nil, err - } - // +1 for LF: - data := make([]byte, objectSize+1) - _, err = io.ReadFull(iter.f, data) - if err != nil { - return OID{}, "", 0, nil, err - } - data = data[:len(data)-1] - return oid, objectType, objectSize, data, nil -} - -// Close closes the iterator and frees up resources. If any iterator -// output hasn't been read yet, it will be lost. -func (iter *BatchObjectIter) Close() error { - err := iter.out.Close() - err2 := iter.cmd.Wait() - if err == nil { - err = err2 - } - return err -} - -// Parse a `cat-file --batch[-check]` output header line (including -// the trailing LF). `spec`, if not "", is used in error messages. 
-func parseBatchHeader(spec string, header string) (OID, ObjectType, counts.Count32, error) { - header = header[:len(header)-1] - words := strings.Split(header, " ") - if words[len(words)-1] == "missing" { - if spec == "" { - spec = words[0] - } - return OID{}, "missing", 0, fmt.Errorf("missing object %s", spec) - } - - oid, err := NewOID(words[0]) - if err != nil { - return OID{}, "missing", 0, err - } - - size, err := strconv.ParseUint(words[2], 10, 0) - if err != nil { - return OID{}, "missing", 0, err - } - return oid, ObjectType(words[1]), counts.NewCount32(size), nil -} - -// ObjectIter iterates over objects in a Git repository. -type ObjectIter struct { - cmd1 *exec.Cmd - cmd2 *exec.Cmd - out1 io.ReadCloser - out2 io.ReadCloser - f *bufio.Reader - errChan <-chan error -} - -// NewObjectIter returns an iterator that iterates over objects in -// `repo`. The arguments are passed to `git rev-list --objects`. The -// second return value is the stdin of the `rev-list` command. The -// caller can feed values into it but must close it in any case. -func (repo *Repository) NewObjectIter( - args ...string, -) (*ObjectIter, io.WriteCloser, error) { - cmd1 := repo.gitCommand(append([]string{"rev-list", "--objects"}, args...)...) 
- in1, err := cmd1.StdinPipe() - if err != nil { - return nil, nil, err - } - - out1, err := cmd1.StdoutPipe() - if err != nil { - return nil, nil, err - } - - cmd1.Stderr = os.Stderr - - err = cmd1.Start() - if err != nil { - return nil, nil, err - } - - cmd2 := repo.gitCommand("cat-file", "--batch-check", "--buffer") - in2, err := cmd2.StdinPipe() - if err != nil { - out1.Close() - cmd1.Wait() - return nil, nil, err - } - - out2, err := cmd2.StdoutPipe() - if err != nil { - in2.Close() - out1.Close() - cmd1.Wait() - return nil, nil, err - } - - cmd2.Stderr = os.Stderr - - err = cmd2.Start() - if err != nil { - return nil, nil, err - } - - errChan := make(chan error, 1) - - go func() { - defer in2.Close() - f1 := bufio.NewReader(out1) - f2 := bufio.NewWriter(in2) - defer f2.Flush() - for { - line, err := f1.ReadString('\n') - if err != nil { - if err != io.EOF { - errChan <- err - } else { - errChan <- nil - } - return - } - if len(line) <= 40 { - errChan <- fmt.Errorf("line too short: %#v", line) - } - f2.WriteString(line[:40]) - f2.WriteByte('\n') - } - }() - - return &ObjectIter{ - cmd1: cmd1, - cmd2: cmd2, - out1: out1, - out2: out2, - f: bufio.NewReader(out2), - errChan: errChan, - }, in1, nil -} - -// Next returns the next object: its OID, type, and size. When no more -// data are available, it returns an `io.EOF` error. -func (iter *ObjectIter) Next() (OID, ObjectType, counts.Count32, error) { - line, err := iter.f.ReadString('\n') - if err != nil { - return OID{}, "", 0, err - } - - return parseBatchHeader("", line) -} - -// Close closes the iterator and frees up resources. -func (iter *ObjectIter) Close() error { - iter.out1.Close() - err := <-iter.errChan - iter.out2.Close() - err2 := iter.cmd1.Wait() - if err == nil { - err = err2 - } - err2 = iter.cmd2.Wait() - if err == nil { - err = err2 - } - return err -} - -// ObjectHeaderIter iterates over the headers within a commit or tag -// object. 
-type ObjectHeaderIter struct { - name string - data string -} - -// NewObjectHeaderIter returns an `ObjectHeaderIter` that iterates -// over the headers in a commit or tag object. `data` should be the -// object's contents, which is usually terminated by a blank line that -// separates the header from the comment. However, annotated tags -// don't always include comments, and Git even tolerates commits -// without comments, so don't insist on a blank line. `name` is used -// in error messages. -func NewObjectHeaderIter(name string, data []byte) (ObjectHeaderIter, error) { - headerEnd := bytes.Index(data, []byte("\n\n")) - if headerEnd == -1 { - if len(data) == 0 { - return ObjectHeaderIter{}, fmt.Errorf("%s has zero length", name) - } - - if data[len(data)-1] != '\n' { - return ObjectHeaderIter{}, fmt.Errorf("%s has no terminating LF", name) - } - - return ObjectHeaderIter{name, string(data)}, nil - } - return ObjectHeaderIter{name, string(data[:headerEnd+1])}, nil -} - -// HasNext returns true iff there are more headers to retrieve. -func (iter *ObjectHeaderIter) HasNext() bool { - return len(iter.data) > 0 -} - -// Next returns the key and value of the next header. -func (iter *ObjectHeaderIter) Next() (string, string, error) { - if len(iter.data) == 0 { - return "", "", fmt.Errorf("header for %s read past end", iter.name) - } - header := iter.data - keyEnd := strings.IndexByte(header, ' ') - if keyEnd == -1 { - return "", "", fmt.Errorf("malformed header in %s", iter.name) - } - key := header[:keyEnd] - header = header[keyEnd+1:] - valueEnd := strings.IndexByte(header, '\n') - if valueEnd == -1 { - return "", "", fmt.Errorf("malformed header in %s", iter.name) - } - value := header[:valueEnd] - iter.data = header[valueEnd+1:] - return key, value, nil -} - -// Commit represents the parts of a commit object that we need. 
-type Commit struct { - Size counts.Count32 - Parents []OID - Tree OID -} - -// ParseCommit parses the commit object whose contents are in `data`. -// `oid` is used only in error messages. -func ParseCommit(oid OID, data []byte) (*Commit, error) { - var parents []OID - var tree OID - var treeFound bool - iter, err := NewObjectHeaderIter(oid.String(), data) - if err != nil { - return nil, err - } - for iter.HasNext() { - key, value, err := iter.Next() - if err != nil { - return nil, err - } - switch key { - case "parent": - parent, err := NewOID(value) - if err != nil { - return nil, fmt.Errorf("malformed parent header in commit %s", oid) - } - parents = append(parents, parent) - case "tree": - if treeFound { - return nil, fmt.Errorf("multiple trees found in commit %s", oid) - } - tree, err = NewOID(value) - if err != nil { - return nil, fmt.Errorf("malformed tree header in commit %s", oid) - } - treeFound = true - } - } - if !treeFound { - return nil, fmt.Errorf("no tree found in commit %s", oid) - } - return &Commit{ - Size: counts.NewCount32(uint64(len(data))), - Parents: parents, - Tree: tree, - }, nil -} - -// Tree represents a Git tree object. -type Tree struct { - data string -} - -// ParseTree parses the tree object whose contents are contained in -// `data`. `oid` is currently unused. -func ParseTree(oid OID, data []byte) (*Tree, error) { - return &Tree{string(data)}, nil -} - -// Size returns the size of the tree object. -func (tree Tree) Size() counts.Count32 { - return counts.NewCount32(uint64(len(tree.data))) -} - -// TreeEntry represents an entry in a Git tree object. Note that Name -// shares memory with the tree data that were originally read; i.e., -// retaining a pointer to Name keeps the tree data reachable. -type TreeEntry struct { - Name string - OID OID - Filemode uint -} - -// TreeIter is an iterator over the entries in a Git tree object. -type TreeIter struct { - // The as-yet-unread part of the tree's data. 
- data string -} - -// Iter returns an iterator over the entries in `tree`. -func (tree *Tree) Iter() *TreeIter { - return &TreeIter{ - data: tree.data, - } -} - -// NextEntry returns either the next entry in a Git tree, or a `false` -// boolean value if there are no more entries. -func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) { - var entry TreeEntry - - if len(iter.data) == 0 { - return TreeEntry{}, false, nil - } - - spAt := strings.IndexByte(iter.data, ' ') - if spAt < 0 { - return TreeEntry{}, false, errors.New("failed to find SP after mode") - } - mode, err := strconv.ParseUint(iter.data[:spAt], 8, 32) - if err != nil { - return TreeEntry{}, false, err - } - entry.Filemode = uint(mode) - - iter.data = iter.data[spAt+1:] - nulAt := strings.IndexByte(iter.data, 0) - if nulAt < 0 { - return TreeEntry{}, false, errors.New("failed to find NUL after filename") - } - - entry.Name = iter.data[:nulAt] - - iter.data = iter.data[nulAt+1:] - if len(iter.data) < 20 { - return TreeEntry{}, false, errors.New("tree entry ends unexpectedly") - } - - copy(entry.OID.v[0:20], iter.data[0:20]) - iter.data = iter.data[20:] - - return entry, true, nil -} - -// Tag represents the information that we need about a Git tag object. -type Tag struct { - Size counts.Count32 - Referent OID - ReferentType ObjectType -} - -// ParseTag parses the Git tag object whose contents are contained in -// `data`. `oid` is used only in error messages. 
-func ParseTag(oid OID, data []byte) (*Tag, error) { - var referent OID - var referentFound bool - var referentType ObjectType - var referentTypeFound bool - iter, err := NewObjectHeaderIter(oid.String(), data) - if err != nil { - return nil, err - } - for iter.HasNext() { - key, value, err := iter.Next() - if err != nil { - return nil, err - } - switch key { - case "object": - if referentFound { - return nil, fmt.Errorf("multiple referents found in tag %s", oid) - } - referent, err = NewOID(value) - if err != nil { - return nil, fmt.Errorf("malformed object header in tag %s", oid) - } - referentFound = true - case "type": - if referentTypeFound { - return nil, fmt.Errorf("multiple types found in tag %s", oid) - } - referentType = ObjectType(value) - referentTypeFound = true - } - } - if !referentFound { - return nil, fmt.Errorf("no object found in tag %s", oid) - } - if !referentTypeFound { - return nil, fmt.Errorf("no type found in tag %s", oid) - } - return &Tag{ - Size: counts.NewCount32(uint64(len(data))), - Referent: referent, - ReferentType: referentType, - }, nil -} diff --git a/git/obj_head_iter.go b/git/obj_head_iter.go new file mode 100644 index 0000000..1454d1c --- /dev/null +++ b/git/obj_head_iter.go @@ -0,0 +1,63 @@ +package git + +import ( + "bytes" + "fmt" + "strings" +) + +// ObjectHeaderIter iterates over the headers within a commit or tag +// object. +type ObjectHeaderIter struct { + name string + data string +} + +// NewObjectHeaderIter returns an `ObjectHeaderIter` that iterates +// over the headers in a commit or tag object. `data` should be the +// object's contents, which is usually terminated by a blank line that +// separates the header from the comment. However, annotated tags +// don't always include comments, and Git even tolerates commits +// without comments, so don't insist on a blank line. `name` is used +// in error messages. 
+func NewObjectHeaderIter(name string, data []byte) (ObjectHeaderIter, error) { + headerEnd := bytes.Index(data, []byte("\n\n")) + if headerEnd == -1 { + if len(data) == 0 { + return ObjectHeaderIter{}, fmt.Errorf("%s has zero length", name) + } + + if data[len(data)-1] != '\n' { + return ObjectHeaderIter{}, fmt.Errorf("%s has no terminating LF", name) + } + + return ObjectHeaderIter{name, string(data)}, nil + } + return ObjectHeaderIter{name, string(data[:headerEnd+1])}, nil +} + +// HasNext returns true iff there are more headers to retrieve. +func (iter *ObjectHeaderIter) HasNext() bool { + return len(iter.data) > 0 +} + +// Next returns the key and value of the next header. +func (iter *ObjectHeaderIter) Next() (string, string, error) { + if len(iter.data) == 0 { + return "", "", fmt.Errorf("header for %s read past end", iter.name) + } + header := iter.data + keyEnd := strings.IndexByte(header, ' ') + if keyEnd == -1 { + return "", "", fmt.Errorf("malformed header in %s", iter.name) + } + key := header[:keyEnd] + header = header[keyEnd+1:] + valueEnd := strings.IndexByte(header, '\n') + if valueEnd == -1 { + return "", "", fmt.Errorf("malformed header in %s", iter.name) + } + value := header[:valueEnd] + iter.data = header[valueEnd+1:] + return key, value, nil +} diff --git a/git/obj_iter.go b/git/obj_iter.go new file mode 100644 index 0000000..5026378 --- /dev/null +++ b/git/obj_iter.go @@ -0,0 +1,131 @@ +package git + +import ( + "bufio" + "fmt" + "io" + "os" + "os/exec" + + "github.com/github/git-sizer/counts" +) + +// ObjectIter iterates over objects in a Git repository. +type ObjectIter struct { + cmd1 *exec.Cmd + cmd2 *exec.Cmd + out1 io.ReadCloser + out2 io.ReadCloser + f *bufio.Reader + errChan <-chan error +} + +// NewObjectIter returns an iterator that iterates over objects in +// `repo`. The arguments are passed to `git rev-list --objects`. The +// second return value is the stdin of the `rev-list` command. 
The +// caller can feed values into it but must close it in any case. +func (repo *Repository) NewObjectIter( + args ...string, +) (*ObjectIter, io.WriteCloser, error) { + cmd1 := repo.gitCommand(append([]string{"rev-list", "--objects"}, args...)...) + in1, err := cmd1.StdinPipe() + if err != nil { + return nil, nil, err + } + + out1, err := cmd1.StdoutPipe() + if err != nil { + return nil, nil, err + } + + cmd1.Stderr = os.Stderr + + err = cmd1.Start() + if err != nil { + return nil, nil, err + } + + cmd2 := repo.gitCommand("cat-file", "--batch-check", "--buffer") + in2, err := cmd2.StdinPipe() + if err != nil { + out1.Close() + cmd1.Wait() + return nil, nil, err + } + + out2, err := cmd2.StdoutPipe() + if err != nil { + in2.Close() + out1.Close() + cmd1.Wait() + return nil, nil, err + } + + cmd2.Stderr = os.Stderr + + err = cmd2.Start() + if err != nil { + return nil, nil, err + } + + errChan := make(chan error, 1) + + go func() { + defer in2.Close() + f1 := bufio.NewReader(out1) + f2 := bufio.NewWriter(in2) + defer f2.Flush() + for { + line, err := f1.ReadString('\n') + if err != nil { + if err != io.EOF { + errChan <- err + } else { + errChan <- nil + } + return + } + if len(line) <= 40 { + errChan <- fmt.Errorf("line too short: %#v", line) + } + f2.WriteString(line[:40]) + f2.WriteByte('\n') + } + }() + + return &ObjectIter{ + cmd1: cmd1, + cmd2: cmd2, + out1: out1, + out2: out2, + f: bufio.NewReader(out2), + errChan: errChan, + }, in1, nil +} + +// Next returns the next object: its OID, type, and size. When no more +// data are available, it returns an `io.EOF` error. +func (iter *ObjectIter) Next() (OID, ObjectType, counts.Count32, error) { + line, err := iter.f.ReadString('\n') + if err != nil { + return OID{}, "", 0, err + } + + return parseBatchHeader("", line) +} + +// Close closes the iterator and frees up resources. 
+func (iter *ObjectIter) Close() error { + iter.out1.Close() + err := <-iter.errChan + iter.out2.Close() + err2 := iter.cmd1.Wait() + if err == nil { + err = err2 + } + err2 = iter.cmd2.Wait() + if err == nil { + err = err2 + } + return err +} diff --git a/git/oid.go b/git/oid.go new file mode 100644 index 0000000..2aefbcb --- /dev/null +++ b/git/oid.go @@ -0,0 +1,57 @@ +package git + +import ( + "encoding/hex" + "errors" +) + +// OID represents the SHA-1 object ID of a Git object, in binary +// format. +type OID struct { + v [20]byte +} + +// NullOID is the null object ID; i.e., all zeros. +var NullOID OID + +// OIDFromBytes converts a byte slice containing an object ID in +// binary format into an `OID`. +func OIDFromBytes(oidBytes []byte) (OID, error) { + var oid OID + if len(oidBytes) != len(oid.v) { + return OID{}, errors.New("bytes oid has the wrong length") + } + copy(oid.v[0:20], oidBytes) + return oid, nil +} + +// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40}`) +// into an `OID`. +func NewOID(s string) (OID, error) { + oidBytes, err := hex.DecodeString(s) + if err != nil { + return OID{}, err + } + return OIDFromBytes(oidBytes) +} + +// String formats `oid` as a string in hex format. +func (oid OID) String() string { + return hex.EncodeToString(oid.v[:]) +} + +// Bytes returns a byte slice view of `oid`, in binary format. +func (oid OID) Bytes() []byte { + return oid.v[:] +} + +// MarshalJSON expresses `oid` as a JSON string with its enclosing +// quotation marks. 
+func (oid OID) MarshalJSON() ([]byte, error) { + src := oid.v[:] + dst := make([]byte, hex.EncodedLen(len(src))+2) + dst[0] = '"' + dst[len(dst)-1] = '"' + hex.Encode(dst[1:len(dst)-1], src) + return dst, nil +} diff --git a/git/ref_iter.go b/git/ref_iter.go new file mode 100644 index 0000000..e00dc44 --- /dev/null +++ b/git/ref_iter.go @@ -0,0 +1,92 @@ +package git + +import ( + "bufio" + "fmt" + "io" + "os" + "os/exec" + "strconv" + "strings" + + "github.com/github/git-sizer/counts" +) + +// ReferenceIter is an iterator that interates over references. +type ReferenceIter struct { + cmd *exec.Cmd + out io.ReadCloser + f *bufio.Reader + errChan <-chan error +} + +// NewReferenceIter returns an iterator that iterates over all of the +// references in `repo`. +func (repo *Repository) NewReferenceIter() (*ReferenceIter, error) { + cmd := repo.gitCommand( + "for-each-ref", "--format=%(objectname) %(objecttype) %(objectsize) %(refname)", + ) + + out, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + + cmd.Stderr = os.Stderr + + err = cmd.Start() + if err != nil { + return nil, err + } + + return &ReferenceIter{ + cmd: cmd, + out: out, + f: bufio.NewReader(out), + errChan: make(chan error, 1), + }, nil +} + +// Next returns either the next reference or a boolean `false` value +// indicating that the iteration is over. On errors, return an error +// (in this case, the caller must still call `Close()`). 
+func (iter *ReferenceIter) Next() (Reference, bool, error) { + line, err := iter.f.ReadString('\n') + if err != nil { + if err != io.EOF { + return Reference{}, false, err + } + return Reference{}, false, nil + } + line = line[:len(line)-1] + words := strings.Split(line, " ") + if len(words) != 4 { + return Reference{}, false, fmt.Errorf("line improperly formatted: %#v", line) + } + oid, err := NewOID(words[0]) + if err != nil { + return Reference{}, false, fmt.Errorf("SHA-1 improperly formatted: %#v", words[0]) + } + objectType := ObjectType(words[1]) + objectSize, err := strconv.ParseUint(words[2], 10, 32) + if err != nil { + return Reference{}, false, fmt.Errorf("object size improperly formatted: %#v", words[2]) + } + refname := words[3] + return Reference{ + Refname: refname, + ObjectType: objectType, + ObjectSize: counts.Count32(objectSize), + OID: oid, + }, true, nil +} + +// Close closes the iterator and frees up resources. +func (iter *ReferenceIter) Close() error { + err := iter.out.Close() + err2 := iter.cmd.Wait() + if err == nil { + err = err2 + } + return err +} diff --git a/git/reference.go b/git/reference.go new file mode 100644 index 0000000..c2653d4 --- /dev/null +++ b/git/reference.go @@ -0,0 +1,20 @@ +package git + +import ( + "github.com/github/git-sizer/counts" +) + +// Reference represents a Git reference. +type Reference struct { + // Refname is the full reference name of the reference. + Refname string + + // ObjectType is the type of the object referenced. + ObjectType ObjectType + + // ObjectSize is the size of the referred-to object, in bytes. + ObjectSize counts.Count32 + + // OID is the OID of the referred-to object. + OID OID +} diff --git a/git/tag.go b/git/tag.go new file mode 100644 index 0000000..abc76ba --- /dev/null +++ b/git/tag.go @@ -0,0 +1,61 @@ +package git + +import ( + "fmt" + + "github.com/github/git-sizer/counts" +) + +// Tag represents the information that we need about a Git tag object. 
+type Tag struct { + Size counts.Count32 + Referent OID + ReferentType ObjectType +} + +// ParseTag parses the Git tag object whose contents are contained in +// `data`. `oid` is used only in error messages. +func ParseTag(oid OID, data []byte) (*Tag, error) { + var referent OID + var referentFound bool + var referentType ObjectType + var referentTypeFound bool + iter, err := NewObjectHeaderIter(oid.String(), data) + if err != nil { + return nil, err + } + for iter.HasNext() { + key, value, err := iter.Next() + if err != nil { + return nil, err + } + switch key { + case "object": + if referentFound { + return nil, fmt.Errorf("multiple referents found in tag %s", oid) + } + referent, err = NewOID(value) + if err != nil { + return nil, fmt.Errorf("malformed object header in tag %s", oid) + } + referentFound = true + case "type": + if referentTypeFound { + return nil, fmt.Errorf("multiple types found in tag %s", oid) + } + referentType = ObjectType(value) + referentTypeFound = true + } + } + if !referentFound { + return nil, fmt.Errorf("no object found in tag %s", oid) + } + if !referentTypeFound { + return nil, fmt.Errorf("no type found in tag %s", oid) + } + return &Tag{ + Size: counts.NewCount32(uint64(len(data))), + Referent: referent, + ReferentType: referentType, + }, nil +} diff --git a/git/tree.go b/git/tree.go new file mode 100644 index 0000000..c31fa78 --- /dev/null +++ b/git/tree.go @@ -0,0 +1,85 @@ +package git + +import ( + "errors" + "strconv" + "strings" + + "github.com/github/git-sizer/counts" +) + +// Tree represents a Git tree object. +type Tree struct { + data string +} + +// ParseTree parses the tree object whose contents are contained in +// `data`. `oid` is currently unused. +func ParseTree(oid OID, data []byte) (*Tree, error) { + return &Tree{string(data)}, nil +} + +// Size returns the size of the tree object. 
+func (tree Tree) Size() counts.Count32 { + return counts.NewCount32(uint64(len(tree.data))) +} + +// TreeEntry represents an entry in a Git tree object. Note that Name +// shares memory with the tree data that were originally read; i.e., +// retaining a pointer to Name keeps the tree data reachable. +type TreeEntry struct { + Name string + OID OID + Filemode uint +} + +// TreeIter is an iterator over the entries in a Git tree object. +type TreeIter struct { + // The as-yet-unread part of the tree's data. + data string +} + +// Iter returns an iterator over the entries in `tree`. +func (tree *Tree) Iter() *TreeIter { + return &TreeIter{ + data: tree.data, + } +} + +// NextEntry returns either the next entry in a Git tree, or a `false` +// boolean value if there are no more entries. +func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) { + var entry TreeEntry + + if len(iter.data) == 0 { + return TreeEntry{}, false, nil + } + + spAt := strings.IndexByte(iter.data, ' ') + if spAt < 0 { + return TreeEntry{}, false, errors.New("failed to find SP after mode") + } + mode, err := strconv.ParseUint(iter.data[:spAt], 8, 32) + if err != nil { + return TreeEntry{}, false, err + } + entry.Filemode = uint(mode) + + iter.data = iter.data[spAt+1:] + nulAt := strings.IndexByte(iter.data, 0) + if nulAt < 0 { + return TreeEntry{}, false, errors.New("failed to find NUL after filename") + } + + entry.Name = iter.data[:nulAt] + + iter.data = iter.data[nulAt+1:] + if len(iter.data) < 20 { + return TreeEntry{}, false, errors.New("tree entry ends unexpectedly") + } + + copy(entry.OID.v[0:20], iter.data[0:20]) + iter.data = iter.data[20:] + + return entry, true, nil +} From e10532810ae8923a57fa9424351b375c8cb4b9d2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 13:58:47 +0100 Subject: [PATCH 127/176] git_sizer_test.go: fix some linter warnings --- git_sizer_test.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git 
a/git_sizer_test.go b/git_sizer_test.go index c6e7311..6e2def9 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -238,19 +238,19 @@ func TestRefSelections(t *testing.T) { name: "branches-refgroup", args: []string{"--include=@mygroup"}, config: []git.ConfigEntry{ - {"refgroup.mygroup.include", "refs/heads"}, + {Key: "refgroup.mygroup.include", Value: "refs/heads"}, }, }, { // 18 name: "combination-refgroup", args: []string{"--include=@mygroup"}, config: []git.ConfigEntry{ - {"refgroup.mygroup.include", "refs/heads"}, - {"refgroup.mygroup.include", "refs/tags"}, - {"refgroup.mygroup.exclude", "refs/heads/foo"}, - {"refgroup.mygroup.includeRegexp", ".*foo.*"}, - {"refgroup.mygroup.exclude", "refs/foo"}, - {"refgroup.mygroup.excludeRegexp", "refs/tags/release-.*"}, + {Key: "refgroup.mygroup.include", Value: "refs/heads"}, + {Key: "refgroup.mygroup.include", Value: "refs/tags"}, + {Key: "refgroup.mygroup.exclude", Value: "refs/heads/foo"}, + {Key: "refgroup.mygroup.includeRegexp", Value: ".*foo.*"}, + {Key: "refgroup.mygroup.exclude", Value: "refs/foo"}, + {Key: "refgroup.mygroup.excludeRegexp", Value: "refs/tags/release-.*"}, }, }, } { @@ -387,14 +387,14 @@ References (included references marked with '+'): config: []git.ConfigEntry{ // Note that refgroup "misc" is defined implicitly. 
- {"refgroup.misc.foo.includeRegexp", ".*foo.*"}, + {Key: "refgroup.misc.foo.includeRegexp", Value: ".*foo.*"}, - {"refgroup.misc.foo.oatend.includeRegexp", ".*o"}, + {Key: "refgroup.misc.foo.oatend.includeRegexp", Value: ".*o"}, - {"refgroup.misc.foo.bogus.include", "bogus"}, + {Key: "refgroup.misc.foo.bogus.include", Value: "bogus"}, - {"refgroup.tags.releases.name", "Releases"}, - {"refgroup.tags.releases.includeRegexp", "refs/tags/release-.*"}, + {Key: "refgroup.tags.releases.name", Value: "Releases"}, + {Key: "refgroup.tags.releases.includeRegexp", Value: "refs/tags/release-.*"}, }, stdout: ` | * References | | | @@ -420,10 +420,10 @@ References (included references marked with '+'): name: "include-refgroups", args: []string{"--include=@branches", "--include=@tags.releases", "--include=@oatend"}, config: []git.ConfigEntry{ - {"refgroup.oatend.includeRegexp", ".*o"}, + {Key: "refgroup.oatend.includeRegexp", Value: ".*o"}, - {"refgroup.tags.releases.name", "Releases"}, - {"refgroup.tags.releases.includeRegexp", "refs/tags/release-.*"}, + {Key: "refgroup.tags.releases.name", Value: "Releases"}, + {Key: "refgroup.tags.releases.includeRegexp", Value: "refs/tags/release-.*"}, }, stdout: ` | * References | | | From b3cbef941249de202e394b70dd0cfa6d819c9de8 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 2 Nov 2021 11:56:05 +0100 Subject: [PATCH 128/176] Repository.GitCommand(): make method public --- git/batch_obj_iter.go | 2 +- git/git.go | 2 +- git/gitconfig.go | 8 ++++---- git/obj_iter.go | 4 ++-- git/ref_iter.go | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/git/batch_obj_iter.go b/git/batch_obj_iter.go index 023d7fa..48af984 100644 --- a/git/batch_obj_iter.go +++ b/git/batch_obj_iter.go @@ -27,7 +27,7 @@ type BatchObjectIter struct { // `io.WriteCloser` should normally be closed and the iterator's // output drained before `Close()` is called. 
func (repo *Repository) NewBatchObjectIter() (*BatchObjectIter, io.WriteCloser, error) { - cmd := repo.gitCommand("cat-file", "--batch", "--buffer") + cmd := repo.GitCommand("cat-file", "--batch", "--buffer") in, err := cmd.StdinPipe() if err != nil { diff --git a/git/git.go b/git/git.go index ee8a74d..f451c54 100644 --- a/git/git.go +++ b/git/git.go @@ -84,7 +84,7 @@ func NewRepository(path string) (*Repository, error) { }, nil } -func (repo *Repository) gitCommand(callerArgs ...string) *exec.Cmd { +func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { args := []string{ // Disable replace references when running our commands: "--no-replace-objects", diff --git a/git/gitconfig.go b/git/gitconfig.go index 1cae881..d3378ae 100644 --- a/git/gitconfig.go +++ b/git/gitconfig.go @@ -36,7 +36,7 @@ type Config struct { // `configKeyMatchesPrefix()`), and strip off the prefix in the keys // that are returned. func (repo *Repository) GetConfig(prefix string) (*Config, error) { - cmd := repo.gitCommand("config", "--list", "-z") + cmd := repo.GitCommand("config", "--list", "-z") out, err := cmd.Output() if err != nil { @@ -114,7 +114,7 @@ func configKeyMatchesPrefix(key, prefix string) (bool, string) { } func (repo *Repository) ConfigStringDefault(key string, defaultValue string) (string, error) { - cmd := repo.gitCommand( + cmd := repo.GitCommand( "config", "--default", defaultValue, key, @@ -133,7 +133,7 @@ func (repo *Repository) ConfigStringDefault(key string, defaultValue string) (st } func (repo *Repository) ConfigBoolDefault(key string, defaultValue bool) (bool, error) { - cmd := repo.gitCommand( + cmd := repo.GitCommand( "config", "--type", "bool", "--default", strconv.FormatBool(defaultValue), @@ -155,7 +155,7 @@ func (repo *Repository) ConfigBoolDefault(key string, defaultValue bool) (bool, } func (repo *Repository) ConfigIntDefault(key string, defaultValue int) (int, error) { - cmd := repo.gitCommand( + cmd := repo.GitCommand( "config", "--type", 
"int", "--default", strconv.Itoa(defaultValue), diff --git a/git/obj_iter.go b/git/obj_iter.go index 5026378..57e44bc 100644 --- a/git/obj_iter.go +++ b/git/obj_iter.go @@ -27,7 +27,7 @@ type ObjectIter struct { func (repo *Repository) NewObjectIter( args ...string, ) (*ObjectIter, io.WriteCloser, error) { - cmd1 := repo.gitCommand(append([]string{"rev-list", "--objects"}, args...)...) + cmd1 := repo.GitCommand(append([]string{"rev-list", "--objects"}, args...)...) in1, err := cmd1.StdinPipe() if err != nil { return nil, nil, err @@ -45,7 +45,7 @@ func (repo *Repository) NewObjectIter( return nil, nil, err } - cmd2 := repo.gitCommand("cat-file", "--batch-check", "--buffer") + cmd2 := repo.GitCommand("cat-file", "--batch-check", "--buffer") in2, err := cmd2.StdinPipe() if err != nil { out1.Close() diff --git a/git/ref_iter.go b/git/ref_iter.go index e00dc44..56acb06 100644 --- a/git/ref_iter.go +++ b/git/ref_iter.go @@ -23,7 +23,7 @@ type ReferenceIter struct { // NewReferenceIter returns an iterator that iterates over all of the // references in `repo`. func (repo *Repository) NewReferenceIter() (*ReferenceIter, error) { - cmd := repo.gitCommand( + cmd := repo.GitCommand( "for-each-ref", "--format=%(objectname) %(objecttype) %(objectsize) %(refname)", ) From bf222edcc80f227d8a34b1dce00967f93f9f8530 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 15:29:39 +0100 Subject: [PATCH 129/176] Use `errors.Is()` rather than comparing errors using `==` Humor the linter. 
--- git-sizer.go | 2 +- internal/pipe/pipeline.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 2e7c02b..d1e075c 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -190,7 +190,7 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { err = flags.Parse(args) if err != nil { - if err == pflag.ErrHelp { + if errors.Is(err, pflag.ErrHelp) { return nil } return err diff --git a/internal/pipe/pipeline.go b/internal/pipe/pipeline.go index d32051b..40308d7 100644 --- a/internal/pipe/pipeline.go +++ b/internal/pipe/pipeline.go @@ -204,7 +204,7 @@ func (p *Pipeline) Wait() error { finishedEarly = false continue - case err == FinishEarly: + case errors.Is(err, FinishEarly): // We ignore `FinishEarly` errors because that is how a // stage informs us that it intentionally finished early. // Moreover, if we see a `FinishEarly` error, ignore any From 4029e5deb7baf02e72c40c9fc9a4046295809cc2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 2 Nov 2021 17:09:43 +0100 Subject: [PATCH 130/176] ParseReference(): new function, extracted from `ReferenceIter.Next()` --- git/ref_iter.go | 28 ++++------------------------ git/reference.go | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/git/ref_iter.go b/git/ref_iter.go index 56acb06..ca7b383 100644 --- a/git/ref_iter.go +++ b/git/ref_iter.go @@ -2,14 +2,9 @@ package git import ( "bufio" - "fmt" "io" "os" "os/exec" - "strconv" - "strings" - - "github.com/github/git-sizer/counts" ) // ReferenceIter is an iterator that interates over references. 
@@ -58,27 +53,12 @@ func (iter *ReferenceIter) Next() (Reference, bool, error) { } return Reference{}, false, nil } - line = line[:len(line)-1] - words := strings.Split(line, " ") - if len(words) != 4 { - return Reference{}, false, fmt.Errorf("line improperly formatted: %#v", line) - } - oid, err := NewOID(words[0]) + ref, err := ParseReference(line[:len(line)-1]) if err != nil { - return Reference{}, false, fmt.Errorf("SHA-1 improperly formatted: %#v", words[0]) + return ref, false, err } - objectType := ObjectType(words[1]) - objectSize, err := strconv.ParseUint(words[2], 10, 32) - if err != nil { - return Reference{}, false, fmt.Errorf("object size improperly formatted: %#v", words[2]) - } - refname := words[3] - return Reference{ - Refname: refname, - ObjectType: objectType, - ObjectSize: counts.Count32(objectSize), - OID: oid, - }, true, nil + + return ref, true, nil } // Close closes the iterator and frees up resources. diff --git a/git/reference.go b/git/reference.go index c2653d4..e8a1aaf 100644 --- a/git/reference.go +++ b/git/reference.go @@ -1,6 +1,10 @@ package git import ( + "fmt" + "strconv" + "strings" + "github.com/github/git-sizer/counts" ) @@ -18,3 +22,31 @@ type Reference struct { // OID is the OID of the referred-to object. OID OID } + +// ParseReference parses `line` (a non-LF-terminated line) into a +// `Reference`. 
It is assumed that `line` is formatted like the output +// of +// +// git for-each-ref --format='%(objectname) %(objecttype) %(objectsize) %(refname)' +func ParseReference(line string) (Reference, error) { + words := strings.Split(line, " ") + if len(words) != 4 { + return Reference{}, fmt.Errorf("line improperly formatted: %#v", line) + } + oid, err := NewOID(words[0]) + if err != nil { + return Reference{}, fmt.Errorf("SHA-1 improperly formatted: %#v", words[0]) + } + objectType := ObjectType(words[1]) + objectSize, err := strconv.ParseUint(words[2], 10, 32) + if err != nil { + return Reference{}, fmt.Errorf("object size improperly formatted: %#v", words[2]) + } + refname := words[3] + return Reference{ + Refname: refname, + ObjectType: objectType, + ObjectSize: counts.Count32(objectSize), + OID: oid, + }, nil +} From e655a21c79c8de777d41dd8fa53a60d1c1b9d6f5 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 15:30:50 +0100 Subject: [PATCH 131/176] Use new-style octal constants --- internal/testutils/repoutils.go | 2 +- sizes/graph.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index cb00dee..a382a12 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -216,7 +216,7 @@ func (repo *TestRepo) AddFile(t *testing.T, relativePath, contents string) { if dirPath != "." 
{ require.NoError( t, - os.MkdirAll(filepath.Join(repo.Path, dirPath), 0777), + os.MkdirAll(filepath.Join(repo.Path, dirPath), 0o777), "creating subdir", ) } diff --git a/sizes/graph.go b/sizes/graph.go index 71c8676..7e923f6 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -571,7 +571,7 @@ func (r *treeRecord) initialize(g *Graph, oid git.OID, tree *git.Tree) error { name := entry.Name switch { - case entry.Filemode&0170000 == 0040000: + case entry.Filemode&0o170000 == 0o40000: // Tree listener := func(size TreeSize) { // This listener is called when the tree pointed to by @@ -595,12 +595,12 @@ func (r *treeRecord) initialize(g *Graph, oid git.OID, tree *git.Tree) error { } r.entryCount.Increment(1) - case entry.Filemode&0170000 == 0160000: + case entry.Filemode&0o170000 == 0o160000: // Commit (i.e., submodule) r.size.addSubmodule(name) r.entryCount.Increment(1) - case entry.Filemode&0170000 == 0120000: + case entry.Filemode&0o170000 == 0o120000: // Symlink g.pathResolver.RecordTreeEntry(oid, name, entry.OID) From c8151f9f157bab5d2688961a66ad665ee14b72b5 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 5 Nov 2021 12:56:16 +0100 Subject: [PATCH 132/176] ReferenceIter: use a pipeline Allow iteration to be canceled using a context. --- git/ref_iter.go | 102 ++++++++++++++++++++++++++++-------------------- sizes/graph.go | 13 +++--- 2 files changed, 65 insertions(+), 50 deletions(-) diff --git a/git/ref_iter.go b/git/ref_iter.go index ca7b383..955499b 100644 --- a/git/ref_iter.go +++ b/git/ref_iter.go @@ -2,71 +2,89 @@ package git import ( "bufio" + "context" + "fmt" "io" - "os" - "os/exec" + + "github.com/github/git-sizer/internal/pipe" ) // ReferenceIter is an iterator that interates over references. type ReferenceIter struct { - cmd *exec.Cmd - out io.ReadCloser - f *bufio.Reader - errChan <-chan error + refCh chan Reference + errCh chan error } // NewReferenceIter returns an iterator that iterates over all of the // references in `repo`. 
-func (repo *Repository) NewReferenceIter() (*ReferenceIter, error) { - cmd := repo.GitCommand( - "for-each-ref", "--format=%(objectname) %(objecttype) %(objectsize) %(refname)", - ) - - out, err := cmd.StdoutPipe() - if err != nil { - return nil, err +func (repo *Repository) NewReferenceIter(ctx context.Context) (*ReferenceIter, error) { + iter := ReferenceIter{ + refCh: make(chan Reference), + errCh: make(chan error), } - cmd.Stderr = os.Stderr + p := pipe.New() + p.Add( + // Output all references and their values: + pipe.CommandStage( + "git-for-each-ref", + repo.GitCommand( + "for-each-ref", + "--format=%(objectname) %(objecttype) %(objectsize) %(refname)", + ), + ), + + // Read the references and send them to `iter.refCh`, then close + // the channel. + pipe.Function( + "parse-refs", + func(ctx context.Context, env pipe.Env, stdin io.Reader, stdout io.Writer) error { + defer close(iter.refCh) + + in := bufio.NewReader(stdin) + for { + line, err := in.ReadBytes('\n') + if err != nil { + if err == io.EOF { + return nil + } + return fmt.Errorf("reading 'git for-each-ref' output: %w", err) + } + + ref, err := ParseReference(string(line[:len(line)-1])) + if err != nil { + return fmt.Errorf("parsing 'git for-each-ref' output: %w", err) + } + select { + case iter.refCh <- ref: + case <-ctx.Done(): + return ctx.Err() + } + } + }, + ), + ) - err = cmd.Start() + err := p.Start(ctx) if err != nil { return nil, err } - return &ReferenceIter{ - cmd: cmd, - out: out, - f: bufio.NewReader(out), - errChan: make(chan error, 1), - }, nil + go func() { + iter.errCh <- p.Wait() + }() + + return &iter, nil } // Next returns either the next reference or a boolean `false` value // indicating that the iteration is over. On errors, return an error // (in this case, the caller must still call `Close()`). 
func (iter *ReferenceIter) Next() (Reference, bool, error) { - line, err := iter.f.ReadString('\n') - if err != nil { - if err != io.EOF { - return Reference{}, false, err - } - return Reference{}, false, nil - } - ref, err := ParseReference(line[:len(line)-1]) - if err != nil { - return ref, false, err + ref, ok := <-iter.refCh + if !ok { + return Reference{}, false, <-iter.errCh } return ref, true, nil } - -// Close closes the iterator and frees up resources. -func (iter *ReferenceIter) Close() error { - err := iter.out.Close() - err2 := iter.cmd.Wait() - if err == nil { - err = err2 - } - return err -} diff --git a/sizes/graph.go b/sizes/graph.go index 0cd70c2..51f5fe6 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -2,6 +2,7 @@ package sizes import ( "bufio" + "context" "errors" "fmt" "io" @@ -69,15 +70,13 @@ func ScanRepositoryUsingGraph( ) (HistorySize, error) { graph := NewGraph(rg, nameStyle) - refIter, err := repo.NewReferenceIter() + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + refIter, err := repo.NewReferenceIter(ctx) if err != nil { return HistorySize{}, err } - defer func() { - if refIter != nil { - refIter.Close() - } - }() iter, in, err := repo.NewObjectIter("--stdin", "--date-order") if err != nil { @@ -134,8 +133,6 @@ func ScanRepositoryUsingGraph( return } } - err := refIter.Close() - refIter = nil errChan <- err }() From a075a1bf15a2ab6cae02642b76ff968d0f958bb2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 15:22:09 +0100 Subject: [PATCH 133/176] Use `io` package in preference to `io/ioutil` The latter is deprecated. 
--- internal/pipe/pipeline.go | 3 +-- internal/pipe/pipeline_test.go | 12 ++++++------ internal/testutils/repoutils.go | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/internal/pipe/pipeline.go b/internal/pipe/pipeline.go index 40308d7..f417db0 100644 --- a/internal/pipe/pipeline.go +++ b/internal/pipe/pipeline.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "sync/atomic" ) @@ -134,7 +133,7 @@ func (p *Pipeline) Start(ctx context.Context) error { if p.stdin != nil { // We don't want the first stage to actually close this, and // it's not even an `io.ReadCloser`, so fake it: - nextStdin = ioutil.NopCloser(p.stdin) + nextStdin = io.NopCloser(p.stdin) } for i, s := range p.stages { diff --git a/internal/pipe/pipeline_test.go b/internal/pipe/pipeline_test.go index 0abbedc..faf1e31 100644 --- a/internal/pipe/pipeline_test.go +++ b/internal/pipe/pipeline_test.go @@ -109,7 +109,7 @@ func TestPipelineReadFromSlowly(t *testing.T) { go func() { time.Sleep(200 * time.Millisecond) var err error - buf, err = ioutil.ReadAll(r) + buf, err = io.ReadAll(r) readErr <- err }() @@ -361,7 +361,7 @@ func TestFunction(t *testing.T) { pipe.Function( "farewell", func(_ context.Context, _ pipe.Env, stdin io.Reader, stdout io.Writer) error { - buf, err := ioutil.ReadAll(stdin) + buf, err := io.ReadAll(stdin) if err != nil { return err } @@ -389,7 +389,7 @@ func TestPipelineWithFunction(t *testing.T) { pipe.Function( "farewell", func(_ context.Context, _ pipe.Env, stdin io.Reader, stdout io.Writer) error { - buf, err := ioutil.ReadAll(stdin) + buf, err := io.ReadAll(stdin) if err != nil { return err } @@ -419,7 +419,7 @@ func (s ErrorStartingStage) Name() string { func (s ErrorStartingStage) Start( ctx context.Context, env pipe.Env, stdin io.ReadCloser, ) (io.ReadCloser, error) { - return ioutil.NopCloser(&bytes.Buffer{}), s.err + return io.NopCloser(&bytes.Buffer{}), s.err } func (s ErrorStartingStage) Wait() error { @@ -525,7 +525,7 @@ func 
TestScannerAlwaysFlushes(t *testing.T) { "compute-length", func(_ context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { var err error - length, err = io.Copy(ioutil.Discard, stdin) + length, err = io.Copy(io.Discard, stdin) return err }, ), @@ -567,7 +567,7 @@ func TestScannerFinishEarly(t *testing.T) { "compute-length", func(_ context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { var err error - length, err = io.Copy(ioutil.Discard, stdin) + length, err = io.Copy(io.Discard, stdin) return err }, ), diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index a382a12..60a2f9b 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -196,7 +196,7 @@ func (repo *TestRepo) CreateObject( t.FailNow() } - output, err := ioutil.ReadAll(out) + output, err := io.ReadAll(out) err2 = cmd.Wait() require.NoError(t, err) require.NoError(t, err2) From 1fe34af62d81ac0f2ad87fb937a3b0e2c395623a Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 2 Nov 2021 11:57:48 +0100 Subject: [PATCH 134/176] ParseBatchHeader(): make function public and change its signature --- git/batch_header.go | 47 +++++++++++++++++++++++++++++++++++++++++++ git/batch_obj_iter.go | 43 +++++++-------------------------------- git/obj_iter.go | 8 +++----- sizes/graph.go | 46 +++++++++++++++++++++--------------------- 4 files changed, 80 insertions(+), 64 deletions(-) create mode 100644 git/batch_header.go diff --git a/git/batch_header.go b/git/batch_header.go new file mode 100644 index 0000000..2500d4e --- /dev/null +++ b/git/batch_header.go @@ -0,0 +1,47 @@ +package git + +import ( + "fmt" + "strconv" + "strings" + + "github.com/github/git-sizer/counts" +) + +type BatchHeader struct { + OID OID + ObjectType ObjectType + ObjectSize counts.Count32 +} + +var missingHeader = BatchHeader{ + ObjectType: "missing", +} + +// Parse a `cat-file --batch[-check]` output header line (including +// the trailing LF). 
`spec`, if not "", is used in error messages. +func ParseBatchHeader(spec string, header string) (BatchHeader, error) { + header = header[:len(header)-1] + words := strings.Split(header, " ") + if words[len(words)-1] == "missing" { + if spec == "" { + spec = words[0] + } + return missingHeader, fmt.Errorf("missing object %s", spec) + } + + oid, err := NewOID(words[0]) + if err != nil { + return missingHeader, err + } + + size, err := strconv.ParseUint(words[2], 10, 0) + if err != nil { + return missingHeader, err + } + return BatchHeader{ + OID: oid, + ObjectType: ObjectType(words[1]), + ObjectSize: counts.NewCount32(size), + }, nil +} diff --git a/git/batch_obj_iter.go b/git/batch_obj_iter.go index 48af984..f2d38e1 100644 --- a/git/batch_obj_iter.go +++ b/git/batch_obj_iter.go @@ -2,14 +2,9 @@ package git import ( "bufio" - "fmt" "io" "os" "os/exec" - "strconv" - "strings" - - "github.com/github/git-sizer/counts" ) // BatchObjectIter iterates over objects whose names are fed into its @@ -55,23 +50,23 @@ func (repo *Repository) NewBatchObjectIter() (*BatchObjectIter, io.WriteCloser, // Next returns the next object: its OID, type, size, and contents. // When no more data are available, it returns an `io.EOF` error. 
-func (iter *BatchObjectIter) Next() (OID, ObjectType, counts.Count32, []byte, error) { +func (iter *BatchObjectIter) Next() (BatchHeader, []byte, error) { header, err := iter.f.ReadString('\n') if err != nil { - return OID{}, "", 0, nil, err + return missingHeader, nil, err } - oid, objectType, objectSize, err := parseBatchHeader("", header) + obj, err := ParseBatchHeader("", header) if err != nil { - return OID{}, "", 0, nil, err + return missingHeader, nil, err } // +1 for LF: - data := make([]byte, objectSize+1) + data := make([]byte, obj.ObjectSize+1) _, err = io.ReadFull(iter.f, data) if err != nil { - return OID{}, "", 0, nil, err + return missingHeader, nil, err } data = data[:len(data)-1] - return oid, objectType, objectSize, data, nil + return obj, data, nil } // Close closes the iterator and frees up resources. If any iterator @@ -84,27 +79,3 @@ func (iter *BatchObjectIter) Close() error { } return err } - -// Parse a `cat-file --batch[-check]` output header line (including -// the trailing LF). `spec`, if not "", is used in error messages. -func parseBatchHeader(spec string, header string) (OID, ObjectType, counts.Count32, error) { - header = header[:len(header)-1] - words := strings.Split(header, " ") - if words[len(words)-1] == "missing" { - if spec == "" { - spec = words[0] - } - return OID{}, "missing", 0, fmt.Errorf("missing object %s", spec) - } - - oid, err := NewOID(words[0]) - if err != nil { - return OID{}, "missing", 0, err - } - - size, err := strconv.ParseUint(words[2], 10, 0) - if err != nil { - return OID{}, "missing", 0, err - } - return oid, ObjectType(words[1]), counts.NewCount32(size), nil -} diff --git a/git/obj_iter.go b/git/obj_iter.go index 57e44bc..04b4825 100644 --- a/git/obj_iter.go +++ b/git/obj_iter.go @@ -6,8 +6,6 @@ import ( "io" "os" "os/exec" - - "github.com/github/git-sizer/counts" ) // ObjectIter iterates over objects in a Git repository. 
@@ -105,13 +103,13 @@ func (repo *Repository) NewObjectIter( // Next returns the next object: its OID, type, and size. When no more // data are available, it returns an `io.EOF` error. -func (iter *ObjectIter) Next() (OID, ObjectType, counts.Count32, error) { +func (iter *ObjectIter) Next() (BatchHeader, error) { line, err := iter.f.ReadString('\n') if err != nil { - return OID{}, "", 0, err + return missingHeader, err } - return parseBatchHeader("", line) + return ParseBatchHeader("", line) } // Close closes the iterator and frees up resources. diff --git a/sizes/graph.go b/sizes/graph.go index 51f5fe6..cf5331f 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -192,25 +192,25 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing blobs: %d") for { - oid, objectType, objectSize, err := iter.Next() + obj, err := iter.Next() if err != nil { if err != io.EOF { return HistorySize{}, err } break } - switch objectType { + switch obj.ObjectType { case "blob": progressMeter.Inc() - graph.RegisterBlob(oid, objectSize) + graph.RegisterBlob(obj.OID, obj.ObjectSize) case "tree": - trees = append(trees, ObjectHeader{oid, objectSize}) + trees = append(trees, ObjectHeader{obj.OID, obj.ObjectSize}) case "commit": - commits = append(commits, CommitHeader{ObjectHeader{oid, objectSize}, git.NullOID}) + commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, git.NullOID}) case "tag": - tags = append(tags, ObjectHeader{oid, objectSize}) + tags = append(tags, ObjectHeader{obj.OID, obj.ObjectSize}) default: - return HistorySize{}, fmt.Errorf("unexpected object type: %s", objectType) + return HistorySize{}, fmt.Errorf("unexpected object type: %s", obj.ObjectType) } } progressMeter.Done() @@ -286,22 +286,22 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing trees: %d") for range trees { - oid, objectType, _, data, err := objectIter.Next() + obj, data, err := objectIter.Next() if err != nil { if err != io.EOF { return HistorySize{}, err } 
return HistorySize{}, errors.New("fewer trees read than expected") } - if objectType != "tree" { - return HistorySize{}, fmt.Errorf("expected tree; read %#v", objectType) + if obj.ObjectType != "tree" { + return HistorySize{}, fmt.Errorf("expected tree; read %#v", obj.ObjectType) } progressMeter.Inc() - tree, err := git.ParseTree(oid, data) + tree, err := git.ParseTree(obj.OID, data) if err != nil { return HistorySize{}, err } - err = graph.RegisterTree(oid, tree) + err = graph.RegisterTree(obj.OID, tree) if err != nil { return HistorySize{}, err } @@ -313,26 +313,26 @@ func ScanRepositoryUsingGraph( // time: progressMeter.Start("Processing commits: %d") for i := len(commits); i > 0; i-- { - oid, objectType, _, data, err := objectIter.Next() + obj, data, err := objectIter.Next() if err != nil { if err != io.EOF { return HistorySize{}, err } return HistorySize{}, errors.New("fewer commits read than expected") } - if objectType != "commit" { - return HistorySize{}, fmt.Errorf("expected commit; read %#v", objectType) + if obj.ObjectType != "commit" { + return HistorySize{}, fmt.Errorf("expected commit; read %#v", obj.ObjectType) } - commit, err := git.ParseCommit(oid, data) + commit, err := git.ParseCommit(obj.OID, data) if err != nil { return HistorySize{}, err } - if oid != commits[i-1].oid { + if obj.OID != commits[i-1].oid { panic("commits not read in same order as requested") } commits[i-1].tree = commit.Tree progressMeter.Inc() - graph.RegisterCommit(oid, commit) + graph.RegisterCommit(obj.OID, commit) } progressMeter.Done() @@ -349,22 +349,22 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing annotated tags: %d") for range tags { - oid, objectType, _, data, err := objectIter.Next() + obj, data, err := objectIter.Next() if err != nil { if err != io.EOF { return HistorySize{}, err } return HistorySize{}, errors.New("fewer tags read than expected") } - if objectType != "tag" { - return HistorySize{}, fmt.Errorf("expected tag; read %#v", objectType) 
+ if obj.ObjectType != "tag" { + return HistorySize{}, fmt.Errorf("expected tag; read %#v", obj.ObjectType) } - tag, err := git.ParseTag(oid, data) + tag, err := git.ParseTag(obj.OID, data) if err != nil { return HistorySize{}, err } progressMeter.Inc() - graph.RegisterTag(oid, tag) + graph.RegisterTag(obj.OID, tag) } progressMeter.Done() From 24c73607bdbb9dece9e7baf721f0636c1a305564 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 15:31:44 +0100 Subject: [PATCH 135/176] sizerExe(): add a `default` clause to the `switch` statement Humor the linter. --- git_sizer_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 6e2def9..580268a 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -27,10 +27,12 @@ import ( func sizerExe(t *testing.T) string { t.Helper() - v := "bin/git-sizer" + var v string switch runtime.GOOS { case "windows": v = `bin\git-sizer.exe` + default: + v = "bin/git-sizer" } v, err := exec.LookPath(v) From 3489e7d3ea8aa7ca2a05f7a53898f097fe583379 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Fri, 5 Nov 2021 18:29:57 +0100 Subject: [PATCH 136/176] BatchObjectIter: use a pipeline --- git/batch_obj_iter.go | 175 ++++++++++++++++++++++++++++++------------ sizes/graph.go | 102 +++++++++--------------- 2 files changed, 160 insertions(+), 117 deletions(-) diff --git a/git/batch_obj_iter.go b/git/batch_obj_iter.go index f2d38e1..ee17337 100644 --- a/git/batch_obj_iter.go +++ b/git/batch_obj_iter.go @@ -2,18 +2,27 @@ package git import ( "bufio" + "context" + "fmt" "io" - "os" - "os/exec" + + "github.com/github/git-sizer/internal/pipe" ) +type ObjectRecord struct { + BatchHeader + Data []byte +} + // BatchObjectIter iterates over objects whose names are fed into its // stdin. The output is buffered, so it has to be closed before you // can be sure that you have gotten all of the objects. 
type BatchObjectIter struct { - cmd *exec.Cmd - out io.ReadCloser - f *bufio.Reader + ctx context.Context + p *pipe.Pipeline + oidCh chan OID + objCh chan ObjectRecord + errCh chan error } // NewBatchObjectIter returns a `*BatchObjectIterator` and an @@ -21,61 +30,127 @@ type BatchObjectIter struct { // are fed into the `io.WriteCloser`, one per line. The // `io.WriteCloser` should normally be closed and the iterator's // output drained before `Close()` is called. -func (repo *Repository) NewBatchObjectIter() (*BatchObjectIter, io.WriteCloser, error) { - cmd := repo.GitCommand("cat-file", "--batch", "--buffer") - - in, err := cmd.StdinPipe() - if err != nil { - return nil, nil, err +func (repo *Repository) NewBatchObjectIter(ctx context.Context) (*BatchObjectIter, error) { + iter := BatchObjectIter{ + ctx: ctx, + p: pipe.New(), + oidCh: make(chan OID), + objCh: make(chan ObjectRecord), + errCh: make(chan error), } - out, err := cmd.StdoutPipe() - if err != nil { - return nil, nil, err - } + iter.p.Add( + // Read OIDs from `iter.oidCh` and write them to `git + // cat-file`: + pipe.Function( + "request-objects", + func(ctx context.Context, _ pipe.Env, _ io.Reader, stdout io.Writer) error { + out := bufio.NewWriter(stdout) + + for { + select { + case oid, ok := <-iter.oidCh: + if !ok { + return out.Flush() + } + if _, err := fmt.Fprintln(out, oid.String()); err != nil { + return fmt.Errorf("writing to 'git cat-file': %w", err) + } + case <-ctx.Done(): + return ctx.Err() + } + } + }, + ), + + // Read OIDs from `stdin` and output a header line followed by + // the contents of the corresponding Git objects: + pipe.CommandStage( + "git-cat-file", + repo.GitCommand("cat-file", "--batch", "--buffer"), + ), + + // Parse the object headers and read the object contents, and + // shove both into `objCh`: + pipe.Function( + "object-reader", + func(ctx context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { + defer close(iter.objCh) - cmd.Stderr = os.Stderr + f := 
bufio.NewReader(stdin) - err = cmd.Start() - if err != nil { - return nil, nil, err + for { + header, err := f.ReadString('\n') + if err != nil { + if err == io.EOF { + return nil + } + return fmt.Errorf("reading from 'git cat-file': %w", err) + } + batchHeader, err := ParseBatchHeader("", header) + if err != nil { + return fmt.Errorf("parsing output of 'git cat-file': %w", err) + } + + // Read the object contents plus the trailing LF + // (which is discarded below while creating the + // `ObjectRecord`): + data := make([]byte, batchHeader.ObjectSize+1) + if _, err := io.ReadFull(f, data); err != nil { + return fmt.Errorf( + "reading object data from 'git cat-file' for %s '%s': %w", + batchHeader.ObjectType, batchHeader.OID, err, + ) + } + + select { + case iter.objCh <- ObjectRecord{ + BatchHeader: batchHeader, + Data: data[:batchHeader.ObjectSize], + }: + case <-iter.ctx.Done(): + return iter.ctx.Err() + } + } + }, + ), + ) + + if err := iter.p.Start(ctx); err != nil { + return nil, err } - return &BatchObjectIter{ - cmd: cmd, - out: out, - f: bufio.NewReader(out), - }, in, nil + return &iter, nil } -// Next returns the next object: its OID, type, size, and contents. -// When no more data are available, it returns an `io.EOF` error. -func (iter *BatchObjectIter) Next() (BatchHeader, []byte, error) { - header, err := iter.f.ReadString('\n') - if err != nil { - return missingHeader, nil, err - } - obj, err := ParseBatchHeader("", header) - if err != nil { - return missingHeader, nil, err +// RequestObject requests that the object with the specified `oid` be +// processed. The objects registered via this method can be read using +// `Next()` in the order that they were requested. 
+func (iter *BatchObjectIter) RequestObject(oid OID) error { + select { + case iter.oidCh <- oid: + return nil + case <-iter.ctx.Done(): + return iter.ctx.Err() } - // +1 for LF: - data := make([]byte, obj.ObjectSize+1) - _, err = io.ReadFull(iter.f, data) - if err != nil { - return missingHeader, nil, err - } - data = data[:len(data)-1] - return obj, data, nil } -// Close closes the iterator and frees up resources. If any iterator -// output hasn't been read yet, it will be lost. -func (iter *BatchObjectIter) Close() error { - err := iter.out.Close() - err2 := iter.cmd.Wait() - if err == nil { - err = err2 +// Close closes the iterator and frees up resources. Close must be +// called exactly once. +func (iter *BatchObjectIter) Close() { + close(iter.oidCh) +} + +// Next either returns the next object (its header and contents), or a +// `false` boolean value if no more objects are left. Objects need to +// be read asynchronously, but the last objects won't necessarily show +// up here until `Close()` has been called. 
+func (iter *BatchObjectIter) Next() (ObjectRecord, bool, error) { + obj, ok := <-iter.objCh + if !ok { + return ObjectRecord{ + BatchHeader: missingHeader, + }, false, iter.p.Wait() } - return err + return obj, true, nil } diff --git a/sizes/graph.go b/sizes/graph.go index cf5331f..e93293d 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -68,11 +68,11 @@ func ScanRepositoryUsingGraph( repo *git.Repository, rg RefGrouper, nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { - graph := NewGraph(rg, nameStyle) - ctx, cancel := context.WithCancel(context.TODO()) defer cancel() + graph := NewGraph(rg, nameStyle) + refIter, err := repo.NewReferenceIter(ctx) if err != nil { return HistorySize{}, err @@ -226,78 +226,52 @@ func ScanRepositoryUsingGraph( return HistorySize{}, err } - objectIter, objectIn, err := repo.NewBatchObjectIter() + objectIter, err := repo.NewBatchObjectIter(ctx) if err != nil { return HistorySize{}, err } - defer func() { - if objectIter != nil { - objectIter.Close() - } - }() go func() { - defer objectIn.Close() - bufin := bufio.NewWriter(objectIn) - defer bufin.Flush() + defer objectIter.Close() - for _, obj := range trees { - _, err := bufin.WriteString(obj.oid.String()) - if err != nil { - errChan <- err - return + errChan <- func() error { + for _, obj := range trees { + if err := objectIter.RequestObject(obj.oid); err != nil { + return fmt.Errorf("requesting tree '%s': %w", obj.oid, err) + } } - err = bufin.WriteByte('\n') - if err != nil { - errChan <- err - return - } - } - for i := len(commits); i > 0; i-- { - obj := commits[i-1] - _, err := bufin.WriteString(obj.oid.String()) - if err != nil { - errChan <- err - return - } - err = bufin.WriteByte('\n') - if err != nil { - errChan <- err - return + for i := len(commits); i > 0; i-- { + obj := commits[i-1] + if err := objectIter.RequestObject(obj.oid); err != nil { + return fmt.Errorf("requesting commit '%s': %w", obj.oid, err) + } } - } - for _, obj := range tags { 
- _, err := bufin.WriteString(obj.oid.String()) - if err != nil { - errChan <- err - return + for _, obj := range tags { + if err := objectIter.RequestObject(obj.oid); err != nil { + return fmt.Errorf("requesting tag '%s': %w", obj.oid, err) + } } - err = bufin.WriteByte('\n') - if err != nil { - errChan <- err - return - } - } - errChan <- nil + return nil + }() }() progressMeter.Start("Processing trees: %d") for range trees { - obj, data, err := objectIter.Next() + obj, ok, err := objectIter.Next() if err != nil { - if err != io.EOF { - return HistorySize{}, err - } + return HistorySize{}, err + } + if !ok { return HistorySize{}, errors.New("fewer trees read than expected") } if obj.ObjectType != "tree" { return HistorySize{}, fmt.Errorf("expected tree; read %#v", obj.ObjectType) } progressMeter.Inc() - tree, err := git.ParseTree(obj.OID, data) + tree, err := git.ParseTree(obj.OID, obj.Data) if err != nil { return HistorySize{}, err } @@ -313,17 +287,17 @@ func ScanRepositoryUsingGraph( // time: progressMeter.Start("Processing commits: %d") for i := len(commits); i > 0; i-- { - obj, data, err := objectIter.Next() + obj, ok, err := objectIter.Next() if err != nil { - if err != io.EOF { - return HistorySize{}, err - } + return HistorySize{}, err + } + if !ok { return HistorySize{}, errors.New("fewer commits read than expected") } if obj.ObjectType != "commit" { return HistorySize{}, fmt.Errorf("expected commit; read %#v", obj.ObjectType) } - commit, err := git.ParseCommit(obj.OID, data) + commit, err := git.ParseCommit(obj.OID, obj.Data) if err != nil { return HistorySize{}, err } @@ -349,17 +323,17 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing annotated tags: %d") for range tags { - obj, data, err := objectIter.Next() + obj, ok, err := objectIter.Next() if err != nil { - if err != io.EOF { - return HistorySize{}, err - } + return HistorySize{}, err + } + if !ok { return HistorySize{}, errors.New("fewer tags read than expected") } if 
obj.ObjectType != "tag" { return HistorySize{}, fmt.Errorf("expected tag; read %#v", obj.ObjectType) } - tag, err := git.ParseTag(obj.OID, data) + tag, err := git.ParseTag(obj.OID, obj.Data) if err != nil { return HistorySize{}, err } @@ -373,12 +347,6 @@ func ScanRepositoryUsingGraph( return HistorySize{}, err } - err = objectIter.Close() - objectIter = nil - if err != nil { - return HistorySize{}, err - } - progressMeter.Start("Processing references: %d") for _, refSeen := range refsSeen { progressMeter.Inc() From 9dc78e30b06c00d2c118293dbb8a777a613ac524 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 16:06:03 +0100 Subject: [PATCH 137/176] pipe.FinishEarly: suppress a different linter's warning --- internal/pipe/pipeline.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/pipe/pipeline.go b/internal/pipe/pipeline.go index f417db0..4725907 100644 --- a/internal/pipe/pipeline.go +++ b/internal/pipe/pipeline.go @@ -21,7 +21,7 @@ type Env struct { // request that the iteration be ended early (possibly without reading // all of its input). This "error" is considered a successful return, // and is not reported to the caller. 
-//nolint:revive +//nolint:errname var FinishEarly = errors.New("finish stage early") // Pipeline represents a Unix-like pipe that can include multiple From 15487a4f52053f007e2e54213158438bb7971ec9 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 6 Nov 2021 12:36:45 +0100 Subject: [PATCH 138/176] ObjectIter: use a pipeline --- git/obj_iter.go | 219 ++++++++++++++++++++++++++---------------------- sizes/graph.go | 86 +++++++------------ 2 files changed, 151 insertions(+), 154 deletions(-) diff --git a/git/obj_iter.go b/git/obj_iter.go index 04b4825..53b7103 100644 --- a/git/obj_iter.go +++ b/git/obj_iter.go @@ -2,128 +2,147 @@ package git import ( "bufio" + "context" "fmt" "io" - "os" - "os/exec" + + "github.com/github/git-sizer/internal/pipe" ) // ObjectIter iterates over objects in a Git repository. type ObjectIter struct { - cmd1 *exec.Cmd - cmd2 *exec.Cmd - out1 io.ReadCloser - out2 io.ReadCloser - f *bufio.Reader - errChan <-chan error + ctx context.Context + p *pipe.Pipeline + oidCh chan OID + errCh chan error + headerCh chan BatchHeader } // NewObjectIter returns an iterator that iterates over objects in // `repo`. The arguments are passed to `git rev-list --objects`. The // second return value is the stdin of the `rev-list` command. The // caller can feed values into it but must close it in any case. -func (repo *Repository) NewObjectIter( - args ...string, -) (*ObjectIter, io.WriteCloser, error) { - cmd1 := repo.GitCommand(append([]string{"rev-list", "--objects"}, args...)...) 
- in1, err := cmd1.StdinPipe() - if err != nil { - return nil, nil, err - } - - out1, err := cmd1.StdoutPipe() - if err != nil { - return nil, nil, err - } - - cmd1.Stderr = os.Stderr - - err = cmd1.Start() - if err != nil { - return nil, nil, err - } - - cmd2 := repo.GitCommand("cat-file", "--batch-check", "--buffer") - in2, err := cmd2.StdinPipe() - if err != nil { - out1.Close() - cmd1.Wait() - return nil, nil, err - } - - out2, err := cmd2.StdoutPipe() - if err != nil { - in2.Close() - out1.Close() - cmd1.Wait() - return nil, nil, err +func (repo *Repository) NewObjectIter(ctx context.Context, args ...string) (*ObjectIter, error) { + iter := ObjectIter{ + ctx: ctx, + p: pipe.New(), + oidCh: make(chan OID), + errCh: make(chan error), + headerCh: make(chan BatchHeader), } - cmd2.Stderr = os.Stderr + iter.p.Add( + // Read OIDs from `iter.oidCh` and write them to `git + // rev-list`: + pipe.Function( + "request-objects", + func(ctx context.Context, _ pipe.Env, _ io.Reader, stdout io.Writer) error { + out := bufio.NewWriter(stdout) + + for { + select { + case oid, ok := <-iter.oidCh: + if !ok { + return out.Flush() + } + if _, err := fmt.Fprintln(out, oid.String()); err != nil { + return fmt.Errorf("writing to 'git cat-file': %w", err) + } + case <-ctx.Done(): + return ctx.Err() + } + } + }, + ), + + // Walk starting at the OIDs on `stdin` and output the OIDs + // (possibly followed by paths) of all of the Git objects + // found. 
+ pipe.CommandStage( + "git-rev-list", + repo.GitCommand("rev-list", "--objects", "--stdin", "--date-order"), + ), + + // Read the output of `git rev-list --objects`, strip off any + // trailing information, and write the OIDs to `git cat-file`: + pipe.LinewiseFunction( + "copy-oids", + func(_ context.Context, _ pipe.Env, line []byte, stdout *bufio.Writer) error { + if len(line) < 40 { + return fmt.Errorf("line too short: '%s'", line) + } + if _, err := stdout.Write(line[:40]); err != nil { + return fmt.Errorf("writing OID to 'git cat-file': %w", err) + } + if err := stdout.WriteByte('\n'); err != nil { + return fmt.Errorf("writing LF to 'git cat-file': %w", err) + } + return nil + }, + ), + + // Process the OIDs from stdin and, for each object, output a + // header: + pipe.CommandStage( + "git-cat-file", + repo.GitCommand("cat-file", "--batch-check", "--buffer"), + ), + + // Parse the object headers and shove them into `headerCh`: + pipe.Function( + "object-parser", + func(ctx context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { + defer close(iter.headerCh) + + f := bufio.NewReader(stdin) + + for { + header, err := f.ReadString('\n') + if err != nil { + if err == io.EOF { + return nil + } + return fmt.Errorf("reading from 'git cat-file': %w", err) + } + batchHeader, err := ParseBatchHeader("", header) + if err != nil { + return fmt.Errorf("parsing output of 'git cat-file': %w", err) + } + + iter.headerCh <- batchHeader + } + }, + ), + ) - err = cmd2.Start() - if err != nil { - return nil, nil, err + if err := iter.p.Start(ctx); err != nil { + return nil, err } - errChan := make(chan error, 1) - - go func() { - defer in2.Close() - f1 := bufio.NewReader(out1) - f2 := bufio.NewWriter(in2) - defer f2.Flush() - for { - line, err := f1.ReadString('\n') - if err != nil { - if err != io.EOF { - errChan <- err - } else { - errChan <- nil - } - return - } - if len(line) <= 40 { - errChan <- fmt.Errorf("line too short: %#v", line) - } - 
f2.WriteString(line[:40]) - f2.WriteByte('\n') - } - }() - - return &ObjectIter{ - cmd1: cmd1, - cmd2: cmd2, - out1: out1, - out2: out2, - f: bufio.NewReader(out2), - errChan: errChan, - }, in1, nil + return &iter, nil } -// Next returns the next object: its OID, type, and size. When no more -// data are available, it returns an `io.EOF` error. -func (iter *ObjectIter) Next() (BatchHeader, error) { - line, err := iter.f.ReadString('\n') - if err != nil { - return missingHeader, err +// AddRoot adds another OID to be included in the walk. +func (iter *ObjectIter) AddRoot(oid OID) error { + select { + case iter.oidCh <- oid: + return nil + case <-iter.ctx.Done(): + return iter.ctx.Err() } - - return ParseBatchHeader("", line) } // Close closes the iterator and frees up resources. -func (iter *ObjectIter) Close() error { - iter.out1.Close() - err := <-iter.errChan - iter.out2.Close() - err2 := iter.cmd1.Wait() - if err == nil { - err = err2 - } - err2 = iter.cmd2.Wait() - if err == nil { - err = err2 +func (iter *ObjectIter) Close() { + close(iter.oidCh) +} + +// Next returns either the next object (its OID, type, and size), or a +// `false` boolean value to indicate that there are no data left. 
+func (iter *ObjectIter) Next() (BatchHeader, bool, error) { + header, ok := <-iter.headerCh + if !ok { + return missingHeader, false, iter.p.Wait() } - return err + return header, true, nil } diff --git a/sizes/graph.go b/sizes/graph.go index e93293d..71c8676 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -1,11 +1,9 @@ package sizes import ( - "bufio" "context" "errors" "fmt" - "io" "sync" "github.com/github/git-sizer/counts" @@ -78,62 +76,48 @@ func ScanRepositoryUsingGraph( return HistorySize{}, err } - iter, in, err := repo.NewObjectIter("--stdin", "--date-order") + objIter, err := repo.NewObjectIter(context.TODO()) if err != nil { return HistorySize{}, err } - defer func() { - if iter != nil { - iter.Close() - } - }() errChan := make(chan error, 1) var refsSeen []refSeen // Feed the references that we want into the stdin of the object // iterator: go func() { - defer in.Close() - bufin := bufio.NewWriter(in) - defer bufin.Flush() - - for { - ref, ok, err := refIter.Next() - if err != nil { - errChan <- err - return - } - if !ok { - break - } + defer objIter.Close() - walk, groups := rg.Categorize(ref.Refname) + errChan <- func() error { + for { + ref, ok, err := refIter.Next() + if err != nil { + return err + } + if !ok { + return nil + } - refsSeen = append( - refsSeen, - refSeen{ - Reference: ref, - walked: walk, - groups: groups, - }, - ) + walk, groups := rg.Categorize(ref.Refname) - if !walk { - continue - } + refsSeen = append( + refsSeen, + refSeen{ + Reference: ref, + walked: walk, + groups: groups, + }, + ) - _, err = bufin.WriteString(ref.OID.String()) - if err != nil { - errChan <- err - return - } - err = bufin.WriteByte('\n') - if err != nil { - errChan <- err - return + if !walk { + continue + } + + if err := objIter.AddRoot(ref.OID); err != nil { + return err + } } - } - errChan <- err + }() }() type ObjectHeader struct { @@ -192,11 +176,11 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing blobs: %d") for { - obj, err := 
iter.Next() + obj, ok, err := objIter.Next() if err != nil { - if err != io.EOF { - return HistorySize{}, err - } + return HistorySize{}, err + } + if !ok { break } switch obj.ObjectType { @@ -220,12 +204,6 @@ func ScanRepositoryUsingGraph( return HistorySize{}, err } - err = iter.Close() - iter = nil - if err != nil { - return HistorySize{}, err - } - objectIter, err := repo.NewBatchObjectIter(ctx) if err != nil { return HistorySize{}, err From 3faeb539981a79e2b8c25f15eee4c7c5210262e3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sun, 7 Nov 2021 16:06:46 +0100 Subject: [PATCH 139/176] .golangci.toml: add an explicit linter config This tightens things up (including some things that have been fixed in recent commits). --- .golangci.toml | 443 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 443 insertions(+) create mode 100644 .golangci.toml diff --git a/.golangci.toml b/.golangci.toml new file mode 100644 index 0000000..362ac4f --- /dev/null +++ b/.golangci.toml @@ -0,0 +1,443 @@ +[linters] + # This file is intended to be used by your IDE to show you what linting + # issues exist in the code as you work on it. The github actions will run + # only the Tier 1 linters against the whole codebase (see + # .golangci-repo.toml, but it should be the same as the tier 1 list here). + # The tier 2 and 3 linters will run only against the files you change in a + # PR, so that you can clean up as you go. + # + # To see what issues will be present on just the PR files, you can run + # golangci-lint run --new-from-rev=origin/main + + # format of this list: + # "lintername", # description + # reason it's enabled + enable = [ + # + # Full Repo Scan - Linters that find bugs. + # + + "bodyclose", # checks whether HTTP response body is closed successfully + # Forgetting to close an HTTP body can be a memory leak + "durationcheck", # check for two durations multiplied together + # this is probably a rare bug, but should have basically zero false positives. 
+ "errcheck", # finds unchecked error returns + # Checking all errors is just good dev practice. + "errorlint", # finds code that will cause problems with the error wrapping scheme introduced in Go 1.13 + # This ensures you use errors.Is instead of == to compare errors, to avoid bugs with wrapping. + "exportloopref", # catch bugs resulting from referencing variables on range scope + # variables initialized in for loops change with each loop, which can cause bugs. + "forcetypeassert", # finds type asserts where you don't use the v, ok format + # if you do v := foo.(bar) and foo is not a bar, this will panic, and that's bad. + "gocritic", # Provides many diagnostics that check for bugs, performance and style issues. + # This is highly configurable, see the gocritic config section below. + "goerr113", # checks that you use errors.Is and don't define your own errors except as package variables. + # If you don't use errors.Is, then your code can break if someone wraps an error before they + # return it. Creating errors with errors.New("some message") makes a magic error that no one + # can handle, so either create it as a sentinel, or give it a type that people can check against. + "goimports", # check that all code is formatted with goimports + # Formatting is good. goimports is better (and formats imports slightly differently than gofmt). + "gosec", # Inspects source code for security problems + # high quality linter that finds real bugs + "govet", # reports suspicious constructs like printf calls that don't have the right # of arguments + # high quality, low false positives + "ineffassign", # Detects when assignments to existing variables are not used + # this finds bugs all the time, where you assign to a value but then never use + # the assigned value due to shadowing etc. + "nolintlint", # Reports ill-formed or insufficient nolint directives + # ensures that you don't typo nolint comments. and that you justify them with why you are ignoring a linter here.
+ "rowserrcheck", # checks whether Err of rows is checked successfully + # finds bugs in SQL code + "sqlclosecheck", # Checks that sql.Rows and sql.Stmt are closed. + # easy and finds bugs + "typecheck", # parses and type-checks Go code + # probably unnecessary, but shouldn't hurt anything + "wastedassign", # finds wasted assignment statements. + # can find bugs where you assign something but never use it + + # + # PR Scan - less critical, but should be fixed as we go along + # + + "deadcode", # Finds unused code + # dead code can be a bug or just confusing for the next dev + "depguard", # checks if package imports are in a list of acceptable packages + # this is useful for ensuring people use the company-standard packages for logging etc. + "errname", # Checks that sentinel errors are prefixed with the Err and error types are suffixed with the Error. + # This is standard practice and makes it easy to find error types and sentinels in the code. + "gochecknoinits", # Checks that no init functions are present in Go code + # init is bad, and is almost never necessary, nor is it a good idea. + "godot", # Check if comments end in a period + # this is a recommended Go style, and not only makes your doc comments look more + # professional, it ensures that you don't stop a comment in the middle and forget + # to write the end of it. + #"godox", # detects use of FIXME, TODO and other comment keywords + # These should be issues in an issue tracker, not comments in the code. + "gosimple", # tells you where you can simplify your code + # simple is good + "makezero", # checks that you don't accidentally make a slice w/ nonzero length and then append to it + # this can cause bugs where you make a slice of length 5 and then append 5 items to it, + # giving you a length of 10 where the first 5 are all zero values. 
+ "misspell", # Finds commonly misspelled English words in comments + # we all suck at spelling and tpying + "nakedret", # Finds naked returns in functions greater than a specified function length + # naked returns are evil + #"nestif", # Reports deeply nested if statements + # deeply nested ifs are hard to read + "nilerr", # Finds the code that returns nil even if it checks that the error is not nil. + # finds fairly common bug + "noctx", # noctx finds sending http request without context.Context + # you should always use context so we can cancel external requests + "prealloc", # Finds slice declarations that could potentially be preallocated + # this can save some memory and copying, otherwise append guesses how big to make slices and may need to + # copy all items in a slice to a bigger one. + "predeclared", # find code that shadows one of Go's predeclared identifiers + # you can make a variable called "true", but it's a bad idea. + #"revive", # finds common style mistakes + # style and other mistakes that you really should listen to. + "staticcheck", # go vet on steroids, applying a ton of static analysis checks + # encompasses many linters in one, good stuff + "structcheck", # Finds unused struct fields + # can find bugs or trim unused fields to save memory + #"tparallel", # tparallel detects inappropriate usage of t.Parallel() + # likely a rare problem, but should have low false positives + "unconvert", # Remove unnecessary type conversions + # can save a little memory, unlikely to have false positives + "unused", # Checks for unused constants, variables, functions and types + # may have false positives, should watch this one + "varcheck", # Finds unused global variables and constants + # may have false positives, should watch this one + ] + + # we don't bother putting anything in disable, since we manually enable each linter. + # See the bottom of the file for disabled linters. 
+ disable = [] + + +[run] + # options for analysis running + # Increase timeout from default 1m, first pre-cache run can take a bit in CI/CD + timeout = "5m" + + # default concurrency is the available CPU number + # concurrency = 4 + + # exit code when at least one issue was found, default is 1 + issues-exit-code = 1 + + # include test files or not, default is true + tests = true + + # list of build tags, all linters use it. Default is empty list. + build-tags = [] + + # which dirs to skip: issues from them won't be reported; + # can use regexp here: generated.*, regexp is applied on full path; + # default value is empty list, but default dirs are skipped independently + # from this option's value (see skip-dirs-use-default). + # "/" will be replaced by current OS file path separator to properly work + # on Windows. + skip-dirs = [] + + # default is true. Enables skipping of directories: + # vendor$, third_party$, testdata$, examples$, Godeps$, builtin$ + skip-dirs-use-default = true + + # which files to skip: they will be analyzed, but issues from them + # won't be reported. Default value is empty list, but there is + # no need to include all autogenerated files, we confidently recognize + # autogenerated files. If it's not please let us know. + # "/" will be replaced by current OS file path separator to properly work + # on Windows. + skip-files = [] + + # by default isn't set. If set we pass it to "go list -mod={option}". From "go help modules": + # If invoked with -mod=readonly, the go command is disallowed from the implicit + # automatic updating of go.mod described above. Instead, it fails when any changes + # to go.mod are needed. This setting is most useful to check that go.mod does + # not need updates, such as in a continuous integration and testing system. + # If invoked with -mod=vendor, the go command assumes that the vendor + # directory holds the correct copies of dependencies and ignores + # the dependency descriptions in go.mod. 
+ modules-download-mode = "" + + # Allow multiple parallel golangci-lint instances running. + # If false (default) - golangci-lint acquires file lock on start. + allow-parallel-runners = false + + +[output] + # colored-line-number|line-number|json|tab|checkstyle|code-climate|junit-xml|github-actions + # default is "colored-line-number" + format = "colored-line-number" + + # print lines of code with issue, default is true + print-issued-lines = true + + # print linter name in the end of issue text, default is true + print-linter-name = true + + # make issues output unique by line, default is true + uniq-by-line = true + + # add a prefix to the output file references; default is no prefix + path-prefix = "" + + # sorts results by: filepath, line and column + sort-results = true + + + +# options to enable differentiating between error and warning severities +[severity] + # GitHub Actions annotations support error and warning only: + # https://docs.github.com/en/free-pro-team@latest/actions/reference/workflow-commands-for-github-actions#setting-an-error-message + default-severity = "error" + + # If set to true severity-rules regular expressions become case sensitive. + # The default value is false. + case-sensitive = false + + # Default value is empty list. + # When a list of severity rules are provided, severity information will be added to lint + # issues. Severity rules have the same filtering capability as exclude rules except you + # are allowed to specify one matcher per severity rule. + # Only affects out formats that support setting severity information. + # [[severity.rules]] + # linters = [ + # "revive", + # ] + # severity = "warning" + +[issues] + # List of regexps of issue texts to exclude, empty list by default. + # Please document every exception here so we know what we're suppressing and why. + exclude = [ + # err113 doesn't like it when people use errors.New("abc"). 
+ # That's kinda valid but also kind of a PITA if you don't actually want + # to define static errors everywhere, and no one actually depends on them. + ".*do not define dynamic errors, use wrapped static errors instead.*" + ] + + # Maximum issues count per one linter. Set to 0 to disable. Default is 50. + max-issues-per-linter = 0 + + # Maximum count of issues with the same text. Set to 0 to disable. Default is 3. + max-same-issues = 0 + + # The default value is false. If set to true exclude and exclude-rules + # regular expressions become case sensitive. + # exclude-case-sensitive = false + + # This flag suppresses lint issues from several linters, overriding any other configuration you have set. + # It defaults to true. + # NEVER remove this configuration. If you want to suppress something, do so explicitly elsewhere. + exclude-use-default = false + + # The list of ids of default excludes to include or disable. By default it's empty. + # We shouldn't ever need this, since we turn off default excludes. + include = [] + + # Show only new issues: if there are unstaged changes or untracked files, + # only those changes are analyzed, else only changes in HEAD~ are analyzed. + # It's a super-useful option for integration of golangci-lint into existing + # large codebase. It's not practical to fix all existing issues at the moment + # of integration: much better don't allow issues in new code. + # Default is false. + new = false + + # Show only new issues created in git patch with set file path. + # new-from-patch = "path/to/patch/file" + + # Show only new issues created after git revision `REV` + # new-from-rev = "REV" + + # Fix found issues (if it's supported by the linter). Default is false. 
+ fix = false + + # reduce noise in some linters that don't necessarily need to be run in tests + [[issues.exclude-rules]] + path = "_test\\.go" + linters = ["errcheck", "gosec", "gocyclo", "noctx", "govet"] + +# +# Specific Linter Settings +# + +[linters-settings.depguard] + # ban some modules with replacements + list-type = "blacklist" + include-go-root = true + packages = [ + # we shouldn't use pkg/error anymore + "github.com/pkg/error", + ] + + [[linters-settings.depguard.packages-with-error-message]] + "github.com/pkg/error" = "Please use stdlib errors module" + +[linters-settings.errcheck] + # report about not checking of errors in type assertions: `a := b.(MyStruct)`; + # default is false: such cases aren't reported by default. + check-type-assertions = true + + # report about assignment of errors to blank identifier: `num, _ := strconv.Atoi(numStr)`; + # default is false: such cases aren't reported by default. + check-blank = false + + # path to a file containing a list of functions to exclude from checking + # see https://github.com/kisielk/errcheck#excluding-functions for details + exclude = "" + + # list of functions to exclude from checking, where each entry is a single function to exclude. + # see https://github.com/kisielk/errcheck#excluding-functions for details + exclude-functions = [] + +[linters-settings.errorlint] + # Check whether fmt.Errorf uses the %w verb for formatting errors. See the readme for caveats + errorf = true + # Check for plain type assertions and type switches + asserts = false + # Check for plain error comparisons + comparison = false + +[linters-settings.gocritic] + # Enable multiple checks by tags, run `GL_DEBUG=gocritic golangci-lint run` to see all tags and checks. + # Empty list by default. See https://github.com/go-critic/go-critic#usage -> section "Tags". + enabled-tags = [ + "diagnostic", + "performance", + "style", + ] + disabled-checks = [ + # import shadow warns if a variable shadow the name of an imported package. 
+ # kind of noisy, doesn't actually hurt anything, just may be momentarily confusing. + "importShadow", + "preferStringWriter", + "paramTypeCombine", + "unnamedResult", + "emptyStringTest", + "elseif", + "whyNoLint", + ] + + # HugeParam: warn if passing huge parameters by value; consider passing pointers instead. + [linters-settings.gocritic.settings.hugeParam] + # increase threshold from default (80 bytes) to 256 bytes. + sizeThreshold = 256 + + + + +[linters-settings.goimports] + # Goimports checks whether code was formatted with goimports. + # uncomment if we want to enforce having GitHub-owned packages sorted into a separate section + #local-prefixes = "github.com/github/" + +[linters-settings.govet] + enable = [ "httpresponse" ] + +[linters-settings.gosec] + excludes = [ + "G301", # Expect directory permissions to be 0750 or less. See umask. + "G307", # deferring methods with errors. This duplicates errcheck, and I don't want to have to use two nolints. + ] + + +[linters-settings.nolintlint] + # adds some protections around nolint directives + + # Enable to ensure that nolint directives are all used. Default is true. + allow-unused = false + # Disable to ensure that nolint directives don't have a leading space. Default is true. + allow-leading-space = false + # Exclude following linters from requiring an explanation. Default is []. + allow-no-explanation = [] + # Enable to require an explanation of nonzero length after each nolint directive. Default is false. + require-explanation = false + # Enable to require nolint directives to mention the specific linter being suppressed. Default is false. + require-specific = true + + + + +# List of linters supported by golangci-lint that we intentionally do not use. +# Intentionally formatted the same as the "enabled" list, so you can just move one +# up to that list to enable it. 
+# list is in the form + # "name", # description + # reason to disable + + + # "asciicheck", # checks that your code does not contain non-ASCII identifiers + # Honestly not sure why anyone cares? + # "cyclop", # checks function and package cyclomatic complexity + # Too hard to know when you trip over this, and I feel like it needs a human + # to understand if a function is too complex. + # "dogsled", # Checks assignments with too many blank identifiers (e.g. x, _, _, _, := f()) + # This doesn't seem to be a common problem, nor a source of bugs. It would be + # better to have a linter that just tells you not to return 4 things in the + # first place. + # "dupl", # Tool for code clone detection + # This feels too likely to have high false positives on trivial code, and miss + # more complicated duplicates. + # "exhaustive", # checks exhaustiveness of enum switch statements + # This tends to hit a lot of false positives, and can lead to a lot of nolint statements. + # Definitely could be useful for specific repos of focused libraries where you know you + # update enums a lot, and want to make sure your switch statements stay up to date. + # "exhaustivestruct", # Checks if all struct's fields are initialized + # This is generally a feature, not a bug. Requiring a //nolint whenever you partially + # initialize a struct would be pretty annoying. + # "forbidigo", # Can be configured to forbid specific identifiers, like fmt.Printf, for example. + # This can actually be really useful, but needs a deep understanding of patterns + # we want devs to avoid in our specific repos. Definitely look into it if you have + # a list of "don't use XYZ" items. + # "funlen", # Tool for detection of long functions + # We could maybe put this in with a pretty big size limit, but it feels like it would be + # of limited benefit and cause grumbling.
+ # "gci", # control golang package import order and make it always deterministic + # I haven't really had a problem with this, when using goimports, so I'm not sure it's useful. + # "gochecknoglobals", # check that no global variables exist + # this is actually good to have on, but I'm afraid it would cause more heartburn than good. + # "gocognit", # Computes and checks the cognitive complexity of functions + # Too hard to know when you trip over this, and I feel like it needs a human + # to understand if a function is too complex. + # "goconst", # Finds repeated strings that could be replaced by a constant + # magic strings are bad, but I feel like this could reduce adoption of the linter. + # "gofmt", # checks whether code was gofmt-ed. + # use goimports instead, they have slightly different formatting. + # "gofumpt", # checks whether code is gofumpt-ed + # use goimports instead, they have slightly different formatting. + # "goheader", # checks if file header matches a pattern + # useful for companies that mandate a copyright header on every file. That's not github. + # "golint", # unmaintained + # "gomnd", # an analyzer to detect magic numbers + # just too noisy + # "ifshort", # makes sure you use if err := foo(); err != nil + # this is really more personal preference, and sometimes can hinder readability. + # "importas", # enforces consistent import aliases + # this is kind of a special case for avoiding import collisions, and not really needed for us. + # "interfacer", # unmaintained + # "lll", # reports long lines + # duplicated by other checks + # "nlreturn", # nlreturn checks for a new line before return and branch statements to increase code clarity + # I'm not a monster, newline if you like, or not. + # "paralleltest", # paralleltest detects missing usage of t.Parallel() method in your Go test + # parallel tests are good, but packages are already run in parallel, so it's not a huge gain.
+ # "promlinter", # Check Prometheus metrics naming via promlint + # enable if you use prometheus + # "scopelint", # unmaintained + # "tagliatelle", # Checks that struct tags match a certain format (camelcase, snakecase etc) + # likely to cause a lot of false positives if you're making tags for other people's APIs + # "testpackage", # makes you use a separate _test package + # I actually think this is a bad idea in general, and I would want a linter that does the opposite. + # "thelper", # detects golang test helpers without t.Helper() + # t.Helper is sometimes useful and sometimes not. + # "unparam", # Reports unused function parameters + # seems likely to have false positives + # "whitespace", # finds extra newlines at the beginning of functions and if statements + # I like this, but I feel like it would be too nitpicky for most people + # "wrapcheck", # Checks that errors returned from external packages are wrapped + # I mean, yeah, but you don't *always* need to wrap, that gets excessive. + # "wsl", # Whitespace Linter - Forces you to use empty lines! + # meh, I'm not that much of a control freak From 0532607b104d5588e19f891274bce5909b8fbc39 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 10 Nov 2021 16:52:57 +0100 Subject: [PATCH 140/176] Add a `CODEOWNERS` file --- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) create mode 100644 CODEOWNERS diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..9b79bdd --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1 @@ +* @github/git-storage-reviewers From 7dd1823523aab18d2999e96c9e0fd0377c3d26af Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Tue, 16 Nov 2021 12:51:20 +0100 Subject: [PATCH 141/176] NewObjectIter(): remove unused arg --- git/obj_iter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git/obj_iter.go b/git/obj_iter.go index 53b7103..268280b 100644 --- a/git/obj_iter.go +++ b/git/obj_iter.go @@ -22,7 +22,7 @@ type ObjectIter struct { // `repo`.
The arguments are passed to `git rev-list --objects`. The // second return value is the stdin of the `rev-list` command. The // caller can feed values into it but must close it in any case. -func (repo *Repository) NewObjectIter(ctx context.Context, args ...string) (*ObjectIter, error) { +func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error) { iter := ObjectIter{ ctx: ctx, p: pipe.New(), From 2c988fab54cadec8281786dec63c54d74bb159b9 Mon Sep 17 00:00:00 2001 From: Oliver Bone Date: Fri, 21 Oct 2022 14:18:01 +0000 Subject: [PATCH 142/176] test: fix tests use of 'file' protocol In response to CVE-2022-39253, Git now considers the `file://` protocol to be unsafe by default. The default value of the `protocol.file.allow` config variable was changed to `user` [1], meaning that a file URL or a local path is only trusted if it came directly from user input, and not if it came through a command which executes a clone/fetch/push internally. The tests fall foul of this new requirement by attempting to run a `git submodule add` with a local directory. Internally, this performs a clone, which is no longer trusted because of the change described above. This results in the command failing with a "transport 'file' not allowed" message. Since this is only the case for a single command, then fix the test by setting `protocol.file.allow` to `always` when we run it. 
[1] https://github.blog/2022-10-18-git-security-vulnerabilities-announced/#cve-2022-39253 --- git_sizer_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 580268a..6ab132f 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -701,7 +701,7 @@ func TestSubmodule(t *testing.T) { require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = mainRepo.GitCommand(t, "submodule", "add", submRepo.Path, "sub") + cmd = mainRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submRepo.Path, "sub") cmd.Dir = mainRepo.Path require.NoError(t, cmd.Run(), "adding submodule") From 9e95b4b8a63c71f19dfd62b940eedb5927bd5dd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Pastor=20Olivar?= Date: Mon, 28 Nov 2022 16:05:50 +0100 Subject: [PATCH 143/176] This code has been moved into the go-pipe library --- internal/pipe/command.go | 175 ------ internal/pipe/command_unix.go | 66 --- internal/pipe/command_windows.go | 24 - internal/pipe/filter-error.go | 135 ----- internal/pipe/function.go | 66 --- internal/pipe/iocopier.go | 62 --- internal/pipe/linewise.go | 74 --- internal/pipe/pipeline.go | 272 ---------- internal/pipe/pipeline_test.go | 883 ------------------------------- internal/pipe/print.go | 37 -- internal/pipe/scanner.go | 67 --- internal/pipe/stage.go | 34 -- 12 files changed, 1895 deletions(-) delete mode 100644 internal/pipe/command.go delete mode 100644 internal/pipe/command_unix.go delete mode 100644 internal/pipe/command_windows.go delete mode 100644 internal/pipe/filter-error.go delete mode 100644 internal/pipe/function.go delete mode 100644 internal/pipe/iocopier.go delete mode 100644 internal/pipe/linewise.go delete mode 100644 internal/pipe/pipeline.go delete mode 100644 internal/pipe/pipeline_test.go delete mode 100644 internal/pipe/print.go delete mode 100644 internal/pipe/scanner.go delete mode 100644 internal/pipe/stage.go 
diff --git a/internal/pipe/command.go b/internal/pipe/command.go deleted file mode 100644 index d370e28..0000000 --- a/internal/pipe/command.go +++ /dev/null @@ -1,175 +0,0 @@ -package pipe - -import ( - "bytes" - "context" - "errors" - "io" - "os" - "os/exec" - "sync/atomic" - "syscall" - - "golang.org/x/sync/errgroup" -) - -// commandStage is a pipeline `Stage` based on running an external -// command and piping the data through its stdin and stdout. -type commandStage struct { - name string - stdin io.Closer - cmd *exec.Cmd - done chan struct{} - wg errgroup.Group - stderr bytes.Buffer - - // If the context expired and we attempted to kill the command, - // `ctx.Err()` is stored here. - ctxErr atomic.Value -} - -// Command returns a pipeline `Stage` based on the specified external -// `command`, run with the given command-line `args`. Its stdin and -// stdout are handled as usual, and its stderr is collected and -// included in any `*exec.ExitError` that the command might emit. -func Command(command string, args ...string) Stage { - if len(command) == 0 { - panic("attempt to create command with empty command") - } - - cmd := exec.Command(command, args...) - return CommandStage(command, cmd) -} - -// Command returns a pipeline `Stage` with the name `name`, based on -// the specified `cmd`. Its stdin and stdout are handled as usual, and -// its stderr is collected and included in any `*exec.ExitError` that -// the command might emit. 
-func CommandStage(name string, cmd *exec.Cmd) Stage { - return &commandStage{ - name: name, - cmd: cmd, - done: make(chan struct{}), - } -} - -func (s *commandStage) Name() string { - return s.name -} - -func (s *commandStage) Start( - ctx context.Context, env Env, stdin io.ReadCloser, -) (io.ReadCloser, error) { - if s.cmd.Dir == "" { - s.cmd.Dir = env.Dir - } - - if stdin != nil { - s.cmd.Stdin = stdin - // Also keep a copy so that we can close it when the command exits: - s.stdin = stdin - } - - stdout, err := s.cmd.StdoutPipe() - if err != nil { - return nil, err - } - - // If the caller hasn't arranged otherwise, read the command's - // standard error into our `stderr` field: - if s.cmd.Stderr == nil { - // We can't just set `s.cmd.Stderr = &s.stderr`, because if we - // do then `s.cmd.Wait()` doesn't wait to be sure that all - // error output has been captured. By doing this ourselves, we - // can be sure. - p, err := s.cmd.StderrPipe() - if err != nil { - return nil, err - } - s.wg.Go(func() error { - _, err := io.Copy(&s.stderr, p) - // We don't consider `ErrClosed` an error (FIXME: is this - // correct?): - if err != nil && !errors.Is(err, os.ErrClosed) { - return err - } - return nil - }) - } - - // Put the command in its own process group, if possible: - s.runInOwnProcessGroup() - - if err := s.cmd.Start(); err != nil { - return nil, err - } - - // Arrange for the process to be killed (gently) if the context - // expires before the command exits normally: - go func() { - select { - case <-ctx.Done(): - s.kill(ctx.Err()) - case <-s.done: - // Process already done; no need to kill anything. - } - }() - - return stdout, nil -} - -// filterCmdError interprets `err`, which was returned by `Cmd.Wait()` -// (possibly `nil`), possibly modifying it or ignoring it. It returns -// the error that should actually be returned to the caller (possibly -// `nil`). 
-func (s *commandStage) filterCmdError(err error) error { - if err == nil { - return nil - } - - eErr, ok := err.(*exec.ExitError) - if !ok { - return err - } - - ctxErr, ok := s.ctxErr.Load().(error) - if ok { - // If the process looks like it was killed by us, substitute - // `ctxErr` for the process's own exit error. Note that this - // doesn't do anything on Windows, where the `Signaled()` - // method isn't implemented (it is hardcoded to return - // `false`). - ps, ok := eErr.ProcessState.Sys().(syscall.WaitStatus) - if ok && ps.Signaled() && - (ps.Signal() == syscall.SIGTERM || ps.Signal() == syscall.SIGKILL) { - return ctxErr - } - } - - eErr.Stderr = s.stderr.Bytes() - return eErr -} - -func (s *commandStage) Wait() error { - defer close(s.done) - - // Make sure that any stderr is copied before `s.cmd.Wait()` - // closes the read end of the pipe: - wErr := s.wg.Wait() - - err := s.cmd.Wait() - err = s.filterCmdError(err) - - if err == nil && wErr != nil { - err = wErr - } - - if s.stdin != nil { - cErr := s.stdin.Close() - if cErr != nil && err == nil { - return cErr - } - } - - return err -} diff --git a/internal/pipe/command_unix.go b/internal/pipe/command_unix.go deleted file mode 100644 index c84bcf5..0000000 --- a/internal/pipe/command_unix.go +++ /dev/null @@ -1,66 +0,0 @@ -//go:build !windows -// +build !windows - -package pipe - -import ( - "syscall" - "time" -) - -// runInOwnProcessGroup arranges for `cmd` to be run in its own -// process group. -func (s *commandStage) runInOwnProcessGroup() { - // Put the command in its own process group: - if s.cmd.SysProcAttr == nil { - s.cmd.SysProcAttr = &syscall.SysProcAttr{} - } - s.cmd.SysProcAttr.Setpgid = true -} - -// kill is called to kill the process if the context expires. `err` is -// the corresponding value of `Context.Err()`. -func (s *commandStage) kill(err error) { - // I believe that the calls to `syscall.Kill()` in this method are - // racy. 
It could be that s.cmd.Wait() succeeds immediately before - // this call, in which case the process group wouldn't exist - // anymore. But I don't see any way to avoid this without - // duplicating a lot of code from `exec.Cmd`. (`os.Cmd.Kill()` and - // `os.Cmd.Signal()` appear to be race-free, but only because they - // use internal synchronization. But those methods only kill the - // process, not the process group, so they are not suitable here. - - // We started the process with PGID == PID: - pid := s.cmd.Process.Pid - select { - case <-s.done: - // Process has ended; no need to kill it again. - return - default: - } - - // Record the `ctx.Err()`, which will be used as the error result - // for this stage. - s.ctxErr.Store(err) - - // First try to kill using a relatively gentle signal so that - // the processes have a chance to clean up after themselves: - _ = syscall.Kill(-pid, syscall.SIGTERM) - - // Well-behaved processes should commit suicide after the above, - // but if they don't exit within 2s, murder the whole lot of them: - go func() { - // Use an explicit `time.Timer` rather than `time.After()` so - // that we can stop it (freeing resources) promptly if the - // command exits before the timer triggers. - timer := time.NewTimer(2 * time.Second) - defer timer.Stop() - - select { - case <-s.done: - // Process has ended; no need to kill it again. - case <-timer.C: - _ = syscall.Kill(-pid, syscall.SIGKILL) - } - }() -} diff --git a/internal/pipe/command_windows.go b/internal/pipe/command_windows.go deleted file mode 100644 index 55af6e3..0000000 --- a/internal/pipe/command_windows.go +++ /dev/null @@ -1,24 +0,0 @@ -//go:build windows -// +build windows - -package pipe - -// runInOwnProcessGroup is not supported on Windows. -func (s *commandStage) runInOwnProcessGroup() {} - -// kill is called to kill the process if the context expires. `err` is -// the corresponding value of `Context.Err()`. 
-func (s *commandStage) kill(err error) { - select { - case <-s.done: - // Process has ended; no need to kill it again. - return - default: - } - - // Record the `ctx.Err()`, which will be used as the error result - // for this stage. - s.ctxErr.Store(err) - - s.cmd.Process.Kill() -} diff --git a/internal/pipe/filter-error.go b/internal/pipe/filter-error.go deleted file mode 100644 index 6e2bdd5..0000000 --- a/internal/pipe/filter-error.go +++ /dev/null @@ -1,135 +0,0 @@ -package pipe - -import ( - "errors" - "io" - "os/exec" - "syscall" -) - -// ErrorFilter is a function that can filter errors from -// `Stage.Wait()`. The original error (possibly nil) is passed in as -// an argument, and whatever the function returns is the error -// (possibly nil) that is actually emitted. -type ErrorFilter func(err error) error - -func FilterError(s Stage, filter ErrorFilter) Stage { - return efStage{Stage: s, filter: filter} -} - -type efStage struct { - Stage - filter ErrorFilter -} - -func (s efStage) Wait() error { - return s.filter(s.Stage.Wait()) -} - -// ErrorMatcher decides whether its argument matches some class of -// errors (e.g., errors that we want to ignore). The function will -// only be invoked for non-nil errors. -type ErrorMatcher func(err error) bool - -// IgnoreError creates a stage that acts like `s` except that it -// ignores any errors that are matched by `em`. Use like -// -// p.Add(pipe.IgnoreError( -// someStage, -// func(err error) bool { -// var myError *MyErrorType -// return errors.As(err, &myError) && myError.foo == 42 -// }, -// ) -// -// The second argument can also be one of the `ErrorMatcher`s that are -// provided by this package (e.g., `IsError(target)`, -// IsSignal(signal), `IsSIGPIPE`, `IsEPIPE`, `IsPipeError`), or one of -// the functions from the standard library that has the same signature -// (e.g., `os.IsTimeout`), or some combination of these (e.g., -// `AnyError(IsSIGPIPE, os.IsTimeout)`). 
-func IgnoreError(s Stage, em ErrorMatcher) Stage { - return FilterError(s, - func(err error) error { - if err == nil || em(err) { - return nil - } - return err - }, - ) -} - -// AnyError returns an `ErrorMatcher` that returns true for an error -// that matches any of the `ems`. -func AnyError(ems ...ErrorMatcher) ErrorMatcher { - return func(err error) bool { - if err == nil { - return false - } - for _, em := range ems { - if em(err) { - return true - } - } - return false - } -} - -// IsError returns an ErrorIdentifier for the specified target error, -// matched using `errors.Is()`. Use like -// -// p.Add(pipe.IgnoreError(someStage, IsError(io.EOF))) -func IsError(target error) ErrorMatcher { - return func(err error) bool { - return errors.Is(err, target) - } -} - -// IsSIGPIPE returns an `ErrorMatcher` that matches `*exec.ExitError`s -// that were caused by the specified signal. The match for -// `*exec.ExitError`s uses `errors.As()`. Note that under Windows this -// always returns false, because on that platform -// `WaitStatus.Signaled()` isn't implemented (it is hardcoded to -// return `false`). -func IsSignal(signal syscall.Signal) ErrorMatcher { - return func(err error) bool { - var eErr *exec.ExitError - - if !errors.As(err, &eErr) { - return false - } - - status, ok := eErr.Sys().(syscall.WaitStatus) - return ok && status.Signaled() && status.Signal() == signal - } -} - -var ( - // IsSIGPIPE is an `ErrorMatcher` that matches `*exec.ExitError`s - // that were caused by SIGPIPE. The match for `*exec.ExitError`s - // uses `errors.As()`. Use like - // - // p.Add(IgnoreError(someStage, IsSIGPIPE)) - IsSIGPIPE = IsSignal(syscall.SIGPIPE) - - // IsEPIPE is an `ErrorMatcher` that matches `syscall.EPIPE` using - // `errors.Is()`. Use like - // - // p.Add(IgnoreError(someStage, IsEPIPE)) - IsEPIPE = IsError(syscall.EPIPE) - - // IsErrClosedPipe is an `ErrorMatcher` that matches - // `io.ErrClosedPipe` using `errors.Is()`. 
(`io.ErrClosedPipe` is - // the error that results from writing to a closed - // `*io.PipeWriter`.) Use like - // - // p.Add(IgnoreError(someStage, IsErrClosedPipe)) - IsErrClosedPipe = IsError(io.ErrClosedPipe) - - // IsPipeError is an `ErrorMatcher` that matches a few different - // errors that typically result if a stage writes to a subsequent - // stage that has stopped reading from its stdin. Use like - // - // p.Add(IgnoreError(someStage, IsPipeError)) - IsPipeError = AnyError(IsSIGPIPE, IsEPIPE, IsErrClosedPipe) -) diff --git a/internal/pipe/function.go b/internal/pipe/function.go deleted file mode 100644 index bc5d0bd..0000000 --- a/internal/pipe/function.go +++ /dev/null @@ -1,66 +0,0 @@ -package pipe - -import ( - "context" - "fmt" - "io" -) - -// StageFunc is a function that can be used to power a `goStage`. It -// should read its input from `stdin` and write its output to -// `stdout`. `stdin` and `stdout` will be closed automatically (if -// necessary) once the function returns. -// -// Neither `stdin` nor `stdout` are necessarily buffered. If the -// `StageFunc` requires buffering, it needs to arrange that itself. -// -// A `StageFunc` is run in a separate goroutine, so it must be careful -// to synchronize any data access aside from reading and writing. -type StageFunc func(ctx context.Context, env Env, stdin io.Reader, stdout io.Writer) error - -// Function returns a pipeline `Stage` that will run a `StageFunc` in -// a separate goroutine to process the data. See `StageFunc` for more -// information. -func Function(name string, f StageFunc) Stage { - return &goStage{ - name: name, - f: f, - done: make(chan struct{}), - } -} - -// goStage is a `Stage` that does its work by running an arbitrary -// `stageFunc` in a goroutine. 
-type goStage struct { - name string - f StageFunc - done chan struct{} - err error -} - -func (s *goStage) Name() string { - return s.name -} - -func (s *goStage) Start(ctx context.Context, env Env, stdin io.ReadCloser) (io.ReadCloser, error) { - r, w := io.Pipe() - go func() { - s.err = s.f(ctx, env, stdin, w) - if err := w.Close(); err != nil && s.err == nil { - s.err = fmt.Errorf("error closing output pipe for stage %q: %w", s.Name(), err) - } - if stdin != nil { - if err := stdin.Close(); err != nil && s.err == nil { - s.err = fmt.Errorf("error closing stdin for stage %q: %w", s.Name(), err) - } - } - close(s.done) - }() - - return r, nil -} - -func (s *goStage) Wait() error { - <-s.done - return s.err -} diff --git a/internal/pipe/iocopier.go b/internal/pipe/iocopier.go deleted file mode 100644 index 26d5b0f..0000000 --- a/internal/pipe/iocopier.go +++ /dev/null @@ -1,62 +0,0 @@ -package pipe - -import ( - "context" - "errors" - "io" - "os" -) - -// ioCopier is a stage that copies its stdin to a specified -// `io.Writer`. It generates no stdout itself. -type ioCopier struct { - w io.WriteCloser - done chan struct{} - err error -} - -func newIOCopier(w io.WriteCloser) *ioCopier { - return &ioCopier{ - w: w, - done: make(chan struct{}), - } -} - -func (s *ioCopier) Name() string { - return "ioCopier" -} - -// This method always returns `nil, nil`. 
-func (s *ioCopier) Start(ctx context.Context, _ Env, r io.ReadCloser) (io.ReadCloser, error) { - go func() { - _, err := io.Copy(s.w, r) - // We don't consider `ErrClosed` an error (FIXME: is this - // correct?): - if err != nil && !errors.Is(err, os.ErrClosed) { - s.err = err - } - if err := r.Close(); err != nil && s.err == nil { - s.err = err - } - if err := s.w.Close(); err != nil && s.err == nil { - s.err = err - } - close(s.done) - }() - - // FIXME: if `s.w.Write()` is blocking (e.g., because there is a - // downstream process that is not reading from the other side), - // there's no way to terminate the copy when the context expires. - // This is not too bad, because the `io.Copy()` call will exit by - // itself when its input is closed. - // - // We could, however, be smarter about exiting more quickly if the - // context expires but `s.w.Write()` is not blocking. - - return nil, nil -} - -func (s *ioCopier) Wait() error { - <-s.done - return s.err -} diff --git a/internal/pipe/linewise.go b/internal/pipe/linewise.go deleted file mode 100644 index 7b5c6ef..0000000 --- a/internal/pipe/linewise.go +++ /dev/null @@ -1,74 +0,0 @@ -package pipe - -import ( - "bufio" - "bytes" - "context" - "io" -) - -// LinewiseStageFunc is a function that can be embedded in a -// `goStage`. It is called once per line in the input (where "line" -// can be defined via any `bufio.Scanner`). It should process the line -// and may write whatever it likes to `stdout`, which is a buffered -// writer whose contents are forwarded to the input of the next stage -// of the pipeline. The function needn't write one line of output per -// line of input. -// -// The function mustn't retain copies of `line`, since it may be -// overwritten every time the function is called. -// -// The function needn't flush or close `stdout` (this will be done -// automatically when all of the input has been processed). 
-// -// If there is an error parsing the input into lines, or if this -// function returns an error, then the whole pipeline will be aborted -// with that error. However, if the function returns the special error -// `pipe.FinishEarly`, the stage will stop processing immediately with -// a `nil` error value. -// -// The function will be called in a separate goroutine, so it must be -// careful to synchronize any data access aside from writing to -// `stdout`. -type LinewiseStageFunc func( - ctx context.Context, env Env, line []byte, stdout *bufio.Writer, -) error - -// LinewiseFunction returns a function-based `Stage`. The input will -// be split into LF-terminated lines and passed to the function one -// line at a time (without the LF). The function may emit output to -// its `stdout` argument. See the definition of `LinewiseStageFunc` -// for more information. -// -// Note that the stage will emit an error if any line (including its -// end-of-line terminator) exceeds 64 kiB in length. If this is too -// short, use `ScannerFunction()` directly with your own -// `NewScannerFunc` as argument, or use `Function()` directly with -// your own `StageFunc`. -func LinewiseFunction(name string, f LinewiseStageFunc) Stage { - return ScannerFunction( - name, - func(r io.Reader) (Scanner, error) { - scanner := bufio.NewScanner(r) - // Split based on strict LF (we don't accept CRLF): - scanner.Split(ScanLFTerminatedLines) - return scanner, nil - }, - f, - ) -} - -// ScanLFTerminatedLines is a `bufio.SplitFunc` that splits its input -// into lines at LF characters (not treating CR specially). 
-func ScanLFTerminatedLines(data []byte, atEOF bool) (advance int, token []byte, err error) { - if atEOF && len(data) == 0 { - return 0, nil, nil - } - if i := bytes.IndexByte(data, '\n'); i != -1 { - return i + 1, data[0:i], nil - } - if atEOF { - return len(data), data, nil - } - return 0, nil, nil -} diff --git a/internal/pipe/pipeline.go b/internal/pipe/pipeline.go deleted file mode 100644 index 4725907..0000000 --- a/internal/pipe/pipeline.go +++ /dev/null @@ -1,272 +0,0 @@ -package pipe - -import ( - "bytes" - "context" - "errors" - "fmt" - "io" - "sync/atomic" -) - -// Env represents the environment that a pipeline stage should run in. -// It is passed to `Stage.Start()`. -type Env struct { - // The directory in which external commands should be executed by - // default. - Dir string -} - -// FinishEarly is an error that can be returned by a `Stage` to -// request that the iteration be ended early (possibly without reading -// all of its input). This "error" is considered a successful return, -// and is not reported to the caller. -//nolint:errname -var FinishEarly = errors.New("finish stage early") - -// Pipeline represents a Unix-like pipe that can include multiple -// stages, including external processes but also and stages written in -// Go. -type Pipeline struct { - env Env - - stdin io.Reader - stdout io.WriteCloser - stages []Stage - cancel func() - - // Atomically written and read value, nonzero if the pipeline has - // been started. This is only used for lifecycle sanity checks but - // does not guarantee that clients are using the class correctly. - started uint32 -} - -type nopWriteCloser struct { - io.Writer -} - -func (w nopWriteCloser) Close() error { - return nil -} - -// NewPipeline returns a Pipeline struct with all of the `options` -// applied. -func New(options ...Option) *Pipeline { - p := &Pipeline{} - - for _, option := range options { - option(p) - } - - return p -} - -// Option is a type alias for Pipeline functional options. 
-type Option func(*Pipeline) - -// WithDir sets the default directory for running external commands. -func WithDir(dir string) Option { - return func(p *Pipeline) { - p.env.Dir = dir - } -} - -// WithStdin assigns stdin to the first command in the pipeline. -func WithStdin(stdin io.Reader) Option { - return func(p *Pipeline) { - p.stdin = stdin - } -} - -// WithStdout assigns stdout to the last command in the pipeline. -func WithStdout(stdout io.Writer) Option { - return func(p *Pipeline) { - p.stdout = nopWriteCloser{stdout} - } -} - -// WithStdoutCloser assigns stdout to the last command in the -// pipeline, and closes stdout when it's done. -func WithStdoutCloser(stdout io.WriteCloser) Option { - return func(p *Pipeline) { - p.stdout = stdout - } -} - -func (p *Pipeline) hasStarted() bool { - return atomic.LoadUint32(&p.started) != 0 -} - -// Add appends one or more stages to the pipeline. -func (p *Pipeline) Add(stages ...Stage) { - if p.hasStarted() { - panic("attempt to modify a pipeline that has already started") - } - - p.stages = append(p.stages, stages...) -} - -// AddWithIgnoredError appends one or more stages that are ignoring -// the passed in error to the pipeline. -func (p *Pipeline) AddWithIgnoredError(em ErrorMatcher, stages ...Stage) { - if p.hasStarted() { - panic("attempt to modify a pipeline that has already started") - } - - for _, stage := range stages { - p.stages = append(p.stages, IgnoreError(stage, em)) - } -} - -// Start starts the commands in the pipeline. If `Start()` exits -// without an error, `Wait()` must also be called, to allow all -// resources to be freed. 
-func (p *Pipeline) Start(ctx context.Context) error { - if p.hasStarted() { - panic("attempt to start a pipeline that has already started") - } - - atomic.StoreUint32(&p.started, 1) - ctx, p.cancel = context.WithCancel(ctx) - - var nextStdin io.ReadCloser - if p.stdin != nil { - // We don't want the first stage to actually close this, and - // it's not even an `io.ReadCloser`, so fake it: - nextStdin = io.NopCloser(p.stdin) - } - - for i, s := range p.stages { - var err error - stdout, err := s.Start(ctx, p.env, nextStdin) - if err != nil { - // Close the pipe that the previous stage was writing to. - // That should cause it to exit even if it's not minding - // its context. - if nextStdin != nil { - _ = nextStdin.Close() - } - - // Kill and wait for any stages that have been started - // already to finish: - p.cancel() - for _, s := range p.stages[:i] { - _ = s.Wait() - } - return fmt.Errorf("starting pipeline stage %q: %w", s.Name(), err) - } - nextStdin = stdout - } - - // If the pipeline was configured with a `stdout`, add a synthetic - // stage to copy the last stage's stdout to that writer: - if p.stdout != nil { - c := newIOCopier(p.stdout) - p.stages = append(p.stages, c) - // `ioCopier.Start()` never fails: - _, _ = c.Start(ctx, p.env, nextStdin) - } - - return nil -} - -func (p *Pipeline) Output(ctx context.Context) ([]byte, error) { - var buf bytes.Buffer - p.stdout = nopWriteCloser{&buf} - err := p.Run(ctx) - return buf.Bytes(), err -} - -// Wait waits for each stage in the pipeline to exit. -func (p *Pipeline) Wait() error { - if !p.hasStarted() { - panic("unable to wait on a pipeline that has not started") - } - - // Make sure that all of the cleanup eventually happens: - defer p.cancel() - - var earliestStageErr error - var earliestFailedStage Stage - - finishedEarly := false - for i := len(p.stages) - 1; i >= 0; i-- { - s := p.stages[i] - err := s.Wait() - - // Handle errors: - switch { - case err == nil: - // No error to handle. 
But unset the `finishedEarly` flag, - // because earlier stages shouldn't be affected by the - // later stage that finished early. - finishedEarly = false - continue - - case errors.Is(err, FinishEarly): - // We ignore `FinishEarly` errors because that is how a - // stage informs us that it intentionally finished early. - // Moreover, if we see a `FinishEarly` error, ignore any - // pipe error from the immediately preceding stage, - // because it probably came from trying to write to this - // stage after this stage closed its stdin. - finishedEarly = true - continue - - case IsPipeError(err): - switch { - case finishedEarly: - // A successor stage finished early. It is common for - // this to cause earlier stages to fail with pipe - // errors. Such errors are uninteresting, so ignore - // them. Leave the `finishedEarly` flag set, because - // the preceding stage might get a pipe error from - // trying to write to this one. - case earliestStageErr != nil: - // A later stage has already reported an error. This - // means that we don't want to report the error from - // this stage: - // - // * If the later error was also a pipe error: we want - // to report the _last_ pipe error seen, which would - // be the one already recorded. - // - // * If the later error was not a pipe error: non-pipe - // errors are always considered more important than - // pipe errors, so again we would want to keep the - // error that is already recorded. - default: - // In this case, the pipe error from this stage is the - // most important error that we have seen so far, so - // remember it: - earliestFailedStage, earliestStageErr = s, err - } - - default: - // This stage exited with a non-pipe error. If multiple - // stages exited with such errors, we want to report the - // one that is most informative. We take that to be the - // error from the earliest failing stage. 
Since we are - // iterating through stages in reverse order, overwrite - // any existing remembered errors (which would have come - // from a later stage): - earliestFailedStage, earliestStageErr = s, err - finishedEarly = false - } - } - - if earliestStageErr != nil { - return fmt.Errorf("%s: %w", earliestFailedStage.Name(), earliestStageErr) - } - - return nil -} - -// Run starts and waits for the commands in the pipeline. -func (p *Pipeline) Run(ctx context.Context) error { - if err := p.Start(ctx); err != nil { - return err - } - - return p.Wait() -} diff --git a/internal/pipe/pipeline_test.go b/internal/pipe/pipeline_test.go deleted file mode 100644 index faf1e31..0000000 --- a/internal/pipe/pipeline_test.go +++ /dev/null @@ -1,883 +0,0 @@ -package pipe_test - -import ( - "bufio" - "bytes" - "context" - "errors" - "fmt" - "io" - "io/ioutil" - "os" - "path/filepath" - "runtime" - "strconv" - "strings" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.uber.org/goleak" - - "github.com/github/git-sizer/internal/pipe" -) - -func TestMain(m *testing.M) { - // Check whether this package's test suite leaks any goroutines: - goleak.VerifyTestMain(m) -} - -func TestPipelineFirstStageFailsToStart(t *testing.T) { - t.Parallel() - ctx := context.Background() - - startErr := errors.New("foo") - - p := pipe.New() - p.Add( - ErrorStartingStage{startErr}, - ErrorStartingStage{errors.New("this error should never happen")}, - ) - assert.ErrorIs(t, p.Run(ctx), startErr) -} - -func TestPipelineSecondStageFailsToStart(t *testing.T) { - t.Parallel() - ctx := context.Background() - - startErr := errors.New("foo") - - p := pipe.New() - p.Add( - seqFunction(20000), - ErrorStartingStage{startErr}, - ) - assert.ErrorIs(t, p.Run(ctx), startErr) -} - -func TestPipelineSingleCommandOutput(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add(pipe.Command("echo", "hello world")) - out, err := 
p.Output(ctx) - if assert.NoError(t, err) { - assert.EqualValues(t, "hello world\n", out) - } -} - -func TestPipelineSingleCommandWithStdout(t *testing.T) { - t.Parallel() - ctx := context.Background() - - stdout := &bytes.Buffer{} - - p := pipe.New(pipe.WithStdout(stdout)) - p.Add(pipe.Command("echo", "hello world")) - if assert.NoError(t, p.Run(ctx)) { - assert.Equal(t, "hello world\n", stdout.String()) - } -} - -func TestNontrivialPipeline(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add( - pipe.Command("echo", "hello world"), - pipe.Command("sed", "s/hello/goodbye/"), - ) - out, err := p.Output(ctx) - if assert.NoError(t, err) { - assert.EqualValues(t, "goodbye world\n", out) - } -} - -func TestPipelineReadFromSlowly(t *testing.T) { - t.Parallel() - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - - r, w := io.Pipe() - - var buf []byte - readErr := make(chan error, 1) - - go func() { - time.Sleep(200 * time.Millisecond) - var err error - buf, err = io.ReadAll(r) - readErr <- err - }() - - p := pipe.New(pipe.WithStdout(w)) - p.Add(pipe.Command("echo", "hello world")) - assert.NoError(t, p.Run(ctx)) - - time.Sleep(100 * time.Millisecond) - // It's not super-intuitive, but `w` has to be closed here so that - // the `ioutil.ReadAll()` call above knows that it's done: - _ = w.Close() - - assert.NoError(t, <-readErr) - assert.Equal(t, "hello world\n", string(buf)) -} - -func TestPipelineReadFromSlowly2(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'seq' unavailable") - } - - t.Parallel() - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - - r, w := io.Pipe() - - var buf []byte - readErr := make(chan error, 1) - - go func() { - time.Sleep(100 * time.Millisecond) - for { - var c [1]byte - _, err := r.Read(c[:]) - if err != nil { - if err == io.EOF { - readErr <- nil - return - } - readErr <- err - return 
- } - buf = append(buf, c[0]) - time.Sleep(1 * time.Millisecond) - } - }() - - p := pipe.New(pipe.WithStdout(w)) - p.Add(pipe.Command("seq", "100")) - assert.NoError(t, p.Run(ctx)) - - time.Sleep(200 * time.Millisecond) - // It's not super-intuitive, but `w` has to be closed here so that - // the `ioutil.ReadAll()` call above knows that it's done: - _ = w.Close() - - assert.NoError(t, <-readErr) - assert.Equal(t, 292, len(buf)) -} - -func TestPipelineTwoCommandsPiping(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add(pipe.Command("echo", "hello world")) - assert.Panics(t, func() { p.Add(pipe.Command("")) }) - out, err := p.Output(ctx) - if assert.NoError(t, err) { - assert.EqualValues(t, "hello world\n", out) - } -} - -func TestPipelineDir(t *testing.T) { - t.Parallel() - ctx := context.Background() - - wdir, err := os.Getwd() - require.NoError(t, err) - dir, err := ioutil.TempDir(wdir, "pipeline-test-") - require.NoError(t, err) - defer os.RemoveAll(dir) - - p := pipe.New(pipe.WithDir(dir)) - switch runtime.GOOS { - case "windows": - p.Add(pipe.Command("bash", "-c", "pwd -W")) - default: - p.Add(pipe.Command("pwd")) - } - - out, err := p.Output(ctx) - if assert.NoError(t, err) { - assert.Equal(t, filepath.Clean(dir), filepath.Clean(strings.TrimSuffix(string(out), "\n"))) - } -} - -func TestPipelineExit(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add( - pipe.Command("false"), - pipe.Command("true"), - ) - assert.EqualError(t, p.Run(ctx), "false: exit status 1") -} - -func TestPipelineStderr(t *testing.T) { - t.Parallel() - ctx := context.Background() - - dir, err := ioutil.TempDir("", "pipeline-test-") - require.NoError(t, err) - defer os.RemoveAll(dir) - - p := pipe.New(pipe.WithDir(dir)) - p.Add(pipe.Command("ls", "doesnotexist")) - - _, err = p.Output(ctx) - if assert.Error(t, err) { - assert.Contains(t, err.Error(), "ls: exit status") - } -} - -func TestPipelineInterrupted(t 
*testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'sleep' unavailable") - } - - t.Parallel() - stdout := &bytes.Buffer{} - - p := pipe.New(pipe.WithStdout(stdout)) - p.Add(pipe.Command("sleep", "10")) - - ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond) - defer cancel() - - err := p.Start(ctx) - require.NoError(t, err) - - err = p.Wait() - assert.ErrorIs(t, err, context.DeadlineExceeded) -} - -func TestPipelineCanceled(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'sleep' unavailable") - } - - t.Parallel() - - stdout := &bytes.Buffer{} - - p := pipe.New(pipe.WithStdout(stdout)) - p.Add(pipe.Command("sleep", "10")) - - ctx, cancel := context.WithCancel(context.Background()) - - err := p.Start(ctx) - require.NoError(t, err) - - cancel() - - err = p.Wait() - assert.ErrorIs(t, err, context.Canceled) -} - -// Verify the correct error if a command in the pipeline exits before -// reading all of its predecessor's output. Note that the amount of -// unread output in this case *does fit* within the OS-level pipe -// buffer. -func TestLittleEPIPE(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'sleep' unavailable") - } - - t.Parallel() - - p := pipe.New() - p.Add( - pipe.Command("sh", "-c", "sleep 1; echo foo"), - pipe.Command("true"), - ) - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - err := p.Run(ctx) - assert.EqualError(t, err, "sh: signal: broken pipe") -} - -// Verify the correct error if one command in the pipeline exits -// before reading all of its predecessor's output. Note that the -// amount of unread output in this case *does not fit* within the -// OS-level pipe buffer. 
-func TestBigEPIPE(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'seq' unavailable") - } - - t.Parallel() - - p := pipe.New() - p.Add( - pipe.Command("seq", "100000"), - pipe.Command("true"), - ) - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - err := p.Run(ctx) - assert.EqualError(t, err, "seq: signal: broken pipe") -} - -// Verify the correct error if one command in the pipeline exits -// before reading all of its predecessor's output. Note that the -// amount of unread output in this case *does not fit* within the -// OS-level pipe buffer. -func TestIgnoredSIGPIPE(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'seq' unavailable") - } - - t.Parallel() - - p := pipe.New() - p.Add( - pipe.IgnoreError(pipe.Command("seq", "100000"), pipe.IsSIGPIPE), - pipe.Command("echo", "foo"), - ) - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - out, err := p.Output(ctx) - assert.NoError(t, err) - assert.EqualValues(t, "foo\n", out) -} - -func TestFunction(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add( - pipe.Print("hello world"), - pipe.Function( - "farewell", - func(_ context.Context, _ pipe.Env, stdin io.Reader, stdout io.Writer) error { - buf, err := io.ReadAll(stdin) - if err != nil { - return err - } - if string(buf) != "hello world" { - return fmt.Errorf("expected \"hello world\"; got %q", string(buf)) - } - _, err = stdout.Write([]byte("goodbye, cruel world")) - return err - }, - ), - ) - - out, err := p.Output(ctx) - assert.NoError(t, err) - assert.EqualValues(t, "goodbye, cruel world", out) -} - -func TestPipelineWithFunction(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add( - pipe.Command("echo", "-n", "hello world"), - pipe.Function( - "farewell", - func(_ context.Context, _ pipe.Env, stdin io.Reader, stdout 
io.Writer) error { - buf, err := io.ReadAll(stdin) - if err != nil { - return err - } - if string(buf) != "hello world" { - return fmt.Errorf("expected \"hello world\"; got %q", string(buf)) - } - _, err = stdout.Write([]byte("goodbye, cruel world")) - return err - }, - ), - pipe.Command("tr", "a-z", "A-Z"), - ) - - out, err := p.Output(ctx) - assert.NoError(t, err) - assert.EqualValues(t, "GOODBYE, CRUEL WORLD", out) -} - -type ErrorStartingStage struct { - err error -} - -func (s ErrorStartingStage) Name() string { - return "errorStartingStage" -} - -func (s ErrorStartingStage) Start( - ctx context.Context, env pipe.Env, stdin io.ReadCloser, -) (io.ReadCloser, error) { - return io.NopCloser(&bytes.Buffer{}), s.err -} - -func (s ErrorStartingStage) Wait() error { - return nil -} - -func seqFunction(n int) pipe.Stage { - return pipe.Function( - "seq", - func(_ context.Context, _ pipe.Env, _ io.Reader, stdout io.Writer) error { - for i := 1; i <= n; i++ { - _, err := fmt.Fprintf(stdout, "%d\n", i) - if err != nil { - return err - } - } - return nil - }, - ) -} - -func TestPipelineWithLinewiseFunction(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - // Print the numbers from 1 to 20 (generated from scratch): - p.Add( - seqFunction(20), - // Discard all but the multiples of 5, and emit the results - // separated by spaces on one line: - pipe.LinewiseFunction( - "multiples-of-5", - func(_ context.Context, _ pipe.Env, line []byte, w *bufio.Writer) error { - n, err := strconv.Atoi(string(line)) - if err != nil { - return err - } - if n%5 != 0 { - return nil - } - _, err = fmt.Fprintf(w, " %d", n) - return err - }, - ), - // Read the words and square them, emitting the results one per - // line: - pipe.ScannerFunction( - "square-multiples-of-5", - func(r io.Reader) (pipe.Scanner, error) { - scanner := bufio.NewScanner(r) - scanner.Split(bufio.ScanWords) - return scanner, nil - }, - func(_ context.Context, _ pipe.Env, line []byte, w 
*bufio.Writer) error { - n, err := strconv.Atoi(string(line)) - if err != nil { - return err - } - _, err = fmt.Fprintf(w, "%d\n", n*n) - return err - }, - ), - ) - - out, err := p.Output(ctx) - assert.NoError(t, err) - assert.EqualValues(t, "25\n100\n225\n400\n", out) -} - -func TestScannerAlwaysFlushes(t *testing.T) { - t.Parallel() - ctx := context.Background() - - var length int64 - - p := pipe.New() - // Print the numbers from 1 to 20 (generated from scratch): - p.Add( - pipe.IgnoreError( - seqFunction(20), - pipe.IsPipeError, - ), - // Pass the numbers through up to 7, then exit with an - // ignored error: - pipe.IgnoreError( - pipe.LinewiseFunction( - "error-after-7", - func(_ context.Context, _ pipe.Env, line []byte, w *bufio.Writer) error { - fmt.Fprintf(w, "%s\n", line) - if string(line) == "7" { - return errors.New("ignore") - } - return nil - }, - ), - func(err error) bool { - return err.Error() == "ignore" - }, - ), - // Read the numbers and add them into the sum: - pipe.Function( - "compute-length", - func(_ context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { - var err error - length, err = io.Copy(io.Discard, stdin) - return err - }, - ), - ) - - err := p.Run(ctx) - assert.NoError(t, err) - // Make sure that all of the bytes emitted before the second - // stage's error were received by the third stage: - assert.EqualValues(t, 14, length) -} - -func TestScannerFinishEarly(t *testing.T) { - t.Parallel() - ctx := context.Background() - - var length int64 - - p := pipe.New() - p.Add( - // Print the numbers from 1 to 20 (generated from scratch): - seqFunction(20), - - // Pass the numbers through up to 7, then exit with an ignored - // error: - pipe.LinewiseFunction( - "finish-after-7", - func(_ context.Context, _ pipe.Env, line []byte, w *bufio.Writer) error { - fmt.Fprintf(w, "%s\n", line) - if string(line) == "7" { - return pipe.FinishEarly - } - return nil - }, - ), - - // Read the numbers and add them into the sum: - pipe.Function( - 
"compute-length", - func(_ context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { - var err error - length, err = io.Copy(io.Discard, stdin) - return err - }, - ), - ) - - err := p.Run(ctx) - assert.NoError(t, err) - // Make sure that all of the bytes emitted before the second - // stage's error were received by the third stage: - assert.EqualValues(t, 14, length) -} - -func TestPrintln(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add(pipe.Println("Look Ma, no hands!")) - out, err := p.Output(ctx) - if assert.NoError(t, err) { - assert.EqualValues(t, "Look Ma, no hands!\n", out) - } -} - -func TestPrintf(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add(pipe.Printf("Strangely recursive: %T", p)) - out, err := p.Output(ctx) - if assert.NoError(t, err) { - assert.EqualValues(t, "Strangely recursive: *pipe.Pipeline", out) - } -} - -func TestErrors(t *testing.T) { - t.Parallel() - ctx := context.Background() - - err1 := errors.New("error1") - err2 := errors.New("error2") - - for _, tc := range []struct { - name string - stages []pipe.Stage - expectedErr error - }{ - { - name: "no-error", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("noop2", genErr(nil)), - pipe.Function("noop3", genErr(nil)), - }, - expectedErr: nil, - }, - { - name: "lonely-error", - stages: []pipe.Stage{ - pipe.Function("err1", genErr(err1)), - }, - expectedErr: err1, - }, - { - name: "error", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("err1", genErr(err1)), - pipe.Function("noop2", genErr(nil)), - }, - expectedErr: err1, - }, - { - name: "two-consecutive-errors", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("err1", genErr(err1)), - pipe.Function("err2", genErr(err2)), - pipe.Function("noop2", genErr(nil)), - }, - expectedErr: err1, - }, - { - name: "pipe-then-error", - stages: []pipe.Stage{ - 
pipe.Function("noop1", genErr(nil)), - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("err1", genErr(err1)), - pipe.Function("noop2", genErr(nil)), - }, - expectedErr: err1, - }, - { - name: "error-then-pipe", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("err1", genErr(err1)), - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("noop2", genErr(nil)), - }, - expectedErr: err1, - }, - { - name: "two-spaced-errors", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("err1", genErr(err1)), - pipe.Function("noop2", genErr(nil)), - pipe.Function("err2", genErr(err2)), - pipe.Function("noop3", genErr(nil)), - }, - expectedErr: err1, - }, - { - name: "finish-early-ignored", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("finish-early1", genErr(pipe.FinishEarly)), - pipe.Function("noop2", genErr(nil)), - pipe.Function("finish-early2", genErr(pipe.FinishEarly)), - pipe.Function("noop3", genErr(nil)), - }, - expectedErr: nil, - }, - { - name: "error-before-finish-early", - stages: []pipe.Stage{ - pipe.Function("err1", genErr(err1)), - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - }, - expectedErr: err1, - }, - { - name: "error-after-finish-early", - stages: []pipe.Stage{ - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - pipe.Function("err1", genErr(err1)), - }, - expectedErr: err1, - }, - { - name: "pipe-then-finish-early", - stages: []pipe.Stage{ - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - }, - expectedErr: nil, - }, - { - name: "pipe-then-two-finish-early", - stages: []pipe.Stage{ - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("finish-early1", genErr(pipe.FinishEarly)), - pipe.Function("finish-early2", genErr(pipe.FinishEarly)), - }, - expectedErr: nil, - }, - { - name: "two-pipe-then-finish-early", - stages: 
[]pipe.Stage{ - pipe.Function("pipe-error1", genErr(io.ErrClosedPipe)), - pipe.Function("pipe-error2", genErr(io.ErrClosedPipe)), - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - }, - expectedErr: nil, - }, - { - name: "pipe-then-finish-early-with-gap", - stages: []pipe.Stage{ - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("noop", genErr(nil)), - pipe.Function("finish-early1", genErr(pipe.FinishEarly)), - }, - expectedErr: io.ErrClosedPipe, - }, - { - name: "finish-early-then-pipe", - stages: []pipe.Stage{ - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - }, - expectedErr: io.ErrClosedPipe, - }, - { - name: "error-then-pipe-then-finish-early", - stages: []pipe.Stage{ - pipe.Function("err1", genErr(err1)), - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - }, - expectedErr: err1, - }, - { - name: "pipe-then-error-then-finish-early", - stages: []pipe.Stage{ - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("err1", genErr(err1)), - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - }, - expectedErr: err1, - }, - } { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - p := pipe.New() - p.Add(tc.stages...) 
- err := p.Run(ctx) - if tc.expectedErr == nil { - assert.NoError(t, err) - } else { - assert.ErrorIs(t, err, tc.expectedErr) - } - }) - } -} - -func BenchmarkSingleProgram(b *testing.B) { - ctx := context.Background() - - for i := 0; i < b.N; i++ { - p := pipe.New() - p.Add( - pipe.Command("true"), - ) - assert.NoError(b, p.Run(ctx)) - } -} - -func BenchmarkTenPrograms(b *testing.B) { - ctx := context.Background() - - for i := 0; i < b.N; i++ { - p := pipe.New() - p.Add( - pipe.Command("echo", "hello world"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - ) - out, err := p.Output(ctx) - if assert.NoError(b, err) { - assert.EqualValues(b, "hello world\n", out) - } - } -} - -func BenchmarkTenFunctions(b *testing.B) { - ctx := context.Background() - - for i := 0; i < b.N; i++ { - p := pipe.New() - p.Add( - pipe.Println("hello world"), - pipe.Function("copy1", catFn), - pipe.Function("copy2", catFn), - pipe.Function("copy3", catFn), - pipe.Function("copy4", catFn), - pipe.Function("copy5", catFn), - pipe.Function("copy6", catFn), - pipe.Function("copy7", catFn), - pipe.Function("copy8", catFn), - pipe.Function("copy9", catFn), - ) - out, err := p.Output(ctx) - if assert.NoError(b, err) { - assert.EqualValues(b, "hello world\n", out) - } - } -} - -func BenchmarkTenMixedStages(b *testing.B) { - ctx := context.Background() - - for i := 0; i < b.N; i++ { - p := pipe.New() - p.Add( - pipe.Command("echo", "hello world"), - pipe.Function("copy1", catFn), - pipe.Command("cat"), - pipe.Function("copy2", catFn), - pipe.Command("cat"), - pipe.Function("copy3", catFn), - pipe.Command("cat"), - pipe.Function("copy4", catFn), - pipe.Command("cat"), - pipe.Function("copy5", catFn), - ) - out, err := p.Output(ctx) - if assert.NoError(b, err) { - assert.EqualValues(b, "hello world\n", out) - } - } -} - -func catFn(_ 
context.Context, _ pipe.Env, stdin io.Reader, stdout io.Writer) error { - _, err := io.Copy(stdout, stdin) - return err -} - -func genErr(err error) pipe.StageFunc { - return func(_ context.Context, _ pipe.Env, _ io.Reader, _ io.Writer) error { - return err - } -} diff --git a/internal/pipe/print.go b/internal/pipe/print.go deleted file mode 100644 index 766418d..0000000 --- a/internal/pipe/print.go +++ /dev/null @@ -1,37 +0,0 @@ -package pipe - -import ( - "context" - "fmt" - "io" -) - -func Print(a ...interface{}) Stage { - return Function( - "print", - func(_ context.Context, _ Env, _ io.Reader, stdout io.Writer) error { - _, err := fmt.Fprint(stdout, a...) - return err - }, - ) -} - -func Println(a ...interface{}) Stage { - return Function( - "println", - func(_ context.Context, _ Env, _ io.Reader, stdout io.Writer) error { - _, err := fmt.Fprintln(stdout, a...) - return err - }, - ) -} - -func Printf(format string, a ...interface{}) Stage { - return Function( - "printf", - func(_ context.Context, _ Env, _ io.Reader, stdout io.Writer) error { - _, err := fmt.Fprintf(stdout, format, a...) - return err - }, - ) -} diff --git a/internal/pipe/scanner.go b/internal/pipe/scanner.go deleted file mode 100644 index b56b58c..0000000 --- a/internal/pipe/scanner.go +++ /dev/null @@ -1,67 +0,0 @@ -package pipe - -import ( - "bufio" - "context" - "io" -) - -// Scanner defines the interface (which is implemented by -// `bufio.Scanner`) that is needed by `AddScannerFunction()`. See -// `bufio.Scanner` for how these methods should behave. -type Scanner interface { - Scan() bool - Bytes() []byte - Err() error -} - -// NewScannerFunc is used to create a `Scanner` for scanning input -// that is coming from `r`. -type NewScannerFunc func(r io.Reader) (Scanner, error) - -// ScannerFunction creates a function-based `Stage`. The function will -// be passed input, one line at a time, and may emit output. See the -// definition of `LinewiseStageFunc` for more information. 
-func ScannerFunction( - name string, newScanner NewScannerFunc, f LinewiseStageFunc, -) Stage { - return Function( - name, - func(ctx context.Context, env Env, stdin io.Reader, stdout io.Writer) (theErr error) { - scanner, err := newScanner(stdin) - if err != nil { - return err - } - - var out *bufio.Writer - if stdout != nil { - out = bufio.NewWriter(stdout) - defer func() { - err := out.Flush() - if err != nil && theErr == nil { - // Note: this sets the named return value, - // thereby causing the whole stage to report - // the error. - theErr = err - } - }() - } - - for scanner.Scan() { - if ctx.Err() != nil { - return ctx.Err() - } - err := f(ctx, env, scanner.Bytes(), out) - if err != nil { - return err - } - } - if err := scanner.Err(); err != nil { - return err - } - - return nil - // `p.AddFunction()` arranges for `stdout` to be closed. - }, - ) -} diff --git a/internal/pipe/stage.go b/internal/pipe/stage.go deleted file mode 100644 index f3d74d9..0000000 --- a/internal/pipe/stage.go +++ /dev/null @@ -1,34 +0,0 @@ -package pipe - -import ( - "context" - "io" -) - -// Stage is an element of a `Pipeline`. -type Stage interface { - // Name returns the name of the stage. - Name() string - - // Start starts the stage in the background, in the environment - // described by `env`, and using `stdin` as input. (`stdin` should - // be set to `nil` if the stage is to receive no input, which - // might be the case for the first stage in a pipeline.) It - // returns an `io.ReadCloser` from which the stage's output can be - // read (or `nil` if it generates no output, which should only be - // the case for the last stage in a pipeline). It is the stages' - // responsibility to close `stdin` (if it is not nil) when it has - // read all of the input that it needs, and to close the write end - // of its output reader when it is done, as that is generally how - // the subsequent stage knows that it has received all of its - // input and can finish its work, too. 
- // - // If `Start()` returns without an error, `Wait()` must also be - // called, to allow all resources to be freed. - Start(ctx context.Context, env Env, stdin io.ReadCloser) (io.ReadCloser, error) - - // Wait waits for the stage to be done, either because it has - // finished or because it has been killed due to the expiration of - // the context passed to `Start()`. - Wait() error -} From 9849429feceedf21184ce57de4723fdef99b92e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Pastor=20Olivar?= Date: Tue, 29 Nov 2022 09:47:23 +0100 Subject: [PATCH 144/176] Add the go-pipe dependency v1.0.1 --- go.mod | 11 +++++++---- go.sum | 26 +++++++++++++++++--------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/go.mod b/go.mod index 926a1c8..0cba7b5 100644 --- a/go.mod +++ b/go.mod @@ -6,12 +6,15 @@ require ( github.com/cli/safeexec v1.0.0 github.com/davecgh/go-spew v1.1.1 // indirect github.com/spf13/pflag v1.0.5 - github.com/stretchr/testify v1.7.0 - go.uber.org/goleak v1.1.12 - golang.org/x/sync v0.0.0-20210220032951-036812b2e83c + github.com/stretchr/testify v1.8.1 + golang.org/x/sync v0.1.0 // indirect ) +require github.com/github/go-pipe v1.0.1 + require ( + github.com/kr/pretty v0.1.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect + gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index d977b15..f17f036 100644 --- a/go.sum +++ b/go.sum @@ -1,26 +1,33 @@ github.com/cli/safeexec v1.0.0 h1:0VngyaIyqACHdcMNWfo6+KdUYnqEr2Sg+bSP1pdF+dI= github.com/cli/safeexec v1.0.0/go.mod h1:Z/D4tTN8Vs5gXYHDCbaM1S/anmEDnJb1iW0+EJ5zx3Q= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/github/go-pipe v1.0.1 h1:EX3LIhOueA/GptjP8KoIQh/2tvKqKY5nn3QI5adpa+M= +github.com/github/go-pipe v1.0.1/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/yuin/goldmark v1.3.5/go.mod 
h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= -go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= +go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk= +go.uber.org/goleak v1.2.0/go.mod h1:XJYK+MuIchqpmGmUSAzotztawfKvYLUIgg7guXrwVUo= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -28,8 +35,9 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -41,7 +49,6 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.1.5 h1:ouewzE6p+/VEB31YYnTbEJdi8pFqKp4P4n85vwo3DHA= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -49,5 +56,6 @@ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8T gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 36e307e503182d987c8487e7f590cede24a94d48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Pastor=20Olivar?= Date: Tue, 29 Nov 2022 09:48:05 +0100 Subject: [PATCH 145/176] Update the source code to use the abstraction provided by the go-pipe dependency --- git/batch_obj_iter.go | 2 +- git/obj_iter.go | 2 +- 
git/ref_iter.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/git/batch_obj_iter.go b/git/batch_obj_iter.go index ee17337..05c6928 100644 --- a/git/batch_obj_iter.go +++ b/git/batch_obj_iter.go @@ -6,7 +6,7 @@ import ( "fmt" "io" - "github.com/github/git-sizer/internal/pipe" + "github.com/github/go-pipe/pipe" ) type ObjectRecord struct { diff --git a/git/obj_iter.go b/git/obj_iter.go index 268280b..cecdc2a 100644 --- a/git/obj_iter.go +++ b/git/obj_iter.go @@ -6,7 +6,7 @@ import ( "fmt" "io" - "github.com/github/git-sizer/internal/pipe" + "github.com/github/go-pipe/pipe" ) // ObjectIter iterates over objects in a Git repository. diff --git a/git/ref_iter.go b/git/ref_iter.go index 955499b..74e8415 100644 --- a/git/ref_iter.go +++ b/git/ref_iter.go @@ -6,7 +6,7 @@ import ( "fmt" "io" - "github.com/github/git-sizer/internal/pipe" + "github.com/github/go-pipe/pipe" ) // ReferenceIter is an iterator that interates over references. From 35f14cc271d73f65f049fb2fae64d6d736b27592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Pastor=20Olivar?= Date: Tue, 29 Nov 2022 13:40:06 +0100 Subject: [PATCH 146/176] Use the main branch in go-pipe If everything works as expected we will tag 1.0.2 in go-pipe --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 0cba7b5..ddb40e0 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( golang.org/x/sync v0.1.0 // indirect ) -require github.com/github/go-pipe v1.0.1 +require github.com/github/go-pipe v1.0.2-0.20221129123738-3f37633cc05b require ( github.com/kr/pretty v0.1.0 // indirect diff --git a/go.sum b/go.sum index f17f036..349c471 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/github/go-pipe v1.0.1 h1:EX3LIhOueA/GptjP8KoIQh/2tvKqKY5nn3QI5adpa+M= -github.com/github/go-pipe v1.0.1/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54= +github.com/github/go-pipe v1.0.2-0.20221129123738-3f37633cc05b h1:aPBpibgZUa5ITdtlMWWgkv9VrYAk5Kpd2Djo0E2Bo2w= +github.com/github/go-pipe v1.0.2-0.20221129123738-3f37633cc05b/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= From 842cb88a3eac240a8b1c45f2eaaa2c60bf419534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Pastor=20Olivar?= Date: Tue, 29 Nov 2022 13:45:29 +0100 Subject: [PATCH 147/176] Update go-pipe to v1.0.2. This new version fixes the compilation problems found on Windows --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index ddb40e0..9db294d 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( golang.org/x/sync v0.1.0 // indirect ) -require github.com/github/go-pipe v1.0.2-0.20221129123738-3f37633cc05b +require github.com/github/go-pipe v1.0.2 require ( github.com/kr/pretty v0.1.0 // indirect diff --git a/go.sum b/go.sum index 349c471..5c5d0a9 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/github/go-pipe v1.0.2-0.20221129123738-3f37633cc05b h1:aPBpibgZUa5ITdtlMWWgkv9VrYAk5Kpd2Djo0E2Bo2w= -github.com/github/go-pipe
v1.0.2-0.20221129123738-3f37633cc05b/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54= +github.com/github/go-pipe v1.0.2 h1:befTXflsc6ir/h9f6Q7QCDmfojoBswD1MfQrPhmmSoA= +github.com/github/go-pipe v1.0.2/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= From 6238db186d89690d02f4a1ebfd9c499eff0a9b9e Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 3 Dec 2022 15:20:38 +0100 Subject: [PATCH 148/176] git.Repository: invoke `git config` more backwards-compatibly Some `git-config` options that we were using (`--default`, `--type=bool`, and `--type=int`) were only added in git 2.18, released 2018-06-21. This means that some fairly recent platforms, like Ubuntu 18.04 "bionic", don't have those features in their default `git`. Change `git.Repository` to invoke `git config` without using those newer options. --- git/gitconfig.go | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/git/gitconfig.go b/git/gitconfig.go index d3378ae..76b8422 100644 --- a/git/gitconfig.go +++ b/git/gitconfig.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "os/exec" "strconv" "strings" ) @@ -114,14 +115,18 @@ func configKeyMatchesPrefix(key, prefix string) (bool, string) { } func (repo *Repository) ConfigStringDefault(key string, defaultValue string) (string, error) { + // Note that `git config --get` didn't get `--default` until Git + // 2.18 (released 2018-06-21). cmd := repo.GitCommand( - "config", - "--default", defaultValue, - key, + "config", "--get", key, ) out, err := cmd.Output() if err != nil { + if err, ok := err.(*exec.ExitError); ok && err.ExitCode() == 1 { + // This indicates that the value was not found. 
+ return defaultValue, nil + } return defaultValue, fmt.Errorf("running 'git config': %w", err) } @@ -133,15 +138,18 @@ func (repo *Repository) ConfigStringDefault(key string, defaultValue string) (st } func (repo *Repository) ConfigBoolDefault(key string, defaultValue bool) (bool, error) { + // Note that `git config --get` didn't get `--type=bool` or + // `--default` until Git 2.18 (released 2018-06-21). cmd := repo.GitCommand( - "config", - "--type", "bool", - "--default", strconv.FormatBool(defaultValue), - key, + "config", "--get", "--bool", key, ) out, err := cmd.Output() if err != nil { + if err, ok := err.(*exec.ExitError); ok && err.ExitCode() == 1 { + // This indicates that the value was not found. + return defaultValue, nil + } return defaultValue, fmt.Errorf("running 'git config': %w", err) } @@ -155,15 +163,18 @@ func (repo *Repository) ConfigBoolDefault(key string, defaultValue bool) (bool, } func (repo *Repository) ConfigIntDefault(key string, defaultValue int) (int, error) { + // Note that `git config --get` didn't get `--type=int` or + // `--default` until Git 2.18 (released 2018-06-21). cmd := repo.GitCommand( - "config", - "--type", "int", - "--default", strconv.Itoa(defaultValue), - key, + "config", "--get", "--int", key, ) out, err := cmd.Output() if err != nil { + if err, ok := err.(*exec.ExitError); ok && err.ExitCode() == 1 { + // This indicates that the value was not found. + return defaultValue, nil + } return defaultValue, fmt.Errorf("running 'git config': %w", err) } From 9ed78b17d5dd41a9525bd6a57b2dc321b806f265 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 14:03:17 +0200 Subject: [PATCH 149/176] Graph.rg: remove member It wasn't used. 
--- sizes/graph.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index 7e923f6..9187907 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -69,7 +69,7 @@ func ScanRepositoryUsingGraph( ctx, cancel := context.WithCancel(context.TODO()) defer cancel() - graph := NewGraph(rg, nameStyle) + graph := NewGraph(nameStyle) refIter, err := repo.NewReferenceIter(ctx) if err != nil { @@ -337,8 +337,6 @@ func ScanRepositoryUsingGraph( // Graph is an object graph that is being built up. type Graph struct { - rg RefGrouper - blobLock sync.Mutex blobSizes map[git.OID]BlobSize @@ -361,10 +359,8 @@ type Graph struct { } // NewGraph creates and returns a new `*Graph` instance. -func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { +func NewGraph(nameStyle NameStyle) *Graph { return &Graph{ - rg: rg, - blobSizes: make(map[git.OID]BlobSize), treeRecords: make(map[git.OID]*treeRecord), From 559b030c9aa7b8fbc8803863e20aae4a720cbb18 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 16:57:25 +0200 Subject: [PATCH 150/176] Collect references before starting the object traversal This provides a better separation of concerns, which will be taken advantage of shortly. --- sizes/graph.go | 79 +++++----------------------------------------- sizes/grouper.go | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 71 deletions(-) create mode 100644 sizes/grouper.go diff --git a/sizes/graph.go b/sizes/graph.go index 9187907..a56cbc2 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -11,50 +11,6 @@ import ( "github.com/github/git-sizer/meter" ) -// RefGroupSymbol is the string "identifier" that is used to refer to -// a refgroup, for example in the gitconfig. Nesting of refgroups is -// inferred from their names, using "." as separator between -// components. 
For example, if there are three refgroups with symbols -// "tags", "tags.releases", and "foo.bar", then "tags.releases" is -// considered to be nested within "tags", and "foo.bar" is considered -// to be nested within "foo", the latter being created automatically -// if it was not configured explicitly. -type RefGroupSymbol string - -// RefGroup is a group of references, for example "branches" or -// "tags". Reference groups might overlap. -type RefGroup struct { - // Symbol is the unique string by which this `RefGroup` is - // identified and configured. It consists of dot-separated - // components, which implicitly makes a nested tree-like - // structure. - Symbol RefGroupSymbol - - // Name is the name for this `ReferenceGroup` to be presented - // in user-readable output. - Name string -} - -// RefGrouper describes a type that can collate reference names into -// groups and decide which ones to walk. -type RefGrouper interface { - // Categorize tells whether `refname` should be walked at all, - // and if so, the symbols of the reference groups to which it - // belongs. - Categorize(refname string) (bool, []RefGroupSymbol) - - // Groups returns the list of `ReferenceGroup`s, in the order - // that they should be presented. The return value might - // depend on which references have been seen so far. - Groups() []RefGroup -} - -type refSeen struct { - git.Reference - walked bool - groups []RefGroupSymbol -} - // ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which // references to scan and how to group them. 
`nameStyle` specifies // whether the output should include full names, hashes only, or @@ -71,9 +27,9 @@ func ScanRepositoryUsingGraph( graph := NewGraph(nameStyle) - refIter, err := repo.NewReferenceIter(ctx) + refsSeen, err := CollectReferences(ctx, repo, rg) if err != nil { - return HistorySize{}, err + return HistorySize{}, fmt.Errorf("reading references: %w", err) } objIter, err := repo.NewObjectIter(context.TODO()) @@ -82,41 +38,22 @@ func ScanRepositoryUsingGraph( } errChan := make(chan error, 1) - var refsSeen []refSeen - // Feed the references that we want into the stdin of the object - // iterator: + // Feed the references that we want to walk into the stdin of the + // object iterator: go func() { defer objIter.Close() errChan <- func() error { - for { - ref, ok, err := refIter.Next() - if err != nil { - return err - } - if !ok { - return nil - } - - walk, groups := rg.Categorize(ref.Refname) - - refsSeen = append( - refsSeen, - refSeen{ - Reference: ref, - walked: walk, - groups: groups, - }, - ) - - if !walk { + for _, refSeen := range refsSeen { + if !refSeen.walked { continue } - if err := objIter.AddRoot(ref.OID); err != nil { + if err := objIter.AddRoot(refSeen.OID); err != nil { return err } } + return nil }() }() diff --git a/sizes/grouper.go b/sizes/grouper.go new file mode 100644 index 0000000..a5b8a26 --- /dev/null +++ b/sizes/grouper.go @@ -0,0 +1,82 @@ +package sizes + +import ( + "context" + + "github.com/github/git-sizer/git" +) + +// RefGroupSymbol is the string "identifier" that is used to refer to +// a refgroup, for example in the gitconfig. Nesting of refgroups is +// inferred from their names, using "." as separator between +// components. 
For example, if there are three refgroups with symbols +// "tags", "tags.releases", and "foo.bar", then "tags.releases" is +// considered to be nested within "tags", and "foo.bar" is considered +// to be nested within "foo", the latter being created automatically +// if it was not configured explicitly. +type RefGroupSymbol string + +// RefGroup is a group of references, for example "branches" or +// "tags". Reference groups might overlap. +type RefGroup struct { + // Symbol is the unique string by which this `RefGroup` is + // identified and configured. It consists of dot-separated + // components, which implicitly makes a nested tree-like + // structure. + Symbol RefGroupSymbol + + // Name is the name for this `ReferenceGroup` to be presented + // in user-readable output. + Name string +} + +// RefGrouper describes a type that can collate reference names into +// groups and decide which ones to walk. +type RefGrouper interface { + // Categorize tells whether `refname` should be walked at all, + // and if so, the symbols of the reference groups to which it + // belongs. + Categorize(refname string) (bool, []RefGroupSymbol) + + // Groups returns the list of `ReferenceGroup`s, in the order + // that they should be presented. The return value might + // depend on which references have been seen so far. 
+ Groups() []RefGroup +} + +type refSeen struct { + git.Reference + walked bool + groups []RefGroupSymbol +} + +func CollectReferences( + ctx context.Context, repo *git.Repository, rg RefGrouper, +) ([]refSeen, error) { + refIter, err := repo.NewReferenceIter(ctx) + if err != nil { + return nil, err + } + + var refsSeen []refSeen + for { + ref, ok, err := refIter.Next() + if err != nil { + return nil, err + } + if !ok { + return refsSeen, nil + } + + walk, groups := rg.Categorize(ref.Refname) + + refsSeen = append( + refsSeen, + refSeen{ + Reference: ref, + walked: walk, + groups: groups, + }, + ) + } +} From fdfa791791c392324ec0cde0e42d070f6c9b96c3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 17:57:31 +0200 Subject: [PATCH 151/176] ScanRepositoryUsingGraph(): take a context argument --- git-sizer.go | 10 +++++++--- git_sizer_test.go | 11 ++++++----- sizes/graph.go | 6 ++---- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index d1e075c..6c9e7a3 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -1,6 +1,7 @@ package main import ( + "context" "encoding/json" "errors" "fmt" @@ -93,14 +94,17 @@ var ReleaseVersion string var BuildVersion string func main() { - err := mainImplementation(os.Stdout, os.Stderr, os.Args[1:]) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := mainImplementation(ctx, os.Stdout, os.Stderr, os.Args[1:]) if err != nil { fmt.Fprintf(os.Stderr, "error: %s\n", err) os.Exit(1) } } -func mainImplementation(stdout, stderr io.Writer, args []string) error { +func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []string) error { var nameStyle sizes.NameStyle = sizes.NameStyleFull var cpuprofile string var jsonOutput bool @@ -288,7 +292,7 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } - historySize, err := 
sizes.ScanRepositoryUsingGraph(repo, rg, nameStyle, progressMeter) + historySize, err := sizes.ScanRepositoryUsingGraph(ctx, repo, rg, nameStyle, progressMeter) if err != nil { return fmt.Errorf("error scanning repository: %w", err) } diff --git a/git_sizer_test.go b/git_sizer_test.go index 6ab132f..b08985b 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -2,6 +2,7 @@ package main_test import ( "bytes" + "context" "encoding/json" "fmt" "io" @@ -563,7 +564,7 @@ func TestBomb(t *testing.T) { newGitBomb(t, repo, 10, 10, "boom!\n") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleFull, meter.NoProgressMeter, ) require.NoError(t, err) @@ -636,7 +637,7 @@ func TestTaggedTags(t *testing.T) { require.NoError(t, cmd.Run(), "creating tag 3") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -658,7 +659,7 @@ func TestFromSubdir(t *testing.T) { require.NoError(t, cmd.Run(), "creating commit") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -711,7 +712,7 @@ func TestSubmodule(t *testing.T) { // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - mainRepo.Repository(t), + context.Background(), mainRepo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -724,7 +725,7 @@ func TestSubmodule(t *testing.T) { Path: filepath.Join(mainRepo.Path, "sub"), } h, err = sizes.ScanRepositoryUsingGraph( - submRepo2.Repository(t), + context.Background(), submRepo2.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning 
repository") diff --git a/sizes/graph.go b/sizes/graph.go index a56cbc2..1b908cc 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -19,12 +19,10 @@ import ( // // It returns the size data for the repository. func ScanRepositoryUsingGraph( + ctx context.Context, repo *git.Repository, rg RefGrouper, nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { - ctx, cancel := context.WithCancel(context.TODO()) - defer cancel() - graph := NewGraph(nameStyle) refsSeen, err := CollectReferences(ctx, repo, rg) @@ -32,7 +30,7 @@ func ScanRepositoryUsingGraph( return HistorySize{}, fmt.Errorf("reading references: %w", err) } - objIter, err := repo.NewObjectIter(context.TODO()) + objIter, err := repo.NewObjectIter(ctx) if err != nil { return HistorySize{}, err } From 1a2c0b51069b8eedecac2fccf532b7e6da11a1d3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 18:00:39 +0200 Subject: [PATCH 152/176] refSeen: make type and its members public and rename it to `RefRoot` --- sizes/graph.go | 12 ++++++------ sizes/grouper.go | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index 1b908cc..59a6365 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -25,7 +25,7 @@ func ScanRepositoryUsingGraph( ) (HistorySize, error) { graph := NewGraph(nameStyle) - refsSeen, err := CollectReferences(ctx, repo, rg) + refRoots, err := CollectReferences(ctx, repo, rg) if err != nil { return HistorySize{}, fmt.Errorf("reading references: %w", err) } @@ -42,12 +42,12 @@ func ScanRepositoryUsingGraph( defer objIter.Close() errChan <- func() error { - for _, refSeen := range refsSeen { - if !refSeen.walked { + for _, refRoot := range refRoots { + if !refRoot.Walk { continue } - if err := objIter.AddRoot(refSeen.OID); err != nil { + if err := objIter.AddRoot(refRoot.OID); err != nil { return err } } @@ -261,9 +261,9 @@ func ScanRepositoryUsingGraph( } progressMeter.Start("Processing references: %d") - 
for _, refSeen := range refsSeen { + for _, refRoot := range refRoots { progressMeter.Inc() - graph.RegisterReference(refSeen.Reference, refSeen.walked, refSeen.groups) + graph.RegisterReference(refRoot.Reference, refRoot.Walk, refRoot.Groups) } progressMeter.Done() diff --git a/sizes/grouper.go b/sizes/grouper.go index a5b8a26..3807b0e 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -44,21 +44,21 @@ type RefGrouper interface { Groups() []RefGroup } -type refSeen struct { +type RefRoot struct { git.Reference - walked bool - groups []RefGroupSymbol + Walk bool + Groups []RefGroupSymbol } func CollectReferences( ctx context.Context, repo *git.Repository, rg RefGrouper, -) ([]refSeen, error) { +) ([]RefRoot, error) { refIter, err := repo.NewReferenceIter(ctx) if err != nil { return nil, err } - var refsSeen []refSeen + var refsSeen []RefRoot for { ref, ok, err := refIter.Next() if err != nil { @@ -72,10 +72,10 @@ func CollectReferences( refsSeen = append( refsSeen, - refSeen{ + RefRoot{ Reference: ref, - walked: walk, - groups: groups, + Walk: walk, + Groups: groups, }, ) } From 757866b5adda4d0cff52d917d48eab0dc92275ae Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 18:13:34 +0200 Subject: [PATCH 153/176] ScanRepositoryUsingGraph(): take a list of `RefRoot`s as argument --- git-sizer.go | 9 +++- git_sizer_test.go | 110 ++++++++++++++++++++++++++++++---------------- sizes/graph.go | 7 +-- 3 files changed, 81 insertions(+), 45 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 6c9e7a3..0336d13 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -292,7 +292,14 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } - historySize, err := sizes.ScanRepositoryUsingGraph(ctx, repo, rg, nameStyle, progressMeter) + refRoots, err := sizes.CollectReferences(ctx, repo, rg) + if err != nil { + return fmt.Errorf("determining which reference to scan: 
%w", err) + } + + historySize, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, refRoots, nameStyle, progressMeter, + ) if err != nil { return fmt.Errorf("error scanning repository: %w", err) } diff --git a/git_sizer_test.go b/git_sizer_test.go index b08985b..54d90d5 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -558,14 +558,21 @@ func (rg refGrouper) Groups() []sizes.RefGroup { func TestBomb(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, true, "bomb") - t.Cleanup(func() { repo.Remove(t) }) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, true, "bomb") + t.Cleanup(func() { testRepo.Remove(t) }) + + newGitBomb(t, testRepo, 10, 10, "boom!\n") - newGitBomb(t, repo, 10, 10, "boom!\n") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), - refGrouper{}, sizes.NameStyleFull, meter.NoProgressMeter, + ctx, repo, + refRoots, sizes.NameStyleFull, meter.NoProgressMeter, ) require.NoError(t, err) @@ -613,32 +620,39 @@ func TestBomb(t *testing.T) { func TestTaggedTags(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "tagged-tags") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "tagged-tags") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - cmd := repo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") // The lexicographical order of these tags is important, hence // their strange names.
- cmd = repo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 1") - cmd = repo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 2") - cmd = repo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating tag 3") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), repo, + refRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -647,20 +661,27 @@ func TestTaggedTags(t *testing.T) { func TestFromSubdir(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "subdir") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "subdir") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - repo.AddFile(t, "subdir/file.txt", "Hello, world!\n") + testRepo.AddFile(t, "subdir/file.txt", "Hello, world!\n") - cmd := repo.GitCommand(t, "commit", "-m", "initial") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating commit") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), -
refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), testRepo.Repository(t), + refRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -669,6 +690,8 @@ func TestFromSubdir(t *testing.T) { func TestSubmodule(t *testing.T) { t.Parallel() + ctx := context.Background() + tmp, err := ioutil.TempDir("", "submodule") require.NoError(t, err, "creating temporary directory") @@ -678,42 +701,47 @@ func TestSubmodule(t *testing.T) { timestamp := time.Unix(1112911993, 0) - submRepo := testutils.TestRepo{ + submTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "subm"), } - submRepo.Init(t, false) - submRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") - submRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") - submRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") + submTestRepo.Init(t, false) + submTestRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") + submTestRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") + submTestRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") - cmd := submRepo.GitCommand(t, "commit", "-m", "subm initial") + cmd := submTestRepo.GitCommand(t, "commit", "-m", "subm initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating subm commit") - mainRepo := testutils.TestRepo{ + mainTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "main"), } - mainRepo.Init(t, false) + mainTestRepo.Init(t, false) - mainRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") + mainTestRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") - cmd = mainRepo.GitCommand(t, "commit", "-m", "main initial") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "main initial") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = mainRepo.GitCommand(t, "-c",
"protocol.file.allow=always", "submodule", "add", submRepo.Path, "sub") - cmd.Dir = mainRepo.Path + cmd = mainTestRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submTestRepo.Path, "sub") + cmd.Dir = mainTestRepo.Path require.NoError(t, cmd.Run(), "adding submodule") - cmd = mainRepo.GitCommand(t, "commit", "-m", "add submodule") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "add submodule") testutils.AddAuthorInfo(cmd, &timestamp) require.NoError(t, cmd.Run(), "committing submodule to main") + mainRepo := mainTestRepo.Repository(t) + + mainRefRoots, err := sizes.CollectReferences(ctx, mainRepo, refGrouper{}) + require.NoError(t, err) + // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), mainRepo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), mainTestRepo.Repository(t), + mainRefRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -721,12 +749,18 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(1), h.MaxExpandedSubmoduleCount, "max expanded submodule count") // Analyze the submodule: - submRepo2 := testutils.TestRepo{ - Path: filepath.Join(mainRepo.Path, "sub"), + submTestRepo2 := testutils.TestRepo{ + Path: filepath.Join(mainTestRepo.Path, "sub"), } + + submRepo2 := submTestRepo2.Repository(t) + + submRefRoots2, err := sizes.CollectReferences(ctx, submRepo2, refGrouper{}) + require.NoError(t, err) + h, err = sizes.ScanRepositoryUsingGraph( - context.Background(), submRepo2.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), submRepo2, + submRefRoots2, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") diff --git a/sizes/graph.go
b/sizes/graph.go index 59a6365..e9033ef 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -20,16 +20,11 @@ import ( // It returns the size data for the repository. func ScanRepositoryUsingGraph( ctx context.Context, - repo *git.Repository, rg RefGrouper, nameStyle NameStyle, + repo *git.Repository, refRoots []RefRoot, nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { graph := NewGraph(nameStyle) - refRoots, err := CollectReferences(ctx, repo, rg) - if err != nil { - return HistorySize{}, fmt.Errorf("reading references: %w", err) - } - objIter, err := repo.NewObjectIter(ctx) if err != nil { return HistorySize{}, err From 897baa1a96585fbc44238d0a536c92bf8a11f3ec Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 18:28:54 +0200 Subject: [PATCH 154/176] RefRoot: add some methods We want to add another type of root, so start the virtualization process. --- sizes/graph.go | 6 +++--- sizes/grouper.go | 17 +++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index e9033ef..660f682 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -38,11 +38,11 @@ func ScanRepositoryUsingGraph( errChan <- func() error { for _, refRoot := range refRoots { - if !refRoot.Walk { + if !refRoot.Walk() { continue } - if err := objIter.AddRoot(refRoot.OID); err != nil { + if err := objIter.AddRoot(refRoot.OID()); err != nil { return err } } @@ -258,7 +258,7 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing references: %d") for _, refRoot := range refRoots { progressMeter.Inc() - graph.RegisterReference(refRoot.Reference, refRoot.Walk, refRoot.Groups) + graph.RegisterReference(refRoot.Reference(), refRoot.Walk(), refRoot.Groups()) } progressMeter.Done() diff --git a/sizes/grouper.go b/sizes/grouper.go index 3807b0e..32d63ca 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -45,11 +45,16 @@ type RefGrouper interface { } type RefRoot struct { - git.Reference - Walk bool - 
Groups []RefGroupSymbol + ref git.Reference + walk bool + groups []RefGroupSymbol } +func (rr RefRoot) OID() git.OID { return rr.ref.OID } +func (rr RefRoot) Reference() git.Reference { return rr.ref } +func (rr RefRoot) Walk() bool { return rr.walk } +func (rr RefRoot) Groups() []RefGroupSymbol { return rr.groups } + func CollectReferences( ctx context.Context, repo *git.Repository, rg RefGrouper, ) ([]RefRoot, error) { @@ -73,9 +78,9 @@ func CollectReferences( refsSeen = append( refsSeen, RefRoot{ - Reference: ref, - Walk: walk, - Groups: groups, + ref: ref, + walk: walk, + groups: groups, }, ) } From 9e8b14fe3012f05c163ffdf79a32bcb2b48ea422 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Mon, 14 Aug 2023 20:14:59 +0200 Subject: [PATCH 155/176] Allow arbitrary reachability roots to be fed in Instead of only traversing objects starting at references, allow the user to specify explicit Git objects via the command line. In that case, the traversal includes objects reachable from those objects. --- git-sizer.go | 50 ++++++-- git/obj_resolver.go | 20 +++ git/ref_filter.go | 16 ++- git_sizer_test.go | 178 ++++++++++++++++++-------- internal/refopts/ref_group_builder.go | 9 +- sizes/explicit_root.go | 19 +++ sizes/graph.go | 41 ++++-- sizes/grouper.go | 1 + sizes/path_resolver.go | 60 +++++---- 9 files changed, 290 insertions(+), 104 deletions(-) create mode 100644 git/obj_resolver.go create mode 100644 sizes/explicit_root.go diff --git a/git-sizer.go b/git-sizer.go index 0336d13..7cfd6ff 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -20,7 +20,9 @@ import ( "github.com/github/git-sizer/sizes" ) -const usage = `usage: git-sizer [OPTS] +const usage = `usage: git-sizer [OPTS] [ROOT...] + + Scan objects in your Git repository and emit statistics about them. --threshold THRESHOLD minimum level of concern (i.e., number of stars) that should be reported. Default: @@ -46,12 +48,29 @@ const usage = `usage: git-sizer [OPTS] be set via gitconfig: 'sizer.progress'. 
--version only report the git-sizer version number + Object selection: + + git-sizer traverses through your Git history to find objects to + process. By default, it processes all objects that are reachable from + any reference. You can tell it to process only some of your + references; see "Reference selection" below. + + If explicit ROOTs are specified on the command line, each one should + be a string that 'git rev-parse' can convert into a single Git object + ID, like 'main', 'main~:src', or an abbreviated SHA-1. See + git-rev-parse(1) for details. In that case, git-sizer also treats + those objects as starting points for its traversal, and also includes + the Git objects that are reachable from those roots in the analysis. + + As a special case, if one or more ROOTs are specified on the command + line but _no_ reference selection options, then _only_ the specified + ROOTs are traversed, and no references. + Reference selection: - By default, git-sizer processes all Git objects that are reachable - from any reference. The following options can be used to limit which - references to process. The last rule matching a reference determines - whether that reference is processed. + The following options can be used to limit which references to + process. The last rule matching a reference determines whether that + reference is processed. 
--[no-]branches process [don't process] branches --[no-]tags process [don't process] tags @@ -220,10 +239,6 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st return nil } - if len(flags.Args()) != 0 { - return errors.New("excess arguments") - } - if repoErr != nil { return fmt.Errorf("couldn't open Git repository: %w", repoErr) } @@ -277,7 +292,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st progress = v } - rg, err := rgb.Finish() + rg, err := rgb.Finish(len(flags.Args()) == 0) if err != nil { return err } @@ -297,8 +312,21 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st return fmt.Errorf("determining which reference to scan: %w", err) } + roots := make([]sizes.Root, 0, len(refRoots)+len(flags.Args())) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + for _, arg := range flags.Args() { + oid, err := repo.ResolveObject(arg) + if err != nil { + return fmt.Errorf("resolving command-line argument %q: %w", arg, err) + } + roots = append(roots, sizes.NewExplicitRoot(arg, oid)) + } + historySize, err := sizes.ScanRepositoryUsingGraph( - ctx, repo, refRoots, nameStyle, progressMeter, + ctx, repo, roots, nameStyle, progressMeter, ) if err != nil { return fmt.Errorf("error scanning repository: %w", err) diff --git a/git/obj_resolver.go b/git/obj_resolver.go new file mode 100644 index 0000000..418e293 --- /dev/null +++ b/git/obj_resolver.go @@ -0,0 +1,20 @@ +package git + +import ( + "bytes" + "fmt" +) + +func (repo *Repository) ResolveObject(name string) (OID, error) { + cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name) + output, err := cmd.Output() + if err != nil { + return NullOID, fmt.Errorf("resolving object %q: %w", name, err) + } + oidString := string(bytes.TrimSpace(output)) + oid, err := NewOID(oidString) + if err != nil { + return NullOID, fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) 
+ } + return oid, nil +} diff --git a/git/ref_filter.go b/git/ref_filter.go index 8eb8a9b..46aff66 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -83,15 +83,23 @@ func (_ allReferencesFilter) Filter(_ string) bool { var AllReferencesFilter allReferencesFilter +type noReferencesFilter struct{} + +func (_ noReferencesFilter) Filter(_ string) bool { + return false +} + +var NoReferencesFilter noReferencesFilter + // PrefixFilter returns a `ReferenceFilter` that matches references // whose names start with the specified `prefix`, which must match at // a component boundary. For example, // -// * Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not -// "refs/foobar". +// - Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not +// "refs/foobar". // -// * Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or -// "refs/foobar". +// - Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or +// "refs/foobar". func PrefixFilter(prefix string) ReferenceFilter { if prefix == "" { return AllReferencesFilter diff --git a/git_sizer_test.go b/git_sizer_test.go index 54d90d5..16d58c9 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -567,54 +567,112 @@ func TestBomb(t *testing.T) { repo := testRepo.Repository(t) - refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) - require.NoError(t, err) + t.Run("full", func(t *testing.T) { + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) - h, err := sizes.ScanRepositoryUsingGraph( - ctx, repo, - refRoots, sizes.NameStyleFull, meter.NoProgressMeter, - ) - require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") + 
assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") + assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.BestPath(), "max commit size commit") + assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.BestPath(), "max parent count commit") + + assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, 
"refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) + + t.Run("partial", func(t *testing.T) { + name := "master:d0/d0" + oid, err := repo.ResolveObject(name) + require.NoError(t, err) + roots := []sizes.Root{sizes.NewExplicitRoot(name, oid)} - assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") - assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") - assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") - assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.Path(), "max commit size commit") - assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") - assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") - assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.Path(), "max parent count commit") - - assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") - assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") - assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") - assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") - assert.Equal(t, 
"refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.Path(), "max tree entries tree") - - assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") - assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") - assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") - assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.Path(), "max blob size blob") - - assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") - assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") - - assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") - - assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.Path(), "max path depth tree") - assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.Path(), "max path length tree") - - assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.Path(), "max expanded tree count tree") - assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.Path(), "max expanded blob count tree") - assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.Path(), "max expanded blob size tree") - assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") - assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") - assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") - assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + 
h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(0), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(0), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(0), h.MaxCommitSize, "max commit size") + assert.Nil(t, h.MaxCommitSizeCommit) + assert.Equal(t, counts.Count32(0), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Nil(t, h.MaxParentCountCommit, "max parent count commit") + + assert.Equal(t, counts.Count32(8), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2330), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(80), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(0), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(8), h.MaxPathDepth, "max path depth") + assert.Equal(t, "master:d0/d0", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(23), h.MaxPathLength, "max path length") + assert.Equal(t, "master:d0/d0", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 8)-1)/(10-1)), 
h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(pow(10, 8)), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 8)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) } func TestTaggedTags(t *testing.T) { @@ -650,9 +708,14 @@ func TestTaggedTags(t *testing.T) { refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( context.Background(), repo, - refRoots, sizes.NameStyleNone, meter.NoProgressMeter, + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -679,9 +742,14 @@ func TestFromSubdir(t *testing.T) { refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( context.Background(), testRepo.Repository(t), - refRoots, sizes.NameStyleNone, meter.NoProgressMeter, + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, 
err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -738,10 +806,15 @@ func TestSubmodule(t *testing.T) { mainRefRoots, err := sizes.CollectReferences(ctx, mainRepo, refGrouper{}) require.NoError(t, err) + mainRoots := make([]sizes.Root, 0, len(mainRefRoots)) + for _, refRoot := range mainRefRoots { + mainRoots = append(mainRoots, refRoot) + } + // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( context.Background(), mainTestRepo.Repository(t), - mainRefRoots, sizes.NameStyleNone, meter.NoProgressMeter, + mainRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -758,9 +831,14 @@ func TestSubmodule(t *testing.T) { submRefRoots2, err := sizes.CollectReferences(ctx, submRepo2, refGrouper{}) require.NoError(t, err) + submRoots2 := make([]sizes.Root, 0, len(submRefRoots2)) + for _, refRoot := range submRefRoots2 { + submRoots2 = append(submRoots2, refRoot) + } + h, err = sizes.ScanRepositoryUsingGraph( context.Background(), submRepo2, - submRefRoots2, sizes.NameStyleNone, meter.NoProgressMeter, + submRoots2, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 3c3179e..48f1190 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -254,9 +254,14 @@ func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { // Finish collects the information gained from processing the options // and returns a `sizes.RefGrouper`. 
-func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { +func (rgb *RefGroupBuilder) Finish(defaultAll bool) (sizes.RefGrouper, error) { if rgb.topLevelGroup.filter == nil { - rgb.topLevelGroup.filter = git.AllReferencesFilter + // User didn't specify any reference options. + if defaultAll { + rgb.topLevelGroup.filter = git.AllReferencesFilter + } else { + rgb.topLevelGroup.filter = git.NoReferencesFilter + } } refGrouper := refGrouper{ diff --git a/sizes/explicit_root.go b/sizes/explicit_root.go new file mode 100644 index 0000000..09348db --- /dev/null +++ b/sizes/explicit_root.go @@ -0,0 +1,19 @@ +package sizes + +import "github.com/github/git-sizer/git" + +type ExplicitRoot struct { + name string + oid git.OID +} + +func NewExplicitRoot(name string, oid git.OID) ExplicitRoot { + return ExplicitRoot{ + name: name, + oid: oid, + } +} + +func (er ExplicitRoot) Name() string { return er.name } +func (er ExplicitRoot) OID() git.OID { return er.oid } +func (er ExplicitRoot) Walk() bool { return true } diff --git a/sizes/graph.go b/sizes/graph.go index 660f682..0fb1c8a 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -11,6 +11,18 @@ import ( "github.com/github/git-sizer/meter" ) +type Root interface { + Name() string + OID() git.OID + Walk() bool +} + +type ReferenceRoot interface { + Root + Reference() git.Reference + Groups() []RefGroupSymbol +} + // ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which // references to scan and how to group them. `nameStyle` specifies // whether the output should include full names, hashes only, or @@ -20,7 +32,9 @@ import ( // It returns the size data for the repository. 
func ScanRepositoryUsingGraph( ctx context.Context, - repo *git.Repository, refRoots []RefRoot, nameStyle NameStyle, + repo *git.Repository, + roots []Root, + nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { graph := NewGraph(nameStyle) @@ -37,12 +51,12 @@ func ScanRepositoryUsingGraph( defer objIter.Close() errChan <- func() error { - for _, refRoot := range refRoots { - if !refRoot.Walk() { + for _, root := range roots { + if !root.Walk() { continue } - if err := objIter.AddRoot(refRoot.OID()); err != nil { + if err := objIter.AddRoot(root.OID()); err != nil { return err } } @@ -256,9 +270,15 @@ func ScanRepositoryUsingGraph( } progressMeter.Start("Processing references: %d") - for _, refRoot := range refRoots { + for _, root := range roots { progressMeter.Inc() - graph.RegisterReference(refRoot.Reference(), refRoot.Walk(), refRoot.Groups()) + if refRoot, ok := root.(ReferenceRoot); ok { + graph.RegisterReference(refRoot.Reference(), refRoot.Groups()) + } + + if root.Walk() { + graph.pathResolver.RecordName(root.Name(), root.OID()) + } } progressMeter.Done() @@ -310,17 +330,18 @@ func NewGraph(nameStyle NameStyle) *Graph { } // RegisterReference records the specified reference in `g`. -func (g *Graph) RegisterReference(ref git.Reference, walked bool, groups []RefGroupSymbol) { +func (g *Graph) RegisterReference(ref git.Reference, groups []RefGroupSymbol) { g.historyLock.Lock() g.historySize.recordReference(g, ref) for _, group := range groups { g.historySize.recordReferenceGroup(g, group) } g.historyLock.Unlock() +} - if walked { - g.pathResolver.RecordReference(ref) - } +// Register a name that can be used for the specified OID. +func (g *Graph) RegisterName(name string, oid git.OID) { + g.pathResolver.RecordName(name, oid) } // HistorySize returns the size data that have been collected. 
diff --git a/sizes/grouper.go b/sizes/grouper.go index 32d63ca..fdaa927 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -50,6 +50,7 @@ type RefRoot struct { groups []RefGroupSymbol } +func (rr RefRoot) Name() string { return rr.ref.Refname } func (rr RefRoot) OID() git.OID { return rr.ref.OID } func (rr RefRoot) Reference() git.Reference { return rr.ref } func (rr RefRoot) Walk() bool { return rr.walk } diff --git a/sizes/path_resolver.go b/sizes/path_resolver.go index 2a3bb1c..275d19a 100644 --- a/sizes/path_resolver.go +++ b/sizes/path_resolver.go @@ -12,15 +12,15 @@ import ( // `rev-parse` input, including commit and/or file path) by which // specified objects are reachable. It is used as follows: // -// * Request an object's path using `RequestPath()`. The returned -// `Path` object is a placeholder for the object's path. +// - Request an object's path using `RequestPath()`. The returned +// `Path` object is a placeholder for the object's path. // -// * Tell the `PathResolver` about objects that might be along the -// object's reachability path, *in depth-first* order (i.e., -// referents before referers) by calling `RecordTree()`, -// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. +// - Tell the `PathResolver` about objects that might be along the +// object's reachability path, *in depth-first* order (i.e., +// referents before referers) by calling `RecordTree()`, +// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. // -// * Read the path out of the `Path` object using `Path.Path()`. +// - Read the path out of the `Path` object using `Path.Path()`. // // Multiple objects can be processed at once. 
// @@ -34,7 +34,7 @@ import ( type PathResolver interface { RequestPath(oid git.OID, objectType string) *Path ForgetPath(p *Path) - RecordReference(ref git.Reference) + RecordName(name string, oid git.OID) RecordTreeEntry(oid git.OID, name string, childOID git.OID) RecordCommit(oid, tree git.OID) RecordTag(oid git.OID, tag *git.Tag) @@ -60,7 +60,7 @@ func (n NullPathResolver) RequestPath(oid git.OID, objectType string) *Path { func (_ NullPathResolver) ForgetPath(p *Path) {} -func (_ NullPathResolver) RecordReference(ref git.Reference) {} +func (_ NullPathResolver) RecordName(name string, oid git.OID) {} func (_ NullPathResolver) RecordTreeEntry(oid git.OID, name string, childOID git.OID) {} @@ -77,19 +77,19 @@ type InOrderPathResolver struct { // (e.g., the biggest blob, or a tree containing the biggest blob, or // a commit whose tree contains the biggest blob). Valid states: // -// * `parent == nil && relativePath == ""`—we have not yet found -// anything that refers to this object. +// - `parent == nil && relativePath == ""`—we have not yet found +// anything that refers to this object. // -// * `parent != nil && relativePath == ""`—this object is a tree, and -// we have found a commit that refers to it. +// - `parent != nil && relativePath == ""`—this object is a tree, and +// we have found a commit that refers to it. // -// * `parent == nil && relativePath != ""`—we have found a reference -// that points directly at this object; `relativePath` is the full -// name of the reference. +// - `parent == nil && relativePath != ""`—we have found a reference +// that points directly at this object; `relativePath` is the full +// name of the reference. // -// * `parent != nil && relativePath != ""`—this object is a blob or -// tree, and we have found another tree that refers to it; -// `relativePath` is the corresponding tree entry name. 
+// - `parent != nil && relativePath != ""`—this object is a blob or +// tree, and we have found another tree that refers to it; +// `relativePath` is the corresponding tree entry name. type Path struct { // The OID of the object whose path we seek. This member is always // set. @@ -122,7 +122,8 @@ type Path struct { func (p *Path) TreePrefix() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return p.parent.TreePrefix() @@ -130,7 +131,9 @@ func (p *Path) TreePrefix() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath + "/" } - } else { + case p.relativePath != "": + return p.relativePath + "/" + default: return "???" } case "commit", "tag": @@ -153,7 +156,8 @@ func (p *Path) TreePrefix() string { func (p *Path) Path() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return fmt.Sprintf("%s^{%s}", p.parent.BestPath(), p.objectType) @@ -161,7 +165,9 @@ func (p *Path) Path() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath } - } else { + case p.relativePath != "": + return p.relativePath + default: return "" } case "commit", "tag": @@ -274,18 +280,18 @@ func (pr *InOrderPathResolver) forgetPathLocked(p *Path) { } } -func (pr *InOrderPathResolver) RecordReference(ref git.Reference) { +func (pr *InOrderPathResolver) RecordName(name string, oid git.OID) { pr.lock.Lock() defer pr.lock.Unlock() - p, ok := pr.soughtPaths[ref.OID] + p, ok := pr.soughtPaths[oid] if !ok { // Nobody is looking for the path to the referent. 
return } - p.relativePath = ref.Refname - delete(pr.soughtPaths, ref.OID) + p.relativePath = name + delete(pr.soughtPaths, oid) } // Record that the tree with OID `oid` has an entry with the specified From 5d339ec292a3cc126f802efa98de90ea6a804626 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 19 Aug 2023 15:25:51 +0200 Subject: [PATCH 156/176] There's no reason to make this context cancelable --- git-sizer.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 7cfd6ff..0888d78 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -113,8 +113,7 @@ var ReleaseVersion string var BuildVersion string func main() { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() + ctx := context.Background() err := mainImplementation(ctx, os.Stdout, os.Stderr, os.Args[1:]) if err != nil { From e8d9c2eebde3389f80ec8a67a9d45f907d57298a Mon Sep 17 00:00:00 2001 From: rajhawaldar Date: Sat, 23 Sep 2023 10:34:59 +0530 Subject: [PATCH 157/176] Update the installation steps to use 'go install' Signed-off-by: rajhawaldar --- docs/BUILDING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/BUILDING.md b/docs/BUILDING.md index a977a2c..7f9fdef 100644 --- a/docs/BUILDING.md +++ b/docs/BUILDING.md @@ -7,11 +7,11 @@ Most people can just install a released version of `git-sizer`, [as described in 1. Make sure that you have a recent version of the [Go language toolchain](https://golang.org/doc/install) installed and that you have set `GOPATH`. -2. Get `git-sizer` using `go get`: +2. Get `git-sizer` using `go install`: - go get github.com/github/git-sizer + go install github.com/github/git-sizer@latest - This should fetch and compile the source code and write the executable file to `$GOPATH/bin/`. + This should install the executable file to `$GOPATH/bin/`. 3. 
Either add `$GOPATH/bin` to your `PATH`, or copy the executable file (`git-sizer` or `git-sizer.exe`) to a directory that is already in your `PATH`. From 1b0ecde670f17563805ee2f297155f9faf2c1f24 Mon Sep 17 00:00:00 2001 From: elhmn Date: Fri, 3 Nov 2023 11:26:36 +0100 Subject: [PATCH 158/176] Upgrade build scripts to go1.21 --- script/ensure-go-installed.sh | 6 +++--- script/install-vendored-go | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/script/ensure-go-installed.sh b/script/ensure-go-installed.sh index 3111b9e..1e301fd 100644 --- a/script/ensure-go-installed.sh +++ b/script/ensure-go-installed.sh @@ -4,17 +4,17 @@ if [ -z "$ROOTDIR" ]; then echo 1>&2 'ensure-go-installed.sh invoked without ROOTDIR set!' fi -# Is go installed, and at least 1.16? +# Is go installed, and at least 1.21? go_ok() { set -- $(go version 2>/dev/null | sed -n 's/.*go\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\1 \2/p' | head -n 1) - [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 16 ] + [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 21 ] } # If a local go is installed, use it. set_up_vendored_go() { - GO_VERSION=go1.16.3 + GO_VERSION=go1.21.3 VENDORED_GOROOT="$ROOTDIR/vendor/$GO_VERSION/go" if [ -x "$VENDORED_GOROOT/bin/go" ]; then export GOROOT="$VENDORED_GOROOT" diff --git a/script/install-vendored-go b/script/install-vendored-go index 2407618..45ace01 100755 --- a/script/install-vendored-go +++ b/script/install-vendored-go @@ -1,20 +1,21 @@ #!/bin/sh # The checksums below must correspond to the downloads for this version. 
-GO_VERSION=go1.16.3 +# The checksums can be found on https://go.dev/dl +GO_VERSION=go1.21.3 case "$(uname -s):$(uname -m)" in Linux:x86_64) GO_PKG=${GO_VERSION}.linux-amd64.tar.gz - GO_PKG_SHA=951a3c7c6ce4e56ad883f97d9db74d3d6d80d5fec77455c6ada6c1f7ac4776d2 + GO_PKG_SHA=1241381b2843fae5a9707eec1f8fb2ef94d827990582c7c7c32f5bdfbfd420c8 ;; Darwin:x86_64) GO_PKG=${GO_VERSION}.darwin-amd64.tar.gz - GO_PKG_SHA=6bb1cf421f8abc2a9a4e39140b7397cdae6aca3e8d36dcff39a1a77f4f1170ac + GO_PKG_SHA=27014fc69e301d7588a169ca239b3cc609f0aa1abf38528bf0d20d3b259211eb ;; Darwin:arm64) GO_PKG=${GO_VERSION}.darwin-arm64.tar.gz - GO_PKG_SHA=f4e96bbcd5d2d1942f5b55d9e4ab19564da4fad192012f6d7b0b9b055ba4208f + GO_PKG_SHA=65302a7a9f7a4834932b3a7a14cb8be51beddda757b567a2f9e0cbd0d7b5a6ab ;; *) echo 1>&2 "I don't know how to install Go on your platform." From b1712756e47dd4f761b26fa326afab2e9b47f252 Mon Sep 17 00:00:00 2001 From: elhmn Date: Thu, 9 Nov 2023 16:15:51 +0100 Subject: [PATCH 159/176] Generate automatic draft release We needed a way to generate draft releases for git-sizer binaries. This commit adds a new `.github/workflows/release.yml` github action that will generate a draft release when a new tag version is pushed. 
The action will be triggered after the tag is created and pushed using: ``` git tag -as v$VERSION git push origin v$VERSION ``` --- .github/workflows/release.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..58af3d6 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,34 @@ +name: Release + +on: + push: + tags: + - "v*" + +permissions: + contents: write + +jobs: + lint: + name: Release + runs-on: ubuntu-latest + steps: + - name: Setup + uses: + actions/setup-go@v4 + with: + go-version: 1.21 + + - name: Checkout + uses: actions/checkout@v4 + + - name: Build releases + run: | + make releases VERSION=$GITHUB_REF_NAME + + - name: Release + uses: softprops/action-gh-release@v1 + with: + draft: true + files: | + releases/git-sizer-* From 2ed1053ff9d440ec0e405e2f25157e25926633dd Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 13:45:06 +0100 Subject: [PATCH 160/176] Stop using deprecated function `ioutil.TempDir()` --- git_sizer_test.go | 3 +-- internal/testutils/repoutils.go | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 16d58c9..fbf470d 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -6,7 +6,6 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -760,7 +759,7 @@ func TestSubmodule(t *testing.T) { ctx := context.Background() - tmp, err := ioutil.TempDir("", "submodule") + tmp, err := os.MkdirTemp("", "submodule") require.NoError(t, err, "creating temporary directory") defer func() { diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 60a2f9b..954cff4 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -4,7 +4,6 @@ import ( "bytes" "fmt" "io" - "io/ioutil" "os" "os/exec" 
"path/filepath" @@ -29,7 +28,7 @@ type TestRepo struct { func NewTestRepo(t *testing.T, bare bool, pattern string) *TestRepo { t.Helper() - path, err := ioutil.TempDir("", pattern) + path, err := os.MkdirTemp("", pattern) require.NoError(t, err) repo := TestRepo{Path: path} @@ -73,7 +72,7 @@ func (repo *TestRepo) Remove(t *testing.T) { func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { t.Helper() - path, err := ioutil.TempDir("", pattern) + path, err := os.MkdirTemp("", pattern) require.NoError(t, err) err = repo.GitCommand( From c20cbb8693f82594b73d4a279390a1c7aa2b7644 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 12:45:42 +0100 Subject: [PATCH 161/176] Repository.gitDir: rename member from `path` The name `gitDir` is less ambiguous. Also rename method `Path()` to `GitDir()`. --- git/git.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/git/git.go b/git/git.go index f451c54..5531a6f 100644 --- a/git/git.go +++ b/git/git.go @@ -15,7 +15,10 @@ type ObjectType string // Repository represents a Git repository on disk. type Repository struct { - path string + // gitDir is the path to the `GIT_DIR` for this repository. It + // might be absolute or it might be relative to the current + // directory. + gitDir string // gitBin is the path of the `git` executable that should be used // when running commands in this repository. @@ -79,7 +82,7 @@ func NewRepository(path string) (*Repository, error) { } return &Repository{ - path: gitDir, + gitDir: gitDir, gitBin: gitBin, }, nil } @@ -103,7 +106,7 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { cmd.Env = append( os.Environ(), - "GIT_DIR="+repo.path, + "GIT_DIR="+repo.gitDir, // Disable grafts when running our commands: "GIT_GRAFT_FILE="+os.DevNull, ) @@ -111,7 +114,8 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { return cmd } -// Path returns the path to `repo`. 
-func (repo *Repository) Path() string { - return repo.path +// GitDir returns the path to `repo`'s `GIT_DIR`. It might be absolute +// or it might be relative to the current directory. +func (repo *Repository) GitDir() string { + return repo.gitDir } From 29fc88208a3a38f54fda3e7e555469bd6c8fff29 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 12:58:23 +0100 Subject: [PATCH 162/176] Repository.GitPath(): new method, extracted from `NewRepository()` Add a method `Repository.GitPath(relPath)`, which invokes `git rev-parse --git-path $relPath` to find the path to a file within the Git repository. In `NewRepository()`, instantiate the `Repository` object earlier so that the new method can be used to find the path to `shallow`. --- git/git.go | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/git/git.go b/git/git.go index 5531a6f..cba262d 100644 --- a/git/git.go +++ b/git/git.go @@ -66,25 +66,22 @@ func NewRepository(path string) (*Repository, error) { } gitDir := smartJoin(path, string(bytes.TrimSpace(out))) - //nolint:gosec // `gitBin` is chosen carefully. 
- cmd = exec.Command(gitBin, "rev-parse", "--git-path", "shallow") - cmd.Dir = gitDir - out, err = cmd.Output() + repo := Repository{ + gitDir: gitDir, + gitBin: gitBin, + } + + shallow, err := repo.GitPath("shallow") if err != nil { - return nil, fmt.Errorf( - "could not run 'git rev-parse --git-path shallow': %w", err, - ) + return nil, err } - shallow := smartJoin(gitDir, string(bytes.TrimSpace(out))) + _, err = os.Lstat(shallow) if err == nil { return nil, errors.New("this appears to be a shallow clone; full clone required") } - return &Repository{ - gitDir: gitDir, - gitBin: gitBin, - }, nil + return &repo, nil } func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { @@ -119,3 +116,20 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { func (repo *Repository) GitDir() string { return repo.gitDir } + +// GitPath returns that path of a file within the git repository, by +// calling `git rev-parse --git-path $relPath`. The returned path is +// relative to the current directory. +func (repo *Repository) GitPath(relPath string) (string, error) { + cmd := repo.GitCommand("rev-parse", "--git-path", relPath) + out, err := cmd.Output() + if err != nil { + return "", fmt.Errorf( + "running 'git rev-parse --git-path %s': %w", relPath, err, + ) + } + // `git rev-parse --git-path` is documented to return the path + // relative to the current directory. Since we haven't changed the + // current directory, we can use it as-is: + return string(bytes.TrimSpace(out)), nil +} From 1d75c744e2ed1ad45f469a356897b0e07ba9b7a2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 13:15:08 +0100 Subject: [PATCH 163/176] Repository.IsFull(): new method Extract a method to determine whether the repository seems to be a full clone. Call it from `NewRepository()`. 
--- git/git.go | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/git/git.go b/git/git.go index cba262d..a82d14c 100644 --- a/git/git.go +++ b/git/git.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "io/fs" "os" "os/exec" "path/filepath" @@ -71,17 +72,36 @@ func NewRepository(path string) (*Repository, error) { gitBin: gitBin, } + full, err := repo.IsFull() + if err != nil { + return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err) + } + if !full { + return nil, errors.New("this appears to be a shallow clone; full clone required") + } + + return &repo, nil +} + +// IsFull returns `true` iff `repo` appears to be a full clone. +func (repo *Repository) IsFull() (bool, error) { shallow, err := repo.GitPath("shallow") if err != nil { - return nil, err + return false, err } _, err = os.Lstat(shallow) if err == nil { - return nil, errors.New("this appears to be a shallow clone; full clone required") + return false, nil } - return &repo, nil + if !errors.Is(err, fs.ErrNotExist) { + return false, err + } + + // The `shallow` file is absent, which is what we expect + // for a full clone. + return true, nil } func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { From 39102dfaa3c2fc57e53c9a909042bee382af1d11 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 13:27:01 +0100 Subject: [PATCH 164/176] findGitBin(): memoize the result --- git/git_bin.go | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/git/git_bin.go b/git/git_bin.go index fc03435..526e9bb 100644 --- a/git/git_bin.go +++ b/git/git_bin.go @@ -2,10 +2,20 @@ package git import ( "path/filepath" + "sync" "github.com/cli/safeexec" ) +// This variable will be used to memoize the result of `findGitBin()`, +// since its return value only depends on the environment. 
+var gitBinMemo struct { + once sync.Once + + gitBin string + err error +} + // findGitBin finds the `git` binary in PATH that should be used by // the rest of `git-sizer`. It uses `safeexec` to find the executable, // because on Windows, `exec.Cmd` looks not only in PATH, but also in @@ -13,15 +23,20 @@ import ( // being scanned is hostile and non-bare because it might possibly // contain an executable file named `git`. func findGitBin() (string, error) { - gitBin, err := safeexec.LookPath("git") - if err != nil { - return "", err - } + gitBinMemo.once.Do(func() { + p, err := safeexec.LookPath("git") + if err != nil { + gitBinMemo.err = err + return + } - gitBin, err = filepath.Abs(gitBin) - if err != nil { - return "", err - } + p, err = filepath.Abs(p) + if err != nil { + gitBinMemo.err = err + return + } - return gitBin, nil + gitBinMemo.gitBin = p + }) + return gitBinMemo.gitBin, gitBinMemo.err } From 51cf26bdfd5f80d278cc274427d91d593b585235 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 17:53:15 +0100 Subject: [PATCH 165/176] smartJoin(): improve docstring --- git/git.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/git/git.go b/git/git.go index a82d14c..3f51c53 100644 --- a/git/git.go +++ b/git/git.go @@ -26,9 +26,10 @@ type Repository struct { gitBin string } -// smartJoin returns the path that can be described as `relPath` -// relative to `path`, given that `path` is either absolute or is -// relative to the current directory. +// smartJoin returns `relPath` if it is an absolute path. If not, it +// assumes that `relPath` is relative to `path`, so it joins them +// together and returns the result. In that case, if `path` itself is +// relative, then the return value is also relative. 
func smartJoin(path, relPath string) string { if filepath.IsAbs(relPath) { return relPath From 02928f10bf9a42654333abc5d288c3e36b405477 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 13:35:19 +0100 Subject: [PATCH 166/176] NewRepositoryFromGitDir(): new function If you already have the desired `GIT_DIR`, there's no need to determine it from the current path. --- git/git.go | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/git/git.go b/git/git.go index 3f51c53..60a468a 100644 --- a/git/git.go +++ b/git/git.go @@ -37,8 +37,36 @@ func smartJoin(path, relPath string) string { return filepath.Join(path, relPath) } -// NewRepository creates a new repository object that can be used for -// running `git` commands within that repository. +// NewRepositoryFromGitDir creates a new `Repository` object that can +// be used for running `git` commands, given the value of `GIT_DIR` +// for the repository. +func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { + // Find the `git` executable to be used: + gitBin, err := findGitBin() + if err != nil { + return nil, fmt.Errorf( + "could not find 'git' executable (is it in your PATH?): %w", err, + ) + } + + repo := Repository{ + gitDir: gitDir, + gitBin: gitBin, + } + + full, err := repo.IsFull() + if err != nil { + return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err) + } + if !full { + return nil, errors.New("this appears to be a shallow clone; full clone required") + } + + return &repo, nil +} + +// NewRepository creates a new `Repository` object that can be used +// for running `git` commands within `path`. 
func NewRepository(path string) (*Repository, error) { // Find the `git` executable to be used: gitBin, err := findGitBin() @@ -68,20 +96,7 @@ func NewRepository(path string) (*Repository, error) { } gitDir := smartJoin(path, string(bytes.TrimSpace(out))) - repo := Repository{ - gitDir: gitDir, - gitBin: gitBin, - } - - full, err := repo.IsFull() - if err != nil { - return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err) - } - if !full { - return nil, errors.New("this appears to be a shallow clone; full clone required") - } - - return &repo, nil + return NewRepositoryFromGitDir(gitDir) } // IsFull returns `true` iff `repo` appears to be a full clone. From f9aec5023a77e9336b6ec2f29bad7804caca57a6 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 17:56:00 +0100 Subject: [PATCH 167/176] NewRepositoryFromPath(): function renamed from `NewRepository()` --- git-sizer.go | 2 +- git/git.go | 9 +++++---- internal/testutils/repoutils.go | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 0888d78..1ef9812 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -134,7 +134,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st // Try to open the repository, but it's not an error yet if this // fails, because the user might only be asking for `--help`. - repo, repoErr := git.NewRepository(".") + repo, repoErr := git.NewRepositoryFromPath(".") flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) flags.Usage = func() { diff --git a/git/git.go b/git/git.go index 60a468a..096ce81 100644 --- a/git/git.go +++ b/git/git.go @@ -65,10 +65,11 @@ func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { return &repo, nil } -// NewRepository creates a new `Repository` object that can be used -// for running `git` commands within `path`. 
-func NewRepository(path string) (*Repository, error) { - // Find the `git` executable to be used: +// NewRepositoryFromPath creates a new `Repository` object that can be +// used for running `git` commands within `path`. It does so by asking +// `git` what `GIT_DIR` to use. Git, in turn, bases its decision on +// the path and the environment. +func NewRepositoryFromPath(path string) (*Repository, error) { gitBin, err := findGitBin() if err != nil { return nil, fmt.Errorf( diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 954cff4..e530925 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -89,7 +89,7 @@ func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { func (repo *TestRepo) Repository(t *testing.T) *git.Repository { t.Helper() - r, err := git.NewRepository(repo.Path) + r, err := git.NewRepositoryFromPath(repo.Path) require.NoError(t, err) return r } From d605cdb7c5e61f2d24cc29445f30255488a046c0 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 13 Dec 2023 17:56:31 +0100 Subject: [PATCH 168/176] TestRepo: for bare repositories, use `NewRepositoryFromGitDir()` There's no need to deduce the `GIT_DIR` for a bare repository. --- internal/testutils/repoutils.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index e530925..48a8759 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -20,6 +20,7 @@ import ( // TestRepo represents a git repository used for tests. 
type TestRepo struct { Path string + bare bool } // NewTestRepo creates and initializes a test repository in a @@ -37,6 +38,7 @@ func NewTestRepo(t *testing.T, bare bool, pattern string) *TestRepo { return &TestRepo{ Path: path, + bare: bare, } } @@ -89,9 +91,15 @@ func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { func (repo *TestRepo) Repository(t *testing.T) *git.Repository { t.Helper() - r, err := git.NewRepositoryFromPath(repo.Path) - require.NoError(t, err) - return r + if repo.bare { + r, err := git.NewRepositoryFromGitDir(repo.Path) + require.NoError(t, err) + return r + } else { + r, err := git.NewRepositoryFromPath(repo.Path) + require.NoError(t, err) + return r + } } // localEnvVars is a list of the variable names that should be cleared From fb78b414e22c5c95dfb4c4847b6e7cee58b1b1af Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 12 Dec 2023 11:27:10 +0100 Subject: [PATCH 169/176] Be mindful of `safe.bareRepository` in the tests As of Git v2.38.0, there is an option to prevent Git from accessing bare repositories unless asked for explicitly (via `--git-dir` or `GIT_DIR`): `safe.bareRepository`. The tests of `git sizer`, however, assume that Git will access a bare repository when the current directory points inside that repository. This only works if `safe.bareRepository` indicates that this is safe. If that is not the case, i.e. if `safe.bareRepository` is set to `explicit`, Git demands that the environment variable `GIT_DIR` is set (either explicitly, or via `--git-dir`) when accessing bare repositories. So let's set `GIT_DIR` for the test cases that work on bare repositories. 
Signed-off-by: Johannes Schindelin --- git_sizer_test.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index fbf470d..8a7a2d2 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -272,7 +272,10 @@ func TestRefSelections(t *testing.T) { args := []string{"--show-refs", "--no-progress", "--json", "--json-version=2"} args = append(args, p.args...) cmd := exec.Command(executable, args...) - cmd.Dir = repo.Path + cmd.Env = append( + os.Environ(), + "GIT_DIR="+repo.Path, + ) var stdout bytes.Buffer cmd.Stdout = &stdout var stderr bytes.Buffer @@ -519,7 +522,10 @@ References (included references marked with '+'): args := append([]string{"--show-refs", "-v", "--no-progress"}, p.args...) cmd := exec.Command(executable, args...) - cmd.Dir = repo.Path + cmd.Env = append( + os.Environ(), + "GIT_DIR="+repo.Path, + ) var stdout bytes.Buffer cmd.Stdout = &stdout var stderr bytes.Buffer From 10cb5b3e54f41bf4d8d323226fb7895cfc98ab7d Mon Sep 17 00:00:00 2001 From: J23 Date: Sat, 23 Aug 2025 13:52:23 +0800 Subject: [PATCH 170/176] Introduced sha256 support for git-sizer --- git/git.go | 27 ++++++++++++-- git/obj_iter.go | 6 ++-- git/obj_resolver.go | 4 +-- git/oid.go | 63 +++++++++++++++++++++++++++------ git/tree.go | 19 +++++----- git_sizer_test.go | 37 +++++++++++++++++++ internal/testutils/repoutils.go | 2 +- sizes/graph.go | 2 +- sizes/output.go | 4 +-- 9 files changed, 135 insertions(+), 29 deletions(-) diff --git a/git/git.go b/git/git.go index 096ce81..ef3cbc6 100644 --- a/git/git.go +++ b/git/git.go @@ -24,6 +24,8 @@ type Repository struct { // gitBin is the path of the `git` executable that should be used // when running commands in this repository. gitBin string + // hashAgo is repository hash algo + hashAlgo HashAlgo } // smartJoin returns `relPath` if it is an absolute path. 
If not, it @@ -49,9 +51,18 @@ func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { ) } + hashAlgo := HashSHA1 + cmd := exec.Command(gitBin, "--git-dir", gitDir, "rev-parse", "--show-object-format") //nolint:gosec + if out, err := cmd.Output(); err == nil { + if string(bytes.TrimSpace(out)) == "sha256" { + hashAlgo = HashSHA256 + } + } + repo := Repository{ - gitDir: gitDir, - gitBin: gitBin, + gitDir: gitDir, + gitBin: gitBin, + hashAlgo: hashAlgo, } full, err := repo.IsFull() @@ -170,3 +181,15 @@ func (repo *Repository) GitPath(relPath string) (string, error) { // current directory, we can use it as-is: return string(bytes.TrimSpace(out)), nil } + +func (repo *Repository) HashAlgo() HashAlgo { + return repo.hashAlgo +} + +func (repo *Repository) HashSize() int { + return repo.hashAlgo.HashSize() +} + +func (repo *Repository) NullOID() OID { + return repo.hashAlgo.NullOID() +} diff --git a/git/obj_iter.go b/git/obj_iter.go index cecdc2a..c367f11 100644 --- a/git/obj_iter.go +++ b/git/obj_iter.go @@ -30,7 +30,7 @@ func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error) errCh: make(chan error), headerCh: make(chan BatchHeader), } - + hashHexSize := repo.HashSize() * 2 iter.p.Add( // Read OIDs from `iter.oidCh` and write them to `git // rev-list`: @@ -68,10 +68,10 @@ func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error) pipe.LinewiseFunction( "copy-oids", func(_ context.Context, _ pipe.Env, line []byte, stdout *bufio.Writer) error { - if len(line) < 40 { + if len(line) < hashHexSize { return fmt.Errorf("line too short: '%s'", line) } - if _, err := stdout.Write(line[:40]); err != nil { + if _, err := stdout.Write(line[:hashHexSize]); err != nil { return fmt.Errorf("writing OID to 'git cat-file': %w", err) } if err := stdout.WriteByte('\n'); err != nil { diff --git a/git/obj_resolver.go b/git/obj_resolver.go index 418e293..fbeb246 100644 --- a/git/obj_resolver.go +++ b/git/obj_resolver.go @@ -9,12 +9,12 @@ 
func (repo *Repository) ResolveObject(name string) (OID, error) { cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name) output, err := cmd.Output() if err != nil { - return NullOID, fmt.Errorf("resolving object %q: %w", name, err) + return repo.NullOID(), fmt.Errorf("resolving object %q: %w", name, err) } oidString := string(bytes.TrimSpace(output)) oid, err := NewOID(oidString) if err != nil { - return NullOID, fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) + return repo.NullOID(), fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) } return oid, nil } diff --git a/git/oid.go b/git/oid.go index 2aefbcb..2a2bdfc 100644 --- a/git/oid.go +++ b/git/oid.go @@ -1,32 +1,75 @@ package git import ( + "bytes" + "crypto/sha1" //nolint:gosec + "crypto/sha256" "encoding/hex" "errors" ) +const ( + HashSizeSHA256 = sha256.Size + HashSizeSHA1 = sha1.Size + HashSizeMax = HashSizeSHA256 +) + +type HashAlgo int + +const ( + HashUnknown HashAlgo = iota + HashSHA1 + HashSHA256 +) + // OID represents the SHA-1 object ID of a Git object, in binary // format. type OID struct { - v [20]byte + v [HashSizeMax]byte + hashSize int } -// NullOID is the null object ID; i.e., all zeros. -var NullOID OID +func (h HashAlgo) NullOID() OID { + switch h { + case HashSHA1: + return OID{hashSize: HashSizeSHA1} + case HashSHA256: + return OID{hashSize: HashSizeSHA256} + } + return OID{} +} + +func (h HashAlgo) HashSize() int { + switch h { + case HashSHA1: + return HashSizeSHA1 + case HashSHA256: + return HashSizeSHA256 + } + return 0 +} + +// defaultNullOID is the null object ID; i.e., all zeros. +var defaultNullOID OID + +func IsNullOID(o OID) bool { + return bytes.Equal(o.v[:], defaultNullOID.v[:]) +} // OIDFromBytes converts a byte slice containing an object ID in // binary format into an `OID`. 
func OIDFromBytes(oidBytes []byte) (OID, error) { var oid OID - if len(oidBytes) != len(oid.v) { + oidSize := len(oidBytes) + if oidSize != HashSizeSHA1 && oidSize != HashSizeSHA256 { return OID{}, errors.New("bytes oid has the wrong length") } - copy(oid.v[0:20], oidBytes) + oid.hashSize = oidSize + copy(oid.v[0:oidSize], oidBytes) return oid, nil } -// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40}`) -// into an `OID`. +// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40,64}`) into an `OID`. func NewOID(s string) (OID, error) { oidBytes, err := hex.DecodeString(s) if err != nil { @@ -37,18 +80,18 @@ func NewOID(s string) (OID, error) { // String formats `oid` as a string in hex format. func (oid OID) String() string { - return hex.EncodeToString(oid.v[:]) + return hex.EncodeToString(oid.v[:oid.hashSize]) } // Bytes returns a byte slice view of `oid`, in binary format. func (oid OID) Bytes() []byte { - return oid.v[:] + return oid.v[:oid.hashSize] } // MarshalJSON expresses `oid` as a JSON string with its enclosing // quotation marks. func (oid OID) MarshalJSON() ([]byte, error) { - src := oid.v[:] + src := oid.v[:oid.hashSize] dst := make([]byte, hex.EncodedLen(len(src))+2) dst[0] = '"' dst[len(dst)-1] = '"' diff --git a/git/tree.go b/git/tree.go index c31fa78..18cb3ee 100644 --- a/git/tree.go +++ b/git/tree.go @@ -10,13 +10,14 @@ import ( // Tree represents a Git tree object. type Tree struct { - data string + data string + hashSize int } // ParseTree parses the tree object whose contents are contained in // `data`. `oid` is currently unused. func ParseTree(oid OID, data []byte) (*Tree, error) { - return &Tree{string(data)}, nil + return &Tree{string(data), oid.hashSize}, nil } // Size returns the size of the tree object. @@ -36,13 +37,15 @@ type TreeEntry struct { // TreeIter is an iterator over the entries in a Git tree object. type TreeIter struct { // The as-yet-unread part of the tree's data. 
- data string + data string + hashSize int } // Iter returns an iterator over the entries in `tree`. func (tree *Tree) Iter() *TreeIter { return &TreeIter{ - data: tree.data, + data: tree.data, + hashSize: tree.hashSize, } } @@ -74,12 +77,12 @@ func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) { entry.Name = iter.data[:nulAt] iter.data = iter.data[nulAt+1:] - if len(iter.data) < 20 { + if len(iter.data) < iter.hashSize { return TreeEntry{}, false, errors.New("tree entry ends unexpectedly") } - - copy(entry.OID.v[0:20], iter.data[0:20]) - iter.data = iter.data[20:] + entry.OID.hashSize = iter.hashSize + copy(entry.OID.v[0:iter.hashSize], iter.data[0:iter.hashSize]) + iter.data = iter.data[iter.hashSize:] return entry, true, nil } diff --git a/git_sizer_test.go b/git_sizer_test.go index 8a7a2d2..c74b459 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -849,3 +849,40 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") assert.Equal(t, counts.Count32(3), h.MaxExpandedBlobCount, "max expanded blob count") } + +func TestSHA256(t *testing.T) { + t.Parallel() + + ctx := context.Background() + + t.Helper() + + path, err := os.MkdirTemp("", "sha256") + require.NoError(t, err) + + testRepo := testutils.TestRepo{Path: path} + defer testRepo.Remove(t) + + // Don't use `GitCommand()` because the directory might not + // exist yet: + cmd := exec.Command("git", "init", "--object-format", "sha256", testRepo.Path) + cmd.Env = testutils.CleanGitEnv() + err = cmd.Run() + require.NoError(t, err) + + timestamp := time.Unix(1112911993, 0) + + testRepo.AddFile(t, "hello.txt", "Hello, world!\n") + cmd = testRepo.GitCommand(t, "commit", "-m", "initial") + testutils.AddAuthorInfo(cmd, &timestamp) + require.NoError(t, cmd.Run(), "creating initial commit") + + cmd = testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + testutils.AddAuthorInfo(cmd, &timestamp) + require.NoError(t, cmd.Run(), "creating commit") + 
+ repo := testRepo.Repository(t) + + _, err = sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) +} diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 48a8759..e14e487 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -165,7 +165,7 @@ func (repo *TestRepo) UpdateRef(t *testing.T, refname string, oid git.OID) { var cmd *exec.Cmd - if oid == git.NullOID { + if git.IsNullOID(oid) { cmd = repo.GitCommand(t, "update-ref", "-d", refname) } else { cmd = repo.GitCommand(t, "update-ref", refname, oid.String()) diff --git a/sizes/graph.go b/sizes/graph.go index 0fb1c8a..2101a00 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -134,7 +134,7 @@ func ScanRepositoryUsingGraph( case "tree": trees = append(trees, ObjectHeader{obj.OID, obj.ObjectSize}) case "commit": - commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, git.NullOID}) + commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, repo.NullOID()}) case "tag": tags = append(tags, ObjectHeader{obj.OID, obj.ObjectSize}) default: diff --git a/sizes/output.go b/sizes/output.go index 933cc05..037f905 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -155,7 +155,7 @@ func (i *item) Emit(t *table) { } func (i *item) Footnote(nameStyle NameStyle) string { - if i.path == nil || i.path.OID == git.NullOID { + if i.path == nil || git.IsNullOID(i.path.OID) { return "" } switch nameStyle { @@ -214,7 +214,7 @@ func (i *item) MarshalJSON() ([]byte, error) { LevelOfConcern: float64(value) / i.scale, } - if i.path != nil && i.path.OID != git.NullOID { + if i.path != nil && !git.IsNullOID(i.path.OID) { stat.ObjectName = i.path.OID.String() stat.ObjectDescription = i.path.Path() } From cf4ba45f9251b113a46f6636da087cb3a9d126a0 Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Fri, 21 Nov 2025 18:47:53 +0000 Subject: [PATCH 171/176] workflows: add document header This is a best practice and yamllint warns about omitting it. --- .github/workflows/lint.yml | 1 + .github/workflows/release.yml | 1 + .github/workflows/test.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 52a9f07..0b08cfe 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,3 +1,4 @@ +--- name: Lint on: push: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 58af3d6..b35a733 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,3 +1,4 @@ +--- name: Release on: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f658b81..9340467 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,3 +1,4 @@ +--- on: [push, pull_request] name: Test jobs: From 3ca5f0e3dcf46c416dbea72976aa225575ee650a Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 21 Nov 2025 18:50:42 +0000 Subject: [PATCH 172/176] workflows: add permissions block We'd like to run GitHub Actions with the least possible permissions assigned to the token for security reasons. To make this possible, let's add a permissions block to each workflow that lacks one. 
--- .github/workflows/lint.yml | 3 +++ .github/workflows/test.yml | 2 ++ 2 files changed, 5 insertions(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 0b08cfe..f8cfb4b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -12,6 +12,9 @@ on: - go.mod - go.sum +permissions: + contents: read + jobs: lint: runs-on: ubuntu-latest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9340467..8efc5ea 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,8 @@ --- on: [push, pull_request] name: Test +permissions: + contents: read jobs: test: strategy: From 9d29e5a1b5bdf415f0ba81b711a42fa28b470be0 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Mon, 1 Dec 2025 15:24:57 -0800 Subject: [PATCH 173/176] install-vendored-go: update download link The Google storage account appears to no longer be valid, so let's use the official download link from https://go.dev. --- script/install-vendored-go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/install-vendored-go b/script/install-vendored-go index 45ace01..76d2195 100755 --- a/script/install-vendored-go +++ b/script/install-vendored-go @@ -39,7 +39,7 @@ fi ROOTDIR="$( cd "$( dirname "$0" )/.." && pwd )" VENDORDIR="$ROOTDIR/vendor" -DOWNLOAD_URL=https://storage.googleapis.com/golang/$GO_PKG +DOWNLOAD_URL=https://go.dev/dl/$GO_PKG ARCHIVE="$VENDORDIR/$GO_PKG" INSTALLDIR="$VENDORDIR/$GO_VERSION" export GOROOT="$INSTALLDIR/go" From dba52c5e298c0d9966af9aa87969bde1dc481cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 16 Jan 2026 12:41:00 +0100 Subject: [PATCH 174/176] Skip the SHA256 test if git has no support for it If you are building and running the tests in an environment with an older version of git, it might not have SHA256 support. This should not cause the git-sizer test suite to fail as it's not an issue with git-sizer. Detect this situation and skip the test. 
--- git_sizer_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index c74b459..09f088f 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -867,7 +867,11 @@ func TestSHA256(t *testing.T) { // exist yet: cmd := exec.Command("git", "init", "--object-format", "sha256", testRepo.Path) cmd.Env = testutils.CleanGitEnv() - err = cmd.Run() + output, err := cmd.CombinedOutput() + + if err != nil && strings.HasPrefix(string(output), "error: unknown option `object-format'") { + t.Skip("skipping due to lack of SHA256 support") + } require.NoError(t, err) timestamp := time.Unix(1112911993, 0) From 0579f1812beaf09679e0651fbd0b36047759f5e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 16 Jan 2026 12:48:24 +0100 Subject: [PATCH 175/176] ci: update the setup-go version Version 2 wants to use the old URL so that also fails to run. The latest is version 6 so let's update to that and at the same time update to the same Go version that we want to download in the build script. --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8efc5ea..542f410 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,9 +12,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - name: Set up Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v6 with: - go-version: '1.17' + go-version: '1.21.3' - name: Check out code uses: actions/checkout@v2 From 37ca70f5f033785298587bb642b83fce66616322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 16 Jan 2026 15:23:00 +0100 Subject: [PATCH 176/176] test: loosen the object-format error matching As pointed out by the robot, this can be an issue with different locales. It is enough for our purposes to know that the error message includes "object-format" so we know it's unhappy with it. 
--- git_sizer_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 09f088f..f5c8006 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -869,7 +869,7 @@ func TestSHA256(t *testing.T) { cmd.Env = testutils.CleanGitEnv() output, err := cmd.CombinedOutput() - if err != nil && strings.HasPrefix(string(output), "error: unknown option `object-format'") { + if err != nil && strings.Contains(string(output), "object-format") { t.Skip("skipping due to lack of SHA256 support") } require.NoError(t, err)